LLVM 23.0.0git
LoongArchISelLowering.cpp
Go to the documentation of this file.
1//=- LoongArchISelLowering.cpp - LoongArch DAG Lowering Implementation ---===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that LoongArch uses to lower LLVM code into
10// a selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
15#include "LoongArch.h"
19#include "LoongArchSubtarget.h"
23#include "llvm/ADT/SmallSet.h"
24#include "llvm/ADT/Statistic.h"
30#include "llvm/IR/IRBuilder.h"
32#include "llvm/IR/IntrinsicsLoongArch.h"
34#include "llvm/Support/Debug.h"
39
40using namespace llvm;
41
42#define DEBUG_TYPE "loongarch-isel-lowering"
43
44STATISTIC(NumTailCalls, "Number of tail calls");
45
54
56 "loongarch-materialize-float-imm", cl::Hidden,
57 cl::desc("Maximum number of instructions used (including code sequence "
58 "to generate the value and moving the value to FPR) when "
59 "materializing floating-point immediates (default = 3)"),
61 cl::values(clEnumValN(NoMaterializeFPImm, "0", "Use constant pool"),
63 "Materialize FP immediate within 2 instructions"),
65 "Materialize FP immediate within 3 instructions"),
67 "Materialize FP immediate within 4 instructions"),
69 "Materialize FP immediate within 5 instructions"),
71 "Materialize FP immediate within 6 instructions "
72 "(behaves same as 5 on loongarch64)")));
73
74static cl::opt<bool> ZeroDivCheck("loongarch-check-zero-division", cl::Hidden,
75 cl::desc("Trap on integer division by zero."),
76 cl::init(false));
77
79 const LoongArchSubtarget &STI)
80 : TargetLowering(TM, STI), Subtarget(STI) {
81
82 MVT GRLenVT = Subtarget.getGRLenVT();
83
84 // Set up the register classes.
85
86 addRegisterClass(GRLenVT, &LoongArch::GPRRegClass);
87 if (Subtarget.hasBasicF())
88 addRegisterClass(MVT::f32, &LoongArch::FPR32RegClass);
89 if (Subtarget.hasBasicD())
90 addRegisterClass(MVT::f64, &LoongArch::FPR64RegClass);
91
92 static const MVT::SimpleValueType LSXVTs[] = {
93 MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32, MVT::v2f64};
94 static const MVT::SimpleValueType LASXVTs[] = {
95 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, MVT::v8f32, MVT::v4f64};
96
97 if (Subtarget.hasExtLSX())
98 for (MVT VT : LSXVTs)
99 addRegisterClass(VT, &LoongArch::LSX128RegClass);
100
101 if (Subtarget.hasExtLASX())
102 for (MVT VT : LASXVTs)
103 addRegisterClass(VT, &LoongArch::LASX256RegClass);
104
105 // Set operations for LA32 and LA64.
106
108 MVT::i1, Promote);
109
116
119 GRLenVT, Custom);
120
122
127
129 setOperationAction(ISD::TRAP, MVT::Other, Legal);
130
134
136
137 // BITREV/REVB requires the 32S feature.
138 if (STI.has32S()) {
139 // Expand bitreverse.i16 with native-width bitrev and shift for now, before
140 // we get to know which of sll and revb.2h is faster.
143
144 // LA32 does not have REVB.2W and REVB.D due to the 64-bit operands, and
145 // the narrower REVB.W does not exist. But LA32 does have REVB.2H, so i16
146 // and i32 could still be byte-swapped relatively cheaply.
148 } else {
156 }
157
164
167
168 // Set operations for LA64 only.
169
170 if (Subtarget.is64Bit()) {
188
192 Custom);
194 }
195
196 // Set operations for LA32 only.
197
198 if (!Subtarget.is64Bit()) {
204 if (Subtarget.hasBasicD())
206 }
207
209
210 static const ISD::CondCode FPCCToExpand[] = {
213
214 // Set operations for 'F' feature.
215
216 if (Subtarget.hasBasicF()) {
217 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
218 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
219 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
220 setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
221 setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
222
241 Subtarget.isSoftFPABI() ? LibCall : Custom);
243 Subtarget.isSoftFPABI() ? LibCall : Custom);
246 Subtarget.isSoftFPABI() ? LibCall : Custom);
247
248 if (Subtarget.is64Bit())
250
251 if (!Subtarget.hasBasicD()) {
253 if (Subtarget.is64Bit()) {
256 }
257 }
258 }
259
260 // Set operations for 'D' feature.
261
262 if (Subtarget.hasBasicD()) {
263 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
264 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
265 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
266 setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
267 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
268 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
269 setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
270
290 Subtarget.isSoftFPABI() ? LibCall : Custom);
293 Subtarget.isSoftFPABI() ? LibCall : Custom);
294
295 if (Subtarget.is64Bit())
297 }
298
299 // Set operations for 'LSX' feature.
300
301 if (Subtarget.hasExtLSX()) {
303 // Expand all truncating stores and extending loads.
304 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
305 setTruncStoreAction(VT, InnerVT, Expand);
308 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
309 }
310 // By default everything must be expanded. Then we will selectively turn
311 // on ones that can be effectively codegen'd.
312 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
314 }
315
316 for (MVT VT : LSXVTs) {
320
324
329 }
330 for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
333 Legal);
335 VT, Legal);
342 Expand);
353 }
354 for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
356 for (MVT VT : {MVT::v8i16, MVT::v4i32, MVT::v2i64})
358 for (MVT VT : {MVT::v4i32, MVT::v2i64}) {
361 }
362 for (MVT VT : {MVT::v4f32, MVT::v2f64}) {
370 VT, Expand);
378 }
380 setOperationAction(ISD::FCEIL, {MVT::f32, MVT::f64}, Legal);
381 setOperationAction(ISD::FFLOOR, {MVT::f32, MVT::f64}, Legal);
382 setOperationAction(ISD::FTRUNC, {MVT::f32, MVT::f64}, Legal);
383 setOperationAction(ISD::FROUNDEVEN, {MVT::f32, MVT::f64}, Legal);
384
385 for (MVT VT :
386 {MVT::v16i8, MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v8i16, MVT::v4i16,
387 MVT::v2i16, MVT::v4i32, MVT::v2i32, MVT::v2i64}) {
397 }
399 }
400
401 // Set operations for 'LASX' feature.
402
403 if (Subtarget.hasExtLASX()) {
404 for (MVT VT : LASXVTs) {
408
414
418 }
419 for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) {
422 Legal);
424 VT, Legal);
431 Expand);
443 }
444 for (MVT VT : {MVT::v32i8, MVT::v16i16, MVT::v8i32})
446 for (MVT VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64})
448 for (MVT VT : {MVT::v8i32, MVT::v4i32, MVT::v4i64}) {
451 }
452 for (MVT VT : {MVT::v8f32, MVT::v4f64}) {
460 VT, Expand);
468 }
470 }
471
472 // Set DAG combine for LA32 and LA64.
473 if (Subtarget.hasBasicF()) {
475 }
476
481
482 // Set DAG combine for 'LSX' feature.
483
484 if (Subtarget.hasExtLSX()) {
487 }
488
489 // Set DAG combine for 'LASX' feature.
490 if (Subtarget.hasExtLASX()) {
495 }
496
497 // Compute derived properties from the register classes.
498 computeRegisterProperties(Subtarget.getRegisterInfo());
499
501
504
505 setMaxAtomicSizeInBitsSupported(Subtarget.getGRLen());
506
508
509 // Function alignments.
511 // Set preferred alignments.
512 setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment());
513 setPrefLoopAlignment(Subtarget.getPrefLoopAlignment());
514 setMaxBytesForAlignment(Subtarget.getMaxBytesForAlignment());
515
516 // cmpxchg sizes down to 8 bits become legal if LAMCAS is available.
517 if (Subtarget.hasLAMCAS())
519
520 if (Subtarget.hasSCQ()) {
523 }
524
525 // Disable strict node mutation.
526 IsStrictFPEnabled = true;
527}
528
530 const GlobalAddressSDNode *GA) const {
531 // In order to maximise the opportunity for common subexpression elimination,
532 // keep a separate ADD node for the global address offset instead of folding
533 // it in the global address node. Later peephole optimisations may choose to
534 // fold it back in when profitable.
535 return false;
536}
537
539 SelectionDAG &DAG) const {
540 switch (Op.getOpcode()) {
542 return lowerATOMIC_FENCE(Op, DAG);
544 return lowerEH_DWARF_CFA(Op, DAG);
546 return lowerGlobalAddress(Op, DAG);
548 return lowerGlobalTLSAddress(Op, DAG);
550 return lowerINTRINSIC_WO_CHAIN(Op, DAG);
552 return lowerINTRINSIC_W_CHAIN(Op, DAG);
554 return lowerINTRINSIC_VOID(Op, DAG);
556 return lowerBlockAddress(Op, DAG);
557 case ISD::JumpTable:
558 return lowerJumpTable(Op, DAG);
559 case ISD::SHL_PARTS:
560 return lowerShiftLeftParts(Op, DAG);
561 case ISD::SRA_PARTS:
562 return lowerShiftRightParts(Op, DAG, true);
563 case ISD::SRL_PARTS:
564 return lowerShiftRightParts(Op, DAG, false);
566 return lowerConstantPool(Op, DAG);
567 case ISD::FP_TO_SINT:
568 return lowerFP_TO_SINT(Op, DAG);
569 case ISD::BITCAST:
570 return lowerBITCAST(Op, DAG);
571 case ISD::UINT_TO_FP:
572 return lowerUINT_TO_FP(Op, DAG);
573 case ISD::SINT_TO_FP:
574 return lowerSINT_TO_FP(Op, DAG);
575 case ISD::VASTART:
576 return lowerVASTART(Op, DAG);
577 case ISD::FRAMEADDR:
578 return lowerFRAMEADDR(Op, DAG);
579 case ISD::RETURNADDR:
580 return lowerRETURNADDR(Op, DAG);
582 return lowerWRITE_REGISTER(Op, DAG);
584 return lowerINSERT_VECTOR_ELT(Op, DAG);
586 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
588 return lowerBUILD_VECTOR(Op, DAG);
590 return lowerCONCAT_VECTORS(Op, DAG);
592 return lowerVECTOR_SHUFFLE(Op, DAG);
593 case ISD::BITREVERSE:
594 return lowerBITREVERSE(Op, DAG);
596 return lowerSCALAR_TO_VECTOR(Op, DAG);
597 case ISD::PREFETCH:
598 return lowerPREFETCH(Op, DAG);
599 case ISD::SELECT:
600 return lowerSELECT(Op, DAG);
601 case ISD::BRCOND:
602 return lowerBRCOND(Op, DAG);
603 case ISD::FP_TO_FP16:
604 return lowerFP_TO_FP16(Op, DAG);
605 case ISD::FP16_TO_FP:
606 return lowerFP16_TO_FP(Op, DAG);
607 case ISD::FP_TO_BF16:
608 return lowerFP_TO_BF16(Op, DAG);
609 case ISD::BF16_TO_FP:
610 return lowerBF16_TO_FP(Op, DAG);
612 return lowerVECREDUCE_ADD(Op, DAG);
613 case ISD::ROTL:
614 case ISD::ROTR:
615 return lowerRotate(Op, DAG);
623 return lowerVECREDUCE(Op, DAG);
624 case ISD::ConstantFP:
625 return lowerConstantFP(Op, DAG);
626 case ISD::SETCC:
627 return lowerSETCC(Op, DAG);
628 case ISD::FP_ROUND:
629 return lowerFP_ROUND(Op, DAG);
630 }
631 return SDValue();
632}
633
634// Helper to attempt to return a cheaper, bit-inverted version of \p V.
636 // TODO: don't always ignore oneuse constraints.
637 V = peekThroughBitcasts(V);
638 EVT VT = V.getValueType();
639
640 // Match not(xor X, -1) -> X.
641 if (V.getOpcode() == ISD::XOR &&
642 (ISD::isBuildVectorAllOnes(V.getOperand(1).getNode()) ||
643 isAllOnesConstant(V.getOperand(1))))
644 return V.getOperand(0);
645
646 // Match not(extract_subvector(not(X)) -> extract_subvector(X).
647 if (V.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
648 (isNullConstant(V.getOperand(1)) || V.getOperand(0).hasOneUse())) {
649 if (SDValue Not = isNOT(V.getOperand(0), DAG)) {
650 Not = DAG.getBitcast(V.getOperand(0).getValueType(), Not);
651 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(Not), VT, Not,
652 V.getOperand(1));
653 }
654 }
655
656 // Match not(SplatVector(not(X)) -> SplatVector(X).
657 if (V.getOpcode() == ISD::BUILD_VECTOR) {
658 if (SDValue SplatValue =
659 cast<BuildVectorSDNode>(V.getNode())->getSplatValue()) {
660 if (!V->isOnlyUserOf(SplatValue.getNode()))
661 return SDValue();
662
663 if (SDValue Not = isNOT(SplatValue, DAG)) {
664 Not = DAG.getBitcast(V.getOperand(0).getValueType(), Not);
665 return DAG.getSplat(VT, SDLoc(Not), Not);
666 }
667 }
668 }
669
670 // Match not(or(not(X),not(Y))) -> and(X, Y).
671 if (V.getOpcode() == ISD::OR && DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
672 V.getOperand(0).hasOneUse() && V.getOperand(1).hasOneUse()) {
673 // TODO: Handle cases with single NOT operand -> VANDN
674 if (SDValue Op1 = isNOT(V.getOperand(1), DAG))
675 if (SDValue Op0 = isNOT(V.getOperand(0), DAG))
676 return DAG.getNode(ISD::AND, SDLoc(V), VT, DAG.getBitcast(VT, Op0),
677 DAG.getBitcast(VT, Op1));
678 }
679
680 // TODO: Add more matching patterns. Such as,
681 // not(concat_vectors(not(X), not(Y))) -> concat_vectors(X, Y).
682 // not(slt(C, X)) -> slt(X - 1, C)
683 return SDValue();
684}
685
686// Combine two ISD::FP_ROUND / LoongArchISD::VFCVT nodes with same type to
687// LoongArchISD::VFCVT. For example:
688// x1 = fp_round x, 0
689// y1 = fp_round y, 0
690// z = concat_vectors x1, y1
691// Or
692// x1 = LoongArch::VFCVT undef, x
693// y1 = LoongArch::VFCVT undef, y
694// z = LoongArchISD::VPACKEV y1, x1
695// can be combined to:
696// z = LoongArch::VFCVT y, x
698 const LoongArchSubtarget &Subtarget) {
699 assert(((N->getOpcode() == ISD::CONCAT_VECTORS && N->getNumOperands() == 2) ||
700 (N->getOpcode() == LoongArchISD::VPACKEV)) &&
701 "Invalid Node");
702
703 SDValue Op0 = peekThroughBitcasts(N->getOperand(0));
704 SDValue Op1 = peekThroughBitcasts(N->getOperand(1));
705 unsigned Opcode0 = Op0.getOpcode();
706 unsigned Opcode1 = Op1.getOpcode();
707 if (Opcode0 != Opcode1)
708 return SDValue();
709
710 if (Opcode0 != ISD::FP_ROUND && Opcode0 != LoongArchISD::VFCVT)
711 return SDValue();
712
713 // Check if two nodes have only one use.
714 if (!Op0.hasOneUse() || !Op1.hasOneUse())
715 return SDValue();
716
717 EVT VT = N.getValueType();
718 EVT SVT0 = Op0.getValueType();
719 EVT SVT1 = Op1.getValueType();
720 // Check if two nodes have the same result type.
721 if (SVT0 != SVT1)
722 return SDValue();
723
724 // Check if two nodes have the same operand type.
725 EVT SSVT0 = Op0.getOperand(0).getValueType();
726 EVT SSVT1 = Op1.getOperand(0).getValueType();
727 if (SSVT0 != SSVT1)
728 return SDValue();
729
730 if (N->getOpcode() == ISD::CONCAT_VECTORS && Opcode0 == ISD::FP_ROUND) {
731 if (Subtarget.hasExtLASX() && VT.is256BitVector() && SVT0 == MVT::v4f32 &&
732 SSVT0 == MVT::v4f64) {
733 // A vector_shuffle is required in the final step, as xvfcvt instruction
734 // operates on each 128-bit segament as a lane.
735 SDValue Res = DAG.getNode(LoongArchISD::VFCVT, DL, MVT::v8f32,
736 Op1.getOperand(0), Op0.getOperand(0));
737 SDValue Undef = DAG.getUNDEF(Res.getValueType());
738 // After VFCVT, the high part of Res comes from the high parts of Op0 and
739 // Op1, and the low part comes from the low parts of Op0 and Op1. However,
740 // the desired order requires Op0 to fully occupy the lower half and Op1
741 // the upper half of Res. The Mask reorders the elements of Res to achieve
742 // this:
743 // - The first four elements (0, 1, 4, 5) come from Op0.
744 // - The next four elements (2, 3, 6, 7) come from Op1.
745 SmallVector<int, 8> Mask = {0, 1, 4, 5, 2, 3, 6, 7};
746 Res = DAG.getVectorShuffle(Res.getValueType(), DL, Res, Undef, Mask);
747 return DAG.getBitcast(VT, Res);
748 }
749 }
750
751 if (N->getOpcode() == LoongArchISD::VPACKEV &&
752 Opcode0 == LoongArchISD::VFCVT) {
753 // For VPACKEV, check if the first operation of LoongArchISD::VFCVT is
754 // undef.
755 if (!Op0.getOperand(0).isUndef() || !Op1.getOperand(0).isUndef())
756 return SDValue();
757
758 if (Subtarget.hasExtLSX() && (VT == MVT::v2i64 || VT == MVT::v2f64) &&
759 SVT0 == MVT::v4f32 && SSVT0 == MVT::v2f64) {
760 SDValue Res = DAG.getNode(LoongArchISD::VFCVT, DL, MVT::v4f32,
761 Op0.getOperand(1), Op1.getOperand(1));
762 return DAG.getBitcast(VT, Res);
763 }
764 }
765
766 return SDValue();
767}
768
769SDValue LoongArchTargetLowering::lowerFP_ROUND(SDValue Op,
770 SelectionDAG &DAG) const {
771 SDLoc DL(Op);
772 SDValue In = Op.getOperand(0);
773 MVT VT = Op.getSimpleValueType();
774 MVT SVT = In.getSimpleValueType();
775
776 if (VT == MVT::v4f32 && SVT == MVT::v4f64) {
777 SDValue Lo, Hi;
778 std::tie(Lo, Hi) = DAG.SplitVector(In, DL);
779 return DAG.getNode(LoongArchISD::VFCVT, DL, VT, Hi, Lo);
780 }
781
782 return SDValue();
783}
784
785SDValue LoongArchTargetLowering::lowerConstantFP(SDValue Op,
786 SelectionDAG &DAG) const {
787 EVT VT = Op.getValueType();
788 ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op);
789 const APFloat &FPVal = CFP->getValueAPF();
790 SDLoc DL(CFP);
791
792 assert((VT == MVT::f32 && Subtarget.hasBasicF()) ||
793 (VT == MVT::f64 && Subtarget.hasBasicD()));
794
795 // If value is 0.0 or -0.0, just ignore it.
796 if (FPVal.isZero())
797 return SDValue();
798
799 // If lsx enabled, use cheaper 'vldi' instruction if possible.
800 if (isFPImmVLDILegal(FPVal, VT))
801 return SDValue();
802
803 // Construct as integer, and move to float register.
804 APInt INTVal = FPVal.bitcastToAPInt();
805
806 // If more than MaterializeFPImmInsNum instructions will be used to
807 // generate the INTVal and move it to float register, fallback to
808 // use floating point load from the constant pool.
810 int InsNum = Seq.size() + ((VT == MVT::f64 && !Subtarget.is64Bit()) ? 2 : 1);
811 if (InsNum > MaterializeFPImmInsNum && !FPVal.isExactlyValue(+1.0))
812 return SDValue();
813
814 switch (VT.getSimpleVT().SimpleTy) {
815 default:
816 llvm_unreachable("Unexpected floating point type!");
817 break;
818 case MVT::f32: {
819 SDValue NewVal = DAG.getConstant(INTVal, DL, MVT::i32);
820 if (Subtarget.is64Bit())
821 NewVal = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, NewVal);
822 return DAG.getNode(Subtarget.is64Bit() ? LoongArchISD::MOVGR2FR_W_LA64
823 : LoongArchISD::MOVGR2FR_W,
824 DL, VT, NewVal);
825 }
826 case MVT::f64: {
827 if (Subtarget.is64Bit()) {
828 SDValue NewVal = DAG.getConstant(INTVal, DL, MVT::i64);
829 return DAG.getNode(LoongArchISD::MOVGR2FR_D, DL, VT, NewVal);
830 }
831 SDValue Lo = DAG.getConstant(INTVal.trunc(32), DL, MVT::i32);
832 SDValue Hi = DAG.getConstant(INTVal.lshr(32).trunc(32), DL, MVT::i32);
833 return DAG.getNode(LoongArchISD::MOVGR2FR_D_LO_HI, DL, VT, Lo, Hi);
834 }
835 }
836
837 return SDValue();
838}
839
840// Ensure SETCC result and operand have the same bit width; isel does not
841// support mismatched widths.
842SDValue LoongArchTargetLowering::lowerSETCC(SDValue Op,
843 SelectionDAG &DAG) const {
844 SDLoc DL(Op);
845 EVT ResultVT = Op.getValueType();
846 EVT OperandVT = Op.getOperand(0).getValueType();
847
848 EVT SetCCResultVT =
849 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), OperandVT);
850
851 if (ResultVT == SetCCResultVT)
852 return Op;
853
854 assert(Op.getOperand(0).getValueType() == Op.getOperand(1).getValueType() &&
855 "SETCC operands must have the same type!");
856
857 SDValue SetCCNode =
858 DAG.getNode(ISD::SETCC, DL, SetCCResultVT, Op.getOperand(0),
859 Op.getOperand(1), Op.getOperand(2));
860
861 if (ResultVT.bitsGT(SetCCResultVT))
862 SetCCNode = DAG.getNode(ISD::SIGN_EXTEND, DL, ResultVT, SetCCNode);
863 else if (ResultVT.bitsLT(SetCCResultVT))
864 SetCCNode = DAG.getNode(ISD::TRUNCATE, DL, ResultVT, SetCCNode);
865
866 return SetCCNode;
867}
868
869// Lower vecreduce_add using vhaddw instructions.
870// For Example:
871// call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a)
872// can be lowered to:
873// VHADDW_D_W vr0, vr0, vr0
874// VHADDW_Q_D vr0, vr0, vr0
875// VPICKVE2GR_D a0, vr0, 0
876// ADDI_W a0, a0, 0
877SDValue LoongArchTargetLowering::lowerVECREDUCE_ADD(SDValue Op,
878 SelectionDAG &DAG) const {
879
880 SDLoc DL(Op);
881 MVT OpVT = Op.getSimpleValueType();
882 SDValue Val = Op.getOperand(0);
883
884 unsigned NumEles = Val.getSimpleValueType().getVectorNumElements();
885 unsigned EleBits = Val.getSimpleValueType().getScalarSizeInBits();
886 unsigned ResBits = OpVT.getScalarSizeInBits();
887
888 unsigned LegalVecSize = 128;
889 bool isLASX256Vector =
890 Subtarget.hasExtLASX() && Val.getValueSizeInBits() == 256;
891
892 // Ensure operand type legal or enable it legal.
893 while (!isTypeLegal(Val.getSimpleValueType())) {
894 Val = DAG.WidenVector(Val, DL);
895 }
896
897 // NumEles is designed for iterations count, v4i32 for LSX
898 // and v8i32 for LASX should have the same count.
899 if (isLASX256Vector) {
900 NumEles /= 2;
901 LegalVecSize = 256;
902 }
903
904 for (unsigned i = 1; i < NumEles; i *= 2, EleBits *= 2) {
905 MVT IntTy = MVT::getIntegerVT(EleBits);
906 MVT VecTy = MVT::getVectorVT(IntTy, LegalVecSize / EleBits);
907 Val = DAG.getNode(LoongArchISD::VHADDW, DL, VecTy, Val, Val);
908 }
909
910 if (isLASX256Vector) {
911 SDValue Tmp = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, Val,
912 DAG.getConstant(2, DL, Subtarget.getGRLenVT()));
913 Val = DAG.getNode(ISD::ADD, DL, MVT::v4i64, Tmp, Val);
914 }
915
916 Val = DAG.getBitcast(MVT::getVectorVT(OpVT, LegalVecSize / ResBits), Val);
917 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Val,
918 DAG.getConstant(0, DL, Subtarget.getGRLenVT()));
919}
920
921// Lower vecreduce_and/or/xor/[s/u]max/[s/u]min.
922// For Example:
923// call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %a)
924// can be lowered to:
925// VBSRL_V vr1, vr0, 8
926// VMAX_W vr0, vr1, vr0
927// VBSRL_V vr1, vr0, 4
928// VMAX_W vr0, vr1, vr0
929// VPICKVE2GR_W a0, vr0, 0
930// For 256 bit vector, it is illegal and will be spilt into
931// two 128 bit vector by default then processed by this.
932SDValue LoongArchTargetLowering::lowerVECREDUCE(SDValue Op,
933 SelectionDAG &DAG) const {
934 SDLoc DL(Op);
935
936 MVT OpVT = Op.getSimpleValueType();
937 SDValue Val = Op.getOperand(0);
938
939 unsigned NumEles = Val.getSimpleValueType().getVectorNumElements();
940 unsigned EleBits = Val.getSimpleValueType().getScalarSizeInBits();
941
942 // Ensure operand type legal or enable it legal.
943 while (!isTypeLegal(Val.getSimpleValueType())) {
944 Val = DAG.WidenVector(Val, DL);
945 }
946
947 unsigned Opcode = ISD::getVecReduceBaseOpcode(Op.getOpcode());
948 MVT VecTy = Val.getSimpleValueType();
949 MVT GRLenVT = Subtarget.getGRLenVT();
950
951 for (int i = NumEles; i > 1; i /= 2) {
952 SDValue ShiftAmt = DAG.getConstant(i * EleBits / 16, DL, GRLenVT);
953 SDValue Tmp = DAG.getNode(LoongArchISD::VBSRL, DL, VecTy, Val, ShiftAmt);
954 Val = DAG.getNode(Opcode, DL, VecTy, Tmp, Val);
955 }
956
957 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Val,
958 DAG.getConstant(0, DL, GRLenVT));
959}
960
961SDValue LoongArchTargetLowering::lowerPREFETCH(SDValue Op,
962 SelectionDAG &DAG) const {
963 unsigned IsData = Op.getConstantOperandVal(4);
964
965 // We don't support non-data prefetch.
966 // Just preserve the chain.
967 if (!IsData)
968 return Op.getOperand(0);
969
970 return Op;
971}
972
973SDValue LoongArchTargetLowering::lowerRotate(SDValue Op,
974 SelectionDAG &DAG) const {
975 MVT VT = Op.getSimpleValueType();
976 assert(VT.isVector() && "Unexpected type");
977
978 SDLoc DL(Op);
979 SDValue R = Op.getOperand(0);
980 SDValue Amt = Op.getOperand(1);
981 unsigned Opcode = Op.getOpcode();
982 unsigned EltSizeInBits = VT.getScalarSizeInBits();
983
984 auto checkCstSplat = [](SDValue V, APInt &CstSplatValue) {
985 if (V.getOpcode() != ISD::BUILD_VECTOR)
986 return false;
987 if (SDValue SplatValue =
988 cast<BuildVectorSDNode>(V.getNode())->getSplatValue()) {
989 if (auto *C = dyn_cast<ConstantSDNode>(SplatValue)) {
990 CstSplatValue = C->getAPIntValue();
991 return true;
992 }
993 }
994 return false;
995 };
996
997 // Check for constant splat rotation amount.
998 APInt CstSplatValue;
999 bool IsCstSplat = checkCstSplat(Amt, CstSplatValue);
1000 bool isROTL = Opcode == ISD::ROTL;
1001
1002 // Check for splat rotate by zero.
1003 if (IsCstSplat && CstSplatValue.urem(EltSizeInBits) == 0)
1004 return R;
1005
1006 // LoongArch targets always prefer ISD::ROTR.
1007 if (isROTL) {
1008 SDValue Zero = DAG.getConstant(0, DL, VT);
1009 return DAG.getNode(ISD::ROTR, DL, VT, R,
1010 DAG.getNode(ISD::SUB, DL, VT, Zero, Amt));
1011 }
1012
1013 // Rotate by a immediate.
1014 if (IsCstSplat) {
1015 // ISD::ROTR: Attemp to rotate by a positive immediate.
1016 SDValue Bits = DAG.getConstant(EltSizeInBits, DL, VT);
1017 if (SDValue Urem =
1018 DAG.FoldConstantArithmetic(ISD::UREM, DL, VT, {Amt, Bits}))
1019 return DAG.getNode(Opcode, DL, VT, R, Urem);
1020 }
1021
1022 return Op;
1023}
1024
1025// Return true if Val is equal to (setcc LHS, RHS, CC).
1026// Return false if Val is the inverse of (setcc LHS, RHS, CC).
1027// Otherwise, return std::nullopt.
1028static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS,
1029 ISD::CondCode CC, SDValue Val) {
1030 assert(Val->getOpcode() == ISD::SETCC);
1031 SDValue LHS2 = Val.getOperand(0);
1032 SDValue RHS2 = Val.getOperand(1);
1033 ISD::CondCode CC2 = cast<CondCodeSDNode>(Val.getOperand(2))->get();
1034
1035 if (LHS == LHS2 && RHS == RHS2) {
1036 if (CC == CC2)
1037 return true;
1038 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
1039 return false;
1040 } else if (LHS == RHS2 && RHS == LHS2) {
1042 if (CC == CC2)
1043 return true;
1044 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
1045 return false;
1046 }
1047
1048 return std::nullopt;
1049}
1050
1052 const LoongArchSubtarget &Subtarget) {
1053 SDValue CondV = N->getOperand(0);
1054 SDValue TrueV = N->getOperand(1);
1055 SDValue FalseV = N->getOperand(2);
1056 MVT VT = N->getSimpleValueType(0);
1057 SDLoc DL(N);
1058
1059 // (select c, -1, y) -> -c | y
1060 if (isAllOnesConstant(TrueV)) {
1061 SDValue Neg = DAG.getNegative(CondV, DL, VT);
1062 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(FalseV));
1063 }
1064 // (select c, y, -1) -> (c-1) | y
1065 if (isAllOnesConstant(FalseV)) {
1066 SDValue Neg =
1067 DAG.getNode(ISD::ADD, DL, VT, CondV, DAG.getAllOnesConstant(DL, VT));
1068 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(TrueV));
1069 }
1070
1071 // (select c, 0, y) -> (c-1) & y
1072 if (isNullConstant(TrueV)) {
1073 SDValue Neg =
1074 DAG.getNode(ISD::ADD, DL, VT, CondV, DAG.getAllOnesConstant(DL, VT));
1075 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(FalseV));
1076 }
1077 // (select c, y, 0) -> -c & y
1078 if (isNullConstant(FalseV)) {
1079 SDValue Neg = DAG.getNegative(CondV, DL, VT);
1080 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(TrueV));
1081 }
1082
1083 // select c, ~x, x --> xor -c, x
1084 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
1085 const APInt &TrueVal = TrueV->getAsAPIntVal();
1086 const APInt &FalseVal = FalseV->getAsAPIntVal();
1087 if (~TrueVal == FalseVal) {
1088 SDValue Neg = DAG.getNegative(CondV, DL, VT);
1089 return DAG.getNode(ISD::XOR, DL, VT, Neg, FalseV);
1090 }
1091 }
1092
1093 // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops
1094 // when both truev and falsev are also setcc.
1095 if (CondV.getOpcode() == ISD::SETCC && TrueV.getOpcode() == ISD::SETCC &&
1096 FalseV.getOpcode() == ISD::SETCC) {
1097 SDValue LHS = CondV.getOperand(0);
1098 SDValue RHS = CondV.getOperand(1);
1099 ISD::CondCode CC = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
1100
1101 // (select x, x, y) -> x | y
1102 // (select !x, x, y) -> x & y
1103 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, TrueV)) {
1104 return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, TrueV,
1105 DAG.getFreeze(FalseV));
1106 }
1107 // (select x, y, x) -> x & y
1108 // (select !x, y, x) -> x | y
1109 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, FalseV)) {
1110 return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT,
1111 DAG.getFreeze(TrueV), FalseV);
1112 }
1113 }
1114
1115 return SDValue();
1116}
1117
1118// Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants
1119// into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable.
1120// For now we only consider transformation profitable if `binOp(c0, c1)` ends up
1121// being `0` or `-1`. In such cases we can replace `select` with `and`.
1122// TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize
1123// than `c0`?
1124static SDValue
1126 const LoongArchSubtarget &Subtarget) {
1127 unsigned SelOpNo = 0;
1128 SDValue Sel = BO->getOperand(0);
1129 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
1130 SelOpNo = 1;
1131 Sel = BO->getOperand(1);
1132 }
1133
1134 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
1135 return SDValue();
1136
1137 unsigned ConstSelOpNo = 1;
1138 unsigned OtherSelOpNo = 2;
1139 if (!isa<ConstantSDNode>(Sel->getOperand(ConstSelOpNo))) {
1140 ConstSelOpNo = 2;
1141 OtherSelOpNo = 1;
1142 }
1143 SDValue ConstSelOp = Sel->getOperand(ConstSelOpNo);
1144 ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(ConstSelOp);
1145 if (!ConstSelOpNode || ConstSelOpNode->isOpaque())
1146 return SDValue();
1147
1148 SDValue ConstBinOp = BO->getOperand(SelOpNo ^ 1);
1149 ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(ConstBinOp);
1150 if (!ConstBinOpNode || ConstBinOpNode->isOpaque())
1151 return SDValue();
1152
1153 SDLoc DL(Sel);
1154 EVT VT = BO->getValueType(0);
1155
1156 SDValue NewConstOps[2] = {ConstSelOp, ConstBinOp};
1157 if (SelOpNo == 1)
1158 std::swap(NewConstOps[0], NewConstOps[1]);
1159
1160 SDValue NewConstOp =
1161 DAG.FoldConstantArithmetic(BO->getOpcode(), DL, VT, NewConstOps);
1162 if (!NewConstOp)
1163 return SDValue();
1164
1165 const APInt &NewConstAPInt = NewConstOp->getAsAPIntVal();
1166 if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes())
1167 return SDValue();
1168
1169 SDValue OtherSelOp = Sel->getOperand(OtherSelOpNo);
1170 SDValue NewNonConstOps[2] = {OtherSelOp, ConstBinOp};
1171 if (SelOpNo == 1)
1172 std::swap(NewNonConstOps[0], NewNonConstOps[1]);
1173 SDValue NewNonConstOp = DAG.getNode(BO->getOpcode(), DL, VT, NewNonConstOps);
1174
1175 SDValue NewT = (ConstSelOpNo == 1) ? NewConstOp : NewNonConstOp;
1176 SDValue NewF = (ConstSelOpNo == 1) ? NewNonConstOp : NewConstOp;
1177 return DAG.getSelect(DL, VT, Sel.getOperand(0), NewT, NewF);
1178}
1179
1180// Changes the condition code and swaps operands if necessary, so the SetCC
1181// operation matches one of the comparisons supported directly by branches
1182// in the LoongArch ISA. May adjust compares to favor compare with 0 over
1183// compare with 1/-1.
1185 ISD::CondCode &CC, SelectionDAG &DAG) {
1186 // If this is a single bit test that can't be handled by ANDI, shift the
1187 // bit to be tested to the MSB and perform a signed compare with 0.
1188 if (isIntEqualitySetCC(CC) && isNullConstant(RHS) &&
1189 LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
1190 isa<ConstantSDNode>(LHS.getOperand(1))) {
1191 uint64_t Mask = LHS.getConstantOperandVal(1);
1192 if ((isPowerOf2_64(Mask) || isMask_64(Mask)) && !isInt<12>(Mask)) {
1193 unsigned ShAmt = 0;
1194 if (isPowerOf2_64(Mask)) {
1195 CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
1196 ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);
1197 } else {
1198 ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Mask);
1199 }
1200
1201 LHS = LHS.getOperand(0);
1202 if (ShAmt != 0)
1203 LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS,
1204 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
1205 return;
1206 }
1207 }
1208
1209 if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
1210 int64_t C = RHSC->getSExtValue();
1211 switch (CC) {
1212 default:
1213 break;
1214 case ISD::SETGT:
1215 // Convert X > -1 to X >= 0.
1216 if (C == -1) {
1217 RHS = DAG.getConstant(0, DL, RHS.getValueType());
1218 CC = ISD::SETGE;
1219 return;
1220 }
1221 break;
1222 case ISD::SETLT:
1223 // Convert X < 1 to 0 >= X.
1224 if (C == 1) {
1225 RHS = LHS;
1226 LHS = DAG.getConstant(0, DL, RHS.getValueType());
1227 CC = ISD::SETGE;
1228 return;
1229 }
1230 break;
1231 }
1232 }
1233
1234 switch (CC) {
1235 default:
1236 break;
1237 case ISD::SETGT:
1238 case ISD::SETLE:
1239 case ISD::SETUGT:
1240 case ISD::SETULE:
1242 std::swap(LHS, RHS);
1243 break;
1244 }
1245}
1246
// Lower ISD::SELECT to LoongArchISD::SELECT_CC, after first attempting a few
// DAG-level simplifications (select-to-binop, folding a binary-op user into
// the select arms, and constant-difference selects).
SDValue LoongArchTargetLowering::lowerSELECT(SDValue Op,
                                             SelectionDAG &DAG) const {
  SDValue CondV = Op.getOperand(0);
  SDValue TrueV = Op.getOperand(1);
  SDValue FalseV = Op.getOperand(2);
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();
  MVT GRLenVT = Subtarget.getGRLenVT();

  // First try turning the select into a plain binary operation when the
  // arms allow it (e.g. constant arms).
  if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
    return V;

  // If this select's only user is a speculatable binary op, try folding that
  // binop into the select arms instead, then lower whatever results.
  if (Op.hasOneUse()) {
    unsigned UseOpc = Op->user_begin()->getOpcode();
    if (isBinOp(UseOpc) && DAG.isSafeToSpeculativelyExecute(UseOpc)) {
      SDNode *BinOp = *Op->user_begin();
      if (SDValue NewSel = foldBinOpIntoSelectIfProfitable(*Op->user_begin(),
                                                           DAG, Subtarget)) {
        DAG.ReplaceAllUsesWith(BinOp, &NewSel);
        // Opcode check is necessary because foldBinOpIntoSelectIfProfitable
        // may return a constant node and cause crash in lowerSELECT.
        if (NewSel.getOpcode() == ISD::SELECT)
          return lowerSELECT(NewSel, DAG);
        return NewSel;
      }
    }
  }

  // If the condition is not an integer SETCC which operates on GRLenVT, we need
  // to emit a LoongArchISD::SELECT_CC comparing the condition to zero. i.e.:
  // (select condv, truev, falsev)
  // -> (loongarchisd::select_cc condv, zero, setne, truev, falsev)
  if (CondV.getOpcode() != ISD::SETCC ||
      CondV.getOperand(0).getSimpleValueType() != GRLenVT) {
    SDValue Zero = DAG.getConstant(0, DL, GRLenVT);
    SDValue SetNE = DAG.getCondCode(ISD::SETNE);

    SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};

    return DAG.getNode(LoongArchISD::SELECT_CC, DL, VT, Ops);
  }

  // If the CondV is the output of a SETCC node which operates on GRLenVT
  // inputs, then merge the SETCC node into the lowered LoongArchISD::SELECT_CC
  // to take advantage of the integer compare+branch instructions. i.e.: (select
  // (setcc lhs, rhs, cc), truev, falsev)
  // -> (loongarchisd::select_cc lhs, rhs, cc, truev, falsev)
  SDValue LHS = CondV.getOperand(0);
  SDValue RHS = CondV.getOperand(1);
  ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();

  // Special case for a select of 2 constants that have a difference of 1.
  // Normally this is done by DAGCombine, but if the select is introduced by
  // type legalization or op legalization, we miss it. Restricting to SETLT
  // case for now because that is what signed saturating add/sub need.
  // FIXME: We don't need the condition to be SETLT or even a SETCC,
  // but we would probably want to swap the true/false values if the condition
  // is SETGE/SETLE to avoid an XORI.
  if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
      CCVal == ISD::SETLT) {
    const APInt &TrueVal = TrueV->getAsAPIntVal();
    const APInt &FalseVal = FalseV->getAsAPIntVal();
    // The SETLT condition materializes as 0 or 1, so the select can become a
    // single add/sub with the condition value.
    if (TrueVal - 1 == FalseVal)
      return DAG.getNode(ISD::ADD, DL, VT, CondV, FalseV);
    if (TrueVal + 1 == FalseVal)
      return DAG.getNode(ISD::SUB, DL, VT, FalseV, CondV);
  }

  translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
  // 1 < x ? x : 1 -> 0 < x ? x : 1
  if (isOneConstant(LHS) && (CCVal == ISD::SETLT || CCVal == ISD::SETULT) &&
      RHS == TrueV && LHS == FalseV) {
    LHS = DAG.getConstant(0, DL, VT);
    // 0 <u x is the same as x != 0.
    if (CCVal == ISD::SETULT) {
      std::swap(LHS, RHS);
      CCVal = ISD::SETNE;
    }
  }

  // x <s -1 ? x : -1 -> x <s 0 ? x : -1
  if (isAllOnesConstant(RHS) && CCVal == ISD::SETLT && LHS == TrueV &&
      RHS == FalseV) {
    RHS = DAG.getConstant(0, DL, VT);
  }

  SDValue TargetCC = DAG.getCondCode(CCVal);

  if (isa<ConstantSDNode>(TrueV) && !isa<ConstantSDNode>(FalseV)) {
    // (select (setcc lhs, rhs, CC), constant, falsev)
    // -> (select (setcc lhs, rhs, InverseCC), falsev, constant)
    std::swap(TrueV, FalseV);
    TargetCC = DAG.getCondCode(ISD::getSetCCInverse(CCVal, LHS.getValueType()));
  }

  SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
  return DAG.getNode(LoongArchISD::SELECT_CC, DL, VT, Ops);
}
1345
1346SDValue LoongArchTargetLowering::lowerBRCOND(SDValue Op,
1347 SelectionDAG &DAG) const {
1348 SDValue CondV = Op.getOperand(1);
1349 SDLoc DL(Op);
1350 MVT GRLenVT = Subtarget.getGRLenVT();
1351
1352 if (CondV.getOpcode() == ISD::SETCC) {
1353 if (CondV.getOperand(0).getValueType() == GRLenVT) {
1354 SDValue LHS = CondV.getOperand(0);
1355 SDValue RHS = CondV.getOperand(1);
1356 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
1357
1358 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
1359
1360 SDValue TargetCC = DAG.getCondCode(CCVal);
1361 return DAG.getNode(LoongArchISD::BR_CC, DL, Op.getValueType(),
1362 Op.getOperand(0), LHS, RHS, TargetCC,
1363 Op.getOperand(2));
1364 } else if (CondV.getOperand(0).getValueType().isFloatingPoint()) {
1365 return DAG.getNode(LoongArchISD::BRCOND, DL, Op.getValueType(),
1366 Op.getOperand(0), CondV, Op.getOperand(2));
1367 }
1368 }
1369
1370 return DAG.getNode(LoongArchISD::BR_CC, DL, Op.getValueType(),
1371 Op.getOperand(0), CondV, DAG.getConstant(0, DL, GRLenVT),
1372 DAG.getCondCode(ISD::SETNE), Op.getOperand(2));
1373}
1374
1375SDValue
1376LoongArchTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
1377 SelectionDAG &DAG) const {
1378 SDLoc DL(Op);
1379 MVT OpVT = Op.getSimpleValueType();
1380
1381 SDValue Vector = DAG.getUNDEF(OpVT);
1382 SDValue Val = Op.getOperand(0);
1383 SDValue Idx = DAG.getConstant(0, DL, Subtarget.getGRLenVT());
1384
1385 return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, OpVT, Vector, Val, Idx);
1386}
1387
1388SDValue LoongArchTargetLowering::lowerBITREVERSE(SDValue Op,
1389 SelectionDAG &DAG) const {
1390 EVT ResTy = Op->getValueType(0);
1391 SDValue Src = Op->getOperand(0);
1392 SDLoc DL(Op);
1393
1394 // LoongArchISD::BITREV_8B is not supported on LA32.
1395 if (!Subtarget.is64Bit() && (ResTy == MVT::v16i8 || ResTy == MVT::v32i8))
1396 return SDValue();
1397
1398 EVT NewVT = ResTy.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
1399 unsigned int OrigEltNum = ResTy.getVectorNumElements();
1400 unsigned int NewEltNum = NewVT.getVectorNumElements();
1401
1402 SDValue NewSrc = DAG.getNode(ISD::BITCAST, DL, NewVT, Src);
1403
1405 for (unsigned int i = 0; i < NewEltNum; i++) {
1406 SDValue Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, NewSrc,
1407 DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
1408 unsigned RevOp = (ResTy == MVT::v16i8 || ResTy == MVT::v32i8)
1409 ? (unsigned)LoongArchISD::BITREV_8B
1410 : (unsigned)ISD::BITREVERSE;
1411 Ops.push_back(DAG.getNode(RevOp, DL, MVT::i64, Op));
1412 }
1413 SDValue Res =
1414 DAG.getNode(ISD::BITCAST, DL, ResTy, DAG.getBuildVector(NewVT, DL, Ops));
1415
1416 switch (ResTy.getSimpleVT().SimpleTy) {
1417 default:
1418 return SDValue();
1419 case MVT::v16i8:
1420 case MVT::v32i8:
1421 return Res;
1422 case MVT::v8i16:
1423 case MVT::v16i16:
1424 case MVT::v4i32:
1425 case MVT::v8i32: {
1427 for (unsigned int i = 0; i < NewEltNum; i++)
1428 for (int j = OrigEltNum / NewEltNum - 1; j >= 0; j--)
1429 Mask.push_back(j + (OrigEltNum / NewEltNum) * i);
1430 return DAG.getVectorShuffle(ResTy, DL, Res, DAG.getUNDEF(ResTy), Mask);
1431 }
1432 }
1433}
1434
1435// Widen element type to get a new mask value (if possible).
1436// For example:
1437// shufflevector <4 x i32> %a, <4 x i32> %b,
1438// <4 x i32> <i32 6, i32 7, i32 2, i32 3>
1439// is equivalent to:
1440// shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
1441// can be lowered to:
1442// VPACKOD_D vr0, vr0, vr1
1444 SDValue V1, SDValue V2, SelectionDAG &DAG) {
1445 unsigned EltBits = VT.getScalarSizeInBits();
1446
1447 if (EltBits > 32 || EltBits == 1)
1448 return SDValue();
1449
1450 SmallVector<int, 8> NewMask;
1451 if (widenShuffleMaskElts(Mask, NewMask)) {
1452 MVT NewEltVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(EltBits * 2)
1453 : MVT::getIntegerVT(EltBits * 2);
1454 MVT NewVT = MVT::getVectorVT(NewEltVT, VT.getVectorNumElements() / 2);
1455 if (DAG.getTargetLoweringInfo().isTypeLegal(NewVT)) {
1456 SDValue NewV1 = DAG.getBitcast(NewVT, V1);
1457 SDValue NewV2 = DAG.getBitcast(NewVT, V2);
1458 return DAG.getBitcast(
1459 VT, DAG.getVectorShuffle(NewVT, DL, NewV1, NewV2, NewMask));
1460 }
1461 }
1462
1463 return SDValue();
1464}
1465
/// Attempts to match a shuffle mask against the VBSLL, VBSRL, VSLLI and VSRLI
/// instruction.
// The function matches elements from one of the input vectors shuffled to the
// left or right with zeroable elements 'shifted in'. It handles both the
// strictly bit-wise element shifts and the byte shift across an entire 128-bit
// lane.
// Mostly copied from X86.
//
// On success, returns the shift amount (in bits for element shifts, in bytes
// for whole-lane byte shifts) and sets ShiftVT/Opcode; returns -1 when no
// shift pattern matches.
static int matchShuffleAsShift(MVT &ShiftVT, unsigned &Opcode,
                               unsigned ScalarSizeInBits, ArrayRef<int> Mask,
                               int MaskOffset, const APInt &Zeroable) {
  int Size = Mask.size();
  unsigned SizeInBits = Size * ScalarSizeInBits;

  // All elements that the shift would move "in" must be zeroable.
  auto CheckZeros = [&](int Shift, int Scale, bool Left) {
    for (int i = 0; i < Size; i += Scale)
      for (int j = 0; j < Shift; ++j)
        if (!Zeroable[i + j + (Left ? 0 : (Scale - Shift))])
          return false;

    return true;
  };

  // True if Mask[Pos, Pos+Size) is undef or counts up from Low by Step.
  auto isSequentialOrUndefInRange = [&](unsigned Pos, unsigned Size, int Low,
                                        int Step = 1) {
    for (unsigned i = Pos, e = Pos + Size; i != e; ++i, Low += Step)
      if (!(Mask[i] == -1 || Mask[i] == Low))
        return false;
    return true;
  };

  // Check whether the surviving elements line up with a shift of the given
  // amount/scale/direction, and if so compute ShiftVT/Opcode and the shift
  // amount encoding.
  auto MatchShift = [&](int Shift, int Scale, bool Left) {
    for (int i = 0; i != Size; i += Scale) {
      unsigned Pos = Left ? i + Shift : i;
      unsigned Low = Left ? i : i + Shift;
      unsigned Len = Scale - Shift;
      if (!isSequentialOrUndefInRange(Pos, Len, Low + MaskOffset))
        return -1;
    }

    int ShiftEltBits = ScalarSizeInBits * Scale;
    // Shifts wider than 64 bits must be done as whole-lane byte shifts.
    bool ByteShift = ShiftEltBits > 64;
    Opcode = Left ? (ByteShift ? LoongArchISD::VBSLL : LoongArchISD::VSLLI)
                  : (ByteShift ? LoongArchISD::VBSRL : LoongArchISD::VSRLI);
    int ShiftAmt = Shift * ScalarSizeInBits / (ByteShift ? 8 : 1);

    // Normalize the scale for byte shifts to still produce an i64 element
    // type.
    Scale = ByteShift ? Scale / 2 : Scale;

    // We need to round trip through the appropriate type for the shift.
    MVT ShiftSVT = MVT::getIntegerVT(ScalarSizeInBits * Scale);
    ShiftVT = ByteShift ? MVT::getVectorVT(MVT::i8, SizeInBits / 8)
                        : MVT::getVectorVT(ShiftSVT, Size / Scale);
    return (int)ShiftAmt;
  };

  // Try every scale/shift/direction combination up to the 128-bit lane width.
  unsigned MaxWidth = 128;
  for (int Scale = 2; Scale * ScalarSizeInBits <= MaxWidth; Scale *= 2)
    for (int Shift = 1; Shift != Scale; ++Shift)
      for (bool Left : {true, false})
        if (CheckZeros(Shift, Scale, Left)) {
          int ShiftAmt = MatchShift(Shift, Scale, Left);
          if (0 < ShiftAmt)
            return ShiftAmt;
        }

  // no match
  return -1;
}
1535
1536/// Lower VECTOR_SHUFFLE as shift (if possible).
1537///
1538/// For example:
1539/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
1540/// <4 x i32> <i32 4, i32 0, i32 1, i32 2>
1541/// is lowered to:
1542/// (VBSLL_V $v0, $v0, 4)
1543///
1544/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
1545/// <4 x i32> <i32 4, i32 0, i32 4, i32 2>
1546/// is lowered to:
1547/// (VSLLI_D $v0, $v0, 32)
1549 MVT VT, SDValue V1, SDValue V2,
1550 SelectionDAG &DAG,
1551 const LoongArchSubtarget &Subtarget,
1552 const APInt &Zeroable) {
1553 int Size = Mask.size();
1554 assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");
1555
1556 MVT ShiftVT;
1557 SDValue V = V1;
1558 unsigned Opcode;
1559
1560 // Try to match shuffle against V1 shift.
1561 int ShiftAmt = matchShuffleAsShift(ShiftVT, Opcode, VT.getScalarSizeInBits(),
1562 Mask, 0, Zeroable);
1563
1564 // If V1 failed, try to match shuffle against V2 shift.
1565 if (ShiftAmt < 0) {
1566 ShiftAmt = matchShuffleAsShift(ShiftVT, Opcode, VT.getScalarSizeInBits(),
1567 Mask, Size, Zeroable);
1568 V = V2;
1569 }
1570
1571 if (ShiftAmt < 0)
1572 return SDValue();
1573
1574 assert(DAG.getTargetLoweringInfo().isTypeLegal(ShiftVT) &&
1575 "Illegal integer vector type");
1576 V = DAG.getBitcast(ShiftVT, V);
1577 V = DAG.getNode(Opcode, DL, ShiftVT, V,
1578 DAG.getConstant(ShiftAmt, DL, Subtarget.getGRLenVT()));
1579 return DAG.getBitcast(VT, V);
1580}
1581
1582/// Determine whether a range fits a regular pattern of values.
1583/// This function accounts for the possibility of jumping over the End iterator.
1584template <typename ValType>
1585static bool
1587 unsigned CheckStride,
1589 ValType ExpectedIndex, unsigned ExpectedIndexStride) {
1590 auto &I = Begin;
1591
1592 while (I != End) {
1593 if (*I != -1 && *I != ExpectedIndex)
1594 return false;
1595 ExpectedIndex += ExpectedIndexStride;
1596
1597 // Incrementing past End is undefined behaviour so we must increment one
1598 // step at a time and check for End at each step.
1599 for (unsigned n = 0; n < CheckStride && I != End; ++n, ++I)
1600 ; // Empty loop body.
1601 }
1602 return true;
1603}
1604
1605/// Compute whether each element of a shuffle is zeroable.
1606///
1607/// A "zeroable" vector shuffle element is one which can be lowered to zero.
1609 SDValue V2, APInt &KnownUndef,
1610 APInt &KnownZero) {
1611 int Size = Mask.size();
1612 KnownUndef = KnownZero = APInt::getZero(Size);
1613
1614 V1 = peekThroughBitcasts(V1);
1615 V2 = peekThroughBitcasts(V2);
1616
1617 bool V1IsZero = ISD::isBuildVectorAllZeros(V1.getNode());
1618 bool V2IsZero = ISD::isBuildVectorAllZeros(V2.getNode());
1619
1620 int VectorSizeInBits = V1.getValueSizeInBits();
1621 int ScalarSizeInBits = VectorSizeInBits / Size;
1622 assert(!(VectorSizeInBits % ScalarSizeInBits) && "Illegal shuffle mask size");
1623 (void)ScalarSizeInBits;
1624
1625 for (int i = 0; i < Size; ++i) {
1626 int M = Mask[i];
1627 if (M < 0) {
1628 KnownUndef.setBit(i);
1629 continue;
1630 }
1631 if ((M >= 0 && M < Size && V1IsZero) || (M >= Size && V2IsZero)) {
1632 KnownZero.setBit(i);
1633 continue;
1634 }
1635 }
1636}
1637
1638/// Test whether a shuffle mask is equivalent within each sub-lane.
1639///
1640/// The specific repeated shuffle mask is populated in \p RepeatedMask, as it is
1641/// non-trivial to compute in the face of undef lanes. The representation is
1642/// suitable for use with existing 128-bit shuffles as entries from the second
1643/// vector have been remapped to [LaneSize, 2*LaneSize).
1644static bool isRepeatedShuffleMask(unsigned LaneSizeInBits, MVT VT,
1645 ArrayRef<int> Mask,
1646 SmallVectorImpl<int> &RepeatedMask) {
1647 auto LaneSize = LaneSizeInBits / VT.getScalarSizeInBits();
1648 RepeatedMask.assign(LaneSize, -1);
1649 int Size = Mask.size();
1650 for (int i = 0; i < Size; ++i) {
1651 assert(Mask[i] == -1 || Mask[i] >= 0);
1652 if (Mask[i] < 0)
1653 continue;
1654 if ((Mask[i] % Size) / LaneSize != i / LaneSize)
1655 // This entry crosses lanes, so there is no way to model this shuffle.
1656 return false;
1657
1658 // Ok, handle the in-lane shuffles by detecting if and when they repeat.
1659 // Adjust second vector indices to start at LaneSize instead of Size.
1660 int LocalM =
1661 Mask[i] < Size ? Mask[i] % LaneSize : Mask[i] % LaneSize + LaneSize;
1662 if (RepeatedMask[i % LaneSize] < 0)
1663 // This is the first non-undef entry in this slot of a 128-bit lane.
1664 RepeatedMask[i % LaneSize] = LocalM;
1665 else if (RepeatedMask[i % LaneSize] != LocalM)
1666 // Found a mismatch with the repeated mask.
1667 return false;
1668 }
1669 return true;
1670}
1671
1672/// Attempts to match vector shuffle as byte rotation.
1674 ArrayRef<int> Mask) {
1675
1676 SDValue Lo, Hi;
1677 SmallVector<int, 16> RepeatedMask;
1678
1679 if (!isRepeatedShuffleMask(128, VT, Mask, RepeatedMask))
1680 return -1;
1681
1682 int NumElts = RepeatedMask.size();
1683 int Rotation = 0;
1684 int Scale = 16 / NumElts;
1685
1686 for (int i = 0; i < NumElts; ++i) {
1687 int M = RepeatedMask[i];
1688 assert((M == -1 || (0 <= M && M < (2 * NumElts))) &&
1689 "Unexpected mask index.");
1690 if (M < 0)
1691 continue;
1692
1693 // Determine where a rotated vector would have started.
1694 int StartIdx = i - (M % NumElts);
1695 if (StartIdx == 0)
1696 return -1;
1697
1698 // If we found the tail of a vector the rotation must be the missing
1699 // front. If we found the head of a vector, it must be how much of the
1700 // head.
1701 int CandidateRotation = StartIdx < 0 ? -StartIdx : NumElts - StartIdx;
1702
1703 if (Rotation == 0)
1704 Rotation = CandidateRotation;
1705 else if (Rotation != CandidateRotation)
1706 return -1;
1707
1708 // Compute which value this mask is pointing at.
1709 SDValue MaskV = M < NumElts ? V1 : V2;
1710
1711 // Compute which of the two target values this index should be assigned
1712 // to. This reflects whether the high elements are remaining or the low
1713 // elements are remaining.
1714 SDValue &TargetV = StartIdx < 0 ? Hi : Lo;
1715
1716 // Either set up this value if we've not encountered it before, or check
1717 // that it remains consistent.
1718 if (!TargetV)
1719 TargetV = MaskV;
1720 else if (TargetV != MaskV)
1721 return -1;
1722 }
1723
1724 // Check that we successfully analyzed the mask, and normalize the results.
1725 assert(Rotation != 0 && "Failed to locate a viable rotation!");
1726 assert((Lo || Hi) && "Failed to find a rotated input vector!");
1727 if (!Lo)
1728 Lo = Hi;
1729 else if (!Hi)
1730 Hi = Lo;
1731
1732 V1 = Lo;
1733 V2 = Hi;
1734
1735 return Rotation * Scale;
1736}
1737
1738/// Lower VECTOR_SHUFFLE as byte rotate (if possible).
1739///
1740/// For example:
1741/// %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b,
1742/// <2 x i32> <i32 3, i32 0>
1743/// is lowered to:
1744/// (VBSRL_V $v1, $v1, 8)
1745/// (VBSLL_V $v0, $v0, 8)
1746/// (VOR_V $v0, $V0, $v1)
1747static SDValue
1749 SDValue V1, SDValue V2, SelectionDAG &DAG,
1750 const LoongArchSubtarget &Subtarget) {
1751
1752 SDValue Lo = V1, Hi = V2;
1753 int ByteRotation = matchShuffleAsByteRotate(VT, Lo, Hi, Mask);
1754 if (ByteRotation <= 0)
1755 return SDValue();
1756
1757 MVT ByteVT = MVT::getVectorVT(MVT::i8, VT.getSizeInBits() / 8);
1758 Lo = DAG.getBitcast(ByteVT, Lo);
1759 Hi = DAG.getBitcast(ByteVT, Hi);
1760
1761 int LoByteShift = 16 - ByteRotation;
1762 int HiByteShift = ByteRotation;
1763 MVT GRLenVT = Subtarget.getGRLenVT();
1764
1765 SDValue LoShift = DAG.getNode(LoongArchISD::VBSLL, DL, ByteVT, Lo,
1766 DAG.getConstant(LoByteShift, DL, GRLenVT));
1767 SDValue HiShift = DAG.getNode(LoongArchISD::VBSRL, DL, ByteVT, Hi,
1768 DAG.getConstant(HiByteShift, DL, GRLenVT));
1769 return DAG.getBitcast(VT, DAG.getNode(ISD::OR, DL, ByteVT, LoShift, HiShift));
1770}
1771
1772/// Lower VECTOR_SHUFFLE as ZERO_EXTEND Or ANY_EXTEND (if possible).
1773///
1774/// For example:
1775/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
1776/// <4 x i32> <i32 0, i32 4, i32 1, i32 4>
1777/// %3 = bitcast <4 x i32> %2 to <2 x i64>
1778/// is lowered to:
1779/// (VREPLI $v1, 0)
1780/// (VILVL $v0, $v1, $v0)
1782 ArrayRef<int> Mask, MVT VT,
1783 SDValue V1, SDValue V2,
1784 SelectionDAG &DAG,
1785 const APInt &Zeroable) {
1786 int Bits = VT.getSizeInBits();
1787 int EltBits = VT.getScalarSizeInBits();
1788 int NumElements = VT.getVectorNumElements();
1789
1790 if (Zeroable.isAllOnes())
1791 return DAG.getConstant(0, DL, VT);
1792
1793 // Define a helper function to check a particular ext-scale and lower to it if
1794 // valid.
1795 auto Lower = [&](int Scale) -> SDValue {
1796 SDValue InputV;
1797 bool AnyExt = true;
1798 int Offset = 0;
1799 for (int i = 0; i < NumElements; i++) {
1800 int M = Mask[i];
1801 if (M < 0)
1802 continue;
1803 if (i % Scale != 0) {
1804 // Each of the extended elements need to be zeroable.
1805 if (!Zeroable[i])
1806 return SDValue();
1807
1808 AnyExt = false;
1809 continue;
1810 }
1811
1812 // Each of the base elements needs to be consecutive indices into the
1813 // same input vector.
1814 SDValue V = M < NumElements ? V1 : V2;
1815 M = M % NumElements;
1816 if (!InputV) {
1817 InputV = V;
1818 Offset = M - (i / Scale);
1819
1820 // These offset can't be handled
1821 if (Offset % (NumElements / Scale))
1822 return SDValue();
1823 } else if (InputV != V)
1824 return SDValue();
1825
1826 if (M != (Offset + (i / Scale)))
1827 return SDValue(); // Non-consecutive strided elements.
1828 }
1829
1830 // If we fail to find an input, we have a zero-shuffle which should always
1831 // have already been handled.
1832 if (!InputV)
1833 return SDValue();
1834
1835 do {
1836 unsigned VilVLoHi = LoongArchISD::VILVL;
1837 if (Offset >= (NumElements / 2)) {
1838 VilVLoHi = LoongArchISD::VILVH;
1839 Offset -= (NumElements / 2);
1840 }
1841
1842 MVT InputVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits), NumElements);
1843 SDValue Ext =
1844 AnyExt ? DAG.getFreeze(InputV) : DAG.getConstant(0, DL, InputVT);
1845 InputV = DAG.getBitcast(InputVT, InputV);
1846 InputV = DAG.getNode(VilVLoHi, DL, InputVT, Ext, InputV);
1847 Scale /= 2;
1848 EltBits *= 2;
1849 NumElements /= 2;
1850 } while (Scale > 1);
1851 return DAG.getBitcast(VT, InputV);
1852 };
1853
1854 // Each iteration, try extending the elements half as much, but into twice as
1855 // many elements.
1856 for (int NumExtElements = Bits / 64; NumExtElements < NumElements;
1857 NumExtElements *= 2) {
1858 if (SDValue V = Lower(NumElements / NumExtElements))
1859 return V;
1860 }
1861 return SDValue();
1862}
1863
1864/// Lower VECTOR_SHUFFLE into VREPLVEI (if possible).
1865///
1866/// VREPLVEI performs vector broadcast based on an element specified by an
1867/// integer immediate, with its mask being similar to:
1868/// <x, x, x, ...>
1869/// where x is any valid index.
1870///
1871/// When undef's appear in the mask they are treated as if they were whatever
1872/// value is necessary in order to fit the above form.
1873static SDValue
1875 SDValue V1, SelectionDAG &DAG,
1876 const LoongArchSubtarget &Subtarget) {
1877 int SplatIndex = -1;
1878 for (const auto &M : Mask) {
1879 if (M != -1) {
1880 SplatIndex = M;
1881 break;
1882 }
1883 }
1884
1885 if (SplatIndex == -1)
1886 return DAG.getUNDEF(VT);
1887
1888 assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
1889 if (fitsRegularPattern<int>(Mask.begin(), 1, Mask.end(), SplatIndex, 0)) {
1890 return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
1891 DAG.getConstant(SplatIndex, DL, Subtarget.getGRLenVT()));
1892 }
1893
1894 return SDValue();
1895}
1896
1897/// Lower VECTOR_SHUFFLE into VSHUF4I (if possible).
1898///
1899/// VSHUF4I splits the vector into blocks of four elements, then shuffles these
1900/// elements according to a <4 x i2> constant (encoded as an integer immediate).
1901///
1902/// It is therefore possible to lower into VSHUF4I when the mask takes the form:
1903/// <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...>
1904/// When undef's appear they are treated as if they were whatever value is
1905/// necessary in order to fit the above forms.
1906///
1907/// For example:
1908/// %2 = shufflevector <8 x i16> %0, <8 x i16> undef,
1909/// <8 x i32> <i32 3, i32 2, i32 1, i32 0,
1910/// i32 7, i32 6, i32 5, i32 4>
1911/// is lowered to:
1912/// (VSHUF4I_H $v0, $v1, 27)
1913/// where the 27 comes from:
1914/// 3 + (2 << 2) + (1 << 4) + (0 << 6)
1915static SDValue
1917 SDValue V1, SDValue V2, SelectionDAG &DAG,
1918 const LoongArchSubtarget &Subtarget) {
1919
1920 unsigned SubVecSize = 4;
1921 if (VT == MVT::v2f64 || VT == MVT::v2i64)
1922 SubVecSize = 2;
1923
1924 int SubMask[4] = {-1, -1, -1, -1};
1925 for (unsigned i = 0; i < SubVecSize; ++i) {
1926 for (unsigned j = i; j < Mask.size(); j += SubVecSize) {
1927 int M = Mask[j];
1928
1929 // Convert from vector index to 4-element subvector index
1930 // If an index refers to an element outside of the subvector then give up
1931 if (M != -1) {
1932 M -= 4 * (j / SubVecSize);
1933 if (M < 0 || M >= 4)
1934 return SDValue();
1935 }
1936
1937 // If the mask has an undef, replace it with the current index.
1938 // Note that it might still be undef if the current index is also undef
1939 if (SubMask[i] == -1)
1940 SubMask[i] = M;
1941 // Check that non-undef values are the same as in the mask. If they
1942 // aren't then give up
1943 else if (M != -1 && M != SubMask[i])
1944 return SDValue();
1945 }
1946 }
1947
1948 // Calculate the immediate. Replace any remaining undefs with zero
1949 int Imm = 0;
1950 for (int i = SubVecSize - 1; i >= 0; --i) {
1951 int M = SubMask[i];
1952
1953 if (M == -1)
1954 M = 0;
1955
1956 Imm <<= 2;
1957 Imm |= M & 0x3;
1958 }
1959
1960 MVT GRLenVT = Subtarget.getGRLenVT();
1961
1962 // Return vshuf4i.d
1963 if (VT == MVT::v2f64 || VT == MVT::v2i64)
1964 return DAG.getNode(LoongArchISD::VSHUF4I_D, DL, VT, V1, V2,
1965 DAG.getConstant(Imm, DL, GRLenVT));
1966
1967 return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT, V1,
1968 DAG.getConstant(Imm, DL, GRLenVT));
1969}
1970
1971/// Lower VECTOR_SHUFFLE whose result is the reversed source vector.
1972///
1973/// It is possible to do optimization for VECTOR_SHUFFLE performing vector
1974/// reverse whose mask likes:
1975/// <7, 6, 5, 4, 3, 2, 1, 0>
1976///
1977/// When undef's appear in the mask they are treated as if they were whatever
1978/// value is necessary in order to fit the above forms.
1979static SDValue
1981 SDValue V1, SelectionDAG &DAG,
1982 const LoongArchSubtarget &Subtarget) {
1983 // Only vectors with i8/i16 elements which cannot match other patterns
1984 // directly needs to do this.
1985 if (VT != MVT::v16i8 && VT != MVT::v8i16 && VT != MVT::v32i8 &&
1986 VT != MVT::v16i16)
1987 return SDValue();
1988
1989 if (!ShuffleVectorInst::isReverseMask(Mask, Mask.size()))
1990 return SDValue();
1991
1992 int WidenNumElts = VT.getVectorNumElements() / 4;
1993 SmallVector<int, 16> WidenMask(WidenNumElts, -1);
1994 for (int i = 0; i < WidenNumElts; ++i)
1995 WidenMask[i] = WidenNumElts - 1 - i;
1996
1997 MVT WidenVT = MVT::getVectorVT(
1998 VT.getVectorElementType() == MVT::i8 ? MVT::i32 : MVT::i64, WidenNumElts);
1999 SDValue NewV1 = DAG.getBitcast(WidenVT, V1);
2000 SDValue WidenRev = DAG.getVectorShuffle(WidenVT, DL, NewV1,
2001 DAG.getUNDEF(WidenVT), WidenMask);
2002
2003 return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT,
2004 DAG.getBitcast(VT, WidenRev),
2005 DAG.getConstant(27, DL, Subtarget.getGRLenVT()));
2006}
2007
2008/// Lower VECTOR_SHUFFLE into VPACKEV (if possible).
2009///
2010/// VPACKEV interleaves the even elements from each vector.
2011///
2012/// It is possible to lower into VPACKEV when the mask consists of two of the
2013/// following forms interleaved:
2014/// <0, 2, 4, ...>
2015/// <n, n+2, n+4, ...>
2016/// where n is the number of elements in the vector.
2017/// For example:
2018/// <0, 0, 2, 2, 4, 4, ...>
2019/// <0, n, 2, n+2, 4, n+4, ...>
2020///
2021/// When undef's appear in the mask they are treated as if they were whatever
2022/// value is necessary in order to fit the above forms.
2024 MVT VT, SDValue V1, SDValue V2,
2025 SelectionDAG &DAG) {
2026
2027 const auto &Begin = Mask.begin();
2028 const auto &End = Mask.end();
2029 SDValue OriV1 = V1, OriV2 = V2;
2030
2031 if (fitsRegularPattern<int>(Begin, 2, End, 0, 2))
2032 V1 = OriV1;
2033 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 2))
2034 V1 = OriV2;
2035 else
2036 return SDValue();
2037
2038 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 2))
2039 V2 = OriV1;
2040 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 2))
2041 V2 = OriV2;
2042 else
2043 return SDValue();
2044
2045 return DAG.getNode(LoongArchISD::VPACKEV, DL, VT, V2, V1);
2046}
2047
2048/// Lower VECTOR_SHUFFLE into VPACKOD (if possible).
2049///
2050/// VPACKOD interleaves the odd elements from each vector.
2051///
2052/// It is possible to lower into VPACKOD when the mask consists of two of the
2053/// following forms interleaved:
2054/// <1, 3, 5, ...>
2055/// <n+1, n+3, n+5, ...>
2056/// where n is the number of elements in the vector.
2057/// For example:
2058/// <1, 1, 3, 3, 5, 5, ...>
2059/// <1, n+1, 3, n+3, 5, n+5, ...>
2060///
2061/// When undef's appear in the mask they are treated as if they were whatever
2062/// value is necessary in order to fit the above forms.
2064 MVT VT, SDValue V1, SDValue V2,
2065 SelectionDAG &DAG) {
2066
2067 const auto &Begin = Mask.begin();
2068 const auto &End = Mask.end();
2069 SDValue OriV1 = V1, OriV2 = V2;
2070
2071 if (fitsRegularPattern<int>(Begin, 2, End, 1, 2))
2072 V1 = OriV1;
2073 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + 1, 2))
2074 V1 = OriV2;
2075 else
2076 return SDValue();
2077
2078 if (fitsRegularPattern<int>(Begin + 1, 2, End, 1, 2))
2079 V2 = OriV1;
2080 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + 1, 2))
2081 V2 = OriV2;
2082 else
2083 return SDValue();
2084
2085 return DAG.getNode(LoongArchISD::VPACKOD, DL, VT, V2, V1);
2086}
2087
2088/// Lower VECTOR_SHUFFLE into VILVH (if possible).
2089///
2090/// VILVH interleaves consecutive elements from the left (highest-indexed) half
2091/// of each vector.
2092///
2093/// It is possible to lower into VILVH when the mask consists of two of the
2094/// following forms interleaved:
2095/// <x, x+1, x+2, ...>
2096/// <n+x, n+x+1, n+x+2, ...>
2097/// where n is the number of elements in the vector and x is half n.
2098/// For example:
2099/// <x, x, x+1, x+1, x+2, x+2, ...>
2100/// <x, n+x, x+1, n+x+1, x+2, n+x+2, ...>
2101///
2102/// When undef's appear in the mask they are treated as if they were whatever
2103/// value is necessary in order to fit the above forms.
2105 MVT VT, SDValue V1, SDValue V2,
2106 SelectionDAG &DAG) {
2107
2108 const auto &Begin = Mask.begin();
2109 const auto &End = Mask.end();
2110 unsigned HalfSize = Mask.size() / 2;
2111 SDValue OriV1 = V1, OriV2 = V2;
2112
2113 if (fitsRegularPattern<int>(Begin, 2, End, HalfSize, 1))
2114 V1 = OriV1;
2115 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + HalfSize, 1))
2116 V1 = OriV2;
2117 else
2118 return SDValue();
2119
2120 if (fitsRegularPattern<int>(Begin + 1, 2, End, HalfSize, 1))
2121 V2 = OriV1;
2122 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + HalfSize,
2123 1))
2124 V2 = OriV2;
2125 else
2126 return SDValue();
2127
2128 return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
2129}
2130
2131/// Lower VECTOR_SHUFFLE into VILVL (if possible).
2132///
2133/// VILVL interleaves consecutive elements from the right (lowest-indexed) half
2134/// of each vector.
2135///
2136/// It is possible to lower into VILVL when the mask consists of two of the
2137/// following forms interleaved:
2138/// <0, 1, 2, ...>
2139/// <n, n+1, n+2, ...>
2140/// where n is the number of elements in the vector.
2141/// For example:
2142/// <0, 0, 1, 1, 2, 2, ...>
2143/// <0, n, 1, n+1, 2, n+2, ...>
2144///
2145/// When undef's appear in the mask they are treated as if they were whatever
2146/// value is necessary in order to fit the above forms.
2148 MVT VT, SDValue V1, SDValue V2,
2149 SelectionDAG &DAG) {
2150
2151 const auto &Begin = Mask.begin();
2152 const auto &End = Mask.end();
2153 SDValue OriV1 = V1, OriV2 = V2;
2154
2155 if (fitsRegularPattern<int>(Begin, 2, End, 0, 1))
2156 V1 = OriV1;
2157 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 1))
2158 V1 = OriV2;
2159 else
2160 return SDValue();
2161
2162 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 1))
2163 V2 = OriV1;
2164 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 1))
2165 V2 = OriV2;
2166 else
2167 return SDValue();
2168
2169 return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
2170}
2171
2172/// Lower VECTOR_SHUFFLE into VPICKEV (if possible).
2173///
2174/// VPICKEV copies the even elements of each vector into the result vector.
2175///
2176/// It is possible to lower into VPICKEV when the mask consists of two of the
2177/// following forms concatenated:
2178/// <0, 2, 4, ...>
2179/// <n, n+2, n+4, ...>
2180/// where n is the number of elements in the vector.
2181/// For example:
2182/// <0, 2, 4, ..., 0, 2, 4, ...>
2183/// <0, 2, 4, ..., n, n+2, n+4, ...>
2184///
2185/// When undef's appear in the mask they are treated as if they were whatever
2186/// value is necessary in order to fit the above forms.
2188 MVT VT, SDValue V1, SDValue V2,
2189 SelectionDAG &DAG) {
2190
2191 const auto &Begin = Mask.begin();
2192 const auto &Mid = Mask.begin() + Mask.size() / 2;
2193 const auto &End = Mask.end();
2194 SDValue OriV1 = V1, OriV2 = V2;
2195
2196 if (fitsRegularPattern<int>(Begin, 1, Mid, 0, 2))
2197 V1 = OriV1;
2198 else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size(), 2))
2199 V1 = OriV2;
2200 else
2201 return SDValue();
2202
2203 if (fitsRegularPattern<int>(Mid, 1, End, 0, 2))
2204 V2 = OriV1;
2205 else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size(), 2))
2206 V2 = OriV2;
2207
2208 else
2209 return SDValue();
2210
2211 return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
2212}
2213
2214/// Lower VECTOR_SHUFFLE into VPICKOD (if possible).
2215///
2216/// VPICKOD copies the odd elements of each vector into the result vector.
2217///
2218/// It is possible to lower into VPICKOD when the mask consists of two of the
2219/// following forms concatenated:
2220/// <1, 3, 5, ...>
2221/// <n+1, n+3, n+5, ...>
2222/// where n is the number of elements in the vector.
2223/// For example:
2224/// <1, 3, 5, ..., 1, 3, 5, ...>
2225/// <1, 3, 5, ..., n+1, n+3, n+5, ...>
2226///
2227/// When undef's appear in the mask they are treated as if they were whatever
2228/// value is necessary in order to fit the above forms.
2230 MVT VT, SDValue V1, SDValue V2,
2231 SelectionDAG &DAG) {
2232
2233 const auto &Begin = Mask.begin();
2234 const auto &Mid = Mask.begin() + Mask.size() / 2;
2235 const auto &End = Mask.end();
2236 SDValue OriV1 = V1, OriV2 = V2;
2237
2238 if (fitsRegularPattern<int>(Begin, 1, Mid, 1, 2))
2239 V1 = OriV1;
2240 else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size() + 1, 2))
2241 V1 = OriV2;
2242 else
2243 return SDValue();
2244
2245 if (fitsRegularPattern<int>(Mid, 1, End, 1, 2))
2246 V2 = OriV1;
2247 else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size() + 1, 2))
2248 V2 = OriV2;
2249 else
2250 return SDValue();
2251
2252 return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
2253}
2254
2255/// Lower VECTOR_SHUFFLE into VEXTRINS (if possible).
2256///
2257/// VEXTRINS copies one element of a vector into any place of the result
2258/// vector and makes no change to the rest elements of the result vector.
2259///
2260/// It is possible to lower into VEXTRINS when the mask takes the form:
2261/// <0, 1, 2, ..., n+i, ..., n-1> or <n, n+1, n+2, ..., i, ..., 2n-1> or
2262/// <0, 1, 2, ..., i, ..., n-1> or <n, n+1, n+2, ..., n+i, ..., 2n-1>
2263/// where n is the number of elements in the vector and i is in [0, n).
2264/// For example:
2265/// <0, 1, 2, 3, 4, 5, 6, 8> , <2, 9, 10, 11, 12, 13, 14, 15> ,
2266/// <0, 1, 2, 6, 4, 5, 6, 7> , <8, 9, 10, 11, 12, 9, 14, 15>
2267///
2268/// When undef's appear in the mask they are treated as if they were whatever
2269/// value is necessary in order to fit the above forms.
2270static SDValue
2272 SDValue V1, SDValue V2, SelectionDAG &DAG,
2273 const LoongArchSubtarget &Subtarget) {
2274 unsigned NumElts = VT.getVectorNumElements();
2275 MVT EltVT = VT.getVectorElementType();
2276 MVT GRLenVT = Subtarget.getGRLenVT();
2277
2278 if (Mask.size() != NumElts)
2279 return SDValue();
2280
2281 auto tryLowerToExtrAndIns = [&](unsigned Base) -> SDValue {
2282 int DiffCount = 0;
2283 int DiffPos = -1;
2284 for (unsigned i = 0; i < NumElts; ++i) {
2285 if (Mask[i] == -1)
2286 continue;
2287 if (Mask[i] != int(Base + i)) {
2288 ++DiffCount;
2289 DiffPos = int(i);
2290 if (DiffCount > 1)
2291 return SDValue();
2292 }
2293 }
2294
2295 // Need exactly one differing element to lower into VEXTRINS.
2296 if (DiffCount != 1)
2297 return SDValue();
2298
2299 // DiffMask must be in [0, 2N).
2300 int DiffMask = Mask[DiffPos];
2301 if (DiffMask < 0 || DiffMask >= int(2 * NumElts))
2302 return SDValue();
2303
2304 // Determine source vector and source index.
2305 SDValue SrcVec;
2306 unsigned SrcIdx;
2307 if (unsigned(DiffMask) < NumElts) {
2308 SrcVec = V1;
2309 SrcIdx = unsigned(DiffMask);
2310 } else {
2311 SrcVec = V2;
2312 SrcIdx = unsigned(DiffMask) - NumElts;
2313 }
2314
2315 // Replace with EXTRACT_VECTOR_ELT + INSERT_VECTOR_ELT, it will match the
2316 // patterns of VEXTRINS in tablegen.
2317 SDValue Extracted = DAG.getNode(
2318 ISD::EXTRACT_VECTOR_ELT, DL, EltVT.isFloatingPoint() ? EltVT : GRLenVT,
2319 SrcVec, DAG.getConstant(SrcIdx, DL, GRLenVT));
2320 SDValue Result =
2321 DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, (Base == 0) ? V1 : V2,
2322 Extracted, DAG.getConstant(DiffPos, DL, GRLenVT));
2323
2324 return Result;
2325 };
2326
2327 // Try [0, n-1) insertion then [n, 2n-1) insertion.
2328 if (SDValue Result = tryLowerToExtrAndIns(0))
2329 return Result;
2330 return tryLowerToExtrAndIns(NumElts);
2331}
2332
2333/// Lower VECTOR_SHUFFLE into VSHUF.
2334///
2335/// This mostly consists of converting the shuffle mask into a BUILD_VECTOR and
2336/// adding it as an operand to the resulting VSHUF.
2338 MVT VT, SDValue V1, SDValue V2,
2339 SelectionDAG &DAG,
2340 const LoongArchSubtarget &Subtarget) {
2341
2343 for (auto M : Mask)
2344 Ops.push_back(DAG.getSignedConstant(M, DL, Subtarget.getGRLenVT()));
2345
2346 EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
2347 SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, Ops);
2348
2349 // VECTOR_SHUFFLE concatenates the vectors in an vectorwise fashion.
2350 // <0b00, 0b01> + <0b10, 0b11> -> <0b00, 0b01, 0b10, 0b11>
2351 // VSHF concatenates the vectors in a bitwise fashion:
2352 // <0b00, 0b01> + <0b10, 0b11> ->
2353 // 0b0100 + 0b1110 -> 0b01001110
2354 // <0b10, 0b11, 0b00, 0b01>
2355 // We must therefore swap the operands to get the correct result.
2356 return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
2357}
2358
2359/// Dispatching routine to lower various 128-bit LoongArch vector shuffles.
2360///
2361/// This routine breaks down the specific type of 128-bit shuffle and
2362/// dispatches to the lowering routines accordingly.
2364 SDValue V1, SDValue V2, SelectionDAG &DAG,
2365 const LoongArchSubtarget &Subtarget) {
2366 assert((VT.SimpleTy == MVT::v16i8 || VT.SimpleTy == MVT::v8i16 ||
2367 VT.SimpleTy == MVT::v4i32 || VT.SimpleTy == MVT::v2i64 ||
2368 VT.SimpleTy == MVT::v4f32 || VT.SimpleTy == MVT::v2f64) &&
2369 "Vector type is unsupported for lsx!");
2371 "Two operands have different types!");
2372 assert(VT.getVectorNumElements() == Mask.size() &&
2373 "Unexpected mask size for shuffle!");
2374 assert(Mask.size() % 2 == 0 && "Expected even mask size.");
2375
2376 APInt KnownUndef, KnownZero;
2377 computeZeroableShuffleElements(Mask, V1, V2, KnownUndef, KnownZero);
2378 APInt Zeroable = KnownUndef | KnownZero;
2379
2380 SDValue Result;
2381 // TODO: Add more comparison patterns.
2382 if (V2.isUndef()) {
2383 if ((Result =
2384 lowerVECTOR_SHUFFLE_VREPLVEI(DL, Mask, VT, V1, DAG, Subtarget)))
2385 return Result;
2386 if ((Result =
2387 lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2388 return Result;
2389 if ((Result =
2390 lowerVECTOR_SHUFFLE_IsReverse(DL, Mask, VT, V1, DAG, Subtarget)))
2391 return Result;
2392
2393 // TODO: This comment may be enabled in the future to better match the
2394 // pattern for instruction selection.
2395 /* V2 = V1; */
2396 }
2397
2398 // It is recommended not to change the pattern comparison order for better
2399 // performance.
2400 if ((Result = lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG)))
2401 return Result;
2402 if ((Result = lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG)))
2403 return Result;
2404 if ((Result = lowerVECTOR_SHUFFLE_VILVH(DL, Mask, VT, V1, V2, DAG)))
2405 return Result;
2406 if ((Result = lowerVECTOR_SHUFFLE_VILVL(DL, Mask, VT, V1, V2, DAG)))
2407 return Result;
2408 if ((Result = lowerVECTOR_SHUFFLE_VPICKEV(DL, Mask, VT, V1, V2, DAG)))
2409 return Result;
2410 if ((Result = lowerVECTOR_SHUFFLE_VPICKOD(DL, Mask, VT, V1, V2, DAG)))
2411 return Result;
2412 if ((VT.SimpleTy == MVT::v2i64 || VT.SimpleTy == MVT::v2f64) &&
2413 (Result =
2414 lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2415 return Result;
2416 if ((Result =
2417 lowerVECTOR_SHUFFLE_VEXTRINS(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2418 return Result;
2419 if ((Result = lowerVECTOR_SHUFFLEAsZeroOrAnyExtend(DL, Mask, VT, V1, V2, DAG,
2420 Zeroable)))
2421 return Result;
2422 if ((Result = lowerVECTOR_SHUFFLEAsShift(DL, Mask, VT, V1, V2, DAG, Subtarget,
2423 Zeroable)))
2424 return Result;
2425 if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, Mask, VT, V1, V2, DAG,
2426 Subtarget)))
2427 return Result;
2428 if (SDValue NewShuffle = widenShuffleMask(DL, Mask, VT, V1, V2, DAG))
2429 return NewShuffle;
2430 if ((Result =
2431 lowerVECTOR_SHUFFLE_VSHUF(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2432 return Result;
2433 return SDValue();
2434}
2435
2436/// Lower VECTOR_SHUFFLE into XVREPLVEI (if possible).
2437///
2438/// It is a XVREPLVEI when the mask is:
2439/// <x, x, x, ..., x+n, x+n, x+n, ...>
2440/// where the number of x is equal to n and n is half the length of vector.
2441///
2442/// When undef's appear in the mask they are treated as if they were whatever
2443/// value is necessary in order to fit the above form.
2444static SDValue
2446 SDValue V1, SelectionDAG &DAG,
2447 const LoongArchSubtarget &Subtarget) {
2448 int SplatIndex = -1;
2449 for (const auto &M : Mask) {
2450 if (M != -1) {
2451 SplatIndex = M;
2452 break;
2453 }
2454 }
2455
2456 if (SplatIndex == -1)
2457 return DAG.getUNDEF(VT);
2458
2459 const auto &Begin = Mask.begin();
2460 const auto &End = Mask.end();
2461 int HalfSize = Mask.size() / 2;
2462
2463 if (SplatIndex >= HalfSize)
2464 return SDValue();
2465
2466 assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
2467 if (fitsRegularPattern<int>(Begin, 1, End - HalfSize, SplatIndex, 0) &&
2468 fitsRegularPattern<int>(Begin + HalfSize, 1, End, SplatIndex + HalfSize,
2469 0)) {
2470 return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
2471 DAG.getConstant(SplatIndex, DL, Subtarget.getGRLenVT()));
2472 }
2473
2474 return SDValue();
2475}
2476
2477/// Lower VECTOR_SHUFFLE into XVSHUF4I (if possible).
2478static SDValue
2480 SDValue V1, SDValue V2, SelectionDAG &DAG,
2481 const LoongArchSubtarget &Subtarget) {
2482 // When the size is less than or equal to 4, lower cost instructions may be
2483 // used.
2484 if (Mask.size() <= 4)
2485 return SDValue();
2486 return lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget);
2487}
2488
2489/// Lower VECTOR_SHUFFLE into XVPERMI (if possible).
2490static SDValue
2492 SDValue V1, SelectionDAG &DAG,
2493 const LoongArchSubtarget &Subtarget) {
2494 // Only consider XVPERMI_D.
2495 if (Mask.size() != 4 || (VT != MVT::v4i64 && VT != MVT::v4f64))
2496 return SDValue();
2497
2498 unsigned MaskImm = 0;
2499 for (unsigned i = 0; i < Mask.size(); ++i) {
2500 if (Mask[i] == -1)
2501 continue;
2502 MaskImm |= Mask[i] << (i * 2);
2503 }
2504
2505 return DAG.getNode(LoongArchISD::XVPERMI, DL, VT, V1,
2506 DAG.getConstant(MaskImm, DL, Subtarget.getGRLenVT()));
2507}
2508
2509/// Lower VECTOR_SHUFFLE into XVPERM (if possible).
2511 MVT VT, SDValue V1, SelectionDAG &DAG,
2512 const LoongArchSubtarget &Subtarget) {
2513 // LoongArch LASX only have XVPERM_W.
2514 if (Mask.size() != 8 || (VT != MVT::v8i32 && VT != MVT::v8f32))
2515 return SDValue();
2516
2517 unsigned NumElts = VT.getVectorNumElements();
2518 unsigned HalfSize = NumElts / 2;
2519 bool FrontLo = true, FrontHi = true;
2520 bool BackLo = true, BackHi = true;
2521
2522 auto inRange = [](int val, int low, int high) {
2523 return (val == -1) || (val >= low && val < high);
2524 };
2525
2526 for (unsigned i = 0; i < HalfSize; ++i) {
2527 int Fronti = Mask[i];
2528 int Backi = Mask[i + HalfSize];
2529
2530 FrontLo &= inRange(Fronti, 0, HalfSize);
2531 FrontHi &= inRange(Fronti, HalfSize, NumElts);
2532 BackLo &= inRange(Backi, 0, HalfSize);
2533 BackHi &= inRange(Backi, HalfSize, NumElts);
2534 }
2535
2536 // If both the lower and upper 128-bit parts access only one half of the
2537 // vector (either lower or upper), avoid using xvperm.w. The latency of
2538 // xvperm.w(3) is higher than using xvshuf(1) and xvori(1).
2539 if ((FrontLo || FrontHi) && (BackLo || BackHi))
2540 return SDValue();
2541
2543 MVT GRLenVT = Subtarget.getGRLenVT();
2544 for (unsigned i = 0; i < NumElts; ++i)
2545 Masks.push_back(Mask[i] == -1 ? DAG.getUNDEF(GRLenVT)
2546 : DAG.getConstant(Mask[i], DL, GRLenVT));
2547 SDValue MaskVec = DAG.getBuildVector(MVT::v8i32, DL, Masks);
2548
2549 return DAG.getNode(LoongArchISD::XVPERM, DL, VT, V1, MaskVec);
2550}
2551
2552/// Lower VECTOR_SHUFFLE into XVPACKEV (if possible).
2554 MVT VT, SDValue V1, SDValue V2,
2555 SelectionDAG &DAG) {
2556 return lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG);
2557}
2558
2559/// Lower VECTOR_SHUFFLE into XVPACKOD (if possible).
2561 MVT VT, SDValue V1, SDValue V2,
2562 SelectionDAG &DAG) {
2563 return lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG);
2564}
2565
2566/// Lower VECTOR_SHUFFLE into XVILVH (if possible).
2568 MVT VT, SDValue V1, SDValue V2,
2569 SelectionDAG &DAG) {
2570
2571 const auto &Begin = Mask.begin();
2572 const auto &End = Mask.end();
2573 unsigned HalfSize = Mask.size() / 2;
2574 unsigned LeftSize = HalfSize / 2;
2575 SDValue OriV1 = V1, OriV2 = V2;
2576
2577 if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, HalfSize - LeftSize,
2578 1) &&
2579 fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize + LeftSize, 1))
2580 V1 = OriV1;
2581 else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize,
2582 Mask.size() + HalfSize - LeftSize, 1) &&
2583 fitsRegularPattern<int>(Begin + HalfSize, 2, End,
2584 Mask.size() + HalfSize + LeftSize, 1))
2585 V1 = OriV2;
2586 else
2587 return SDValue();
2588
2589 if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, HalfSize - LeftSize,
2590 1) &&
2591 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize + LeftSize,
2592 1))
2593 V2 = OriV1;
2594 else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize,
2595 Mask.size() + HalfSize - LeftSize, 1) &&
2596 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
2597 Mask.size() + HalfSize + LeftSize, 1))
2598 V2 = OriV2;
2599 else
2600 return SDValue();
2601
2602 return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
2603}
2604
2605/// Lower VECTOR_SHUFFLE into XVILVL (if possible).
2607 MVT VT, SDValue V1, SDValue V2,
2608 SelectionDAG &DAG) {
2609
2610 const auto &Begin = Mask.begin();
2611 const auto &End = Mask.end();
2612 unsigned HalfSize = Mask.size() / 2;
2613 SDValue OriV1 = V1, OriV2 = V2;
2614
2615 if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, 0, 1) &&
2616 fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize, 1))
2617 V1 = OriV1;
2618 else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, Mask.size(), 1) &&
2619 fitsRegularPattern<int>(Begin + HalfSize, 2, End,
2620 Mask.size() + HalfSize, 1))
2621 V1 = OriV2;
2622 else
2623 return SDValue();
2624
2625 if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, 0, 1) &&
2626 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize, 1))
2627 V2 = OriV1;
2628 else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, Mask.size(),
2629 1) &&
2630 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
2631 Mask.size() + HalfSize, 1))
2632 V2 = OriV2;
2633 else
2634 return SDValue();
2635
2636 return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
2637}
2638
2639/// Lower VECTOR_SHUFFLE into XVPICKEV (if possible).
2641 MVT VT, SDValue V1, SDValue V2,
2642 SelectionDAG &DAG) {
2643
2644 const auto &Begin = Mask.begin();
2645 const auto &LeftMid = Mask.begin() + Mask.size() / 4;
2646 const auto &Mid = Mask.begin() + Mask.size() / 2;
2647 const auto &RightMid = Mask.end() - Mask.size() / 4;
2648 const auto &End = Mask.end();
2649 unsigned HalfSize = Mask.size() / 2;
2650 SDValue OriV1 = V1, OriV2 = V2;
2651
2652 if (fitsRegularPattern<int>(Begin, 1, LeftMid, 0, 2) &&
2653 fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize, 2))
2654 V1 = OriV1;
2655 else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size(), 2) &&
2656 fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize, 2))
2657 V1 = OriV2;
2658 else
2659 return SDValue();
2660
2661 if (fitsRegularPattern<int>(LeftMid, 1, Mid, 0, 2) &&
2662 fitsRegularPattern<int>(RightMid, 1, End, HalfSize, 2))
2663 V2 = OriV1;
2664 else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size(), 2) &&
2665 fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize, 2))
2666 V2 = OriV2;
2667
2668 else
2669 return SDValue();
2670
2671 return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
2672}
2673
2674/// Lower VECTOR_SHUFFLE into XVPICKOD (if possible).
2676 MVT VT, SDValue V1, SDValue V2,
2677 SelectionDAG &DAG) {
2678
2679 const auto &Begin = Mask.begin();
2680 const auto &LeftMid = Mask.begin() + Mask.size() / 4;
2681 const auto &Mid = Mask.begin() + Mask.size() / 2;
2682 const auto &RightMid = Mask.end() - Mask.size() / 4;
2683 const auto &End = Mask.end();
2684 unsigned HalfSize = Mask.size() / 2;
2685 SDValue OriV1 = V1, OriV2 = V2;
2686
2687 if (fitsRegularPattern<int>(Begin, 1, LeftMid, 1, 2) &&
2688 fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize + 1, 2))
2689 V1 = OriV1;
2690 else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size() + 1, 2) &&
2691 fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize + 1,
2692 2))
2693 V1 = OriV2;
2694 else
2695 return SDValue();
2696
2697 if (fitsRegularPattern<int>(LeftMid, 1, Mid, 1, 2) &&
2698 fitsRegularPattern<int>(RightMid, 1, End, HalfSize + 1, 2))
2699 V2 = OriV1;
2700 else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size() + 1, 2) &&
2701 fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize + 1,
2702 2))
2703 V2 = OriV2;
2704 else
2705 return SDValue();
2706
2707 return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
2708}
2709
2710/// Lower VECTOR_SHUFFLE into XVINSVE0 (if possible).
2711static SDValue
2713 SDValue V1, SDValue V2, SelectionDAG &DAG,
2714 const LoongArchSubtarget &Subtarget) {
2715 // LoongArch LASX only supports xvinsve0.{w/d}.
2716 if (VT != MVT::v8i32 && VT != MVT::v8f32 && VT != MVT::v4i64 &&
2717 VT != MVT::v4f64)
2718 return SDValue();
2719
2720 MVT GRLenVT = Subtarget.getGRLenVT();
2721 int MaskSize = Mask.size();
2722 assert(MaskSize == (int)VT.getVectorNumElements() && "Unexpected mask size");
2723
2724 // Check if exactly one element of the Mask is replaced by 'Replaced', while
2725 // all other elements are either 'Base + i' or undef (-1). On success, return
2726 // the index of the replaced element. Otherwise, just return -1.
2727 auto checkReplaceOne = [&](int Base, int Replaced) -> int {
2728 int Idx = -1;
2729 for (int i = 0; i < MaskSize; ++i) {
2730 if (Mask[i] == Base + i || Mask[i] == -1)
2731 continue;
2732 if (Mask[i] != Replaced)
2733 return -1;
2734 if (Idx == -1)
2735 Idx = i;
2736 else
2737 return -1;
2738 }
2739 return Idx;
2740 };
2741
2742 // Case 1: the lowest element of V2 replaces one element in V1.
2743 int Idx = checkReplaceOne(0, MaskSize);
2744 if (Idx != -1)
2745 return DAG.getNode(LoongArchISD::XVINSVE0, DL, VT, V1, V2,
2746 DAG.getConstant(Idx, DL, GRLenVT));
2747
2748 // Case 2: the lowest element of V1 replaces one element in V2.
2749 Idx = checkReplaceOne(MaskSize, 0);
2750 if (Idx != -1)
2751 return DAG.getNode(LoongArchISD::XVINSVE0, DL, VT, V2, V1,
2752 DAG.getConstant(Idx, DL, GRLenVT));
2753
2754 return SDValue();
2755}
2756
2757/// Lower VECTOR_SHUFFLE into XVSHUF (if possible).
2759 MVT VT, SDValue V1, SDValue V2,
2760 SelectionDAG &DAG) {
2761
2762 int MaskSize = Mask.size();
2763 int HalfSize = Mask.size() / 2;
2764 const auto &Begin = Mask.begin();
2765 const auto &Mid = Mask.begin() + HalfSize;
2766 const auto &End = Mask.end();
2767
2768 // VECTOR_SHUFFLE concatenates the vectors:
2769 // <0, 1, 2, 3, 4, 5, 6, 7> + <8, 9, 10, 11, 12, 13, 14, 15>
2770 // shuffling ->
2771 // <0, 1, 2, 3, 8, 9, 10, 11> <4, 5, 6, 7, 12, 13, 14, 15>
2772 //
2773 // XVSHUF concatenates the vectors:
2774 // <a0, a1, a2, a3, b0, b1, b2, b3> + <a4, a5, a6, a7, b4, b5, b6, b7>
2775 // shuffling ->
2776 // <a0, a1, a2, a3, a4, a5, a6, a7> + <b0, b1, b2, b3, b4, b5, b6, b7>
2777 SmallVector<SDValue, 8> MaskAlloc;
2778 for (auto it = Begin; it < Mid; it++) {
2779 if (*it < 0) // UNDEF
2780 MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
2781 else if ((*it >= 0 && *it < HalfSize) ||
2782 (*it >= MaskSize && *it < MaskSize + HalfSize)) {
2783 int M = *it < HalfSize ? *it : *it - HalfSize;
2784 MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
2785 } else
2786 return SDValue();
2787 }
2788 assert((int)MaskAlloc.size() == HalfSize && "xvshuf convert failed!");
2789
2790 for (auto it = Mid; it < End; it++) {
2791 if (*it < 0) // UNDEF
2792 MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
2793 else if ((*it >= HalfSize && *it < MaskSize) ||
2794 (*it >= MaskSize + HalfSize && *it < MaskSize * 2)) {
2795 int M = *it < MaskSize ? *it - HalfSize : *it - MaskSize;
2796 MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
2797 } else
2798 return SDValue();
2799 }
2800 assert((int)MaskAlloc.size() == MaskSize && "xvshuf convert failed!");
2801
2802 EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
2803 SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, MaskAlloc);
2804 return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
2805}
2806
2807/// Shuffle vectors by lane to generate more optimized instructions.
2808/// 256-bit shuffles are always considered as 2-lane 128-bit shuffles.
2809///
2810/// Therefore, except for the following four cases, other cases are regarded
2811/// as cross-lane shuffles, where optimization is relatively limited.
2812///
2813/// - Shuffle high, low lanes of two inputs vector
2814/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 3, 6>
2815/// - Shuffle low, high lanes of two inputs vector
2816/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 0, 5>
2817/// - Shuffle low, low lanes of two inputs vector
2818/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 3, 6>
2819/// - Shuffle high, high lanes of two inputs vector
2820/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 0, 5>
2821///
2822/// The first case is the closest to LoongArch instructions and the other
2823/// cases need to be converted to it for processing.
2824///
2825/// This function will return true for the last three cases above and will
2826/// modify V1, V2 and Mask. Otherwise, return false for the first case and
2827/// cross-lane shuffle cases.
2829 const SDLoc &DL, MutableArrayRef<int> Mask, MVT VT, SDValue &V1,
2830 SDValue &V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget) {
2831
2832 enum HalfMaskType { HighLaneTy, LowLaneTy, None };
2833
2834 int MaskSize = Mask.size();
2835 int HalfSize = Mask.size() / 2;
2836 MVT GRLenVT = Subtarget.getGRLenVT();
2837
2838 HalfMaskType preMask = None, postMask = None;
2839
2840 if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
2841 return M < 0 || (M >= 0 && M < HalfSize) ||
2842 (M >= MaskSize && M < MaskSize + HalfSize);
2843 }))
2844 preMask = HighLaneTy;
2845 else if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
2846 return M < 0 || (M >= HalfSize && M < MaskSize) ||
2847 (M >= MaskSize + HalfSize && M < MaskSize * 2);
2848 }))
2849 preMask = LowLaneTy;
2850
2851 if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
2852 return M < 0 || (M >= HalfSize && M < MaskSize) ||
2853 (M >= MaskSize + HalfSize && M < MaskSize * 2);
2854 }))
2855 postMask = LowLaneTy;
2856 else if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
2857 return M < 0 || (M >= 0 && M < HalfSize) ||
2858 (M >= MaskSize && M < MaskSize + HalfSize);
2859 }))
2860 postMask = HighLaneTy;
2861
2862 // The pre-half of mask is high lane type, and the post-half of mask
2863 // is low lane type, which is closest to the LoongArch instructions.
2864 //
2865 // Note: In the LoongArch architecture, the high lane of mask corresponds
2866 // to the lower 128-bit of vector register, and the low lane of mask
2867 // corresponds the higher 128-bit of vector register.
2868 if (preMask == HighLaneTy && postMask == LowLaneTy) {
2869 return false;
2870 }
2871 if (preMask == LowLaneTy && postMask == HighLaneTy) {
2872 V1 = DAG.getBitcast(MVT::v4i64, V1);
2873 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
2874 DAG.getConstant(0b01001110, DL, GRLenVT));
2875 V1 = DAG.getBitcast(VT, V1);
2876
2877 if (!V2.isUndef()) {
2878 V2 = DAG.getBitcast(MVT::v4i64, V2);
2879 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
2880 DAG.getConstant(0b01001110, DL, GRLenVT));
2881 V2 = DAG.getBitcast(VT, V2);
2882 }
2883
2884 for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
2885 *it = *it < 0 ? *it : *it - HalfSize;
2886 }
2887 for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
2888 *it = *it < 0 ? *it : *it + HalfSize;
2889 }
2890 } else if (preMask == LowLaneTy && postMask == LowLaneTy) {
2891 V1 = DAG.getBitcast(MVT::v4i64, V1);
2892 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
2893 DAG.getConstant(0b11101110, DL, GRLenVT));
2894 V1 = DAG.getBitcast(VT, V1);
2895
2896 if (!V2.isUndef()) {
2897 V2 = DAG.getBitcast(MVT::v4i64, V2);
2898 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
2899 DAG.getConstant(0b11101110, DL, GRLenVT));
2900 V2 = DAG.getBitcast(VT, V2);
2901 }
2902
2903 for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
2904 *it = *it < 0 ? *it : *it - HalfSize;
2905 }
2906 } else if (preMask == HighLaneTy && postMask == HighLaneTy) {
2907 V1 = DAG.getBitcast(MVT::v4i64, V1);
2908 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
2909 DAG.getConstant(0b01000100, DL, GRLenVT));
2910 V1 = DAG.getBitcast(VT, V1);
2911
2912 if (!V2.isUndef()) {
2913 V2 = DAG.getBitcast(MVT::v4i64, V2);
2914 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
2915 DAG.getConstant(0b01000100, DL, GRLenVT));
2916 V2 = DAG.getBitcast(VT, V2);
2917 }
2918
2919 for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
2920 *it = *it < 0 ? *it : *it + HalfSize;
2921 }
2922 } else { // cross-lane
2923 return false;
2924 }
2925
2926 return true;
2927}
2928
2929/// Lower VECTOR_SHUFFLE as lane permute and then shuffle (if possible).
2930/// Only for 256-bit vector.
2931///
2932/// For example:
2933/// %2 = shufflevector <4 x i64> %0, <4 x i64> posion,
2934/// <4 x i64> <i32 0, i32 3, i32 2, i32 0>
2935/// is lowerded to:
2936/// (XVPERMI $xr2, $xr0, 78)
2937/// (XVSHUF $xr1, $xr2, $xr0)
2938/// (XVORI $xr0, $xr1, 0)
2940 ArrayRef<int> Mask,
2941 MVT VT, SDValue V1,
2942 SDValue V2,
2943 SelectionDAG &DAG) {
2944 assert(VT.is256BitVector() && "Only for 256-bit vector shuffles!");
2945 int Size = Mask.size();
2946 int LaneSize = Size / 2;
2947
2948 bool LaneCrossing[2] = {false, false};
2949 for (int i = 0; i < Size; ++i)
2950 if (Mask[i] >= 0 && ((Mask[i] % Size) / LaneSize) != (i / LaneSize))
2951 LaneCrossing[(Mask[i] % Size) / LaneSize] = true;
2952
2953 // Ensure that all lanes ared involved.
2954 if (!LaneCrossing[0] && !LaneCrossing[1])
2955 return SDValue();
2956
2957 SmallVector<int> InLaneMask;
2958 InLaneMask.assign(Mask.begin(), Mask.end());
2959 for (int i = 0; i < Size; ++i) {
2960 int &M = InLaneMask[i];
2961 if (M < 0)
2962 continue;
2963 if (((M % Size) / LaneSize) != (i / LaneSize))
2964 M = (M % LaneSize) + ((i / LaneSize) * LaneSize) + Size;
2965 }
2966
2967 SDValue Flipped = DAG.getBitcast(MVT::v4i64, V1);
2968 Flipped = DAG.getVectorShuffle(MVT::v4i64, DL, Flipped,
2969 DAG.getUNDEF(MVT::v4i64), {2, 3, 0, 1});
2970 Flipped = DAG.getBitcast(VT, Flipped);
2971 return DAG.getVectorShuffle(VT, DL, V1, Flipped, InLaneMask);
2972}
2973
2974/// Dispatching routine to lower various 256-bit LoongArch vector shuffles.
2975///
2976/// This routine breaks down the specific type of 256-bit shuffle and
2977/// dispatches to the lowering routines accordingly.
2979 SDValue V1, SDValue V2, SelectionDAG &DAG,
2980 const LoongArchSubtarget &Subtarget) {
2981 assert((VT.SimpleTy == MVT::v32i8 || VT.SimpleTy == MVT::v16i16 ||
2982 VT.SimpleTy == MVT::v8i32 || VT.SimpleTy == MVT::v4i64 ||
2983 VT.SimpleTy == MVT::v8f32 || VT.SimpleTy == MVT::v4f64) &&
2984 "Vector type is unsupported for lasx!");
2986 "Two operands have different types!");
2987 assert(VT.getVectorNumElements() == Mask.size() &&
2988 "Unexpected mask size for shuffle!");
2989 assert(Mask.size() % 2 == 0 && "Expected even mask size.");
2990 assert(Mask.size() >= 4 && "Mask size is less than 4.");
2991
2992 APInt KnownUndef, KnownZero;
2993 computeZeroableShuffleElements(Mask, V1, V2, KnownUndef, KnownZero);
2994 APInt Zeroable = KnownUndef | KnownZero;
2995
2996 SDValue Result;
2997 // TODO: Add more comparison patterns.
2998 if (V2.isUndef()) {
2999 if ((Result =
3000 lowerVECTOR_SHUFFLE_XVREPLVEI(DL, Mask, VT, V1, DAG, Subtarget)))
3001 return Result;
3002 if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, Mask, VT, V1, V2, DAG,
3003 Subtarget)))
3004 return Result;
3005 // Try to widen vectors to gain more optimization opportunities.
3006 if (SDValue NewShuffle = widenShuffleMask(DL, Mask, VT, V1, V2, DAG))
3007 return NewShuffle;
3008 if ((Result =
3009 lowerVECTOR_SHUFFLE_XVPERMI(DL, Mask, VT, V1, DAG, Subtarget)))
3010 return Result;
3011 if ((Result = lowerVECTOR_SHUFFLE_XVPERM(DL, Mask, VT, V1, DAG, Subtarget)))
3012 return Result;
3013 if ((Result =
3014 lowerVECTOR_SHUFFLE_IsReverse(DL, Mask, VT, V1, DAG, Subtarget)))
3015 return Result;
3016
3017 // TODO: This comment may be enabled in the future to better match the
3018 // pattern for instruction selection.
3019 /* V2 = V1; */
3020 }
3021
3022 // It is recommended not to change the pattern comparison order for better
3023 // performance.
3024 if ((Result = lowerVECTOR_SHUFFLE_XVPACKEV(DL, Mask, VT, V1, V2, DAG)))
3025 return Result;
3026 if ((Result = lowerVECTOR_SHUFFLE_XVPACKOD(DL, Mask, VT, V1, V2, DAG)))
3027 return Result;
3028 if ((Result = lowerVECTOR_SHUFFLE_XVILVH(DL, Mask, VT, V1, V2, DAG)))
3029 return Result;
3030 if ((Result = lowerVECTOR_SHUFFLE_XVILVL(DL, Mask, VT, V1, V2, DAG)))
3031 return Result;
3032 if ((Result = lowerVECTOR_SHUFFLE_XVPICKEV(DL, Mask, VT, V1, V2, DAG)))
3033 return Result;
3034 if ((Result = lowerVECTOR_SHUFFLE_XVPICKOD(DL, Mask, VT, V1, V2, DAG)))
3035 return Result;
3036 if ((Result = lowerVECTOR_SHUFFLEAsShift(DL, Mask, VT, V1, V2, DAG, Subtarget,
3037 Zeroable)))
3038 return Result;
3039 if ((Result =
3040 lowerVECTOR_SHUFFLE_XVINSVE0(DL, Mask, VT, V1, V2, DAG, Subtarget)))
3041 return Result;
3042 if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, Mask, VT, V1, V2, DAG,
3043 Subtarget)))
3044 return Result;
3045
3046 // canonicalize non cross-lane shuffle vector
3047 SmallVector<int> NewMask(Mask);
3048 if (canonicalizeShuffleVectorByLane(DL, NewMask, VT, V1, V2, DAG, Subtarget))
3049 return lower256BitShuffle(DL, NewMask, VT, V1, V2, DAG, Subtarget);
3050
3051 // FIXME: Handling the remaining cases earlier can degrade performance
3052 // in some situations. Further analysis is required to enable more
3053 // effective optimizations.
3054 if (V2.isUndef()) {
3055 if ((Result = lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(DL, NewMask, VT,
3056 V1, V2, DAG)))
3057 return Result;
3058 }
3059
3060 if (SDValue NewShuffle = widenShuffleMask(DL, NewMask, VT, V1, V2, DAG))
3061 return NewShuffle;
3062 if ((Result = lowerVECTOR_SHUFFLE_XVSHUF(DL, NewMask, VT, V1, V2, DAG)))
3063 return Result;
3064
3065 return SDValue();
3066}
3067
/// Lower a VECTOR_SHUFFLE node: canonicalize the operands/mask, check the
/// mask invariants, then delegate to the width-specific (128-bit LSX /
/// 256-bit LASX) shuffle lowering routines.
SDValue LoongArchTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
                                                     SelectionDAG &DAG) const {
  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
  ArrayRef<int> OrigMask = SVOp->getMask();
  SDValue V1 = Op.getOperand(0);
  SDValue V2 = Op.getOperand(1);
  MVT VT = Op.getSimpleValueType();
  int NumElements = VT.getVectorNumElements();
  SDLoc DL(Op);

  bool V1IsUndef = V1.isUndef();
  bool V2IsUndef = V2.isUndef();
  // A shuffle of two undef vectors is itself undef.
  if (V1IsUndef && V2IsUndef)
    return DAG.getUNDEF(VT);

  // When we create a shuffle node we put the UNDEF node to second operand,
  // but in some cases the first operand may be transformed to UNDEF.
  // In this case we should just commute the node.
  if (V1IsUndef)
    return DAG.getCommutedVectorShuffle(*SVOp);

  // Check for non-undef masks pointing at an undef vector and make the masks
  // undef as well. This makes it easier to match the shuffle based solely on
  // the mask.
  if (V2IsUndef &&
      any_of(OrigMask, [NumElements](int M) { return M >= NumElements; })) {
    SmallVector<int, 8> NewMask(OrigMask);
    for (int &M : NewMask)
      if (M >= NumElements)
        M = -1;
    return DAG.getVectorShuffle(VT, DL, V1, V2, NewMask);
  }

  // Check for illegal shuffle mask element index values. With an undef V2
  // only indices into V1 ([0, N)) are legal; otherwise the mask may also
  // index into V2 ([N, 2N)).
  int MaskUpperLimit = OrigMask.size() * (V2IsUndef ? 1 : 2);
  (void)MaskUpperLimit;
  assert(llvm::all_of(OrigMask,
                      [&](int M) { return -1 <= M && M < MaskUpperLimit; }) &&
         "Out of bounds shuffle index");

  // For each vector width, delegate to a specialized lowering routine.
  if (VT.is128BitVector())
    return lower128BitShuffle(DL, OrigMask, VT, V1, V2, DAG, Subtarget);

  if (VT.is256BitVector())
    return lower256BitShuffle(DL, OrigMask, VT, V1, V2, DAG, Subtarget);

  return SDValue();
}
3117
3118SDValue LoongArchTargetLowering::lowerFP_TO_FP16(SDValue Op,
3119 SelectionDAG &DAG) const {
3120 // Custom lower to ensure the libcall return is passed in an FPR on hard
3121 // float ABIs.
3122 SDLoc DL(Op);
3123 MakeLibCallOptions CallOptions;
3124 SDValue Op0 = Op.getOperand(0);
3125 SDValue Chain = SDValue();
3126 RTLIB::Libcall LC = RTLIB::getFPROUND(Op0.getValueType(), MVT::f16);
3127 SDValue Res;
3128 std::tie(Res, Chain) =
3129 makeLibCall(DAG, LC, MVT::f32, Op0, CallOptions, DL, Chain);
3130 if (Subtarget.is64Bit())
3131 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Res);
3132 return DAG.getBitcast(MVT::i32, Res);
3133}
3134
3135SDValue LoongArchTargetLowering::lowerFP16_TO_FP(SDValue Op,
3136 SelectionDAG &DAG) const {
3137 // Custom lower to ensure the libcall argument is passed in an FPR on hard
3138 // float ABIs.
3139 SDLoc DL(Op);
3140 MakeLibCallOptions CallOptions;
3141 SDValue Op0 = Op.getOperand(0);
3142 SDValue Chain = SDValue();
3143 SDValue Arg = Subtarget.is64Bit() ? DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64,
3144 DL, MVT::f32, Op0)
3145 : DAG.getBitcast(MVT::f32, Op0);
3146 SDValue Res;
3147 std::tie(Res, Chain) = makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Arg,
3148 CallOptions, DL, Chain);
3149 return Res;
3150}
3151
3152SDValue LoongArchTargetLowering::lowerFP_TO_BF16(SDValue Op,
3153 SelectionDAG &DAG) const {
3154 assert(Subtarget.hasBasicF() && "Unexpected custom legalization");
3155 SDLoc DL(Op);
3156 MakeLibCallOptions CallOptions;
3157 RTLIB::Libcall LC =
3158 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16);
3159 SDValue Res =
3160 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
3161 if (Subtarget.is64Bit())
3162 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Res);
3163 return DAG.getBitcast(MVT::i32, Res);
3164}
3165
3166SDValue LoongArchTargetLowering::lowerBF16_TO_FP(SDValue Op,
3167 SelectionDAG &DAG) const {
3168 assert(Subtarget.hasBasicF() && "Unexpected custom legalization");
3169 MVT VT = Op.getSimpleValueType();
3170 SDLoc DL(Op);
3171 Op = DAG.getNode(
3172 ISD::SHL, DL, Op.getOperand(0).getValueType(), Op.getOperand(0),
3173 DAG.getShiftAmountConstant(16, Op.getOperand(0).getValueType(), DL));
3174 SDValue Res = Subtarget.is64Bit() ? DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64,
3175 DL, MVT::f32, Op)
3176 : DAG.getBitcast(MVT::f32, Op);
3177 if (VT != MVT::f32)
3178 return DAG.getNode(ISD::FP_EXTEND, DL, VT, Res);
3179 return Res;
3180}
3181
// Lower BUILD_VECTOR as broadcast load (if possible).
// For example:
// %a = load i8, ptr %ptr
// %b = build_vector %a, %a, %a, %a
// is lowered to :
// (VLDREPL_B $a0, 0)
                                            const SDLoc &DL,
                                            SelectionDAG &DAG) {
  MVT VT = BVOp->getSimpleValueType(0);
  int NumOps = BVOp->getNumOperands();

  assert((VT.is128BitVector() || VT.is256BitVector()) &&
         "Unsupported vector type for broadcast.");

  // Check that every operand is one and the same load node.
  SDValue IdentitySrc;
  bool IsIdeneity = true;

  for (int i = 0; i != NumOps; i++) {
    SDValue Op = BVOp->getOperand(i);
    if (Op.getOpcode() != ISD::LOAD || (IdentitySrc && Op != IdentitySrc)) {
      IsIdeneity = false;
      break;
    }
    IdentitySrc = BVOp->getOperand(0);
  }

  // make sure that this load is valid and only has one user.
  if (!IsIdeneity || !IdentitySrc || !BVOp->isOnlyUserOf(IdentitySrc.getNode()))
    return SDValue();

  auto *LN = cast<LoadSDNode>(IdentitySrc);
  auto ExtType = LN->getExtensionType();

  // Only plain (or any-extending) loads whose memory element width matches
  // the vector element width can be folded into a VLDREPL.
  if ((ExtType == ISD::EXTLOAD || ExtType == ISD::NON_EXTLOAD) &&
      VT.getScalarSizeInBits() == LN->getMemoryVT().getScalarSizeInBits()) {
    // Indexed loads and stores are not supported on LoongArch.
    assert(LN->isUnindexed() && "Unexpected indexed load.");

    SDVTList Tys = DAG.getVTList(VT, MVT::Other);
    // The offset operand of unindexed load is always undefined, so there is
    // no need to pass it to VLDREPL.
    SDValue Ops[] = {LN->getChain(), LN->getBasePtr()};
    SDValue BCast = DAG.getNode(LoongArchISD::VLDREPL, DL, Tys, Ops);
    // Re-wire users of the original load's chain to the broadcast load's
    // chain so the old load becomes dead.
    DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BCast.getValue(1));
    return BCast;
  }
  return SDValue();
}
3231
// Sequentially insert elements from Ops into Vector, from low to high indices.
// Note: Ops can have fewer elements than Vector; the remaining lanes keep
// whatever Vector already holds. Undef operands are skipped entirely.
                const LoongArchSubtarget &Subtarget, SDValue &Vector,
                EVT ResTy) {
  assert(Ops.size() <= ResTy.getVectorNumElements());

  // The first element is materialized with SCALAR_TO_VECTOR so the later
  // INSERT_VECTOR_ELT nodes build on a defined base.
  SDValue Op0 = Ops[0];
  if (!Op0.isUndef())
    Vector = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, ResTy, Op0);
  for (unsigned i = 1; i < Ops.size(); ++i) {
    SDValue Opi = Ops[i];
    if (Opi.isUndef())
      continue;
    Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector, Opi,
                         DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
  }
}
3250
// Build a ResTy subvector from Node, taking NumElts elements starting at index
// 'first'. NumElts is derived from ResTy itself.
                                            SelectionDAG &DAG, SDLoc DL,
                                            const LoongArchSubtarget &Subtarget,
                                            EVT ResTy, unsigned first) {
  unsigned NumElts = ResTy.getVectorNumElements();

  // The requested window must lie entirely inside Node's operand list.
  assert(first + NumElts <= Node->getSimpleValueType(0).getVectorNumElements());

  // Copy the operand window and materialize it into an undef base vector.
  SmallVector<SDValue, 16> Ops(Node->op_begin() + first,
                               Node->op_begin() + first + NumElts);
  SDValue Vector = DAG.getUNDEF(ResTy);
  fillVector(Ops, DAG, DL, Subtarget, Vector, ResTy);
  return Vector;
}
3267
/// Custom lowering for BUILD_VECTOR on LSX/LASX: tries, in order, a broadcast
/// load, a constant splat, a repeated-sequence broadcast, and finally
/// element-wise insertion (split into halves for 256-bit vectors).
SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
                                                   SelectionDAG &DAG) const {
  BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op);
  MVT VT = Node->getSimpleValueType(0);
  EVT ResTy = Op->getValueType(0);
  unsigned NumElts = ResTy.getVectorNumElements();
  SDLoc DL(Op);
  APInt SplatValue, SplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;
  bool IsConstant = false;
  bool UseSameConstant = true;
  SDValue ConstantValue;
  bool Is128Vec = ResTy.is128BitVector();
  bool Is256Vec = ResTy.is256BitVector();

  // Custom lowering requires the matching vector extension: LSX for 128-bit
  // vectors, LASX for 256-bit vectors.
  if ((!Subtarget.hasExtLSX() || !Is128Vec) &&
      (!Subtarget.hasExtLASX() || !Is256Vec))
    return SDValue();

  // First preference: a single broadcast load (vldrepl).
  if (SDValue Result = lowerBUILD_VECTORAsBroadCastLoad(Node, DL, DAG))
    return Result;

  // Constant splats of 8/16/32/64-bit chunks.
  if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
                            /*MinSplatBits=*/8) &&
      SplatBitSize <= 64) {
    // We can only cope with 8, 16, 32, or 64-bit elements.
    if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 &&
        SplatBitSize != 64)
      return SDValue();

    if (SplatBitSize == 64 && !Subtarget.is64Bit()) {
      // We can only handle 64-bit elements that are within
      // the signed 10-bit range or match vldi patterns on 32-bit targets.
      // See the BUILD_VECTOR case in LoongArchDAGToDAGISel::Select().
      if (!SplatValue.isSignedIntN(10) &&
          !isImmVLDILegalForMode1(SplatValue, SplatBitSize).first)
        return SDValue();
      if ((Is128Vec && ResTy == MVT::v4i32) ||
          (Is256Vec && ResTy == MVT::v8i32))
        return Op;
    }

    EVT ViaVecTy;

    // Pick the integer vector type whose element width matches the splat
    // chunk width; the result is bitcast back to ResTy below.
    switch (SplatBitSize) {
    default:
      return SDValue();
    case 8:
      ViaVecTy = Is128Vec ? MVT::v16i8 : MVT::v32i8;
      break;
    case 16:
      ViaVecTy = Is128Vec ? MVT::v8i16 : MVT::v16i16;
      break;
    case 32:
      ViaVecTy = Is128Vec ? MVT::v4i32 : MVT::v8i32;
      break;
    case 64:
      ViaVecTy = Is128Vec ? MVT::v2i64 : MVT::v4i64;
      break;
    }

    // SelectionDAG::getConstant will promote SplatValue appropriately.
    SDValue Result = DAG.getConstant(SplatValue, DL, ViaVecTy);

    // Bitcast to the type we originally wanted.
    if (ViaVecTy != ResTy)
      Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result);

    return Result;
  }

  // Non-constant splats are kept unchanged (treated as legal here).
  if (DAG.isSplatValue(Op, /*AllowUndefs=*/false))
    return Op;

  // Scan the operands: record whether any constant elements appear and
  // whether all constant elements share one value.
  for (unsigned i = 0; i < NumElts; ++i) {
    SDValue Opi = Node->getOperand(i);
    if (isIntOrFPConstant(Opi)) {
      IsConstant = true;
      if (!ConstantValue.getNode())
        ConstantValue = Opi;
      else if (ConstantValue != Opi)
        UseSameConstant = false;
    }
  }

  // If the type of BUILD_VECTOR is v2f64, custom legalizing it has no benefits.
  if (IsConstant && UseSameConstant && ResTy != MVT::v2f64) {
    // Splat the shared constant, then patch in the non-constant elements.
    SDValue Result = DAG.getSplatBuildVector(ResTy, DL, ConstantValue);
    for (unsigned i = 0; i < NumElts; ++i) {
      SDValue Opi = Node->getOperand(i);
      if (!isIntOrFPConstant(Opi))
        Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Result, Opi,
                             DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
    }
    return Result;
  }

  if (!IsConstant) {
    // If the BUILD_VECTOR has a repeated pattern, use INSERT_VECTOR_ELT to fill
    // the sub-sequence of the vector and then broadcast the sub-sequence.
    //
    // TODO: If the BUILD_VECTOR contains undef elements, consider falling
    // back to use INSERT_VECTOR_ELT to materialize the vector, because it
    // generates worse code in some cases. This could be further optimized
    // with more consideration.
    BitVector UndefElements;
    if (Node->getRepeatedSequence(Sequence, &UndefElements) &&
        UndefElements.count() == 0) {
      // Using LSX instructions to fill the sub-sequence of 256-bits vector,
      // because the high part can be simply treated as undef.
      SDValue Vector = DAG.getUNDEF(ResTy);
      EVT FillTy = Is256Vec
                       : ResTy;
      SDValue FillVec =
          Is256Vec ? DAG.getExtractSubvector(DL, FillTy, Vector, 0) : Vector;

      fillVector(Sequence, DAG, DL, Subtarget, FillVec, FillTy);

      // View the vector as SplatLen lanes of SeqLen*EltBits each, so that
      // broadcasting lane 0 replicates the whole sub-sequence.
      unsigned SeqLen = Sequence.size();
      unsigned SplatLen = NumElts / SeqLen;
      MVT SplatEltTy = MVT::getIntegerVT(VT.getScalarSizeInBits() * SeqLen);
      MVT SplatTy = MVT::getVectorVT(SplatEltTy, SplatLen);

      // If size of the sub-sequence is half of a 256-bits vector, bitcast the
      // vector to v4i64 type in order to match the pattern of XVREPLVE0Q.
      if (SplatEltTy == MVT::i128)
        SplatTy = MVT::v4i64;

      SDValue SplatVec;
      SDValue SrcVec = DAG.getBitcast(
          SplatTy,
          Is256Vec ? DAG.getInsertSubvector(DL, Vector, FillVec, 0) : FillVec);
      if (Is256Vec) {
        SplatVec =
            DAG.getNode((SplatEltTy == MVT::i128) ? LoongArchISD::XVREPLVE0Q
                                                  : LoongArchISD::XVREPLVE0,
                        DL, SplatTy, SrcVec);
      } else {
        SplatVec = DAG.getNode(LoongArchISD::VREPLVEI, DL, SplatTy, SrcVec,
                               DAG.getConstant(0, DL, Subtarget.getGRLenVT()));
      }

      return DAG.getBitcast(ResTy, SplatVec);
    }

    // Use INSERT_VECTOR_ELT operations rather than expand to stores, because
    // using memory operations is much lower.
    //
    // For 256-bit vectors, normally split into two halves and concatenate.
    // Special case: for v8i32/v8f32/v4i64/v4f64, if the upper half has only
    // one non-undef element, skip spliting to avoid a worse result.
    if (ResTy == MVT::v8i32 || ResTy == MVT::v8f32 || ResTy == MVT::v4i64 ||
        ResTy == MVT::v4f64) {
      unsigned NonUndefCount = 0;
      for (unsigned i = NumElts / 2; i < NumElts; ++i) {
        if (!Node->getOperand(i).isUndef()) {
          ++NonUndefCount;
          if (NonUndefCount > 1)
            break;
        }
      }
      if (NonUndefCount == 1)
        return fillSubVectorFromBuildVector(Node, DAG, DL, Subtarget, ResTy, 0);
    }

    // Build the low half (or the full vector for 128-bit types) ...
    EVT VecTy =
        Is256Vec ? ResTy.getHalfNumVectorElementsVT(*DAG.getContext()) : ResTy;
    SDValue Vector =
        fillSubVectorFromBuildVector(Node, DAG, DL, Subtarget, VecTy, 0);

    if (Is128Vec)
      return Vector;

    // ... then the high half, and concatenate the two.
    SDValue VectorHi = fillSubVectorFromBuildVector(Node, DAG, DL, Subtarget,
                                                    VecTy, NumElts / 2);

    return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResTy, Vector, VectorHi);
  }

  return SDValue();
}
3452
/// Lower CONCAT_VECTORS of two 128-bit subvectors into a 256-bit result,
/// classifying operands so zero/undef halves need no insert.
SDValue LoongArchTargetLowering::lowerCONCAT_VECTORS(SDValue Op,
                                                     SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT ResVT = Op.getSimpleValueType();
  assert(ResVT.is256BitVector() && Op.getNumOperands() == 2);

  // Classify each subvector operand: undef, freeze(undef), all-zeros, or
  // anything else ("non-zero"). NonZeros is a bitmask of the last kind.
  unsigned NumOperands = Op.getNumOperands();
  unsigned NumFreezeUndef = 0;
  unsigned NumZero = 0;
  unsigned NumNonZero = 0;
  unsigned NonZeros = 0;
  SmallSet<SDValue, 4> Undefs;
  for (unsigned i = 0; i != NumOperands; ++i) {
    SDValue SubVec = Op.getOperand(i);
    if (SubVec.isUndef())
      continue;
    if (ISD::isFreezeUndef(SubVec.getNode())) {
      // If the freeze(undef) has multiple uses then we must fold to zero.
      if (SubVec.hasOneUse()) {
        ++NumFreezeUndef;
      } else {
        ++NumZero;
        Undefs.insert(SubVec);
      }
    } else if (ISD::isBuildVectorAllZeros(SubVec.getNode()))
      ++NumZero;
    else {
      assert(i < sizeof(NonZeros) * CHAR_BIT); // Ensure the shift is in range.
      NonZeros |= 1 << i;
      ++NumNonZero;
    }
  }

  // If we have more than 2 non-zeros, build each half separately.
  if (NumNonZero > 2) {
    MVT HalfVT = ResVT.getHalfNumVectorElementsVT();
    ArrayRef<SDUse> Ops = Op->ops();
    SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
                             Ops.slice(0, NumOperands / 2));
    SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
                             Ops.slice(NumOperands / 2));
    return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi);
  }

  // Otherwise, build it up through insert_subvectors. The base vector is
  // zero if any operand was zero, else frozen-undef if any operand was a
  // one-use freeze(undef), else plain undef.
  SDValue Vec = NumZero ? DAG.getConstant(0, DL, ResVT)
                        : (NumFreezeUndef ? DAG.getFreeze(DAG.getUNDEF(ResVT))
                                          : DAG.getUNDEF(ResVT));

  // Replace Undef operands with ZeroVector.
  for (SDValue U : Undefs)
    DAG.ReplaceAllUsesWith(U, DAG.getConstant(0, DL, U.getSimpleValueType()));

  // Insert only the non-zero subvectors; zero/undef positions are already
  // covered by the base vector chosen above.
  MVT SubVT = Op.getOperand(0).getSimpleValueType();
  unsigned NumSubElems = SubVT.getVectorNumElements();
  for (unsigned i = 0; i != NumOperands; ++i) {
    if ((NonZeros & (1 << i)) == 0)
      continue;

    Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ResVT, Vec, Op.getOperand(i),
                      DAG.getVectorIdxConstant(i * NumSubElems, DL));
  }

  return Vec;
}
3518
/// Lower EXTRACT_VECTOR_ELT with a variable index on 256-bit vectors by
/// shuffling/permuting the wanted element into lane 0, where a constant-index
/// extract is then emitted.
SDValue
LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
                                                 SelectionDAG &DAG) const {
  MVT EltVT = Op.getSimpleValueType();
  SDValue Vec = Op->getOperand(0);
  EVT VecTy = Vec->getValueType(0);
  SDValue Idx = Op->getOperand(1);
  SDLoc DL(Op);
  MVT GRLenVT = Subtarget.getGRLenVT();

  assert(VecTy.is256BitVector() && "Unexpected EXTRACT_VECTOR_ELT vector type");

  // Constant-index extracts are kept unchanged.
  if (isa<ConstantSDNode>(Idx))
    return Op;

  switch (VecTy.getSimpleVT().SimpleTy) {
  default:
    llvm_unreachable("Unexpected type");
  case MVT::v32i8:
  case MVT::v16i16:
  case MVT::v4i64:
  case MVT::v4f64: {
    // Extract the high half subvector and place it to the low half of a new
    // vector. It doesn't matter what the high half of the new vector is.
    EVT HalfTy = VecTy.getHalfNumVectorElementsVT(*DAG.getContext());
    SDValue VecHi =
        DAG.getExtractSubvector(DL, HalfTy, Vec, HalfTy.getVectorNumElements());
    SDValue TmpVec =
        DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecTy, DAG.getUNDEF(VecTy),
                    VecHi, DAG.getConstant(0, DL, GRLenVT));

    // Shuffle the origin Vec and the TmpVec using MaskVec, the lowest element
    // of MaskVec is Idx, the rest do not matter. ResVec[0] will hold the
    // desired element.
    SDValue IdxCp =
        Subtarget.is64Bit()
            ? DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Idx)
            : DAG.getBitcast(MVT::f32, Idx);
    SDValue IdxVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v8f32, IdxCp);
    SDValue MaskVec =
        DAG.getBitcast((VecTy == MVT::v4f64) ? MVT::v4i64 : VecTy, IdxVec);
    SDValue ResVec =
        DAG.getNode(LoongArchISD::VSHUF, DL, VecTy, MaskVec, TmpVec, Vec);

    // Lane 0 now holds the element, so a constant-index extract suffices.
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, ResVec,
                       DAG.getConstant(0, DL, GRLenVT));
  }
  case MVT::v8i32:
  case MVT::v8f32: {
    // For 32-bit lanes, XVPERM with a splatted index replicates the selected
    // element into every lane, including lane 0.
    SDValue SplatIdx = DAG.getSplatBuildVector(MVT::v8i32, DL, Idx);
    SDValue SplatValue =
        DAG.getNode(LoongArchISD::XVPERM, DL, VecTy, Vec, SplatIdx);

    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, SplatValue,
                       DAG.getConstant(0, DL, GRLenVT));
  }
  }
}
3577
/// Lower INSERT_VECTOR_ELT with a variable index as a lane-wise select:
/// select (splat(idx) == {0,1,2,...}) ? splat(elt) : vec.
SDValue
LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
                                                SelectionDAG &DAG) const {
  MVT VT = Op.getSimpleValueType();
  MVT EltVT = VT.getVectorElementType();
  unsigned NumElts = VT.getVectorNumElements();
  unsigned EltSizeInBits = EltVT.getScalarSizeInBits();
  SDLoc DL(Op);
  SDValue Op0 = Op.getOperand(0); // the vector
  SDValue Op1 = Op.getOperand(1); // the element to insert
  SDValue Op2 = Op.getOperand(2); // the index

  // Constant-index inserts are kept unchanged.
  if (isa<ConstantSDNode>(Op2))
    return Op;

  // Index vector type: one integer lane of the element's width per lane.
  MVT IdxTy = MVT::getIntegerVT(EltSizeInBits);
  MVT IdxVTy = MVT::getVectorVT(IdxTy, NumElts);

  if (!isTypeLegal(VT) || !isTypeLegal(IdxVTy))
    return SDValue();

  SDValue SplatElt = DAG.getSplatBuildVector(VT, DL, Op1);
  SmallVector<SDValue, 32> RawIndices;
  SDValue SplatIdx;
  SDValue Indices;

  // On LA32 an i64 lane cannot be splat directly: build a vector of twice as
  // many i32 lanes holding {Op2, 0} pairs and bitcast it to the i64 index
  // vector (likewise for the {i, 0} lane-number vector).
  if (!Subtarget.is64Bit() && IdxTy == MVT::i64) {
    MVT PairVTy = MVT::getVectorVT(MVT::i32, NumElts * 2);
    for (unsigned i = 0; i < NumElts; ++i) {
      RawIndices.push_back(Op2);
      RawIndices.push_back(DAG.getConstant(0, DL, MVT::i32));
    }
    SplatIdx = DAG.getBuildVector(PairVTy, DL, RawIndices);
    SplatIdx = DAG.getBitcast(IdxVTy, SplatIdx);

    RawIndices.clear();
    for (unsigned i = 0; i < NumElts; ++i) {
      RawIndices.push_back(DAG.getConstant(i, DL, MVT::i32));
      RawIndices.push_back(DAG.getConstant(0, DL, MVT::i32));
    }
    Indices = DAG.getBuildVector(PairVTy, DL, RawIndices);
    Indices = DAG.getBitcast(IdxVTy, Indices);
  } else {
    SplatIdx = DAG.getSplatBuildVector(IdxVTy, DL, Op2);

    for (unsigned i = 0; i < NumElts; ++i)
      RawIndices.push_back(DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
    Indices = DAG.getBuildVector(IdxVTy, DL, RawIndices);
  }

  // insert vec, elt, idx
  // =>
  // select (splatidx == {0,1,2...}) ? splatelt : vec
  SDValue SelectCC =
      DAG.getSetCC(DL, IdxVTy, SplatIdx, Indices, ISD::CondCode::SETEQ);
  return DAG.getNode(ISD::VSELECT, DL, VT, SelectCC, SplatElt, Op0);
}
3635
3636SDValue LoongArchTargetLowering::lowerATOMIC_FENCE(SDValue Op,
3637 SelectionDAG &DAG) const {
3638 SDLoc DL(Op);
3639 SyncScope::ID FenceSSID =
3640 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
3641
3642 // singlethread fences only synchronize with signal handlers on the same
3643 // thread and thus only need to preserve instruction order, not actually
3644 // enforce memory ordering.
3645 if (FenceSSID == SyncScope::SingleThread)
3646 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
3647 return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
3648
3649 return Op;
3650}
3651
3652SDValue LoongArchTargetLowering::lowerWRITE_REGISTER(SDValue Op,
3653 SelectionDAG &DAG) const {
3654
3655 if (Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i32) {
3656 DAG.getContext()->emitError(
3657 "On LA64, only 64-bit registers can be written.");
3658 return Op.getOperand(0);
3659 }
3660
3661 if (!Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i64) {
3662 DAG.getContext()->emitError(
3663 "On LA32, only 32-bit registers can be written.");
3664 return Op.getOperand(0);
3665 }
3666
3667 return Op;
3668}
3669
/// Lower FRAMEADDR: copy the frame register, then chase saved frame pointers
/// up `depth` levels.
SDValue LoongArchTargetLowering::lowerFRAMEADDR(SDValue Op,
                                                SelectionDAG &DAG) const {
  // The depth argument must be a compile-time constant.
  if (!isa<ConstantSDNode>(Op.getOperand(0))) {
    DAG.getContext()->emitError("argument to '__builtin_frame_address' must "
                                "be a constant integer");
    return SDValue();
  }

  MachineFunction &MF = DAG.getMachineFunction();
  Register FrameReg = Subtarget.getRegisterInfo()->getFrameRegister(MF);
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
  unsigned Depth = Op.getConstantOperandVal(0);
  int GRLenInBytes = Subtarget.getGRLen() / 8;

  // Walk up the call chain: each parent's frame address is loaded from
  // offset -2*GRLenInBytes relative to the current frame address (where the
  // saved frame pointer is assumed to live — per the frame layout).
  while (Depth--) {
    int Offset = -(GRLenInBytes * 2);
    SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
                              DAG.getSignedConstant(Offset, DL, VT));
    FrameAddr =
        DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
  }
  return FrameAddr;
}
3696
/// Lower RETURNADDR: only depth 0 is supported; the return address is read
/// from the RA register.
SDValue LoongArchTargetLowering::lowerRETURNADDR(SDValue Op,
                                                 SelectionDAG &DAG) const {
  // Currently only support lowering return address for current frame.
  if (Op.getConstantOperandVal(0) != 0) {
    DAG.getContext()->emitError(
        "return address can only be determined for the current frame");
    return SDValue();
  }

  MachineFunction &MF = DAG.getMachineFunction();
  MVT GRLenVT = Subtarget.getGRLenVT();

  // Return the value of the return address register, marking it an implicit
  // live-in.
  Register Reg = MF.addLiveIn(Subtarget.getRegisterInfo()->getRARegister(),
                              getRegClassFor(GRLenVT));
  return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), Reg, GRLenVT);
}
3716
3717SDValue LoongArchTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
3718 SelectionDAG &DAG) const {
3719 MachineFunction &MF = DAG.getMachineFunction();
3720 auto Size = Subtarget.getGRLen() / 8;
3721 auto FI = MF.getFrameInfo().CreateFixedObject(Size, 0, false);
3722 return DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
3723}
3724
/// Lower VASTART by storing the address of the varargs save area into the
/// va_list memory operand.
SDValue LoongArchTargetLowering::lowerVASTART(SDValue Op,
                                              SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  auto *FuncInfo = MF.getInfo<LoongArchMachineFunctionInfo>();

  SDLoc DL(Op);
  SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),

  // vastart just stores the address of the VarArgsFrameIndex slot into the
  // memory location argument.
  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
  return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
                      MachinePointerInfo(SV));
}
3740
/// Lower UINT_TO_FP for LA64 with single-precision-only FP (F without D).
/// Inputs whose upper bits are already known constrained are kept as-is;
/// everything else is softened to the libcall from RTLIB::getUINTTOFP.
SDValue LoongArchTargetLowering::lowerUINT_TO_FP(SDValue Op,
                                                 SelectionDAG &DAG) const {
  assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
         !Subtarget.hasBasicD() && "unexpected target features");

  SDLoc DL(Op);
  SDValue Op0 = Op.getOperand(0);
  // Input masked by a constant below 0xFFFFFFFF: keep the node unchanged
  // (presumably matched by dedicated patterns later — NOTE(review): confirm).
  if (Op0->getOpcode() == ISD::AND) {
    auto *C = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
    if (C && C->getZExtValue() < UINT64_C(0xFFFFFFFF))
      return Op;
  }

  // Input produced by a BSTRPICK extracting from bit 0 with msb < 31: the
  // value fits the handled range, keep the node.
  if (Op0->getOpcode() == LoongArchISD::BSTRPICK &&
      Op0.getConstantOperandVal(1) < UINT64_C(0X1F) &&
      Op0.getConstantOperandVal(2) == UINT64_C(0))
    return Op;

  // Input asserted zero-extended from a type narrower than i32: keep it.
  if (Op0.getOpcode() == ISD::AssertZext &&
      dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLT(MVT::i32))
    return Op;

  // Otherwise fall back to a libcall.
  EVT OpVT = Op0.getValueType();
  EVT RetVT = Op.getValueType();
  RTLIB::Libcall LC = RTLIB::getUINTTOFP(OpVT, RetVT);
  MakeLibCallOptions CallOptions;
  CallOptions.setTypeListBeforeSoften(OpVT, RetVT);
  SDValue Chain = SDValue();
  std::tie(Result, Chain) =
      makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
  return Result;
}
3774
/// Lower SINT_TO_FP for LA64 with single-precision-only FP (F without D).
/// Inputs already asserted (sign/zero) extended from i32 or narrower are kept
/// as-is; everything else is softened to the RTLIB::getSINTTOFP libcall.
SDValue LoongArchTargetLowering::lowerSINT_TO_FP(SDValue Op,
                                                 SelectionDAG &DAG) const {
  assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
         !Subtarget.hasBasicD() && "unexpected target features");

  SDLoc DL(Op);
  SDValue Op0 = Op.getOperand(0);

  if ((Op0.getOpcode() == ISD::AssertSext ||
       dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLE(MVT::i32))
    return Op;

  // Fall back to the conversion libcall, recording the pre-softening types.
  EVT OpVT = Op0.getValueType();
  EVT RetVT = Op.getValueType();
  RTLIB::Libcall LC = RTLIB::getSINTTOFP(OpVT, RetVT);
  MakeLibCallOptions CallOptions;
  CallOptions.setTypeListBeforeSoften(OpVT, RetVT);
  SDValue Chain = SDValue();
  std::tie(Result, Chain) =
      makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
  return Result;
}
3799
3800SDValue LoongArchTargetLowering::lowerBITCAST(SDValue Op,
3801 SelectionDAG &DAG) const {
3802
3803 SDLoc DL(Op);
3804 EVT VT = Op.getValueType();
3805 SDValue Op0 = Op.getOperand(0);
3806 EVT Op0VT = Op0.getValueType();
3807
3808 if (Op.getValueType() == MVT::f32 && Op0VT == MVT::i32 &&
3809 Subtarget.is64Bit() && Subtarget.hasBasicF()) {
3810 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
3811 return DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, NewOp0);
3812 }
3813 if (VT == MVT::f64 && Op0VT == MVT::i64 && !Subtarget.is64Bit()) {
3814 SDValue Lo, Hi;
3815 std::tie(Lo, Hi) = DAG.SplitScalar(Op0, DL, MVT::i32, MVT::i32);
3816 return DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64, Lo, Hi);
3817 }
3818 return Op;
3819}
3820
3821SDValue LoongArchTargetLowering::lowerFP_TO_SINT(SDValue Op,
3822 SelectionDAG &DAG) const {
3823
3824 SDLoc DL(Op);
3825 SDValue Op0 = Op.getOperand(0);
3826
3827 if (Op0.getValueType() == MVT::f16)
3828 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
3829
3830 if (Op.getValueSizeInBits() > 32 && Subtarget.hasBasicF() &&
3831 !Subtarget.hasBasicD()) {
3832 SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, MVT::f32, Op0);
3833 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Dst);
3834 }
3835
3836 EVT FPTy = EVT::getFloatingPointVT(Op.getValueSizeInBits());
3837 SDValue Trunc = DAG.getNode(LoongArchISD::FTINT, DL, FPTy, Op0);
3838 return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Trunc);
3839}
3840
                                 SelectionDAG &DAG, unsigned Flags) {
  // Wrap the global symbol as a TargetGlobalAddress with offset 0 and Flags.
  return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
}
3845
                                 SelectionDAG &DAG, unsigned Flags) {
  // Wrap a block address, preserving its offset.
  return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
                                   Flags);
}
3851
                                 SelectionDAG &DAG, unsigned Flags) {
  // Wrap a constant-pool entry, preserving its alignment and offset.
  return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
                                   N->getOffset(), Flags);
}
3857
                                 SelectionDAG &DAG, unsigned Flags) {
  // Jump tables are referenced by index only.
  return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
}
3862
/// Materialize the address of symbol node N according to the code model.
/// Local symbols use a PC-relative pseudo; non-local symbols are loaded from
/// the GOT, and that load is tagged invariant so MachineLICM can hoist it.
template <class NodeTy>
SDValue LoongArchTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
                                         bool IsLocal) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());
  SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
  SDValue Load;

  switch (M) {
  default:
    report_fatal_error("Unsupported code model");

  case CodeModel::Large: {
    assert(Subtarget.is64Bit() && "Large code model requires LA64");

    // This is not actually used, but is necessary for successfully matching
    // the PseudoLA_*_LARGE nodes.
    SDValue Tmp = DAG.getConstant(0, DL, Ty);
    if (IsLocal) {
      // This generates the pattern (PseudoLA_PCREL_LARGE tmp sym), that
      // eventually becomes the desired 5-insn code sequence.
      Load = SDValue(DAG.getMachineNode(LoongArch::PseudoLA_PCREL_LARGE, DL, Ty,
                                        Tmp, Addr),
                     0);
    } else {
      // This generates the pattern (PseudoLA_GOT_LARGE tmp sym), that
      // eventually becomes the desired 5-insn code sequence.
      Load = SDValue(
          DAG.getMachineNode(LoongArch::PseudoLA_GOT_LARGE, DL, Ty, Tmp, Addr),
          0);
    }
    break;
  }

  case CodeModel::Small:
  case CodeModel::Medium:
    if (IsLocal) {
      // This generates the pattern (PseudoLA_PCREL sym), which
      //
      // for la32r expands to:
      // (addi.w (pcaddu12i %pcadd_hi20(sym)) %pcadd_lo12(.Lpcadd_hi)).
      //
      // for la32s and la64 expands to:
      // (addi.w/d (pcalau12i %pc_hi20(sym)) %pc_lo12(sym)).
      Load = SDValue(
          DAG.getMachineNode(LoongArch::PseudoLA_PCREL, DL, Ty, Addr), 0);
    } else {
      // This generates the pattern (PseudoLA_GOT sym), which
      //
      // for la32r expands to:
      // (ld.w (pcaddu12i %got_pcadd_hi20(sym)) %pcadd_lo12(.Lpcadd_hi)).
      //
      // for la32s and la64 expands to:
      // (ld.w/d (pcalau12i %got_pc_hi20(sym)) %got_pc_lo12(sym)).
      Load =
          SDValue(DAG.getMachineNode(LoongArch::PseudoLA_GOT, DL, Ty, Addr), 0);
    }
  }

  if (!IsLocal) {
    // Mark the load instruction as invariant to enable hoisting in MachineLICM.
    MachineFunction &MF = DAG.getMachineFunction();
    MachineMemOperand *MemOp = MF.getMachineMemOperand(
        LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
    DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
  }

  return Load;
}
3936
3937SDValue LoongArchTargetLowering::lowerBlockAddress(SDValue Op,
3938 SelectionDAG &DAG) const {
3939 return getAddr(cast<BlockAddressSDNode>(Op), DAG,
3940 DAG.getTarget().getCodeModel());
3941}
3942
3943SDValue LoongArchTargetLowering::lowerJumpTable(SDValue Op,
3944 SelectionDAG &DAG) const {
3945 return getAddr(cast<JumpTableSDNode>(Op), DAG,
3946 DAG.getTarget().getCodeModel());
3947}
3948
3949SDValue LoongArchTargetLowering::lowerConstantPool(SDValue Op,
3950 SelectionDAG &DAG) const {
3951 return getAddr(cast<ConstantPoolSDNode>(Op), DAG,
3952 DAG.getTarget().getCodeModel());
3953}
3954
3955SDValue LoongArchTargetLowering::lowerGlobalAddress(SDValue Op,
3956 SelectionDAG &DAG) const {
3957 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
3958 assert(N->getOffset() == 0 && "unexpected offset in global node");
3959 auto CM = DAG.getTarget().getCodeModel();
3960 const GlobalValue *GV = N->getGlobal();
3961
3962 if (GV->isDSOLocal() && isa<GlobalVariable>(GV)) {
3963 if (auto GCM = dyn_cast<GlobalVariable>(GV)->getCodeModel())
3964 CM = *GCM;
3965 }
3966
3967 return getAddr(N, DAG, CM, GV->isDSOLocal());
3968}
3969
/// Lower a TLS address in a static model (LE or IE) via the pseudo Opc, then
/// add the thread pointer (register R2). GOT-based accesses (IE) have their
/// load marked invariant so MachineLICM can hoist it.
SDValue LoongArchTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
                                                  SelectionDAG &DAG,
                                                  unsigned Opc, bool UseGOT,
                                                  bool Large) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());
  MVT GRLenVT = Subtarget.getGRLenVT();

  // This is not actually used, but is necessary for successfully matching the
  // PseudoLA_*_LARGE nodes.
  SDValue Tmp = DAG.getConstant(0, DL, Ty);
  SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);

  // Only IE needs an extra argument for large code model.
  SDValue Offset = Opc == LoongArch::PseudoLA_TLS_IE_LARGE
                       ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
                       : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);

  // If it is LE for normal/medium code model, the add tp operation will occur
  // during the pseudo-instruction expansion.
  if (Opc == LoongArch::PseudoLA_TLS_LE && !Large)
    return Offset;

  if (UseGOT) {
    // Mark the load instruction as invariant to enable hoisting in MachineLICM.
    MachineFunction &MF = DAG.getMachineFunction();
    MachineMemOperand *MemOp = MF.getMachineMemOperand(
        LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
    DAG.setNodeMemRefs(cast<MachineSDNode>(Offset.getNode()), {MemOp});
  }

  // Add the thread pointer.
  return DAG.getNode(ISD::ADD, DL, Ty, Offset,
                     DAG.getRegister(LoongArch::R2, GRLenVT));
}
4008
4009SDValue LoongArchTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
4010 SelectionDAG &DAG,
4011 unsigned Opc,
4012 bool Large) const {
4013 SDLoc DL(N);
4014 EVT Ty = getPointerTy(DAG.getDataLayout());
4015 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
4016
4017 // This is not actually used, but is necessary for successfully matching the
4018 // PseudoLA_*_LARGE nodes.
4019 SDValue Tmp = DAG.getConstant(0, DL, Ty);
4020
4021 // Use a PC-relative addressing mode to access the dynamic GOT address.
4022 SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
4023 SDValue Load = Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
4024 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
4025
4026 // Prepare argument list to generate call.
4028 Args.emplace_back(Load, CallTy);
4029
4030 // Setup call to __tls_get_addr.
4031 TargetLowering::CallLoweringInfo CLI(DAG);
4032 CLI.setDebugLoc(DL)
4033 .setChain(DAG.getEntryNode())
4034 .setLibCallee(CallingConv::C, CallTy,
4035 DAG.getExternalSymbol("__tls_get_addr", Ty),
4036 std::move(Args));
4037
4038 return LowerCallTo(CLI).first;
4039}
4040
4041SDValue LoongArchTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
4042 SelectionDAG &DAG, unsigned Opc,
4043 bool Large) const {
4044 SDLoc DL(N);
4045 EVT Ty = getPointerTy(DAG.getDataLayout());
4046 const GlobalValue *GV = N->getGlobal();
4047
4048 // This is not actually used, but is necessary for successfully matching the
4049 // PseudoLA_*_LARGE nodes.
4050 SDValue Tmp = DAG.getConstant(0, DL, Ty);
4051
4052 // Use a PC-relative addressing mode to access the global dynamic GOT address.
4053 // This generates the pattern (PseudoLA_TLS_DESC_PC{,LARGE} sym).
4054 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
4055 return Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
4056 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
4057}
4058
4059SDValue
4060LoongArchTargetLowering::lowerGlobalTLSAddress(SDValue Op,
4061 SelectionDAG &DAG) const {
4064 report_fatal_error("In GHC calling convention TLS is not supported");
4065
4066 bool Large = DAG.getTarget().getCodeModel() == CodeModel::Large;
4067 assert((!Large || Subtarget.is64Bit()) && "Large code model requires LA64");
4068
4069 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
4070 assert(N->getOffset() == 0 && "unexpected offset in global node");
4071
4072 if (DAG.getTarget().useEmulatedTLS())
4073 reportFatalUsageError("the emulated TLS is prohibited");
4074
4075 bool IsDesc = DAG.getTarget().useTLSDESC();
4076
4077 switch (getTargetMachine().getTLSModel(N->getGlobal())) {
4079 // In this model, application code calls the dynamic linker function
4080 // __tls_get_addr to locate TLS offsets into the dynamic thread vector at
4081 // runtime.
4082 if (!IsDesc)
4083 return getDynamicTLSAddr(N, DAG,
4084 Large ? LoongArch::PseudoLA_TLS_GD_LARGE
4085 : LoongArch::PseudoLA_TLS_GD,
4086 Large);
4087 break;
4089 // Same as GeneralDynamic, except for assembly modifiers and relocation
4090 // records.
4091 if (!IsDesc)
4092 return getDynamicTLSAddr(N, DAG,
4093 Large ? LoongArch::PseudoLA_TLS_LD_LARGE
4094 : LoongArch::PseudoLA_TLS_LD,
4095 Large);
4096 break;
4098 // This model uses the GOT to resolve TLS offsets.
4099 return getStaticTLSAddr(N, DAG,
4100 Large ? LoongArch::PseudoLA_TLS_IE_LARGE
4101 : LoongArch::PseudoLA_TLS_IE,
4102 /*UseGOT=*/true, Large);
4104 // This model is used when static linking as the TLS offsets are resolved
4105 // during program linking.
4106 //
4107 // This node doesn't need an extra argument for the large code model.
4108 return getStaticTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_LE,
4109 /*UseGOT=*/false, Large);
4110 }
4111
4112 return getTLSDescAddr(N, DAG,
4113 Large ? LoongArch::PseudoLA_TLS_DESC_LARGE
4114 : LoongArch::PseudoLA_TLS_DESC,
4115 Large);
4116}
4117
4118template <unsigned N>
4120 SelectionDAG &DAG, bool IsSigned = false) {
4121 auto *CImm = cast<ConstantSDNode>(Op->getOperand(ImmOp));
4122 // Check the ImmArg.
4123 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
4124 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
4125 DAG.getContext()->emitError(Op->getOperationName(0) +
4126 ": argument out of range.");
4127 return DAG.getNode(ISD::UNDEF, SDLoc(Op), Op.getValueType());
4128 }
4129 return SDValue();
4130}
4131
// Lower ISD::INTRINSIC_WO_CHAIN. Almost all cases here only range-check the
// immediate operand of LSX/LASX intrinsics via checkIntrinsicImmArg<N>:
// an out-of-range immediate yields a diagnostic plus an UNDEF result, while
// an in-range one returns SDValue() so default instruction selection runs.
// The second argument to checkIntrinsicImmArg is the operand index of the
// immediate within the intrinsic node (operand 0 is the intrinsic ID).
SDValue
LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
                                                 SelectionDAG &DAG) const {
  switch (Op.getConstantOperandVal(0)) {
  default:
    return SDValue(); // Don't custom lower most intrinsics.
  case Intrinsic::thread_pointer: {
    // The thread pointer lives in $tp (R2).
    EVT PtrVT = getPointerTy(DAG.getDataLayout());
    return DAG.getRegister(LoongArch::R2, PtrVT);
  }
  // uimm1 in operand 2.
  case Intrinsic::loongarch_lsx_vpickve2gr_d:
  case Intrinsic::loongarch_lsx_vpickve2gr_du:
  case Intrinsic::loongarch_lsx_vreplvei_d:
  case Intrinsic::loongarch_lasx_xvrepl128vei_d:
    return checkIntrinsicImmArg<1>(Op, 2, DAG);
  // uimm2 in operand 2.
  case Intrinsic::loongarch_lsx_vreplvei_w:
  case Intrinsic::loongarch_lasx_xvrepl128vei_w:
  case Intrinsic::loongarch_lasx_xvpickve2gr_d:
  case Intrinsic::loongarch_lasx_xvpickve2gr_du:
  case Intrinsic::loongarch_lasx_xvpickve_d:
  case Intrinsic::loongarch_lasx_xvpickve_d_f:
    return checkIntrinsicImmArg<2>(Op, 2, DAG);
  // uimm2 in operand 3.
  case Intrinsic::loongarch_lasx_xvinsve0_d:
    return checkIntrinsicImmArg<2>(Op, 3, DAG);
  // uimm3 in operand 2.
  case Intrinsic::loongarch_lsx_vsat_b:
  case Intrinsic::loongarch_lsx_vsat_bu:
  case Intrinsic::loongarch_lsx_vrotri_b:
  case Intrinsic::loongarch_lsx_vsllwil_h_b:
  case Intrinsic::loongarch_lsx_vsllwil_hu_bu:
  case Intrinsic::loongarch_lsx_vsrlri_b:
  case Intrinsic::loongarch_lsx_vsrari_b:
  case Intrinsic::loongarch_lsx_vreplvei_h:
  case Intrinsic::loongarch_lasx_xvsat_b:
  case Intrinsic::loongarch_lasx_xvsat_bu:
  case Intrinsic::loongarch_lasx_xvrotri_b:
  case Intrinsic::loongarch_lasx_xvsllwil_h_b:
  case Intrinsic::loongarch_lasx_xvsllwil_hu_bu:
  case Intrinsic::loongarch_lasx_xvsrlri_b:
  case Intrinsic::loongarch_lasx_xvsrari_b:
  case Intrinsic::loongarch_lasx_xvrepl128vei_h:
  case Intrinsic::loongarch_lasx_xvpickve_w:
  case Intrinsic::loongarch_lasx_xvpickve_w_f:
    return checkIntrinsicImmArg<3>(Op, 2, DAG);
  // uimm3 in operand 3.
  case Intrinsic::loongarch_lasx_xvinsve0_w:
    return checkIntrinsicImmArg<3>(Op, 3, DAG);
  // uimm4 in operand 2.
  case Intrinsic::loongarch_lsx_vsat_h:
  case Intrinsic::loongarch_lsx_vsat_hu:
  case Intrinsic::loongarch_lsx_vrotri_h:
  case Intrinsic::loongarch_lsx_vsllwil_w_h:
  case Intrinsic::loongarch_lsx_vsllwil_wu_hu:
  case Intrinsic::loongarch_lsx_vsrlri_h:
  case Intrinsic::loongarch_lsx_vsrari_h:
  case Intrinsic::loongarch_lsx_vreplvei_b:
  case Intrinsic::loongarch_lasx_xvsat_h:
  case Intrinsic::loongarch_lasx_xvsat_hu:
  case Intrinsic::loongarch_lasx_xvrotri_h:
  case Intrinsic::loongarch_lasx_xvsllwil_w_h:
  case Intrinsic::loongarch_lasx_xvsllwil_wu_hu:
  case Intrinsic::loongarch_lasx_xvsrlri_h:
  case Intrinsic::loongarch_lasx_xvsrari_h:
  case Intrinsic::loongarch_lasx_xvrepl128vei_b:
    return checkIntrinsicImmArg<4>(Op, 2, DAG);
  // uimm4 in operand 3.
  case Intrinsic::loongarch_lsx_vsrlni_b_h:
  case Intrinsic::loongarch_lsx_vsrani_b_h:
  case Intrinsic::loongarch_lsx_vsrlrni_b_h:
  case Intrinsic::loongarch_lsx_vsrarni_b_h:
  case Intrinsic::loongarch_lsx_vssrlni_b_h:
  case Intrinsic::loongarch_lsx_vssrani_b_h:
  case Intrinsic::loongarch_lsx_vssrlni_bu_h:
  case Intrinsic::loongarch_lsx_vssrani_bu_h:
  case Intrinsic::loongarch_lsx_vssrlrni_b_h:
  case Intrinsic::loongarch_lsx_vssrarni_b_h:
  case Intrinsic::loongarch_lsx_vssrlrni_bu_h:
  case Intrinsic::loongarch_lsx_vssrarni_bu_h:
  case Intrinsic::loongarch_lasx_xvsrlni_b_h:
  case Intrinsic::loongarch_lasx_xvsrani_b_h:
  case Intrinsic::loongarch_lasx_xvsrlrni_b_h:
  case Intrinsic::loongarch_lasx_xvsrarni_b_h:
  case Intrinsic::loongarch_lasx_xvssrlni_b_h:
  case Intrinsic::loongarch_lasx_xvssrani_b_h:
  case Intrinsic::loongarch_lasx_xvssrlni_bu_h:
  case Intrinsic::loongarch_lasx_xvssrani_bu_h:
  case Intrinsic::loongarch_lasx_xvssrlrni_b_h:
  case Intrinsic::loongarch_lasx_xvssrarni_b_h:
  case Intrinsic::loongarch_lasx_xvssrlrni_bu_h:
  case Intrinsic::loongarch_lasx_xvssrarni_bu_h:
    return checkIntrinsicImmArg<4>(Op, 3, DAG);
  // uimm5 in operand 2.
  case Intrinsic::loongarch_lsx_vsat_w:
  case Intrinsic::loongarch_lsx_vsat_wu:
  case Intrinsic::loongarch_lsx_vrotri_w:
  case Intrinsic::loongarch_lsx_vsllwil_d_w:
  case Intrinsic::loongarch_lsx_vsllwil_du_wu:
  case Intrinsic::loongarch_lsx_vsrlri_w:
  case Intrinsic::loongarch_lsx_vsrari_w:
  case Intrinsic::loongarch_lsx_vslei_bu:
  case Intrinsic::loongarch_lsx_vslei_hu:
  case Intrinsic::loongarch_lsx_vslei_wu:
  case Intrinsic::loongarch_lsx_vslei_du:
  case Intrinsic::loongarch_lsx_vslti_bu:
  case Intrinsic::loongarch_lsx_vslti_hu:
  case Intrinsic::loongarch_lsx_vslti_wu:
  case Intrinsic::loongarch_lsx_vslti_du:
  case Intrinsic::loongarch_lsx_vbsll_v:
  case Intrinsic::loongarch_lsx_vbsrl_v:
  case Intrinsic::loongarch_lasx_xvsat_w:
  case Intrinsic::loongarch_lasx_xvsat_wu:
  case Intrinsic::loongarch_lasx_xvrotri_w:
  case Intrinsic::loongarch_lasx_xvsllwil_d_w:
  case Intrinsic::loongarch_lasx_xvsllwil_du_wu:
  case Intrinsic::loongarch_lasx_xvsrlri_w:
  case Intrinsic::loongarch_lasx_xvsrari_w:
  case Intrinsic::loongarch_lasx_xvslei_bu:
  case Intrinsic::loongarch_lasx_xvslei_hu:
  case Intrinsic::loongarch_lasx_xvslei_wu:
  case Intrinsic::loongarch_lasx_xvslei_du:
  case Intrinsic::loongarch_lasx_xvslti_bu:
  case Intrinsic::loongarch_lasx_xvslti_hu:
  case Intrinsic::loongarch_lasx_xvslti_wu:
  case Intrinsic::loongarch_lasx_xvslti_du:
  case Intrinsic::loongarch_lasx_xvbsll_v:
  case Intrinsic::loongarch_lasx_xvbsrl_v:
    return checkIntrinsicImmArg<5>(Op, 2, DAG);
  // simm5 in operand 2.
  case Intrinsic::loongarch_lsx_vseqi_b:
  case Intrinsic::loongarch_lsx_vseqi_h:
  case Intrinsic::loongarch_lsx_vseqi_w:
  case Intrinsic::loongarch_lsx_vseqi_d:
  case Intrinsic::loongarch_lsx_vslei_b:
  case Intrinsic::loongarch_lsx_vslei_h:
  case Intrinsic::loongarch_lsx_vslei_w:
  case Intrinsic::loongarch_lsx_vslei_d:
  case Intrinsic::loongarch_lsx_vslti_b:
  case Intrinsic::loongarch_lsx_vslti_h:
  case Intrinsic::loongarch_lsx_vslti_w:
  case Intrinsic::loongarch_lsx_vslti_d:
  case Intrinsic::loongarch_lasx_xvseqi_b:
  case Intrinsic::loongarch_lasx_xvseqi_h:
  case Intrinsic::loongarch_lasx_xvseqi_w:
  case Intrinsic::loongarch_lasx_xvseqi_d:
  case Intrinsic::loongarch_lasx_xvslei_b:
  case Intrinsic::loongarch_lasx_xvslei_h:
  case Intrinsic::loongarch_lasx_xvslei_w:
  case Intrinsic::loongarch_lasx_xvslei_d:
  case Intrinsic::loongarch_lasx_xvslti_b:
  case Intrinsic::loongarch_lasx_xvslti_h:
  case Intrinsic::loongarch_lasx_xvslti_w:
  case Intrinsic::loongarch_lasx_xvslti_d:
    return checkIntrinsicImmArg<5>(Op, 2, DAG, /*IsSigned=*/true);
  // uimm5 in operand 3.
  case Intrinsic::loongarch_lsx_vsrlni_h_w:
  case Intrinsic::loongarch_lsx_vsrani_h_w:
  case Intrinsic::loongarch_lsx_vsrlrni_h_w:
  case Intrinsic::loongarch_lsx_vsrarni_h_w:
  case Intrinsic::loongarch_lsx_vssrlni_h_w:
  case Intrinsic::loongarch_lsx_vssrani_h_w:
  case Intrinsic::loongarch_lsx_vssrlni_hu_w:
  case Intrinsic::loongarch_lsx_vssrani_hu_w:
  case Intrinsic::loongarch_lsx_vssrlrni_h_w:
  case Intrinsic::loongarch_lsx_vssrarni_h_w:
  case Intrinsic::loongarch_lsx_vssrlrni_hu_w:
  case Intrinsic::loongarch_lsx_vssrarni_hu_w:
  case Intrinsic::loongarch_lsx_vfrstpi_b:
  case Intrinsic::loongarch_lsx_vfrstpi_h:
  case Intrinsic::loongarch_lasx_xvsrlni_h_w:
  case Intrinsic::loongarch_lasx_xvsrani_h_w:
  case Intrinsic::loongarch_lasx_xvsrlrni_h_w:
  case Intrinsic::loongarch_lasx_xvsrarni_h_w:
  case Intrinsic::loongarch_lasx_xvssrlni_h_w:
  case Intrinsic::loongarch_lasx_xvssrani_h_w:
  case Intrinsic::loongarch_lasx_xvssrlni_hu_w:
  case Intrinsic::loongarch_lasx_xvssrani_hu_w:
  case Intrinsic::loongarch_lasx_xvssrlrni_h_w:
  case Intrinsic::loongarch_lasx_xvssrarni_h_w:
  case Intrinsic::loongarch_lasx_xvssrlrni_hu_w:
  case Intrinsic::loongarch_lasx_xvssrarni_hu_w:
  case Intrinsic::loongarch_lasx_xvfrstpi_b:
  case Intrinsic::loongarch_lasx_xvfrstpi_h:
    return checkIntrinsicImmArg<5>(Op, 3, DAG);
  // uimm6 in operand 2.
  case Intrinsic::loongarch_lsx_vsat_d:
  case Intrinsic::loongarch_lsx_vsat_du:
  case Intrinsic::loongarch_lsx_vrotri_d:
  case Intrinsic::loongarch_lsx_vsrlri_d:
  case Intrinsic::loongarch_lsx_vsrari_d:
  case Intrinsic::loongarch_lasx_xvsat_d:
  case Intrinsic::loongarch_lasx_xvsat_du:
  case Intrinsic::loongarch_lasx_xvrotri_d:
  case Intrinsic::loongarch_lasx_xvsrlri_d:
  case Intrinsic::loongarch_lasx_xvsrari_d:
    return checkIntrinsicImmArg<6>(Op, 2, DAG);
  // uimm6 in operand 3.
  case Intrinsic::loongarch_lsx_vsrlni_w_d:
  case Intrinsic::loongarch_lsx_vsrani_w_d:
  case Intrinsic::loongarch_lsx_vsrlrni_w_d:
  case Intrinsic::loongarch_lsx_vsrarni_w_d:
  case Intrinsic::loongarch_lsx_vssrlni_w_d:
  case Intrinsic::loongarch_lsx_vssrani_w_d:
  case Intrinsic::loongarch_lsx_vssrlni_wu_d:
  case Intrinsic::loongarch_lsx_vssrani_wu_d:
  case Intrinsic::loongarch_lsx_vssrlrni_w_d:
  case Intrinsic::loongarch_lsx_vssrarni_w_d:
  case Intrinsic::loongarch_lsx_vssrlrni_wu_d:
  case Intrinsic::loongarch_lsx_vssrarni_wu_d:
  case Intrinsic::loongarch_lasx_xvsrlni_w_d:
  case Intrinsic::loongarch_lasx_xvsrani_w_d:
  case Intrinsic::loongarch_lasx_xvsrlrni_w_d:
  case Intrinsic::loongarch_lasx_xvsrarni_w_d:
  case Intrinsic::loongarch_lasx_xvssrlni_w_d:
  case Intrinsic::loongarch_lasx_xvssrani_w_d:
  case Intrinsic::loongarch_lasx_xvssrlni_wu_d:
  case Intrinsic::loongarch_lasx_xvssrani_wu_d:
  case Intrinsic::loongarch_lasx_xvssrlrni_w_d:
  case Intrinsic::loongarch_lasx_xvssrarni_w_d:
  case Intrinsic::loongarch_lasx_xvssrlrni_wu_d:
  case Intrinsic::loongarch_lasx_xvssrarni_wu_d:
    return checkIntrinsicImmArg<6>(Op, 3, DAG);
  // uimm7 in operand 3.
  case Intrinsic::loongarch_lsx_vsrlni_d_q:
  case Intrinsic::loongarch_lsx_vsrani_d_q:
  case Intrinsic::loongarch_lsx_vsrlrni_d_q:
  case Intrinsic::loongarch_lsx_vsrarni_d_q:
  case Intrinsic::loongarch_lsx_vssrlni_d_q:
  case Intrinsic::loongarch_lsx_vssrani_d_q:
  case Intrinsic::loongarch_lsx_vssrlni_du_q:
  case Intrinsic::loongarch_lsx_vssrani_du_q:
  case Intrinsic::loongarch_lsx_vssrlrni_d_q:
  case Intrinsic::loongarch_lsx_vssrarni_d_q:
  case Intrinsic::loongarch_lsx_vssrlrni_du_q:
  case Intrinsic::loongarch_lsx_vssrarni_du_q:
  case Intrinsic::loongarch_lasx_xvsrlni_d_q:
  case Intrinsic::loongarch_lasx_xvsrani_d_q:
  case Intrinsic::loongarch_lasx_xvsrlrni_d_q:
  case Intrinsic::loongarch_lasx_xvsrarni_d_q:
  case Intrinsic::loongarch_lasx_xvssrlni_d_q:
  case Intrinsic::loongarch_lasx_xvssrani_d_q:
  case Intrinsic::loongarch_lasx_xvssrlni_du_q:
  case Intrinsic::loongarch_lasx_xvssrani_du_q:
  case Intrinsic::loongarch_lasx_xvssrlrni_d_q:
  case Intrinsic::loongarch_lasx_xvssrarni_d_q:
  case Intrinsic::loongarch_lasx_xvssrlrni_du_q:
  case Intrinsic::loongarch_lasx_xvssrarni_du_q:
    return checkIntrinsicImmArg<7>(Op, 3, DAG);
  // uimm8 in operand 2.
  case Intrinsic::loongarch_lsx_vnori_b:
  case Intrinsic::loongarch_lsx_vshuf4i_b:
  case Intrinsic::loongarch_lsx_vshuf4i_h:
  case Intrinsic::loongarch_lsx_vshuf4i_w:
  case Intrinsic::loongarch_lasx_xvnori_b:
  case Intrinsic::loongarch_lasx_xvshuf4i_b:
  case Intrinsic::loongarch_lasx_xvshuf4i_h:
  case Intrinsic::loongarch_lasx_xvshuf4i_w:
  case Intrinsic::loongarch_lasx_xvpermi_d:
    return checkIntrinsicImmArg<8>(Op, 2, DAG);
  // uimm8 in operand 3.
  case Intrinsic::loongarch_lsx_vshuf4i_d:
  case Intrinsic::loongarch_lsx_vpermi_w:
  case Intrinsic::loongarch_lsx_vbitseli_b:
  case Intrinsic::loongarch_lsx_vextrins_b:
  case Intrinsic::loongarch_lsx_vextrins_h:
  case Intrinsic::loongarch_lsx_vextrins_w:
  case Intrinsic::loongarch_lsx_vextrins_d:
  case Intrinsic::loongarch_lasx_xvshuf4i_d:
  case Intrinsic::loongarch_lasx_xvpermi_w:
  case Intrinsic::loongarch_lasx_xvpermi_q:
  case Intrinsic::loongarch_lasx_xvbitseli_b:
  case Intrinsic::loongarch_lasx_xvextrins_b:
  case Intrinsic::loongarch_lasx_xvextrins_h:
  case Intrinsic::loongarch_lasx_xvextrins_w:
  case Intrinsic::loongarch_lasx_xvextrins_d:
    return checkIntrinsicImmArg<8>(Op, 3, DAG);
  // simm10 in operand 1.
  case Intrinsic::loongarch_lsx_vrepli_b:
  case Intrinsic::loongarch_lsx_vrepli_h:
  case Intrinsic::loongarch_lsx_vrepli_w:
  case Intrinsic::loongarch_lsx_vrepli_d:
  case Intrinsic::loongarch_lasx_xvrepli_b:
  case Intrinsic::loongarch_lasx_xvrepli_h:
  case Intrinsic::loongarch_lasx_xvrepli_w:
  case Intrinsic::loongarch_lasx_xvrepli_d:
    return checkIntrinsicImmArg<10>(Op, 1, DAG, /*IsSigned=*/true);
  // simm13 in operand 1.
  case Intrinsic::loongarch_lsx_vldi:
  case Intrinsic::loongarch_lasx_xvldi:
    return checkIntrinsicImmArg<13>(Op, 1, DAG, /*IsSigned=*/true);
  }
}
4409
4410// Helper function that emits error message for intrinsics with chain and return
4411// merge values of a UNDEF and the chain.
4413 StringRef ErrorMsg,
4414 SelectionDAG &DAG) {
4415 DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
4416 return DAG.getMergeValues({DAG.getUNDEF(Op.getValueType()), Op.getOperand(0)},
4417 SDLoc(Op));
4418}
4419
4420SDValue
4421LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
4422 SelectionDAG &DAG) const {
4423 SDLoc DL(Op);
4424 MVT GRLenVT = Subtarget.getGRLenVT();
4425 EVT VT = Op.getValueType();
4426 SDValue Chain = Op.getOperand(0);
4427 const StringRef ErrorMsgOOR = "argument out of range";
4428 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
4429 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
4430
4431 switch (Op.getConstantOperandVal(1)) {
4432 default:
4433 return Op;
4434 case Intrinsic::loongarch_crc_w_b_w:
4435 case Intrinsic::loongarch_crc_w_h_w:
4436 case Intrinsic::loongarch_crc_w_w_w:
4437 case Intrinsic::loongarch_crc_w_d_w:
4438 case Intrinsic::loongarch_crcc_w_b_w:
4439 case Intrinsic::loongarch_crcc_w_h_w:
4440 case Intrinsic::loongarch_crcc_w_w_w:
4441 case Intrinsic::loongarch_crcc_w_d_w:
4442 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqLA64, DAG);
4443 case Intrinsic::loongarch_csrrd_w:
4444 case Intrinsic::loongarch_csrrd_d: {
4445 unsigned Imm = Op.getConstantOperandVal(2);
4446 return !isUInt<14>(Imm)
4447 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4448 : DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
4449 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
4450 }
4451 case Intrinsic::loongarch_csrwr_w:
4452 case Intrinsic::loongarch_csrwr_d: {
4453 unsigned Imm = Op.getConstantOperandVal(3);
4454 return !isUInt<14>(Imm)
4455 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4456 : DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
4457 {Chain, Op.getOperand(2),
4458 DAG.getConstant(Imm, DL, GRLenVT)});
4459 }
4460 case Intrinsic::loongarch_csrxchg_w:
4461 case Intrinsic::loongarch_csrxchg_d: {
4462 unsigned Imm = Op.getConstantOperandVal(4);
4463 return !isUInt<14>(Imm)
4464 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4465 : DAG.getNode(LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
4466 {Chain, Op.getOperand(2), Op.getOperand(3),
4467 DAG.getConstant(Imm, DL, GRLenVT)});
4468 }
4469 case Intrinsic::loongarch_iocsrrd_d: {
4470 return DAG.getNode(
4471 LoongArchISD::IOCSRRD_D, DL, {GRLenVT, MVT::Other},
4472 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2))});
4473 }
4474#define IOCSRRD_CASE(NAME, NODE) \
4475 case Intrinsic::loongarch_##NAME: { \
4476 return DAG.getNode(LoongArchISD::NODE, DL, {GRLenVT, MVT::Other}, \
4477 {Chain, Op.getOperand(2)}); \
4478 }
4479 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
4480 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
4481 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
4482#undef IOCSRRD_CASE
4483 case Intrinsic::loongarch_cpucfg: {
4484 return DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
4485 {Chain, Op.getOperand(2)});
4486 }
4487 case Intrinsic::loongarch_lddir_d: {
4488 unsigned Imm = Op.getConstantOperandVal(3);
4489 return !isUInt<8>(Imm)
4490 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4491 : Op;
4492 }
4493 case Intrinsic::loongarch_movfcsr2gr: {
4494 if (!Subtarget.hasBasicF())
4495 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqF, DAG);
4496 unsigned Imm = Op.getConstantOperandVal(2);
4497 return !isUInt<2>(Imm)
4498 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4499 : DAG.getNode(LoongArchISD::MOVFCSR2GR, DL, {VT, MVT::Other},
4500 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
4501 }
4502 case Intrinsic::loongarch_lsx_vld:
4503 case Intrinsic::loongarch_lsx_vldrepl_b:
4504 case Intrinsic::loongarch_lasx_xvld:
4505 case Intrinsic::loongarch_lasx_xvldrepl_b:
4506 return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
4507 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4508 : SDValue();
4509 case Intrinsic::loongarch_lsx_vldrepl_h:
4510 case Intrinsic::loongarch_lasx_xvldrepl_h:
4511 return !isShiftedInt<11, 1>(
4512 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
4514 Op, "argument out of range or not a multiple of 2", DAG)
4515 : SDValue();
4516 case Intrinsic::loongarch_lsx_vldrepl_w:
4517 case Intrinsic::loongarch_lasx_xvldrepl_w:
4518 return !isShiftedInt<10, 2>(
4519 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
4521 Op, "argument out of range or not a multiple of 4", DAG)
4522 : SDValue();
4523 case Intrinsic::loongarch_lsx_vldrepl_d:
4524 case Intrinsic::loongarch_lasx_xvldrepl_d:
4525 return !isShiftedInt<9, 3>(
4526 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
4528 Op, "argument out of range or not a multiple of 8", DAG)
4529 : SDValue();
4530 }
4531}
4532
4533// Helper function that emits error message for intrinsics with void return
4534// value and return the chain.
4536 SelectionDAG &DAG) {
4537
4538 DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
4539 return Op.getOperand(0);
4540}
4541
4542SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op,
4543 SelectionDAG &DAG) const {
4544 SDLoc DL(Op);
4545 MVT GRLenVT = Subtarget.getGRLenVT();
4546 SDValue Chain = Op.getOperand(0);
4547 uint64_t IntrinsicEnum = Op.getConstantOperandVal(1);
4548 SDValue Op2 = Op.getOperand(2);
4549 const StringRef ErrorMsgOOR = "argument out of range";
4550 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
4551 const StringRef ErrorMsgReqLA32 = "requires loongarch32";
4552 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
4553
4554 switch (IntrinsicEnum) {
4555 default:
4556 // TODO: Add more Intrinsics.
4557 return SDValue();
4558 case Intrinsic::loongarch_cacop_d:
4559 case Intrinsic::loongarch_cacop_w: {
4560 if (IntrinsicEnum == Intrinsic::loongarch_cacop_d && !Subtarget.is64Bit())
4561 return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG);
4562 if (IntrinsicEnum == Intrinsic::loongarch_cacop_w && Subtarget.is64Bit())
4563 return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA32, DAG);
4564 // call void @llvm.loongarch.cacop.[d/w](uimm5, rj, simm12)
4565 unsigned Imm1 = Op2->getAsZExtVal();
4566 int Imm2 = cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue();
4567 if (!isUInt<5>(Imm1) || !isInt<12>(Imm2))
4568 return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
4569 return Op;
4570 }
4571 case Intrinsic::loongarch_dbar: {
4572 unsigned Imm = Op2->getAsZExtVal();
4573 return !isUInt<15>(Imm)
4574 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4575 : DAG.getNode(LoongArchISD::DBAR, DL, MVT::Other, Chain,
4576 DAG.getConstant(Imm, DL, GRLenVT));
4577 }
4578 case Intrinsic::loongarch_ibar: {
4579 unsigned Imm = Op2->getAsZExtVal();
4580 return !isUInt<15>(Imm)
4581 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4582 : DAG.getNode(LoongArchISD::IBAR, DL, MVT::Other, Chain,
4583 DAG.getConstant(Imm, DL, GRLenVT));
4584 }
4585 case Intrinsic::loongarch_break: {
4586 unsigned Imm = Op2->getAsZExtVal();
4587 return !isUInt<15>(Imm)
4588 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4589 : DAG.getNode(LoongArchISD::BREAK, DL, MVT::Other, Chain,
4590 DAG.getConstant(Imm, DL, GRLenVT));
4591 }
4592 case Intrinsic::loongarch_movgr2fcsr: {
4593 if (!Subtarget.hasBasicF())
4594 return emitIntrinsicErrorMessage(Op, ErrorMsgReqF, DAG);
4595 unsigned Imm = Op2->getAsZExtVal();
4596 return !isUInt<2>(Imm)
4597 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4598 : DAG.getNode(LoongArchISD::MOVGR2FCSR, DL, MVT::Other, Chain,
4599 DAG.getConstant(Imm, DL, GRLenVT),
4600 DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT,
4601 Op.getOperand(3)));
4602 }
4603 case Intrinsic::loongarch_syscall: {
4604 unsigned Imm = Op2->getAsZExtVal();
4605 return !isUInt<15>(Imm)
4606 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4607 : DAG.getNode(LoongArchISD::SYSCALL, DL, MVT::Other, Chain,
4608 DAG.getConstant(Imm, DL, GRLenVT));
4609 }
4610#define IOCSRWR_CASE(NAME, NODE) \
4611 case Intrinsic::loongarch_##NAME: { \
4612 SDValue Op3 = Op.getOperand(3); \
4613 return Subtarget.is64Bit() \
4614 ? DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, \
4615 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
4616 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op3)) \
4617 : DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, Op2, \
4618 Op3); \
4619 }
4620 IOCSRWR_CASE(iocsrwr_b, IOCSRWR_B);
4621 IOCSRWR_CASE(iocsrwr_h, IOCSRWR_H);
4622 IOCSRWR_CASE(iocsrwr_w, IOCSRWR_W);
4623#undef IOCSRWR_CASE
4624 case Intrinsic::loongarch_iocsrwr_d: {
4625 return !Subtarget.is64Bit()
4626 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
4627 : DAG.getNode(LoongArchISD::IOCSRWR_D, DL, MVT::Other, Chain,
4628 Op2,
4629 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
4630 Op.getOperand(3)));
4631 }
4632#define ASRT_LE_GT_CASE(NAME) \
4633 case Intrinsic::loongarch_##NAME: { \
4634 return !Subtarget.is64Bit() \
4635 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG) \
4636 : Op; \
4637 }
4638 ASRT_LE_GT_CASE(asrtle_d)
4639 ASRT_LE_GT_CASE(asrtgt_d)
4640#undef ASRT_LE_GT_CASE
4641 case Intrinsic::loongarch_ldpte_d: {
4642 unsigned Imm = Op.getConstantOperandVal(3);
4643 return !Subtarget.is64Bit()
4644 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
4645 : !isUInt<8>(Imm) ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4646 : Op;
4647 }
4648 case Intrinsic::loongarch_lsx_vst:
4649 case Intrinsic::loongarch_lasx_xvst:
4650 return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue())
4651 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4652 : SDValue();
4653 case Intrinsic::loongarch_lasx_xvstelm_b:
4654 return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4655 !isUInt<5>(Op.getConstantOperandVal(5)))
4656 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4657 : SDValue();
4658 case Intrinsic::loongarch_lsx_vstelm_b:
4659 return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4660 !isUInt<4>(Op.getConstantOperandVal(5)))
4661 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4662 : SDValue();
4663 case Intrinsic::loongarch_lasx_xvstelm_h:
4664 return (!isShiftedInt<8, 1>(
4665 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4666 !isUInt<4>(Op.getConstantOperandVal(5)))
4668 Op, "argument out of range or not a multiple of 2", DAG)
4669 : SDValue();
4670 case Intrinsic::loongarch_lsx_vstelm_h:
4671 return (!isShiftedInt<8, 1>(
4672 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4673 !isUInt<3>(Op.getConstantOperandVal(5)))
4675 Op, "argument out of range or not a multiple of 2", DAG)
4676 : SDValue();
4677 case Intrinsic::loongarch_lasx_xvstelm_w:
4678 return (!isShiftedInt<8, 2>(
4679 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4680 !isUInt<3>(Op.getConstantOperandVal(5)))
4682 Op, "argument out of range or not a multiple of 4", DAG)
4683 : SDValue();
4684 case Intrinsic::loongarch_lsx_vstelm_w:
4685 return (!isShiftedInt<8, 2>(
4686 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4687 !isUInt<2>(Op.getConstantOperandVal(5)))
4689 Op, "argument out of range or not a multiple of 4", DAG)
4690 : SDValue();
4691 case Intrinsic::loongarch_lasx_xvstelm_d:
4692 return (!isShiftedInt<8, 3>(
4693 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4694 !isUInt<2>(Op.getConstantOperandVal(5)))
4696 Op, "argument out of range or not a multiple of 8", DAG)
4697 : SDValue();
4698 case Intrinsic::loongarch_lsx_vstelm_d:
4699 return (!isShiftedInt<8, 3>(
4700 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4701 !isUInt<1>(Op.getConstantOperandVal(5)))
4703 Op, "argument out of range or not a multiple of 8", DAG)
4704 : SDValue();
4705 }
4706}
4707
4708SDValue LoongArchTargetLowering::lowerShiftLeftParts(SDValue Op,
4709 SelectionDAG &DAG) const {
4710 SDLoc DL(Op);
4711 SDValue Lo = Op.getOperand(0);
4712 SDValue Hi = Op.getOperand(1);
4713 SDValue Shamt = Op.getOperand(2);
4714 EVT VT = Lo.getValueType();
4715
4716 // if Shamt-GRLen < 0: // Shamt < GRLen
4717 // Lo = Lo << Shamt
4718 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (GRLen-1 ^ Shamt))
4719 // else:
4720 // Lo = 0
4721 // Hi = Lo << (Shamt-GRLen)
4722
4723 SDValue Zero = DAG.getConstant(0, DL, VT);
4724 SDValue One = DAG.getConstant(1, DL, VT);
4725 SDValue MinusGRLen =
4726 DAG.getSignedConstant(-(int)Subtarget.getGRLen(), DL, VT);
4727 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
4728 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
4729 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
4730
4731 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
4732 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
4733 SDValue ShiftRightLo =
4734 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, GRLenMinus1Shamt);
4735 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
4736 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
4737 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusGRLen);
4738
4739 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
4740
4741 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
4742 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
4743
4744 SDValue Parts[2] = {Lo, Hi};
4745 return DAG.getMergeValues(Parts, DL);
4746}
4747
4748SDValue LoongArchTargetLowering::lowerShiftRightParts(SDValue Op,
4749 SelectionDAG &DAG,
4750 bool IsSRA) const {
4751 SDLoc DL(Op);
4752 SDValue Lo = Op.getOperand(0);
4753 SDValue Hi = Op.getOperand(1);
4754 SDValue Shamt = Op.getOperand(2);
4755 EVT VT = Lo.getValueType();
4756
4757 // SRA expansion:
4758 // if Shamt-GRLen < 0: // Shamt < GRLen
4759 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
4760 // Hi = Hi >>s Shamt
4761 // else:
4762 // Lo = Hi >>s (Shamt-GRLen);
4763 // Hi = Hi >>s (GRLen-1)
4764 //
4765 // SRL expansion:
4766 // if Shamt-GRLen < 0: // Shamt < GRLen
4767 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
4768 // Hi = Hi >>u Shamt
4769 // else:
4770 // Lo = Hi >>u (Shamt-GRLen);
4771 // Hi = 0;
4772
4773 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
4774
4775 SDValue Zero = DAG.getConstant(0, DL, VT);
4776 SDValue One = DAG.getConstant(1, DL, VT);
4777 SDValue MinusGRLen =
4778 DAG.getSignedConstant(-(int)Subtarget.getGRLen(), DL, VT);
4779 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
4780 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
4781 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
4782
4783 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
4784 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
4785 SDValue ShiftLeftHi =
4786 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, GRLenMinus1Shamt);
4787 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
4788 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
4789 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusGRLen);
4790 SDValue HiFalse =
4791 IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, GRLenMinus1) : Zero;
4792
4793 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
4794
4795 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
4796 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
4797
4798 SDValue Parts[2] = {Lo, Hi};
4799 return DAG.getMergeValues(Parts, DL);
4800}
4801
4802// Returns the opcode of the target-specific SDNode that implements the 32-bit
4803// form of the given Opcode.
4804static unsigned getLoongArchWOpcode(unsigned Opcode) {
4805 switch (Opcode) {
4806 default:
4807 llvm_unreachable("Unexpected opcode");
4808 case ISD::SDIV:
4809 return LoongArchISD::DIV_W;
4810 case ISD::UDIV:
4811 return LoongArchISD::DIV_WU;
4812 case ISD::SREM:
4813 return LoongArchISD::MOD_W;
4814 case ISD::UREM:
4815 return LoongArchISD::MOD_WU;
4816 case ISD::SHL:
4817 return LoongArchISD::SLL_W;
4818 case ISD::SRA:
4819 return LoongArchISD::SRA_W;
4820 case ISD::SRL:
4821 return LoongArchISD::SRL_W;
4822 case ISD::ROTL:
4823 case ISD::ROTR:
4824 return LoongArchISD::ROTR_W;
4825 case ISD::CTTZ:
4826 return LoongArchISD::CTZ_W;
4827 case ISD::CTLZ:
4828 return LoongArchISD::CLZ_W;
4829 }
4830}
4831
// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
// node. Because i8/i16/i32 isn't a legal type for LA64, these operations would
// otherwise be promoted to i64, making it difficult to select the
// SLL_W/.../*W later on because the fact the operation was originally of
// type i8/i16/i32 is lost.
                                   unsigned ExtOpc = ISD::ANY_EXTEND) {
  SDLoc DL(N);
  unsigned WOpcode = getLoongArchWOpcode(N->getOpcode());
  SDValue NewOp0, NewRes;

  switch (NumOp) {
  default:
    llvm_unreachable("Unexpected NumOp");
  case 1: {
    // Unary op: extend the single operand to i64 and emit the W-form node.
    NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
    NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0);
    break;
  }
  case 2: {
    // Binary op: extend both operands to i64.
    NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
    SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
    // ROTL is implemented via ROTR_W (see getLoongArchWOpcode), so rewrite
    // the rotate amount as 32 - amount.
    if (N->getOpcode() == ISD::ROTL) {
      SDValue TmpOp = DAG.getConstant(32, DL, MVT::i64);
      NewOp1 = DAG.getNode(ISD::SUB, DL, MVT::i64, TmpOp, NewOp1);
    }
    NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
    break;
  }
  // TODO: Handle more NumOp.
  }

  // ReplaceNodeResults requires we maintain the same type for the return
  // value.
  return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
}
4868
// Converts the given 32-bit operation to a i64 operation with signed extension
// semantic to reduce the signed extension instructions.
  SDLoc DL(N);
  // Any-extend both operands to i64; the upper bits do not affect the low
  // 32-bit result of the operation.
  SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
  SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
  SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
  // Sign-extend the low 32 bits of the i64 result, then truncate back so
  // ReplaceNodeResults sees the original i32 type.
  SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
                               DAG.getValueType(MVT::i32));
  return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
}
4880
// Helper function that emits an error message for intrinsics with/without
// chain, and replaces the results with UNDEF plus, when WithChain is true,
// the incoming chain, so the DAG stays well-formed after the error.
                                        StringRef ErrorMsg, bool WithChain = true) {
  DAG.getContext()->emitError(N->getOperationName(0) + ": " + ErrorMsg + ".");
  // Value result becomes UNDEF of the intrinsic's original type.
  Results.push_back(DAG.getUNDEF(N->getValueType(0)));
  if (!WithChain)
    return;
  // Pass the incoming chain (operand 0) through unchanged.
  Results.push_back(N->getOperand(0));
}
4892
// Replaces a [x]vpickve2gr.* intrinsic with a VPICK_{S,Z}EXT_ELT node. The
// template parameter N is the bit-width allowed for the constant element
// index (operand 2).
template <unsigned N>
static void
                         SelectionDAG &DAG, const LoongArchSubtarget &Subtarget,
                         unsigned ResOp) {
  const StringRef ErrorMsgOOR = "argument out of range";
  // The element index must be a uimm that fits in N bits.
  unsigned Imm = Node->getConstantOperandVal(2);
  if (!isUInt<N>(Imm)) {
                                        /*WithChain=*/false);
    return;
  }
  SDLoc DL(Node);
  SDValue Vec = Node->getOperand(1);

  // Emit the pick node on the native GRLenVT, then truncate back to the
  // intrinsic's original (possibly narrower) result type.
  SDValue PickElt =
      DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec,
                  DAG.getConstant(Imm, DL, Subtarget.getGRLenVT()),
  Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, Node->getValueType(0),
                                PickElt.getValue(0)));
}
4915
                                        SelectionDAG &DAG,
                                        const LoongArchSubtarget &Subtarget,
                                        unsigned ResOp) {
  // Replaces a vector-condition intrinsic (bz/bnz family) with the given
  // target node (e.g. VALL_ZERO / VANY_ZERO), evaluated on GRLenVT.
  SDLoc DL(N);
  SDValue Vec = N->getOperand(1);

  SDValue CB = DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec);
  // Truncate back to the intrinsic's original result type.
  Results.push_back(
      DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), CB.getValue(0)));
}
4928
static void
                                 SelectionDAG &DAG,
                                 const LoongArchSubtarget &Subtarget) {
  // Dispatch on the intrinsic ID (operand 0) to the matching replacement
  // helper; the template argument bounds the element-index immediate.
  switch (N->getConstantOperandVal(0)) {
  default:
    llvm_unreachable("Unexpected Intrinsic.");
  case Intrinsic::loongarch_lsx_vpickve2gr_b:
    replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
                                LoongArchISD::VPICK_SEXT_ELT);
    break;
  case Intrinsic::loongarch_lsx_vpickve2gr_h:
  case Intrinsic::loongarch_lasx_xvpickve2gr_w:
    replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
                                LoongArchISD::VPICK_SEXT_ELT);
    break;
  case Intrinsic::loongarch_lsx_vpickve2gr_w:
    replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
                                LoongArchISD::VPICK_SEXT_ELT);
    break;
  case Intrinsic::loongarch_lsx_vpickve2gr_bu:
    replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
                                LoongArchISD::VPICK_ZEXT_ELT);
    break;
  case Intrinsic::loongarch_lsx_vpickve2gr_hu:
  case Intrinsic::loongarch_lasx_xvpickve2gr_wu:
    replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
                                LoongArchISD::VPICK_ZEXT_ELT);
    break;
  case Intrinsic::loongarch_lsx_vpickve2gr_wu:
    replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
                                LoongArchISD::VPICK_ZEXT_ELT);
    break;
  case Intrinsic::loongarch_lsx_bz_b:
  case Intrinsic::loongarch_lsx_bz_h:
  case Intrinsic::loongarch_lsx_bz_w:
  case Intrinsic::loongarch_lsx_bz_d:
  case Intrinsic::loongarch_lasx_xbz_b:
  case Intrinsic::loongarch_lasx_xbz_h:
  case Intrinsic::loongarch_lasx_xbz_w:
  case Intrinsic::loongarch_lasx_xbz_d:
    replaceVecCondBranchResults(N, Results, DAG, Subtarget,
                                LoongArchISD::VALL_ZERO);
    break;
  case Intrinsic::loongarch_lsx_bz_v:
  case Intrinsic::loongarch_lasx_xbz_v:
    replaceVecCondBranchResults(N, Results, DAG, Subtarget,
                                LoongArchISD::VANY_ZERO);
    break;
  case Intrinsic::loongarch_lsx_bnz_b:
  case Intrinsic::loongarch_lsx_bnz_h:
  case Intrinsic::loongarch_lsx_bnz_w:
  case Intrinsic::loongarch_lsx_bnz_d:
  case Intrinsic::loongarch_lasx_xbnz_b:
  case Intrinsic::loongarch_lasx_xbnz_h:
  case Intrinsic::loongarch_lasx_xbnz_w:
  case Intrinsic::loongarch_lasx_xbnz_d:
    replaceVecCondBranchResults(N, Results, DAG, Subtarget,
                                LoongArchISD::VALL_NONZERO);
    break;
  case Intrinsic::loongarch_lsx_bnz_v:
  case Intrinsic::loongarch_lasx_xbnz_v:
    replaceVecCondBranchResults(N, Results, DAG, Subtarget,
                                LoongArchISD::VANY_NONZERO);
    break;
  }
}
4996
                                       SelectionDAG &DAG) {
  // Legalizes an i128 ATOMIC_CMP_SWAP into a target cmpxchg-128 pseudo.
  assert(N->getValueType(0) == MVT::i128 &&
         "AtomicCmpSwap on types less than 128 should be legal");
  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();

  // Pick the pseudo variant matching the required memory ordering.
  unsigned Opcode;
  switch (MemOp->getMergedOrdering()) {
    Opcode = LoongArch::PseudoCmpXchg128Acquire;
    break;
    Opcode = LoongArch::PseudoCmpXchg128;
    break;
  default:
    llvm_unreachable("Unexpected ordering!");
  }

  SDLoc DL(N);
  // Split the 128-bit expected and new values into lo/hi i64 halves.
  auto CmpVal = DAG.SplitScalar(N->getOperand(2), DL, MVT::i64, MVT::i64);
  auto NewVal = DAG.SplitScalar(N->getOperand(3), DL, MVT::i64, MVT::i64);
  // Operands: pointer, cmp lo/hi, new lo/hi, chain.
  SDValue Ops[] = {N->getOperand(1), CmpVal.first, CmpVal.second,
                   NewVal.first, NewVal.second, N->getOperand(0)};

  SDNode *CmpSwap = DAG.getMachineNode(
      Opcode, SDLoc(N), DAG.getVTList(MVT::i64, MVT::i64, MVT::i64, MVT::Other),
      Ops);
  DAG.setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
  // Re-pack the two i64 results into the original i128 value; result 3 is
  // the chain.
  Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128,
                                SDValue(CmpSwap, 0), SDValue(CmpSwap, 1)));
  Results.push_back(SDValue(CmpSwap, 3));
}
5033
  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  // Produce replacement values (of the node's original type) for results of
  // a node whose type is not legal; each case pushes onto Results.
  switch (N->getOpcode()) {
  default:
    llvm_unreachable("Don't know how to legalize this operation");
  case ISD::ADD:
  case ISD::SUB:
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
    break;
  case ISD::SDIV:
  case ISD::UDIV:
  case ISD::SREM:
  case ISD::UREM:
    assert(VT == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    Results.push_back(customLegalizeToWOp(N, DAG, 2,
                                          Subtarget.hasDiv32() && VT == MVT::i32
                                              : ISD::SIGN_EXTEND));
    break;
  case ISD::SHL:
  case ISD::SRA:
  case ISD::SRL:
    assert(VT == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    // Non-constant shift amounts go through the W-form node; constant
    // amounts are left for pattern selection.
    if (N->getOperand(1).getOpcode() != ISD::Constant) {
      Results.push_back(customLegalizeToWOp(N, DAG, 2));
      break;
    }
    break;
  case ISD::ROTL:
  case ISD::ROTR:
    assert(VT == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    Results.push_back(customLegalizeToWOp(N, DAG, 2));
    break;
  case ISD::FP_TO_SINT: {
    assert(VT == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    SDValue Src = N->getOperand(0);
    EVT FVT = EVT::getFloatingPointVT(N->getValueSizeInBits(0));
    if (getTypeAction(*DAG.getContext(), Src.getValueType()) !=
      if (!isTypeLegal(Src.getValueType()))
        return;
      if (Src.getValueType() == MVT::f16)
        Src = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Src);
      // FTINT produces the converted integer in an FP register; bitcast it
      // to the integer result type.
      SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, FVT, Src);
      Results.push_back(DAG.getNode(ISD::BITCAST, DL, VT, Dst));
      return;
    }
    // If the FP type needs to be softened, emit a library call using the 'si'
    // version. If we left it to default legalization we'd end up with 'di'.
    RTLIB::Libcall LC;
    LC = RTLIB::getFPTOSINT(Src.getValueType(), VT);
    MakeLibCallOptions CallOptions;
    EVT OpVT = Src.getValueType();
    CallOptions.setTypeListBeforeSoften(OpVT, VT);
    SDValue Chain = SDValue();
    SDValue Result;
    std::tie(Result, Chain) =
        makeLibCall(DAG, LC, VT, Src, CallOptions, DL, Chain);
    Results.push_back(Result);
    break;
  }
  case ISD::BITCAST: {
    SDValue Src = N->getOperand(0);
    EVT SrcVT = Src.getValueType();
    // f32 -> i32 on LA64 uses a dedicated move node; i64 <- f64 on LA32 is
    // split into a register pair.
    if (VT == MVT::i32 && SrcVT == MVT::f32 && Subtarget.is64Bit() &&
        Subtarget.hasBasicF()) {
      SDValue Dst =
          DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Src);
      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Dst));
    } else if (VT == MVT::i64 && SrcVT == MVT::f64 && !Subtarget.is64Bit()) {
      SDValue NewReg = DAG.getNode(LoongArchISD::SPLIT_PAIR_F64, DL,
                                   DAG.getVTList(MVT::i32, MVT::i32), Src);
      SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,
                                   NewReg.getValue(0), NewReg.getValue(1));
      Results.push_back(RetReg);
    }
    break;
  }
  case ISD::FP_TO_UINT: {
    assert(VT == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    auto &TLI = DAG.getTargetLoweringInfo();
    SDValue Tmp1, Tmp2;
    TLI.expandFP_TO_UINT(N, Tmp1, Tmp2, DAG);
    Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Tmp1));
    break;
  }
  case ISD::FP_ROUND: {
    assert(VT == MVT::v2f32 && Subtarget.hasExtLSX() &&
           "Unexpected custom legalisation");
    // On LSX platforms, rounding from v2f64 to v4f32 (after legalization from
    // v2f32) is scalarized. Add a customized v2f32 widening to convert it into
    // a target-specific LoongArchISD::VFCVT to optimize it.
    SDValue Op0 = N->getOperand(0);
    EVT OpVT = Op0.getValueType();
    if (OpVT == MVT::v2f64) {
      SDValue Undef = DAG.getUNDEF(OpVT);
      SDValue Dst =
          DAG.getNode(LoongArchISD::VFCVT, DL, MVT::v4f32, Undef, Op0);
      Results.push_back(Dst);
    }
    break;
  }
  case ISD::BSWAP: {
    SDValue Src = N->getOperand(0);
    assert((VT == MVT::i16 || VT == MVT::i32) &&
           "Unexpected custom legalization");
    MVT GRLenVT = Subtarget.getGRLenVT();
    SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
    SDValue Tmp;
    switch (VT.getSizeInBits()) {
    default:
      llvm_unreachable("Unexpected operand width");
    case 16:
      Tmp = DAG.getNode(LoongArchISD::REVB_2H, DL, GRLenVT, NewSrc);
      break;
    case 32:
      // Only LA64 will get to here due to the size mismatch between VT and
      // GRLenVT, LA32 lowering is directly defined in LoongArchInstrInfo.
      Tmp = DAG.getNode(LoongArchISD::REVB_2W, DL, GRLenVT, NewSrc);
      break;
    }
    Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
    break;
  }
  case ISD::BITREVERSE: {
    SDValue Src = N->getOperand(0);
    assert((VT == MVT::i8 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
           "Unexpected custom legalization");
    MVT GRLenVT = Subtarget.getGRLenVT();
    SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
    SDValue Tmp;
    switch (VT.getSizeInBits()) {
    default:
      llvm_unreachable("Unexpected operand width");
    case 8:
      Tmp = DAG.getNode(LoongArchISD::BITREV_4B, DL, GRLenVT, NewSrc);
      break;
    case 32:
      Tmp = DAG.getNode(LoongArchISD::BITREV_W, DL, GRLenVT, NewSrc);
      break;
    }
    Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
    break;
  }
  case ISD::CTLZ:
  case ISD::CTTZ: {
    assert(VT == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    Results.push_back(customLegalizeToWOp(N, DAG, 1));
    break;
  }
    SDValue Chain = N->getOperand(0);
    SDValue Op2 = N->getOperand(2);
    MVT GRLenVT = Subtarget.getGRLenVT();
    const StringRef ErrorMsgOOR = "argument out of range";
    const StringRef ErrorMsgReqLA64 = "requires loongarch64";
    const StringRef ErrorMsgReqF = "requires basic 'f' target feature";

    // Dispatch on the intrinsic ID (operand 1 for chained intrinsics).
    switch (N->getConstantOperandVal(1)) {
    default:
      llvm_unreachable("Unexpected Intrinsic.");
    case Intrinsic::loongarch_movfcsr2gr: {
      if (!Subtarget.hasBasicF()) {
        emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqF);
        return;
      }
      unsigned Imm = Op2->getAsZExtVal();
      if (!isUInt<2>(Imm)) {
        emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
        return;
      }
      SDValue MOVFCSR2GRResults = DAG.getNode(
          LoongArchISD::MOVFCSR2GR, SDLoc(N), {MVT::i64, MVT::Other},
          {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
      Results.push_back(
          DAG.getNode(ISD::TRUNCATE, DL, VT, MOVFCSR2GRResults.getValue(0)));
      Results.push_back(MOVFCSR2GRResults.getValue(1));
      break;
    }
#define CRC_CASE_EXT_BINARYOP(NAME, NODE)                                      \
  case Intrinsic::loongarch_##NAME: {                                          \
    SDValue NODE = DAG.getNode(                                                \
        LoongArchISD::NODE, DL, {MVT::i64, MVT::Other},                        \
        {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),               \
         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))});       \
    Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0)));   \
    Results.push_back(NODE.getValue(1));                                       \
    break;                                                                     \
  }
      CRC_CASE_EXT_BINARYOP(crc_w_b_w, CRC_W_B_W)
      CRC_CASE_EXT_BINARYOP(crc_w_h_w, CRC_W_H_W)
      CRC_CASE_EXT_BINARYOP(crc_w_w_w, CRC_W_W_W)
      CRC_CASE_EXT_BINARYOP(crcc_w_b_w, CRCC_W_B_W)
      CRC_CASE_EXT_BINARYOP(crcc_w_h_w, CRCC_W_H_W)
      CRC_CASE_EXT_BINARYOP(crcc_w_w_w, CRCC_W_W_W)
#undef CRC_CASE_EXT_BINARYOP

#define CRC_CASE_EXT_UNARYOP(NAME, NODE)                                       \
  case Intrinsic::loongarch_##NAME: {                                          \
    SDValue NODE = DAG.getNode(                                                \
        LoongArchISD::NODE, DL, {MVT::i64, MVT::Other},                        \
        {Chain, Op2,                                                           \
         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))});       \
    Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0)));   \
    Results.push_back(NODE.getValue(1));                                       \
    break;                                                                     \
  }
      CRC_CASE_EXT_UNARYOP(crc_w_d_w, CRC_W_D_W)
      CRC_CASE_EXT_UNARYOP(crcc_w_d_w, CRCC_W_D_W)
#undef CRC_CASE_EXT_UNARYOP
#define CSR_CASE(ID)                                                           \
  case Intrinsic::loongarch_##ID: {                                            \
    if (!Subtarget.is64Bit())                                                  \
      emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64);   \
    break;                                                                     \
  }
      CSR_CASE(csrrd_d);
      CSR_CASE(csrwr_d);
      CSR_CASE(csrxchg_d);
      CSR_CASE(iocsrrd_d);
#undef CSR_CASE
    case Intrinsic::loongarch_csrrd_w: {
      unsigned Imm = Op2->getAsZExtVal();
      if (!isUInt<14>(Imm)) {
        emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
        return;
      }
      SDValue CSRRDResults =
          DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
                      {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
      Results.push_back(
          DAG.getNode(ISD::TRUNCATE, DL, VT, CSRRDResults.getValue(0)));
      Results.push_back(CSRRDResults.getValue(1));
      break;
    }
    case Intrinsic::loongarch_csrwr_w: {
      unsigned Imm = N->getConstantOperandVal(3);
      if (!isUInt<14>(Imm)) {
        emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
        return;
      }
      SDValue CSRWRResults =
          DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
                      {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
                       DAG.getConstant(Imm, DL, GRLenVT)});
      Results.push_back(
          DAG.getNode(ISD::TRUNCATE, DL, VT, CSRWRResults.getValue(0)));
      Results.push_back(CSRWRResults.getValue(1));
      break;
    }
    case Intrinsic::loongarch_csrxchg_w: {
      unsigned Imm = N->getConstantOperandVal(4);
      if (!isUInt<14>(Imm)) {
        emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
        return;
      }
      SDValue CSRXCHGResults = DAG.getNode(
          LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
          {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
           DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3)),
           DAG.getConstant(Imm, DL, GRLenVT)});
      Results.push_back(
          DAG.getNode(ISD::TRUNCATE, DL, VT, CSRXCHGResults.getValue(0)));
      Results.push_back(CSRXCHGResults.getValue(1));
      break;
    }
#define IOCSRRD_CASE(NAME, NODE)                                               \
  case Intrinsic::loongarch_##NAME: {                                          \
    SDValue IOCSRRDResults =                                                   \
        DAG.getNode(LoongArchISD::NODE, DL, {MVT::i64, MVT::Other},            \
                    {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)}); \
    Results.push_back(                                                         \
        DAG.getNode(ISD::TRUNCATE, DL, VT, IOCSRRDResults.getValue(0)));       \
    Results.push_back(IOCSRRDResults.getValue(1));                             \
    break;                                                                     \
  }
      IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
      IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
      IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
#undef IOCSRRD_CASE
    case Intrinsic::loongarch_cpucfg: {
      SDValue CPUCFGResults =
          DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
                      {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)});
      Results.push_back(
          DAG.getNode(ISD::TRUNCATE, DL, VT, CPUCFGResults.getValue(0)));
      Results.push_back(CPUCFGResults.getValue(1));
      break;
    }
    case Intrinsic::loongarch_lddir_d: {
      if (!Subtarget.is64Bit()) {
        emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64);
        return;
      }
      break;
    }
    }
    break;
  }
  case ISD::READ_REGISTER: {
    if (Subtarget.is64Bit())
      DAG.getContext()->emitError(
          "On LA64, only 64-bit registers can be read.");
    else
      DAG.getContext()->emitError(
          "On LA32, only 32-bit registers can be read.");
    Results.push_back(DAG.getUNDEF(VT));
    Results.push_back(N->getOperand(0));
    break;
  }
    replaceINTRINSIC_WO_CHAINResults(N, Results, DAG, Subtarget);
    break;
  }
  case ISD::LROUND: {
    SDValue Op0 = N->getOperand(0);
    EVT OpVT = Op0.getValueType();
    RTLIB::Libcall LC =
        OpVT == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
    MakeLibCallOptions CallOptions;
    CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64);
    SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first;
    Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
    Results.push_back(Result);
    break;
  }
  case ISD::ATOMIC_CMP_SWAP: {
    break;
  }
  case ISD::TRUNCATE: {
    MVT VT = N->getSimpleValueType(0);
    if (getTypeAction(*DAG.getContext(), VT) != TypeWidenVector)
      return;

    MVT WidenVT = getTypeToTransformTo(*DAG.getContext(), VT).getSimpleVT();
    SDValue In = N->getOperand(0);
    EVT InVT = In.getValueType();
    EVT InEltVT = InVT.getVectorElementType();
    EVT EltVT = VT.getVectorElementType();
    unsigned MinElts = VT.getVectorNumElements();
    unsigned WidenNumElts = WidenVT.getVectorNumElements();
    unsigned InBits = InVT.getSizeInBits();

    if ((128 % InBits) == 0 && WidenVT.is128BitVector()) {
      if ((InEltVT.getSizeInBits() % EltVT.getSizeInBits()) == 0) {
        // Express the truncate as a shuffle selecting every Scale-th narrow
        // element of the 128-bit-widened input.
        int Scale = InEltVT.getSizeInBits() / EltVT.getSizeInBits();
        SmallVector<int, 16> TruncMask(WidenNumElts, -1);
        for (unsigned I = 0; I < MinElts; ++I)
          TruncMask[I] = Scale * I;

        unsigned WidenNumElts = 128 / In.getScalarValueSizeInBits();
        MVT SVT = In.getSimpleValueType().getScalarType();
        MVT VT = MVT::getVectorVT(SVT, WidenNumElts);
        SDValue WidenIn =
            DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), In,
                        DAG.getVectorIdxConstant(0, DL));
        assert(isTypeLegal(WidenVT) && isTypeLegal(WidenIn.getValueType()) &&
               "Illegal vector type in truncation");
        WidenIn = DAG.getBitcast(WidenVT, WidenIn);
        Results.push_back(
            DAG.getVectorShuffle(WidenVT, DL, WidenIn, WidenIn, TruncMask));
        return;
      }
    }

    break;
  }
  }
}
5414
/// Try to fold: (and (xor X, -1), Y) -> (vandn X, Y).
                                         SelectionDAG &DAG) {
  assert(N->getOpcode() == ISD::AND && "Unexpected opcode combine into ANDN");

  // Only 128-bit (LSX) and 256-bit (LASX) vector types are handled.
  MVT VT = N->getSimpleValueType(0);
  if (!VT.is128BitVector() && !VT.is256BitVector())
    return SDValue();

  SDValue X, Y;
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  // AND is commutative, so the inverted operand may be on either side.
  if (SDValue Not = isNOT(N0, DAG)) {
    X = Not;
    Y = N1;
  } else if (SDValue Not = isNOT(N1, DAG)) {
    X = Not;
    Y = N0;
  } else
    return SDValue();

  X = DAG.getBitcast(VT, X);
  Y = DAG.getBitcast(VT, Y);
  return DAG.getNode(LoongArchISD::VANDN, DL, VT, X, Y);
}
5441
                                 const LoongArchSubtarget &Subtarget) {
  if (DCI.isBeforeLegalizeOps())
    return SDValue();

  SDValue FirstOperand = N->getOperand(0);
  SDValue SecondOperand = N->getOperand(1);
  unsigned FirstOperandOpc = FirstOperand.getOpcode();
  EVT ValTy = N->getValueType(0);
  SDLoc DL(N);
  uint64_t lsb, msb;
  unsigned SMIdx, SMLen;
  ConstantSDNode *CN;
  SDValue NewOperand;
  MVT GRLenVT = Subtarget.getGRLenVT();

  // Vector case: (and (xor X, -1), Y) -> (vandn X, Y).
  if (SDValue R = combineAndNotIntoVANDN(N, DL, DAG))
    return R;

  // BSTRPICK requires the 32S feature.
  if (!Subtarget.has32S())
    return SDValue();

  // Op's second operand must be a shifted mask.
  if (!(CN = dyn_cast<ConstantSDNode>(SecondOperand)) ||
      !isShiftedMask_64(CN->getZExtValue(), SMIdx, SMLen))
    return SDValue();

  if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL) {
    // Pattern match BSTRPICK.
    //  $dst = and ((sra or srl) $src , lsb), (2**len - 1)
    //  => BSTRPICK $dst, $src, msb, lsb
    //  where msb = lsb + len - 1

    // The second operand of the shift must be an immediate.
    if (!(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))))
      return SDValue();

    lsb = CN->getZExtValue();

    // Return if the shifted mask does not start at bit 0 or the sum of its
    // length and lsb exceeds the word's size.
    if (SMIdx != 0 || lsb + SMLen > ValTy.getSizeInBits())
      return SDValue();

    NewOperand = FirstOperand.getOperand(0);
  } else {
    // Pattern match BSTRPICK.
    //  $dst = and $src, (2**len- 1) , if len > 12
    //  => BSTRPICK $dst, $src, msb, lsb
    //  where lsb = 0 and msb = len - 1

    // If the mask is <= 0xfff, andi can be used instead.
    if (CN->getZExtValue() <= 0xfff)
      return SDValue();

    // Return if the MSB exceeds.
    if (SMIdx + SMLen > ValTy.getSizeInBits())
      return SDValue();

    if (SMIdx > 0) {
      // Omit if the constant has more than 2 uses. This is a conservative
      // decision. Whether it is a win depends on the HW microarchitecture.
      // However it should always be better for 1 and 2 uses.
      if (CN->use_size() > 2)
        return SDValue();
      // Return if the constant can be composed by a single LU12I.W.
      if ((CN->getZExtValue() & 0xfff) == 0)
        return SDValue();
      // Return if the constant can be composed by a single ADDI with
      // the zero register.
      if (CN->getSExtValue() >= -2048 && CN->getSExtValue() < 0)
        return SDValue();
    }

    lsb = SMIdx;
    NewOperand = FirstOperand;
  }

  msb = lsb + SMLen - 1;
  SDValue NR0 = DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy, NewOperand,
                            DAG.getConstant(msb, DL, GRLenVT),
                            DAG.getConstant(lsb, DL, GRLenVT));
  if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL || lsb == 0)
    return NR0;
  // Try to optimize to
  //  bstrpick $Rd, $Rs, msb, lsb
  //  slli     $Rd, $Rd, lsb
  return DAG.getNode(ISD::SHL, DL, ValTy, NR0,
                     DAG.getConstant(lsb, DL, GRLenVT));
}
5534
                                 const LoongArchSubtarget &Subtarget) {
  // BSTRPICK requires the 32S feature.
  if (!Subtarget.has32S())
    return SDValue();

  if (DCI.isBeforeLegalizeOps())
    return SDValue();

  // $dst = srl (and $src, Mask), Shamt
  // =>
  // BSTRPICK $dst, $src, MaskIdx+MaskLen-1, Shamt
  // when Mask is a shifted mask, and MaskIdx <= Shamt <= MaskIdx+MaskLen-1
  //

  SDValue FirstOperand = N->getOperand(0);
  ConstantSDNode *CN;
  EVT ValTy = N->getValueType(0);
  SDLoc DL(N);
  MVT GRLenVT = Subtarget.getGRLenVT();
  unsigned MaskIdx, MaskLen;
  uint64_t Shamt;

  // The first operand must be an AND and the second operand of the AND must be
  // a shifted mask.
  if (FirstOperand.getOpcode() != ISD::AND ||
      !(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))) ||
      !isShiftedMask_64(CN->getZExtValue(), MaskIdx, MaskLen))
    return SDValue();

  // The second operand (shift amount) must be an immediate.
  if (!(CN = dyn_cast<ConstantSDNode>(N->getOperand(1))))
    return SDValue();

  // Fold only when the shift amount lies inside the mask's bit range.
  Shamt = CN->getZExtValue();
  if (MaskIdx <= Shamt && Shamt <= MaskIdx + MaskLen - 1)
    return DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy,
                       FirstOperand->getOperand(0),
                       DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
                       DAG.getConstant(Shamt, DL, GRLenVT));

  return SDValue();
}
5579
// Helper to peek through bitops/trunc/setcc to determine size of source vector.
// Allows BITCASTCombine to determine what size vector generated a <X x i1>.
static bool checkBitcastSrcVectorSize(SDValue Src, unsigned Size,
                                      unsigned Depth) {
  // Limit recursion.
    return false;
  switch (Src.getOpcode()) {
  case ISD::SETCC:
  case ISD::TRUNCATE:
    // Terminal cases: compare the originating vector's width directly.
    return Src.getOperand(0).getValueSizeInBits() == Size;
  case ISD::FREEZE:
    return checkBitcastSrcVectorSize(Src.getOperand(0), Size, Depth + 1);
  case ISD::AND:
  case ISD::XOR:
  case ISD::OR:
    // Both sides of a bitop must trace back to the expected width.
    return checkBitcastSrcVectorSize(Src.getOperand(0), Size, Depth + 1) &&
           checkBitcastSrcVectorSize(Src.getOperand(1), Size, Depth + 1);
  case ISD::SELECT:
  case ISD::VSELECT:
    return Src.getOperand(0).getScalarValueSizeInBits() == 1 &&
           checkBitcastSrcVectorSize(Src.getOperand(1), Size, Depth + 1) &&
           checkBitcastSrcVectorSize(Src.getOperand(2), Size, Depth + 1);
  case ISD::BUILD_VECTOR:
    // All-zeros/all-ones constants are compatible with any source width.
    return ISD::isBuildVectorAllZeros(Src.getNode()) ||
           ISD::isBuildVectorAllOnes(Src.getNode());
  }
  return false;
}
5609
// Helper to push sign extension of vXi1 SETCC result through bitops.
                                          SDValue Src, const SDLoc &DL) {
  switch (Src.getOpcode()) {
  case ISD::SETCC:
  case ISD::FREEZE:
  case ISD::TRUNCATE:
  case ISD::BUILD_VECTOR:
    // Terminal cases: sign-extend the value itself to the wide type.
    return DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, Src);
  case ISD::AND:
  case ISD::XOR:
  case ISD::OR:
    // Recreate the bitop at the wide type over sign-extended operands.
    return DAG.getNode(
        Src.getOpcode(), DL, SExtVT,
        signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(0), DL),
        signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(1), DL));
  case ISD::SELECT:
  case ISD::VSELECT:
    // Keep the i1 condition, widen only the selected values.
    return DAG.getSelect(
        DL, SExtVT, Src.getOperand(0),
        signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(1), DL),
        signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(2), DL));
  }
  llvm_unreachable("Unexpected node type for vXi1 sign extension");
}
5635
static SDValue
                           const LoongArchSubtarget &Subtarget) {
  // Combine (bitcast (setcc ...)) into a [X]VMSK* mask-extraction node when
  // the comparison matches a sign/zero test the hardware can do directly.
  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  SDValue Src = N->getOperand(0);
  EVT SrcVT = Src.getValueType();

  // Only fold a single-use SETCC feeding this bitcast.
  if (Src.getOpcode() != ISD::SETCC || !Src.hasOneUse())
    return SDValue();

  bool UseLASX;
  unsigned Opc = ISD::DELETED_NODE;
  EVT CmpVT = Src.getOperand(0).getValueType();
  EVT EltVT = CmpVT.getVectorElementType();

  // Pick the 128-bit (LSX) or 256-bit (LASX) instruction family.
  if (Subtarget.hasExtLSX() && CmpVT.getSizeInBits() == 128)
    UseLASX = false;
  else if (Subtarget.has32S() && Subtarget.hasExtLASX() &&
           CmpVT.getSizeInBits() == 256)
    UseLASX = true;
  else
    return SDValue();

  SDValue SrcN1 = Src.getOperand(1);
  switch (cast<CondCodeSDNode>(Src.getOperand(2))->get()) {
  default:
    break;
  case ISD::SETEQ:
    // x == 0 => not (vmsknez.b x)
    if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
      Opc = UseLASX ? LoongArchISD::XVMSKEQZ : LoongArchISD::VMSKEQZ;
    break;
  case ISD::SETGT:
    // x > -1 => vmskgez.b x
    if (ISD::isBuildVectorAllOnes(SrcN1.getNode()) && EltVT == MVT::i8)
      Opc = UseLASX ? LoongArchISD::XVMSKGEZ : LoongArchISD::VMSKGEZ;
    break;
  case ISD::SETGE:
    // x >= 0 => vmskgez.b x
    if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
      Opc = UseLASX ? LoongArchISD::XVMSKGEZ : LoongArchISD::VMSKGEZ;
    break;
  case ISD::SETLT:
    // x < 0 => vmskltz.{b,h,w,d} x
    if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) &&
        (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
         EltVT == MVT::i64))
      Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
    break;
  case ISD::SETLE:
    // x <= -1 => vmskltz.{b,h,w,d} x
    if (ISD::isBuildVectorAllOnes(SrcN1.getNode()) &&
        (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
         EltVT == MVT::i64))
      Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
    break;
  case ISD::SETNE:
    // x != 0 => vmsknez.b x
    if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
      Opc = UseLASX ? LoongArchISD::XVMSKNEZ : LoongArchISD::VMSKNEZ;
    break;
  }

  // No matching condition/operand pattern found.
  if (Opc == ISD::DELETED_NODE)
    return SDValue();

  SDValue V = DAG.getNode(Opc, DL, Subtarget.getGRLenVT(), Src.getOperand(0));
  V = DAG.getZExtOrTrunc(V, DL, T);
  return DAG.getBitcast(VT, V);
}
5709
// Combine (bitcast (vXi1 V)) into a [X]VMSKLTZ-based mask move: first try
// the direct setcc fold above, otherwise sign-extend the i1 vector to a
// legal element width and take the sign-bit mask with [X]VMSKLTZ.
// NOTE(review): this rendered listing dropped hyperlinked lines 5710-5711
// (the function name, performBITCASTCombine, and its leading parameters)
// and line 5800 (the computation of T) — confirm against the original
// LoongArchISelLowering.cpp before editing.
5712                             const LoongArchSubtarget &Subtarget) {
5713   SDLoc DL(N);
5714   EVT VT = N->getValueType(0);
5715   SDValue Src = N->getOperand(0);
5716   EVT SrcVT = Src.getValueType();
5717   MVT GRLenVT = Subtarget.getGRLenVT();
5718 
5719   if (!DCI.isBeforeLegalizeOps())
5720     return SDValue();
5721 
// Only simple vXi1 sources are handled here.
5722   if (!SrcVT.isSimple() || SrcVT.getScalarType() != MVT::i1)
5723     return SDValue();
5724 
5725   // Combine SETCC and BITCAST into [X]VMSK{LT,GE,NE} when possible
5726   SDValue Res = performSETCC_BITCASTCombine(N, DAG, DCI, Subtarget);
5727   if (Res)
5728     return Res;
5729 
5730   // Generate vXi1 using [X]VMSKLTZ
5731   MVT SExtVT;
5732   unsigned Opc;
5733   bool UseLASX = false;
5734   bool PropagateSExt = false;
5735 
// Comparisons wider than 256 bits cannot be covered by a single mask op.
5736   if (Src.getOpcode() == ISD::SETCC && Src.hasOneUse()) {
5737     EVT CmpVT = Src.getOperand(0).getValueType();
5738     if (CmpVT.getSizeInBits() > 256)
5739       return SDValue();
5740   }
5741 
// Pick the sign-extended vector type; prefer a 256-bit LASX type when the
// setcc source is already 256 bits wide so the extension folds away.
5742   switch (SrcVT.getSimpleVT().SimpleTy) {
5743   default:
5744     return SDValue();
5745   case MVT::v2i1:
5746     SExtVT = MVT::v2i64;
5747     break;
5748   case MVT::v4i1:
5749     SExtVT = MVT::v4i32;
5750     if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
5751       SExtVT = MVT::v4i64;
5752       UseLASX = true;
5753       PropagateSExt = true;
5754     }
5755     break;
5756   case MVT::v8i1:
5757     SExtVT = MVT::v8i16;
5758     if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
5759       SExtVT = MVT::v8i32;
5760       UseLASX = true;
5761       PropagateSExt = true;
5762     }
5763     break;
5764   case MVT::v16i1:
5765     SExtVT = MVT::v16i8;
5766     if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
5767       SExtVT = MVT::v16i16;
5768       UseLASX = true;
5769       PropagateSExt = true;
5770     }
5771     break;
5772   case MVT::v32i1:
5773     SExtVT = MVT::v32i8;
5774     UseLASX = true;
5775     break;
5776   };
5777   Src = PropagateSExt ? signExtendBitcastSrcVector(DAG, SExtVT, Src, DL)
5778                       : DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, Src);
5779 
// Without LASX, a v32i8 source is split into two 128-bit halves whose
// 16-bit masks are recombined with a shift and OR.
5780   SDValue V;
5781   if (!Subtarget.has32S() || !Subtarget.hasExtLASX()) {
5782     if (Src.getSimpleValueType() == MVT::v32i8) {
5783       SDValue Lo, Hi;
5784       std::tie(Lo, Hi) = DAG.SplitVector(Src, DL);
5785       Lo = DAG.getNode(LoongArchISD::VMSKLTZ, DL, GRLenVT, Lo);
5786       Hi = DAG.getNode(LoongArchISD::VMSKLTZ, DL, GRLenVT, Hi);
5787       Hi = DAG.getNode(ISD::SHL, DL, GRLenVT, Hi,
5788                        DAG.getShiftAmountConstant(16, GRLenVT, DL));
5789       V = DAG.getNode(ISD::OR, DL, GRLenVT, Lo, Hi);
5790     } else if (UseLASX) {
5791       return SDValue();
5792     }
5793   }
5794 
5795   if (!V) {
5796     Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
5797     V = DAG.getNode(Opc, DL, GRLenVT, Src);
5798   }
5799 
// NOTE(review): line 5800 (defining T — presumably the iN integer VT with
// one bit per source element) is missing from this rendering.
5801   V = DAG.getZExtOrTrunc(V, DL, T);
5802   return DAG.getBitcast(VT, V);
5803 }
5804
// Match (or ...) trees against eight BSTRINS bit-field-insert patterns.
// Patterns 1-7 are tried, then the operands are swapped once and retried
// (OR is commutative); pattern 8 gets its own swap-and-retry pass. The
// goto-based Retry/Retry2 labels implement those retries — preserve the
// ordering: earlier patterns are deliberately matched before more general
// later ones (see the comments at patterns 6-8).
// NOTE(review): this rendered listing dropped hyperlinked lines 5805-5806
// (the function name, performORCombine, and its leading parameters) —
// confirm against the original LoongArchISelLowering.cpp before editing.
5807                             const LoongArchSubtarget &Subtarget) {
5808   MVT GRLenVT = Subtarget.getGRLenVT();
5809   EVT ValTy = N->getValueType(0);
5810   SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
5811   ConstantSDNode *CN0, *CN1;
5812   SDLoc DL(N);
5813   unsigned ValBits = ValTy.getSizeInBits();
5814   unsigned MaskIdx0, MaskLen0, MaskIdx1, MaskLen1;
5815   unsigned Shamt;
5816   bool SwapAndRetried = false;
5817 
5818   // BSTRPICK requires the 32S feature.
5819   if (!Subtarget.has32S())
5820     return SDValue();
5821 
5822   if (DCI.isBeforeLegalizeOps())
5823     return SDValue();
5824 
// BSTRINS only exists for 32- and 64-bit registers.
5825   if (ValBits != 32 && ValBits != 64)
5826     return SDValue();
5827 
5828 Retry:
5829   // 1st pattern to match BSTRINS:
5830   //  R = or (and X, mask0), (and (shl Y, lsb), mask1)
5831   //  where mask1 = (2**size - 1) << lsb, mask0 = ~mask1
5832   //  =>
5833   //  R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
5834   if (N0.getOpcode() == ISD::AND &&
5835       (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5836       isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
5837       N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL &&
5838       (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5839       isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
5840       MaskIdx0 == MaskIdx1 && MaskLen0 == MaskLen1 &&
5841       (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
5842       (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
5843       (MaskIdx0 + MaskLen0 <= ValBits)) {
5844     LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 1\n");
5845     return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5846                        N1.getOperand(0).getOperand(0),
5847                        DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
5848                        DAG.getConstant(MaskIdx0, DL, GRLenVT));
5849   }
5850 
5851   // 2nd pattern to match BSTRINS:
5852   //  R = or (and X, mask0), (shl (and Y, mask1), lsb)
5853   //  where mask1 = (2**size - 1), mask0 = ~(mask1 << lsb)
5854   //  =>
5855   //  R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
5856   if (N0.getOpcode() == ISD::AND &&
5857       (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5858       isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
5859       N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
5860       (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5861       (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
5862       (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
5863       isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
5864       MaskLen0 == MaskLen1 && MaskIdx1 == 0 &&
5865       (MaskIdx0 + MaskLen0 <= ValBits)) {
5866     LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 2\n");
5867     return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5868                        N1.getOperand(0).getOperand(0),
5869                        DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
5870                        DAG.getConstant(MaskIdx0, DL, GRLenVT));
5871   }
5872 
5873   // 3rd pattern to match BSTRINS:
5874   //  R = or (and X, mask0), (and Y, mask1)
5875   //  where ~mask0 = (2**size - 1) << lsb, mask0 & mask1 = 0
5876   //  =>
5877   //  R = BSTRINS X, (shr (and Y, mask1), lsb), msb, lsb
5878   //  where msb = lsb + size - 1
5879   if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
5880       (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5881       isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
5882       (MaskIdx0 + MaskLen0 <= 64) &&
5883       (CN1 = dyn_cast<ConstantSDNode>(N1->getOperand(1))) &&
5884       (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
5885     LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 3\n");
// MaskLen0 & 31 below: for 32-bit values a full-width mask makes
// MaskLen0 == 32, which must wrap so msb stays within [0, 31].
5886     return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5887                        DAG.getNode(ISD::SRL, DL, N1->getValueType(0), N1,
5888                                    DAG.getConstant(MaskIdx0, DL, GRLenVT)),
5889                        DAG.getConstant(ValBits == 32
5890                                            ? (MaskIdx0 + (MaskLen0 & 31) - 1)
5891                                            : (MaskIdx0 + MaskLen0 - 1),
5892                                        DL, GRLenVT),
5893                        DAG.getConstant(MaskIdx0, DL, GRLenVT));
5894   }
5895 
5896   // 4th pattern to match BSTRINS:
5897   //  R = or (and X, mask), (shl Y, shamt)
5898   //  where mask = (2**shamt - 1)
5899   //  =>
5900   //  R = BSTRINS X, Y, ValBits - 1, shamt
5901   //  where ValBits = 32 or 64
5902   if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::SHL &&
5903       (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5904       isShiftedMask_64(CN0->getZExtValue(), MaskIdx0, MaskLen0) &&
5905       MaskIdx0 == 0 && (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5906       (Shamt = CN1->getZExtValue()) == MaskLen0 &&
5907       (MaskIdx0 + MaskLen0 <= ValBits)) {
5908     LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 4\n");
5909     return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5910                        N1.getOperand(0),
5911                        DAG.getConstant((ValBits - 1), DL, GRLenVT),
5912                        DAG.getConstant(Shamt, DL, GRLenVT));
5913   }
5914 
5915   // 5th pattern to match BSTRINS:
5916   //  R = or (and X, mask), const
5917   //  where ~mask = (2**size - 1) << lsb, mask & const = 0
5918   //  =>
5919   //  R = BSTRINS X, (const >> lsb), msb, lsb
5920   //  where msb = lsb + size - 1
5921   if (N0.getOpcode() == ISD::AND &&
5922       (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5923       isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
5924       (CN1 = dyn_cast<ConstantSDNode>(N1)) &&
5925       (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
5926     LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 5\n");
5927     return DAG.getNode(
5928         LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5929         DAG.getSignedConstant(CN1->getSExtValue() >> MaskIdx0, DL, ValTy),
5930         DAG.getConstant(ValBits == 32 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
5931                                       : (MaskIdx0 + MaskLen0 - 1),
5932                         DL, GRLenVT),
5933         DAG.getConstant(MaskIdx0, DL, GRLenVT));
5934   }
5935 
5936   // 6th pattern.
5937   // a = b | ((c & mask) << shamt), where all positions in b to be overwritten
5938   // by the incoming bits are known to be zero.
5939   // =>
5940   // a = BSTRINS b, c, shamt + MaskLen - 1, shamt
5941   //
5942   // Note that the 1st pattern is a special situation of the 6th, i.e. the 6th
5943   // pattern is more common than the 1st. So we put the 1st before the 6th in
5944   // order to match as many nodes as possible.
5945   ConstantSDNode *CNMask, *CNShamt;
5946   unsigned MaskIdx, MaskLen;
5947   if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
5948       (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
5949       isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
5950       MaskIdx == 0 && (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5951       CNShamt->getZExtValue() + MaskLen <= ValBits) {
5952     Shamt = CNShamt->getZExtValue();
5953     APInt ShMask(ValBits, CNMask->getZExtValue() << Shamt);
5954     if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
5955       LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 6\n");
5956       return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
5957                          N1.getOperand(0).getOperand(0),
5958                          DAG.getConstant(Shamt + MaskLen - 1, DL, GRLenVT),
5959                          DAG.getConstant(Shamt, DL, GRLenVT));
5960     }
5961   }
5962 
5963   // 7th pattern.
5964   // a = b | ((c << shamt) & shifted_mask), where all positions in b to be
5965   // overwritten by the incoming bits are known to be zero.
5966   // =>
5967   // a = BSTRINS b, c, MaskIdx + MaskLen - 1, MaskIdx
5968   //
5969   // Similarly, the 7th pattern is more common than the 2nd. So we put the 2nd
5970   // before the 7th in order to match as many nodes as possible.
5971   if (N1.getOpcode() == ISD::AND &&
5972       (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5973       isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
5974       N1.getOperand(0).getOpcode() == ISD::SHL &&
5975       (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
5976       CNShamt->getZExtValue() == MaskIdx) {
5977     APInt ShMask(ValBits, CNMask->getZExtValue());
5978     if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
5979       LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 7\n");
5980       return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
5981                          N1.getOperand(0).getOperand(0),
5982                          DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
5983                          DAG.getConstant(MaskIdx, DL, GRLenVT));
5984     }
5985   }
5986 
5987   // (or a, b) and (or b, a) are equivalent, so swap the operands and retry.
5988   if (!SwapAndRetried) {
5989     std::swap(N0, N1);
5990     SwapAndRetried = true;
5991     goto Retry;
5992   }
5993 
5994   SwapAndRetried = false;
5995 Retry2:
5996   // 8th pattern.
5997   // a = b | (c & shifted_mask), where all positions in b to be overwritten by
5998   // the incoming bits are known to be zero.
5999   // =>
6000   // a = BSTRINS b, c >> MaskIdx, MaskIdx + MaskLen - 1, MaskIdx
6001   //
6002   // Similarly, the 8th pattern is more common than the 4th and 5th patterns. So
6003   // we put it here in order to match as many nodes as possible or generate less
6004   // instructions.
6005   if (N1.getOpcode() == ISD::AND &&
6006       (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
6007       isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen)) {
6008     APInt ShMask(ValBits, CNMask->getZExtValue());
6009     if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
6010       LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 8\n");
6011       return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
6012                          DAG.getNode(ISD::SRL, DL, N1->getValueType(0),
6013                                      N1->getOperand(0),
6014                                      DAG.getConstant(MaskIdx, DL, GRLenVT)),
6015                          DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
6016                          DAG.getConstant(MaskIdx, DL, GRLenVT));
6017     }
6018   }
6019   // Swap N0/N1 and retry.
6020   if (!SwapAndRetried) {
6021     std::swap(N0, N1);
6022     SwapAndRetried = true;
6023     goto Retry2;
6024   }
6025 
6026   return SDValue();
6027 }
6028
6029static bool checkValueWidth(SDValue V, ISD::LoadExtType &ExtType) {
6030 ExtType = ISD::NON_EXTLOAD;
6031
6032 switch (V.getNode()->getOpcode()) {
6033 case ISD::LOAD: {
6034 LoadSDNode *LoadNode = cast<LoadSDNode>(V.getNode());
6035 if ((LoadNode->getMemoryVT() == MVT::i8) ||
6036 (LoadNode->getMemoryVT() == MVT::i16)) {
6037 ExtType = LoadNode->getExtensionType();
6038 return true;
6039 }
6040 return false;
6041 }
6042 case ISD::AssertSext: {
6043 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
6044 if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
6045 ExtType = ISD::SEXTLOAD;
6046 return true;
6047 }
6048 return false;
6049 }
6050 case ISD::AssertZext: {
6051 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
6052 if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
6053 ExtType = ISD::ZEXTLOAD;
6054 return true;
6055 }
6056 return false;
6057 }
6058 default:
6059 return false;
6060 }
6061
6062 return false;
6063}
6064
6065// Eliminate redundant truncation and zero-extension nodes.
6066// * Case 1:
6067// +------------+ +------------+ +------------+
6068// | Input1 | | Input2 | | CC |
6069// +------------+ +------------+ +------------+
6070// | | |
6071// V V +----+
6072// +------------+ +------------+ |
6073// | TRUNCATE | | TRUNCATE | |
6074// +------------+ +------------+ |
6075// | | |
6076// V V |
6077// +------------+ +------------+ |
6078// | ZERO_EXT | | ZERO_EXT | |
6079// +------------+ +------------+ |
6080// | | |
6081// | +-------------+ |
6082// V V | |
6083// +----------------+ | |
6084// | AND | | |
6085// +----------------+ | |
6086// | | |
6087// +---------------+ | |
6088// | | |
6089// V V V
6090// +-------------+
6091// | CMP |
6092// +-------------+
6093// * Case 2:
6094// +------------+ +------------+ +-------------+ +------------+ +------------+
6095// | Input1 | | Input2 | | Constant -1 | | Constant 0 | | CC |
6096// +------------+ +------------+ +-------------+ +------------+ +------------+
6097// | | | | |
6098// V | | | |
6099// +------------+ | | | |
6100// | XOR |<---------------------+ | |
6101// +------------+ | | |
6102// | | | |
6103// V V +---------------+ |
6104// +------------+ +------------+ | |
6105// | TRUNCATE | | TRUNCATE | | +-------------------------+
6106// +------------+ +------------+ | |
6107// | | | |
6108// V V | |
6109// +------------+ +------------+ | |
6110// | ZERO_EXT | | ZERO_EXT | | |
6111// +------------+ +------------+ | |
6112// | | | |
6113// V V | |
6114// +----------------+ | |
6115// | AND | | |
6116// +----------------+ | |
6117// | | |
6118// +---------------+ | |
6119// | | |
6120// V V V
6121// +-------------+
6122// | CMP |
6123// +-------------+
// Eliminate the redundant truncate/zero-extend chains depicted in the two
// diagrams above: rebuild the AND over the original (pre-truncate) values
// and emit a fresh setcc against one of them.
// NOTE(review): this rendered listing dropped hyperlinked lines 6124-6125
// (the function name, performSETCCCombine, and its leading parameters) —
// confirm against the original LoongArchISelLowering.cpp before editing.
6126                             const LoongArchSubtarget &Subtarget) {
6127   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
6128 
// The setcc's LHS must be an AND whose RHS operand is a zero-extend.
6129   SDNode *AndNode = N->getOperand(0).getNode();
6130   if (AndNode->getOpcode() != ISD::AND)
6131     return SDValue();
6132 
6133   SDValue AndInputValue2 = AndNode->getOperand(1);
6134   if (AndInputValue2.getOpcode() != ISD::ZERO_EXTEND)
6135     return SDValue();
6136 
// Case 2 (diagram above): (and (xor A, -1), B) compared eq/ne against 0.
// Case 1: (and A, B) compared against B itself.
6137   SDValue CmpInputValue = N->getOperand(1);
6138   SDValue AndInputValue1 = AndNode->getOperand(0);
6139   if (AndInputValue1.getOpcode() == ISD::XOR) {
6140     if (CC != ISD::SETEQ && CC != ISD::SETNE)
6141       return SDValue();
6142     ConstantSDNode *CN = dyn_cast<ConstantSDNode>(AndInputValue1.getOperand(1));
6143     if (!CN || !CN->isAllOnes())
6144       return SDValue();
6145     CN = dyn_cast<ConstantSDNode>(CmpInputValue);
6146     if (!CN || !CN->isZero())
6147       return SDValue();
6148     AndInputValue1 = AndInputValue1.getOperand(0);
6149     if (AndInputValue1.getOpcode() != ISD::ZERO_EXTEND)
6150       return SDValue();
6151   } else if (AndInputValue1.getOpcode() == ISD::ZERO_EXTEND) {
6152     if (AndInputValue2 != CmpInputValue)
6153       return SDValue();
6154   } else {
6155     return SDValue();
6156   }
6157 
// Both zero-extends must wrap truncates of the original wide values.
6158   SDValue TruncValue1 = AndInputValue1.getNode()->getOperand(0);
6159   if (TruncValue1.getOpcode() != ISD::TRUNCATE)
6160     return SDValue();
6161 
6162   SDValue TruncValue2 = AndInputValue2.getNode()->getOperand(0);
6163   if (TruncValue2.getOpcode() != ISD::TRUNCATE)
6164     return SDValue();
6165 
6166   SDValue TruncInputValue1 = TruncValue1.getNode()->getOperand(0);
6167   SDValue TruncInputValue2 = TruncValue2.getNode()->getOperand(0);
6168   ISD::LoadExtType ExtType1;
6169   ISD::LoadExtType ExtType2;
6170 
// Both originals must be effectively narrow (i8/i16) per checkValueWidth,
// so dropping the truncate/zext pairs cannot change the compared bits.
6171   if (!checkValueWidth(TruncInputValue1, ExtType1) ||
6172       !checkValueWidth(TruncInputValue2, ExtType2))
6173     return SDValue();
6174 
6175   if (TruncInputValue1->getValueType(0) != TruncInputValue2->getValueType(0) ||
6176       AndNode->getValueType(0) != TruncInputValue1->getValueType(0))
6177     return SDValue();
6178 
6179   if ((ExtType2 != ISD::ZEXTLOAD) &&
6180       ((ExtType2 != ISD::SEXTLOAD) && (ExtType1 != ISD::SEXTLOAD)))
6181     return SDValue();
6182 
6183   // These truncation and zero-extension nodes are not necessary, remove them.
6184   SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N), AndNode->getValueType(0),
6185                                TruncInputValue1, TruncInputValue2);
6186   SDValue NewSetCC =
6187       DAG.getSetCC(SDLoc(N), N->getValueType(0), NewAnd, TruncInputValue2, CC);
// Replace N in place and return it so the combiner revisits the node.
6188   DAG.ReplaceAllUsesWith(N, NewSetCC.getNode());
6189   return SDValue(N, 0);
6190 }
6191
6192 // Combine (loongarch_bitrev_w (loongarch_revb_2w X)) to loongarch_bitrev_4b.
// Byte-swapping the two 32-bit words and then bit-reversing the word is
// equivalent to reversing the bits within each byte.
// NOTE(review): this rendered listing dropped hyperlinked lines 6193-6194
// (the function name, presumably performBITREV_WCombine, and its leading
// parameters) — confirm against the original LoongArchISelLowering.cpp.
6195                             const LoongArchSubtarget &Subtarget) {
6196   if (DCI.isBeforeLegalizeOps())
6197     return SDValue();
6198 
6199   SDValue Src = N->getOperand(0);
6200   if (Src.getOpcode() != LoongArchISD::REVB_2W)
6201     return SDValue();
6202 
6203   return DAG.getNode(LoongArchISD::BITREV_4B, SDLoc(N), N->getValueType(0),
6204                      Src.getOperand(0));
6205 }
6206
6207// Perform common combines for BR_CC and SELECT_CC conditions.
6208static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL,
6209 SelectionDAG &DAG, const LoongArchSubtarget &Subtarget) {
6210 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
6211
6212 // As far as arithmetic right shift always saves the sign,
6213 // shift can be omitted.
6214 // Fold setlt (sra X, N), 0 -> setlt X, 0 and
6215 // setge (sra X, N), 0 -> setge X, 0
6216 if (isNullConstant(RHS) && (CCVal == ISD::SETGE || CCVal == ISD::SETLT) &&
6217 LHS.getOpcode() == ISD::SRA) {
6218 LHS = LHS.getOperand(0);
6219 return true;
6220 }
6221
6222 if (!ISD::isIntEqualitySetCC(CCVal))
6223 return false;
6224
6225 // Fold ((setlt X, Y), 0, ne) -> (X, Y, lt)
6226 // Sometimes the setcc is introduced after br_cc/select_cc has been formed.
6227 if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
6228 LHS.getOperand(0).getValueType() == Subtarget.getGRLenVT()) {
6229 // If we're looking for eq 0 instead of ne 0, we need to invert the
6230 // condition.
6231 bool Invert = CCVal == ISD::SETEQ;
6232 CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
6233 if (Invert)
6234 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
6235
6236 RHS = LHS.getOperand(1);
6237 LHS = LHS.getOperand(0);
6238 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
6239
6240 CC = DAG.getCondCode(CCVal);
6241 return true;
6242 }
6243
6244 // Fold ((srl (and X, 1<<C), C), 0, eq/ne) -> ((shl X, GRLen-1-C), 0, ge/lt)
6245 if (isNullConstant(RHS) && LHS.getOpcode() == ISD::SRL && LHS.hasOneUse() &&
6246 LHS.getOperand(1).getOpcode() == ISD::Constant) {
6247 SDValue LHS0 = LHS.getOperand(0);
6248 if (LHS0.getOpcode() == ISD::AND &&
6249 LHS0.getOperand(1).getOpcode() == ISD::Constant) {
6250 uint64_t Mask = LHS0.getConstantOperandVal(1);
6251 uint64_t ShAmt = LHS.getConstantOperandVal(1);
6252 if (isPowerOf2_64(Mask) && Log2_64(Mask) == ShAmt) {
6253 CCVal = CCVal == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
6254 CC = DAG.getCondCode(CCVal);
6255
6256 ShAmt = LHS.getValueSizeInBits() - 1 - ShAmt;
6257 LHS = LHS0.getOperand(0);
6258 if (ShAmt != 0)
6259 LHS =
6260 DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS0.getOperand(0),
6261 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
6262 return true;
6263 }
6264 }
6265 }
6266
6267 // (X, 1, setne) -> (X, 0, seteq) if we can prove X is 0/1.
6268 // This can occur when legalizing some floating point comparisons.
6269 APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
6270 if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
6271 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
6272 CC = DAG.getCondCode(CCVal);
6273 RHS = DAG.getConstant(0, DL, LHS.getValueType());
6274 return true;
6275 }
6276
6277 return false;
6278}
6279
// DAG combine for LoongArchISD::BR_CC: canonicalize the condition via
// combine_CC and rebuild the branch if anything changed. Operand layout:
// 0 = chain, 1 = LHS, 2 = RHS, 3 = condcode, 4 = destination basic block.
// NOTE(review): this rendered listing dropped hyperlinked lines 6280-6281
// (the function name, presumably performBR_CCCombine, and its leading
// parameters) — confirm against the original LoongArchISelLowering.cpp.
6282                             const LoongArchSubtarget &Subtarget) {
6283   SDValue LHS = N->getOperand(1);
6284   SDValue RHS = N->getOperand(2);
6285   SDValue CC = N->getOperand(3);
6286   SDLoc DL(N);
6287 
6288   if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
6289     return DAG.getNode(LoongArchISD::BR_CC, DL, N->getValueType(0),
6290                        N->getOperand(0), LHS, RHS, CC, N->getOperand(4));
6291 
6292   return SDValue();
6293 }
6294
// DAG combine for LoongArchISD::SELECT_CC: fold trivial selects, turn a
// sign-test select of two small constants into branchless sra/and/add, and
// finally canonicalize the condition via combine_CC.
// NOTE(review): this rendered listing dropped hyperlinked lines 6295-6296
// (the function name, presumably performSELECT_CCCombine, and its leading
// parameters) — confirm against the original LoongArchISelLowering.cpp.
6297                             const LoongArchSubtarget &Subtarget) {
6298   // Transform
6299   SDValue LHS = N->getOperand(0);
6300   SDValue RHS = N->getOperand(1);
6301   SDValue CC = N->getOperand(2);
6302   ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
6303   SDValue TrueV = N->getOperand(3);
6304   SDValue FalseV = N->getOperand(4);
6305   SDLoc DL(N);
6306   EVT VT = N->getValueType(0);
6307 
6308   // If the True and False values are the same, we don't need a select_cc.
6309   if (TrueV == FalseV)
6310     return TrueV;
6311 
6312   // (select (x < 0), y, z)  -> x >> (GRLEN - 1) & (y - z) + z
6313   // (select (x >= 0), y, z) -> x >> (GRLEN - 1) & (z - y) + y
6314   if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
// NOTE(review): line 6315 (an additional conjunct, presumably requiring
// RHS to be the zero constant) is missing from this rendering.
6316       (CCVal == ISD::CondCode::SETLT || CCVal == ISD::CondCode::SETGE)) {
// Normalize to the SETLT form; the swap is undone below if no fold fires.
6317     if (CCVal == ISD::CondCode::SETGE)
6318       std::swap(TrueV, FalseV);
6319 
6320     int64_t TrueSImm = cast<ConstantSDNode>(TrueV)->getSExtValue();
6321     int64_t FalseSImm = cast<ConstantSDNode>(FalseV)->getSExtValue();
6322     // Only handle simm12, if it is not in this range, it can be considered as
6323     // register.
6324     if (isInt<12>(TrueSImm) && isInt<12>(FalseSImm) &&
6325         isInt<12>(TrueSImm - FalseSImm)) {
6326       SDValue SRA =
6327           DAG.getNode(ISD::SRA, DL, VT, LHS,
6328                       DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT));
6329       SDValue AND =
6330           DAG.getNode(ISD::AND, DL, VT, SRA,
6331                       DAG.getSignedConstant(TrueSImm - FalseSImm, DL, VT));
6332       return DAG.getNode(ISD::ADD, DL, VT, AND, FalseV);
6333     }
6334 
6335     if (CCVal == ISD::CondCode::SETGE)
6336       std::swap(TrueV, FalseV);
6337   }
6338 
6339   if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
6340     return DAG.getNode(LoongArchISD::SELECT_CC, DL, N->getValueType(0),
6341                        {LHS, RHS, CC, TrueV, FalseV});
6342 
6343   return SDValue();
6344 }
6345
// Validate an intrinsic's immediate operand against an N-bit signed or
// unsigned range and rematerialize it as a GRLen constant; emits a
// front-end error and returns UNDEF when the immediate is out of range.
// NOTE(review): this rendered listing dropped hyperlinked line 6347 (the
// function name, presumably legalizeIntrinsicImmArg, with its first
// parameters SDNode *Node and unsigned ImmOp) — confirm against the
// original LoongArchISelLowering.cpp before editing.
6346 template <unsigned N>
6348                                      SelectionDAG &DAG,
6349                                      const LoongArchSubtarget &Subtarget,
6350                                      bool IsSigned = false) {
6351   SDLoc DL(Node);
6352   auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
6353   // Check the ImmArg.
6354   if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
6355       (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
// Out-of-range immediates are a user error, not a compiler bug: report and
// recover with UNDEF rather than asserting.
6356     DAG.getContext()->emitError(Node->getOperationName(0) +
6357                                 ": argument out of range.");
6358     return DAG.getNode(ISD::UNDEF, DL, Subtarget.getGRLenVT());
6359   }
6360   return DAG.getConstant(CImm->getZExtValue(), DL, Subtarget.getGRLenVT());
6361 }
6362
// Turn an intrinsic's N-bit immediate operand into a vector splat constant
// of the node's result type; emits a front-end error and returns UNDEF
// when the immediate is out of range.
6363 template <unsigned N>
6364 static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp,
6365                                    SelectionDAG &DAG, bool IsSigned = false) {
6366   SDLoc DL(Node);
6367   EVT ResTy = Node->getValueType(0);
6368   auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
6369 
6370   // Check the ImmArg.
6371   if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
6372       (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
6373     DAG.getContext()->emitError(Node->getOperationName(0) +
6374                                 ": argument out of range.");
6375     return DAG.getNode(ISD::UNDEF, DL, ResTy);
6376   }
6377   return DAG.getConstant(
// NOTE(review): line 6378 (the first argument of getConstant — presumably
// an APInt built from the element bit width and the immediate) is missing
// from this rendering.
6379           IsSigned ? CImm->getSExtValue() : CImm->getZExtValue(), IsSigned),
6380       DL, ResTy);
6381 }
6382
// Mask operand 2 of Node down to a valid per-element bit index by ANDing
// with (element-size-in-bits - 1), mirroring hardware shift/bit-index
// truncation semantics.
// NOTE(review): this rendered listing dropped hyperlinked line 6383 (the
// function signature, presumably
// static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG) {)
// — confirm against the original LoongArchISelLowering.cpp before editing.
6384   SDLoc DL(Node);
6385   EVT ResTy = Node->getValueType(0);
6386   SDValue Vec = Node->getOperand(2);
6387   SDValue Mask = DAG.getConstant(Vec.getScalarValueSizeInBits() - 1, DL, ResTy);
6388   return DAG.getNode(ISD::AND, DL, ResTy, Vec, Mask);
6389 }
6390
// Lower a vector bit-clear intrinsic: clear, in each element of operand 1,
// the bit selected by the (truncated) per-element index in operand 2, via
// AND with ~(1 << idx).
// NOTE(review): this rendered listing dropped hyperlinked line 6391 (the
// function signature, presumably
// static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG) {)
// — confirm against the original LoongArchISelLowering.cpp before editing.
6392   SDLoc DL(Node);
6393   EVT ResTy = Node->getValueType(0);
6394   SDValue One = DAG.getConstant(1, DL, ResTy);
6395   SDValue Bit =
6396       DAG.getNode(ISD::SHL, DL, ResTy, One, truncateVecElts(Node, DAG));
6397 
6398   return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1),
6399                      DAG.getNOT(DL, Bit, ResTy));
6400 }
6401
// Lower a vector bit-clear-immediate intrinsic: clear bit CImm (an N-bit
// unsigned immediate) in every element of operand 1 by ANDing with a splat
// of ~(1 << CImm). Emits a front-end error and returns UNDEF when the
// immediate is out of range.
// NOTE(review): this rendered listing dropped hyperlinked line 6403 (the
// function signature, presumably lowerVectorBitClearImm(SDNode *Node,
// SelectionDAG &DAG) {) — confirm against the original source.
6402 template <unsigned N>
6404   SDLoc DL(Node);
6405   EVT ResTy = Node->getValueType(0);
6406   auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
6407   // Check the unsigned ImmArg.
6408   if (!isUInt<N>(CImm->getZExtValue())) {
6409     DAG.getContext()->emitError(Node->getOperationName(0) +
6410                                 ": argument out of range.");
6411     return DAG.getNode(ISD::UNDEF, DL, ResTy);
6412   }
6413 
6414   APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
6415   SDValue Mask = DAG.getConstant(~BitImm, DL, ResTy);
6416 
6417   return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1), Mask);
6418 }
6419
// Lower a vector bit-set-immediate intrinsic: set bit CImm (an N-bit
// unsigned immediate) in every element of operand 1 by ORing with a splat
// of (1 << CImm). Emits a front-end error and returns UNDEF when the
// immediate is out of range.
// NOTE(review): this rendered listing dropped hyperlinked line 6421 (the
// function signature, presumably lowerVectorBitSetImm(SDNode *Node,
// SelectionDAG &DAG) {) — confirm against the original source.
6420 template <unsigned N>
6422   SDLoc DL(Node);
6423   EVT ResTy = Node->getValueType(0);
6424   auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
6425   // Check the unsigned ImmArg.
6426   if (!isUInt<N>(CImm->getZExtValue())) {
6427     DAG.getContext()->emitError(Node->getOperationName(0) +
6428                                 ": argument out of range.");
6429     return DAG.getNode(ISD::UNDEF, DL, ResTy);
6430   }
6431 
6432   APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
6433   SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
6434   return DAG.getNode(ISD::OR, DL, ResTy, Node->getOperand(1), BitImm);
6435 }
6436
// Lower a vector bit-flip-immediate intrinsic: toggle bit CImm (an N-bit
// unsigned immediate) in every element of operand 1 by XORing with a splat
// of (1 << CImm). Emits a front-end error and returns UNDEF when the
// immediate is out of range.
// NOTE(review): this rendered listing dropped hyperlinked line 6438 (the
// function signature, presumably lowerVectorBitRevImm(SDNode *Node,
// SelectionDAG &DAG) {) — confirm against the original source.
6437 template <unsigned N>
6439   SDLoc DL(Node);
6440   EVT ResTy = Node->getValueType(0);
6441   auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
6442   // Check the unsigned ImmArg.
6443   if (!isUInt<N>(CImm->getZExtValue())) {
6444     DAG.getContext()->emitError(Node->getOperationName(0) +
6445                                 ": argument out of range.");
6446     return DAG.getNode(ISD::UNDEF, DL, ResTy);
6447   }
6448 
6449   APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
6450   SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
6451   return DAG.getNode(ISD::XOR, DL, ResTy, Node->getOperand(1), BitImm);
6452 }
6453
// Lower a vector-element-to-GPR extraction intrinsic: validate the W-bit
// unsigned lane immediate (operand 2), then build a ResOp node extracting
// that lane from the vector in operand 1. Emits a front-end error and
// returns UNDEF when the lane index is out of range.
// NOTE(review): this rendered listing dropped hyperlinked lines 6455 (the
// function signature, presumably lowerVPICKVE2GR(SDNode *N,
// SelectionDAG &DAG, ...) and 6466 (the definition of EltVT, presumably
// the vector's element VT) — confirm against the original source.
6454 template <unsigned W>
6456                                 unsigned ResOp) {
6457   unsigned Imm = N->getConstantOperandVal(2);
6458   if (!isUInt<W>(Imm)) {
6459     const StringRef ErrorMsg = "argument out of range";
6460     DAG.getContext()->emitError(N->getOperationName(0) + ": " + ErrorMsg + ".");
6461     return DAG.getUNDEF(N->getValueType(0));
6462   }
6463   SDLoc DL(N);
6464   SDValue Vec = N->getOperand(1);
6465   SDValue Idx = DAG.getConstant(Imm, DL, MVT::i32);
6467   return DAG.getNode(ResOp, DL, N->getValueType(0), Vec, Idx, EltVT);
6468 }
6469
6470static SDValue
6473 const LoongArchSubtarget &Subtarget) {
6474 SDLoc DL(N);
6475 switch (N->getConstantOperandVal(0)) {
6476 default:
6477 break;
6478 case Intrinsic::loongarch_lsx_vadd_b:
6479 case Intrinsic::loongarch_lsx_vadd_h:
6480 case Intrinsic::loongarch_lsx_vadd_w:
6481 case Intrinsic::loongarch_lsx_vadd_d:
6482 case Intrinsic::loongarch_lasx_xvadd_b:
6483 case Intrinsic::loongarch_lasx_xvadd_h:
6484 case Intrinsic::loongarch_lasx_xvadd_w:
6485 case Intrinsic::loongarch_lasx_xvadd_d:
6486 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
6487 N->getOperand(2));
6488 case Intrinsic::loongarch_lsx_vaddi_bu:
6489 case Intrinsic::loongarch_lsx_vaddi_hu:
6490 case Intrinsic::loongarch_lsx_vaddi_wu:
6491 case Intrinsic::loongarch_lsx_vaddi_du:
6492 case Intrinsic::loongarch_lasx_xvaddi_bu:
6493 case Intrinsic::loongarch_lasx_xvaddi_hu:
6494 case Intrinsic::loongarch_lasx_xvaddi_wu:
6495 case Intrinsic::loongarch_lasx_xvaddi_du:
6496 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
6497 lowerVectorSplatImm<5>(N, 2, DAG));
6498 case Intrinsic::loongarch_lsx_vsub_b:
6499 case Intrinsic::loongarch_lsx_vsub_h:
6500 case Intrinsic::loongarch_lsx_vsub_w:
6501 case Intrinsic::loongarch_lsx_vsub_d:
6502 case Intrinsic::loongarch_lasx_xvsub_b:
6503 case Intrinsic::loongarch_lasx_xvsub_h:
6504 case Intrinsic::loongarch_lasx_xvsub_w:
6505 case Intrinsic::loongarch_lasx_xvsub_d:
6506 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
6507 N->getOperand(2));
6508 case Intrinsic::loongarch_lsx_vsubi_bu:
6509 case Intrinsic::loongarch_lsx_vsubi_hu:
6510 case Intrinsic::loongarch_lsx_vsubi_wu:
6511 case Intrinsic::loongarch_lsx_vsubi_du:
6512 case Intrinsic::loongarch_lasx_xvsubi_bu:
6513 case Intrinsic::loongarch_lasx_xvsubi_hu:
6514 case Intrinsic::loongarch_lasx_xvsubi_wu:
6515 case Intrinsic::loongarch_lasx_xvsubi_du:
6516 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
6517 lowerVectorSplatImm<5>(N, 2, DAG));
6518 case Intrinsic::loongarch_lsx_vneg_b:
6519 case Intrinsic::loongarch_lsx_vneg_h:
6520 case Intrinsic::loongarch_lsx_vneg_w:
6521 case Intrinsic::loongarch_lsx_vneg_d:
6522 case Intrinsic::loongarch_lasx_xvneg_b:
6523 case Intrinsic::loongarch_lasx_xvneg_h:
6524 case Intrinsic::loongarch_lasx_xvneg_w:
6525 case Intrinsic::loongarch_lasx_xvneg_d:
6526 return DAG.getNode(
6527 ISD::SUB, DL, N->getValueType(0),
6528 DAG.getConstant(
6529 APInt(N->getValueType(0).getScalarType().getSizeInBits(), 0,
6530 /*isSigned=*/true),
6531 SDLoc(N), N->getValueType(0)),
6532 N->getOperand(1));
6533 case Intrinsic::loongarch_lsx_vmax_b:
6534 case Intrinsic::loongarch_lsx_vmax_h:
6535 case Intrinsic::loongarch_lsx_vmax_w:
6536 case Intrinsic::loongarch_lsx_vmax_d:
6537 case Intrinsic::loongarch_lasx_xvmax_b:
6538 case Intrinsic::loongarch_lasx_xvmax_h:
6539 case Intrinsic::loongarch_lasx_xvmax_w:
6540 case Intrinsic::loongarch_lasx_xvmax_d:
6541 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
6542 N->getOperand(2));
6543 case Intrinsic::loongarch_lsx_vmax_bu:
6544 case Intrinsic::loongarch_lsx_vmax_hu:
6545 case Intrinsic::loongarch_lsx_vmax_wu:
6546 case Intrinsic::loongarch_lsx_vmax_du:
6547 case Intrinsic::loongarch_lasx_xvmax_bu:
6548 case Intrinsic::loongarch_lasx_xvmax_hu:
6549 case Intrinsic::loongarch_lasx_xvmax_wu:
6550 case Intrinsic::loongarch_lasx_xvmax_du:
6551 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
6552 N->getOperand(2));
6553 case Intrinsic::loongarch_lsx_vmaxi_b:
6554 case Intrinsic::loongarch_lsx_vmaxi_h:
6555 case Intrinsic::loongarch_lsx_vmaxi_w:
6556 case Intrinsic::loongarch_lsx_vmaxi_d:
6557 case Intrinsic::loongarch_lasx_xvmaxi_b:
6558 case Intrinsic::loongarch_lasx_xvmaxi_h:
6559 case Intrinsic::loongarch_lasx_xvmaxi_w:
6560 case Intrinsic::loongarch_lasx_xvmaxi_d:
6561 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
6562 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
6563 case Intrinsic::loongarch_lsx_vmaxi_bu:
6564 case Intrinsic::loongarch_lsx_vmaxi_hu:
6565 case Intrinsic::loongarch_lsx_vmaxi_wu:
6566 case Intrinsic::loongarch_lsx_vmaxi_du:
6567 case Intrinsic::loongarch_lasx_xvmaxi_bu:
6568 case Intrinsic::loongarch_lasx_xvmaxi_hu:
6569 case Intrinsic::loongarch_lasx_xvmaxi_wu:
6570 case Intrinsic::loongarch_lasx_xvmaxi_du:
6571 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
6572 lowerVectorSplatImm<5>(N, 2, DAG));
6573 case Intrinsic::loongarch_lsx_vmin_b:
6574 case Intrinsic::loongarch_lsx_vmin_h:
6575 case Intrinsic::loongarch_lsx_vmin_w:
6576 case Intrinsic::loongarch_lsx_vmin_d:
6577 case Intrinsic::loongarch_lasx_xvmin_b:
6578 case Intrinsic::loongarch_lasx_xvmin_h:
6579 case Intrinsic::loongarch_lasx_xvmin_w:
6580 case Intrinsic::loongarch_lasx_xvmin_d:
6581 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
6582 N->getOperand(2));
6583 case Intrinsic::loongarch_lsx_vmin_bu:
6584 case Intrinsic::loongarch_lsx_vmin_hu:
6585 case Intrinsic::loongarch_lsx_vmin_wu:
6586 case Intrinsic::loongarch_lsx_vmin_du:
6587 case Intrinsic::loongarch_lasx_xvmin_bu:
6588 case Intrinsic::loongarch_lasx_xvmin_hu:
6589 case Intrinsic::loongarch_lasx_xvmin_wu:
6590 case Intrinsic::loongarch_lasx_xvmin_du:
6591 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
6592 N->getOperand(2));
6593 case Intrinsic::loongarch_lsx_vmini_b:
6594 case Intrinsic::loongarch_lsx_vmini_h:
6595 case Intrinsic::loongarch_lsx_vmini_w:
6596 case Intrinsic::loongarch_lsx_vmini_d:
6597 case Intrinsic::loongarch_lasx_xvmini_b:
6598 case Intrinsic::loongarch_lasx_xvmini_h:
6599 case Intrinsic::loongarch_lasx_xvmini_w:
6600 case Intrinsic::loongarch_lasx_xvmini_d:
6601 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
6602 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
6603 case Intrinsic::loongarch_lsx_vmini_bu:
6604 case Intrinsic::loongarch_lsx_vmini_hu:
6605 case Intrinsic::loongarch_lsx_vmini_wu:
6606 case Intrinsic::loongarch_lsx_vmini_du:
6607 case Intrinsic::loongarch_lasx_xvmini_bu:
6608 case Intrinsic::loongarch_lasx_xvmini_hu:
6609 case Intrinsic::loongarch_lasx_xvmini_wu:
6610 case Intrinsic::loongarch_lasx_xvmini_du:
6611 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
6612 lowerVectorSplatImm<5>(N, 2, DAG));
6613 case Intrinsic::loongarch_lsx_vmul_b:
6614 case Intrinsic::loongarch_lsx_vmul_h:
6615 case Intrinsic::loongarch_lsx_vmul_w:
6616 case Intrinsic::loongarch_lsx_vmul_d:
6617 case Intrinsic::loongarch_lasx_xvmul_b:
6618 case Intrinsic::loongarch_lasx_xvmul_h:
6619 case Intrinsic::loongarch_lasx_xvmul_w:
6620 case Intrinsic::loongarch_lasx_xvmul_d:
6621 return DAG.getNode(ISD::MUL, DL, N->getValueType(0), N->getOperand(1),
6622 N->getOperand(2));
6623 case Intrinsic::loongarch_lsx_vmadd_b:
6624 case Intrinsic::loongarch_lsx_vmadd_h:
6625 case Intrinsic::loongarch_lsx_vmadd_w:
6626 case Intrinsic::loongarch_lsx_vmadd_d:
6627 case Intrinsic::loongarch_lasx_xvmadd_b:
6628 case Intrinsic::loongarch_lasx_xvmadd_h:
6629 case Intrinsic::loongarch_lasx_xvmadd_w:
6630 case Intrinsic::loongarch_lasx_xvmadd_d: {
6631 EVT ResTy = N->getValueType(0);
6632 return DAG.getNode(ISD::ADD, SDLoc(N), ResTy, N->getOperand(1),
6633 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
6634 N->getOperand(3)));
6635 }
6636 case Intrinsic::loongarch_lsx_vmsub_b:
6637 case Intrinsic::loongarch_lsx_vmsub_h:
6638 case Intrinsic::loongarch_lsx_vmsub_w:
6639 case Intrinsic::loongarch_lsx_vmsub_d:
6640 case Intrinsic::loongarch_lasx_xvmsub_b:
6641 case Intrinsic::loongarch_lasx_xvmsub_h:
6642 case Intrinsic::loongarch_lasx_xvmsub_w:
6643 case Intrinsic::loongarch_lasx_xvmsub_d: {
6644 EVT ResTy = N->getValueType(0);
6645 return DAG.getNode(ISD::SUB, SDLoc(N), ResTy, N->getOperand(1),
6646 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
6647 N->getOperand(3)));
6648 }
6649 case Intrinsic::loongarch_lsx_vdiv_b:
6650 case Intrinsic::loongarch_lsx_vdiv_h:
6651 case Intrinsic::loongarch_lsx_vdiv_w:
6652 case Intrinsic::loongarch_lsx_vdiv_d:
6653 case Intrinsic::loongarch_lasx_xvdiv_b:
6654 case Intrinsic::loongarch_lasx_xvdiv_h:
6655 case Intrinsic::loongarch_lasx_xvdiv_w:
6656 case Intrinsic::loongarch_lasx_xvdiv_d:
6657 return DAG.getNode(ISD::SDIV, DL, N->getValueType(0), N->getOperand(1),
6658 N->getOperand(2));
6659 case Intrinsic::loongarch_lsx_vdiv_bu:
6660 case Intrinsic::loongarch_lsx_vdiv_hu:
6661 case Intrinsic::loongarch_lsx_vdiv_wu:
6662 case Intrinsic::loongarch_lsx_vdiv_du:
6663 case Intrinsic::loongarch_lasx_xvdiv_bu:
6664 case Intrinsic::loongarch_lasx_xvdiv_hu:
6665 case Intrinsic::loongarch_lasx_xvdiv_wu:
6666 case Intrinsic::loongarch_lasx_xvdiv_du:
6667 return DAG.getNode(ISD::UDIV, DL, N->getValueType(0), N->getOperand(1),
6668 N->getOperand(2));
6669 case Intrinsic::loongarch_lsx_vmod_b:
6670 case Intrinsic::loongarch_lsx_vmod_h:
6671 case Intrinsic::loongarch_lsx_vmod_w:
6672 case Intrinsic::loongarch_lsx_vmod_d:
6673 case Intrinsic::loongarch_lasx_xvmod_b:
6674 case Intrinsic::loongarch_lasx_xvmod_h:
6675 case Intrinsic::loongarch_lasx_xvmod_w:
6676 case Intrinsic::loongarch_lasx_xvmod_d:
6677 return DAG.getNode(ISD::SREM, DL, N->getValueType(0), N->getOperand(1),
6678 N->getOperand(2));
6679 case Intrinsic::loongarch_lsx_vmod_bu:
6680 case Intrinsic::loongarch_lsx_vmod_hu:
6681 case Intrinsic::loongarch_lsx_vmod_wu:
6682 case Intrinsic::loongarch_lsx_vmod_du:
6683 case Intrinsic::loongarch_lasx_xvmod_bu:
6684 case Intrinsic::loongarch_lasx_xvmod_hu:
6685 case Intrinsic::loongarch_lasx_xvmod_wu:
6686 case Intrinsic::loongarch_lasx_xvmod_du:
6687 return DAG.getNode(ISD::UREM, DL, N->getValueType(0), N->getOperand(1),
6688 N->getOperand(2));
6689 case Intrinsic::loongarch_lsx_vand_v:
6690 case Intrinsic::loongarch_lasx_xvand_v:
6691 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
6692 N->getOperand(2));
6693 case Intrinsic::loongarch_lsx_vor_v:
6694 case Intrinsic::loongarch_lasx_xvor_v:
6695 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
6696 N->getOperand(2));
6697 case Intrinsic::loongarch_lsx_vxor_v:
6698 case Intrinsic::loongarch_lasx_xvxor_v:
6699 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
6700 N->getOperand(2));
6701 case Intrinsic::loongarch_lsx_vnor_v:
6702 case Intrinsic::loongarch_lasx_xvnor_v: {
6703 SDValue Res = DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
6704 N->getOperand(2));
6705 return DAG.getNOT(DL, Res, Res->getValueType(0));
6706 }
6707 case Intrinsic::loongarch_lsx_vandi_b:
6708 case Intrinsic::loongarch_lasx_xvandi_b:
6709 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
6710 lowerVectorSplatImm<8>(N, 2, DAG));
6711 case Intrinsic::loongarch_lsx_vori_b:
6712 case Intrinsic::loongarch_lasx_xvori_b:
6713 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
6714 lowerVectorSplatImm<8>(N, 2, DAG));
6715 case Intrinsic::loongarch_lsx_vxori_b:
6716 case Intrinsic::loongarch_lasx_xvxori_b:
6717 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
6718 lowerVectorSplatImm<8>(N, 2, DAG));
6719 case Intrinsic::loongarch_lsx_vsll_b:
6720 case Intrinsic::loongarch_lsx_vsll_h:
6721 case Intrinsic::loongarch_lsx_vsll_w:
6722 case Intrinsic::loongarch_lsx_vsll_d:
6723 case Intrinsic::loongarch_lasx_xvsll_b:
6724 case Intrinsic::loongarch_lasx_xvsll_h:
6725 case Intrinsic::loongarch_lasx_xvsll_w:
6726 case Intrinsic::loongarch_lasx_xvsll_d:
6727 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
6728 truncateVecElts(N, DAG));
6729 case Intrinsic::loongarch_lsx_vslli_b:
6730 case Intrinsic::loongarch_lasx_xvslli_b:
6731 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
6732 lowerVectorSplatImm<3>(N, 2, DAG));
6733 case Intrinsic::loongarch_lsx_vslli_h:
6734 case Intrinsic::loongarch_lasx_xvslli_h:
6735 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
6736 lowerVectorSplatImm<4>(N, 2, DAG));
6737 case Intrinsic::loongarch_lsx_vslli_w:
6738 case Intrinsic::loongarch_lasx_xvslli_w:
6739 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
6740 lowerVectorSplatImm<5>(N, 2, DAG));
6741 case Intrinsic::loongarch_lsx_vslli_d:
6742 case Intrinsic::loongarch_lasx_xvslli_d:
6743 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
6744 lowerVectorSplatImm<6>(N, 2, DAG));
6745 case Intrinsic::loongarch_lsx_vsrl_b:
6746 case Intrinsic::loongarch_lsx_vsrl_h:
6747 case Intrinsic::loongarch_lsx_vsrl_w:
6748 case Intrinsic::loongarch_lsx_vsrl_d:
6749 case Intrinsic::loongarch_lasx_xvsrl_b:
6750 case Intrinsic::loongarch_lasx_xvsrl_h:
6751 case Intrinsic::loongarch_lasx_xvsrl_w:
6752 case Intrinsic::loongarch_lasx_xvsrl_d:
6753 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
6754 truncateVecElts(N, DAG));
6755 case Intrinsic::loongarch_lsx_vsrli_b:
6756 case Intrinsic::loongarch_lasx_xvsrli_b:
6757 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
6758 lowerVectorSplatImm<3>(N, 2, DAG));
6759 case Intrinsic::loongarch_lsx_vsrli_h:
6760 case Intrinsic::loongarch_lasx_xvsrli_h:
6761 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
6762 lowerVectorSplatImm<4>(N, 2, DAG));
6763 case Intrinsic::loongarch_lsx_vsrli_w:
6764 case Intrinsic::loongarch_lasx_xvsrli_w:
6765 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
6766 lowerVectorSplatImm<5>(N, 2, DAG));
6767 case Intrinsic::loongarch_lsx_vsrli_d:
6768 case Intrinsic::loongarch_lasx_xvsrli_d:
6769 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
6770 lowerVectorSplatImm<6>(N, 2, DAG));
6771 case Intrinsic::loongarch_lsx_vsra_b:
6772 case Intrinsic::loongarch_lsx_vsra_h:
6773 case Intrinsic::loongarch_lsx_vsra_w:
6774 case Intrinsic::loongarch_lsx_vsra_d:
6775 case Intrinsic::loongarch_lasx_xvsra_b:
6776 case Intrinsic::loongarch_lasx_xvsra_h:
6777 case Intrinsic::loongarch_lasx_xvsra_w:
6778 case Intrinsic::loongarch_lasx_xvsra_d:
6779 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6780 truncateVecElts(N, DAG));
6781 case Intrinsic::loongarch_lsx_vsrai_b:
6782 case Intrinsic::loongarch_lasx_xvsrai_b:
6783 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6784 lowerVectorSplatImm<3>(N, 2, DAG));
6785 case Intrinsic::loongarch_lsx_vsrai_h:
6786 case Intrinsic::loongarch_lasx_xvsrai_h:
6787 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6788 lowerVectorSplatImm<4>(N, 2, DAG));
6789 case Intrinsic::loongarch_lsx_vsrai_w:
6790 case Intrinsic::loongarch_lasx_xvsrai_w:
6791 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6792 lowerVectorSplatImm<5>(N, 2, DAG));
6793 case Intrinsic::loongarch_lsx_vsrai_d:
6794 case Intrinsic::loongarch_lasx_xvsrai_d:
6795 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6796 lowerVectorSplatImm<6>(N, 2, DAG));
6797 case Intrinsic::loongarch_lsx_vclz_b:
6798 case Intrinsic::loongarch_lsx_vclz_h:
6799 case Intrinsic::loongarch_lsx_vclz_w:
6800 case Intrinsic::loongarch_lsx_vclz_d:
6801 case Intrinsic::loongarch_lasx_xvclz_b:
6802 case Intrinsic::loongarch_lasx_xvclz_h:
6803 case Intrinsic::loongarch_lasx_xvclz_w:
6804 case Intrinsic::loongarch_lasx_xvclz_d:
6805 return DAG.getNode(ISD::CTLZ, DL, N->getValueType(0), N->getOperand(1));
6806 case Intrinsic::loongarch_lsx_vpcnt_b:
6807 case Intrinsic::loongarch_lsx_vpcnt_h:
6808 case Intrinsic::loongarch_lsx_vpcnt_w:
6809 case Intrinsic::loongarch_lsx_vpcnt_d:
6810 case Intrinsic::loongarch_lasx_xvpcnt_b:
6811 case Intrinsic::loongarch_lasx_xvpcnt_h:
6812 case Intrinsic::loongarch_lasx_xvpcnt_w:
6813 case Intrinsic::loongarch_lasx_xvpcnt_d:
6814 return DAG.getNode(ISD::CTPOP, DL, N->getValueType(0), N->getOperand(1));
6815 case Intrinsic::loongarch_lsx_vbitclr_b:
6816 case Intrinsic::loongarch_lsx_vbitclr_h:
6817 case Intrinsic::loongarch_lsx_vbitclr_w:
6818 case Intrinsic::loongarch_lsx_vbitclr_d:
6819 case Intrinsic::loongarch_lasx_xvbitclr_b:
6820 case Intrinsic::loongarch_lasx_xvbitclr_h:
6821 case Intrinsic::loongarch_lasx_xvbitclr_w:
6822 case Intrinsic::loongarch_lasx_xvbitclr_d:
6823 return lowerVectorBitClear(N, DAG);
6824 case Intrinsic::loongarch_lsx_vbitclri_b:
6825 case Intrinsic::loongarch_lasx_xvbitclri_b:
6826 return lowerVectorBitClearImm<3>(N, DAG);
6827 case Intrinsic::loongarch_lsx_vbitclri_h:
6828 case Intrinsic::loongarch_lasx_xvbitclri_h:
6829 return lowerVectorBitClearImm<4>(N, DAG);
6830 case Intrinsic::loongarch_lsx_vbitclri_w:
6831 case Intrinsic::loongarch_lasx_xvbitclri_w:
6832 return lowerVectorBitClearImm<5>(N, DAG);
6833 case Intrinsic::loongarch_lsx_vbitclri_d:
6834 case Intrinsic::loongarch_lasx_xvbitclri_d:
6835 return lowerVectorBitClearImm<6>(N, DAG);
6836 case Intrinsic::loongarch_lsx_vbitset_b:
6837 case Intrinsic::loongarch_lsx_vbitset_h:
6838 case Intrinsic::loongarch_lsx_vbitset_w:
6839 case Intrinsic::loongarch_lsx_vbitset_d:
6840 case Intrinsic::loongarch_lasx_xvbitset_b:
6841 case Intrinsic::loongarch_lasx_xvbitset_h:
6842 case Intrinsic::loongarch_lasx_xvbitset_w:
6843 case Intrinsic::loongarch_lasx_xvbitset_d: {
6844 EVT VecTy = N->getValueType(0);
6845 SDValue One = DAG.getConstant(1, DL, VecTy);
6846 return DAG.getNode(
6847 ISD::OR, DL, VecTy, N->getOperand(1),
6848 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
6849 }
6850 case Intrinsic::loongarch_lsx_vbitseti_b:
6851 case Intrinsic::loongarch_lasx_xvbitseti_b:
6852 return lowerVectorBitSetImm<3>(N, DAG);
6853 case Intrinsic::loongarch_lsx_vbitseti_h:
6854 case Intrinsic::loongarch_lasx_xvbitseti_h:
6855 return lowerVectorBitSetImm<4>(N, DAG);
6856 case Intrinsic::loongarch_lsx_vbitseti_w:
6857 case Intrinsic::loongarch_lasx_xvbitseti_w:
6858 return lowerVectorBitSetImm<5>(N, DAG);
6859 case Intrinsic::loongarch_lsx_vbitseti_d:
6860 case Intrinsic::loongarch_lasx_xvbitseti_d:
6861 return lowerVectorBitSetImm<6>(N, DAG);
6862 case Intrinsic::loongarch_lsx_vbitrev_b:
6863 case Intrinsic::loongarch_lsx_vbitrev_h:
6864 case Intrinsic::loongarch_lsx_vbitrev_w:
6865 case Intrinsic::loongarch_lsx_vbitrev_d:
6866 case Intrinsic::loongarch_lasx_xvbitrev_b:
6867 case Intrinsic::loongarch_lasx_xvbitrev_h:
6868 case Intrinsic::loongarch_lasx_xvbitrev_w:
6869 case Intrinsic::loongarch_lasx_xvbitrev_d: {
6870 EVT VecTy = N->getValueType(0);
6871 SDValue One = DAG.getConstant(1, DL, VecTy);
6872 return DAG.getNode(
6873 ISD::XOR, DL, VecTy, N->getOperand(1),
6874 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
6875 }
6876 case Intrinsic::loongarch_lsx_vbitrevi_b:
6877 case Intrinsic::loongarch_lasx_xvbitrevi_b:
6878 return lowerVectorBitRevImm<3>(N, DAG);
6879 case Intrinsic::loongarch_lsx_vbitrevi_h:
6880 case Intrinsic::loongarch_lasx_xvbitrevi_h:
6881 return lowerVectorBitRevImm<4>(N, DAG);
6882 case Intrinsic::loongarch_lsx_vbitrevi_w:
6883 case Intrinsic::loongarch_lasx_xvbitrevi_w:
6884 return lowerVectorBitRevImm<5>(N, DAG);
6885 case Intrinsic::loongarch_lsx_vbitrevi_d:
6886 case Intrinsic::loongarch_lasx_xvbitrevi_d:
6887 return lowerVectorBitRevImm<6>(N, DAG);
6888 case Intrinsic::loongarch_lsx_vfadd_s:
6889 case Intrinsic::loongarch_lsx_vfadd_d:
6890 case Intrinsic::loongarch_lasx_xvfadd_s:
6891 case Intrinsic::loongarch_lasx_xvfadd_d:
6892 return DAG.getNode(ISD::FADD, DL, N->getValueType(0), N->getOperand(1),
6893 N->getOperand(2));
6894 case Intrinsic::loongarch_lsx_vfsub_s:
6895 case Intrinsic::loongarch_lsx_vfsub_d:
6896 case Intrinsic::loongarch_lasx_xvfsub_s:
6897 case Intrinsic::loongarch_lasx_xvfsub_d:
6898 return DAG.getNode(ISD::FSUB, DL, N->getValueType(0), N->getOperand(1),
6899 N->getOperand(2));
6900 case Intrinsic::loongarch_lsx_vfmul_s:
6901 case Intrinsic::loongarch_lsx_vfmul_d:
6902 case Intrinsic::loongarch_lasx_xvfmul_s:
6903 case Intrinsic::loongarch_lasx_xvfmul_d:
6904 return DAG.getNode(ISD::FMUL, DL, N->getValueType(0), N->getOperand(1),
6905 N->getOperand(2));
6906 case Intrinsic::loongarch_lsx_vfdiv_s:
6907 case Intrinsic::loongarch_lsx_vfdiv_d:
6908 case Intrinsic::loongarch_lasx_xvfdiv_s:
6909 case Intrinsic::loongarch_lasx_xvfdiv_d:
6910 return DAG.getNode(ISD::FDIV, DL, N->getValueType(0), N->getOperand(1),
6911 N->getOperand(2));
6912 case Intrinsic::loongarch_lsx_vfmadd_s:
6913 case Intrinsic::loongarch_lsx_vfmadd_d:
6914 case Intrinsic::loongarch_lasx_xvfmadd_s:
6915 case Intrinsic::loongarch_lasx_xvfmadd_d:
6916 return DAG.getNode(ISD::FMA, DL, N->getValueType(0), N->getOperand(1),
6917 N->getOperand(2), N->getOperand(3));
6918 case Intrinsic::loongarch_lsx_vinsgr2vr_b:
6919 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
6920 N->getOperand(1), N->getOperand(2),
6921 legalizeIntrinsicImmArg<4>(N, 3, DAG, Subtarget));
6922 case Intrinsic::loongarch_lsx_vinsgr2vr_h:
6923 case Intrinsic::loongarch_lasx_xvinsgr2vr_w:
6924 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
6925 N->getOperand(1), N->getOperand(2),
6926 legalizeIntrinsicImmArg<3>(N, 3, DAG, Subtarget));
6927 case Intrinsic::loongarch_lsx_vinsgr2vr_w:
6928 case Intrinsic::loongarch_lasx_xvinsgr2vr_d:
6929 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
6930 N->getOperand(1), N->getOperand(2),
6931 legalizeIntrinsicImmArg<2>(N, 3, DAG, Subtarget));
6932 case Intrinsic::loongarch_lsx_vinsgr2vr_d:
6933 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
6934 N->getOperand(1), N->getOperand(2),
6935 legalizeIntrinsicImmArg<1>(N, 3, DAG, Subtarget));
6936 case Intrinsic::loongarch_lsx_vreplgr2vr_b:
6937 case Intrinsic::loongarch_lsx_vreplgr2vr_h:
6938 case Intrinsic::loongarch_lsx_vreplgr2vr_w:
6939 case Intrinsic::loongarch_lsx_vreplgr2vr_d:
6940 case Intrinsic::loongarch_lasx_xvreplgr2vr_b:
6941 case Intrinsic::loongarch_lasx_xvreplgr2vr_h:
6942 case Intrinsic::loongarch_lasx_xvreplgr2vr_w:
6943 case Intrinsic::loongarch_lasx_xvreplgr2vr_d:
6944 return DAG.getNode(LoongArchISD::VREPLGR2VR, DL, N->getValueType(0),
6945 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
6946 N->getOperand(1)));
6947 case Intrinsic::loongarch_lsx_vreplve_b:
6948 case Intrinsic::loongarch_lsx_vreplve_h:
6949 case Intrinsic::loongarch_lsx_vreplve_w:
6950 case Intrinsic::loongarch_lsx_vreplve_d:
6951 case Intrinsic::loongarch_lasx_xvreplve_b:
6952 case Intrinsic::loongarch_lasx_xvreplve_h:
6953 case Intrinsic::loongarch_lasx_xvreplve_w:
6954 case Intrinsic::loongarch_lasx_xvreplve_d:
6955 return DAG.getNode(LoongArchISD::VREPLVE, DL, N->getValueType(0),
6956 N->getOperand(1),
6957 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
6958 N->getOperand(2)));
6959 case Intrinsic::loongarch_lsx_vpickve2gr_b:
6960 if (!Subtarget.is64Bit())
6961 return lowerVectorPickVE2GR<4>(N, DAG, LoongArchISD::VPICK_SEXT_ELT);
6962 break;
6963 case Intrinsic::loongarch_lsx_vpickve2gr_h:
6964 case Intrinsic::loongarch_lasx_xvpickve2gr_w:
6965 if (!Subtarget.is64Bit())
6966 return lowerVectorPickVE2GR<3>(N, DAG, LoongArchISD::VPICK_SEXT_ELT);
6967 break;
6968 case Intrinsic::loongarch_lsx_vpickve2gr_w:
6969 if (!Subtarget.is64Bit())
6970 return lowerVectorPickVE2GR<2>(N, DAG, LoongArchISD::VPICK_SEXT_ELT);
6971 break;
6972 case Intrinsic::loongarch_lsx_vpickve2gr_bu:
6973 if (!Subtarget.is64Bit())
6974 return lowerVectorPickVE2GR<4>(N, DAG, LoongArchISD::VPICK_ZEXT_ELT);
6975 break;
6976 case Intrinsic::loongarch_lsx_vpickve2gr_hu:
6977 case Intrinsic::loongarch_lasx_xvpickve2gr_wu:
6978 if (!Subtarget.is64Bit())
6979 return lowerVectorPickVE2GR<3>(N, DAG, LoongArchISD::VPICK_ZEXT_ELT);
6980 break;
6981 case Intrinsic::loongarch_lsx_vpickve2gr_wu:
6982 if (!Subtarget.is64Bit())
6983 return lowerVectorPickVE2GR<2>(N, DAG, LoongArchISD::VPICK_ZEXT_ELT);
6984 break;
6985 case Intrinsic::loongarch_lsx_bz_b:
6986 case Intrinsic::loongarch_lsx_bz_h:
6987 case Intrinsic::loongarch_lsx_bz_w:
6988 case Intrinsic::loongarch_lsx_bz_d:
6989 case Intrinsic::loongarch_lasx_xbz_b:
6990 case Intrinsic::loongarch_lasx_xbz_h:
6991 case Intrinsic::loongarch_lasx_xbz_w:
6992 case Intrinsic::loongarch_lasx_xbz_d:
6993 if (!Subtarget.is64Bit())
6994 return DAG.getNode(LoongArchISD::VALL_ZERO, DL, N->getValueType(0),
6995 N->getOperand(1));
6996 break;
6997 case Intrinsic::loongarch_lsx_bz_v:
6998 case Intrinsic::loongarch_lasx_xbz_v:
6999 if (!Subtarget.is64Bit())
7000 return DAG.getNode(LoongArchISD::VANY_ZERO, DL, N->getValueType(0),
7001 N->getOperand(1));
7002 break;
7003 case Intrinsic::loongarch_lsx_bnz_b:
7004 case Intrinsic::loongarch_lsx_bnz_h:
7005 case Intrinsic::loongarch_lsx_bnz_w:
7006 case Intrinsic::loongarch_lsx_bnz_d:
7007 case Intrinsic::loongarch_lasx_xbnz_b:
7008 case Intrinsic::loongarch_lasx_xbnz_h:
7009 case Intrinsic::loongarch_lasx_xbnz_w:
7010 case Intrinsic::loongarch_lasx_xbnz_d:
7011 if (!Subtarget.is64Bit())
7012 return DAG.getNode(LoongArchISD::VALL_NONZERO, DL, N->getValueType(0),
7013 N->getOperand(1));
7014 break;
7015 case Intrinsic::loongarch_lsx_bnz_v:
7016 case Intrinsic::loongarch_lasx_xbnz_v:
7017 if (!Subtarget.is64Bit())
7018 return DAG.getNode(LoongArchISD::VANY_NONZERO, DL, N->getValueType(0),
7019 N->getOperand(1));
7020 break;
7021 case Intrinsic::loongarch_lasx_concat_128_s:
7022 case Intrinsic::loongarch_lasx_concat_128_d:
7023 case Intrinsic::loongarch_lasx_concat_128:
7024 return DAG.getNode(ISD::CONCAT_VECTORS, DL, N->getValueType(0),
7025 N->getOperand(1), N->getOperand(2));
7026 }
7027 return SDValue();
7028}
7029
7032 const LoongArchSubtarget &Subtarget) {
// Fold (MOVGR2FR_W_LA64 (MOVFR2GR_S_LA64 X)) -> X: moving a value FPR->GPR
// and immediately back again is a round trip that can be elided.
7033 // If the input to MOVGR2FR_W_LA64 is just MOVFR2GR_S_LA64 then the
7034 // conversion is unnecessary and can be replaced with the
7035 // MOVFR2GR_S_LA64 operand.
7036 SDValue Op0 = N->getOperand(0);
7037 if (Op0.getOpcode() == LoongArchISD::MOVFR2GR_S_LA64)
7038 return Op0.getOperand(0);
// No fold applies; let other combines run.
7039 return SDValue();
7040 }
7041
7044 const LoongArchSubtarget &Subtarget) {
// Fold (MOVFR2GR_S_LA64 (MOVGR2FR_W_LA64 X)) -> X, the mirror of the
// MOVGR2FR_W combine above: a GPR->FPR->GPR round trip is a no-op.
7045 // If the input to MOVFR2GR_S_LA64 is just MOVGR2FR_W_LA64 then the
7046 // conversion is unnecessary and can be replaced with the MOVGR2FR_W_LA64
7047 // operand.
7048 SDValue Op0 = N->getOperand(0);
7049 if (Op0->getOpcode() == LoongArchISD::MOVGR2FR_W_LA64) {
// The value that went into the FPR must already have this node's result
// type for the elision to be type-correct.
7050 assert(Op0.getOperand(0).getValueType() == N->getSimpleValueType(0) &&
7051 "Unexpected value type!");
7052 return Op0.getOperand(0);
7053 }
7054 return SDValue();
7055 }
7056
// Combine for [X]VMSKLTZ nodes: run the generic demanded-bits machinery over
// the node so its inputs can be simplified. Returning SDValue(N, 0) tells the
// combiner the DAG was updated in place.
7059 const LoongArchSubtarget &Subtarget) {
7060 MVT VT = N->getSimpleValueType(0);
7061 unsigned NumBits = VT.getScalarSizeInBits();
7062
7063 // Simplify the inputs.
7064 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// Every bit of the scalar result is demanded; SimplifyDemandedBits can still
// shrink/simplify the operands feeding this node.
7065 APInt DemandedMask(APInt::getAllOnes(NumBits));
7066 if (TLI.SimplifyDemandedBits(SDValue(N, 0), DemandedMask, DCI))
7067 return SDValue(N, 0);
7068
7069 return SDValue();
7070 }
7071
// Combine for LoongArchISD::SPLIT_PAIR_F64 (split an f64 into two i32
// halves): fold away BuildPairF64 round trips, propagate undef, and expand
// f64 constants directly into two i32 constants.
7072 static SDValue
7075 const LoongArchSubtarget &Subtarget) {
7076 SDValue Op0 = N->getOperand(0);
7077 SDLoc DL(N);
7078
7079 // If the input to SplitPairF64 is just BuildPairF64 then the operation is
7080 // redundant. Instead, use BuildPairF64's operands directly.
7081 if (Op0->getOpcode() == LoongArchISD::BUILD_PAIR_F64)
7082 return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
7083
// Splitting an undef f64 just yields two undef i32 halves.
7084 if (Op0->isUndef()) {
7085 SDValue Lo = DAG.getUNDEF(MVT::i32);
7086 SDValue Hi = DAG.getUNDEF(MVT::i32);
7087 return DCI.CombineTo(N, Lo, Hi);
7088 }
7089
7090 // It's cheaper to materialise two 32-bit integers than to load a double
7091 // from the constant pool and transfer it to integer registers through the
7092 // stack.
7094 APInt V = C->getValueAPF().bitcastToAPInt();
// Lo is the low 32 bits of the f64 bit pattern, Hi the upper 32 bits.
7095 SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
7096 SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
7097 return DCI.CombineTo(N, Lo, Hi);
7098 }
7099
7100 return SDValue();
7101 }
7102
7103 /// Do target-specific dag combines on LoongArchISD::VANDN nodes.
7106 const LoongArchSubtarget &Subtarget) {
7107 SDValue N0 = N->getOperand(0);
7108 SDValue N1 = N->getOperand(1);
7109 MVT VT = N->getSimpleValueType(0);
7110 SDLoc DL(N);
7111
// VANDN computes AND(NOT(N0), N1), so an undef on either side folds to zero.
7112 // VANDN(undef, x) -> 0
7113 // VANDN(x, undef) -> 0
7114 if (N0.isUndef() || N1.isUndef())
7115 return DAG.getConstant(0, DL, VT);
7116
7117 // VANDN(0, x) -> x
7119 return N1;
7120
7121 // VANDN(x, 0) -> 0
7123 return DAG.getConstant(0, DL, VT);
7124
7125 // VANDN(x, -1) -> NOT(x) -> XOR(x, -1)
7127 return DAG.getNOT(DL, N0, VT);
7128
7129 // Turn VANDN back to AND if input is inverted.
7130 if (SDValue Not = isNOT(N0, DAG))
7131 return DAG.getNode(ISD::AND, DL, VT, DAG.getBitcast(VT, Not), N1);
7132
7133 // Folds for better commutativity:
// Only rewrite when N1 has a single use so we do not duplicate work.
7134 if (N1->hasOneUse()) {
7135 // VANDN(x,NOT(y)) -> AND(NOT(x),NOT(y)) -> NOT(OR(X,Y)).
7136 if (SDValue Not = isNOT(N1, DAG))
7137 return DAG.getNOT(
7138 DL, DAG.getNode(ISD::OR, DL, VT, N0, DAG.getBitcast(VT, Not)), VT);
7139
7140 // VANDN(x, SplatVector(Imm)) -> AND(NOT(x), NOT(SplatVector(~Imm)))
7141 // -> NOT(OR(x, SplatVector(-Imm))
7142 // Combination is performed only when VT is v16i8/v32i8, using `vnori.b` to
7143 // gain benefits.
7144 if (!DCI.isBeforeLegalizeOps() && (VT == MVT::v16i8 || VT == MVT::v32i8) &&
7145 N1.getOpcode() == ISD::BUILD_VECTOR) {
7146 if (SDValue SplatValue =
7147 cast<BuildVectorSDNode>(N1.getNode())->getSplatValue()) {
7148 if (!N1->isOnlyUserOf(SplatValue.getNode()))
7149 return SDValue();
7150
7151 if (auto *C = dyn_cast<ConstantSDNode>(SplatValue)) {
// ~Imm truncated to an 8-bit lane value; the outer getNOT supplies the
// final inversion so the whole pattern maps onto a single vnori.b.
7152 uint8_t NCVal = static_cast<uint8_t>(~(C->getSExtValue()));
7153 SDValue Not =
7154 DAG.getSplat(VT, DL, DAG.getTargetConstant(NCVal, DL, MVT::i8));
7155 return DAG.getNOT(
7156 DL, DAG.getNode(ISD::OR, DL, VT, N0, DAG.getBitcast(VT, Not)),
7157 VT);
7158 }
7159 }
7160 }
7161 }
7162
7163 return SDValue();
7164 }
7165
// Combine (sint_to_fp (load X)) -> (SITOF (fp load X)) when the integer load
// feeds only this conversion, eliminating a GPR->FPR move (movgr2fr).
7168 const LoongArchSubtarget &Subtarget) {
7169 SDLoc DL(N);
7170 EVT VT = N->getValueType(0);
7171
// Only scalar f32/f64 results are handled, and only when the matching FP
// subtarget feature is actually available.
7172 if (VT != MVT::f32 && VT != MVT::f64)
7173 return SDValue();
7174 if (VT == MVT::f32 && !Subtarget.hasBasicF())
7175 return SDValue();
7176 if (VT == MVT::f64 && !Subtarget.hasBasicD())
7177 return SDValue();
7178
7179 // Only optimize when the source and destination types have the same width.
7180 if (VT.getSizeInBits() != N->getOperand(0).getValueSizeInBits())
7181 return SDValue();
7182
7183 SDValue Src = N->getOperand(0);
7184 // If the result of an integer load is only used by an integer-to-float
7185 // conversion, use a fp load instead. This eliminates an integer-to-float-move
7186 // (movgr2fr) instruction.
7187 if (ISD::isNormalLoad(Src.getNode()) && Src.hasOneUse() &&
7188 // Do not change the width of a volatile load. This condition check is
7189 // inspired by AArch64.
7190 !cast<LoadSDNode>(Src)->isVolatile()) {
7191 LoadSDNode *LN0 = cast<LoadSDNode>(Src);
// Re-create the load with FP result type but identical address, alignment
// and memory-operand flags.
7192 SDValue Load = DAG.getLoad(VT, DL, LN0->getChain(), LN0->getBasePtr(),
7193 LN0->getPointerInfo(), LN0->getAlign(),
7194 LN0->getMemOperand()->getFlags());
7195
7196 // Make sure successors of the original load stay after it by updating them
7197 // to use the new Chain.
7198 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), Load.getValue(1));
7199 return DAG.getNode(LoongArchISD::SITOF, SDLoc(N), VT, Load);
7200 }
7201
7202 return SDValue();
7203 }
7204
7205 // Try to widen AND, OR and XOR nodes to VT in order to remove casts around
7206 // logical operations, like in the example below.
7207 // or (and (truncate x, truncate y)),
7208 // (xor (truncate z, build_vector (constants)))
7209 // Given a target type \p VT, we generate
7210 // or (and x, y), (xor z, zext(build_vector (constants)))
7211 // given x, y and z are of type \p VT. We can do so, if operands are either
7212 // truncates from VT types, the second operand is a vector of constants, can
7213 // be recursively promoted or is an existing extension we can extend further.
7215 SelectionDAG &DAG,
7216 const LoongArchSubtarget &Subtarget,
7217 unsigned Depth) {
7218 // Limit recursion to avoid excessive compile times.
7220 return SDValue();
7221
// Only the bitwise logic ops (AND/OR/XOR) can be widened this way.
7222 if (!ISD::isBitwiseLogicOp(N.getOpcode()))
7223 return SDValue();
7224
7225 SDValue N0 = N.getOperand(0);
7226 SDValue N1 = N.getOperand(1);
7227
// Bail out unless the logic op is legal (or promotable) at the wide type.
7228 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
7229 if (!TLI.isOperationLegalOrPromote(N.getOpcode(), VT))
7230 return SDValue();
7231
// Recursively try to promote the left operand first.
7232 if (SDValue NN0 =
7233 PromoteMaskArithmetic(N0, DL, VT, DAG, Subtarget, Depth + 1))
7234 N0 = NN0;
7235 else {
7236 // The left side has to be a 'trunc'.
7237 bool LHSTrunc = N0.getOpcode() == ISD::TRUNCATE &&
7238 N0.getOperand(0).getValueType() == VT;
7239 if (LHSTrunc)
7240 N0 = N0.getOperand(0);
7241 else
7242 return SDValue();
7243 }
7244
7245 if (SDValue NN1 =
7246 PromoteMaskArithmetic(N1, DL, VT, DAG, Subtarget, Depth + 1))
7247 N1 = NN1;
7248 else {
7249 // The right side has to be a 'trunc', a (foldable) constant or an
7250 // existing extension we can extend further.
7251 bool RHSTrunc = N1.getOpcode() == ISD::TRUNCATE &&
7252 N1.getOperand(0).getValueType() == VT;
7253 if (RHSTrunc)
7254 N1 = N1.getOperand(0);
7255 else if (ISD::isExtVecInRegOpcode(N1.getOpcode()) && VT.is256BitVector() &&
7256 Subtarget.hasExtLASX() && N1.hasOneUse())
7257 N1 = DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0));
7258 // On 32-bit platform, i64 is an illegal integer scalar type, and
7259 // FoldConstantArithmetic will fail for v4i64. This may be optimized in the
7260 // future.
7261 else if (SDValue Cst =
7263 N1 = Cst;
7264 else
7265 return SDValue();
7266 }
7267
// Both operands are now of type VT; rebuild the logic op at the wide type.
7268 return DAG.getNode(N.getOpcode(), DL, VT, N0, N1);
7269 }
7270
7271 // On LASX the type v4i1/v8i1/v16i1 may be legalized to v4i32/v8i16/v16i8, which
7272 // is LSX-sized register. In most cases we actually compare or select LASX-sized
7273 // registers and mixing the two types creates horrible code. This method
7274 // optimizes some of the transition sequences.
7276 SelectionDAG &DAG,
7277 const LoongArchSubtarget &Subtarget) {
7278 EVT VT = N.getValueType();
7279 assert(VT.isVector() && "Expected vector type");
// Callers must hand in one of the three extend opcodes on a vector value.
7280 assert((N.getOpcode() == ISD::ANY_EXTEND ||
7281 N.getOpcode() == ISD::ZERO_EXTEND ||
7282 N.getOpcode() == ISD::SIGN_EXTEND) &&
7283 "Invalid Node");
7284
// Only worth doing when widening to a 256-bit LASX register.
7285 if (!Subtarget.hasExtLASX() || !VT.is256BitVector())
7286 return SDValue();
7287
7288 SDValue Narrow = N.getOperand(0);
7289 EVT NarrowVT = Narrow.getValueType();
7290
7291 // Generate the wide operation.
7292 SDValue Op = PromoteMaskArithmetic(Narrow, DL, VT, DAG, Subtarget, 0);
7293 if (!Op)
7294 return SDValue();
// Fix up the high bits of the widened value according to the extend kind:
// any_extend needs nothing, zero_extend masks, sign_extend sign-fills.
7295 switch (N.getOpcode()) {
7296 default:
7297 llvm_unreachable("Unexpected opcode");
7298 case ISD::ANY_EXTEND:
7299 return Op;
7300 case ISD::ZERO_EXTEND:
7301 return DAG.getZeroExtendInReg(Op, DL, NarrowVT);
7302 case ISD::SIGN_EXTEND:
7303 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
7304 DAG.getValueType(NarrowVT));
7305 }
7306 }
7307
// Combine for ISD::ANY_EXTEND / ZERO_EXTEND / SIGN_EXTEND of vectors
// (dispatched from PerformDAGCombine): try to widen the mask arithmetic
// below the extend via PromoteMaskArithmetic.
7310 const LoongArchSubtarget &Subtarget) {
7311 EVT VT = N->getValueType(0);
7312 SDLoc DL(N);
7313
7314 if (VT.isVector())
7315 if (SDValue R = PromoteMaskArithmetic(SDValue(N, 0), DL, DAG, Subtarget))
7316 return R;
7317
7318 return SDValue();
7319 }
7320
// NOTE(review): the signature/name line was lost in extraction; based on the
// dispatcher below this is presumably performCONCAT_VECTORSCombine — confirm
// against the full source. For a two-operand vector node, try the FP_ROUND
// pattern combine.
7321 static SDValue
7324 const LoongArchSubtarget &Subtarget) {
7325 SDLoc DL(N);
7326 EVT VT = N->getValueType(0);
7327
7328 if (VT.isVector() && N->getNumOperands() == 2)
7329 if (SDValue R = combineFP_ROUND(SDValue(N, 0), DL, DAG, Subtarget))
7330 return R;
7331
7332 return SDValue();
7333 }
7334
// Central DAG-combine dispatcher for the LoongArch backend: routes each
// generic or LoongArch-specific opcode to its dedicated combine helper.
// Returning an empty SDValue means no target combine applied.
7336 DAGCombinerInfo &DCI) const {
7337 SelectionDAG &DAG = DCI.DAG;
7338 switch (N->getOpcode()) {
7339 default:
7340 break;
7341 case ISD::AND:
7342 return performANDCombine(N, DAG, DCI, Subtarget);
7343 case ISD::OR:
7344 return performORCombine(N, DAG, DCI, Subtarget);
7345 case ISD::SETCC:
7346 return performSETCCCombine(N, DAG, DCI, Subtarget);
7347 case ISD::SRL:
7348 return performSRLCombine(N, DAG, DCI, Subtarget);
7349 case ISD::BITCAST:
7350 return performBITCASTCombine(N, DAG, DCI, Subtarget);
7351 case ISD::ANY_EXTEND:
7352 case ISD::ZERO_EXTEND:
7353 case ISD::SIGN_EXTEND:
7354 return performEXTENDCombine(N, DAG, DCI, Subtarget);
7355 case ISD::SINT_TO_FP:
7356 return performSINT_TO_FPCombine(N, DAG, DCI, Subtarget);
7357 case LoongArchISD::BITREV_W:
7358 return performBITREV_WCombine(N, DAG, DCI, Subtarget);
7359 case LoongArchISD::BR_CC:
7360 return performBR_CCCombine(N, DAG, DCI, Subtarget);
7361 case LoongArchISD::SELECT_CC:
7362 return performSELECT_CCCombine(N, DAG, DCI, Subtarget);
7364 return performINTRINSIC_WO_CHAINCombine(N, DAG, DCI, Subtarget);
7365 case LoongArchISD::MOVGR2FR_W_LA64:
7366 return performMOVGR2FR_WCombine(N, DAG, DCI, Subtarget);
7367 case LoongArchISD::MOVFR2GR_S_LA64:
7368 return performMOVFR2GR_SCombine(N, DAG, DCI, Subtarget);
7369 case LoongArchISD::VMSKLTZ:
7370 case LoongArchISD::XVMSKLTZ:
7371 return performVMSKLTZCombine(N, DAG, DCI, Subtarget);
7372 case LoongArchISD::SPLIT_PAIR_F64:
7373 return performSPLIT_PAIR_F64Combine(N, DAG, DCI, Subtarget);
7374 case LoongArchISD::VANDN:
7375 return performVANDNCombine(N, DAG, DCI, Subtarget);
7377 return performCONCAT_VECTORSCombine(N, DAG, DCI, Subtarget);
7378 case LoongArchISD::VPACKEV:
// VPACKEV may be a lowered FP_ROUND pair; if the pattern matches, fold it.
7379 if (SDValue Result =
7380 combineFP_ROUND(SDValue(N, 0), SDLoc(N), DAG, Subtarget))
7381 return Result;
7382 }
7383 return SDValue();
7384 }
7385
7388 if (!ZeroDivCheck)
7389 return MBB;
7390
7391 // Build instructions:
7392 // MBB:
7393 // div(or mod) $dst, $dividend, $divisor
7394 // bne $divisor, $zero, SinkMBB
7395 // BreakMBB:
7396 // break 7 // BRK_DIVZERO
7397 // SinkMBB:
7398 // fallthrough
7399 const BasicBlock *LLVM_BB = MBB->getBasicBlock();
7400 MachineFunction::iterator It = ++MBB->getIterator();
7401 MachineFunction *MF = MBB->getParent();
7402 auto BreakMBB = MF->CreateMachineBasicBlock(LLVM_BB);
7403 auto SinkMBB = MF->CreateMachineBasicBlock(LLVM_BB);
7404 MF->insert(It, BreakMBB);
7405 MF->insert(It, SinkMBB);
7406
7407 // Transfer the remainder of MBB and its successor edges to SinkMBB.
7408 SinkMBB->splice(SinkMBB->end(), MBB, std::next(MI.getIterator()), MBB->end());
7409 SinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
7410
7411 const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
7412 DebugLoc DL = MI.getDebugLoc();
7413 MachineOperand &Divisor = MI.getOperand(2);
7414 Register DivisorReg = Divisor.getReg();
7415
7416 // MBB:
7417 BuildMI(MBB, DL, TII.get(LoongArch::BNE))
7418 .addReg(DivisorReg, getKillRegState(Divisor.isKill()))
7419 .addReg(LoongArch::R0)
7420 .addMBB(SinkMBB);
7421 MBB->addSuccessor(BreakMBB);
7422 MBB->addSuccessor(SinkMBB);
7423
7424 // BreakMBB:
7425 // See linux header file arch/loongarch/include/uapi/asm/break.h for the
7426 // definition of BRK_DIVZERO.
7427 BuildMI(BreakMBB, DL, TII.get(LoongArch::BREAK)).addImm(7 /*BRK_DIVZERO*/);
7428 BreakMBB->addSuccessor(SinkMBB);
7429
7430 // Clear Divisor's kill flag.
7431 Divisor.setIsKill(false);
7432
7433 return SinkMBB;
7434}
7435
7436static MachineBasicBlock *
7438 const LoongArchSubtarget &Subtarget) {
7439 unsigned CondOpc;
7440 switch (MI.getOpcode()) {
7441 default:
7442 llvm_unreachable("Unexpected opcode");
7443 case LoongArch::PseudoVBZ:
7444 CondOpc = LoongArch::VSETEQZ_V;
7445 break;
7446 case LoongArch::PseudoVBZ_B:
7447 CondOpc = LoongArch::VSETANYEQZ_B;
7448 break;
7449 case LoongArch::PseudoVBZ_H:
7450 CondOpc = LoongArch::VSETANYEQZ_H;
7451 break;
7452 case LoongArch::PseudoVBZ_W:
7453 CondOpc = LoongArch::VSETANYEQZ_W;
7454 break;
7455 case LoongArch::PseudoVBZ_D:
7456 CondOpc = LoongArch::VSETANYEQZ_D;
7457 break;
7458 case LoongArch::PseudoVBNZ:
7459 CondOpc = LoongArch::VSETNEZ_V;
7460 break;
7461 case LoongArch::PseudoVBNZ_B:
7462 CondOpc = LoongArch::VSETALLNEZ_B;
7463 break;
7464 case LoongArch::PseudoVBNZ_H:
7465 CondOpc = LoongArch::VSETALLNEZ_H;
7466 break;
7467 case LoongArch::PseudoVBNZ_W:
7468 CondOpc = LoongArch::VSETALLNEZ_W;
7469 break;
7470 case LoongArch::PseudoVBNZ_D:
7471 CondOpc = LoongArch::VSETALLNEZ_D;
7472 break;
7473 case LoongArch::PseudoXVBZ:
7474 CondOpc = LoongArch::XVSETEQZ_V;
7475 break;
7476 case LoongArch::PseudoXVBZ_B:
7477 CondOpc = LoongArch::XVSETANYEQZ_B;
7478 break;
7479 case LoongArch::PseudoXVBZ_H:
7480 CondOpc = LoongArch::XVSETANYEQZ_H;
7481 break;
7482 case LoongArch::PseudoXVBZ_W:
7483 CondOpc = LoongArch::XVSETANYEQZ_W;
7484 break;
7485 case LoongArch::PseudoXVBZ_D:
7486 CondOpc = LoongArch::XVSETANYEQZ_D;
7487 break;
7488 case LoongArch::PseudoXVBNZ:
7489 CondOpc = LoongArch::XVSETNEZ_V;
7490 break;
7491 case LoongArch::PseudoXVBNZ_B:
7492 CondOpc = LoongArch::XVSETALLNEZ_B;
7493 break;
7494 case LoongArch::PseudoXVBNZ_H:
7495 CondOpc = LoongArch::XVSETALLNEZ_H;
7496 break;
7497 case LoongArch::PseudoXVBNZ_W:
7498 CondOpc = LoongArch::XVSETALLNEZ_W;
7499 break;
7500 case LoongArch::PseudoXVBNZ_D:
7501 CondOpc = LoongArch::XVSETALLNEZ_D;
7502 break;
7503 }
7504
7505 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
7506 const BasicBlock *LLVM_BB = BB->getBasicBlock();
7507 DebugLoc DL = MI.getDebugLoc();
7510
7511 MachineFunction *F = BB->getParent();
7512 MachineBasicBlock *FalseBB = F->CreateMachineBasicBlock(LLVM_BB);
7513 MachineBasicBlock *TrueBB = F->CreateMachineBasicBlock(LLVM_BB);
7514 MachineBasicBlock *SinkBB = F->CreateMachineBasicBlock(LLVM_BB);
7515
7516 F->insert(It, FalseBB);
7517 F->insert(It, TrueBB);
7518 F->insert(It, SinkBB);
7519
7520 // Transfer the remainder of MBB and its successor edges to Sink.
7521 SinkBB->splice(SinkBB->end(), BB, std::next(MI.getIterator()), BB->end());
7523
7524 // Insert the real instruction to BB.
7525 Register FCC = MRI.createVirtualRegister(&LoongArch::CFRRegClass);
7526 BuildMI(BB, DL, TII->get(CondOpc), FCC).addReg(MI.getOperand(1).getReg());
7527
7528 // Insert branch.
7529 BuildMI(BB, DL, TII->get(LoongArch::BCNEZ)).addReg(FCC).addMBB(TrueBB);
7530 BB->addSuccessor(FalseBB);
7531 BB->addSuccessor(TrueBB);
7532
7533 // FalseBB.
7534 Register RD1 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
7535 BuildMI(FalseBB, DL, TII->get(LoongArch::ADDI_W), RD1)
7536 .addReg(LoongArch::R0)
7537 .addImm(0);
7538 BuildMI(FalseBB, DL, TII->get(LoongArch::PseudoBR)).addMBB(SinkBB);
7539 FalseBB->addSuccessor(SinkBB);
7540
7541 // TrueBB.
7542 Register RD2 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
7543 BuildMI(TrueBB, DL, TII->get(LoongArch::ADDI_W), RD2)
7544 .addReg(LoongArch::R0)
7545 .addImm(1);
7546 TrueBB->addSuccessor(SinkBB);
7547
7548 // SinkBB: merge the results.
7549 BuildMI(*SinkBB, SinkBB->begin(), DL, TII->get(LoongArch::PHI),
7550 MI.getOperand(0).getReg())
7551 .addReg(RD1)
7552 .addMBB(FalseBB)
7553 .addReg(RD2)
7554 .addMBB(TrueBB);
7555
7556 // The pseudo instruction is gone now.
7557 MI.eraseFromParent();
7558 return SinkBB;
7559}
7560
7561static MachineBasicBlock *
7563 const LoongArchSubtarget &Subtarget) {
7564 unsigned InsOp;
7565 unsigned BroadcastOp;
7566 unsigned HalfSize;
7567 switch (MI.getOpcode()) {
7568 default:
7569 llvm_unreachable("Unexpected opcode");
7570 case LoongArch::PseudoXVINSGR2VR_B:
7571 HalfSize = 16;
7572 BroadcastOp = LoongArch::XVREPLGR2VR_B;
7573 InsOp = LoongArch::XVEXTRINS_B;
7574 break;
7575 case LoongArch::PseudoXVINSGR2VR_H:
7576 HalfSize = 8;
7577 BroadcastOp = LoongArch::XVREPLGR2VR_H;
7578 InsOp = LoongArch::XVEXTRINS_H;
7579 break;
7580 }
7581 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
7582 const TargetRegisterClass *RC = &LoongArch::LASX256RegClass;
7583 const TargetRegisterClass *SubRC = &LoongArch::LSX128RegClass;
7584 DebugLoc DL = MI.getDebugLoc();
7586 // XDst = vector_insert XSrc, Elt, Idx
7587 Register XDst = MI.getOperand(0).getReg();
7588 Register XSrc = MI.getOperand(1).getReg();
7589 Register Elt = MI.getOperand(2).getReg();
7590 unsigned Idx = MI.getOperand(3).getImm();
7591
7592 if (XSrc.isVirtual() && MRI.getVRegDef(XSrc)->isImplicitDef() &&
7593 Idx < HalfSize) {
7594 Register ScratchSubReg1 = MRI.createVirtualRegister(SubRC);
7595 Register ScratchSubReg2 = MRI.createVirtualRegister(SubRC);
7596
7597 BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), ScratchSubReg1)
7598 .addReg(XSrc, {}, LoongArch::sub_128);
7599 BuildMI(*BB, MI, DL,
7600 TII->get(HalfSize == 8 ? LoongArch::VINSGR2VR_H
7601 : LoongArch::VINSGR2VR_B),
7602 ScratchSubReg2)
7603 .addReg(ScratchSubReg1)
7604 .addReg(Elt)
7605 .addImm(Idx);
7606
7607 BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), XDst)
7608 .addReg(ScratchSubReg2)
7609 .addImm(LoongArch::sub_128);
7610 } else {
7611 Register ScratchReg1 = MRI.createVirtualRegister(RC);
7612 Register ScratchReg2 = MRI.createVirtualRegister(RC);
7613
7614 BuildMI(*BB, MI, DL, TII->get(BroadcastOp), ScratchReg1).addReg(Elt);
7615
7616 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), ScratchReg2)
7617 .addReg(ScratchReg1)
7618 .addReg(XSrc)
7619 .addImm(Idx >= HalfSize ? 48 : 18);
7620
7621 BuildMI(*BB, MI, DL, TII->get(InsOp), XDst)
7622 .addReg(XSrc)
7623 .addReg(ScratchReg2)
7624 .addImm((Idx >= HalfSize ? Idx - HalfSize : Idx) * 17);
7625 }
7626
7627 MI.eraseFromParent();
7628 return BB;
7629}
7630
7633 const LoongArchSubtarget &Subtarget) {
7634 assert(Subtarget.hasExtLSX());
7635 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
7636 const TargetRegisterClass *RC = &LoongArch::LSX128RegClass;
7637 DebugLoc DL = MI.getDebugLoc();
7639 Register Dst = MI.getOperand(0).getReg();
7640 Register Src = MI.getOperand(1).getReg();
7641 Register ScratchReg1 = MRI.createVirtualRegister(RC);
7642 Register ScratchReg2 = MRI.createVirtualRegister(RC);
7643 Register ScratchReg3 = MRI.createVirtualRegister(RC);
7644
7645 BuildMI(*BB, MI, DL, TII->get(LoongArch::VLDI), ScratchReg1).addImm(0);
7646 BuildMI(*BB, MI, DL,
7647 TII->get(Subtarget.is64Bit() ? LoongArch::VINSGR2VR_D
7648 : LoongArch::VINSGR2VR_W),
7649 ScratchReg2)
7650 .addReg(ScratchReg1)
7651 .addReg(Src)
7652 .addImm(0);
7653 BuildMI(
7654 *BB, MI, DL,
7655 TII->get(Subtarget.is64Bit() ? LoongArch::VPCNT_D : LoongArch::VPCNT_W),
7656 ScratchReg3)
7657 .addReg(ScratchReg2);
7658 BuildMI(*BB, MI, DL,
7659 TII->get(Subtarget.is64Bit() ? LoongArch::VPICKVE2GR_D
7660 : LoongArch::VPICKVE2GR_W),
7661 Dst)
7662 .addReg(ScratchReg3)
7663 .addImm(0);
7664
7665 MI.eraseFromParent();
7666 return BB;
7667}
7668
7669static MachineBasicBlock *
7671 const LoongArchSubtarget &Subtarget) {
7672 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
7673 const TargetRegisterClass *RC = &LoongArch::LSX128RegClass;
7674 const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
7676 Register Dst = MI.getOperand(0).getReg();
7677 Register Src = MI.getOperand(1).getReg();
7678 DebugLoc DL = MI.getDebugLoc();
7679 unsigned EleBits = 8;
7680 unsigned NotOpc = 0;
7681 unsigned MskOpc;
7682
7683 switch (MI.getOpcode()) {
7684 default:
7685 llvm_unreachable("Unexpected opcode");
7686 case LoongArch::PseudoVMSKLTZ_B:
7687 MskOpc = LoongArch::VMSKLTZ_B;
7688 break;
7689 case LoongArch::PseudoVMSKLTZ_H:
7690 MskOpc = LoongArch::VMSKLTZ_H;
7691 EleBits = 16;
7692 break;
7693 case LoongArch::PseudoVMSKLTZ_W:
7694 MskOpc = LoongArch::VMSKLTZ_W;
7695 EleBits = 32;
7696 break;
7697 case LoongArch::PseudoVMSKLTZ_D:
7698 MskOpc = LoongArch::VMSKLTZ_D;
7699 EleBits = 64;
7700 break;
7701 case LoongArch::PseudoVMSKGEZ_B:
7702 MskOpc = LoongArch::VMSKGEZ_B;
7703 break;
7704 case LoongArch::PseudoVMSKEQZ_B:
7705 MskOpc = LoongArch::VMSKNZ_B;
7706 NotOpc = LoongArch::VNOR_V;
7707 break;
7708 case LoongArch::PseudoVMSKNEZ_B:
7709 MskOpc = LoongArch::VMSKNZ_B;
7710 break;
7711 case LoongArch::PseudoXVMSKLTZ_B:
7712 MskOpc = LoongArch::XVMSKLTZ_B;
7713 RC = &LoongArch::LASX256RegClass;
7714 break;
7715 case LoongArch::PseudoXVMSKLTZ_H:
7716 MskOpc = LoongArch::XVMSKLTZ_H;
7717 RC = &LoongArch::LASX256RegClass;
7718 EleBits = 16;
7719 break;
7720 case LoongArch::PseudoXVMSKLTZ_W:
7721 MskOpc = LoongArch::XVMSKLTZ_W;
7722 RC = &LoongArch::LASX256RegClass;
7723 EleBits = 32;
7724 break;
7725 case LoongArch::PseudoXVMSKLTZ_D:
7726 MskOpc = LoongArch::XVMSKLTZ_D;
7727 RC = &LoongArch::LASX256RegClass;
7728 EleBits = 64;
7729 break;
7730 case LoongArch::PseudoXVMSKGEZ_B:
7731 MskOpc = LoongArch::XVMSKGEZ_B;
7732 RC = &LoongArch::LASX256RegClass;
7733 break;
7734 case LoongArch::PseudoXVMSKEQZ_B:
7735 MskOpc = LoongArch::XVMSKNZ_B;
7736 NotOpc = LoongArch::XVNOR_V;
7737 RC = &LoongArch::LASX256RegClass;
7738 break;
7739 case LoongArch::PseudoXVMSKNEZ_B:
7740 MskOpc = LoongArch::XVMSKNZ_B;
7741 RC = &LoongArch::LASX256RegClass;
7742 break;
7743 }
7744
7745 Register Msk = MRI.createVirtualRegister(RC);
7746 if (NotOpc) {
7747 Register Tmp = MRI.createVirtualRegister(RC);
7748 BuildMI(*BB, MI, DL, TII->get(MskOpc), Tmp).addReg(Src);
7749 BuildMI(*BB, MI, DL, TII->get(NotOpc), Msk)
7750 .addReg(Tmp, RegState::Kill)
7751 .addReg(Tmp, RegState::Kill);
7752 } else {
7753 BuildMI(*BB, MI, DL, TII->get(MskOpc), Msk).addReg(Src);
7754 }
7755
7756 if (TRI->getRegSizeInBits(*RC) > 128) {
7757 Register Lo = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
7758 Register Hi = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
7759 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPICKVE2GR_WU), Lo)
7760 .addReg(Msk)
7761 .addImm(0);
7762 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPICKVE2GR_WU), Hi)
7763 .addReg(Msk, RegState::Kill)
7764 .addImm(4);
7765 BuildMI(*BB, MI, DL,
7766 TII->get(Subtarget.is64Bit() ? LoongArch::BSTRINS_D
7767 : LoongArch::BSTRINS_W),
7768 Dst)
7771 .addImm(256 / EleBits - 1)
7772 .addImm(128 / EleBits);
7773 } else {
7774 BuildMI(*BB, MI, DL, TII->get(LoongArch::VPICKVE2GR_HU), Dst)
7775 .addReg(Msk, RegState::Kill)
7776 .addImm(0);
7777 }
7778
7779 MI.eraseFromParent();
7780 return BB;
7781}
7782
7783static MachineBasicBlock *
7785 const LoongArchSubtarget &Subtarget) {
7786 assert(MI.getOpcode() == LoongArch::SplitPairF64Pseudo &&
7787 "Unexpected instruction");
7788
7789 MachineFunction &MF = *BB->getParent();
7790 DebugLoc DL = MI.getDebugLoc();
7792 Register LoReg = MI.getOperand(0).getReg();
7793 Register HiReg = MI.getOperand(1).getReg();
7794 Register SrcReg = MI.getOperand(2).getReg();
7795
7796 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVFR2GR_S_64), LoReg).addReg(SrcReg);
7797 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVFRH2GR_S), HiReg)
7798 .addReg(SrcReg, getKillRegState(MI.getOperand(2).isKill()));
7799 MI.eraseFromParent(); // The pseudo instruction is gone now.
7800 return BB;
7801}
7802
7803static MachineBasicBlock *
7805 const LoongArchSubtarget &Subtarget) {
7806 assert(MI.getOpcode() == LoongArch::BuildPairF64Pseudo &&
7807 "Unexpected instruction");
7808
7809 MachineFunction &MF = *BB->getParent();
7810 DebugLoc DL = MI.getDebugLoc();
7813 Register TmpReg = MRI.createVirtualRegister(&LoongArch::FPR64RegClass);
7814 Register DstReg = MI.getOperand(0).getReg();
7815 Register LoReg = MI.getOperand(1).getReg();
7816 Register HiReg = MI.getOperand(2).getReg();
7817
7818 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVGR2FR_W_64), TmpReg)
7819 .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()));
7820 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVGR2FRH_W), DstReg)
7821 .addReg(TmpReg, RegState::Kill)
7822 .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()));
7823 MI.eraseFromParent(); // The pseudo instruction is gone now.
7824 return BB;
7825}
7826
7828 switch (MI.getOpcode()) {
7829 default:
7830 return false;
7831 case LoongArch::Select_GPR_Using_CC_GPR:
7832 return true;
7833 }
7834}
7835
7836static MachineBasicBlock *
7838 const LoongArchSubtarget &Subtarget) {
7839 // To "insert" Select_* instructions, we actually have to insert the triangle
7840 // control-flow pattern. The incoming instructions know the destination vreg
7841 // to set, the condition code register to branch on, the true/false values to
7842 // select between, and the condcode to use to select the appropriate branch.
7843 //
7844 // We produce the following control flow:
7845 // HeadMBB
7846 // | \
7847 // | IfFalseMBB
7848 // | /
7849 // TailMBB
7850 //
7851 // When we find a sequence of selects we attempt to optimize their emission
7852 // by sharing the control flow. Currently we only handle cases where we have
7853 // multiple selects with the exact same condition (same LHS, RHS and CC).
7854 // The selects may be interleaved with other instructions if the other
7855 // instructions meet some requirements we deem safe:
7856 // - They are not pseudo instructions.
7857 // - They are debug instructions. Otherwise,
7858 // - They do not have side-effects, do not access memory and their inputs do
7859 // not depend on the results of the select pseudo-instructions.
7860 // The TrueV/FalseV operands of the selects cannot depend on the result of
7861 // previous selects in the sequence.
7862 // These conditions could be further relaxed. See the X86 target for a
7863 // related approach and more information.
7864
7865 Register LHS = MI.getOperand(1).getReg();
7866 Register RHS;
7867 if (MI.getOperand(2).isReg())
7868 RHS = MI.getOperand(2).getReg();
7869 auto CC = static_cast<unsigned>(MI.getOperand(3).getImm());
7870
7871 SmallVector<MachineInstr *, 4> SelectDebugValues;
7872 SmallSet<Register, 4> SelectDests;
7873 SelectDests.insert(MI.getOperand(0).getReg());
7874
7875 MachineInstr *LastSelectPseudo = &MI;
7876 for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
7877 SequenceMBBI != E; ++SequenceMBBI) {
7878 if (SequenceMBBI->isDebugInstr())
7879 continue;
7880 if (isSelectPseudo(*SequenceMBBI)) {
7881 if (SequenceMBBI->getOperand(1).getReg() != LHS ||
7882 !SequenceMBBI->getOperand(2).isReg() ||
7883 SequenceMBBI->getOperand(2).getReg() != RHS ||
7884 SequenceMBBI->getOperand(3).getImm() != CC ||
7885 SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
7886 SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
7887 break;
7888 LastSelectPseudo = &*SequenceMBBI;
7889 SequenceMBBI->collectDebugValues(SelectDebugValues);
7890 SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
7891 continue;
7892 }
7893 if (SequenceMBBI->hasUnmodeledSideEffects() ||
7894 SequenceMBBI->mayLoadOrStore() ||
7895 SequenceMBBI->usesCustomInsertionHook())
7896 break;
7897 if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
7898 return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
7899 }))
7900 break;
7901 }
7902
7903 const LoongArchInstrInfo &TII = *Subtarget.getInstrInfo();
7904 const BasicBlock *LLVM_BB = BB->getBasicBlock();
7905 DebugLoc DL = MI.getDebugLoc();
7907
7908 MachineBasicBlock *HeadMBB = BB;
7909 MachineFunction *F = BB->getParent();
7910 MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
7911 MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
7912
7913 F->insert(I, IfFalseMBB);
7914 F->insert(I, TailMBB);
7915
7916 // Set the call frame size on entry to the new basic blocks.
7917 unsigned CallFrameSize = TII.getCallFrameSizeAt(*LastSelectPseudo);
7918 IfFalseMBB->setCallFrameSize(CallFrameSize);
7919 TailMBB->setCallFrameSize(CallFrameSize);
7920
7921 // Transfer debug instructions associated with the selects to TailMBB.
7922 for (MachineInstr *DebugInstr : SelectDebugValues) {
7923 TailMBB->push_back(DebugInstr->removeFromParent());
7924 }
7925
7926 // Move all instructions after the sequence to TailMBB.
7927 TailMBB->splice(TailMBB->end(), HeadMBB,
7928 std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
7929 // Update machine-CFG edges by transferring all successors of the current
7930 // block to the new block which will contain the Phi nodes for the selects.
7931 TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
7932 // Set the successors for HeadMBB.
7933 HeadMBB->addSuccessor(IfFalseMBB);
7934 HeadMBB->addSuccessor(TailMBB);
7935
7936 // Insert appropriate branch.
7937 if (MI.getOperand(2).isImm())
7938 BuildMI(HeadMBB, DL, TII.get(CC))
7939 .addReg(LHS)
7940 .addImm(MI.getOperand(2).getImm())
7941 .addMBB(TailMBB);
7942 else
7943 BuildMI(HeadMBB, DL, TII.get(CC)).addReg(LHS).addReg(RHS).addMBB(TailMBB);
7944
7945 // IfFalseMBB just falls through to TailMBB.
7946 IfFalseMBB->addSuccessor(TailMBB);
7947
7948 // Create PHIs for all of the select pseudo-instructions.
7949 auto SelectMBBI = MI.getIterator();
7950 auto SelectEnd = std::next(LastSelectPseudo->getIterator());
7951 auto InsertionPoint = TailMBB->begin();
7952 while (SelectMBBI != SelectEnd) {
7953 auto Next = std::next(SelectMBBI);
7954 if (isSelectPseudo(*SelectMBBI)) {
7955 // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
7956 BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
7957 TII.get(LoongArch::PHI), SelectMBBI->getOperand(0).getReg())
7958 .addReg(SelectMBBI->getOperand(4).getReg())
7959 .addMBB(HeadMBB)
7960 .addReg(SelectMBBI->getOperand(5).getReg())
7961 .addMBB(IfFalseMBB);
7962 SelectMBBI->eraseFromParent();
7963 }
7964 SelectMBBI = Next;
7965 }
7966
7967 F->getProperties().resetNoPHIs();
7968 return TailMBB;
7969}
7970
7971MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter(
7972 MachineInstr &MI, MachineBasicBlock *BB) const {
7973 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
7974 DebugLoc DL = MI.getDebugLoc();
7975
7976 switch (MI.getOpcode()) {
7977 default:
7978 llvm_unreachable("Unexpected instr type to insert");
7979 case LoongArch::DIV_W:
7980 case LoongArch::DIV_WU:
7981 case LoongArch::MOD_W:
7982 case LoongArch::MOD_WU:
7983 case LoongArch::DIV_D:
7984 case LoongArch::DIV_DU:
7985 case LoongArch::MOD_D:
7986 case LoongArch::MOD_DU:
7987 return insertDivByZeroTrap(MI, BB);
7988 break;
7989 case LoongArch::WRFCSR: {
7990 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVGR2FCSR),
7991 LoongArch::FCSR0 + MI.getOperand(0).getImm())
7992 .addReg(MI.getOperand(1).getReg());
7993 MI.eraseFromParent();
7994 return BB;
7995 }
7996 case LoongArch::RDFCSR: {
7997 MachineInstr *ReadFCSR =
7998 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVFCSR2GR),
7999 MI.getOperand(0).getReg())
8000 .addReg(LoongArch::FCSR0 + MI.getOperand(1).getImm());
8001 ReadFCSR->getOperand(1).setIsUndef();
8002 MI.eraseFromParent();
8003 return BB;
8004 }
8005 case LoongArch::Select_GPR_Using_CC_GPR:
8006 return emitSelectPseudo(MI, BB, Subtarget);
8007 case LoongArch::BuildPairF64Pseudo:
8008 return emitBuildPairF64Pseudo(MI, BB, Subtarget);
8009 case LoongArch::SplitPairF64Pseudo:
8010 return emitSplitPairF64Pseudo(MI, BB, Subtarget);
8011 case LoongArch::PseudoVBZ:
8012 case LoongArch::PseudoVBZ_B:
8013 case LoongArch::PseudoVBZ_H:
8014 case LoongArch::PseudoVBZ_W:
8015 case LoongArch::PseudoVBZ_D:
8016 case LoongArch::PseudoVBNZ:
8017 case LoongArch::PseudoVBNZ_B:
8018 case LoongArch::PseudoVBNZ_H:
8019 case LoongArch::PseudoVBNZ_W:
8020 case LoongArch::PseudoVBNZ_D:
8021 case LoongArch::PseudoXVBZ:
8022 case LoongArch::PseudoXVBZ_B:
8023 case LoongArch::PseudoXVBZ_H:
8024 case LoongArch::PseudoXVBZ_W:
8025 case LoongArch::PseudoXVBZ_D:
8026 case LoongArch::PseudoXVBNZ:
8027 case LoongArch::PseudoXVBNZ_B:
8028 case LoongArch::PseudoXVBNZ_H:
8029 case LoongArch::PseudoXVBNZ_W:
8030 case LoongArch::PseudoXVBNZ_D:
8031 return emitVecCondBranchPseudo(MI, BB, Subtarget);
8032 case LoongArch::PseudoXVINSGR2VR_B:
8033 case LoongArch::PseudoXVINSGR2VR_H:
8034 return emitPseudoXVINSGR2VR(MI, BB, Subtarget);
8035 case LoongArch::PseudoCTPOP:
8036 return emitPseudoCTPOP(MI, BB, Subtarget);
8037 case LoongArch::PseudoVMSKLTZ_B:
8038 case LoongArch::PseudoVMSKLTZ_H:
8039 case LoongArch::PseudoVMSKLTZ_W:
8040 case LoongArch::PseudoVMSKLTZ_D:
8041 case LoongArch::PseudoVMSKGEZ_B:
8042 case LoongArch::PseudoVMSKEQZ_B:
8043 case LoongArch::PseudoVMSKNEZ_B:
8044 case LoongArch::PseudoXVMSKLTZ_B:
8045 case LoongArch::PseudoXVMSKLTZ_H:
8046 case LoongArch::PseudoXVMSKLTZ_W:
8047 case LoongArch::PseudoXVMSKLTZ_D:
8048 case LoongArch::PseudoXVMSKGEZ_B:
8049 case LoongArch::PseudoXVMSKEQZ_B:
8050 case LoongArch::PseudoXVMSKNEZ_B:
8051 return emitPseudoVMSKCOND(MI, BB, Subtarget);
8052 case TargetOpcode::STATEPOINT:
8053 // STATEPOINT is a pseudo instruction which has no implicit defs/uses
8054 // while bl call instruction (where statepoint will be lowered at the
8055 // end) has implicit def. This def is early-clobber as it will be set at
8056 // the moment of the call and earlier than any use is read.
8057 // Add this implicit dead def here as a workaround.
8058 MI.addOperand(*MI.getMF(),
8060 LoongArch::R1, /*isDef*/ true,
8061 /*isImp*/ true, /*isKill*/ false, /*isDead*/ true,
8062 /*isUndef*/ false, /*isEarlyClobber*/ true));
8063 if (!Subtarget.is64Bit())
8064 report_fatal_error("STATEPOINT is only supported on 64-bit targets");
8065 return emitPatchPoint(MI, BB);
8066 }
8067}
8068
8070 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
8071 unsigned *Fast) const {
8072 if (!Subtarget.hasUAL())
8073 return false;
8074
8075 // TODO: set reasonable speed number.
8076 if (Fast)
8077 *Fast = 1;
8078 return true;
8079}
8080
8081//===----------------------------------------------------------------------===//
8082// Calling Convention Implementation
8083//===----------------------------------------------------------------------===//
8084
8085// Eight general-purpose registers a0-a7 used for passing integer arguments,
8086// with a0-a1 reused to return values. Generally, the GPRs are used to pass
8087// fixed-point arguments, and floating-point arguments when no FPR is available
8088// or with soft float ABI.
8089const MCPhysReg ArgGPRs[] = {LoongArch::R4, LoongArch::R5, LoongArch::R6,
8090 LoongArch::R7, LoongArch::R8, LoongArch::R9,
8091 LoongArch::R10, LoongArch::R11};
8092
8093// PreserveNone calling convention:
8094// Arguments may be passed in any general-purpose registers except:
8095// - R1 : return address register
8096// - R22 : frame pointer
8097// - R31 : base pointer
8098//
8099// All general-purpose registers are treated as caller-saved,
8100// except R1 (RA) and R22 (FP).
8101//
8102// Non-volatile registers are allocated first so that a function
8103// can call normal functions without having to spill and reload
8104// argument registers.
8106 LoongArch::R23, LoongArch::R24, LoongArch::R25, LoongArch::R26,
8107 LoongArch::R27, LoongArch::R28, LoongArch::R29, LoongArch::R30,
8108 LoongArch::R4, LoongArch::R5, LoongArch::R6, LoongArch::R7,
8109 LoongArch::R8, LoongArch::R9, LoongArch::R10, LoongArch::R11,
8110 LoongArch::R12, LoongArch::R13, LoongArch::R14, LoongArch::R15,
8111 LoongArch::R16, LoongArch::R17, LoongArch::R18, LoongArch::R19,
8112 LoongArch::R20};
8113
8114// Eight floating-point registers fa0-fa7 used for passing floating-point
8115// arguments, and fa0-fa1 are also used to return values.
8116const MCPhysReg ArgFPR32s[] = {LoongArch::F0, LoongArch::F1, LoongArch::F2,
8117 LoongArch::F3, LoongArch::F4, LoongArch::F5,
8118 LoongArch::F6, LoongArch::F7};
8119// FPR32 and FPR64 alias each other.
8121 LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, LoongArch::F3_64,
8122 LoongArch::F4_64, LoongArch::F5_64, LoongArch::F6_64, LoongArch::F7_64};
8123
8124const MCPhysReg ArgVRs[] = {LoongArch::VR0, LoongArch::VR1, LoongArch::VR2,
8125 LoongArch::VR3, LoongArch::VR4, LoongArch::VR5,
8126 LoongArch::VR6, LoongArch::VR7};
8127
8128const MCPhysReg ArgXRs[] = {LoongArch::XR0, LoongArch::XR1, LoongArch::XR2,
8129 LoongArch::XR3, LoongArch::XR4, LoongArch::XR5,
8130 LoongArch::XR6, LoongArch::XR7};
8131
8133 switch (State.getCallingConv()) {
8135 if (!State.isVarArg())
8136 return State.AllocateReg(PreserveNoneArgGPRs);
8137 [[fallthrough]];
8138 default:
8139 return State.AllocateReg(ArgGPRs);
8140 }
8141}
8142
8143// Pass a 2*GRLen argument that has been split into two GRLen values through
8144// registers or the stack as necessary.
8145static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State,
8146 CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1,
8147 unsigned ValNo2, MVT ValVT2, MVT LocVT2,
8148 ISD::ArgFlagsTy ArgFlags2) {
8149 unsigned GRLenInBytes = GRLen / 8;
8150 if (Register Reg = allocateArgGPR(State)) {
8151 // At least one half can be passed via register.
8152 State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
8153 VA1.getLocVT(), CCValAssign::Full));
8154 } else {
8155 // Both halves must be passed on the stack, with proper alignment.
8156 Align StackAlign =
8157 std::max(Align(GRLenInBytes), ArgFlags1.getNonZeroOrigAlign());
8158 State.addLoc(
8160 State.AllocateStack(GRLenInBytes, StackAlign),
8161 VA1.getLocVT(), CCValAssign::Full));
8162 State.addLoc(CCValAssign::getMem(
8163 ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
8164 LocVT2, CCValAssign::Full));
8165 return false;
8166 }
8167 if (Register Reg = allocateArgGPR(State)) {
8168 // The second half can also be passed via register.
8169 State.addLoc(
8170 CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
8171 } else {
8172 // The second half is passed via the stack, without additional alignment.
8173 State.addLoc(CCValAssign::getMem(
8174 ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
8175 LocVT2, CCValAssign::Full));
8176 }
8177 return false;
8178}
8179
8180// Implements the LoongArch calling convention. Returns true upon failure.
8182 unsigned ValNo, MVT ValVT,
8183 CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
8184 CCState &State, bool IsRet, Type *OrigTy) {
8185 unsigned GRLen = DL.getLargestLegalIntTypeSizeInBits();
8186 assert((GRLen == 32 || GRLen == 64) && "Unspport GRLen");
8187 MVT GRLenVT = GRLen == 32 ? MVT::i32 : MVT::i64;
8188 MVT LocVT = ValVT;
8189
8190 // Any return value split into more than two values can't be returned
8191 // directly.
8192 if (IsRet && ValNo > 1)
8193 return true;
8194
8195 // If passing a variadic argument, or if no FPR is available.
8196 bool UseGPRForFloat = true;
8197
8198 switch (ABI) {
8199 default:
8200 llvm_unreachable("Unexpected ABI");
8201 break;
8206 UseGPRForFloat = ArgFlags.isVarArg();
8207 break;
8210 break;
8211 }
8212
8213 // If this is a variadic argument, the LoongArch calling convention requires
8214 // that it is assigned an 'even' or 'aligned' register if it has (2*GRLen)/8
8215 // byte alignment. An aligned register should be used regardless of whether
8216 // the original argument was split during legalisation or not. The argument
8217 // will not be passed by registers if the original type is larger than
8218 // 2*GRLen, so the register alignment rule does not apply.
8219 unsigned TwoGRLenInBytes = (2 * GRLen) / 8;
8220 if (ArgFlags.isVarArg() &&
8221 ArgFlags.getNonZeroOrigAlign() == TwoGRLenInBytes &&
8222 DL.getTypeAllocSize(OrigTy) == TwoGRLenInBytes) {
8223 unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
8224 // Skip 'odd' register if necessary.
8225 if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1)
8226 State.AllocateReg(ArgGPRs);
8227 }
8228
8229 SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
8230 SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
8231 State.getPendingArgFlags();
8232
8233 assert(PendingLocs.size() == PendingArgFlags.size() &&
8234 "PendingLocs and PendingArgFlags out of sync");
8235
8236 // FPR32 and FPR64 alias each other.
8237 if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s))
8238 UseGPRForFloat = true;
8239
8240 if (UseGPRForFloat && ValVT == MVT::f32) {
8241 LocVT = GRLenVT;
8242 LocInfo = CCValAssign::BCvt;
8243 } else if (UseGPRForFloat && GRLen == 64 && ValVT == MVT::f64) {
8244 LocVT = MVT::i64;
8245 LocInfo = CCValAssign::BCvt;
8246 } else if (UseGPRForFloat && GRLen == 32 && ValVT == MVT::f64) {
8247 // Handle passing f64 on LA32D with a soft float ABI or when floating point
8248 // registers are exhausted.
8249 assert(PendingLocs.empty() && "Can't lower f64 if it is split");
8250 // Depending on available argument GPRS, f64 may be passed in a pair of
8251 // GPRs, split between a GPR and the stack, or passed completely on the
8252 // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
8253 // cases.
8254 MCRegister Reg = allocateArgGPR(State);
8255 if (!Reg) {
8256 int64_t StackOffset = State.AllocateStack(8, Align(8));
8257 State.addLoc(
8258 CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
8259 return false;
8260 }
8261 LocVT = MVT::i32;
8262 State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
8263 MCRegister HiReg = allocateArgGPR(State);
8264 if (HiReg) {
8265 State.addLoc(
8266 CCValAssign::getCustomReg(ValNo, ValVT, HiReg, LocVT, LocInfo));
8267 } else {
8268 int64_t StackOffset = State.AllocateStack(4, Align(4));
8269 State.addLoc(
8270 CCValAssign::getCustomMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
8271 }
8272 return false;
8273 }
8274
8275 // Split arguments might be passed indirectly, so keep track of the pending
8276 // values.
8277 if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
8278 LocVT = GRLenVT;
8279 LocInfo = CCValAssign::Indirect;
8280 PendingLocs.push_back(
8281 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
8282 PendingArgFlags.push_back(ArgFlags);
8283 if (!ArgFlags.isSplitEnd()) {
8284 return false;
8285 }
8286 }
8287
8288 // If the split argument only had two elements, it should be passed directly
8289 // in registers or on the stack.
8290 if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&
8291 PendingLocs.size() <= 2) {
8292 assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
8293 // Apply the normal calling convention rules to the first half of the
8294 // split argument.
8295 CCValAssign VA = PendingLocs[0];
8296 ISD::ArgFlagsTy AF = PendingArgFlags[0];
8297 PendingLocs.clear();
8298 PendingArgFlags.clear();
8299 return CC_LoongArchAssign2GRLen(GRLen, State, VA, AF, ValNo, ValVT, LocVT,
8300 ArgFlags);
8301 }
8302
8303 // Allocate to a register if possible, or else a stack slot.
8304 Register Reg;
8305 unsigned StoreSizeBytes = GRLen / 8;
8306 Align StackAlign = Align(GRLen / 8);
8307
8308 if (ValVT == MVT::f32 && !UseGPRForFloat) {
8309 Reg = State.AllocateReg(ArgFPR32s);
8310 } else if (ValVT == MVT::f64 && !UseGPRForFloat) {
8311 Reg = State.AllocateReg(ArgFPR64s);
8312 } else if (ValVT.is128BitVector()) {
8313 Reg = State.AllocateReg(ArgVRs);
8314 UseGPRForFloat = false;
8315 StoreSizeBytes = 16;
8316 StackAlign = Align(16);
8317 } else if (ValVT.is256BitVector()) {
8318 Reg = State.AllocateReg(ArgXRs);
8319 UseGPRForFloat = false;
8320 StoreSizeBytes = 32;
8321 StackAlign = Align(32);
8322 } else {
8323 Reg = allocateArgGPR(State);
8324 }
8325
8326 unsigned StackOffset =
8327 Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign);
8328
8329 // If we reach this point and PendingLocs is non-empty, we must be at the
8330 // end of a split argument that must be passed indirectly.
8331 if (!PendingLocs.empty()) {
8332 assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
8333 assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
8334 for (auto &It : PendingLocs) {
8335 if (Reg)
8336 It.convertToReg(Reg);
8337 else
8338 It.convertToMem(StackOffset);
8339 State.addLoc(It);
8340 }
8341 PendingLocs.clear();
8342 PendingArgFlags.clear();
8343 return false;
8344 }
8345 assert((!UseGPRForFloat || LocVT == GRLenVT) &&
8346 "Expected an GRLenVT at this stage");
8347
8348 if (Reg) {
8349 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
8350 return false;
8351 }
8352
8353 // When a floating-point value is passed on the stack, no bit-cast is needed.
8354 if (ValVT.isFloatingPoint()) {
8355 LocVT = ValVT;
8356 LocInfo = CCValAssign::Full;
8357 }
8358
8359 State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
8360 return false;
8361}
8362
// Assign a location to every entry of Ins by running it through the
// calling-convention helper Fn. When IsRet is true the entries describe the
// values a call returns, so the IR type consulted is the function's return
// type; otherwise the matching parameter type is used (when the entry maps
// back to an original IR argument). Fn returning true means the value could
// not be handled, which indicates a lowering bug.
void LoongArchTargetLowering::analyzeInputArgs(
    MachineFunction &MF, CCState &CCInfo,
    const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
    LoongArchCCAssignFn Fn) const {
  FunctionType *FType = MF.getFunction().getFunctionType();
  for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
    MVT ArgVT = Ins[i].VT;
    Type *ArgTy = nullptr;
    if (IsRet)
      ArgTy = FType->getReturnType();
    else if (Ins[i].isOrigArg())
      ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
        MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
    if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Ins[i].Flags,
           CCInfo, IsRet, ArgTy)) {
      LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " << ArgVT
                        << '\n');
      llvm_unreachable("");
    }
  }
}
8385
// Assign a location to every entry of Outs via the calling-convention helper
// Fn. CLI (when non-null) supplies the original IR argument types for call
// lowering; it is null when analyzing return values from LowerReturn. A true
// result from Fn means an unhandled type, which indicates a lowering bug.
void LoongArchTargetLowering::analyzeOutputArgs(
    MachineFunction &MF, CCState &CCInfo,
    const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
    CallLoweringInfo *CLI, LoongArchCCAssignFn Fn) const {
  for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
    MVT ArgVT = Outs[i].VT;
    Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
        MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
    if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Outs[i].Flags,
           CCInfo, IsRet, OrigTy)) {
      LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " << ArgVT
                        << "\n");
      llvm_unreachable("");
    }
  }
}
8403
// Convert Val from the location VT it was passed in (register/stack
// representation recorded in VA) back to its value VT. Should not be called
// for CCValAssign::Indirect values.
                               const CCValAssign &VA, const SDLoc &DL) {
  switch (VA.getLocInfo()) {
  default:
    llvm_unreachable("Unexpected CCValAssign::LocInfo");
  case CCValAssign::Full:
    break;
  case CCValAssign::BCvt:
    // An f32 carried in a 64-bit GPR uses the dedicated move node; any other
    // bit-conversion is a plain same-size bitcast.
    if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
      Val = DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Val);
    else
      Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
    break;
  }
  return Val;
}
8423
                                const CCValAssign &VA, const SDLoc &DL,
                                const ISD::InputArg &In,
                                const LoongArchTargetLowering &TLI) {
  // Materialize an argument assigned to a physical register: create a virtual
  // register of the matching class, mark the physreg live-in, and copy the
  // value out of it.
  EVT LocVT = VA.getLocVT();
  SDValue Val;
  const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
  Register VReg = RegInfo.createVirtualRegister(RC);
  RegInfo.addLiveIn(VA.getLocReg(), VReg);
  Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);

  // If input is sign extended from 32 bits, note it for the OptW pass.
  if (In.isOrigArg()) {
    Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex());
    if (OrigArg->getType()->isIntegerTy()) {
      unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
      // An input zero extended from i31 can also be considered sign extended.
      if ((BitWidth <= 32 && In.Flags.isSExt()) ||
          (BitWidth < 32 && In.Flags.isZExt())) {
        LAFI->addSExt32Register(VReg);
      }
    }
  }

  // Convert from the location representation back to the declared value type.
  return convertLocVTToValVT(DAG, Val, VA, DL);
}
8454
// Load an argument that was assigned a stack location. The caller is
// responsible for loading the full value if the argument is passed with
// CCValAssign::Indirect (only the pointer is materialized here).
                                const CCValAssign &VA, const SDLoc &DL) {
  MachineFrameInfo &MFI = MF.getFrameInfo();
  EVT ValVT = VA.getValVT();
  // Fixed object at the incoming-argument offset; immutable because incoming
  // stack arguments are never stored to through this frame index.
  int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
                                 /*IsImmutable=*/true);
  SDValue FIN = DAG.getFrameIndex(

  ISD::LoadExtType ExtType;
  switch (VA.getLocInfo()) {
  default:
    llvm_unreachable("Unexpected CCValAssign::LocInfo");
  case CCValAssign::Full:
  case CCValAssign::BCvt:
    ExtType = ISD::NON_EXTLOAD;
    break;
  }
  return DAG.getExtLoad(
      ExtType, DL, VA.getLocVT(), Chain, FIN,
}
8481
                                       const CCValAssign &VA,
                                       const CCValAssign &HiVA,
                                       const SDLoc &DL) {
  // Reassemble an f64 formal argument that was split into two i32 halves for
  // LA32D with a soft float ABI: the low half always arrives in a GPR (VA),
  // the high half (HiVA) arrives either in another GPR or on the stack.
  assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
         "Unexpected VA");
  MachineFrameInfo &MFI = MF.getFrameInfo();

  assert(VA.isRegLoc() && "Expected register VA assignment");

  Register LoVReg = RegInfo.createVirtualRegister(&LoongArch::GPRRegClass);
  RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
  SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
  SDValue Hi;
  if (HiVA.isMemLoc()) {
    // Second half of f64 is passed on the stack.
    int FI = MFI.CreateFixedObject(4, HiVA.getLocMemOffset(),
                                   /*IsImmutable=*/true);
    SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
    Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
  } else {
    // Second half of f64 is passed in another GPR.
    Register HiVReg = RegInfo.createVirtualRegister(&LoongArch::GPRRegClass);
    RegInfo.addLiveIn(HiVA.getLocReg(), HiVReg);
    Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
  }
  // Fuse the two 32-bit halves back into a single f64 value.
  return DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64, Lo, Hi);
}
8513
                                   const CCValAssign &VA, const SDLoc &DL) {
  // Convert Val from its declared value VT to the location VT it must occupy
  // for the calling convention (inverse of convertLocVTToValVT).
  EVT LocVT = VA.getLocVT();

  switch (VA.getLocInfo()) {
  default:
    llvm_unreachable("Unexpected CCValAssign::LocInfo");
  case CCValAssign::Full:
    break;
  case CCValAssign::BCvt:
    // An f32 placed in a 64-bit GPR uses the dedicated move node; any other
    // bit-conversion is a plain same-size bitcast.
    if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
      Val = DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Val);
    else
      Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
    break;
  }
  return Val;
}
8532
8533static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
8534 CCValAssign::LocInfo LocInfo,
8535 ISD::ArgFlagsTy ArgFlags, Type *OrigTy,
8536 CCState &State) {
8537 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
8538 // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, SpLim
8539 // s0 s1 s2 s3 s4 s5 s6 s7 s8
8540 static const MCPhysReg GPRList[] = {
8541 LoongArch::R23, LoongArch::R24, LoongArch::R25,
8542 LoongArch::R26, LoongArch::R27, LoongArch::R28,
8543 LoongArch::R29, LoongArch::R30, LoongArch::R31};
8544 if (MCRegister Reg = State.AllocateReg(GPRList)) {
8545 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
8546 return false;
8547 }
8548 }
8549
8550 if (LocVT == MVT::f32) {
8551 // Pass in STG registers: F1, F2, F3, F4
8552 // fs0,fs1,fs2,fs3
8553 static const MCPhysReg FPR32List[] = {LoongArch::F24, LoongArch::F25,
8554 LoongArch::F26, LoongArch::F27};
8555 if (MCRegister Reg = State.AllocateReg(FPR32List)) {
8556 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
8557 return false;
8558 }
8559 }
8560
8561 if (LocVT == MVT::f64) {
8562 // Pass in STG registers: D1, D2, D3, D4
8563 // fs4,fs5,fs6,fs7
8564 static const MCPhysReg FPR64List[] = {LoongArch::F28_64, LoongArch::F29_64,
8565 LoongArch::F30_64, LoongArch::F31_64};
8566 if (MCRegister Reg = State.AllocateReg(FPR64List)) {
8567 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
8568 return false;
8569 }
8570 }
8571
8572 report_fatal_error("No registers left in GHC calling convention");
8573 return true;
8574}
8575
// Transform physical registers into virtual registers: lower each incoming
// formal argument described by Ins into a DAG value appended to InVals, and,
// for vararg functions, spill the unused argument GPRs into the vararg save
// area so va_arg can find them.
    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {


  // Only C-like conventions and GHC are supported; GHC needs the FP register
  // file since it maps STG registers onto FPRs.
  switch (CallConv) {
  default:
    llvm_unreachable("Unsupported calling convention");
  case CallingConv::C:
  case CallingConv::Fast:
    break;
  case CallingConv::GHC:
    if (!MF.getSubtarget().hasFeature(LoongArch::FeatureBasicF) ||
        !MF.getSubtarget().hasFeature(LoongArch::FeatureBasicD))
          "GHC calling convention requires the F and D extensions");
  }

  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  MVT GRLenVT = Subtarget.getGRLenVT();
  unsigned GRLenInBytes = Subtarget.getGRLen() / 8;
  // Used with varargs to accumulate store chains.
  std::vector<SDValue> OutChains;

  // Assign locations to all of the incoming arguments.
  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());

  if (CallConv == CallingConv::GHC)
  else
    analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false, CC_LoongArch);

  for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) {
    CCValAssign &VA = ArgLocs[i];
    SDValue ArgValue;
    // Passing f64 on LA32D with a soft float ABI must be handled as a special
    // case.
    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
      assert(VA.needsCustom());
      ArgValue = unpackF64OnLA32DSoftABI(DAG, Chain, VA, ArgLocs[++i], DL);
    } else if (VA.isRegLoc())
      ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[InsIdx], *this);
    else
      ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
    if (VA.getLocInfo() == CCValAssign::Indirect) {
      // If the original argument was split and passed by reference, we need to
      // load all parts of it here (using the same address).
      InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
      unsigned ArgIndex = Ins[InsIdx].OrigArgIndex;
      unsigned ArgPartOffset = Ins[InsIdx].PartOffset;
      assert(ArgPartOffset == 0);
      // Consume all remaining parts of the same original argument, loading
      // each from its offset off the shared pointer.
      while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) {
        CCValAssign &PartVA = ArgLocs[i + 1];
        unsigned PartOffset = Ins[InsIdx + 1].PartOffset - ArgPartOffset;
        SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
        SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
        InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
        ++i;
        ++InsIdx;
      }
      continue;
    }
    InVals.push_back(ArgValue);
  }

  if (IsVarArg) {
    unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
    const TargetRegisterClass *RC = &LoongArch::GPRRegClass;
    MachineFrameInfo &MFI = MF.getFrameInfo();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();
    auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();

    // Offset of the first variable argument from stack pointer, and size of
    // the vararg save area. For now, the varargs save area is either zero or
    // large enough to hold a0-a7.
    int VaArgOffset, VarArgsSaveSize;

    // If all registers are allocated, then all varargs must be passed on the
    // stack and we don't need to save any argregs.
    if (ArgRegs.size() == Idx) {
      VaArgOffset = CCInfo.getStackSize();
      VarArgsSaveSize = 0;
    } else {
      VarArgsSaveSize = GRLenInBytes * (ArgRegs.size() - Idx);
      VaArgOffset = -VarArgsSaveSize;
    }

    // Record the frame index of the first variable argument
    // which is a value necessary to VASTART.
    int FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
    LoongArchFI->setVarArgsFrameIndex(FI);

    // If saving an odd number of registers then create an extra stack slot to
    // ensure that the frame pointer is 2*GRLen-aligned, which in turn ensures
    // offsets to even-numbered registers remain 2*GRLen-aligned.
    if (Idx % 2) {
      MFI.CreateFixedObject(GRLenInBytes, VaArgOffset - (int)GRLenInBytes,
                            true);
      VarArgsSaveSize += GRLenInBytes;
    }

    // Copy the integer registers that may have been used for passing varargs
    // to the vararg save area.
    for (unsigned I = Idx; I < ArgRegs.size();
         ++I, VaArgOffset += GRLenInBytes) {
      const Register Reg = RegInfo.createVirtualRegister(RC);
      RegInfo.addLiveIn(ArgRegs[I], Reg);
      SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, GRLenVT);
      FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
      SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
      SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
      cast<StoreSDNode>(Store.getNode())
          ->getMemOperand()
          ->setValue((Value *)nullptr);
      OutChains.push_back(Store);
    }
    LoongArchFI->setVarArgsSaveSize(VarArgsSaveSize);
  }

  // All stores are grouped in one node to allow the matching between
  // the size of Ins and InVals. This only happens for vararg functions.
  if (!OutChains.empty()) {
    OutChains.push_back(Chain);
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
  }

  return Chain;
}
8714
  // A call may be emitted as a tail call whenever the IR call itself is
  // marked 'tail'.
  return CI->isTailCall();
}
8718
// Check if the return value is used as only a return value, as otherwise
// we can't perform a tail-call. On success, Chain is set to the operand
// feeding the CopyToReg that carries the value to the return.
                                               SDValue &Chain) const {
  // The node must produce exactly one value with exactly one use.
  if (N->getNumValues() != 1)
    return false;
  if (!N->hasNUsesOfValue(1, 0))
    return false;

  // That single use must be a CopyToReg into the return register.
  SDNode *Copy = *N->user_begin();
  if (Copy->getOpcode() != ISD::CopyToReg)
    return false;

  // If the ISD::CopyToReg has a glue operand, we conservatively assume it
  // isn't safe to perform a tail call.
  if (Copy->getGluedNode())
    return false;

  // The copy must be used by a LoongArchISD::RET, and nothing else.
  bool HasRet = false;
  for (SDNode *Node : Copy->users()) {
    if (Node->getOpcode() != LoongArchISD::RET)
      return false;
    HasRet = true;
  }

  if (!HasRet)
    return false;

  Chain = Copy->getOperand(0);
  return true;
}
8751
8752// Check whether the call is eligible for tail call optimization.
8753bool LoongArchTargetLowering::isEligibleForTailCallOptimization(
8754 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
8755 const SmallVectorImpl<CCValAssign> &ArgLocs) const {
8756
8757 auto CalleeCC = CLI.CallConv;
8758 auto &Outs = CLI.Outs;
8759 auto &Caller = MF.getFunction();
8760 auto CallerCC = Caller.getCallingConv();
8761
8762 // Do not tail call opt if the stack is used to pass parameters.
8763 if (CCInfo.getStackSize() != 0)
8764 return false;
8765
8766 // Do not tail call opt if any parameters need to be passed indirectly.
8767 for (auto &VA : ArgLocs)
8768 if (VA.getLocInfo() == CCValAssign::Indirect)
8769 return false;
8770
8771 // Do not tail call opt if either caller or callee uses struct return
8772 // semantics.
8773 auto IsCallerStructRet = Caller.hasStructRetAttr();
8774 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
8775 if (IsCallerStructRet || IsCalleeStructRet)
8776 return false;
8777
8778 // Do not tail call opt if either the callee or caller has a byval argument.
8779 for (auto &Arg : Outs)
8780 if (Arg.Flags.isByVal())
8781 return false;
8782
8783 // The callee has to preserve all registers the caller needs to preserve.
8784 const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
8785 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
8786 if (CalleeCC != CallerCC) {
8787 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
8788 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
8789 return false;
8790 }
8791 return true;
8792}
8793
  // Preferred alignment of VT's equivalent IR type, per the module's data
  // layout.
  return DAG.getDataLayout().getPrefTypeAlign(
      VT.getTypeForEVT(*DAG.getContext()));
}
8798
// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
// and output parameter nodes. Also handles byval copies, indirect (split)
// arguments, the f64-in-two-i32 LA32D soft-float special case, tail-call
// emission, and copying call results back out of their physregs.
SDValue
                                     SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  SDLoc &DL = CLI.DL;
  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  CallingConv::ID CallConv = CLI.CallConv;
  bool IsVarArg = CLI.IsVarArg;
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  MVT GRLenVT = Subtarget.getGRLenVT();
  bool &IsTailCall = CLI.IsTailCall;


  // Analyze the operands of the call, assigning locations to each operand.
  CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());

  if (CallConv == CallingConv::GHC)
    ArgCCInfo.AnalyzeCallOperands(Outs, CC_LoongArch_GHC);
  else
    analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI, CC_LoongArch);

  // Check if it's really possible to do a tail call.
  if (IsTailCall)
    IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);

  if (IsTailCall)
    ++NumTailCalls;
  else if (CLI.CB && CLI.CB->isMustTailCall())
    report_fatal_error("failed to perform tail call elimination on a call "
                       "site marked musttail");

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = ArgCCInfo.getStackSize();

  // Create local copies for byval args.
  SmallVector<SDValue> ByValArgs;
  for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
    ISD::ArgFlagsTy Flags = Outs[i].Flags;
    if (!Flags.isByVal())
      continue;

    SDValue Arg = OutVals[i];
    unsigned Size = Flags.getByValSize();
    Align Alignment = Flags.getNonZeroByValAlign();

    int FI =
        MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
    SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
    SDValue SizeNode = DAG.getConstant(Size, DL, GRLenVT);

    Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
                          /*IsVolatile=*/false,
                          /*AlwaysInline=*/false, /*CI=*/nullptr, std::nullopt,
    ByValArgs.push_back(FIPtr);
  }

  if (!IsTailCall)
    Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);

  // Copy argument values to their designated locations.
  SmallVector<SDValue> MemOpChains;
  SDValue StackPtr;
  for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e;
       ++i, ++OutIdx) {
    CCValAssign &VA = ArgLocs[i];
    SDValue ArgValue = OutVals[OutIdx];
    ISD::ArgFlagsTy Flags = Outs[OutIdx].Flags;

    // Handle passing f64 on LA32D with a soft float ABI as a special case.
    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
      assert(VA.isRegLoc() && "Expected register VA assignment");
      assert(VA.needsCustom());
      SDValue SplitF64 =
          DAG.getNode(LoongArchISD::SPLIT_PAIR_F64, DL,
                      DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
      SDValue Lo = SplitF64.getValue(0);
      SDValue Hi = SplitF64.getValue(1);

      Register RegLo = VA.getLocReg();
      RegsToPass.push_back(std::make_pair(RegLo, Lo));

      // Get the CCValAssign for the Hi part.
      CCValAssign &HiVA = ArgLocs[++i];

      if (HiVA.isMemLoc()) {
        // Second half of f64 is passed on the stack.
        if (!StackPtr.getNode())
          StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT);
            DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
                        DAG.getIntPtrConstant(HiVA.getLocMemOffset(), DL));
        // Emit the store.
        MemOpChains.push_back(DAG.getStore(
            Chain, DL, Hi, Address,
      } else {
        // Second half of f64 is passed in another GPR.
        Register RegHigh = HiVA.getLocReg();
        RegsToPass.push_back(std::make_pair(RegHigh, Hi));
      }
      continue;
    }

    // Promote the value if needed.
    // For now, only handle fully promoted and indirect arguments.
    if (VA.getLocInfo() == CCValAssign::Indirect) {
      // Store the argument in a stack slot and pass its address.
      Align StackAlign =
          std::max(getPrefTypeAlign(Outs[OutIdx].ArgVT, DAG),
                   getPrefTypeAlign(ArgValue.getValueType(), DAG));
      TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
      // If the original argument was split and passed by reference, we need to
      // store the required parts of it here (and pass just one address).
      unsigned ArgIndex = Outs[OutIdx].OrigArgIndex;
      unsigned ArgPartOffset = Outs[OutIdx].PartOffset;
      assert(ArgPartOffset == 0);
      // Calculate the total size to store. We don't have access to what we're
      // actually storing other than performing the loop and collecting the
      // info.
      while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) {
        SDValue PartValue = OutVals[OutIdx + 1];
        unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset;
        SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
        EVT PartVT = PartValue.getValueType();

        StoredSize += PartVT.getStoreSize();
        StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
        Parts.push_back(std::make_pair(PartValue, Offset));
        ++i;
        ++OutIdx;
      }
      SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
      int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
      MemOpChains.push_back(
          DAG.getStore(Chain, DL, ArgValue, SpillSlot,
      // Store each collected part at its offset within the spill slot.
      for (const auto &Part : Parts) {
        SDValue PartValue = Part.first;
        SDValue PartOffset = Part.second;
            DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
        MemOpChains.push_back(
            DAG.getStore(Chain, DL, PartValue, Address,
      }
      ArgValue = SpillSlot;
    } else {
      ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL);
    }

    // Use local copy if it is a byval arg.
    if (Flags.isByVal())
      ArgValue = ByValArgs[j++];

    if (VA.isRegLoc()) {
      // Queue up the argument copies and emit them at the end.
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
    } else {
      assert(VA.isMemLoc() && "Argument not register or memory");
      assert(!IsTailCall && "Tail call not allowed if stack is used "
                            "for passing parameters");

      // Work out the address of the stack slot.
      if (!StackPtr.getNode())
        StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT);
          DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,

      // Emit the store.
      MemOpChains.push_back(
          DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
    }
  }

  // Join the stores, which are independent of one another.
  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);

  SDValue Glue;

  // Build a sequence of copy-to-reg nodes, chained and glued together.
  for (auto &Reg : RegsToPass) {
    Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
    Glue = Chain.getValue(1);
  }

  // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
  // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
  // split it and then direct call can be matched by PseudoCALL_SMALL.
    const GlobalValue *GV = S->getGlobal();
    unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(GV)
    Callee = DAG.getTargetGlobalAddress(S->getGlobal(), DL, PtrVT, 0, OpFlags);
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(nullptr)
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);
  }

  // The first call operand is the chain and the second is the target address.
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are
  // known live into the call.
  for (auto &Reg : RegsToPass)
    Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));

  if (!IsTailCall) {
    // Add a register mask operand representing the call-preserved registers.
    const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
    const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
    assert(Mask && "Missing call preserved mask for calling convention");
    Ops.push_back(DAG.getRegisterMask(Mask));
  }

  // Glue the call to the argument copies, if any.
  if (Glue.getNode())
    Ops.push_back(Glue);

  // Emit the call. The node opcode depends on the code model and on whether
  // this is a tail call.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  unsigned Op;
  switch (DAG.getTarget().getCodeModel()) {
  default:
    report_fatal_error("Unsupported code model");
  case CodeModel::Small:
    Op = IsTailCall ? LoongArchISD::TAIL : LoongArchISD::CALL;
    break;
  case CodeModel::Medium:
    Op = IsTailCall ? LoongArchISD::TAIL_MEDIUM : LoongArchISD::CALL_MEDIUM;
    break;
  case CodeModel::Large:
    assert(Subtarget.is64Bit() && "Large code model requires LA64");
    Op = IsTailCall ? LoongArchISD::TAIL_LARGE : LoongArchISD::CALL_LARGE;
    break;
  }

  if (IsTailCall) {
    SDValue Ret = DAG.getNode(Op, DL, NodeTys, Ops);
    DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
    return Ret;
  }

  Chain = DAG.getNode(Op, DL, NodeTys, Ops);
  DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
  Glue = Chain.getValue(1);

  // Mark the end of the call, which is glued to the call itself.
  Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
  Glue = Chain.getValue(1);

  // Assign locations to each value returned by this call.
  CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
  analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_LoongArch);

  // Copy all of the result registers out of their specified physreg.
  for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
    auto &VA = RVLocs[i];
    // Copy the value out.
    SDValue RetValue =
        DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
    // Glue the RetValue to the end of the call sequence.
    Chain = RetValue.getValue(1);
    Glue = RetValue.getValue(2);

    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
      // f64 returned as two i32 halves on LA32D with a soft float ABI.
      assert(VA.needsCustom());
      SDValue RetValue2 = DAG.getCopyFromReg(Chain, DL, RVLocs[++i].getLocReg(),
                                             MVT::i32, Glue);
      Chain = RetValue2.getValue(1);
      Glue = RetValue2.getValue(2);
      RetValue = DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64,
                             RetValue, RetValue2);
    } else
      RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL);

    InVals.push_back(RetValue);
  }

  return Chain;
}
9099
    CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
    const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context,
    const Type *RetTy) const {
  CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);

  // Run every return value through CC_LoongArch; returns false as soon as one
  // of them cannot be assigned a location.
  for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
    LoongArchABI::ABI ABI =
        MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
    if (CC_LoongArch(MF.getDataLayout(), ABI, i, Outs[i].VT, CCValAssign::Full,
                     Outs[i].Flags, CCInfo, /*IsRet=*/true, nullptr))
      return false;
  }
  return true;
}
9116
    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
    SelectionDAG &DAG) const {
  // Stores the assignment of the return value to a location.

  // Info about the registers and stack slot.
  CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());

  analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
                    nullptr, CC_LoongArch);
  if (CallConv == CallingConv::GHC && !RVLocs.empty())
    report_fatal_error("GHC functions return void only");
  SDValue Glue;
  SmallVector<SDValue, 4> RetOps(1, Chain);

  // Copy the result values into the output registers.
  for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) {
    SDValue Val = OutVals[OutIdx];
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");

    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
      // Handle returning f64 on LA32D with a soft float ABI: split the value
      // into two i32 halves and return them in a register pair.
      assert(VA.isRegLoc() && "Expected return via registers");
      assert(VA.needsCustom());
      SDValue SplitF64 = DAG.getNode(LoongArchISD::SPLIT_PAIR_F64, DL,
                                     DAG.getVTList(MVT::i32, MVT::i32), Val);
      SDValue Lo = SplitF64.getValue(0);
      SDValue Hi = SplitF64.getValue(1);
      Register RegLo = VA.getLocReg();
      Register RegHi = RVLocs[++i].getLocReg();

      Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
      Glue = Chain.getValue(1);
      RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
      Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
      Glue = Chain.getValue(1);
      RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
    } else {
      // Handle a 'normal' return.
      Val = convertValVTToLocVT(DAG, Val, VA, DL);
      Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);

      // Guarantee that all emitted copies are stuck together.
      Glue = Chain.getValue(1);
      RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
    }
  }

  RetOps[0] = Chain; // Update chain.

  // Add the glue node if we have it.
  if (Glue.getNode())
    RetOps.push_back(Glue);

  return DAG.getNode(LoongArchISD::RET, DL, MVT::Other, RetOps);
}
9178
9179// Check if a constant splat can be generated using [x]vldi, where imm[12] == 1.
9180// Note: The following prefixes are excluded:
9181// imm[11:8] == 4'b0000, 4'b0100, 4'b1000
9182// as they can be represented using [x]vrepli.[whb]
9184 const APInt &SplatValue, const unsigned SplatBitSize) const {
9185 uint64_t RequiredImm = 0;
9186 uint64_t V = SplatValue.getZExtValue();
9187 if (SplatBitSize == 16 && !(V & 0x00FF)) {
9188 // 4'b0101
9189 RequiredImm = (0b10101 << 8) | (V >> 8);
9190 return {true, RequiredImm};
9191 } else if (SplatBitSize == 32) {
9192 // 4'b0001
9193 if (!(V & 0xFFFF00FF)) {
9194 RequiredImm = (0b10001 << 8) | (V >> 8);
9195 return {true, RequiredImm};
9196 }
9197 // 4'b0010
9198 if (!(V & 0xFF00FFFF)) {
9199 RequiredImm = (0b10010 << 8) | (V >> 16);
9200 return {true, RequiredImm};
9201 }
9202 // 4'b0011
9203 if (!(V & 0x00FFFFFF)) {
9204 RequiredImm = (0b10011 << 8) | (V >> 24);
9205 return {true, RequiredImm};
9206 }
9207 // 4'b0110
9208 if ((V & 0xFFFF00FF) == 0xFF) {
9209 RequiredImm = (0b10110 << 8) | (V >> 8);
9210 return {true, RequiredImm};
9211 }
9212 // 4'b0111
9213 if ((V & 0xFF00FFFF) == 0xFFFF) {
9214 RequiredImm = (0b10111 << 8) | (V >> 16);
9215 return {true, RequiredImm};
9216 }
9217 // 4'b1010
9218 if ((V & 0x7E07FFFF) == 0x3E000000 || (V & 0x7E07FFFF) == 0x40000000) {
9219 RequiredImm =
9220 (0b11010 << 8) | (((V >> 24) & 0xC0) ^ 0x40) | ((V >> 19) & 0x3F);
9221 return {true, RequiredImm};
9222 }
9223 } else if (SplatBitSize == 64) {
9224 // 4'b1011
9225 if ((V & 0xFFFFFFFF7E07FFFFULL) == 0x3E000000ULL ||
9226 (V & 0xFFFFFFFF7E07FFFFULL) == 0x40000000ULL) {
9227 RequiredImm =
9228 (0b11011 << 8) | (((V >> 24) & 0xC0) ^ 0x40) | ((V >> 19) & 0x3F);
9229 return {true, RequiredImm};
9230 }
9231 // 4'b1100
9232 if ((V & 0x7FC0FFFFFFFFFFFFULL) == 0x4000000000000000ULL ||
9233 (V & 0x7FC0FFFFFFFFFFFFULL) == 0x3FC0000000000000ULL) {
9234 RequiredImm =
9235 (0b11100 << 8) | (((V >> 56) & 0xC0) ^ 0x40) | ((V >> 48) & 0x3F);
9236 return {true, RequiredImm};
9237 }
9238 // 4'b1001
9239 auto sameBitsPreByte = [](uint64_t x) -> std::pair<bool, uint8_t> {
9240 uint8_t res = 0;
9241 for (int i = 0; i < 8; ++i) {
9242 uint8_t byte = x & 0xFF;
9243 if (byte == 0 || byte == 0xFF)
9244 res |= ((byte & 1) << i);
9245 else
9246 return {false, 0};
9247 x >>= 8;
9248 }
9249 return {true, res};
9250 };
9251 auto [IsSame, Suffix] = sameBitsPreByte(V);
9252 if (IsSame) {
9253 RequiredImm = (0b11001 << 8) | Suffix;
9254 return {true, RequiredImm};
9255 }
9256 }
9257 return {false, RequiredImm};
9258}
9259
9261 EVT VT) const {
9262 if (!Subtarget.hasExtLSX())
9263 return false;
9264
9265 if (VT == MVT::f32) {
9266 uint64_t masked = Imm.bitcastToAPInt().getZExtValue() & 0x7e07ffff;
9267 return (masked == 0x3e000000 || masked == 0x40000000);
9268 }
9269
9270 if (VT == MVT::f64) {
9271 uint64_t masked = Imm.bitcastToAPInt().getZExtValue() & 0x7fc0ffffffffffff;
9272 return (masked == 0x3fc0000000000000 || masked == 0x4000000000000000);
9273 }
9274
9275 return false;
9276}
9277
9278bool LoongArchTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
9279 bool ForCodeSize) const {
9280 // TODO: Maybe need more checks here after vector extension is supported.
9281 if (VT == MVT::f32 && !Subtarget.hasBasicF())
9282 return false;
9283 if (VT == MVT::f64 && !Subtarget.hasBasicD())
9284 return false;
9285 return (Imm.isZero() || Imm.isExactlyValue(1.0) || isFPImmVLDILegal(Imm, VT));
9286}
9287
9289 return true;
9290}
9291
9293 return true;
9294}
9295
9296bool LoongArchTargetLowering::shouldInsertFencesForAtomic(
9297 const Instruction *I) const {
9298 if (!Subtarget.is64Bit())
9299 return isa<LoadInst>(I) || isa<StoreInst>(I);
9300
9301 if (isa<LoadInst>(I))
9302 return true;
9303
9304 // On LA64, atomic store operations with IntegerBitWidth of 32 and 64 do not
9305 // require fences beacuse we can use amswap_db.[w/d].
9306 Type *Ty = I->getOperand(0)->getType();
9307 if (isa<StoreInst>(I) && Ty->isIntegerTy()) {
9308 unsigned Size = Ty->getIntegerBitWidth();
9309 return (Size == 8 || Size == 16);
9310 }
9311
9312 return false;
9313}
9314
9316 LLVMContext &Context,
9317 EVT VT) const {
9318 if (!VT.isVector())
9319 return getPointerTy(DL);
9321}
9322
9324 unsigned AddressSpace, EVT MemVT, const MachineFunction &MF) const {
9325 // Do not merge to float value size (128 or 256 bits) if no implicit
9326 // float attribute is set.
9327 bool NoFloat = MF.getFunction().hasFnAttribute(Attribute::NoImplicitFloat);
9328 unsigned MaxIntSize = Subtarget.is64Bit() ? 64 : 32;
9329 if (NoFloat)
9330 return MemVT.getSizeInBits() <= MaxIntSize;
9331
9332 // Make sure we don't merge greater than our maximum supported vector width.
9333 if (Subtarget.hasExtLASX())
9334 MaxIntSize = 256;
9335 else if (Subtarget.hasExtLSX())
9336 MaxIntSize = 128;
9337
9338 return MemVT.getSizeInBits() <= MaxIntSize;
9339}
9340
9342 EVT VT = Y.getValueType();
9343
9344 if (VT.isVector())
9345 return Subtarget.hasExtLSX() && VT.isInteger();
9346
9347 return VT.isScalarInteger() && !isa<ConstantSDNode>(Y);
9348}
9349
9352 MachineFunction &MF, unsigned Intrinsic) const {
9353 switch (Intrinsic) {
9354 default:
9355 return;
9356 case Intrinsic::loongarch_masked_atomicrmw_xchg_i32:
9357 case Intrinsic::loongarch_masked_atomicrmw_add_i32:
9358 case Intrinsic::loongarch_masked_atomicrmw_sub_i32:
9359 case Intrinsic::loongarch_masked_atomicrmw_nand_i32: {
9360 IntrinsicInfo Info;
9362 Info.memVT = MVT::i32;
9363 Info.ptrVal = I.getArgOperand(0);
9364 Info.offset = 0;
9365 Info.align = Align(4);
9368 Infos.push_back(Info);
9369 return;
9370 // TODO: Add more Intrinsics later.
9371 }
9372 }
9373}
9374
9375// When -mlamcas is enabled, MinCmpXchgSizeInBits will be set to 8,
9376// atomicrmw and/or/xor operations with operands less than 32 bits cannot be
9377// expanded to am{and/or/xor}[_db].w through AtomicExpandPass. To prevent
9378// regression, we need to implement it manually.
9381
9383 Op == AtomicRMWInst::And) &&
9384 "Unable to expand");
9385 unsigned MinWordSize = 4;
9386
9387 IRBuilder<> Builder(AI);
9388 LLVMContext &Ctx = Builder.getContext();
9389 const DataLayout &DL = AI->getDataLayout();
9390 Type *ValueType = AI->getType();
9391 Type *WordType = Type::getIntNTy(Ctx, MinWordSize * 8);
9392
9393 Value *Addr = AI->getPointerOperand();
9394 PointerType *PtrTy = cast<PointerType>(Addr->getType());
9395 IntegerType *IntTy = DL.getIndexType(Ctx, PtrTy->getAddressSpace());
9396
9397 Value *AlignedAddr = Builder.CreateIntrinsic(
9398 Intrinsic::ptrmask, {PtrTy, IntTy},
9399 {Addr, ConstantInt::get(IntTy, ~(uint64_t)(MinWordSize - 1))}, nullptr,
9400 "AlignedAddr");
9401
9402 Value *AddrInt = Builder.CreatePtrToInt(Addr, IntTy);
9403 Value *PtrLSB = Builder.CreateAnd(AddrInt, MinWordSize - 1, "PtrLSB");
9404 Value *ShiftAmt = Builder.CreateShl(PtrLSB, 3);
9405 ShiftAmt = Builder.CreateTrunc(ShiftAmt, WordType, "ShiftAmt");
9406 Value *Mask = Builder.CreateShl(
9407 ConstantInt::get(WordType,
9408 (1 << (DL.getTypeStoreSize(ValueType) * 8)) - 1),
9409 ShiftAmt, "Mask");
9410 Value *Inv_Mask = Builder.CreateNot(Mask, "Inv_Mask");
9411 Value *ValOperand_Shifted =
9412 Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), WordType),
9413 ShiftAmt, "ValOperand_Shifted");
9414 Value *NewOperand;
9415 if (Op == AtomicRMWInst::And)
9416 NewOperand = Builder.CreateOr(ValOperand_Shifted, Inv_Mask, "AndOperand");
9417 else
9418 NewOperand = ValOperand_Shifted;
9419
9420 AtomicRMWInst *NewAI =
9421 Builder.CreateAtomicRMW(Op, AlignedAddr, NewOperand, Align(MinWordSize),
9422 AI->getOrdering(), AI->getSyncScopeID());
9423
9424 Value *Shift = Builder.CreateLShr(NewAI, ShiftAmt, "shifted");
9425 Value *Trunc = Builder.CreateTrunc(Shift, ValueType, "extracted");
9426 Value *FinalOldResult = Builder.CreateBitCast(Trunc, ValueType);
9427 AI->replaceAllUsesWith(FinalOldResult);
9428 AI->eraseFromParent();
9429}
9430
9433 const AtomicRMWInst *AI) const {
9434 // TODO: Add more AtomicRMWInst that needs to be extended.
9435
9436 // Since floating-point operation requires a non-trivial set of data
9437 // operations, use CmpXChg to expand.
9438 if (AI->isFloatingPointOperation() ||
9444
9445 if (Subtarget.hasLAM_BH() && Subtarget.is64Bit() &&
9448 AI->getOperation() == AtomicRMWInst::Sub)) {
9450 }
9451
9452 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
9453 if (Subtarget.hasLAMCAS()) {
9454 if (Size < 32 && (AI->getOperation() == AtomicRMWInst::And ||
9458 if (AI->getOperation() == AtomicRMWInst::Nand || Size < 32)
9460 }
9461
9462 if (Size == 8 || Size == 16)
9465}
9466
9467static Intrinsic::ID
9469 AtomicRMWInst::BinOp BinOp) {
9470 if (GRLen == 64) {
9471 switch (BinOp) {
9472 default:
9473 llvm_unreachable("Unexpected AtomicRMW BinOp");
9475 return Intrinsic::loongarch_masked_atomicrmw_xchg_i64;
9476 case AtomicRMWInst::Add:
9477 return Intrinsic::loongarch_masked_atomicrmw_add_i64;
9478 case AtomicRMWInst::Sub:
9479 return Intrinsic::loongarch_masked_atomicrmw_sub_i64;
9481 return Intrinsic::loongarch_masked_atomicrmw_nand_i64;
9483 return Intrinsic::loongarch_masked_atomicrmw_umax_i64;
9485 return Intrinsic::loongarch_masked_atomicrmw_umin_i64;
9486 case AtomicRMWInst::Max:
9487 return Intrinsic::loongarch_masked_atomicrmw_max_i64;
9488 case AtomicRMWInst::Min:
9489 return Intrinsic::loongarch_masked_atomicrmw_min_i64;
9490 // TODO: support other AtomicRMWInst.
9491 }
9492 }
9493
9494 if (GRLen == 32) {
9495 switch (BinOp) {
9496 default:
9497 llvm_unreachable("Unexpected AtomicRMW BinOp");
9499 return Intrinsic::loongarch_masked_atomicrmw_xchg_i32;
9500 case AtomicRMWInst::Add:
9501 return Intrinsic::loongarch_masked_atomicrmw_add_i32;
9502 case AtomicRMWInst::Sub:
9503 return Intrinsic::loongarch_masked_atomicrmw_sub_i32;
9505 return Intrinsic::loongarch_masked_atomicrmw_nand_i32;
9507 return Intrinsic::loongarch_masked_atomicrmw_umax_i32;
9509 return Intrinsic::loongarch_masked_atomicrmw_umin_i32;
9510 case AtomicRMWInst::Max:
9511 return Intrinsic::loongarch_masked_atomicrmw_max_i32;
9512 case AtomicRMWInst::Min:
9513 return Intrinsic::loongarch_masked_atomicrmw_min_i32;
9514 // TODO: support other AtomicRMWInst.
9515 }
9516 }
9517
9518 llvm_unreachable("Unexpected GRLen\n");
9519}
9520
9523 const AtomicCmpXchgInst *CI) const {
9524
9525 if (Subtarget.hasLAMCAS())
9527
9529 if (Size == 8 || Size == 16)
9532}
9533
9535 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
9536 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
9537 unsigned GRLen = Subtarget.getGRLen();
9538 AtomicOrdering FailOrd = CI->getFailureOrdering();
9539 Value *FailureOrdering =
9540 Builder.getIntN(Subtarget.getGRLen(), static_cast<uint64_t>(FailOrd));
9541 Intrinsic::ID CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i32;
9542 if (GRLen == 64) {
9543 CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i64;
9544 CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
9545 NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
9546 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
9547 }
9548 Type *Tys[] = {AlignedAddr->getType()};
9549 Value *Result = Builder.CreateIntrinsic(
9550 CmpXchgIntrID, Tys, {AlignedAddr, CmpVal, NewVal, Mask, FailureOrdering});
9551 if (GRLen == 64)
9552 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
9553 return Result;
9554}
9555
9557 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
9558 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
9559 // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
9560 // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
9561 // mask, as this produces better code than the LL/SC loop emitted by
9562 // int_loongarch_masked_atomicrmw_xchg.
9563 if (AI->getOperation() == AtomicRMWInst::Xchg &&
9566 if (CVal->isZero())
9567 return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,
9568 Builder.CreateNot(Mask, "Inv_Mask"),
9569 AI->getAlign(), Ord);
9570 if (CVal->isMinusOne())
9571 return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,
9572 AI->getAlign(), Ord);
9573 }
9574
9575 unsigned GRLen = Subtarget.getGRLen();
9576 Value *Ordering =
9577 Builder.getIntN(GRLen, static_cast<uint64_t>(AI->getOrdering()));
9578 Type *Tys[] = {AlignedAddr->getType()};
9580 AI->getModule(),
9582
9583 if (GRLen == 64) {
9584 Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
9585 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
9586 ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
9587 }
9588
9589 Value *Result;
9590
9591 // Must pass the shift amount needed to sign extend the loaded value prior
9592 // to performing a signed comparison for min/max. ShiftAmt is the number of
9593 // bits to shift the value into position. Pass GRLen-ShiftAmt-ValWidth, which
9594 // is the number of bits to left+right shift the value in order to
9595 // sign-extend.
9596 if (AI->getOperation() == AtomicRMWInst::Min ||
9598 const DataLayout &DL = AI->getDataLayout();
9599 unsigned ValWidth =
9600 DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
9601 Value *SextShamt =
9602 Builder.CreateSub(Builder.getIntN(GRLen, GRLen - ValWidth), ShiftAmt);
9603 Result = Builder.CreateCall(LlwOpScwLoop,
9604 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
9605 } else {
9606 Result =
9607 Builder.CreateCall(LlwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
9608 }
9609
9610 if (GRLen == 64)
9611 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
9612 return Result;
9613}
9614
9616 const MachineFunction &MF, EVT VT) const {
9617 VT = VT.getScalarType();
9618
9619 if (!VT.isSimple())
9620 return false;
9621
9622 switch (VT.getSimpleVT().SimpleTy) {
9623 case MVT::f32:
9624 case MVT::f64:
9625 return true;
9626 default:
9627 break;
9628 }
9629
9630 return false;
9631}
9632
9634 const Constant *PersonalityFn) const {
9635 return LoongArch::R4;
9636}
9637
9639 const Constant *PersonalityFn) const {
9640 return LoongArch::R5;
9641}
9642
9643//===----------------------------------------------------------------------===//
9644// Target Optimization Hooks
9645//===----------------------------------------------------------------------===//
9646
9648 const LoongArchSubtarget &Subtarget) {
9649 // Feature FRECIPE instrucions relative accuracy is 2^-14.
9650 // IEEE float has 23 digits and double has 52 digits.
9651 int RefinementSteps = VT.getScalarType() == MVT::f64 ? 2 : 1;
9652 return RefinementSteps;
9653}
9654
9655static bool
9657 assert(Subtarget.hasFrecipe() &&
9658 "Reciprocal estimate queried on unsupported target");
9659
9660 if (!VT.isSimple())
9661 return false;
9662
9663 switch (VT.getSimpleVT().SimpleTy) {
9664 case MVT::f32:
9665 // f32 is the base type for reciprocal estimate instructions.
9666 return true;
9667
9668 case MVT::f64:
9669 return Subtarget.hasBasicD();
9670
9671 case MVT::v4f32:
9672 case MVT::v2f64:
9673 return Subtarget.hasExtLSX();
9674
9675 case MVT::v8f32:
9676 case MVT::v4f64:
9677 return Subtarget.hasExtLASX();
9678
9679 default:
9680 return false;
9681 }
9682}
9683
9685 SelectionDAG &DAG, int Enabled,
9686 int &RefinementSteps,
9687 bool &UseOneConstNR,
9688 bool Reciprocal) const {
9690 "Enabled should never be Disabled here");
9691
9692 if (!Subtarget.hasFrecipe())
9693 return SDValue();
9694
9695 SDLoc DL(Operand);
9696 EVT VT = Operand.getValueType();
9697
9698 // Check supported types.
9699 if (!isSupportedReciprocalEstimateType(VT, Subtarget))
9700 return SDValue();
9701
9702 // Handle refinement steps.
9703 if (RefinementSteps == ReciprocalEstimate::Unspecified)
9704 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
9705
9706 // LoongArch only has FRSQRTE which is 1.0 / sqrt(x).
9707 UseOneConstNR = false;
9708 SDValue Rsqrt = DAG.getNode(LoongArchISD::FRSQRTE, DL, VT, Operand);
9709
9710 // If the caller wants 1.0 / sqrt(x), or if further refinement steps
9711 // are needed (which rely on the reciprocal form), return the raw reciprocal
9712 // estimate.
9713 if (Reciprocal || RefinementSteps > 0)
9714 return Rsqrt;
9715
9716 // Otherwise, return sqrt(x) by multiplying with the operand.
9717 return DAG.getNode(ISD::FMUL, DL, VT, Operand, Rsqrt);
9718}
9719
9721 SelectionDAG &DAG,
9722 int Enabled,
9723 int &RefinementSteps) const {
9725 "Enabled should never be Disabled here");
9726
9727 if (!Subtarget.hasFrecipe())
9728 return SDValue();
9729
9730 SDLoc DL(Operand);
9731 EVT VT = Operand.getValueType();
9732
9733 // Check supported types.
9734 if (!isSupportedReciprocalEstimateType(VT, Subtarget))
9735 return SDValue();
9736
9737 if (RefinementSteps == ReciprocalEstimate::Unspecified)
9738 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
9739
9740 // FRECIPE computes 1.0 / x.
9741 return DAG.getNode(LoongArchISD::FRECIPE, DL, VT, Operand);
9742}
9743
9744//===----------------------------------------------------------------------===//
9745// LoongArch Inline Assembly Support
9746//===----------------------------------------------------------------------===//
9747
9749LoongArchTargetLowering::getConstraintType(StringRef Constraint) const {
9750 // LoongArch specific constraints in GCC: config/loongarch/constraints.md
9751 //
9752 // 'f': A floating-point register (if available).
9753 // 'k': A memory operand whose address is formed by a base register and
9754 // (optionally scaled) index register.
9755 // 'l': A signed 16-bit constant.
9756 // 'm': A memory operand whose address is formed by a base register and
9757 // offset that is suitable for use in instructions with the same
9758 // addressing mode as st.w and ld.w.
9759 // 'q': A general-purpose register except for $r0 and $r1 (for the csrxchg
9760 // instruction)
9761 // 'I': A signed 12-bit constant (for arithmetic instructions).
9762 // 'J': Integer zero.
9763 // 'K': An unsigned 12-bit constant (for logic instructions).
9764 // "ZB": An address that is held in a general-purpose register. The offset is
9765 // zero.
9766 // "ZC": A memory operand whose address is formed by a base register and
9767 // offset that is suitable for use in instructions with the same
9768 // addressing mode as ll.w and sc.w.
9769 if (Constraint.size() == 1) {
9770 switch (Constraint[0]) {
9771 default:
9772 break;
9773 case 'f':
9774 case 'q':
9775 return C_RegisterClass;
9776 case 'l':
9777 case 'I':
9778 case 'J':
9779 case 'K':
9780 return C_Immediate;
9781 case 'k':
9782 return C_Memory;
9783 }
9784 }
9785
9786 if (Constraint == "ZC" || Constraint == "ZB")
9787 return C_Memory;
9788
9789 // 'm' is handled here.
9790 return TargetLowering::getConstraintType(Constraint);
9791}
9792
9793InlineAsm::ConstraintCode LoongArchTargetLowering::getInlineAsmMemConstraint(
9794 StringRef ConstraintCode) const {
9795 return StringSwitch<InlineAsm::ConstraintCode>(ConstraintCode)
9799 .Default(TargetLowering::getInlineAsmMemConstraint(ConstraintCode));
9800}
9801
9802std::pair<unsigned, const TargetRegisterClass *>
9803LoongArchTargetLowering::getRegForInlineAsmConstraint(
9804 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
9805 // First, see if this is a constraint that directly corresponds to a LoongArch
9806 // register class.
9807 if (Constraint.size() == 1) {
9808 switch (Constraint[0]) {
9809 case 'r':
9810 // TODO: Support fixed vectors up to GRLen?
9811 if (VT.isVector())
9812 break;
9813 return std::make_pair(0U, &LoongArch::GPRRegClass);
9814 case 'q':
9815 return std::make_pair(0U, &LoongArch::GPRNoR0R1RegClass);
9816 case 'f':
9817 if (Subtarget.hasBasicF() && VT == MVT::f32)
9818 return std::make_pair(0U, &LoongArch::FPR32RegClass);
9819 if (Subtarget.hasBasicD() && VT == MVT::f64)
9820 return std::make_pair(0U, &LoongArch::FPR64RegClass);
9821 if (Subtarget.hasExtLSX() &&
9822 TRI->isTypeLegalForClass(LoongArch::LSX128RegClass, VT))
9823 return std::make_pair(0U, &LoongArch::LSX128RegClass);
9824 if (Subtarget.hasExtLASX() &&
9825 TRI->isTypeLegalForClass(LoongArch::LASX256RegClass, VT))
9826 return std::make_pair(0U, &LoongArch::LASX256RegClass);
9827 break;
9828 default:
9829 break;
9830 }
9831 }
9832
9833 // TargetLowering::getRegForInlineAsmConstraint uses the name of the TableGen
9834 // record (e.g. the "R0" in `def R0`) to choose registers for InlineAsm
9835 // constraints while the official register name is prefixed with a '$'. So we
9836 // clip the '$' from the original constraint string (e.g. {$r0} to {r0}.)
9837 // before it being parsed. And TargetLowering::getRegForInlineAsmConstraint is
9838 // case insensitive, so no need to convert the constraint to upper case here.
9839 //
9840 // For now, no need to support ABI names (e.g. `$a0`) as clang will correctly
9841 // decode the usage of register name aliases into their official names. And
9842 // AFAIK, the not yet upstreamed `rustc` for LoongArch will always use
9843 // official register names.
9844 if (Constraint.starts_with("{$r") || Constraint.starts_with("{$f") ||
9845 Constraint.starts_with("{$vr") || Constraint.starts_with("{$xr")) {
9846 bool IsFP = Constraint[2] == 'f';
9847 std::pair<StringRef, StringRef> Temp = Constraint.split('$');
9848 std::pair<unsigned, const TargetRegisterClass *> R;
9850 TRI, join_items("", Temp.first, Temp.second), VT);
9851 // Match those names to the widest floating point register type available.
9852 if (IsFP) {
9853 unsigned RegNo = R.first;
9854 if (LoongArch::F0 <= RegNo && RegNo <= LoongArch::F31) {
9855 if (Subtarget.hasBasicD() && (VT == MVT::f64 || VT == MVT::Other)) {
9856 unsigned DReg = RegNo - LoongArch::F0 + LoongArch::F0_64;
9857 return std::make_pair(DReg, &LoongArch::FPR64RegClass);
9858 }
9859 }
9860 }
9861 return R;
9862 }
9863
9864 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
9865}
9866
9867void LoongArchTargetLowering::LowerAsmOperandForConstraint(
9868 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
9869 SelectionDAG &DAG) const {
9870 // Currently only support length 1 constraints.
9871 if (Constraint.size() == 1) {
9872 switch (Constraint[0]) {
9873 case 'l':
9874 // Validate & create a 16-bit signed immediate operand.
9875 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
9876 uint64_t CVal = C->getSExtValue();
9877 if (isInt<16>(CVal))
9878 Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op),
9879 Subtarget.getGRLenVT()));
9880 }
9881 return;
9882 case 'I':
9883 // Validate & create a 12-bit signed immediate operand.
9884 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
9885 uint64_t CVal = C->getSExtValue();
9886 if (isInt<12>(CVal))
9887 Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op),
9888 Subtarget.getGRLenVT()));
9889 }
9890 return;
9891 case 'J':
9892 // Validate & create an integer zero operand.
9893 if (auto *C = dyn_cast<ConstantSDNode>(Op))
9894 if (C->getZExtValue() == 0)
9895 Ops.push_back(
9896 DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getGRLenVT()));
9897 return;
9898 case 'K':
9899 // Validate & create a 12-bit unsigned immediate operand.
9900 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
9901 uint64_t CVal = C->getZExtValue();
9902 if (isUInt<12>(CVal))
9903 Ops.push_back(
9904 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT()));
9905 }
9906 return;
9907 default:
9908 break;
9909 }
9910 }
9912}
9913
9914#define GET_REGISTER_MATCHER
9915#include "LoongArchGenAsmMatcher.inc"
9916
9919 const MachineFunction &MF) const {
9920 std::pair<StringRef, StringRef> Name = StringRef(RegName).split('$');
9921 std::string NewRegName = Name.second.str();
9922 Register Reg = MatchRegisterAltName(NewRegName);
9923 if (!Reg)
9924 Reg = MatchRegisterName(NewRegName);
9925 if (!Reg)
9926 return Reg;
9927 BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
9928 if (!ReservedRegs.test(Reg))
9929 report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
9930 StringRef(RegName) + "\"."));
9931 return Reg;
9932}
9933
9935 EVT VT, SDValue C) const {
9936 // TODO: Support vectors.
9937 if (!VT.isScalarInteger())
9938 return false;
9939
9940 // Omit the optimization if the data size exceeds GRLen.
9941 if (VT.getSizeInBits() > Subtarget.getGRLen())
9942 return false;
9943
9944 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
9945 const APInt &Imm = ConstNode->getAPIntValue();
9946 // Break MUL into (SLLI + ADD/SUB) or ALSL.
9947 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
9948 (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
9949 return true;
9950 // Break MUL into (ALSL x, (SLLI x, imm0), imm1).
9951 if (ConstNode->hasOneUse() &&
9952 ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
9953 (Imm - 8).isPowerOf2() || (Imm - 16).isPowerOf2()))
9954 return true;
9955 // Break (MUL x, imm) into (ADD (SLLI x, s0), (SLLI x, s1)),
9956 // in which the immediate has two set bits. Or Break (MUL x, imm)
9957 // into (SUB (SLLI x, s0), (SLLI x, s1)), in which the immediate
9958 // equals to (1 << s0) - (1 << s1).
9959 if (ConstNode->hasOneUse() && !(Imm.sge(-2048) && Imm.sle(4095))) {
9960 unsigned Shifts = Imm.countr_zero();
9961 // Reject immediates which can be composed via a single LUI.
9962 if (Shifts >= 12)
9963 return false;
9964 // Reject multiplications can be optimized to
9965 // (SLLI (ALSL x, x, 1/2/3/4), s).
9966 APInt ImmPop = Imm.ashr(Shifts);
9967 if (ImmPop == 3 || ImmPop == 5 || ImmPop == 9 || ImmPop == 17)
9968 return false;
9969 // We do not consider the case `(-Imm - ImmSmall).isPowerOf2()`,
9970 // since it needs one more instruction than other 3 cases.
9971 APInt ImmSmall = APInt(Imm.getBitWidth(), 1ULL << Shifts, true);
9972 if ((Imm - ImmSmall).isPowerOf2() || (Imm + ImmSmall).isPowerOf2() ||
9973 (ImmSmall - Imm).isPowerOf2())
9974 return true;
9975 }
9976 }
9977
9978 return false;
9979}
9980
9982 const AddrMode &AM,
9983 Type *Ty, unsigned AS,
9984 Instruction *I) const {
9985 // LoongArch has four basic addressing modes:
9986 // 1. reg
9987 // 2. reg + 12-bit signed offset
9988 // 3. reg + 14-bit signed offset left-shifted by 2
9989 // 4. reg1 + reg2
9990 // TODO: Add more checks after support vector extension.
9991
9992 // No global is ever allowed as a base.
9993 if (AM.BaseGV)
9994 return false;
9995
9996 // Require a 12-bit signed offset or 14-bit signed offset left-shifted by 2
9997 // with `UAL` feature.
9998 if (!isInt<12>(AM.BaseOffs) &&
9999 !(isShiftedInt<14, 2>(AM.BaseOffs) && Subtarget.hasUAL()))
10000 return false;
10001
10002 switch (AM.Scale) {
10003 case 0:
10004 // "r+i" or just "i", depending on HasBaseReg.
10005 break;
10006 case 1:
10007 // "r+r+i" is not allowed.
10008 if (AM.HasBaseReg && AM.BaseOffs)
10009 return false;
10010 // Otherwise we have "r+r" or "r+i".
10011 break;
10012 case 2:
10013 // "2*r+r" or "2*r+i" is not allowed.
10014 if (AM.HasBaseReg || AM.BaseOffs)
10015 return false;
10016 // Allow "2*r" as "r+r".
10017 break;
10018 default:
10019 return false;
10020 }
10021
10022 return true;
10023}
10024
10026 return isInt<12>(Imm);
10027}
10028
10030 return isInt<12>(Imm);
10031}
10032
10034 // Zexts are free if they can be combined with a load.
10035 // Don't advertise i32->i64 zextload as being free for LA64. It interacts
10036 // poorly with type legalization of compares preferring sext.
10037 if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
10038 EVT MemVT = LD->getMemoryVT();
10039 if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
10040 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
10041 LD->getExtensionType() == ISD::ZEXTLOAD))
10042 return true;
10043 }
10044
10045 return TargetLowering::isZExtFree(Val, VT2);
10046}
10047
10049 EVT DstVT) const {
10050 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
10051}
10052
10054 return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
10055}
10056
10058 // TODO: Support vectors.
10059 if (Y.getValueType().isVector())
10060 return false;
10061
10062 return !isa<ConstantSDNode>(Y);
10063}
10064
10066 // LAMCAS will use amcas[_DB].{b/h/w/d} which does not require extension.
10067 return Subtarget.hasLAMCAS() ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND;
10068}
10069
10071 Type *Ty, bool IsSigned) const {
10072 if (Subtarget.is64Bit() && Ty->isIntegerTy(32))
10073 return true;
10074
10075 return IsSigned;
10076}
10077
10079 // Return false to suppress the unnecessary extensions if the LibCall
10080 // arguments or return value is a float narrower than GRLEN on a soft FP ABI.
10081 if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
10082 Type.getSizeInBits() < Subtarget.getGRLen()))
10083 return false;
10084 return true;
10085}
10086
10087// memcpy, and other memory intrinsics, typically tries to use wider load/store
10088// if the source/dest is aligned and the copy size is large enough. We therefore
10089// want to align such objects passed to memory intrinsics.
10091 unsigned &MinSize,
10092 Align &PrefAlign) const {
10093 if (!isa<MemIntrinsic>(CI))
10094 return false;
10095
10096 if (Subtarget.is64Bit()) {
10097 MinSize = 8;
10098 PrefAlign = Align(8);
10099 } else {
10100 MinSize = 4;
10101 PrefAlign = Align(4);
10102 }
10103
10104 return true;
10105}
10106
10109 if (!VT.isScalableVector() && VT.getVectorNumElements() != 1 &&
10110 VT.getVectorElementType() != MVT::i1)
10111 return TypeWidenVector;
10112
10114}
10115
10116bool LoongArchTargetLowering::splitValueIntoRegisterParts(
10117 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
10118 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
10119 bool IsABIRegCopy = CC.has_value();
10120 EVT ValueVT = Val.getValueType();
10121
10122 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
10123 PartVT == MVT::f32) {
10124 // Cast the [b]f16 to i16, extend to i32, pad with ones to make a float
10125 // nan, and cast to f32.
10126 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);
10127 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);
10128 Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val,
10129 DAG.getConstant(0xFFFF0000, DL, MVT::i32));
10130 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
10131 Parts[0] = Val;
10132 return true;
10133 }
10134
10135 return false;
10136}
10137
10138SDValue LoongArchTargetLowering::joinRegisterPartsIntoValue(
10139 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
10140 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
10141 bool IsABIRegCopy = CC.has_value();
10142
10143 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
10144 PartVT == MVT::f32) {
10145 SDValue Val = Parts[0];
10146
10147 // Cast the f32 to i32, truncate to i16, and cast back to [b]f16.
10148 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
10149 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);
10150 Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
10151 return Val;
10152 }
10153
10154 return SDValue();
10155}
10156
10157MVT LoongArchTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
10158 CallingConv::ID CC,
10159 EVT VT) const {
10160 // Use f32 to pass f16.
10161 if (VT == MVT::f16 && Subtarget.hasBasicF())
10162 return MVT::f32;
10163
10165}
10166
10167unsigned LoongArchTargetLowering::getNumRegistersForCallingConv(
10168 LLVMContext &Context, CallingConv::ID CC, EVT VT) const {
10169 // Use f32 to pass f16.
10170 if (VT == MVT::f16 && Subtarget.hasBasicF())
10171 return 1;
10172
10174}
10175
10177 const SDValue Op, KnownBits &Known, const APInt &DemandedElts,
10178 const SelectionDAG &DAG, unsigned Depth) const {
10179 unsigned Opc = Op.getOpcode();
10180 Known.resetAll();
10181 switch (Opc) {
10182 default:
10183 break;
10184 case LoongArchISD::VPICK_ZEXT_ELT: {
10185 assert(isa<VTSDNode>(Op->getOperand(2)) && "Unexpected operand!");
10186 EVT VT = cast<VTSDNode>(Op->getOperand(2))->getVT();
10187 unsigned VTBits = VT.getScalarSizeInBits();
10188 assert(Known.getBitWidth() >= VTBits && "Unexpected width!");
10189 Known.Zero.setBitsFrom(VTBits);
10190 break;
10191 }
10192 }
10193}
10194
10196 SDValue Op, const APInt &OriginalDemandedBits,
10197 const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
10198 unsigned Depth) const {
10199 EVT VT = Op.getValueType();
10200 unsigned BitWidth = OriginalDemandedBits.getBitWidth();
10201 unsigned Opc = Op.getOpcode();
10202 switch (Opc) {
10203 default:
10204 break;
10205 case LoongArchISD::VMSKLTZ:
10206 case LoongArchISD::XVMSKLTZ: {
10207 SDValue Src = Op.getOperand(0);
10208 MVT SrcVT = Src.getSimpleValueType();
10209 unsigned SrcBits = SrcVT.getScalarSizeInBits();
10210 unsigned NumElts = SrcVT.getVectorNumElements();
10211
10212 // If we don't need the sign bits at all just return zero.
10213 if (OriginalDemandedBits.countr_zero() >= NumElts)
10214 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
10215
10216 // Only demand the vector elements of the sign bits we need.
10217 APInt KnownUndef, KnownZero;
10218 APInt DemandedElts = OriginalDemandedBits.zextOrTrunc(NumElts);
10219 if (SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef, KnownZero,
10220 TLO, Depth + 1))
10221 return true;
10222
10223 Known.Zero = KnownZero.zext(BitWidth);
10224 Known.Zero.setHighBits(BitWidth - NumElts);
10225
10226 // [X]VMSKLTZ only uses the MSB from each vector element.
10227 KnownBits KnownSrc;
10228 APInt DemandedSrcBits = APInt::getSignMask(SrcBits);
10229 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, KnownSrc, TLO,
10230 Depth + 1))
10231 return true;
10232
10233 if (KnownSrc.One[SrcBits - 1])
10234 Known.One.setLowBits(NumElts);
10235 else if (KnownSrc.Zero[SrcBits - 1])
10236 Known.Zero.setLowBits(NumElts);
10237
10238 // Attempt to avoid multi-use ops if we don't need anything from it.
10240 Src, DemandedSrcBits, DemandedElts, TLO.DAG, Depth + 1))
10241 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewSrc));
10242 return false;
10243 }
10244 }
10245
10247 Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth);
10248}
10249
10251 unsigned Opc = VecOp.getOpcode();
10252
10253 // Assume target opcodes can't be scalarized.
10254 // TODO - do we have any exceptions?
10255 if (Opc >= ISD::BUILTIN_OP_END || !isBinOp(Opc))
10256 return false;
10257
10258 // If the vector op is not supported, try to convert to scalar.
10259 EVT VecVT = VecOp.getValueType();
10261 return true;
10262
10263 // If the vector op is supported, but the scalar op is not, the transform may
10264 // not be worthwhile.
10265 EVT ScalarVT = VecVT.getScalarType();
10266 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT);
10267}
10268
10270 unsigned Index) const {
10272 return false;
10273
10274 // Extract a 128-bit subvector from index 0 of a 256-bit vector is free.
10275 return Index == 0;
10276}
10277
10279 unsigned Index) const {
10280 EVT EltVT = VT.getScalarType();
10281
10282 // Extract a scalar FP value from index 0 of a vector is free.
10283 return (EltVT == MVT::f32 || EltVT == MVT::f64) && Index == 0;
10284}
static MCRegister MatchRegisterName(StringRef Name)
static bool checkValueWidth(SDValue V, unsigned width, ISD::LoadExtType &ExtType)
static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
return SDValue()
static SDValue performANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue performSETCCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static MCRegister MatchRegisterAltName(StringRef Name)
Maps from the set of all alternative register names to a register number.
Function Alias Analysis Results
static uint64_t getConstant(const Value *IndexValue)
static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG, unsigned Flags)
#define X(NUM, ENUM, NAME)
Definition ELF.h:851
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static MachineBasicBlock * emitSelectPseudo(MachineInstr &MI, MachineBasicBlock *BB, unsigned Opcode)
static SDValue unpackFromRegLoc(const CSKYSubtarget &Subtarget, SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define RegName(no)
static SDValue performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
const MCPhysReg ArgFPR32s[]
static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Dispatching routine to lower various 128-bit LoongArch vector shuffles.
static SDValue lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVSHUF4I (if possible).
const MCPhysReg ArgVRs[]
static SDValue lowerVECTOR_SHUFFLE_VPICKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPICKEV (if possible).
static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_XVPICKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPICKOD (if possible).
static SDValue unpackF64OnLA32DSoftABI(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const CCValAssign &HiVA, const SDLoc &DL)
static bool fitsRegularPattern(typename SmallVectorImpl< ValType >::const_iterator Begin, unsigned CheckStride, typename SmallVectorImpl< ValType >::const_iterator End, ValType ExpectedIndex, unsigned ExpectedIndexStride)
Determine whether a range fits a regular pattern of values.
static SDValue lowerVECTOR_SHUFFLE_IsReverse(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE whose result is the reversed source vector.
static SDValue PromoteMaskArithmetic(SDValue N, const SDLoc &DL, EVT VT, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, unsigned Depth)
static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg, SelectionDAG &DAG)
static cl::opt< bool > ZeroDivCheck("loongarch-check-zero-division", cl::Hidden, cl::desc("Trap on integer division by zero."), cl::init(false))
static SDValue lowerVECTOR_SHUFFLE_VSHUF(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into VSHUF.
static int getEstimateRefinementSteps(EVT VT, const LoongArchSubtarget &Subtarget)
static bool isSupportedReciprocalEstimateType(EVT VT, const LoongArchSubtarget &Subtarget)
static void emitErrorAndReplaceIntrinsicResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, StringRef ErrorMsg, bool WithChain=true)
static SDValue lowerVECTOR_SHUFFLEAsByteRotate(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE as byte rotate (if possible).
static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp, SelectionDAG &DAG, bool IsSigned=false)
static SDValue lowerVECTOR_SHUFFLE_XVINSVE0(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVINSVE0 (if possible).
static SDValue performMOVFR2GR_SCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_VILVH(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VILVH (if possible).
static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI, unsigned ValNo, MVT ValVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsRet, Type *OrigTy)
static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG)
static SDValue performSPLIT_PAIR_F64Combine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue performBITCASTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static MachineBasicBlock * emitSplitPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue lowerVectorBitSetImm(SDNode *Node, SelectionDAG &DAG)
static SDValue performSETCC_BITCASTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
@ NoMaterializeFPImm
@ MaterializeFPImm2Ins
@ MaterializeFPImm5Ins
@ MaterializeFPImm6Ins
@ MaterializeFPImm3Ins
@ MaterializeFPImm4Ins
static SDValue performEXTENDCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_XVPACKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPACKOD (if possible).
static std::optional< bool > matchSetCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue Val)
static SDValue combineAndNotIntoVANDN(SDNode *N, const SDLoc &DL, SelectionDAG &DAG)
Try to fold: (and (xor X, -1), Y) -> (vandn X, Y).
static SDValue lowerBUILD_VECTORAsBroadCastLoad(BuildVectorSDNode *BVOp, const SDLoc &DL, SelectionDAG &DAG)
#define CRC_CASE_EXT_BINARYOP(NAME, NODE)
static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG)
static bool checkBitcastSrcVectorSize(SDValue Src, unsigned Size, unsigned Depth)
static SDValue lowerVECTOR_SHUFFLEAsShift(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, const APInt &Zeroable)
Lower VECTOR_SHUFFLE as shift (if possible).
static SDValue lowerVECTOR_SHUFFLE_VSHUF4I(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into VSHUF4I (if possible).
static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG)
static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
static MachineBasicBlock * insertDivByZeroTrap(MachineInstr &MI, MachineBasicBlock *MBB)
static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG)
static SDValue lowerVECTOR_SHUFFLE_VEXTRINS(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into VEXTRINS (if possible).
static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG)
static SDValue lowerVECTOR_SHUFFLE_VPACKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPACKEV (if possible).
static MachineBasicBlock * emitPseudoVMSKCOND(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue performSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue performSELECT_CCCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue performVANDNCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
Do target-specific dag combines on LoongArchISD::VANDN nodes.
static void replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, unsigned ResOp)
static SDValue lowerVECTOR_SHUFFLEAsZeroOrAnyExtend(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const APInt &Zeroable)
Lower VECTOR_SHUFFLE as ZERO_EXTEND Or ANY_EXTEND (if possible).
static SDValue legalizeIntrinsicImmArg(SDNode *Node, unsigned ImmOp, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, bool IsSigned=false)
static cl::opt< MaterializeFPImm > MaterializeFPImmInsNum("loongarch-materialize-float-imm", cl::Hidden, cl::desc("Maximum number of instructions used (including code sequence " "to generate the value and moving the value to FPR) when " "materializing floating-point immediates (default = 3)"), cl::init(MaterializeFPImm3Ins), cl::values(clEnumValN(NoMaterializeFPImm, "0", "Use constant pool"), clEnumValN(MaterializeFPImm2Ins, "2", "Materialize FP immediate within 2 instructions"), clEnumValN(MaterializeFPImm3Ins, "3", "Materialize FP immediate within 3 instructions"), clEnumValN(MaterializeFPImm4Ins, "4", "Materialize FP immediate within 4 instructions"), clEnumValN(MaterializeFPImm5Ins, "5", "Materialize FP immediate within 5 instructions"), clEnumValN(MaterializeFPImm6Ins, "6", "Materialize FP immediate within 6 instructions " "(behaves same as 5 on loongarch64)")))
static SDValue lowerVECTOR_SHUFFLE_XVPERMI(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVPERMI (if possible).
static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op, StringRef ErrorMsg, SelectionDAG &DAG)
const MCPhysReg ArgXRs[]
static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State, CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2, MVT ValVT2, MVT LocVT2, ISD::ArgFlagsTy ArgFlags2)
static unsigned getLoongArchWOpcode(unsigned Opcode)
const MCPhysReg ArgFPR64s[]
static MachineBasicBlock * emitPseudoCTPOP(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue performMOVGR2FR_WCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
#define IOCSRWR_CASE(NAME, NODE)
#define CRC_CASE_EXT_UNARYOP(NAME, NODE)
static SDValue lowerVECTOR_SHUFFLE_VPACKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPACKOD (if possible).
static SDValue signExtendBitcastSrcVector(SelectionDAG &DAG, EVT SExtVT, SDValue Src, const SDLoc &DL)
static SDValue isNOT(SDValue V, SelectionDAG &DAG)
static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Dispatching routine to lower various 256-bit LoongArch vector shuffles.
static SDValue lowerVECTOR_SHUFFLE_VREPLVEI(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into VREPLVEI (if possible).
static MachineBasicBlock * emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
const MCPhysReg PreserveNoneArgGPRs[]
static void fillVector(ArrayRef< SDValue > Ops, SelectionDAG &DAG, SDLoc DL, const LoongArchSubtarget &Subtarget, SDValue &Vector, EVT ResTy)
static SDValue fillSubVectorFromBuildVector(BuildVectorSDNode *Node, SelectionDAG &DAG, SDLoc DL, const LoongArchSubtarget &Subtarget, EVT ResTy, unsigned first)
static bool isSelectPseudo(MachineInstr &MI)
static SDValue foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp, SelectionDAG &DAG, bool IsSigned=false)
const MCPhysReg ArgGPRs[]
static SDValue lowerVECTOR_SHUFFLE_XVPERM(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVPERM (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVILVL(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVILVL (if possible).
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp, unsigned ExtOpc=ISD::ANY_EXTEND)
static void replaceVecCondBranchResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, unsigned ResOp)
#define ASRT_LE_GT_CASE(NAME)
static SDValue lowerVECTOR_SHUFFLE_XVPACKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPACKEV (if possible).
static SDValue performBR_CCCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static void computeZeroableShuffleElements(ArrayRef< int > Mask, SDValue V1, SDValue V2, APInt &KnownUndef, APInt &KnownZero)
Compute whether each element of a shuffle is zeroable.
static SDValue combineFP_ROUND(SDValue N, const SDLoc &DL, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue widenShuffleMask(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
static MachineBasicBlock * emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static bool canonicalizeShuffleVectorByLane(const SDLoc &DL, MutableArrayRef< int > Mask, MVT VT, SDValue &V1, SDValue &V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Shuffle vectors by lane to generate more optimized instructions.
static SDValue lowerVECTOR_SHUFFLE_XVILVH(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVILVH (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVSHUF (if possible).
static void replaceCMP_XCHG_128Results(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG)
static SDValue lowerVectorPickVE2GR(SDNode *N, SelectionDAG &DAG, unsigned ResOp)
static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
#define IOCSRRD_CASE(NAME, NODE)
static int matchShuffleAsByteRotate(MVT VT, SDValue &V1, SDValue &V2, ArrayRef< int > Mask)
Attempts to match vector shuffle as byte rotation.
static SDValue lowerVECTOR_SHUFFLE_XVPICKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPICKEV (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVREPLVEI (if possible).
static int matchShuffleAsShift(MVT &ShiftVT, unsigned &Opcode, unsigned ScalarSizeInBits, ArrayRef< int > Mask, int MaskOffset, const APInt &Zeroable)
Attempts to match a shuffle mask against the VBSLL, VBSRL, VSLLI and VSRLI instruction.
static SDValue lowerVECTOR_SHUFFLE_VILVL(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VILVL (if possible).
static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG)
static MachineBasicBlock * emitBuildPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE as lane permute and then shuffle (if possible).
static SDValue performVMSKLTZCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static void replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
#define CSR_CASE(ID)
static SDValue lowerVECTOR_SHUFFLE_VPICKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPICKOD (if possible).
static Intrinsic::ID getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen, AtomicRMWInst::BinOp BinOp)
static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS, ISD::CondCode &CC, SelectionDAG &DAG)
static Register allocateArgGPR(CCState &State)
static bool isRepeatedShuffleMask(unsigned LaneSizeInBits, MVT VT, ArrayRef< int > Mask, SmallVectorImpl< int > &RepeatedMask)
Test whether a shuffle mask is equivalent within each sub-lane.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define T
static CodeModel::Model getCodeModel(const PPCSubtarget &S, const TargetMachine &TM, const MachineOperand &MO)
This file defines the SmallSet class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
This file contains some functions that are useful when dealing with strings.
#define LLVM_DEBUG(...)
Definition Debug.h:114
static bool inRange(const MCExpr *Expr, int64_t MinValue, int64_t MaxValue, bool AllowSymbol=false)
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static bool isSequentialOrUndefInRange(ArrayRef< int > Mask, unsigned Pos, unsigned Size, int Low, int Step=1)
Return true if every element in Mask, beginning from position Pos and ending in Pos + Size,...
Value * RHS
Value * LHS
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
Definition APFloat.h:1499
bool isZero() const
Definition APFloat.h:1512
APInt bitcastToAPInt() const
Definition APFloat.h:1408
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1054
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:230
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1563
void setHighBits(unsigned hiBits)
Set the top hiBits bits.
Definition APInt.h:1414
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition APInt.h:1408
LLVM_ABI APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition APInt.cpp:1075
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:967
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition APInt.h:1353
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:372
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition APInt.cpp:1708
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1511
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1662
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition APInt.h:436
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1264
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:201
void setLowBits(unsigned loBits)
Set the bottom loBits bits.
Definition APInt.h:1411
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition APInt.h:287
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1585
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:858
This class represents an incoming formal argument to a Function.
Definition Argument.h:32
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
An instruction that atomically checks whether a specified value is in a memory location,...
AtomicOrdering getFailureOrdering() const
Returns the failure ordering constraint of this cmpxchg instruction.
an instruction that atomically reads a memory location, combines it with another value,...
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ Add
*p = old + v
@ USubCond
Subtract only if no unsigned overflow.
@ Min
*p = old <signed v ? old : v
@ Sub
*p = old - v
@ And
*p = old & v
@ Xor
*p = old ^ v
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ UIncWrap
Increment one up to a maximum value.
@ Max
*p = old >signed v ? old : v
@ UMin
*p = old <unsigned v ? old : v
@ UMax
*p = old >unsigned v ? old : v
@ UDecWrap
Decrement one until a minimum value or zero.
@ Nand
*p = ~(old & v)
Value * getPointerOperand()
bool isFloatingPointOperation() const
BinOp getOperation() const
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this rmw instruction.
AtomicOrdering getOrdering() const
Returns the ordering constraint of this rmw instruction.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
bool test(unsigned Idx) const
Definition BitVector.h:480
size_type count() const
count - Returns the number of bits which are set.
Definition BitVector.h:181
A "pseudo-class" with methods for operating on BUILD_VECTORs.
CCState - This class holds information needed while lowering arguments and return values.
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
LLVM_ABI void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeCallOperands - Analyze the outgoing arguments to a call, incorporating info about the passed v...
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
LLVM_ABI void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
CCValAssign - Represent assignment of one arg/retval to a location.
static CCValAssign getPending(unsigned ValNo, MVT ValVT, MVT LocVT, LocInfo HTP, unsigned ExtraInfo=0)
Register getLocReg() const
LocInfo getLocInfo() const
static CCValAssign getReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP, bool IsCustom=false)
static CCValAssign getCustomReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP)
static CCValAssign getMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP, bool IsCustom=false)
bool needsCustom() const
int64_t getLocMemOffset() const
unsigned getValNo() const
static CCValAssign getCustomMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP)
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
LLVM_ABI bool isMustTailCall() const
Tests if this call site must be tail call optimized.
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
const APFloat & getValueAPF() const
This is the shared class of boolean and integer constants.
Definition Constants.h:87
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition Constants.h:231
bool isZero() const
This is just a convenience method to make client code smaller for a common case.
Definition Constants.h:219
uint64_t getZExtValue() const
int64_t getSExtValue() const
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
unsigned getPointerSizeInBits(unsigned AS=0) const
The size in bits of the pointer representation in a given address space.
Definition DataLayout.h:494
LLVM_ABI Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
A debug info location.
Definition DebugLoc.h:123
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition Function.h:211
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition Function.h:272
Argument * getArg(unsigned i) const
Definition Function.h:886
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:728
bool isDSOLocal() const
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2835
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Class to represent integer types.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI void emitError(const Instruction *I, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location inf...
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
LoongArchMachineFunctionInfo - This class is derived from MachineFunctionInfo and contains private Lo...
const LoongArchRegisterInfo * getRegisterInfo() const override
const LoongArchInstrInfo * getInstrInfo() const override
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override
Return true if result of the specified node is used by a return node only.
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps, bool &UseOneConstNR, bool Reciprocal) const override
Hooks for building estimates in place of slower divisions and square roots.
bool isLegalICmpImmediate(int64_t Imm) const override
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(const AtomicCmpXchgInst *CI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
Return the ValueType of the result of SETCC operations.
std::pair< bool, uint64_t > isImmVLDILegalForMode1(const APInt &SplatValue, const unsigned SplatBitSize) const
Check if a constant splat can be generated using [x]vldi, where imm[12] is 1.
void getTgtMemIntrinsic(SmallVectorImpl< IntrinsicInfo > &Infos, const CallBase &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
bool isExtractVecEltCheap(EVT VT, unsigned Index) const override
Return true if extraction of a scalar element from the given vector type at the given index is cheap.
LegalizeTypeAction getPreferredVectorAction(MVT VT) const override
Return the preferred vector type legalization action.
bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Determine if the target supports unaligned memory accesses.
bool isCheapToSpeculateCtlz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic ctlz.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
bool shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize, Align &PrefAlign) const override
Return true if the pointer arguments to CI should be aligned by aligning the object whose address is ...
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const override
Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type from this source type with ...
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
bool signExtendConstant(const ConstantInt *CI) const override
Return true if this constant should be sign extended when promoting to a larger type.
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(const AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool isLegalAddImmediate(int64_t Imm) const override
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
bool isCheapToSpeculateCttz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic cttz.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const override
Returns true if arguments should be sign-extended in lib calls.
bool shouldScalarizeBinop(SDValue VecOp) const override
Try to convert an extract element of a vector binary operation into an extract element followed by a ...
bool isFPImmVLDILegal(const APFloat &Imm, EVT VT) const
bool shouldExtendTypeInLibCall(EVT Type) const override
Returns true if arguments should be extended in lib calls.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
bool hasAndNot(SDValue Y) const override
Return true if the target has a bitwise and-not operation: X = ~A & B This can be used to simplify se...
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const override
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
void emitExpandAtomicRMW(AtomicRMWInst *AI) const override
Perform an atomicrmw expansion using a target-specific way.
ISD::NodeType getExtendForAtomicCmpSwapArg() const override
Returns how the platform's atomic compare and swap expects its comparison value to be extended (ZERO_...
LoongArchTargetLowering(const TargetMachine &TM, const LoongArchSubtarget &STI)
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
bool hasAndNotCompare(SDValue Y) const override
Return true if the target should transform: (X & Y) == Y ---> (~X & Y) == 0 (X & Y) !...
SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps) const override
Return a reciprocal estimate value for the input operand.
bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT, const MachineFunction &MF) const override
Returns if it's reasonable to merge stores to MemVT size.
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context, const Type *RetTy) const override
This hook should be implemented to check whether the return values described by the Outs array can fi...
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array,...
bool mayBeEmittedAsTailCall(const CallInst *CI) const override
Return true if the target may be able to emit the call instruction as a tail call.
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:41
bool hasFeature(unsigned Feature) const
Machine Value Type.
static MVT getFloatingPointVT(unsigned BitWidth)
bool is128BitVector() const
Return true if this is a 128-bit vector type.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isScalableVector() const
Return true if this is a vector value type where the runtime length is machine dependent.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
bool is256BitVector() const
Return true if this is a 256-bit vector type.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
MVT getHalfNumVectorElementsVT() const
Return a VT for a vector type with the same element type but half the number of elements.
MVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
void push_back(MachineInstr *MI)
void setCallFrameSize(unsigned N)
Set the call frame size on entry to this basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
Representation of each machine instruction.
bool isImplicitDef() const
LLVM_ABI void collectDebugValues(SmallVectorImpl< MachineInstr * > &DbgValues)
Scan instructions immediately following MI and collect any matching DBG_VALUEs.
const MachineOperand & getOperand(unsigned i) const
LLVM_ABI MachineInstrBundleIterator< MachineInstr > eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
A description of a memory reference used in the backend.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
Flags getFlags() const
Return the raw flags of the source value.
MachineOperand class - Representation of each machine instruction operand.
void setIsKill(bool Val=true)
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
Align getAlign() const
MachineMemOperand * getMemOperand() const
Return the unique MachineMemOperand object describing the memory reference performed by operation.
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition ArrayRef.h:298
Class to represent pointers.
unsigned getAddressSpace() const
Return the address space of the Pointer type.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:79
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
LLVM_ABI bool isOnlyUserOf(const SDNode *N) const
Return true if this node is the only use of N.
size_t use_size() const
Return the number of uses of this node.
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumOperands() const
Return the number of values used by this operation.
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
bool isUndef() const
Returns true if the node type is UNDEF or POISON.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
uint64_t getScalarValueSizeInBits() const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
LLVM_ABI SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
bool isSafeToSpeculativelyExecute(unsigned Opcode) const
Some opcodes may create immediate undefined behavior when used with some values (integer division-by-...
SDValue getExtractSubvector(const SDLoc &DL, EVT VT, SDValue Vec, unsigned Idx)
Return the VT typed sub-vector of Vec at Idx.
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getInsertSubvector(const SDLoc &DL, SDValue Vec, SDValue SubVec, unsigned Idx)
Insert SubVec at the Idx element of Vec.
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false, SDNodeFlags Flags={})
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
LLVM_ABI SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), BatchAAResults *BatchAA=nullptr)
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
static constexpr unsigned MaxRecursionDepth
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
LLVM_ABI SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
LLVM_ABI void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
LLVM_ABI SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
LLVM_ABI SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
LLVM_ABI std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
LLVM_ABI SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
LLVM_ABI SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
LLVM_ABI SDValue WidenVector(const SDValue &N, const SDLoc &DL)
Widen the vector up to the next power of two using INSERT_SUBVECTOR.
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
LLVM_ABI SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
LLVM_ABI SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getRegisterMask(const uint32_t *RegMask)
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVM_ABI SDValue getCondCode(ISD::CondCode Cond)
LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
LLVMContext * getContext() const
LLVM_ABI SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
LLVM_ABI SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
LLVM_ABI std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
static LLVM_ABI bool isReverseMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask swaps the order of elements from exactly one source vector.
ArrayRef< int > getMask() const
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:134
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition SmallSet.h:176
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:184
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void assign(size_type NumElts, ValueParamT Elt)
typename SuperClass::const_iterator const_iterator
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StackOffset holds a fixed and a scalable offset in bytes.
Definition TypeSize.h:30
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition StringRef.h:730
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:258
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:143
TargetInstrInfo - Interface to description of machine instruction set.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
const TargetMachine & getTargetMachine() const
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
LegalizeTypeAction
This enum indicates whether a types are legal for a target, and if not, what action should be used to...
void setMaxBytesForAlignment(unsigned MaxBytes)
bool isOperationLegalOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal using promotion.
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
virtual TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT) const
Return the preferred vector type legalization action.
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
void setMinCmpXchgSizeInBits(unsigned SizeInBits)
Sets the minimum cmpxchg or ll/sc size supported by the backend.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
std::vector< ArgListEntry > ArgListTy
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Vector Op.
virtual InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const
SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
More limited version of SimplifyDemandedBits that can be used to "lookthrough" ops that don't contrib...
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
TargetLowering(const TargetLowering &)=delete
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::LibcallImpl LibcallImpl, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
Primary interface to the complete machine description for the target machine.
bool useTLSDESC() const
Returns true if this target uses TLS Descriptors.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
bool shouldAssumeDSOLocal(const GlobalValue *GV) const
CodeModel::Model getCodeModel() const
Returns the code model.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetInstrInfo * getInstrInfo() const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
LLVM_ABI unsigned getIntegerBitWidth() const
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:201
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:257
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:317
This class is used to represent EVT's, which are used to parameterize some operations.
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:549
self_iterator getIterator()
Definition ilist_node.h:123
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ PreserveMost
Used for runtime calls that preserves most registers.
Definition CallingConv.h:63
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition CallingConv.h:50
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ PreserveNone
Used for runtime calls that preserves none general registers.
Definition CallingConv.h:90
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition ISDOpcodes.h:41
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:819
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain.
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition ISDOpcodes.h:511
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition ISDOpcodes.h:45
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:275
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:600
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:779
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:264
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:853
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:518
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:220
@ GlobalAddress
Definition ISDOpcodes.h:88
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:880
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:584
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:417
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:747
@ MEMBARRIER
MEMBARRIER - Compiler barrier only; generate a no-op.
@ ATOMIC_FENCE
OUTCHAIN = ATOMIC_FENCE(INCHAIN, ordering, scope) This corresponds to the fence instruction.
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition ISDOpcodes.h:993
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:254
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
@ GlobalTLSAddress
Definition ISDOpcodes.h:89
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:844
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:665
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ BR_CC
BR_CC - Conditional branch.
@ BR_JT
BR_JT - Jumptable branch.
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition ISDOpcodes.h:541
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition ISDOpcodes.h:548
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:374
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:796
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:233
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:704
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:765
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:649
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:614
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimumNumber or maximumNumber on two values,...
@ READ_REGISTER
READ_REGISTER, WRITE_REGISTER - This node represents llvm.register on the DAG, which implements the n...
Definition ISDOpcodes.h:139
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:576
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition ISDOpcodes.h:224
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:850
@ DEBUGTRAP
DEBUGTRAP - Trap intended to get the attention of a debugger.
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:811
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum maximum on two values, following IEEE-754 definition...
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:888
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:727
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:978
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:805
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition ISDOpcodes.h:150
@ BF16_TO_FP
BF16_TO_FP, FP_TO_BF16 - These operators are used to perform promotions and truncation for bfloat16.
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition ISDOpcodes.h:110
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:926
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:739
@ TRAP
TRAP - Trapping instruction.
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:205
@ FREEZE
FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or is evaluated to UNDEF),...
Definition ISDOpcodes.h:241
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:565
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:959
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:856
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
@ BRCOND
BRCOND - Conditional branch.
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition ISDOpcodes.h:833
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition ISDOpcodes.h:62
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:365
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition ISDOpcodes.h:722
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:213
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:556
bool isExtVecInRegOpcode(unsigned Opcode)
LLVM_ABI CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
bool isBitwiseLogicOp(unsigned Opcode)
Whether this is bitwise logic opcode.
LLVM_ABI bool isFreezeUndef(const SDNode *N)
Return true if the specified node is FREEZE(UNDEF).
LLVM_ABI CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LLVM_ABI bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
LLVM_ABI NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode)
Get underlying scalar opcode for VECREDUCE opcode.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > OverloadTys={})
Look up the Function declaration of the intrinsic id in the Module M.
ABI getTargetABI(StringRef ABIName)
InstSeq generateInstSeq(int64_t Val)
LLVM_ABI Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition LLVMContext.h:55
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
Sequence
A sequence of states that a pointer may go through in which an objc_retain and objc_release are actua...
Definition PtrState.h:41
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
This is an optimization pass for GlobalISel generic memory operations.
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
Definition Threading.h:280
@ Offset
Definition DWP.cpp:532
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1739
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
constexpr RegState getKillRegState(bool B)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
bool isIntOrFPConstant(SDValue V)
Return true if V is either a integer or FP constant.
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
Definition bit.h:325
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
LLVM_ABI bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:337
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
Definition MathExtras.h:273
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1746
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition MathExtras.h:261
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Other
Any other memory.
Definition ModRef.h:68
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr bool isShiftedInt(int64_t x)
Checks if a signed integer is an N bit number shifted left by S.
Definition MathExtras.h:182
constexpr unsigned BitWidth
std::string join_items(Sep Separator, Args &&... Items)
Joins the strings in the parameter pack Items, adding Separator between the elements....
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
PointerUnion< const Value *, const PseudoSourceValue * > ValueType
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:177
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Extended Value Type.
Definition ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition ValueTypes.h:90
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:403
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:145
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition ValueTypes.h:292
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition ValueTypes.h:308
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:155
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:381
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:393
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:324
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition ValueTypes.h:215
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:61
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:389
static EVT getFloatingPointVT(unsigned BitWidth)
Returns the EVT that represents a floating-point type with the given number of bits.
Definition ValueTypes.h:55
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:176
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:331
bool is256BitVector() const
Return true if this is a 256-bit vector type.
Definition ValueTypes.h:220
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:336
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:165
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:344
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:469
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:160
Align getNonZeroOrigAlign() const
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
void resetAll()
Resets the known state of all bits.
Definition KnownBits.h:72
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
static LLVM_ABI MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
SmallVector< ISD::OutputArg, 32 > Outs
LLVM_ABI SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
This structure is used to pass arguments to makeLibCall function.
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...