LLVM 23.0.0git
LoongArchISelLowering.cpp
Go to the documentation of this file.
1//=- LoongArchISelLowering.cpp - LoongArch DAG Lowering Implementation ---===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that LoongArch uses to lower LLVM code into
10// a selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
15#include "LoongArch.h"
19#include "LoongArchSubtarget.h"
23#include "llvm/ADT/SmallSet.h"
24#include "llvm/ADT/Statistic.h"
30#include "llvm/IR/IRBuilder.h"
32#include "llvm/IR/IntrinsicsLoongArch.h"
34#include "llvm/Support/Debug.h"
39
40using namespace llvm;
41
42#define DEBUG_TYPE "loongarch-isel-lowering"
43
// Statistic counter surfaced via -stats; incremented by call lowering.
44STATISTIC(NumTailCalls, "Number of tail calls");
45
54
// Command-line knob bounding how many instructions may be spent
// materializing a floating-point immediate inline before falling back to a
// constant-pool load.
// NOTE(review): the opening declaration line of this cl::opt (and the
// clEnumValN name/value tokens) are elided in this rendered view — consult
// the original source for the enumerator identifiers.
56 "loongarch-materialize-float-imm", cl::Hidden,
57 cl::desc("Maximum number of instructions used (including code sequence "
58 "to generate the value and moving the value to FPR) when "
59 "materializing floating-point immediates (default = 3)"),
61 cl::values(clEnumValN(NoMaterializeFPImm, "0", "Use constant pool"),
63 "Materialize FP immediate within 2 instructions"),
65 "Materialize FP immediate within 3 instructions"),
67 "Materialize FP immediate within 4 instructions"),
69 "Materialize FP immediate within 5 instructions"),
71 "Materialize FP immediate within 6 instructions "
72 "(behaves same as 5 on loongarch64)")));
73
// When set, emit an explicit divide-by-zero trap check (off by default).
74static cl::opt<bool> ZeroDivCheck("loongarch-check-zero-division", cl::Hidden,
75 cl::desc("Trap on integer division by zero."),
76 cl::init(false));
77
// Constructor: registers the legal register classes and configures the
// per-operation legalization actions (Legal/Expand/Custom/Promote) according
// to the enabled subtarget features — F/D scalar FP, LSX/LASX vectors,
// LA32 vs LA64 — then computes derived register properties and alignment
// preferences.
// NOTE(review): many setOperationAction/setCondCodeAction argument lines are
// elided in this rendered view; the original source contains the full action
// tables.
79 const LoongArchSubtarget &STI)
80 : TargetLowering(TM, STI), Subtarget(STI) {
81
82 MVT GRLenVT = Subtarget.getGRLenVT();
83
84 // Set up the register classes.
85
86 addRegisterClass(GRLenVT, &LoongArch::GPRRegClass);
87 if (Subtarget.hasBasicF())
88 addRegisterClass(MVT::f32, &LoongArch::FPR32RegClass);
89 if (Subtarget.hasBasicD())
90 addRegisterClass(MVT::f64, &LoongArch::FPR64RegClass);
91
// Vector types carried in 128-bit LSX and 256-bit LASX registers.
92 static const MVT::SimpleValueType LSXVTs[] = {
93 MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32, MVT::v2f64};
94 static const MVT::SimpleValueType LASXVTs[] = {
95 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, MVT::v8f32, MVT::v4f64};
96
97 if (Subtarget.hasExtLSX())
98 for (MVT VT : LSXVTs)
99 addRegisterClass(VT, &LoongArch::LSX128RegClass);
100
101 if (Subtarget.hasExtLASX())
102 for (MVT VT : LASXVTs)
103 addRegisterClass(VT, &LoongArch::LASX256RegClass);
104
105 // Set operations for LA32 and LA64.
106
108 MVT::i1, Promote);
109
116
119 GRLenVT, Custom);
120
122
127
129 setOperationAction(ISD::TRAP, MVT::Other, Legal);
130
134
136
137 // BITREV/REVB requires the 32S feature.
138 if (STI.has32S()) {
139 // Expand bitreverse.i16 with native-width bitrev and shift for now, before
140 // we get to know which of sll and revb.2h is faster.
143
144 // LA32 does not have REVB.2W and REVB.D due to the 64-bit operands, and
145 // the narrower REVB.W does not exist. But LA32 does have REVB.2H, so i16
146 // and i32 could still be byte-swapped relatively cheaply.
148 } else {
156 }
157
164
167
168 // Set operations for LA64 only.
169
170 if (Subtarget.is64Bit()) {
188
192 Custom);
194 }
195
196 // Set operations for LA32 only.
197
198 if (!Subtarget.is64Bit()) {
204 if (Subtarget.hasBasicD())
206 }
207
209
// FP condition codes with no direct machine encoding; expanded for both
// f32 and f64 below. (Initializer list elided in this view.)
210 static const ISD::CondCode FPCCToExpand[] = {
213
214 // Set operations for 'F' feature.
215
216 if (Subtarget.hasBasicF()) {
217 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
218 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
219 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
220 setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
221 setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
222
241 Subtarget.isSoftFPABI() ? LibCall : Custom);
243 Subtarget.isSoftFPABI() ? LibCall : Custom);
246 Subtarget.isSoftFPABI() ? LibCall : Custom);
247
248 if (Subtarget.is64Bit())
250
251 if (!Subtarget.hasBasicD()) {
253 if (Subtarget.is64Bit()) {
256 }
257 }
258 }
259
260 // Set operations for 'D' feature.
261
262 if (Subtarget.hasBasicD()) {
263 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
264 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
265 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
266 setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
267 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
268 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
269 setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
270
290 Subtarget.isSoftFPABI() ? LibCall : Custom);
293 Subtarget.isSoftFPABI() ? LibCall : Custom);
294
295 if (Subtarget.is64Bit())
297 }
298
299 // Set operations for 'LSX' feature.
300
301 if (Subtarget.hasExtLSX()) {
303 // Expand all truncating stores and extending loads.
304 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
305 setTruncStoreAction(VT, InnerVT, Expand);
308 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
309 }
310 // By default everything must be expanded. Then we will selectively turn
311 // on ones that can be effectively codegen'd.
312 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
314 }
315
316 for (MVT VT : LSXVTs) {
320
324
329 }
330 for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
333 Legal);
335 VT, Legal);
342 Expand);
353 }
354 for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
356 for (MVT VT : {MVT::v8i16, MVT::v4i32, MVT::v2i64})
358 for (MVT VT : {MVT::v4i32, MVT::v2i64}) {
361 }
362 for (MVT VT : {MVT::v4f32, MVT::v2f64}) {
370 VT, Expand);
378 }
380 setOperationAction(ISD::FCEIL, {MVT::f32, MVT::f64}, Legal);
381 setOperationAction(ISD::FFLOOR, {MVT::f32, MVT::f64}, Legal);
382 setOperationAction(ISD::FTRUNC, {MVT::f32, MVT::f64}, Legal);
383 setOperationAction(ISD::FROUNDEVEN, {MVT::f32, MVT::f64}, Legal);
384
385 for (MVT VT :
386 {MVT::v16i8, MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v8i16, MVT::v4i16,
387 MVT::v2i16, MVT::v4i32, MVT::v2i32, MVT::v2i64}) {
397 }
400 }
401
402 // Set operations for 'LASX' feature.
403
404 if (Subtarget.hasExtLASX()) {
405 for (MVT VT : LASXVTs) {
409
415
419 }
420 for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) {
423 Legal);
425 VT, Legal);
432 Expand);
444 }
445 for (MVT VT : {MVT::v32i8, MVT::v16i16, MVT::v8i32})
447 for (MVT VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64})
449 for (MVT VT : {MVT::v8i32, MVT::v4i32, MVT::v4i64}) {
452 }
453 for (MVT VT : {MVT::v8f32, MVT::v4f64}) {
461 VT, Expand);
469 }
472 }
473
474 // Set DAG combine for LA32 and LA64.
475 if (Subtarget.hasBasicF()) {
477 }
478
483
484 // Set DAG combine for 'LSX' feature.
485
486 if (Subtarget.hasExtLSX()) {
491 }
492
493 // Set DAG combine for 'LASX' feature.
494 if (Subtarget.hasExtLASX()) {
499 }
500
501 // Compute derived properties from the register classes.
502 computeRegisterProperties(Subtarget.getRegisterInfo());
503
505
508
// Atomic operations wider than GRLen are lowered to libcalls.
509 setMaxAtomicSizeInBitsSupported(Subtarget.getGRLen());
510
512
513 // Function alignments.
515 // Set preferred alignments.
516 setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment());
517 setPrefLoopAlignment(Subtarget.getPrefLoopAlignment());
518 setMaxBytesForAlignment(Subtarget.getMaxBytesForAlignment());
519
520 // cmpxchg sizes down to 8 bits become legal if LAMCAS is available.
521 if (Subtarget.hasLAMCAS())
523
524 if (Subtarget.hasSCQ()) {
527 }
528
529 // Disable strict node mutation.
530 IsStrictFPEnabled = true;
531}
532
// NOTE(review): the opening signature line is elided in this view; this is
// presumably the override of TargetLowering::isOffsetFoldingLegal — confirm
// against the original source.
// Returning false keeps a global's constant offset as a separate ADD node
// rather than folding it into the global-address node itself.
534 const GlobalAddressSDNode *GA) const {
535 // In order to maximise the opportunity for common subexpression elimination,
536 // keep a separate ADD node for the global address offset instead of folding
537 // it in the global address node. Later peephole optimisations may choose to
538 // fold it back in when profitable.
539 return false;
540}
541
// Central dispatcher for operations marked Custom in the constructor: routes
// each opcode to its dedicated lower* helper and returns SDValue() (i.e.
// "use default lowering") for anything unhandled.
// NOTE(review): the signature line and several `case ISD::...:` labels are
// elided in this rendered view — each orphaned `return lowerX(...)` below
// belongs to an elided case label; confirm the labels in the original source.
543 SelectionDAG &DAG) const {
544 switch (Op.getOpcode()) {
546 return lowerATOMIC_FENCE(Op, DAG);
548 return lowerEH_DWARF_CFA(Op, DAG);
550 return lowerGlobalAddress(Op, DAG);
552 return lowerGlobalTLSAddress(Op, DAG);
554 return lowerINTRINSIC_WO_CHAIN(Op, DAG);
556 return lowerINTRINSIC_W_CHAIN(Op, DAG);
558 return lowerINTRINSIC_VOID(Op, DAG);
560 return lowerBlockAddress(Op, DAG);
561 case ISD::JumpTable:
562 return lowerJumpTable(Op, DAG);
563 case ISD::SHL_PARTS:
564 return lowerShiftLeftParts(Op, DAG);
565 case ISD::SRA_PARTS:
// Shared helper: the boolean selects arithmetic (true) vs logical shift.
566 return lowerShiftRightParts(Op, DAG, true);
567 case ISD::SRL_PARTS:
568 return lowerShiftRightParts(Op, DAG, false);
570 return lowerConstantPool(Op, DAG);
571 case ISD::FP_TO_SINT:
572 return lowerFP_TO_SINT(Op, DAG);
573 case ISD::BITCAST:
574 return lowerBITCAST(Op, DAG);
575 case ISD::UINT_TO_FP:
576 return lowerUINT_TO_FP(Op, DAG);
577 case ISD::SINT_TO_FP:
578 return lowerSINT_TO_FP(Op, DAG);
579 case ISD::VASTART:
580 return lowerVASTART(Op, DAG);
581 case ISD::FRAMEADDR:
582 return lowerFRAMEADDR(Op, DAG);
583 case ISD::RETURNADDR:
584 return lowerRETURNADDR(Op, DAG);
586 return lowerWRITE_REGISTER(Op, DAG);
588 return lowerINSERT_VECTOR_ELT(Op, DAG);
590 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
592 return lowerBUILD_VECTOR(Op, DAG);
594 return lowerCONCAT_VECTORS(Op, DAG);
596 return lowerVECTOR_SHUFFLE(Op, DAG);
597 case ISD::BITREVERSE:
598 return lowerBITREVERSE(Op, DAG);
600 return lowerSCALAR_TO_VECTOR(Op, DAG);
601 case ISD::PREFETCH:
602 return lowerPREFETCH(Op, DAG);
603 case ISD::SELECT:
604 return lowerSELECT(Op, DAG);
605 case ISD::BRCOND:
606 return lowerBRCOND(Op, DAG);
607 case ISD::FP_TO_FP16:
608 return lowerFP_TO_FP16(Op, DAG);
609 case ISD::FP16_TO_FP:
610 return lowerFP16_TO_FP(Op, DAG);
611 case ISD::FP_TO_BF16:
612 return lowerFP_TO_BF16(Op, DAG);
613 case ISD::BF16_TO_FP:
614 return lowerBF16_TO_FP(Op, DAG);
616 return lowerVECREDUCE_ADD(Op, DAG);
617 case ISD::ROTL:
618 case ISD::ROTR:
619 return lowerRotate(Op, DAG);
627 return lowerVECREDUCE(Op, DAG);
628 case ISD::ConstantFP:
629 return lowerConstantFP(Op, DAG);
630 case ISD::SETCC:
631 return lowerSETCC(Op, DAG);
632 case ISD::FP_ROUND:
633 return lowerFP_ROUND(Op, DAG);
634 case ISD::FP_EXTEND:
635 return lowerFP_EXTEND(Op, DAG);
636 }
// Unhandled opcodes fall back to the generic legalizer.
637 return SDValue();
638}
639
640// Helper to attempt to return a cheaper, bit-inverted version of \p V.
// NOTE(review): the signature line is elided in this view; from the recursive
// calls below it takes (SDValue V, SelectionDAG &DAG) and returns SDValue —
// a non-null result means "V is NOT(result)". Confirm in the original source.
642 // TODO: don't always ignore oneuse constraints.
643 V = peekThroughBitcasts(V);
644 EVT VT = V.getValueType();
645
646 // Match not(xor X, -1) -> X.
647 if (V.getOpcode() == ISD::XOR &&
648 (ISD::isBuildVectorAllOnes(V.getOperand(1).getNode()) ||
649 isAllOnesConstant(V.getOperand(1))))
650 return V.getOperand(0);
651
652 // Match not(extract_subvector(not(X)) -> extract_subvector(X).
653 if (V.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
654 (isNullConstant(V.getOperand(1)) || V.getOperand(0).hasOneUse())) {
655 if (SDValue Not = isNOT(V.getOperand(0), DAG)) {
// Re-cast to the source type so the subvector extraction stays well-typed.
656 Not = DAG.getBitcast(V.getOperand(0).getValueType(), Not);
657 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(Not), VT, Not,
658 V.getOperand(1));
659 }
660 }
661
662 // Match not(SplatVector(not(X)) -> SplatVector(X).
663 if (V.getOpcode() == ISD::BUILD_VECTOR) {
664 if (SDValue SplatValue =
665 cast<BuildVectorSDNode>(V.getNode())->getSplatValue()) {
// Bail out if the splat scalar has users other than this build_vector.
666 if (!V->isOnlyUserOf(SplatValue.getNode()))
667 return SDValue();
668
669 if (SDValue Not = isNOT(SplatValue, DAG)) {
670 Not = DAG.getBitcast(V.getOperand(0).getValueType(), Not);
671 return DAG.getSplat(VT, SDLoc(Not), Not);
672 }
673 }
674 }
675
676 // Match not(or(not(X),not(Y))) -> and(X, Y).
677 if (V.getOpcode() == ISD::OR && DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
678 V.getOperand(0).hasOneUse() && V.getOperand(1).hasOneUse()) {
679 // TODO: Handle cases with single NOT operand -> VANDN
680 if (SDValue Op1 = isNOT(V.getOperand(1), DAG))
681 if (SDValue Op0 = isNOT(V.getOperand(0), DAG))
682 return DAG.getNode(ISD::AND, SDLoc(V), VT, DAG.getBitcast(VT, Op0),
683 DAG.getBitcast(VT, Op1));
684 }
685
686 // TODO: Add more matching patterns. Such as,
687 // not(concat_vectors(not(X), not(Y))) -> concat_vectors(X, Y).
688 // not(slt(C, X)) -> slt(X - 1, C)
689 return SDValue();
690}
691
692// Combine two ISD::FP_ROUND / LoongArchISD::VFCVT nodes with same type to
693// LoongArchISD::VFCVT. For example:
694// x1 = fp_round x, 0
695// y1 = fp_round y, 0
696// z = concat_vectors x1, y1
697// Or
698// x1 = LoongArch::VFCVT undef, x
699// y1 = LoongArch::VFCVT undef, y
700// z = LoongArchISD::VPACKEV y1, x1
701// can be combined to:
702// z = LoongArch::VFCVT y, x
// NOTE(review): the signature line is elided in this view; the body uses
// `N` with both `->` and `.` accessors, so N is presumably an SDValue, and
// `DL`/`DAG` are additional parameters — confirm in the original source.
704 const LoongArchSubtarget &Subtarget) {
// Only two-operand concats and VPACKEV nodes are valid inputs here.
705 assert(((N->getOpcode() == ISD::CONCAT_VECTORS && N->getNumOperands() == 2) ||
706 (N->getOpcode() == LoongArchISD::VPACKEV)) &&
707 "Invalid Node");
708
709 SDValue Op0 = peekThroughBitcasts(N->getOperand(0));
710 SDValue Op1 = peekThroughBitcasts(N->getOperand(1));
711 unsigned Opcode0 = Op0.getOpcode();
712 unsigned Opcode1 = Op1.getOpcode();
// Both operands must be the same kind of conversion.
713 if (Opcode0 != Opcode1)
714 return SDValue();
715
716 if (Opcode0 != ISD::FP_ROUND && Opcode0 != LoongArchISD::VFCVT)
717 return SDValue();
718
719 // Check if two nodes have only one use.
720 if (!Op0.hasOneUse() || !Op1.hasOneUse())
721 return SDValue();
722
723 EVT VT = N.getValueType();
724 EVT SVT0 = Op0.getValueType();
725 EVT SVT1 = Op1.getValueType();
726 // Check if two nodes have the same result type.
727 if (SVT0 != SVT1)
728 return SDValue();
729
730 // Check if two nodes have the same operand type.
731 EVT SSVT0 = Op0.getOperand(0).getValueType();
732 EVT SSVT1 = Op1.getOperand(0).getValueType();
733 if (SSVT0 != SSVT1)
734 return SDValue();
735
736 if (N->getOpcode() == ISD::CONCAT_VECTORS && Opcode0 == ISD::FP_ROUND) {
737 if (Subtarget.hasExtLASX() && VT.is256BitVector() && SVT0 == MVT::v4f32 &&
738 SSVT0 == MVT::v4f64) {
739 // A vector_shuffle is required in the final step, as xvfcvt instruction
740 // operates on each 128-bit segament as a lane.
741 SDValue Res = DAG.getNode(LoongArchISD::VFCVT, DL, MVT::v8f32,
742 Op1.getOperand(0), Op0.getOperand(0));
743 SDValue Undef = DAG.getUNDEF(Res.getValueType());
744 // After VFCVT, the high part of Res comes from the high parts of Op0 and
745 // Op1, and the low part comes from the low parts of Op0 and Op1. However,
746 // the desired order requires Op0 to fully occupy the lower half and Op1
747 // the upper half of Res. The Mask reorders the elements of Res to achieve
748 // this:
749 // - The first four elements (0, 1, 4, 5) come from Op0.
750 // - The next four elements (2, 3, 6, 7) come from Op1.
751 SmallVector<int, 8> Mask = {0, 1, 4, 5, 2, 3, 6, 7};
752 Res = DAG.getVectorShuffle(Res.getValueType(), DL, Res, Undef, Mask);
753 return DAG.getBitcast(VT, Res);
754 }
755 }
756
757 if (N->getOpcode() == LoongArchISD::VPACKEV &&
758 Opcode0 == LoongArchISD::VFCVT) {
759 // For VPACKEV, check if the first operation of LoongArchISD::VFCVT is
760 // undef.
761 if (!Op0.getOperand(0).isUndef() || !Op1.getOperand(0).isUndef())
762 return SDValue();
763
764 if (Subtarget.hasExtLSX() && (VT == MVT::v2i64 || VT == MVT::v2f64) &&
765 SVT0 == MVT::v4f32 && SSVT0 == MVT::v2f64) {
766 SDValue Res = DAG.getNode(LoongArchISD::VFCVT, DL, MVT::v4f32,
767 Op0.getOperand(1), Op1.getOperand(1));
768 return DAG.getBitcast(VT, Res);
769 }
770 }
771
772 return SDValue();
773}
774
775SDValue LoongArchTargetLowering::lowerFP_ROUND(SDValue Op,
776 SelectionDAG &DAG) const {
777 SDLoc DL(Op);
778 SDValue In = Op.getOperand(0);
779 MVT VT = Op.getSimpleValueType();
780 MVT SVT = In.getSimpleValueType();
781
782 if (VT == MVT::v4f32 && SVT == MVT::v4f64) {
783 SDValue Lo, Hi;
784 std::tie(Lo, Hi) = DAG.SplitVector(In, DL);
785 return DAG.getNode(LoongArchISD::VFCVT, DL, VT, Hi, Lo);
786 }
787
788 return SDValue();
789}
790
// Custom-lower FP_EXTEND for v2f32->v2f64 (LSX) and v4f32->v4f64 (LASX):
// choose VFCVTH when the source is the high half of a wider vector,
// otherwise insert the source into the low half of a wide undef vector and
// use VFCVTL. All other type combinations fall back to default lowering.
791SDValue LoongArchTargetLowering::lowerFP_EXTEND(SDValue Op,
792 SelectionDAG &DAG) const {
793
794 SDLoc DL(Op);
795 EVT VT = Op.getValueType();
796 SDValue Src = Op->getOperand(0);
797 EVT SVT = Src.getValueType();
798
799 bool V2F32ToV2F64 =
800 VT == MVT::v2f64 && SVT == MVT::v2f32 && Subtarget.hasExtLSX();
801 bool V4F32ToV4F64 =
802 VT == MVT::v4f64 && SVT == MVT::v4f32 && Subtarget.hasExtLASX();
803 if (!V2F32ToV2F64 && !V4F32ToV4F64)
804 return SDValue();
805
806 // Check if Op is the high part of vector.
// Returns the wide source vector when Op extracts exactly its upper half,
// otherwise an empty SDValue.
// NOTE(review): one line of this lambda (original line 808) is elided in
// this rendered view — confirm the lambda header against the source.
807 auto CheckVecHighPart = [](SDValue Op) {
809 if (Op.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
810 SDValue SOp = Op.getOperand(0);
811 EVT SVT = SOp.getValueType();
812 if (!SVT.isVector() || (SVT.getVectorNumElements() % 2 != 0))
813 return SDValue();
814
// An extract starting at the midpoint is precisely the high half.
815 const uint64_t Imm = Op.getConstantOperandVal(1);
816 if (Imm == SVT.getVectorNumElements() / 2)
817 return SOp;
818 return SDValue();
819 }
820 return SDValue();
821 };
822
823 unsigned Opcode;
824 SDValue VFCVTOp;
825 EVT WideOpVT = SVT.getSimpleVT().getDoubleNumVectorElementsVT();
826 SDValue ZeroIdx = DAG.getVectorIdxConstant(0, DL);
827
828 // If the operand of ISD::FP_EXTEND comes from the high part of vector,
829 // generate LoongArchISD::VFCVTH, otherwise LoongArchISD::VFCVTL.
830 if (SDValue V = CheckVecHighPart(Src)) {
831 assert(V.getValueSizeInBits() == WideOpVT.getSizeInBits() &&
832 "Unexpected wide vector");
833 Opcode = LoongArchISD::VFCVTH;
834 VFCVTOp = DAG.getBitcast(WideOpVT, V);
835 } else {
836 Opcode = LoongArchISD::VFCVTL;
837 VFCVTOp = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideOpVT,
838 DAG.getUNDEF(WideOpVT), Src, ZeroIdx);
839 }
840
841 // v2f64 = fp_extend v2f32
842 if (V2F32ToV2F64)
843 return DAG.getNode(Opcode, DL, VT, VFCVTOp);
844
845 // v4f64 = fp_extend v4f32
846 if (V4F32ToV4F64) {
847 // XVFCVT instruction operates on each 128-bit segment as a lane, so a
848 // vector_shuffle is required firstly.
849 SmallVector<int, 8> Mask = {0, 1, 4, 5, 2, 3, 6, 7};
850 SDValue Res = DAG.getVectorShuffle(WideOpVT, DL, VFCVTOp,
851 DAG.getUNDEF(WideOpVT), Mask);
852 Res = DAG.getNode(Opcode, DL, VT, Res);
853 return Res;
854 }
855
856 return SDValue();
857}
858
// Custom-lower an FP immediate by materializing its bit pattern in integer
// registers and moving it to an FPR, unless a cheaper form exists (zero,
// vldi-encodable, or a short-enough constant-pool fallback).
859SDValue LoongArchTargetLowering::lowerConstantFP(SDValue Op,
860 SelectionDAG &DAG) const {
861 EVT VT = Op.getValueType();
862 ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op);
863 const APFloat &FPVal = CFP->getValueAPF();
864 SDLoc DL(CFP);
865
// Custom lowering is only registered when the matching FP feature exists.
866 assert((VT == MVT::f32 && Subtarget.hasBasicF()) ||
867 (VT == MVT::f64 && Subtarget.hasBasicD()));
868
869 // If value is 0.0 or -0.0, just ignore it.
870 if (FPVal.isZero())
871 return SDValue();
872
873 // If lsx enabled, use cheaper 'vldi' instruction if possible.
874 if (isFPImmVLDILegal(FPVal, VT))
875 return SDValue();
876
877 // Construct as integer, and move to float register.
878 APInt INTVal = FPVal.bitcastToAPInt();
879
880 // If more than MaterializeFPImmInsNum instructions will be used to
881 // generate the INTVal and move it to float register, fallback to
882 // use floating point load from the constant pool.
// NOTE(review): the line defining `Seq` (original line 883) is elided in
// this view — presumably the LoongArchMatInt instruction sequence for
// INTVal; confirm in the original source.
// On LA32, an f64 needs two GPR->FPR moves, hence the +2 vs +1 below.
884 int InsNum = Seq.size() + ((VT == MVT::f64 && !Subtarget.is64Bit()) ? 2 : 1);
885 if (InsNum > MaterializeFPImmInsNum && !FPVal.isExactlyValue(+1.0))
886 return SDValue();
887
888 switch (VT.getSimpleVT().SimpleTy) {
889 default:
890 llvm_unreachable("Unexpected floating point type!");
891 break;
892 case MVT::f32: {
893 SDValue NewVal = DAG.getConstant(INTVal, DL, MVT::i32);
894 if (Subtarget.is64Bit())
895 NewVal = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, NewVal);
896 return DAG.getNode(Subtarget.is64Bit() ? LoongArchISD::MOVGR2FR_W_LA64
897 : LoongArchISD::MOVGR2FR_W,
898 DL, VT, NewVal);
899 }
900 case MVT::f64: {
901 if (Subtarget.is64Bit()) {
902 SDValue NewVal = DAG.getConstant(INTVal, DL, MVT::i64);
903 return DAG.getNode(LoongArchISD::MOVGR2FR_D, DL, VT, NewVal);
904 }
// LA32: move the two 32-bit halves separately.
905 SDValue Lo = DAG.getConstant(INTVal.trunc(32), DL, MVT::i32);
906 SDValue Hi = DAG.getConstant(INTVal.lshr(32).trunc(32), DL, MVT::i32);
907 return DAG.getNode(LoongArchISD::MOVGR2FR_D_LO_HI, DL, VT, Lo, Hi);
908 }
909 }
910
911 return SDValue();
912}
913
914// Ensure SETCC result and operand have the same bit width; isel does not
915// support mismatched widths.
916SDValue LoongArchTargetLowering::lowerSETCC(SDValue Op,
917 SelectionDAG &DAG) const {
918 SDLoc DL(Op);
919 EVT ResultVT = Op.getValueType();
920 EVT OperandVT = Op.getOperand(0).getValueType();
921
922 EVT SetCCResultVT =
923 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), OperandVT);
924
925 if (ResultVT == SetCCResultVT)
926 return Op;
927
928 assert(Op.getOperand(0).getValueType() == Op.getOperand(1).getValueType() &&
929 "SETCC operands must have the same type!");
930
931 SDValue SetCCNode =
932 DAG.getNode(ISD::SETCC, DL, SetCCResultVT, Op.getOperand(0),
933 Op.getOperand(1), Op.getOperand(2));
934
935 if (ResultVT.bitsGT(SetCCResultVT))
936 SetCCNode = DAG.getNode(ISD::SIGN_EXTEND, DL, ResultVT, SetCCNode);
937 else if (ResultVT.bitsLT(SetCCResultVT))
938 SetCCNode = DAG.getNode(ISD::TRUNCATE, DL, ResultVT, SetCCNode);
939
940 return SetCCNode;
941}
942
943// Lower vecreduce_add using vhaddw instructions.
944// For Example:
945// call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a)
946// can be lowered to:
947// VHADDW_D_W vr0, vr0, vr0
948// VHADDW_Q_D vr0, vr0, vr0
949// VPICKVE2GR_D a0, vr0, 0
950// ADDI_W a0, a0, 0
951SDValue LoongArchTargetLowering::lowerVECREDUCE_ADD(SDValue Op,
952 SelectionDAG &DAG) const {
953
954 SDLoc DL(Op);
955 MVT OpVT = Op.getSimpleValueType();
956 SDValue Val = Op.getOperand(0);
957
958 unsigned NumEles = Val.getSimpleValueType().getVectorNumElements();
959 unsigned EleBits = Val.getSimpleValueType().getScalarSizeInBits();
960 unsigned ResBits = OpVT.getScalarSizeInBits();
961
962 unsigned LegalVecSize = 128;
963 bool isLASX256Vector =
964 Subtarget.hasExtLASX() && Val.getValueSizeInBits() == 256;
965
966 // Ensure operand type legal or enable it legal.
967 while (!isTypeLegal(Val.getSimpleValueType())) {
968 Val = DAG.WidenVector(Val, DL);
969 }
970
971 // NumEles is designed for iterations count, v4i32 for LSX
972 // and v8i32 for LASX should have the same count.
973 if (isLASX256Vector) {
974 NumEles /= 2;
975 LegalVecSize = 256;
976 }
977
978 for (unsigned i = 1; i < NumEles; i *= 2, EleBits *= 2) {
979 MVT IntTy = MVT::getIntegerVT(EleBits);
980 MVT VecTy = MVT::getVectorVT(IntTy, LegalVecSize / EleBits);
981 Val = DAG.getNode(LoongArchISD::VHADDW, DL, VecTy, Val, Val);
982 }
983
984 if (isLASX256Vector) {
985 SDValue Tmp = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, Val,
986 DAG.getConstant(2, DL, Subtarget.getGRLenVT()));
987 Val = DAG.getNode(ISD::ADD, DL, MVT::v4i64, Tmp, Val);
988 }
989
990 Val = DAG.getBitcast(MVT::getVectorVT(OpVT, LegalVecSize / ResBits), Val);
991 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Val,
992 DAG.getConstant(0, DL, Subtarget.getGRLenVT()));
993}
994
995// Lower vecreduce_and/or/xor/[s/u]max/[s/u]min.
996// For Example:
997// call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %a)
998// can be lowered to:
999// VBSRL_V vr1, vr0, 8
1000// VMAX_W vr0, vr1, vr0
1001// VBSRL_V vr1, vr0, 4
1002// VMAX_W vr0, vr1, vr0
1003// VPICKVE2GR_W a0, vr0, 0
1004// For 256 bit vector, it is illegal and will be spilt into
1005// two 128 bit vector by default then processed by this.
1006SDValue LoongArchTargetLowering::lowerVECREDUCE(SDValue Op,
1007 SelectionDAG &DAG) const {
1008 SDLoc DL(Op);
1009
1010 MVT OpVT = Op.getSimpleValueType();
1011 SDValue Val = Op.getOperand(0);
1012
1013 unsigned NumEles = Val.getSimpleValueType().getVectorNumElements();
1014 unsigned EleBits = Val.getSimpleValueType().getScalarSizeInBits();
1015
1016 // Ensure operand type legal or enable it legal.
1017 while (!isTypeLegal(Val.getSimpleValueType())) {
1018 Val = DAG.WidenVector(Val, DL);
1019 }
1020
1021 unsigned Opcode = ISD::getVecReduceBaseOpcode(Op.getOpcode());
1022 MVT VecTy = Val.getSimpleValueType();
1023 MVT GRLenVT = Subtarget.getGRLenVT();
1024
1025 for (int i = NumEles; i > 1; i /= 2) {
1026 SDValue ShiftAmt = DAG.getConstant(i * EleBits / 16, DL, GRLenVT);
1027 SDValue Tmp = DAG.getNode(LoongArchISD::VBSRL, DL, VecTy, Val, ShiftAmt);
1028 Val = DAG.getNode(Opcode, DL, VecTy, Tmp, Val);
1029 }
1030
1031 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Val,
1032 DAG.getConstant(0, DL, GRLenVT));
1033}
1034
1035SDValue LoongArchTargetLowering::lowerPREFETCH(SDValue Op,
1036 SelectionDAG &DAG) const {
1037 unsigned IsData = Op.getConstantOperandVal(4);
1038
1039 // We don't support non-data prefetch.
1040 // Just preserve the chain.
1041 if (!IsData)
1042 return Op.getOperand(0);
1043
1044 return Op;
1045}
1046
1047SDValue LoongArchTargetLowering::lowerRotate(SDValue Op,
1048 SelectionDAG &DAG) const {
1049 MVT VT = Op.getSimpleValueType();
1050 assert(VT.isVector() && "Unexpected type");
1051
1052 SDLoc DL(Op);
1053 SDValue R = Op.getOperand(0);
1054 SDValue Amt = Op.getOperand(1);
1055 unsigned Opcode = Op.getOpcode();
1056 unsigned EltSizeInBits = VT.getScalarSizeInBits();
1057
1058 auto checkCstSplat = [](SDValue V, APInt &CstSplatValue) {
1059 if (V.getOpcode() != ISD::BUILD_VECTOR)
1060 return false;
1061 if (SDValue SplatValue =
1062 cast<BuildVectorSDNode>(V.getNode())->getSplatValue()) {
1063 if (auto *C = dyn_cast<ConstantSDNode>(SplatValue)) {
1064 CstSplatValue = C->getAPIntValue();
1065 return true;
1066 }
1067 }
1068 return false;
1069 };
1070
1071 // Check for constant splat rotation amount.
1072 APInt CstSplatValue;
1073 bool IsCstSplat = checkCstSplat(Amt, CstSplatValue);
1074 bool isROTL = Opcode == ISD::ROTL;
1075
1076 // Check for splat rotate by zero.
1077 if (IsCstSplat && CstSplatValue.urem(EltSizeInBits) == 0)
1078 return R;
1079
1080 // LoongArch targets always prefer ISD::ROTR.
1081 if (isROTL) {
1082 SDValue Zero = DAG.getConstant(0, DL, VT);
1083 return DAG.getNode(ISD::ROTR, DL, VT, R,
1084 DAG.getNode(ISD::SUB, DL, VT, Zero, Amt));
1085 }
1086
1087 // Rotate by a immediate.
1088 if (IsCstSplat) {
1089 // ISD::ROTR: Attemp to rotate by a positive immediate.
1090 SDValue Bits = DAG.getConstant(EltSizeInBits, DL, VT);
1091 if (SDValue Urem =
1092 DAG.FoldConstantArithmetic(ISD::UREM, DL, VT, {Amt, Bits}))
1093 return DAG.getNode(Opcode, DL, VT, R, Urem);
1094 }
1095
1096 return Op;
1097}
1098
1099// Return true if Val is equal to (setcc LHS, RHS, CC).
1100// Return false if Val is the inverse of (setcc LHS, RHS, CC).
1101// Otherwise, return std::nullopt.
1102static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS,
1103 ISD::CondCode CC, SDValue Val) {
1104 assert(Val->getOpcode() == ISD::SETCC);
1105 SDValue LHS2 = Val.getOperand(0);
1106 SDValue RHS2 = Val.getOperand(1);
1107 ISD::CondCode CC2 = cast<CondCodeSDNode>(Val.getOperand(2))->get();
1108
// Direct match: same operands in the same order.
1109 if (LHS == LHS2 && RHS == RHS2) {
1110 if (CC == CC2)
1111 return true;
1112 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
1113 return false;
// Same operands but swapped.
// NOTE(review): one line (original 1115) is elided here — presumably CC2 is
// first adjusted with ISD::getSetCCSwappedOperands; confirm in the source.
1114 } else if (LHS == RHS2 && RHS == LHS2) {
1116 if (CC == CC2)
1117 return true;
1118 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
1119 return false;
1120 }
1121
1122 return std::nullopt;
1123}
1124
// Fold integer (select cond, TrueV, FalseV) into branchless bitwise
// arithmetic when one arm is a special constant (0/-1) or when all three
// operands are setcc nodes. Returns SDValue() when no pattern applies.
// NOTE(review): the opening signature line is elided in this view; from the
// body, the parameters include an SDNode *N, SelectionDAG &DAG and the
// subtarget — confirm against the original source.
1126 const LoongArchSubtarget &Subtarget) {
1127 SDValue CondV = N->getOperand(0);
1128 SDValue TrueV = N->getOperand(1);
1129 SDValue FalseV = N->getOperand(2);
1130 MVT VT = N->getSimpleValueType(0);
1131 SDLoc DL(N);
1132
1133 // (select c, -1, y) -> -c | y
1134 if (isAllOnesConstant(TrueV)) {
1135 SDValue Neg = DAG.getNegative(CondV, DL, VT);
// Freeze the other arm so a poison value cannot leak through the OR.
1136 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(FalseV));
1137 }
1138 // (select c, y, -1) -> (c-1) | y
1139 if (isAllOnesConstant(FalseV)) {
1140 SDValue Neg =
1141 DAG.getNode(ISD::ADD, DL, VT, CondV, DAG.getAllOnesConstant(DL, VT));
1142 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(TrueV));
1143 }
1144
1145 // (select c, 0, y) -> (c-1) & y
1146 if (isNullConstant(TrueV)) {
1147 SDValue Neg =
1148 DAG.getNode(ISD::ADD, DL, VT, CondV, DAG.getAllOnesConstant(DL, VT));
1149 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(FalseV));
1150 }
1151 // (select c, y, 0) -> -c & y
1152 if (isNullConstant(FalseV)) {
1153 SDValue Neg = DAG.getNegative(CondV, DL, VT);
1154 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(TrueV));
1155 }
1156
1157 // select c, ~x, x --> xor -c, x
1158 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
1159 const APInt &TrueVal = TrueV->getAsAPIntVal();
1160 const APInt &FalseVal = FalseV->getAsAPIntVal();
1161 if (~TrueVal == FalseVal) {
1162 SDValue Neg = DAG.getNegative(CondV, DL, VT);
1163 return DAG.getNode(ISD::XOR, DL, VT, Neg, FalseV);
1164 }
1165 }
1166
1167 // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops
1168 // when both truev and falsev are also setcc.
1169 if (CondV.getOpcode() == ISD::SETCC && TrueV.getOpcode() == ISD::SETCC &&
1170 FalseV.getOpcode() == ISD::SETCC) {
1171 SDValue LHS = CondV.getOperand(0);
1172 SDValue RHS = CondV.getOperand(1);
1173 ISD::CondCode CC = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
1174
1175 // (select x, x, y) -> x | y
1176 // (select !x, x, y) -> x & y
1177 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, TrueV)) {
1178 return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, TrueV,
1179 DAG.getFreeze(FalseV));
1180 }
1181 // (select x, y, x) -> x & y
1182 // (select !x, y, x) -> x | y
1183 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, FalseV)) {
1184 return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT,
1185 DAG.getFreeze(TrueV), FalseV);
1186 }
1187 }
1188
1189 return SDValue();
1190}
1191
1192// Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants
1193// into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable.
1194// For now we only consider transformation profitable if `binOp(c0, c1)` ends up
1195// being `0` or `-1`. In such cases we can replace `select` with `and`.
1196// TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize
1197// than `c0`?
// NOTE(review): the parameter-list line (original 1199) is elided in this
// view; from the body, the parameters include SDNode *BO and
// SelectionDAG &DAG — confirm against the original source.
1198static SDValue
1200 const LoongArchSubtarget &Subtarget) {
// Locate the single-use select among BO's two operands; SelOpNo remembers
// which side it was on so operand order is preserved for non-commutative ops.
1201 unsigned SelOpNo = 0;
1202 SDValue Sel = BO->getOperand(0);
1203 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
1204 SelOpNo = 1;
1205 Sel = BO->getOperand(1);
1206 }
1207
1208 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
1209 return SDValue();
1210
// Identify which select arm holds the constant (c0) and which is variable.
1211 unsigned ConstSelOpNo = 1;
1212 unsigned OtherSelOpNo = 2;
1213 if (!isa<ConstantSDNode>(Sel->getOperand(ConstSelOpNo))) {
1214 ConstSelOpNo = 2;
1215 OtherSelOpNo = 1;
1216 }
1217 SDValue ConstSelOp = Sel->getOperand(ConstSelOpNo);
1218 ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(ConstSelOp);
1219 if (!ConstSelOpNode || ConstSelOpNode->isOpaque())
1220 return SDValue();
1221
// The binop's other operand (c1) must also be a non-opaque constant.
1222 SDValue ConstBinOp = BO->getOperand(SelOpNo ^ 1);
1223 ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(ConstBinOp);
1224 if (!ConstBinOpNode || ConstBinOpNode->isOpaque())
1225 return SDValue();
1226
1227 SDLoc DL(Sel);
1228 EVT VT = BO->getValueType(0);
1229
1230 SDValue NewConstOps[2] = {ConstSelOp, ConstBinOp};
1231 if (SelOpNo == 1)
1232 std::swap(NewConstOps[0], NewConstOps[1]);
1233
// Constant-fold binOp(c0, c1); bail out if folding is not possible.
1234 SDValue NewConstOp =
1235 DAG.FoldConstantArithmetic(BO->getOpcode(), DL, VT, NewConstOps);
1236 if (!NewConstOp)
1237 return SDValue();
1238
// Profitability gate: only 0 or -1 results make the final select cheap.
1239 const APInt &NewConstAPInt = NewConstOp->getAsAPIntVal();
1240 if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes())
1241 return SDValue();
1242
1243 SDValue OtherSelOp = Sel->getOperand(OtherSelOpNo);
1244 SDValue NewNonConstOps[2] = {OtherSelOp, ConstBinOp};
1245 if (SelOpNo == 1)
1246 std::swap(NewNonConstOps[0], NewNonConstOps[1]);
1247 SDValue NewNonConstOp = DAG.getNode(BO->getOpcode(), DL, VT, NewNonConstOps);
1248
// Rebuild the select with each arm replaced by its binOp'd counterpart.
1249 SDValue NewT = (ConstSelOpNo == 1) ? NewConstOp : NewNonConstOp;
1250 SDValue NewF = (ConstSelOpNo == 1) ? NewNonConstOp : NewConstOp;
1251 return DAG.getSelect(DL, VT, Sel.getOperand(0), NewT, NewF);
1252}
1253
1254// Changes the condition code and swaps operands if necessary, so the SetCC
1255// operation matches one of the comparisons supported directly by branches
1256// in the LoongArch ISA. May adjust compares to favor compare with 0 over
1257// compare with 1/-1.
// NOTE(review): the signature line (original 1258) is elided in this view;
// LHS/RHS/CC are in-out reference parameters mutated in place, and DL is a
// debug location parameter — confirm against the original source.
1259 ISD::CondCode &CC, SelectionDAG &DAG) {
1260 // If this is a single bit test that can't be handled by ANDI, shift the
1261 // bit to be tested to the MSB and perform a signed compare with 0.
1262 if (isIntEqualitySetCC(CC) && isNullConstant(RHS) &&
1263 LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
1264 isa<ConstantSDNode>(LHS.getOperand(1))) {
1265 uint64_t Mask = LHS.getConstantOperandVal(1);
// Only masks that are a single bit or a low-bit run, and too wide for a
// 12-bit immediate, benefit from this rewrite.
1266 if ((isPowerOf2_64(Mask) || isMask_64(Mask)) && !isInt<12>(Mask)) {
1267 unsigned ShAmt = 0;
1268 if (isPowerOf2_64(Mask)) {
// Single-bit test: the bit becomes the sign bit, so EQ/NE turn into
// sign comparisons against zero.
1269 CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
1270 ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);
1271 } else {
1272 ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Mask);
1273 }
1274
1275 LHS = LHS.getOperand(0);
1276 if (ShAmt != 0)
1277 LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS,
1278 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
1279 return;
1280 }
1281 }
1282
1283 if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
1284 int64_t C = RHSC->getSExtValue();
1285 switch (CC) {
1286 default:
1287 break;
1288 case ISD::SETGT:
1289 // Convert X > -1 to X >= 0.
1290 if (C == -1) {
1291 RHS = DAG.getConstant(0, DL, RHS.getValueType());
1292 CC = ISD::SETGE;
1293 return;
1294 }
1295 break;
1296 case ISD::SETLT:
1297 // Convert X < 1 to 0 >= X.
1298 if (C == 1) {
1299 RHS = LHS;
1300 LHS = DAG.getConstant(0, DL, RHS.getValueType());
1301 CC = ISD::SETGE;
1302 return;
1303 }
1304 break;
1305 }
1306 }
1307
// Canonicalize the remaining unsupported codes by swapping operands.
// NOTE(review): one line (original 1315) is elided here — presumably
// CC = ISD::getSetCCSwappedOperands(CC) before the swap; confirm in the
// original source.
1308 switch (CC) {
1309 default:
1310 break;
1311 case ISD::SETGT:
1312 case ISD::SETLE:
1313 case ISD::SETUGT:
1314 case ISD::SETULE:
1316 std::swap(LHS, RHS);
1317 break;
1318 }
1319}
1320
// Lower ISD::SELECT. Tries several DAG folds first and otherwise emits a
// LoongArchISD::SELECT_CC node, merging an integer SETCC condition into it
// when possible so the compare+branch instructions can be used directly.
SDValue LoongArchTargetLowering::lowerSELECT(SDValue Op,
                                             SelectionDAG &DAG) const {
  SDValue CondV = Op.getOperand(0);
  SDValue TrueV = Op.getOperand(1);
  SDValue FalseV = Op.getOperand(2);
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();
  MVT GRLenVT = Subtarget.getGRLenVT();

  // First try turning the whole select into a plain binary op.
  if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
    return V;

  // If our only user is a speculatable binary op, try folding that op into
  // the select's arms instead.
  if (Op.hasOneUse()) {
    unsigned UseOpc = Op->user_begin()->getOpcode();
    if (isBinOp(UseOpc) && DAG.isSafeToSpeculativelyExecute(UseOpc)) {
      SDNode *BinOp = *Op->user_begin();
      if (SDValue NewSel = foldBinOpIntoSelectIfProfitable(*Op->user_begin(),
                                                           DAG, Subtarget)) {
        DAG.ReplaceAllUsesWith(BinOp, &NewSel);
        // Opcode check is necessary because foldBinOpIntoSelectIfProfitable
        // may return a constant node and cause crash in lowerSELECT.
        if (NewSel.getOpcode() == ISD::SELECT)
          return lowerSELECT(NewSel, DAG);
        return NewSel;
      }
    }
  }

  // If the condition is not an integer SETCC which operates on GRLenVT, we need
  // to emit a LoongArchISD::SELECT_CC comparing the condition to zero. i.e.:
  // (select condv, truev, falsev)
  // -> (loongarchisd::select_cc condv, zero, setne, truev, falsev)
  if (CondV.getOpcode() != ISD::SETCC ||
      CondV.getOperand(0).getSimpleValueType() != GRLenVT) {
    SDValue Zero = DAG.getConstant(0, DL, GRLenVT);
    SDValue SetNE = DAG.getCondCode(ISD::SETNE);

    SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};

    return DAG.getNode(LoongArchISD::SELECT_CC, DL, VT, Ops);
  }

  // If the CondV is the output of a SETCC node which operates on GRLenVT
  // inputs, then merge the SETCC node into the lowered LoongArchISD::SELECT_CC
  // to take advantage of the integer compare+branch instructions. i.e.: (select
  // (setcc lhs, rhs, cc), truev, falsev)
  // -> (loongarchisd::select_cc lhs, rhs, cc, truev, falsev)
  SDValue LHS = CondV.getOperand(0);
  SDValue RHS = CondV.getOperand(1);
  ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();

  // Special case for a select of 2 constants that have a difference of 1.
  // Normally this is done by DAGCombine, but if the select is introduced by
  // type legalization or op legalization, we miss it. Restricting to SETLT
  // case for now because that is what signed saturating add/sub need.
  // FIXME: We don't need the condition to be SETLT or even a SETCC,
  // but we would probably want to swap the true/false values if the condition
  // is SETGE/SETLE to avoid an XORI.
  if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
      CCVal == ISD::SETLT) {
    const APInt &TrueVal = TrueV->getAsAPIntVal();
    const APInt &FalseVal = FalseV->getAsAPIntVal();
    // (select cond, X+1, X) -> X + cond; (select cond, X-1, X) -> X - cond.
    if (TrueVal - 1 == FalseVal)
      return DAG.getNode(ISD::ADD, DL, VT, CondV, FalseV);
    if (TrueVal + 1 == FalseVal)
      return DAG.getNode(ISD::SUB, DL, VT, FalseV, CondV);
  }

  translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
  // 1 < x ? x : 1 -> 0 < x ? x : 1
  if (isOneConstant(LHS) && (CCVal == ISD::SETLT || CCVal == ISD::SETULT) &&
      RHS == TrueV && LHS == FalseV) {
    LHS = DAG.getConstant(0, DL, VT);
    // 0 <u x is the same as x != 0.
    if (CCVal == ISD::SETULT) {
      std::swap(LHS, RHS);
      CCVal = ISD::SETNE;
    }
  }

  // x <s -1 ? x : -1 -> x <s 0 ? x : -1
  if (isAllOnesConstant(RHS) && CCVal == ISD::SETLT && LHS == TrueV &&
      RHS == FalseV) {
    RHS = DAG.getConstant(0, DL, VT);
  }

  SDValue TargetCC = DAG.getCondCode(CCVal);

  if (isa<ConstantSDNode>(TrueV) && !isa<ConstantSDNode>(FalseV)) {
    // (select (setcc lhs, rhs, CC), constant, falsev)
    // -> (select (setcc lhs, rhs, InverseCC), falsev, constant)
    std::swap(TrueV, FalseV);
    TargetCC = DAG.getCondCode(ISD::getSetCCInverse(CCVal, LHS.getValueType()));
  }

  SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
  return DAG.getNode(LoongArchISD::SELECT_CC, DL, VT, Ops);
}
1419
1420SDValue LoongArchTargetLowering::lowerBRCOND(SDValue Op,
1421 SelectionDAG &DAG) const {
1422 SDValue CondV = Op.getOperand(1);
1423 SDLoc DL(Op);
1424 MVT GRLenVT = Subtarget.getGRLenVT();
1425
1426 if (CondV.getOpcode() == ISD::SETCC) {
1427 if (CondV.getOperand(0).getValueType() == GRLenVT) {
1428 SDValue LHS = CondV.getOperand(0);
1429 SDValue RHS = CondV.getOperand(1);
1430 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
1431
1432 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
1433
1434 SDValue TargetCC = DAG.getCondCode(CCVal);
1435 return DAG.getNode(LoongArchISD::BR_CC, DL, Op.getValueType(),
1436 Op.getOperand(0), LHS, RHS, TargetCC,
1437 Op.getOperand(2));
1438 } else if (CondV.getOperand(0).getValueType().isFloatingPoint()) {
1439 return DAG.getNode(LoongArchISD::BRCOND, DL, Op.getValueType(),
1440 Op.getOperand(0), CondV, Op.getOperand(2));
1441 }
1442 }
1443
1444 return DAG.getNode(LoongArchISD::BR_CC, DL, Op.getValueType(),
1445 Op.getOperand(0), CondV, DAG.getConstant(0, DL, GRLenVT),
1446 DAG.getCondCode(ISD::SETNE), Op.getOperand(2));
1447}
1448
1449SDValue
1450LoongArchTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
1451 SelectionDAG &DAG) const {
1452 SDLoc DL(Op);
1453 MVT OpVT = Op.getSimpleValueType();
1454
1455 SDValue Vector = DAG.getUNDEF(OpVT);
1456 SDValue Val = Op.getOperand(0);
1457 SDValue Idx = DAG.getConstant(0, DL, Subtarget.getGRLenVT());
1458
1459 return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, OpVT, Vector, Val, Idx);
1460}
1461
1462SDValue LoongArchTargetLowering::lowerBITREVERSE(SDValue Op,
1463 SelectionDAG &DAG) const {
1464 EVT ResTy = Op->getValueType(0);
1465 SDValue Src = Op->getOperand(0);
1466 SDLoc DL(Op);
1467
1468 // LoongArchISD::BITREV_8B is not supported on LA32.
1469 if (!Subtarget.is64Bit() && (ResTy == MVT::v16i8 || ResTy == MVT::v32i8))
1470 return SDValue();
1471
1472 EVT NewVT = ResTy.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
1473 unsigned int OrigEltNum = ResTy.getVectorNumElements();
1474 unsigned int NewEltNum = NewVT.getVectorNumElements();
1475
1476 SDValue NewSrc = DAG.getNode(ISD::BITCAST, DL, NewVT, Src);
1477
1479 for (unsigned int i = 0; i < NewEltNum; i++) {
1480 SDValue Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, NewSrc,
1481 DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
1482 unsigned RevOp = (ResTy == MVT::v16i8 || ResTy == MVT::v32i8)
1483 ? (unsigned)LoongArchISD::BITREV_8B
1484 : (unsigned)ISD::BITREVERSE;
1485 Ops.push_back(DAG.getNode(RevOp, DL, MVT::i64, Op));
1486 }
1487 SDValue Res =
1488 DAG.getNode(ISD::BITCAST, DL, ResTy, DAG.getBuildVector(NewVT, DL, Ops));
1489
1490 switch (ResTy.getSimpleVT().SimpleTy) {
1491 default:
1492 return SDValue();
1493 case MVT::v16i8:
1494 case MVT::v32i8:
1495 return Res;
1496 case MVT::v8i16:
1497 case MVT::v16i16:
1498 case MVT::v4i32:
1499 case MVT::v8i32: {
1501 for (unsigned int i = 0; i < NewEltNum; i++)
1502 for (int j = OrigEltNum / NewEltNum - 1; j >= 0; j--)
1503 Mask.push_back(j + (OrigEltNum / NewEltNum) * i);
1504 return DAG.getVectorShuffle(ResTy, DL, Res, DAG.getUNDEF(ResTy), Mask);
1505 }
1506 }
1507}
1508
1509// Widen element type to get a new mask value (if possible).
1510// For example:
1511// shufflevector <4 x i32> %a, <4 x i32> %b,
1512// <4 x i32> <i32 6, i32 7, i32 2, i32 3>
1513// is equivalent to:
1514// shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
1515// can be lowered to:
1516// VPACKOD_D vr0, vr0, vr1
1518 SDValue V1, SDValue V2, SelectionDAG &DAG) {
1519 unsigned EltBits = VT.getScalarSizeInBits();
1520
1521 if (EltBits > 32 || EltBits == 1)
1522 return SDValue();
1523
1524 SmallVector<int, 8> NewMask;
1525 if (widenShuffleMaskElts(Mask, NewMask)) {
1526 MVT NewEltVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(EltBits * 2)
1527 : MVT::getIntegerVT(EltBits * 2);
1528 MVT NewVT = MVT::getVectorVT(NewEltVT, VT.getVectorNumElements() / 2);
1529 if (DAG.getTargetLoweringInfo().isTypeLegal(NewVT)) {
1530 SDValue NewV1 = DAG.getBitcast(NewVT, V1);
1531 SDValue NewV2 = DAG.getBitcast(NewVT, V2);
1532 return DAG.getBitcast(
1533 VT, DAG.getVectorShuffle(NewVT, DL, NewV1, NewV2, NewMask));
1534 }
1535 }
1536
1537 return SDValue();
1538}
1539
/// Attempts to match a shuffle mask against the VBSLL, VBSRL, VSLLI and VSRLI
/// instruction.
// The function matches elements from one of the input vector shuffled to the
// left or right with zeroable elements 'shifted in'. It handles both the
// strictly bit-wise element shifts and the byte shift across an entire 128-bit
// lane.
// Mostly copied from X86.
// On success sets ShiftVT/Opcode and returns the shift amount; returns -1 if
// no shift pattern matches.
static int matchShuffleAsShift(MVT &ShiftVT, unsigned &Opcode,
                               unsigned ScalarSizeInBits, ArrayRef<int> Mask,
                               int MaskOffset, const APInt &Zeroable) {
  int Size = Mask.size();
  unsigned SizeInBits = Size * ScalarSizeInBits;

  // Check that the elements "shifted in" at one end of each Scale-sized group
  // are all zeroable.
  auto CheckZeros = [&](int Shift, int Scale, bool Left) {
    for (int i = 0; i < Size; i += Scale)
      for (int j = 0; j < Shift; ++j)
        if (!Zeroable[i + j + (Left ? 0 : (Scale - Shift))])
          return false;

    return true;
  };

  // Check that mask entries [Pos, Pos+Size) are undef or count up from Low.
  auto isSequentialOrUndefInRange = [&](unsigned Pos, unsigned Size, int Low,
                                        int Step = 1) {
    for (unsigned i = Pos, e = Pos + Size; i != e; ++i, Low += Step)
      if (!(Mask[i] == -1 || Mask[i] == Low))
        return false;
    return true;
  };

  // Verify the surviving elements form a shifted run in every group, then
  // compute the opcode, shift type and shift amount to use.
  auto MatchShift = [&](int Shift, int Scale, bool Left) {
    for (int i = 0; i != Size; i += Scale) {
      unsigned Pos = Left ? i + Shift : i;
      unsigned Low = Left ? i : i + Shift;
      unsigned Len = Scale - Shift;
      if (!isSequentialOrUndefInRange(Pos, Len, Low + MaskOffset))
        return -1;
    }

    int ShiftEltBits = ScalarSizeInBits * Scale;
    bool ByteShift = ShiftEltBits > 64;
    Opcode = Left ? (ByteShift ? LoongArchISD::VBSLL : LoongArchISD::VSLLI)
                  : (ByteShift ? LoongArchISD::VBSRL : LoongArchISD::VSRLI);
    int ShiftAmt = Shift * ScalarSizeInBits / (ByteShift ? 8 : 1);

    // Normalize the scale for byte shifts to still produce an i64 element
    // type.
    Scale = ByteShift ? Scale / 2 : Scale;

    // We need to round trip through the appropriate type for the shift.
    MVT ShiftSVT = MVT::getIntegerVT(ScalarSizeInBits * Scale);
    ShiftVT = ByteShift ? MVT::getVectorVT(MVT::i8, SizeInBits / 8)
                        : MVT::getVectorVT(ShiftSVT, Size / Scale);
    return (int)ShiftAmt;
  };

  unsigned MaxWidth = 128;
  for (int Scale = 2; Scale * ScalarSizeInBits <= MaxWidth; Scale *= 2)
    for (int Shift = 1; Shift != Scale; ++Shift)
      for (bool Left : {true, false})
        if (CheckZeros(Shift, Scale, Left)) {
          int ShiftAmt = MatchShift(Shift, Scale, Left);
          if (0 < ShiftAmt)
            return ShiftAmt;
        }

  // no match
  return -1;
}
1609
1610/// Lower VECTOR_SHUFFLE as shift (if possible).
1611///
1612/// For example:
1613/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
1614/// <4 x i32> <i32 4, i32 0, i32 1, i32 2>
1615/// is lowered to:
1616/// (VBSLL_V $v0, $v0, 4)
1617///
1618/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
1619/// <4 x i32> <i32 4, i32 0, i32 4, i32 2>
1620/// is lowered to:
1621/// (VSLLI_D $v0, $v0, 32)
1623 MVT VT, SDValue V1, SDValue V2,
1624 SelectionDAG &DAG,
1625 const LoongArchSubtarget &Subtarget,
1626 const APInt &Zeroable) {
1627 int Size = Mask.size();
1628 assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");
1629
1630 MVT ShiftVT;
1631 SDValue V = V1;
1632 unsigned Opcode;
1633
1634 // Try to match shuffle against V1 shift.
1635 int ShiftAmt = matchShuffleAsShift(ShiftVT, Opcode, VT.getScalarSizeInBits(),
1636 Mask, 0, Zeroable);
1637
1638 // If V1 failed, try to match shuffle against V2 shift.
1639 if (ShiftAmt < 0) {
1640 ShiftAmt = matchShuffleAsShift(ShiftVT, Opcode, VT.getScalarSizeInBits(),
1641 Mask, Size, Zeroable);
1642 V = V2;
1643 }
1644
1645 if (ShiftAmt < 0)
1646 return SDValue();
1647
1648 assert(DAG.getTargetLoweringInfo().isTypeLegal(ShiftVT) &&
1649 "Illegal integer vector type");
1650 V = DAG.getBitcast(ShiftVT, V);
1651 V = DAG.getNode(Opcode, DL, ShiftVT, V,
1652 DAG.getConstant(ShiftAmt, DL, Subtarget.getGRLenVT()));
1653 return DAG.getBitcast(VT, V);
1654}
1655
1656/// Determine whether a range fits a regular pattern of values.
1657/// This function accounts for the possibility of jumping over the End iterator.
1658template <typename ValType>
1659static bool
1661 unsigned CheckStride,
1663 ValType ExpectedIndex, unsigned ExpectedIndexStride) {
1664 auto &I = Begin;
1665
1666 while (I != End) {
1667 if (*I != -1 && *I != ExpectedIndex)
1668 return false;
1669 ExpectedIndex += ExpectedIndexStride;
1670
1671 // Incrementing past End is undefined behaviour so we must increment one
1672 // step at a time and check for End at each step.
1673 for (unsigned n = 0; n < CheckStride && I != End; ++n, ++I)
1674 ; // Empty loop body.
1675 }
1676 return true;
1677}
1678
1679/// Compute whether each element of a shuffle is zeroable.
1680///
1681/// A "zeroable" vector shuffle element is one which can be lowered to zero.
1683 SDValue V2, APInt &KnownUndef,
1684 APInt &KnownZero) {
1685 int Size = Mask.size();
1686 KnownUndef = KnownZero = APInt::getZero(Size);
1687
1688 V1 = peekThroughBitcasts(V1);
1689 V2 = peekThroughBitcasts(V2);
1690
1691 bool V1IsZero = ISD::isBuildVectorAllZeros(V1.getNode());
1692 bool V2IsZero = ISD::isBuildVectorAllZeros(V2.getNode());
1693
1694 int VectorSizeInBits = V1.getValueSizeInBits();
1695 int ScalarSizeInBits = VectorSizeInBits / Size;
1696 assert(!(VectorSizeInBits % ScalarSizeInBits) && "Illegal shuffle mask size");
1697 (void)ScalarSizeInBits;
1698
1699 for (int i = 0; i < Size; ++i) {
1700 int M = Mask[i];
1701 if (M < 0) {
1702 KnownUndef.setBit(i);
1703 continue;
1704 }
1705 if ((M >= 0 && M < Size && V1IsZero) || (M >= Size && V2IsZero)) {
1706 KnownZero.setBit(i);
1707 continue;
1708 }
1709 }
1710}
1711
1712/// Test whether a shuffle mask is equivalent within each sub-lane.
1713///
1714/// The specific repeated shuffle mask is populated in \p RepeatedMask, as it is
1715/// non-trivial to compute in the face of undef lanes. The representation is
1716/// suitable for use with existing 128-bit shuffles as entries from the second
1717/// vector have been remapped to [LaneSize, 2*LaneSize).
1718static bool isRepeatedShuffleMask(unsigned LaneSizeInBits, MVT VT,
1719 ArrayRef<int> Mask,
1720 SmallVectorImpl<int> &RepeatedMask) {
1721 auto LaneSize = LaneSizeInBits / VT.getScalarSizeInBits();
1722 RepeatedMask.assign(LaneSize, -1);
1723 int Size = Mask.size();
1724 for (int i = 0; i < Size; ++i) {
1725 assert(Mask[i] == -1 || Mask[i] >= 0);
1726 if (Mask[i] < 0)
1727 continue;
1728 if ((Mask[i] % Size) / LaneSize != i / LaneSize)
1729 // This entry crosses lanes, so there is no way to model this shuffle.
1730 return false;
1731
1732 // Ok, handle the in-lane shuffles by detecting if and when they repeat.
1733 // Adjust second vector indices to start at LaneSize instead of Size.
1734 int LocalM =
1735 Mask[i] < Size ? Mask[i] % LaneSize : Mask[i] % LaneSize + LaneSize;
1736 if (RepeatedMask[i % LaneSize] < 0)
1737 // This is the first non-undef entry in this slot of a 128-bit lane.
1738 RepeatedMask[i % LaneSize] = LocalM;
1739 else if (RepeatedMask[i % LaneSize] != LocalM)
1740 // Found a mismatch with the repeated mask.
1741 return false;
1742 }
1743 return true;
1744}
1745
1746/// Attempts to match vector shuffle as byte rotation.
1748 ArrayRef<int> Mask) {
1749
1750 SDValue Lo, Hi;
1751 SmallVector<int, 16> RepeatedMask;
1752
1753 if (!isRepeatedShuffleMask(128, VT, Mask, RepeatedMask))
1754 return -1;
1755
1756 int NumElts = RepeatedMask.size();
1757 int Rotation = 0;
1758 int Scale = 16 / NumElts;
1759
1760 for (int i = 0; i < NumElts; ++i) {
1761 int M = RepeatedMask[i];
1762 assert((M == -1 || (0 <= M && M < (2 * NumElts))) &&
1763 "Unexpected mask index.");
1764 if (M < 0)
1765 continue;
1766
1767 // Determine where a rotated vector would have started.
1768 int StartIdx = i - (M % NumElts);
1769 if (StartIdx == 0)
1770 return -1;
1771
1772 // If we found the tail of a vector the rotation must be the missing
1773 // front. If we found the head of a vector, it must be how much of the
1774 // head.
1775 int CandidateRotation = StartIdx < 0 ? -StartIdx : NumElts - StartIdx;
1776
1777 if (Rotation == 0)
1778 Rotation = CandidateRotation;
1779 else if (Rotation != CandidateRotation)
1780 return -1;
1781
1782 // Compute which value this mask is pointing at.
1783 SDValue MaskV = M < NumElts ? V1 : V2;
1784
1785 // Compute which of the two target values this index should be assigned
1786 // to. This reflects whether the high elements are remaining or the low
1787 // elements are remaining.
1788 SDValue &TargetV = StartIdx < 0 ? Hi : Lo;
1789
1790 // Either set up this value if we've not encountered it before, or check
1791 // that it remains consistent.
1792 if (!TargetV)
1793 TargetV = MaskV;
1794 else if (TargetV != MaskV)
1795 return -1;
1796 }
1797
1798 // Check that we successfully analyzed the mask, and normalize the results.
1799 assert(Rotation != 0 && "Failed to locate a viable rotation!");
1800 assert((Lo || Hi) && "Failed to find a rotated input vector!");
1801 if (!Lo)
1802 Lo = Hi;
1803 else if (!Hi)
1804 Hi = Lo;
1805
1806 V1 = Lo;
1807 V2 = Hi;
1808
1809 return Rotation * Scale;
1810}
1811
1812/// Lower VECTOR_SHUFFLE as byte rotate (if possible).
1813///
1814/// For example:
1815/// %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b,
1816/// <2 x i32> <i32 3, i32 0>
1817/// is lowered to:
1818/// (VBSRL_V $v1, $v1, 8)
1819/// (VBSLL_V $v0, $v0, 8)
1820/// (VOR_V $v0, $V0, $v1)
1821static SDValue
1823 SDValue V1, SDValue V2, SelectionDAG &DAG,
1824 const LoongArchSubtarget &Subtarget) {
1825
1826 SDValue Lo = V1, Hi = V2;
1827 int ByteRotation = matchShuffleAsByteRotate(VT, Lo, Hi, Mask);
1828 if (ByteRotation <= 0)
1829 return SDValue();
1830
1831 MVT ByteVT = MVT::getVectorVT(MVT::i8, VT.getSizeInBits() / 8);
1832 Lo = DAG.getBitcast(ByteVT, Lo);
1833 Hi = DAG.getBitcast(ByteVT, Hi);
1834
1835 int LoByteShift = 16 - ByteRotation;
1836 int HiByteShift = ByteRotation;
1837 MVT GRLenVT = Subtarget.getGRLenVT();
1838
1839 SDValue LoShift = DAG.getNode(LoongArchISD::VBSLL, DL, ByteVT, Lo,
1840 DAG.getConstant(LoByteShift, DL, GRLenVT));
1841 SDValue HiShift = DAG.getNode(LoongArchISD::VBSRL, DL, ByteVT, Hi,
1842 DAG.getConstant(HiByteShift, DL, GRLenVT));
1843 return DAG.getBitcast(VT, DAG.getNode(ISD::OR, DL, ByteVT, LoShift, HiShift));
1844}
1845
1846/// Lower VECTOR_SHUFFLE as ZERO_EXTEND Or ANY_EXTEND (if possible).
1847///
1848/// For example:
1849/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
1850/// <4 x i32> <i32 0, i32 4, i32 1, i32 4>
1851/// %3 = bitcast <4 x i32> %2 to <2 x i64>
1852/// is lowered to:
1853/// (VREPLI $v1, 0)
1854/// (VILVL $v0, $v1, $v0)
1856 ArrayRef<int> Mask, MVT VT,
1857 SDValue V1, SDValue V2,
1858 SelectionDAG &DAG,
1859 const APInt &Zeroable) {
1860 int Bits = VT.getSizeInBits();
1861 int EltBits = VT.getScalarSizeInBits();
1862 int NumElements = VT.getVectorNumElements();
1863
1864 if (Zeroable.isAllOnes())
1865 return DAG.getConstant(0, DL, VT);
1866
1867 // Define a helper function to check a particular ext-scale and lower to it if
1868 // valid.
1869 auto Lower = [&](int Scale) -> SDValue {
1870 SDValue InputV;
1871 bool AnyExt = true;
1872 int Offset = 0;
1873 for (int i = 0; i < NumElements; i++) {
1874 int M = Mask[i];
1875 if (M < 0)
1876 continue;
1877 if (i % Scale != 0) {
1878 // Each of the extended elements need to be zeroable.
1879 if (!Zeroable[i])
1880 return SDValue();
1881
1882 AnyExt = false;
1883 continue;
1884 }
1885
1886 // Each of the base elements needs to be consecutive indices into the
1887 // same input vector.
1888 SDValue V = M < NumElements ? V1 : V2;
1889 M = M % NumElements;
1890 if (!InputV) {
1891 InputV = V;
1892 Offset = M - (i / Scale);
1893
1894 // These offset can't be handled
1895 if (Offset % (NumElements / Scale))
1896 return SDValue();
1897 } else if (InputV != V)
1898 return SDValue();
1899
1900 if (M != (Offset + (i / Scale)))
1901 return SDValue(); // Non-consecutive strided elements.
1902 }
1903
1904 // If we fail to find an input, we have a zero-shuffle which should always
1905 // have already been handled.
1906 if (!InputV)
1907 return SDValue();
1908
1909 do {
1910 unsigned VilVLoHi = LoongArchISD::VILVL;
1911 if (Offset >= (NumElements / 2)) {
1912 VilVLoHi = LoongArchISD::VILVH;
1913 Offset -= (NumElements / 2);
1914 }
1915
1916 MVT InputVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits), NumElements);
1917 SDValue Ext =
1918 AnyExt ? DAG.getFreeze(InputV) : DAG.getConstant(0, DL, InputVT);
1919 InputV = DAG.getBitcast(InputVT, InputV);
1920 InputV = DAG.getNode(VilVLoHi, DL, InputVT, Ext, InputV);
1921 Scale /= 2;
1922 EltBits *= 2;
1923 NumElements /= 2;
1924 } while (Scale > 1);
1925 return DAG.getBitcast(VT, InputV);
1926 };
1927
1928 // Each iteration, try extending the elements half as much, but into twice as
1929 // many elements.
1930 for (int NumExtElements = Bits / 64; NumExtElements < NumElements;
1931 NumExtElements *= 2) {
1932 if (SDValue V = Lower(NumElements / NumExtElements))
1933 return V;
1934 }
1935 return SDValue();
1936}
1937
1938/// Lower VECTOR_SHUFFLE into VREPLVEI (if possible).
1939///
1940/// VREPLVEI performs vector broadcast based on an element specified by an
1941/// integer immediate, with its mask being similar to:
1942/// <x, x, x, ...>
1943/// where x is any valid index.
1944///
1945/// When undef's appear in the mask they are treated as if they were whatever
1946/// value is necessary in order to fit the above form.
1947static SDValue
1949 SDValue V1, SelectionDAG &DAG,
1950 const LoongArchSubtarget &Subtarget) {
1951 int SplatIndex = -1;
1952 for (const auto &M : Mask) {
1953 if (M != -1) {
1954 SplatIndex = M;
1955 break;
1956 }
1957 }
1958
1959 if (SplatIndex == -1)
1960 return DAG.getUNDEF(VT);
1961
1962 assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
1963 if (fitsRegularPattern<int>(Mask.begin(), 1, Mask.end(), SplatIndex, 0)) {
1964 return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
1965 DAG.getConstant(SplatIndex, DL, Subtarget.getGRLenVT()));
1966 }
1967
1968 return SDValue();
1969}
1970
1971/// Lower VECTOR_SHUFFLE into VSHUF4I (if possible).
1972///
1973/// VSHUF4I splits the vector into blocks of four elements, then shuffles these
1974/// elements according to a <4 x i2> constant (encoded as an integer immediate).
1975///
1976/// It is therefore possible to lower into VSHUF4I when the mask takes the form:
1977/// <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...>
1978/// When undef's appear they are treated as if they were whatever value is
1979/// necessary in order to fit the above forms.
1980///
1981/// For example:
1982/// %2 = shufflevector <8 x i16> %0, <8 x i16> undef,
1983/// <8 x i32> <i32 3, i32 2, i32 1, i32 0,
1984/// i32 7, i32 6, i32 5, i32 4>
1985/// is lowered to:
1986/// (VSHUF4I_H $v0, $v1, 27)
1987/// where the 27 comes from:
1988/// 3 + (2 << 2) + (1 << 4) + (0 << 6)
1989static SDValue
1991 SDValue V1, SDValue V2, SelectionDAG &DAG,
1992 const LoongArchSubtarget &Subtarget) {
1993
1994 unsigned SubVecSize = 4;
1995 if (VT == MVT::v2f64 || VT == MVT::v2i64)
1996 SubVecSize = 2;
1997
1998 int SubMask[4] = {-1, -1, -1, -1};
1999 for (unsigned i = 0; i < SubVecSize; ++i) {
2000 for (unsigned j = i; j < Mask.size(); j += SubVecSize) {
2001 int M = Mask[j];
2002
2003 // Convert from vector index to 4-element subvector index
2004 // If an index refers to an element outside of the subvector then give up
2005 if (M != -1) {
2006 M -= 4 * (j / SubVecSize);
2007 if (M < 0 || M >= 4)
2008 return SDValue();
2009 }
2010
2011 // If the mask has an undef, replace it with the current index.
2012 // Note that it might still be undef if the current index is also undef
2013 if (SubMask[i] == -1)
2014 SubMask[i] = M;
2015 // Check that non-undef values are the same as in the mask. If they
2016 // aren't then give up
2017 else if (M != -1 && M != SubMask[i])
2018 return SDValue();
2019 }
2020 }
2021
2022 // Calculate the immediate. Replace any remaining undefs with zero
2023 int Imm = 0;
2024 for (int i = SubVecSize - 1; i >= 0; --i) {
2025 int M = SubMask[i];
2026
2027 if (M == -1)
2028 M = 0;
2029
2030 Imm <<= 2;
2031 Imm |= M & 0x3;
2032 }
2033
2034 MVT GRLenVT = Subtarget.getGRLenVT();
2035
2036 // Return vshuf4i.d
2037 if (VT == MVT::v2f64 || VT == MVT::v2i64)
2038 return DAG.getNode(LoongArchISD::VSHUF4I_D, DL, VT, V1, V2,
2039 DAG.getConstant(Imm, DL, GRLenVT));
2040
2041 return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT, V1,
2042 DAG.getConstant(Imm, DL, GRLenVT));
2043}
2044
2045/// Lower VECTOR_SHUFFLE whose result is the reversed source vector.
2046///
2047/// It is possible to do optimization for VECTOR_SHUFFLE performing vector
2048/// reverse whose mask likes:
2049/// <7, 6, 5, 4, 3, 2, 1, 0>
2050///
2051/// When undef's appear in the mask they are treated as if they were whatever
2052/// value is necessary in order to fit the above forms.
2053static SDValue
2055 SDValue V1, SelectionDAG &DAG,
2056 const LoongArchSubtarget &Subtarget) {
2057 // Only vectors with i8/i16 elements which cannot match other patterns
2058 // directly needs to do this.
2059 if (VT != MVT::v16i8 && VT != MVT::v8i16 && VT != MVT::v32i8 &&
2060 VT != MVT::v16i16)
2061 return SDValue();
2062
2063 if (!ShuffleVectorInst::isReverseMask(Mask, Mask.size()))
2064 return SDValue();
2065
2066 int WidenNumElts = VT.getVectorNumElements() / 4;
2067 SmallVector<int, 16> WidenMask(WidenNumElts, -1);
2068 for (int i = 0; i < WidenNumElts; ++i)
2069 WidenMask[i] = WidenNumElts - 1 - i;
2070
2071 MVT WidenVT = MVT::getVectorVT(
2072 VT.getVectorElementType() == MVT::i8 ? MVT::i32 : MVT::i64, WidenNumElts);
2073 SDValue NewV1 = DAG.getBitcast(WidenVT, V1);
2074 SDValue WidenRev = DAG.getVectorShuffle(WidenVT, DL, NewV1,
2075 DAG.getUNDEF(WidenVT), WidenMask);
2076
2077 return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT,
2078 DAG.getBitcast(VT, WidenRev),
2079 DAG.getConstant(27, DL, Subtarget.getGRLenVT()));
2080}
2081
2082/// Lower VECTOR_SHUFFLE into VPACKEV (if possible).
2083///
2084/// VPACKEV interleaves the even elements from each vector.
2085///
2086/// It is possible to lower into VPACKEV when the mask consists of two of the
2087/// following forms interleaved:
2088/// <0, 2, 4, ...>
2089/// <n, n+2, n+4, ...>
2090/// where n is the number of elements in the vector.
2091/// For example:
2092/// <0, 0, 2, 2, 4, 4, ...>
2093/// <0, n, 2, n+2, 4, n+4, ...>
2094///
2095/// When undef's appear in the mask they are treated as if they were whatever
2096/// value is necessary in order to fit the above forms.
2098 MVT VT, SDValue V1, SDValue V2,
2099 SelectionDAG &DAG) {
2100
2101 const auto &Begin = Mask.begin();
2102 const auto &End = Mask.end();
2103 SDValue OriV1 = V1, OriV2 = V2;
2104
2105 if (fitsRegularPattern<int>(Begin, 2, End, 0, 2))
2106 V1 = OriV1;
2107 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 2))
2108 V1 = OriV2;
2109 else
2110 return SDValue();
2111
2112 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 2))
2113 V2 = OriV1;
2114 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 2))
2115 V2 = OriV2;
2116 else
2117 return SDValue();
2118
2119 return DAG.getNode(LoongArchISD::VPACKEV, DL, VT, V2, V1);
2120}
2121
2122/// Lower VECTOR_SHUFFLE into VPACKOD (if possible).
2123///
2124/// VPACKOD interleaves the odd elements from each vector.
2125///
2126/// It is possible to lower into VPACKOD when the mask consists of two of the
2127/// following forms interleaved:
2128/// <1, 3, 5, ...>
2129/// <n+1, n+3, n+5, ...>
2130/// where n is the number of elements in the vector.
2131/// For example:
2132/// <1, 1, 3, 3, 5, 5, ...>
2133/// <1, n+1, 3, n+3, 5, n+5, ...>
2134///
2135/// When undef's appear in the mask they are treated as if they were whatever
2136/// value is necessary in order to fit the above forms.
2138 MVT VT, SDValue V1, SDValue V2,
2139 SelectionDAG &DAG) {
2140
2141 const auto &Begin = Mask.begin();
2142 const auto &End = Mask.end();
2143 SDValue OriV1 = V1, OriV2 = V2;
2144
2145 if (fitsRegularPattern<int>(Begin, 2, End, 1, 2))
2146 V1 = OriV1;
2147 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + 1, 2))
2148 V1 = OriV2;
2149 else
2150 return SDValue();
2151
2152 if (fitsRegularPattern<int>(Begin + 1, 2, End, 1, 2))
2153 V2 = OriV1;
2154 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + 1, 2))
2155 V2 = OriV2;
2156 else
2157 return SDValue();
2158
2159 return DAG.getNode(LoongArchISD::VPACKOD, DL, VT, V2, V1);
2160}
2161
2162/// Lower VECTOR_SHUFFLE into VILVH (if possible).
2163///
2164/// VILVH interleaves consecutive elements from the left (highest-indexed) half
2165/// of each vector.
2166///
2167/// It is possible to lower into VILVH when the mask consists of two of the
2168/// following forms interleaved:
2169/// <x, x+1, x+2, ...>
2170/// <n+x, n+x+1, n+x+2, ...>
2171/// where n is the number of elements in the vector and x is half n.
2172/// For example:
2173/// <x, x, x+1, x+1, x+2, x+2, ...>
2174/// <x, n+x, x+1, n+x+1, x+2, n+x+2, ...>
2175///
2176/// When undef's appear in the mask they are treated as if they were whatever
2177/// value is necessary in order to fit the above forms.
2179 MVT VT, SDValue V1, SDValue V2,
2180 SelectionDAG &DAG) {
2181
2182 const auto &Begin = Mask.begin();
2183 const auto &End = Mask.end();
2184 unsigned HalfSize = Mask.size() / 2;
2185 SDValue OriV1 = V1, OriV2 = V2;
2186
2187 if (fitsRegularPattern<int>(Begin, 2, End, HalfSize, 1))
2188 V1 = OriV1;
2189 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + HalfSize, 1))
2190 V1 = OriV2;
2191 else
2192 return SDValue();
2193
2194 if (fitsRegularPattern<int>(Begin + 1, 2, End, HalfSize, 1))
2195 V2 = OriV1;
2196 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + HalfSize,
2197 1))
2198 V2 = OriV2;
2199 else
2200 return SDValue();
2201
2202 return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
2203}
2204
2205/// Lower VECTOR_SHUFFLE into VILVL (if possible).
2206///
2207/// VILVL interleaves consecutive elements from the right (lowest-indexed) half
2208/// of each vector.
2209///
2210/// It is possible to lower into VILVL when the mask consists of two of the
2211/// following forms interleaved:
2212/// <0, 1, 2, ...>
2213/// <n, n+1, n+2, ...>
2214/// where n is the number of elements in the vector.
2215/// For example:
2216/// <0, 0, 1, 1, 2, 2, ...>
2217/// <0, n, 1, n+1, 2, n+2, ...>
2218///
2219/// When undef's appear in the mask they are treated as if they were whatever
2220/// value is necessary in order to fit the above forms.
2222 MVT VT, SDValue V1, SDValue V2,
2223 SelectionDAG &DAG) {
2224
2225 const auto &Begin = Mask.begin();
2226 const auto &End = Mask.end();
2227 SDValue OriV1 = V1, OriV2 = V2;
2228
2229 if (fitsRegularPattern<int>(Begin, 2, End, 0, 1))
2230 V1 = OriV1;
2231 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 1))
2232 V1 = OriV2;
2233 else
2234 return SDValue();
2235
2236 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 1))
2237 V2 = OriV1;
2238 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 1))
2239 V2 = OriV2;
2240 else
2241 return SDValue();
2242
2243 return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
2244}
2245
2246/// Lower VECTOR_SHUFFLE into VPICKEV (if possible).
2247///
2248/// VPICKEV copies the even elements of each vector into the result vector.
2249///
2250/// It is possible to lower into VPICKEV when the mask consists of two of the
2251/// following forms concatenated:
2252/// <0, 2, 4, ...>
2253/// <n, n+2, n+4, ...>
2254/// where n is the number of elements in the vector.
2255/// For example:
2256/// <0, 2, 4, ..., 0, 2, 4, ...>
2257/// <0, 2, 4, ..., n, n+2, n+4, ...>
2258///
2259/// When undef's appear in the mask they are treated as if they were whatever
2260/// value is necessary in order to fit the above forms.
2262 MVT VT, SDValue V1, SDValue V2,
2263 SelectionDAG &DAG) {
2264
2265 const auto &Begin = Mask.begin();
2266 const auto &Mid = Mask.begin() + Mask.size() / 2;
2267 const auto &End = Mask.end();
2268 SDValue OriV1 = V1, OriV2 = V2;
2269
2270 if (fitsRegularPattern<int>(Begin, 1, Mid, 0, 2))
2271 V1 = OriV1;
2272 else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size(), 2))
2273 V1 = OriV2;
2274 else
2275 return SDValue();
2276
2277 if (fitsRegularPattern<int>(Mid, 1, End, 0, 2))
2278 V2 = OriV1;
2279 else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size(), 2))
2280 V2 = OriV2;
2281
2282 else
2283 return SDValue();
2284
2285 return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
2286}
2287
2288/// Lower VECTOR_SHUFFLE into VPICKOD (if possible).
2289///
2290/// VPICKOD copies the odd elements of each vector into the result vector.
2291///
2292/// It is possible to lower into VPICKOD when the mask consists of two of the
2293/// following forms concatenated:
2294/// <1, 3, 5, ...>
2295/// <n+1, n+3, n+5, ...>
2296/// where n is the number of elements in the vector.
2297/// For example:
2298/// <1, 3, 5, ..., 1, 3, 5, ...>
2299/// <1, 3, 5, ..., n+1, n+3, n+5, ...>
2300///
2301/// When undef's appear in the mask they are treated as if they were whatever
2302/// value is necessary in order to fit the above forms.
2304 MVT VT, SDValue V1, SDValue V2,
2305 SelectionDAG &DAG) {
2306
2307 const auto &Begin = Mask.begin();
2308 const auto &Mid = Mask.begin() + Mask.size() / 2;
2309 const auto &End = Mask.end();
2310 SDValue OriV1 = V1, OriV2 = V2;
2311
2312 if (fitsRegularPattern<int>(Begin, 1, Mid, 1, 2))
2313 V1 = OriV1;
2314 else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size() + 1, 2))
2315 V1 = OriV2;
2316 else
2317 return SDValue();
2318
2319 if (fitsRegularPattern<int>(Mid, 1, End, 1, 2))
2320 V2 = OriV1;
2321 else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size() + 1, 2))
2322 V2 = OriV2;
2323 else
2324 return SDValue();
2325
2326 return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
2327}
2328
2329/// Lower VECTOR_SHUFFLE into VEXTRINS (if possible).
2330///
2331/// VEXTRINS copies one element of a vector into any place of the result
2332/// vector and makes no change to the rest elements of the result vector.
2333///
2334/// It is possible to lower into VEXTRINS when the mask takes the form:
2335/// <0, 1, 2, ..., n+i, ..., n-1> or <n, n+1, n+2, ..., i, ..., 2n-1> or
2336/// <0, 1, 2, ..., i, ..., n-1> or <n, n+1, n+2, ..., n+i, ..., 2n-1>
2337/// where n is the number of elements in the vector and i is in [0, n).
2338/// For example:
2339/// <0, 1, 2, 3, 4, 5, 6, 8> , <2, 9, 10, 11, 12, 13, 14, 15> ,
2340/// <0, 1, 2, 6, 4, 5, 6, 7> , <8, 9, 10, 11, 12, 9, 14, 15>
2341///
2342/// When undef's appear in the mask they are treated as if they were whatever
2343/// value is necessary in order to fit the above forms.
2344static SDValue
2346 SDValue V1, SDValue V2, SelectionDAG &DAG,
2347 const LoongArchSubtarget &Subtarget) {
2348 unsigned NumElts = VT.getVectorNumElements();
2349 MVT EltVT = VT.getVectorElementType();
2350 MVT GRLenVT = Subtarget.getGRLenVT();
2351
2352 if (Mask.size() != NumElts)
2353 return SDValue();
2354
2355 auto tryLowerToExtrAndIns = [&](unsigned Base) -> SDValue {
2356 int DiffCount = 0;
2357 int DiffPos = -1;
2358 for (unsigned i = 0; i < NumElts; ++i) {
2359 if (Mask[i] == -1)
2360 continue;
2361 if (Mask[i] != int(Base + i)) {
2362 ++DiffCount;
2363 DiffPos = int(i);
2364 if (DiffCount > 1)
2365 return SDValue();
2366 }
2367 }
2368
2369 // Need exactly one differing element to lower into VEXTRINS.
2370 if (DiffCount != 1)
2371 return SDValue();
2372
2373 // DiffMask must be in [0, 2N).
2374 int DiffMask = Mask[DiffPos];
2375 if (DiffMask < 0 || DiffMask >= int(2 * NumElts))
2376 return SDValue();
2377
2378 // Determine source vector and source index.
2379 SDValue SrcVec;
2380 unsigned SrcIdx;
2381 if (unsigned(DiffMask) < NumElts) {
2382 SrcVec = V1;
2383 SrcIdx = unsigned(DiffMask);
2384 } else {
2385 SrcVec = V2;
2386 SrcIdx = unsigned(DiffMask) - NumElts;
2387 }
2388
2389 // Replace with EXTRACT_VECTOR_ELT + INSERT_VECTOR_ELT, it will match the
2390 // patterns of VEXTRINS in tablegen.
2391 SDValue Extracted = DAG.getNode(
2392 ISD::EXTRACT_VECTOR_ELT, DL, EltVT.isFloatingPoint() ? EltVT : GRLenVT,
2393 SrcVec, DAG.getConstant(SrcIdx, DL, GRLenVT));
2394 SDValue Result =
2395 DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, (Base == 0) ? V1 : V2,
2396 Extracted, DAG.getConstant(DiffPos, DL, GRLenVT));
2397
2398 return Result;
2399 };
2400
2401 // Try [0, n-1) insertion then [n, 2n-1) insertion.
2402 if (SDValue Result = tryLowerToExtrAndIns(0))
2403 return Result;
2404 return tryLowerToExtrAndIns(NumElts);
2405}
2406
2407/// Lower VECTOR_SHUFFLE into VSHUF.
2408///
2409/// This mostly consists of converting the shuffle mask into a BUILD_VECTOR and
2410/// adding it as an operand to the resulting VSHUF.
2412 MVT VT, SDValue V1, SDValue V2,
2413 SelectionDAG &DAG,
2414 const LoongArchSubtarget &Subtarget) {
2415
2417 for (auto M : Mask)
2418 Ops.push_back(DAG.getSignedConstant(M, DL, Subtarget.getGRLenVT()));
2419
2420 EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
2421 SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, Ops);
2422
2423 // VECTOR_SHUFFLE concatenates the vectors in an vectorwise fashion.
2424 // <0b00, 0b01> + <0b10, 0b11> -> <0b00, 0b01, 0b10, 0b11>
2425 // VSHF concatenates the vectors in a bitwise fashion:
2426 // <0b00, 0b01> + <0b10, 0b11> ->
2427 // 0b0100 + 0b1110 -> 0b01001110
2428 // <0b10, 0b11, 0b00, 0b01>
2429 // We must therefore swap the operands to get the correct result.
2430 return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
2431}
2432
2433/// Dispatching routine to lower various 128-bit LoongArch vector shuffles.
2434///
2435/// This routine breaks down the specific type of 128-bit shuffle and
2436/// dispatches to the lowering routines accordingly.
2438 SDValue V1, SDValue V2, SelectionDAG &DAG,
2439 const LoongArchSubtarget &Subtarget) {
2440 assert((VT.SimpleTy == MVT::v16i8 || VT.SimpleTy == MVT::v8i16 ||
2441 VT.SimpleTy == MVT::v4i32 || VT.SimpleTy == MVT::v2i64 ||
2442 VT.SimpleTy == MVT::v4f32 || VT.SimpleTy == MVT::v2f64) &&
2443 "Vector type is unsupported for lsx!");
2445 "Two operands have different types!");
2446 assert(VT.getVectorNumElements() == Mask.size() &&
2447 "Unexpected mask size for shuffle!");
2448 assert(Mask.size() % 2 == 0 && "Expected even mask size.");
2449
2450 APInt KnownUndef, KnownZero;
2451 computeZeroableShuffleElements(Mask, V1, V2, KnownUndef, KnownZero);
2452 APInt Zeroable = KnownUndef | KnownZero;
2453
2454 SDValue Result;
2455 // TODO: Add more comparison patterns.
2456 if (V2.isUndef()) {
2457 if ((Result =
2458 lowerVECTOR_SHUFFLE_VREPLVEI(DL, Mask, VT, V1, DAG, Subtarget)))
2459 return Result;
2460 if ((Result =
2461 lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2462 return Result;
2463 if ((Result =
2464 lowerVECTOR_SHUFFLE_IsReverse(DL, Mask, VT, V1, DAG, Subtarget)))
2465 return Result;
2466
2467 // TODO: This comment may be enabled in the future to better match the
2468 // pattern for instruction selection.
2469 /* V2 = V1; */
2470 }
2471
2472 // It is recommended not to change the pattern comparison order for better
2473 // performance.
2474 if ((Result = lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG)))
2475 return Result;
2476 if ((Result = lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG)))
2477 return Result;
2478 if ((Result = lowerVECTOR_SHUFFLE_VILVH(DL, Mask, VT, V1, V2, DAG)))
2479 return Result;
2480 if ((Result = lowerVECTOR_SHUFFLE_VILVL(DL, Mask, VT, V1, V2, DAG)))
2481 return Result;
2482 if ((Result = lowerVECTOR_SHUFFLE_VPICKEV(DL, Mask, VT, V1, V2, DAG)))
2483 return Result;
2484 if ((Result = lowerVECTOR_SHUFFLE_VPICKOD(DL, Mask, VT, V1, V2, DAG)))
2485 return Result;
2486 if ((VT.SimpleTy == MVT::v2i64 || VT.SimpleTy == MVT::v2f64) &&
2487 (Result =
2488 lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2489 return Result;
2490 if ((Result =
2491 lowerVECTOR_SHUFFLE_VEXTRINS(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2492 return Result;
2493 if ((Result = lowerVECTOR_SHUFFLEAsZeroOrAnyExtend(DL, Mask, VT, V1, V2, DAG,
2494 Zeroable)))
2495 return Result;
2496 if ((Result = lowerVECTOR_SHUFFLEAsShift(DL, Mask, VT, V1, V2, DAG, Subtarget,
2497 Zeroable)))
2498 return Result;
2499 if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, Mask, VT, V1, V2, DAG,
2500 Subtarget)))
2501 return Result;
2502 if (SDValue NewShuffle = widenShuffleMask(DL, Mask, VT, V1, V2, DAG))
2503 return NewShuffle;
2504 if ((Result =
2505 lowerVECTOR_SHUFFLE_VSHUF(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2506 return Result;
2507 return SDValue();
2508}
2509
2510/// Lower VECTOR_SHUFFLE into XVREPLVEI (if possible).
2511///
2512/// It is a XVREPLVEI when the mask is:
2513/// <x, x, x, ..., x+n, x+n, x+n, ...>
2514/// where the number of x is equal to n and n is half the length of vector.
2515///
2516/// When undef's appear in the mask they are treated as if they were whatever
2517/// value is necessary in order to fit the above form.
2518static SDValue
2520 SDValue V1, SelectionDAG &DAG,
2521 const LoongArchSubtarget &Subtarget) {
2522 int SplatIndex = -1;
2523 for (const auto &M : Mask) {
2524 if (M != -1) {
2525 SplatIndex = M;
2526 break;
2527 }
2528 }
2529
2530 if (SplatIndex == -1)
2531 return DAG.getUNDEF(VT);
2532
2533 const auto &Begin = Mask.begin();
2534 const auto &End = Mask.end();
2535 int HalfSize = Mask.size() / 2;
2536
2537 if (SplatIndex >= HalfSize)
2538 return SDValue();
2539
2540 assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
2541 if (fitsRegularPattern<int>(Begin, 1, End - HalfSize, SplatIndex, 0) &&
2542 fitsRegularPattern<int>(Begin + HalfSize, 1, End, SplatIndex + HalfSize,
2543 0)) {
2544 return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
2545 DAG.getConstant(SplatIndex, DL, Subtarget.getGRLenVT()));
2546 }
2547
2548 return SDValue();
2549}
2550
2551/// Lower VECTOR_SHUFFLE into XVSHUF4I (if possible).
2552static SDValue
2554 SDValue V1, SDValue V2, SelectionDAG &DAG,
2555 const LoongArchSubtarget &Subtarget) {
2556 // XVSHUF4I_D must be handled separately because it is different from other
2557 // types of [X]VSHUF4I instructions.
2558 if (Mask.size() == 4) {
2559 unsigned MaskImm = 0;
2560 for (int i = 1; i >= 0; --i) {
2561 int MLo = Mask[i];
2562 int MHi = Mask[i + 2];
2563 if (!(MLo == -1 || (MLo >= 0 && MLo <= 1) || (MLo >= 4 && MLo <= 5)) ||
2564 !(MHi == -1 || (MHi >= 2 && MHi <= 3) || (MHi >= 6 && MHi <= 7)))
2565 return SDValue();
2566 if (MHi != -1 && MLo != -1 && MHi != MLo + 2)
2567 return SDValue();
2568
2569 MaskImm <<= 2;
2570 if (MLo != -1)
2571 MaskImm |= ((MLo <= 1) ? MLo : (MLo - 2)) & 0x3;
2572 else if (MHi != -1)
2573 MaskImm |= ((MHi <= 3) ? (MHi - 2) : (MHi - 4)) & 0x3;
2574 }
2575
2576 return DAG.getNode(LoongArchISD::VSHUF4I_D, DL, VT, V1, V2,
2577 DAG.getConstant(MaskImm, DL, Subtarget.getGRLenVT()));
2578 }
2579
2580 return lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget);
2581}
2582
2583/// Lower VECTOR_SHUFFLE into XVPERMI (if possible).
2584static SDValue
2586 SDValue V1, SelectionDAG &DAG,
2587 const LoongArchSubtarget &Subtarget) {
2588 // Only consider XVPERMI_D.
2589 if (Mask.size() != 4 || (VT != MVT::v4i64 && VT != MVT::v4f64))
2590 return SDValue();
2591
2592 unsigned MaskImm = 0;
2593 for (unsigned i = 0; i < Mask.size(); ++i) {
2594 if (Mask[i] == -1)
2595 continue;
2596 MaskImm |= Mask[i] << (i * 2);
2597 }
2598
2599 return DAG.getNode(LoongArchISD::XVPERMI, DL, VT, V1,
2600 DAG.getConstant(MaskImm, DL, Subtarget.getGRLenVT()));
2601}
2602
2603/// Lower VECTOR_SHUFFLE into XVPERM (if possible).
2605 MVT VT, SDValue V1, SelectionDAG &DAG,
2606 const LoongArchSubtarget &Subtarget) {
2607 // LoongArch LASX only have XVPERM_W.
2608 if (Mask.size() != 8 || (VT != MVT::v8i32 && VT != MVT::v8f32))
2609 return SDValue();
2610
2611 unsigned NumElts = VT.getVectorNumElements();
2612 unsigned HalfSize = NumElts / 2;
2613 bool FrontLo = true, FrontHi = true;
2614 bool BackLo = true, BackHi = true;
2615
2616 auto inRange = [](int val, int low, int high) {
2617 return (val == -1) || (val >= low && val < high);
2618 };
2619
2620 for (unsigned i = 0; i < HalfSize; ++i) {
2621 int Fronti = Mask[i];
2622 int Backi = Mask[i + HalfSize];
2623
2624 FrontLo &= inRange(Fronti, 0, HalfSize);
2625 FrontHi &= inRange(Fronti, HalfSize, NumElts);
2626 BackLo &= inRange(Backi, 0, HalfSize);
2627 BackHi &= inRange(Backi, HalfSize, NumElts);
2628 }
2629
2630 // If both the lower and upper 128-bit parts access only one half of the
2631 // vector (either lower or upper), avoid using xvperm.w. The latency of
2632 // xvperm.w(3) is higher than using xvshuf(1) and xvori(1).
2633 if ((FrontLo || FrontHi) && (BackLo || BackHi))
2634 return SDValue();
2635
2637 MVT GRLenVT = Subtarget.getGRLenVT();
2638 for (unsigned i = 0; i < NumElts; ++i)
2639 Masks.push_back(Mask[i] == -1 ? DAG.getUNDEF(GRLenVT)
2640 : DAG.getConstant(Mask[i], DL, GRLenVT));
2641 SDValue MaskVec = DAG.getBuildVector(MVT::v8i32, DL, Masks);
2642
2643 return DAG.getNode(LoongArchISD::XVPERM, DL, VT, V1, MaskVec);
2644}
2645
2646/// Lower VECTOR_SHUFFLE into XVPACKEV (if possible).
2648 MVT VT, SDValue V1, SDValue V2,
2649 SelectionDAG &DAG) {
2650 return lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG);
2651}
2652
2653/// Lower VECTOR_SHUFFLE into XVPACKOD (if possible).
2655 MVT VT, SDValue V1, SDValue V2,
2656 SelectionDAG &DAG) {
2657 return lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG);
2658}
2659
2660/// Lower VECTOR_SHUFFLE into XVILVH (if possible).
2662 MVT VT, SDValue V1, SDValue V2,
2663 SelectionDAG &DAG) {
2664
2665 const auto &Begin = Mask.begin();
2666 const auto &End = Mask.end();
2667 unsigned HalfSize = Mask.size() / 2;
2668 unsigned LeftSize = HalfSize / 2;
2669 SDValue OriV1 = V1, OriV2 = V2;
2670
2671 if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, HalfSize - LeftSize,
2672 1) &&
2673 fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize + LeftSize, 1))
2674 V1 = OriV1;
2675 else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize,
2676 Mask.size() + HalfSize - LeftSize, 1) &&
2677 fitsRegularPattern<int>(Begin + HalfSize, 2, End,
2678 Mask.size() + HalfSize + LeftSize, 1))
2679 V1 = OriV2;
2680 else
2681 return SDValue();
2682
2683 if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, HalfSize - LeftSize,
2684 1) &&
2685 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize + LeftSize,
2686 1))
2687 V2 = OriV1;
2688 else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize,
2689 Mask.size() + HalfSize - LeftSize, 1) &&
2690 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
2691 Mask.size() + HalfSize + LeftSize, 1))
2692 V2 = OriV2;
2693 else
2694 return SDValue();
2695
2696 return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
2697}
2698
2699/// Lower VECTOR_SHUFFLE into XVILVL (if possible).
2701 MVT VT, SDValue V1, SDValue V2,
2702 SelectionDAG &DAG) {
2703
2704 const auto &Begin = Mask.begin();
2705 const auto &End = Mask.end();
2706 unsigned HalfSize = Mask.size() / 2;
2707 SDValue OriV1 = V1, OriV2 = V2;
2708
2709 if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, 0, 1) &&
2710 fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize, 1))
2711 V1 = OriV1;
2712 else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, Mask.size(), 1) &&
2713 fitsRegularPattern<int>(Begin + HalfSize, 2, End,
2714 Mask.size() + HalfSize, 1))
2715 V1 = OriV2;
2716 else
2717 return SDValue();
2718
2719 if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, 0, 1) &&
2720 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize, 1))
2721 V2 = OriV1;
2722 else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, Mask.size(),
2723 1) &&
2724 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
2725 Mask.size() + HalfSize, 1))
2726 V2 = OriV2;
2727 else
2728 return SDValue();
2729
2730 return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
2731}
2732
2733/// Lower VECTOR_SHUFFLE into XVPICKEV (if possible).
2735 MVT VT, SDValue V1, SDValue V2,
2736 SelectionDAG &DAG) {
2737
2738 const auto &Begin = Mask.begin();
2739 const auto &LeftMid = Mask.begin() + Mask.size() / 4;
2740 const auto &Mid = Mask.begin() + Mask.size() / 2;
2741 const auto &RightMid = Mask.end() - Mask.size() / 4;
2742 const auto &End = Mask.end();
2743 unsigned HalfSize = Mask.size() / 2;
2744 SDValue OriV1 = V1, OriV2 = V2;
2745
2746 if (fitsRegularPattern<int>(Begin, 1, LeftMid, 0, 2) &&
2747 fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize, 2))
2748 V1 = OriV1;
2749 else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size(), 2) &&
2750 fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize, 2))
2751 V1 = OriV2;
2752 else
2753 return SDValue();
2754
2755 if (fitsRegularPattern<int>(LeftMid, 1, Mid, 0, 2) &&
2756 fitsRegularPattern<int>(RightMid, 1, End, HalfSize, 2))
2757 V2 = OriV1;
2758 else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size(), 2) &&
2759 fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize, 2))
2760 V2 = OriV2;
2761
2762 else
2763 return SDValue();
2764
2765 return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
2766}
2767
2768/// Lower VECTOR_SHUFFLE into XVPICKOD (if possible).
2770 MVT VT, SDValue V1, SDValue V2,
2771 SelectionDAG &DAG) {
2772
2773 const auto &Begin = Mask.begin();
2774 const auto &LeftMid = Mask.begin() + Mask.size() / 4;
2775 const auto &Mid = Mask.begin() + Mask.size() / 2;
2776 const auto &RightMid = Mask.end() - Mask.size() / 4;
2777 const auto &End = Mask.end();
2778 unsigned HalfSize = Mask.size() / 2;
2779 SDValue OriV1 = V1, OriV2 = V2;
2780
2781 if (fitsRegularPattern<int>(Begin, 1, LeftMid, 1, 2) &&
2782 fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize + 1, 2))
2783 V1 = OriV1;
2784 else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size() + 1, 2) &&
2785 fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize + 1,
2786 2))
2787 V1 = OriV2;
2788 else
2789 return SDValue();
2790
2791 if (fitsRegularPattern<int>(LeftMid, 1, Mid, 1, 2) &&
2792 fitsRegularPattern<int>(RightMid, 1, End, HalfSize + 1, 2))
2793 V2 = OriV1;
2794 else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size() + 1, 2) &&
2795 fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize + 1,
2796 2))
2797 V2 = OriV2;
2798 else
2799 return SDValue();
2800
2801 return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
2802}
2803
2804/// Lower VECTOR_SHUFFLE into XVEXTRINS (if possible).
2805static SDValue
2807 SDValue V1, SDValue V2, SelectionDAG &DAG,
2808 const LoongArchSubtarget &Subtarget) {
2809 int NumElts = VT.getVectorNumElements();
2810 int HalfSize = NumElts / 2;
2811 MVT EltVT = VT.getVectorElementType();
2812 MVT GRLenVT = Subtarget.getGRLenVT();
2813
2814 if ((int)Mask.size() != NumElts)
2815 return SDValue();
2816
2817 auto tryLowerToExtrAndIns = [&](int Base) -> SDValue {
2818 SmallVector<int> DiffPos;
2819 for (int i = 0; i < NumElts; ++i) {
2820 if (Mask[i] == -1)
2821 continue;
2822 if (Mask[i] != Base + i) {
2823 DiffPos.push_back(i);
2824 if (DiffPos.size() > 2)
2825 return SDValue();
2826 }
2827 }
2828
2829 // Need exactly two differing element to lower into XVEXTRINS.
2830 // If only one differing element, the element at a distance of
2831 // HalfSize from it must be undef.
2832 if (DiffPos.size() == 1) {
2833 if (DiffPos[0] < HalfSize && Mask[DiffPos[0] + HalfSize] == -1)
2834 DiffPos.push_back(DiffPos[0] + HalfSize);
2835 else if (DiffPos[0] >= HalfSize && Mask[DiffPos[0] - HalfSize] == -1)
2836 DiffPos.insert(DiffPos.begin(), DiffPos[0] - HalfSize);
2837 else
2838 return SDValue();
2839 }
2840 if (DiffPos.size() != 2 || DiffPos[1] != DiffPos[0] + HalfSize)
2841 return SDValue();
2842
2843 // DiffMask must be in its low or high part.
2844 int DiffMaskLo = Mask[DiffPos[0]];
2845 int DiffMaskHi = Mask[DiffPos[1]];
2846 DiffMaskLo = DiffMaskLo == -1 ? DiffMaskHi - HalfSize : DiffMaskLo;
2847 DiffMaskHi = DiffMaskHi == -1 ? DiffMaskLo + HalfSize : DiffMaskHi;
2848 if (!(DiffMaskLo >= 0 && DiffMaskLo < HalfSize) &&
2849 !(DiffMaskLo >= NumElts && DiffMaskLo < NumElts + HalfSize))
2850 return SDValue();
2851 if (!(DiffMaskHi >= HalfSize && DiffMaskHi < NumElts) &&
2852 !(DiffMaskHi >= NumElts + HalfSize && DiffMaskHi < 2 * NumElts))
2853 return SDValue();
2854 if (DiffMaskHi != DiffMaskLo + HalfSize)
2855 return SDValue();
2856
2857 // Determine source vector and source index.
2858 SDValue SrcVec = (DiffMaskLo < HalfSize) ? V1 : V2;
2859 int SrcIdxLo =
2860 (DiffMaskLo < HalfSize) ? DiffMaskLo : (DiffMaskLo - NumElts);
2861 bool IsEltFP = EltVT.isFloatingPoint();
2862
2863 // Replace with 2*EXTRACT_VECTOR_ELT + 2*INSERT_VECTOR_ELT, it will match
2864 // the patterns of XVEXTRINS in tablegen.
2865 SDValue BaseVec = (Base == 0) ? V1 : V2;
2866 SDValue EltLo =
2867 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IsEltFP ? EltVT : GRLenVT,
2868 SrcVec, DAG.getConstant(SrcIdxLo, DL, GRLenVT));
2869 SDValue InsLo = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, BaseVec, EltLo,
2870 DAG.getConstant(DiffPos[0], DL, GRLenVT));
2871 SDValue EltHi =
2872 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IsEltFP ? EltVT : GRLenVT,
2873 SrcVec, DAG.getConstant(SrcIdxLo + HalfSize, DL, GRLenVT));
2874 SDValue Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, InsLo, EltHi,
2875 DAG.getConstant(DiffPos[1], DL, GRLenVT));
2876
2877 return Result;
2878 };
2879
2880 // Try [0, n-1) insertion then [n, 2n-1) insertion.
2881 if (SDValue Result = tryLowerToExtrAndIns(0))
2882 return Result;
2883 return tryLowerToExtrAndIns(NumElts);
2884}
2885
2886/// Lower VECTOR_SHUFFLE into XVINSVE0 (if possible).
2887static SDValue
2889 SDValue V1, SDValue V2, SelectionDAG &DAG,
2890 const LoongArchSubtarget &Subtarget) {
2891 // LoongArch LASX only supports xvinsve0.{w/d}.
2892 if (VT != MVT::v8i32 && VT != MVT::v8f32 && VT != MVT::v4i64 &&
2893 VT != MVT::v4f64)
2894 return SDValue();
2895
2896 MVT GRLenVT = Subtarget.getGRLenVT();
2897 int MaskSize = Mask.size();
2898 assert(MaskSize == (int)VT.getVectorNumElements() && "Unexpected mask size");
2899
2900 // Check if exactly one element of the Mask is replaced by 'Replaced', while
2901 // all other elements are either 'Base + i' or undef (-1). On success, return
2902 // the index of the replaced element. Otherwise, just return -1.
2903 auto checkReplaceOne = [&](int Base, int Replaced) -> int {
2904 int Idx = -1;
2905 for (int i = 0; i < MaskSize; ++i) {
2906 if (Mask[i] == Base + i || Mask[i] == -1)
2907 continue;
2908 if (Mask[i] != Replaced)
2909 return -1;
2910 if (Idx == -1)
2911 Idx = i;
2912 else
2913 return -1;
2914 }
2915 return Idx;
2916 };
2917
2918 // Case 1: the lowest element of V2 replaces one element in V1.
2919 int Idx = checkReplaceOne(0, MaskSize);
2920 if (Idx != -1)
2921 return DAG.getNode(LoongArchISD::XVINSVE0, DL, VT, V1, V2,
2922 DAG.getConstant(Idx, DL, GRLenVT));
2923
2924 // Case 2: the lowest element of V1 replaces one element in V2.
2925 Idx = checkReplaceOne(MaskSize, 0);
2926 if (Idx != -1)
2927 return DAG.getNode(LoongArchISD::XVINSVE0, DL, VT, V2, V1,
2928 DAG.getConstant(Idx, DL, GRLenVT));
2929
2930 return SDValue();
2931}
2932
2933/// Lower VECTOR_SHUFFLE into XVSHUF (if possible).
2935 MVT VT, SDValue V1, SDValue V2,
2936 SelectionDAG &DAG) {
2937
2938 int MaskSize = Mask.size();
2939 int HalfSize = Mask.size() / 2;
2940 const auto &Begin = Mask.begin();
2941 const auto &Mid = Mask.begin() + HalfSize;
2942 const auto &End = Mask.end();
2943
2944 // VECTOR_SHUFFLE concatenates the vectors:
2945 // <0, 1, 2, 3, 4, 5, 6, 7> + <8, 9, 10, 11, 12, 13, 14, 15>
2946 // shuffling ->
2947 // <0, 1, 2, 3, 8, 9, 10, 11> <4, 5, 6, 7, 12, 13, 14, 15>
2948 //
2949 // XVSHUF concatenates the vectors:
2950 // <a0, a1, a2, a3, b0, b1, b2, b3> + <a4, a5, a6, a7, b4, b5, b6, b7>
2951 // shuffling ->
2952 // <a0, a1, a2, a3, a4, a5, a6, a7> + <b0, b1, b2, b3, b4, b5, b6, b7>
2953 SmallVector<SDValue, 8> MaskAlloc;
2954 for (auto it = Begin; it < Mid; it++) {
2955 if (*it < 0) // UNDEF
2956 MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
2957 else if ((*it >= 0 && *it < HalfSize) ||
2958 (*it >= MaskSize && *it < MaskSize + HalfSize)) {
2959 int M = *it < HalfSize ? *it : *it - HalfSize;
2960 MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
2961 } else
2962 return SDValue();
2963 }
2964 assert((int)MaskAlloc.size() == HalfSize && "xvshuf convert failed!");
2965
2966 for (auto it = Mid; it < End; it++) {
2967 if (*it < 0) // UNDEF
2968 MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
2969 else if ((*it >= HalfSize && *it < MaskSize) ||
2970 (*it >= MaskSize + HalfSize && *it < MaskSize * 2)) {
2971 int M = *it < MaskSize ? *it - HalfSize : *it - MaskSize;
2972 MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
2973 } else
2974 return SDValue();
2975 }
2976 assert((int)MaskAlloc.size() == MaskSize && "xvshuf convert failed!");
2977
2978 EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
2979 SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, MaskAlloc);
2980 return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
2981}
2982
2983/// Shuffle vectors by lane to generate more optimized instructions.
2984/// 256-bit shuffles are always considered as 2-lane 128-bit shuffles.
2985///
2986/// Therefore, except for the following four cases, other cases are regarded
2987/// as cross-lane shuffles, where optimization is relatively limited.
2988///
2989/// - Shuffle high, low lanes of two inputs vector
2990/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 3, 6>
2991/// - Shuffle low, high lanes of two inputs vector
2992/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 0, 5>
2993/// - Shuffle low, low lanes of two inputs vector
2994/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 3, 6>
2995/// - Shuffle high, high lanes of two inputs vector
2996/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 0, 5>
2997///
2998/// The first case is the closest to LoongArch instructions and the other
2999/// cases need to be converted to it for processing.
3000///
3001/// This function will return true for the last three cases above and will
3002/// modify V1, V2 and Mask. Otherwise, return false for the first case and
3003/// cross-lane shuffle cases.
3005 const SDLoc &DL, MutableArrayRef<int> Mask, MVT VT, SDValue &V1,
3006 SDValue &V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget) {
3007
3008 enum HalfMaskType { HighLaneTy, LowLaneTy, None };
3009
3010 int MaskSize = Mask.size();
3011 int HalfSize = Mask.size() / 2;
3012 MVT GRLenVT = Subtarget.getGRLenVT();
3013
3014 HalfMaskType preMask = None, postMask = None;
3015
3016 if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
3017 return M < 0 || (M >= 0 && M < HalfSize) ||
3018 (M >= MaskSize && M < MaskSize + HalfSize);
3019 }))
3020 preMask = HighLaneTy;
3021 else if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
3022 return M < 0 || (M >= HalfSize && M < MaskSize) ||
3023 (M >= MaskSize + HalfSize && M < MaskSize * 2);
3024 }))
3025 preMask = LowLaneTy;
3026
3027 if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
3028 return M < 0 || (M >= HalfSize && M < MaskSize) ||
3029 (M >= MaskSize + HalfSize && M < MaskSize * 2);
3030 }))
3031 postMask = LowLaneTy;
3032 else if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
3033 return M < 0 || (M >= 0 && M < HalfSize) ||
3034 (M >= MaskSize && M < MaskSize + HalfSize);
3035 }))
3036 postMask = HighLaneTy;
3037
3038 // The pre-half of mask is high lane type, and the post-half of mask
3039 // is low lane type, which is closest to the LoongArch instructions.
3040 //
3041 // Note: In the LoongArch architecture, the high lane of mask corresponds
3042 // to the lower 128-bit of vector register, and the low lane of mask
3043 // corresponds the higher 128-bit of vector register.
3044 if (preMask == HighLaneTy && postMask == LowLaneTy) {
3045 return false;
3046 }
3047 if (preMask == LowLaneTy && postMask == HighLaneTy) {
3048 V1 = DAG.getBitcast(MVT::v4i64, V1);
3049 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
3050 DAG.getConstant(0b01001110, DL, GRLenVT));
3051 V1 = DAG.getBitcast(VT, V1);
3052
3053 if (!V2.isUndef()) {
3054 V2 = DAG.getBitcast(MVT::v4i64, V2);
3055 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
3056 DAG.getConstant(0b01001110, DL, GRLenVT));
3057 V2 = DAG.getBitcast(VT, V2);
3058 }
3059
3060 for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
3061 *it = *it < 0 ? *it : *it - HalfSize;
3062 }
3063 for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
3064 *it = *it < 0 ? *it : *it + HalfSize;
3065 }
3066 } else if (preMask == LowLaneTy && postMask == LowLaneTy) {
3067 V1 = DAG.getBitcast(MVT::v4i64, V1);
3068 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
3069 DAG.getConstant(0b11101110, DL, GRLenVT));
3070 V1 = DAG.getBitcast(VT, V1);
3071
3072 if (!V2.isUndef()) {
3073 V2 = DAG.getBitcast(MVT::v4i64, V2);
3074 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
3075 DAG.getConstant(0b11101110, DL, GRLenVT));
3076 V2 = DAG.getBitcast(VT, V2);
3077 }
3078
3079 for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
3080 *it = *it < 0 ? *it : *it - HalfSize;
3081 }
3082 } else if (preMask == HighLaneTy && postMask == HighLaneTy) {
3083 V1 = DAG.getBitcast(MVT::v4i64, V1);
3084 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
3085 DAG.getConstant(0b01000100, DL, GRLenVT));
3086 V1 = DAG.getBitcast(VT, V1);
3087
3088 if (!V2.isUndef()) {
3089 V2 = DAG.getBitcast(MVT::v4i64, V2);
3090 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
3091 DAG.getConstant(0b01000100, DL, GRLenVT));
3092 V2 = DAG.getBitcast(VT, V2);
3093 }
3094
3095 for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
3096 *it = *it < 0 ? *it : *it + HalfSize;
3097 }
3098 } else { // cross-lane
3099 return false;
3100 }
3101
3102 return true;
3103}
3104
3105/// Lower VECTOR_SHUFFLE as lane permute and then shuffle (if possible).
3106/// Only for 256-bit vector.
3107///
3108/// For example:
3109/// %2 = shufflevector <4 x i64> %0, <4 x i64> poison,
3110/// <4 x i64> <i32 0, i32 3, i32 2, i32 0>
3111/// is lowered to:
3112/// (XVPERMI $xr2, $xr0, 78)
3113/// (XVSHUF $xr1, $xr2, $xr0)
3114/// (XVORI $xr0, $xr1, 0)
3116 ArrayRef<int> Mask,
3117 MVT VT, SDValue V1,
3118 SDValue V2,
3119 SelectionDAG &DAG) {
3120 assert(VT.is256BitVector() && "Only for 256-bit vector shuffles!");
3121 int Size = Mask.size();
// LaneSize is the number of elements in one 128-bit half of the vector.
3122 int LaneSize = Size / 2;
3123
// For each destination 128-bit lane, record whether any of its elements is
// taken from the *other* source lane (a cross-lane access).
3124 bool LaneCrossing[2] = {false, false};
3125 for (int i = 0; i < Size; ++i)
3126 if (Mask[i] >= 0 && ((Mask[i] % Size) / LaneSize) != (i / LaneSize))
3127 LaneCrossing[(Mask[i] % Size) / LaneSize] = true;
3128
3129 // Ensure that all lanes are involved.
3130 if (!LaneCrossing[0] && !LaneCrossing[1])
3131 return SDValue();
3132
// Build an in-lane mask: every cross-lane element is redirected to the
// corresponding position of a lane-swapped copy of V1 (selected via index
// offset +Size, i.e. the shuffle's second operand below).
3133 SmallVector<int> InLaneMask;
3134 InLaneMask.assign(Mask.begin(), Mask.end());
3135 for (int i = 0; i < Size; ++i) {
3136 int &M = InLaneMask[i];
3137 if (M < 0)
3138 continue;
3139 if (((M % Size) / LaneSize) != (i / LaneSize))
3140 M = (M % LaneSize) + ((i / LaneSize) * LaneSize) + Size;
3141 }
3142
// Create the lane-swapped copy of V1 ({2, 3, 0, 1} swaps the two 128-bit
// halves when viewed as v4i64) and emit the now lane-local shuffle.
3143 SDValue Flipped = DAG.getBitcast(MVT::v4i64, V1);
3144 Flipped = DAG.getVectorShuffle(MVT::v4i64, DL, Flipped,
3145 DAG.getUNDEF(MVT::v4i64), {2, 3, 0, 1});
3146 Flipped = DAG.getBitcast(VT, Flipped);
3147 return DAG.getVectorShuffle(VT, DL, V1, Flipped, InLaneMask);
3148}
3149
3150/// Dispatching routine to lower various 256-bit LoongArch vector shuffles.
3151///
3152/// This routine breaks down the specific type of 256-bit shuffle and
3153/// dispatches to the lowering routines accordingly.
3155 SDValue V1, SDValue V2, SelectionDAG &DAG,
3156 const LoongArchSubtarget &Subtarget) {
3157 assert((VT.SimpleTy == MVT::v32i8 || VT.SimpleTy == MVT::v16i16 ||
3158 VT.SimpleTy == MVT::v8i32 || VT.SimpleTy == MVT::v4i64 ||
3159 VT.SimpleTy == MVT::v8f32 || VT.SimpleTy == MVT::v4f64) &&
3160 "Vector type is unsupported for lasx!");
3162 "Two operands have different types!");
3163 assert(VT.getVectorNumElements() == Mask.size() &&
3164 "Unexpected mask size for shuffle!");
3165 assert(Mask.size() % 2 == 0 && "Expected even mask size.");
3166 assert(Mask.size() >= 4 && "Mask size is less than 4.");
3167
// Precompute which mask elements may be treated as undef/zero; some of the
// matchers below (e.g. the shift matcher) use this to relax their checks.
3168 APInt KnownUndef, KnownZero;
3169 computeZeroableShuffleElements(Mask, V1, V2, KnownUndef, KnownZero);
3170 APInt Zeroable = KnownUndef | KnownZero;
3171
3172 SDValue Result;
3173 // TODO: Add more comparison patterns.
// Single-input patterns are tried first; they only apply when V2 is undef.
3174 if (V2.isUndef()) {
3175 if ((Result =
3176 lowerVECTOR_SHUFFLE_XVREPLVEI(DL, Mask, VT, V1, DAG, Subtarget)))
3177 return Result;
3178 if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, Mask, VT, V1, V2, DAG,
3179 Subtarget)))
3180 return Result;
3181 // Try to widen vectors to gain more optimization opportunities.
3182 if (SDValue NewShuffle = widenShuffleMask(DL, Mask, VT, V1, V2, DAG))
3183 return NewShuffle;
3184 if ((Result =
3185 lowerVECTOR_SHUFFLE_XVPERMI(DL, Mask, VT, V1, DAG, Subtarget)))
3186 return Result;
3187 if ((Result = lowerVECTOR_SHUFFLE_XVPERM(DL, Mask, VT, V1, DAG, Subtarget)))
3188 return Result;
3189 if ((Result =
3190 lowerVECTOR_SHUFFLE_IsReverse(DL, Mask, VT, V1, DAG, Subtarget)))
3191 return Result;
3192
3193 // TODO: This comment may be enabled in the future to better match the
3194 // pattern for instruction selection.
3195 /* V2 = V1; */
3196 }
3197
3198 // It is recommended not to change the pattern comparison order for better
3199 // performance.
3200 if ((Result = lowerVECTOR_SHUFFLE_XVPACKEV(DL, Mask, VT, V1, V2, DAG)))
3201 return Result;
3202 if ((Result = lowerVECTOR_SHUFFLE_XVPACKOD(DL, Mask, VT, V1, V2, DAG)))
3203 return Result;
3204 if ((Result = lowerVECTOR_SHUFFLE_XVILVH(DL, Mask, VT, V1, V2, DAG)))
3205 return Result;
3206 if ((Result = lowerVECTOR_SHUFFLE_XVILVL(DL, Mask, VT, V1, V2, DAG)))
3207 return Result;
3208 if ((Result = lowerVECTOR_SHUFFLE_XVPICKEV(DL, Mask, VT, V1, V2, DAG)))
3209 return Result;
3210 if ((Result = lowerVECTOR_SHUFFLE_XVPICKOD(DL, Mask, VT, V1, V2, DAG)))
3211 return Result;
// XVSHUF4I with two operands is only attempted for 64-bit element types.
3212 if ((VT.SimpleTy == MVT::v4i64 || VT.SimpleTy == MVT::v4f64) &&
3213 (Result =
3214 lowerVECTOR_SHUFFLE_XVSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget)))
3215 return Result;
3216 if ((Result =
3217 lowerVECTOR_SHUFFLE_XVEXTRINS(DL, Mask, VT, V1, V2, DAG, Subtarget)))
3218 return Result;
3219 if ((Result = lowerVECTOR_SHUFFLEAsShift(DL, Mask, VT, V1, V2, DAG, Subtarget,
3220 Zeroable)))
3221 return Result;
3222 if ((Result =
3223 lowerVECTOR_SHUFFLE_XVINSVE0(DL, Mask, VT, V1, V2, DAG, Subtarget)))
3224 return Result;
3225 if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, Mask, VT, V1, V2, DAG,
3226 Subtarget)))
3227 return Result;
3228
3229 // Canonicalize to a non-cross-lane shuffle; on success, re-dispatch with
3229 // the rewritten (lane-local) mask.
3230 SmallVector<int> NewMask(Mask);
3231 if (canonicalizeShuffleVectorByLane(DL, NewMask, VT, V1, V2, DAG, Subtarget))
3232 return lower256BitShuffle(DL, NewMask, VT, V1, V2, DAG, Subtarget);
3233
3234 // FIXME: Handling the remaining cases earlier can degrade performance
3235 // in some situations. Further analysis is required to enable more
3236 // effective optimizations.
3237 if (V2.isUndef()) {
3238 if ((Result = lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(DL, NewMask, VT,
3239 V1, V2, DAG)))
3240 return Result;
3241 }
3242
// Last resorts: widen the mask, then fall back to the generic XVSHUF.
3243 if (SDValue NewShuffle = widenShuffleMask(DL, NewMask, VT, V1, V2, DAG))
3244 return NewShuffle;
3245 if ((Result = lowerVECTOR_SHUFFLE_XVSHUF(DL, NewMask, VT, V1, V2, DAG)))
3246 return Result;
3247
3248 return SDValue();
3249}
3250
3251SDValue LoongArchTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
3252 SelectionDAG &DAG) const {
3253 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
3254 ArrayRef<int> OrigMask = SVOp->getMask();
3255 SDValue V1 = Op.getOperand(0);
3256 SDValue V2 = Op.getOperand(1);
3257 MVT VT = Op.getSimpleValueType();
3258 int NumElements = VT.getVectorNumElements();
3259 SDLoc DL(Op);
3260
3261 bool V1IsUndef = V1.isUndef();
3262 bool V2IsUndef = V2.isUndef();
3263 if (V1IsUndef && V2IsUndef)
3264 return DAG.getUNDEF(VT);
3265
3266 // When we create a shuffle node we put the UNDEF node to second operand,
3267 // but in some cases the first operand may be transformed to UNDEF.
3268 // In this case we should just commute the node.
3269 if (V1IsUndef)
3270 return DAG.getCommutedVectorShuffle(*SVOp);
3271
3272 // Check for non-undef masks pointing at an undef vector and make the masks
3273 // undef as well. This makes it easier to match the shuffle based solely on
3274 // the mask.
3275 if (V2IsUndef &&
3276 any_of(OrigMask, [NumElements](int M) { return M >= NumElements; })) {
3277 SmallVector<int, 8> NewMask(OrigMask);
3278 for (int &M : NewMask)
3279 if (M >= NumElements)
3280 M = -1;
3281 return DAG.getVectorShuffle(VT, DL, V1, V2, NewMask);
3282 }
3283
3284 // Check for illegal shuffle mask element index values.
3285 int MaskUpperLimit = OrigMask.size() * (V2IsUndef ? 1 : 2);
3286 (void)MaskUpperLimit;
3287 assert(llvm::all_of(OrigMask,
3288 [&](int M) { return -1 <= M && M < MaskUpperLimit; }) &&
3289 "Out of bounds shuffle index");
3290
3291 // For each vector width, delegate to a specialized lowering routine.
3292 if (VT.is128BitVector())
3293 return lower128BitShuffle(DL, OrigMask, VT, V1, V2, DAG, Subtarget);
3294
3295 if (VT.is256BitVector())
3296 return lower256BitShuffle(DL, OrigMask, VT, V1, V2, DAG, Subtarget);
3297
3298 return SDValue();
3299}
3300
3301SDValue LoongArchTargetLowering::lowerFP_TO_FP16(SDValue Op,
3302 SelectionDAG &DAG) const {
3303 // Custom lower to ensure the libcall return is passed in an FPR on hard
3304 // float ABIs.
3305 SDLoc DL(Op);
3306 MakeLibCallOptions CallOptions;
3307 SDValue Op0 = Op.getOperand(0);
3308 SDValue Chain = SDValue();
3309 RTLIB::Libcall LC = RTLIB::getFPROUND(Op0.getValueType(), MVT::f16);
3310 SDValue Res;
3311 std::tie(Res, Chain) =
3312 makeLibCall(DAG, LC, MVT::f32, Op0, CallOptions, DL, Chain);
3313 if (Subtarget.is64Bit())
3314 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Res);
3315 return DAG.getBitcast(MVT::i32, Res);
3316}
3317
3318SDValue LoongArchTargetLowering::lowerFP16_TO_FP(SDValue Op,
3319 SelectionDAG &DAG) const {
3320 // Custom lower to ensure the libcall argument is passed in an FPR on hard
3321 // float ABIs.
3322 SDLoc DL(Op);
3323 MakeLibCallOptions CallOptions;
3324 SDValue Op0 = Op.getOperand(0);
3325 SDValue Chain = SDValue();
3326 SDValue Arg = Subtarget.is64Bit() ? DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64,
3327 DL, MVT::f32, Op0)
3328 : DAG.getBitcast(MVT::f32, Op0);
3329 SDValue Res;
3330 std::tie(Res, Chain) = makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Arg,
3331 CallOptions, DL, Chain);
3332 return Res;
3333}
3334
3335SDValue LoongArchTargetLowering::lowerFP_TO_BF16(SDValue Op,
3336 SelectionDAG &DAG) const {
3337 assert(Subtarget.hasBasicF() && "Unexpected custom legalization");
3338 SDLoc DL(Op);
3339 MakeLibCallOptions CallOptions;
3340 RTLIB::Libcall LC =
3341 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16);
3342 SDValue Res =
3343 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
3344 if (Subtarget.is64Bit())
3345 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Res);
3346 return DAG.getBitcast(MVT::i32, Res);
3347}
3348
3349SDValue LoongArchTargetLowering::lowerBF16_TO_FP(SDValue Op,
3350 SelectionDAG &DAG) const {
3351 assert(Subtarget.hasBasicF() && "Unexpected custom legalization");
3352 MVT VT = Op.getSimpleValueType();
3353 SDLoc DL(Op);
3354 Op = DAG.getNode(
3355 ISD::SHL, DL, Op.getOperand(0).getValueType(), Op.getOperand(0),
3356 DAG.getShiftAmountConstant(16, Op.getOperand(0).getValueType(), DL));
3357 SDValue Res = Subtarget.is64Bit() ? DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64,
3358 DL, MVT::f32, Op)
3359 : DAG.getBitcast(MVT::f32, Op);
3360 if (VT != MVT::f32)
3361 return DAG.getNode(ISD::FP_EXTEND, DL, VT, Res);
3362 return Res;
3363}
3364
3365// Lower BUILD_VECTOR as broadcast load (if possible).
3366// For example:
3367// %a = load i8, ptr %ptr
3368// %b = build_vector %a, %a, %a, %a
3369// is lowered to :
3370// (VLDREPL_B $a0, 0)
3372 const SDLoc &DL,
3373 SelectionDAG &DAG) {
3374 MVT VT = BVOp->getSimpleValueType(0);
3375 int NumOps = BVOp->getNumOperands();
3376
3377 assert((VT.is128BitVector() || VT.is256BitVector()) &&
3378 "Unsupported vector type for broadcast.");
3379
// Check that every BUILD_VECTOR operand is the same load node; IdentitySrc
// ends up holding that common load.
3380 SDValue IdentitySrc;
3381 bool IsIdeneity = true;
3382
3383 for (int i = 0; i != NumOps; i++) {
3384 SDValue Op = BVOp->getOperand(i);
3385 if (Op.getOpcode() != ISD::LOAD || (IdentitySrc && Op != IdentitySrc)) {
3386 IsIdeneity = false;
3387 break;
3388 }
3389 IdentitySrc = BVOp->getOperand(0);
3390 }
3391
3392 // Make sure that the common load exists and the BUILD_VECTOR is its only
3392 // user (otherwise replacing it with VLDREPL is not safe/profitable).
3393 if (!IsIdeneity || !IdentitySrc || !BVOp->isOnlyUserOf(IdentitySrc.getNode()))
3394 return SDValue();
3395
3396 auto *LN = cast<LoadSDNode>(IdentitySrc);
3397 auto ExtType = LN->getExtensionType();
3398
// Only plain (or any-extending) loads whose memory element width matches
// the vector element width can be turned into a VLDREPL broadcast.
3399 if ((ExtType == ISD::EXTLOAD || ExtType == ISD::NON_EXTLOAD) &&
3400 VT.getScalarSizeInBits() == LN->getMemoryVT().getScalarSizeInBits()) {
3401 // Indexed loads and stores are not supported on LoongArch.
3402 assert(LN->isUnindexed() && "Unexpected indexed load.");
3403
3404 SDVTList Tys = DAG.getVTList(VT, MVT::Other);
3405 // The offset operand of unindexed load is always undefined, so there is
3406 // no need to pass it to VLDREPL.
3407 SDValue Ops[] = {LN->getChain(), LN->getBasePtr()};
3408 SDValue BCast = DAG.getNode(LoongArchISD::VLDREPL, DL, Tys, Ops);
// Redirect chain users of the original load to the new broadcast load.
3409 DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BCast.getValue(1));
3410 return BCast;
3411 }
3412 return SDValue();
3413}
3414
3415// Sequentially insert elements from Ops into Vector, from low to high indices.
3416// Note: Ops can have fewer elements than Vector.
3418 const LoongArchSubtarget &Subtarget, SDValue &Vector,
3419 EVT ResTy) {
3420 assert(Ops.size() <= ResTy.getVectorNumElements());
3421
// Element 0 is materialized with SCALAR_TO_VECTOR (cheaper than an insert);
// undef operands are skipped entirely, leaving the existing lanes alone.
3422 SDValue Op0 = Ops[0];
3423 if (!Op0.isUndef())
3424 Vector = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, ResTy, Op0);
3425 for (unsigned i = 1; i < Ops.size(); ++i) {
3426 SDValue Opi = Ops[i];
3427 if (Opi.isUndef())
3428 continue;
3429 Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector, Opi,
3430 DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
3431 }
3432}
3433
3434// Build a ResTy subvector from Node, taking NumElts elements starting at index
3435// 'first'.
3437 SelectionDAG &DAG, SDLoc DL,
3438 const LoongArchSubtarget &Subtarget,
3439 EVT ResTy, unsigned first) {
3440 unsigned NumElts = ResTy.getVectorNumElements();
3441
3442 assert(first + NumElts <= Node->getSimpleValueType(0).getVectorNumElements());
3443
// Copy the requested slice of BUILD_VECTOR operands and materialize them
// into a fresh (initially undef) vector via fillVector.
3444 SmallVector<SDValue, 16> Ops(Node->op_begin() + first,
3445 Node->op_begin() + first + NumElts);
3446 SDValue Vector = DAG.getUNDEF(ResTy);
3447 fillVector(Ops, DAG, DL, Subtarget, Vector, ResTy);
3448 return Vector;
3449}
3450
// Custom lowering for BUILD_VECTOR on LSX/LASX targets. Tries, in order:
// broadcast-from-load, constant splat, repeated-sequence broadcast, and
// element-wise insertion (splitting 256-bit vectors into halves).
3451SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
3452 SelectionDAG &DAG) const {
3453 BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op);
3454 MVT VT = Node->getSimpleValueType(0);
3455 EVT ResTy = Op->getValueType(0);
3456 unsigned NumElts = ResTy.getVectorNumElements();
3457 SDLoc DL(Op);
3458 APInt SplatValue, SplatUndef;
3459 unsigned SplatBitSize;
3460 bool HasAnyUndefs;
3461 bool IsConstant = false;
3462 bool UseSameConstant = true;
3463 SDValue ConstantValue;
3464 bool Is128Vec = ResTy.is128BitVector();
3465 bool Is256Vec = ResTy.is256BitVector();
3466
// Bail out unless the vector width matches an available SIMD extension.
3467 if ((!Subtarget.hasExtLSX() || !Is128Vec) &&
3468 (!Subtarget.hasExtLASX() || !Is256Vec))
3469 return SDValue();
3470
// Best case: all elements are the same load -> a single VLDREPL.
3471 if (SDValue Result = lowerBUILD_VECTORAsBroadCastLoad(Node, DL, DAG))
3472 return Result;
3473
3474 if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
3475 /*MinSplatBits=*/8) &&
3476 SplatBitSize <= 64) {
3477 // We can only cope with 8, 16, 32, or 64-bit elements.
3478 if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 &&
3479 SplatBitSize != 64)
3480 return SDValue();
3481
3482 if (SplatBitSize == 64 && !Subtarget.is64Bit()) {
3483 // We can only handle 64-bit elements that are within
3484 // the signed 10-bit range or match vldi patterns on 32-bit targets.
3485 // See the BUILD_VECTOR case in LoongArchDAGToDAGISel::Select().
3486 if (!SplatValue.isSignedIntN(10) &&
3487 !isImmVLDILegalForMode1(SplatValue, SplatBitSize).first)
3488 return SDValue();
3489 if ((Is128Vec && ResTy == MVT::v4i32) ||
3490 (Is256Vec && ResTy == MVT::v8i32))
3491 return Op;
3492 }
3493
// Pick the integer vector type that matches the splat element width.
3494 EVT ViaVecTy;
3495
3496 switch (SplatBitSize) {
3497 default:
3498 return SDValue();
3499 case 8:
3500 ViaVecTy = Is128Vec ? MVT::v16i8 : MVT::v32i8;
3501 break;
3502 case 16:
3503 ViaVecTy = Is128Vec ? MVT::v8i16 : MVT::v16i16;
3504 break;
3505 case 32:
3506 ViaVecTy = Is128Vec ? MVT::v4i32 : MVT::v8i32;
3507 break;
3508 case 64:
3509 ViaVecTy = Is128Vec ? MVT::v2i64 : MVT::v4i64;
3510 break;
3511 }
3512
3513 // SelectionDAG::getConstant will promote SplatValue appropriately.
3514 SDValue Result = DAG.getConstant(SplatValue, DL, ViaVecTy);
3515
3516 // Bitcast to the type we originally wanted.
3517 if (ViaVecTy != ResTy)
3518 Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result);
3519
3520 return Result;
3521 }
3522
// Non-constant full splats are already handled well by default lowering.
3523 if (DAG.isSplatValue(Op, /*AllowUndefs=*/false))
3524 return Op;
3525
// Scan the operands: note whether any element is a constant and whether all
// constant elements share one value.
3526 for (unsigned i = 0; i < NumElts; ++i) {
3527 SDValue Opi = Node->getOperand(i);
3528 if (isIntOrFPConstant(Opi)) {
3529 IsConstant = true;
3530 if (!ConstantValue.getNode())
3531 ConstantValue = Opi;
3532 else if (ConstantValue != Opi)
3533 UseSameConstant = false;
3534 }
3535 }
3536
3537 // If the type of BUILD_VECTOR is v2f64, custom legalizing it has no benefits.
3538 if (IsConstant && UseSameConstant && ResTy != MVT::v2f64) {
// Splat the common constant, then patch in the non-constant elements.
3539 SDValue Result = DAG.getSplatBuildVector(ResTy, DL, ConstantValue);
3540 for (unsigned i = 0; i < NumElts; ++i) {
3541 SDValue Opi = Node->getOperand(i);
3542 if (!isIntOrFPConstant(Opi))
3543 Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Result, Opi,
3544 DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
3545 }
3546 return Result;
3547 }
3548
3549 if (!IsConstant) {
3550 // If the BUILD_VECTOR has a repeated pattern, use INSERT_VECTOR_ELT to fill
3551 // the sub-sequence of the vector and then broadcast the sub-sequence.
3552 //
3553 // TODO: If the BUILD_VECTOR contains undef elements, consider falling
3554 // back to use INSERT_VECTOR_ELT to materialize the vector, because it
3555 // generates worse code in some cases. This could be further optimized
3556 // with more consideration.
3558 BitVector UndefElements;
3559 if (Node->getRepeatedSequence(Sequence, &UndefElements) &&
3560 UndefElements.count() == 0) {
3561 // Using LSX instructions to fill the sub-sequence of 256-bits vector,
3562 // because the high part can be simply treated as undef.
3563 SDValue Vector = DAG.getUNDEF(ResTy);
3564 EVT FillTy = Is256Vec
3566 : ResTy;
3567 SDValue FillVec =
3568 Is256Vec ? DAG.getExtractSubvector(DL, FillTy, Vector, 0) : Vector;
3569
3570 fillVector(Sequence, DAG, DL, Subtarget, FillVec, FillTy);
3571
// Reinterpret the filled vector so that one whole repeated sequence forms
// a single (wider) element, then splat that element across the vector.
3572 unsigned SeqLen = Sequence.size();
3573 unsigned SplatLen = NumElts / SeqLen;
3574 MVT SplatEltTy = MVT::getIntegerVT(VT.getScalarSizeInBits() * SeqLen);
3575 MVT SplatTy = MVT::getVectorVT(SplatEltTy, SplatLen);
3576
3577 // If size of the sub-sequence is half of a 256-bits vector, bitcast the
3578 // vector to v4i64 type in order to match the pattern of XVREPLVE0Q.
3579 if (SplatEltTy == MVT::i128)
3580 SplatTy = MVT::v4i64;
3581
3582 SDValue SplatVec;
3583 SDValue SrcVec = DAG.getBitcast(
3584 SplatTy,
3585 Is256Vec ? DAG.getInsertSubvector(DL, Vector, FillVec, 0) : FillVec);
3586 if (Is256Vec) {
3587 SplatVec =
3588 DAG.getNode((SplatEltTy == MVT::i128) ? LoongArchISD::XVREPLVE0Q
3589 : LoongArchISD::XVREPLVE0,
3590 DL, SplatTy, SrcVec);
3591 } else {
3592 SplatVec = DAG.getNode(LoongArchISD::VREPLVEI, DL, SplatTy, SrcVec,
3593 DAG.getConstant(0, DL, Subtarget.getGRLenVT()));
3594 }
3595
3596 return DAG.getBitcast(ResTy, SplatVec);
3597 }
3598
3599 // Use INSERT_VECTOR_ELT operations rather than expand to stores, because
3600 // using memory operations is much slower.
3601 //
3602 // For 256-bit vectors, normally split into two halves and concatenate.
3603 // Special case: for v8i32/v8f32/v4i64/v4f64, if the upper half has only
3604 // one non-undef element, skip splitting to avoid a worse result.
3605 if (ResTy == MVT::v8i32 || ResTy == MVT::v8f32 || ResTy == MVT::v4i64 ||
3606 ResTy == MVT::v4f64) {
3607 unsigned NonUndefCount = 0;
3608 for (unsigned i = NumElts / 2; i < NumElts; ++i) {
3609 if (!Node->getOperand(i).isUndef()) {
3610 ++NonUndefCount;
3611 if (NonUndefCount > 1)
3612 break;
3613 }
3614 }
3615 if (NonUndefCount == 1)
3616 return fillSubVectorFromBuildVector(Node, DAG, DL, Subtarget, ResTy, 0);
3617 }
3618
// General path: build the low half (or the whole 128-bit vector), then for
// 256-bit results build the high half too and concatenate.
3619 EVT VecTy =
3620 Is256Vec ? ResTy.getHalfNumVectorElementsVT(*DAG.getContext()) : ResTy;
3621 SDValue Vector =
3622 fillSubVectorFromBuildVector(Node, DAG, DL, Subtarget, VecTy, 0);
3623
3624 if (Is128Vec)
3625 return Vector;
3626
3627 SDValue VectorHi = fillSubVectorFromBuildVector(Node, DAG, DL, Subtarget,
3628 VecTy, NumElts / 2);
3629
3630 return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResTy, Vector, VectorHi);
3631 }
3632
3633 return SDValue();
3634}
3635
// Custom lowering for CONCAT_VECTORS producing a 256-bit vector from two
// 128-bit halves: classify the operands (undef / freeze(undef) / zero /
// non-zero) and build the result with insert_subvector operations.
3636SDValue LoongArchTargetLowering::lowerCONCAT_VECTORS(SDValue Op,
3637 SelectionDAG &DAG) const {
3638 SDLoc DL(Op);
3639 MVT ResVT = Op.getSimpleValueType();
3640 assert(ResVT.is256BitVector() && Op.getNumOperands() == 2);
3641
3642 unsigned NumOperands = Op.getNumOperands();
3643 unsigned NumFreezeUndef = 0;
3644 unsigned NumZero = 0;
3645 unsigned NumNonZero = 0;
// NonZeros is a bitmask: bit i set <=> operand i is a non-zero subvector.
3646 unsigned NonZeros = 0;
3647 SmallSet<SDValue, 4> Undefs;
3648 for (unsigned i = 0; i != NumOperands; ++i) {
3649 SDValue SubVec = Op.getOperand(i);
3650 if (SubVec.isUndef())
3651 continue;
3652 if (ISD::isFreezeUndef(SubVec.getNode())) {
3653 // If the freeze(undef) has multiple uses then we must fold to zero.
3654 if (SubVec.hasOneUse()) {
3655 ++NumFreezeUndef;
3656 } else {
3657 ++NumZero;
3658 Undefs.insert(SubVec);
3659 }
3660 } else if (ISD::isBuildVectorAllZeros(SubVec.getNode()))
3661 ++NumZero;
3662 else {
3663 assert(i < sizeof(NonZeros) * CHAR_BIT); // Ensure the shift is in range.
3664 NonZeros |= 1 << i;
3665 ++NumNonZero;
3666 }
3667 }
3668
3669 // If we have more than 2 non-zeros, build each half separately.
3670 if (NumNonZero > 2) {
3671 MVT HalfVT = ResVT.getHalfNumVectorElementsVT();
3672 ArrayRef<SDUse> Ops = Op->ops();
3673 SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
3674 Ops.slice(0, NumOperands / 2));
3675 SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
3676 Ops.slice(NumOperands / 2));
3677 return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi);
3678 }
3679
3680 // Otherwise, build it up through insert_subvectors.
// Base value: zero if any operand folds to zero, freeze(undef) if any
// single-use freeze(undef) was seen, plain undef otherwise.
3681 SDValue Vec = NumZero ? DAG.getConstant(0, DL, ResVT)
3682 : (NumFreezeUndef ? DAG.getFreeze(DAG.getUNDEF(ResVT))
3683 : DAG.getUNDEF(ResVT));
3684
3685 // Replace Undef operands with ZeroVector.
3686 for (SDValue U : Undefs)
3687 DAG.ReplaceAllUsesWith(U, DAG.getConstant(0, DL, U.getSimpleValueType()));
3688
// Insert each non-zero subvector at its element offset in the result.
3689 MVT SubVT = Op.getOperand(0).getSimpleValueType();
3690 unsigned NumSubElems = SubVT.getVectorNumElements();
3691 for (unsigned i = 0; i != NumOperands; ++i) {
3692 if ((NonZeros & (1 << i)) == 0)
3693 continue;
3694
3695 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ResVT, Vec, Op.getOperand(i),
3696 DAG.getVectorIdxConstant(i * NumSubElems, DL));
3697 }
3698
3699 return Vec;
3700}
3701
3702SDValue
// Custom lowering for EXTRACT_VECTOR_ELT with a variable index on 256-bit
// vectors: splat the element selected by Idx into lane 0 (via XVPERM or a
// VSHUF trick) and then extract lane 0 with a constant index.
3703LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
3704 SelectionDAG &DAG) const {
3705 MVT EltVT = Op.getSimpleValueType();
3706 SDValue Vec = Op->getOperand(0);
3707 EVT VecTy = Vec->getValueType(0);
3708 SDValue Idx = Op->getOperand(1);
3709 SDLoc DL(Op);
3710 MVT GRLenVT = Subtarget.getGRLenVT();
3711
3712 assert(VecTy.is256BitVector() && "Unexpected EXTRACT_VECTOR_ELT vector type");
3713
// Constant indices are matched directly by instruction selection patterns.
3714 if (isa<ConstantSDNode>(Idx))
3715 return Op;
3716
3717 switch (VecTy.getSimpleVT().SimpleTy) {
3718 default:
3719 llvm_unreachable("Unexpected type");
3720 case MVT::v32i8:
3721 case MVT::v16i16:
3722 case MVT::v4i64:
3723 case MVT::v4f64: {
3724 // Extract the high half subvector and place it to the low half of a new
3725 // vector. It doesn't matter what the high half of the new vector is.
3726 EVT HalfTy = VecTy.getHalfNumVectorElementsVT(*DAG.getContext());
3727 SDValue VecHi =
3728 DAG.getExtractSubvector(DL, HalfTy, Vec, HalfTy.getVectorNumElements());
3729 SDValue TmpVec =
3730 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecTy, DAG.getUNDEF(VecTy),
3731 VecHi, DAG.getConstant(0, DL, GRLenVT));
3732
3733 // Shuffle the original Vec and the TmpVec using MaskVec, the lowest element
3734 // of MaskVec is Idx, the rest do not matter. ResVec[0] will hold the
3735 // desired element.
// Move Idx into lane 0 of a vector by routing it through an FPR.
3736 SDValue IdxCp =
3737 Subtarget.is64Bit()
3738 ? DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Idx)
3739 : DAG.getBitcast(MVT::f32, Idx);
3740 SDValue IdxVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v8f32, IdxCp);
3741 SDValue MaskVec =
3742 DAG.getBitcast((VecTy == MVT::v4f64) ? MVT::v4i64 : VecTy, IdxVec);
3743 SDValue ResVec =
3744 DAG.getNode(LoongArchISD::VSHUF, DL, VecTy, MaskVec, TmpVec, Vec);
3745
3746 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, ResVec,
3747 DAG.getConstant(0, DL, GRLenVT));
3748 }
3749 case MVT::v8i32:
3750 case MVT::v8f32: {
// 32-bit elements can use XVPERM directly with a splatted index vector.
3751 SDValue SplatIdx = DAG.getSplatBuildVector(MVT::v8i32, DL, Idx);
3752 SDValue SplatValue =
3753 DAG.getNode(LoongArchISD::XVPERM, DL, VecTy, Vec, SplatIdx);
3754
3755 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, SplatValue,
3756 DAG.getConstant(0, DL, GRLenVT));
3757 }
3758 }
3759}
3760
3761SDValue
// Custom lowering for INSERT_VECTOR_ELT with a variable index: select, per
// lane, between a splat of the new element and the original vector, using a
// lane-index == insert-index comparison.
3762LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
3763 SelectionDAG &DAG) const {
3764 MVT VT = Op.getSimpleValueType();
3765 MVT EltVT = VT.getVectorElementType();
3766 unsigned NumElts = VT.getVectorNumElements();
3767 unsigned EltSizeInBits = EltVT.getScalarSizeInBits();
3768 SDLoc DL(Op);
3769 SDValue Op0 = Op.getOperand(0);
3770 SDValue Op1 = Op.getOperand(1);
3771 SDValue Op2 = Op.getOperand(2);
3772
// Constant indices are matched directly by instruction selection patterns.
3773 if (isa<ConstantSDNode>(Op2))
3774 return Op;
3775
// IdxVTy: integer vector type with the same element width/count as VT,
// used both for the splatted index and for the per-lane index constants.
3776 MVT IdxTy = MVT::getIntegerVT(EltSizeInBits);
3777 MVT IdxVTy = MVT::getVectorVT(IdxTy, NumElts);
3778
3779 if (!isTypeLegal(VT) || !isTypeLegal(IdxVTy))
3780 return SDValue();
3781
3782 SDValue SplatElt = DAG.getSplatBuildVector(VT, DL, Op1);
3783 SmallVector<SDValue, 32> RawIndices;
3784 SDValue SplatIdx;
3785 SDValue Indices;
3786
// On LA32 there is no i64 scalar, so build i64 lane values as lo/hi i32
// pairs and bitcast the pair vector to the i64 index vector type.
3787 if (!Subtarget.is64Bit() && IdxTy == MVT::i64) {
3788 MVT PairVTy = MVT::getVectorVT(MVT::i32, NumElts * 2);
3789 for (unsigned i = 0; i < NumElts; ++i) {
3790 RawIndices.push_back(Op2);
3791 RawIndices.push_back(DAG.getConstant(0, DL, MVT::i32));
3792 }
3793 SplatIdx = DAG.getBuildVector(PairVTy, DL, RawIndices);
3794 SplatIdx = DAG.getBitcast(IdxVTy, SplatIdx);
3795
3796 RawIndices.clear();
3797 for (unsigned i = 0; i < NumElts; ++i) {
3798 RawIndices.push_back(DAG.getConstant(i, DL, MVT::i32));
3799 RawIndices.push_back(DAG.getConstant(0, DL, MVT::i32));
3800 }
3801 Indices = DAG.getBuildVector(PairVTy, DL, RawIndices);
3802 Indices = DAG.getBitcast(IdxVTy, Indices);
3803 } else {
3804 SplatIdx = DAG.getSplatBuildVector(IdxVTy, DL, Op2);
3805
3806 for (unsigned i = 0; i < NumElts; ++i)
3807 RawIndices.push_back(DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
3808 Indices = DAG.getBuildVector(IdxVTy, DL, RawIndices);
3809 }
3810
3811 // insert vec, elt, idx
3812 // =>
3813 // select (splatidx == {0,1,2...}) ? splatelt : vec
3814 SDValue SelectCC =
3815 DAG.getSetCC(DL, IdxVTy, SplatIdx, Indices, ISD::CondCode::SETEQ);
3816 return DAG.getNode(ISD::VSELECT, DL, VT, SelectCC, SplatElt, Op0);
3817}
3818
3819SDValue LoongArchTargetLowering::lowerATOMIC_FENCE(SDValue Op,
3820 SelectionDAG &DAG) const {
3821 SDLoc DL(Op);
3822 SyncScope::ID FenceSSID =
3823 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
3824
3825 // singlethread fences only synchronize with signal handlers on the same
3826 // thread and thus only need to preserve instruction order, not actually
3827 // enforce memory ordering.
3828 if (FenceSSID == SyncScope::SingleThread)
3829 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
3830 return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
3831
3832 return Op;
3833}
3834
3835SDValue LoongArchTargetLowering::lowerWRITE_REGISTER(SDValue Op,
3836 SelectionDAG &DAG) const {
3837
3838 if (Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i32) {
3839 DAG.getContext()->emitError(
3840 "On LA64, only 64-bit registers can be written.");
3841 return Op.getOperand(0);
3842 }
3843
3844 if (!Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i64) {
3845 DAG.getContext()->emitError(
3846 "On LA32, only 32-bit registers can be written.");
3847 return Op.getOperand(0);
3848 }
3849
3850 return Op;
3851}
3852
// Lower FRAMEADDR: read the frame register, then walk up 'Depth' frames by
// loading the saved frame pointer from each frame record.
3853SDValue LoongArchTargetLowering::lowerFRAMEADDR(SDValue Op,
3854 SelectionDAG &DAG) const {
3855 if (!isa<ConstantSDNode>(Op.getOperand(0))) {
3856 DAG.getContext()->emitError("argument to '__builtin_frame_address' must "
3857 "be a constant integer");
3858 return SDValue();
3859 }
3860
3861 MachineFunction &MF = DAG.getMachineFunction();
3863 Register FrameReg = Subtarget.getRegisterInfo()->getFrameRegister(MF);
3864 EVT VT = Op.getValueType();
3865 SDLoc DL(Op);
3866 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
3867 unsigned Depth = Op.getConstantOperandVal(0);
3868 int GRLenInBytes = Subtarget.getGRLen() / 8;
3869
3870 while (Depth--) {
// The caller's frame pointer is stored two GRLen slots below the current
// frame address.
3871 int Offset = -(GRLenInBytes * 2);
3872 SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
3873 DAG.getSignedConstant(Offset, DL, VT));
3874 FrameAddr =
3875 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
3876 }
3877 return FrameAddr;
3878}
3879
// Lower RETURNADDR by reading the return-address register for the current
// frame; non-zero depths are rejected with a diagnostic.
3880SDValue LoongArchTargetLowering::lowerRETURNADDR(SDValue Op,
3881 SelectionDAG &DAG) const {
3882 // Currently only support lowering return address for current frame.
3883 if (Op.getConstantOperandVal(0) != 0) {
3884 DAG.getContext()->emitError(
3885 "return address can only be determined for the current frame");
3886 return SDValue();
3887 }
3888
3889 MachineFunction &MF = DAG.getMachineFunction();
3891 MVT GRLenVT = Subtarget.getGRLenVT();
3892
3893 // Return the value of the return address register, marking it an implicit
3894 // live-in.
3895 Register Reg = MF.addLiveIn(Subtarget.getRegisterInfo()->getRARegister(),
3896 getRegClassFor(GRLenVT));
3897 return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), Reg, GRLenVT);
3898}
3899
3900SDValue LoongArchTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
3901 SelectionDAG &DAG) const {
3902 MachineFunction &MF = DAG.getMachineFunction();
3903 auto Size = Subtarget.getGRLen() / 8;
3904 auto FI = MF.getFrameInfo().CreateFixedObject(Size, 0, false);
3905 return DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
3906}
3907
// Lower VASTART by storing the address of the vararg save area (recorded in
// the function info during argument lowering) into the va_list slot.
3908SDValue LoongArchTargetLowering::lowerVASTART(SDValue Op,
3909 SelectionDAG &DAG) const {
3910 MachineFunction &MF = DAG.getMachineFunction();
3911 auto *FuncInfo = MF.getInfo<LoongArchMachineFunctionInfo>();
3912
3913 SDLoc DL(Op);
3914 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
3916
3917 // vastart just stores the address of the VarArgsFrameIndex slot into the
3918 // memory location argument.
3919 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3920 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
3921 MachinePointerInfo(SV));
3922}
3923
// Lower UINT_TO_FP on LA64 with single-float only (F but not D): inputs that
// are provably < 2^31 can use the signed conversion unchanged; everything
// else goes through the soft-float libcall.
3924SDValue LoongArchTargetLowering::lowerUINT_TO_FP(SDValue Op,
3925 SelectionDAG &DAG) const {
3926 assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
3927 !Subtarget.hasBasicD() && "unexpected target features");
3928
3929 SDLoc DL(Op);
3930 SDValue Op0 = Op.getOperand(0);
// An AND with a mask below 0xFFFFFFFF bounds the value, so the default
// lowering is safe.
3931 if (Op0->getOpcode() == ISD::AND) {
3932 auto *C = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
3933 if (C && C->getZExtValue() < UINT64_C(0xFFFFFFFF))
3934 return Op;
3935 }
3936
// Likewise a BSTRPICK starting at bit 0 with a top bit below 31.
3937 if (Op0->getOpcode() == LoongArchISD::BSTRPICK &&
3938 Op0.getConstantOperandVal(1) < UINT64_C(0X1F) &&
3939 Op0.getConstantOperandVal(2) == UINT64_C(0))
3940 return Op;
3941
// And an AssertZext from a type narrower than i32.
3942 if (Op0.getOpcode() == ISD::AssertZext &&
3943 dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLT(MVT::i32))
3944 return Op;
3945
// Otherwise fall back to the unsigned int-to-fp libcall.
3946 EVT OpVT = Op0.getValueType();
3947 EVT RetVT = Op.getValueType();
3948 RTLIB::Libcall LC = RTLIB::getUINTTOFP(OpVT, RetVT);
3949 MakeLibCallOptions CallOptions;
3950 CallOptions.setTypeListBeforeSoften(OpVT, RetVT);
3951 SDValue Chain = SDValue();
3953 std::tie(Result, Chain) =
3954 makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
3955 return Result;
3956}
3957
// Lower SINT_TO_FP on LA64 with single-float only (F but not D): inputs that
// are known to fit in i32 keep the default lowering; wider values go through
// the soft-float libcall.
3958SDValue LoongArchTargetLowering::lowerSINT_TO_FP(SDValue Op,
3959 SelectionDAG &DAG) const {
3960 assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
3961 !Subtarget.hasBasicD() && "unexpected target features");
3962
3963 SDLoc DL(Op);
3964 SDValue Op0 = Op.getOperand(0);
3965
// A sign-extension assertion from i32 or narrower means the value fits.
3966 if ((Op0.getOpcode() == ISD::AssertSext ||
3968 dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLE(MVT::i32))
3969 return Op;
3970
// Otherwise fall back to the signed int-to-fp libcall.
3971 EVT OpVT = Op0.getValueType();
3972 EVT RetVT = Op.getValueType();
3973 RTLIB::Libcall LC = RTLIB::getSINTTOFP(OpVT, RetVT);
3974 MakeLibCallOptions CallOptions;
3975 CallOptions.setTypeListBeforeSoften(OpVT, RetVT);
3976 SDValue Chain = SDValue();
3978 std::tie(Result, Chain) =
3979 makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
3980 return Result;
3981}
3982
3983SDValue LoongArchTargetLowering::lowerBITCAST(SDValue Op,
3984 SelectionDAG &DAG) const {
3985
3986 SDLoc DL(Op);
3987 EVT VT = Op.getValueType();
3988 SDValue Op0 = Op.getOperand(0);
3989 EVT Op0VT = Op0.getValueType();
3990
3991 if (Op.getValueType() == MVT::f32 && Op0VT == MVT::i32 &&
3992 Subtarget.is64Bit() && Subtarget.hasBasicF()) {
3993 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
3994 return DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, NewOp0);
3995 }
3996 if (VT == MVT::f64 && Op0VT == MVT::i64 && !Subtarget.is64Bit()) {
3997 SDValue Lo, Hi;
3998 std::tie(Lo, Hi) = DAG.SplitScalar(Op0, DL, MVT::i32, MVT::i32);
3999 return DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64, Lo, Hi);
4000 }
4001 return Op;
4002}
4003
4004SDValue LoongArchTargetLowering::lowerFP_TO_SINT(SDValue Op,
4005 SelectionDAG &DAG) const {
4006
4007 SDLoc DL(Op);
4008 SDValue Op0 = Op.getOperand(0);
4009
4010 if (Op0.getValueType() == MVT::f16)
4011 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
4012
4013 if (Op.getValueSizeInBits() > 32 && Subtarget.hasBasicF() &&
4014 !Subtarget.hasBasicD()) {
4015 SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, MVT::f32, Op0);
4016 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Dst);
4017 }
4018
4019 EVT FPTy = EVT::getFloatingPointVT(Op.getValueSizeInBits());
4020 SDValue Trunc = DAG.getNode(LoongArchISD::FTINT, DL, FPTy, Op0);
4021 return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Trunc);
4022}
4023
                             SelectionDAG &DAG, unsigned Flags) {
  // Wrap a global address as a target-specific node carrying the given
  // relocation flags (offset is always 0 here).
  return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
}
4028
                             SelectionDAG &DAG, unsigned Flags) {
  // Wrap a blockaddress, preserving its offset, with the given flags.
  return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
                                   Flags);
}
4034
                             SelectionDAG &DAG, unsigned Flags) {
  // Wrap a constant-pool entry, keeping its alignment and offset.
  return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
                                   N->getOffset(), Flags);
}
4040
                             SelectionDAG &DAG, unsigned Flags) {
  // Wrap a jump-table index with the given flags.
  return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
}
4045
// Materialize the address of a symbol-like node (global address,
// blockaddress, constant pool entry or jump table) using the PC-relative
// sequence appropriate for the active code model. Local symbols use the
// PCREL pseudos; non-local ones are loaded from the GOT, and that load is
// tagged invariant so MachineLICM can hoist it.
template <class NodeTy>
SDValue LoongArchTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
                                         bool IsLocal) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());
  SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
  SDValue Load;

  switch (M) {
  default:
    report_fatal_error("Unsupported code model");

  case CodeModel::Large: {
    assert(Subtarget.is64Bit() && "Large code model requires LA64");

    // This is not actually used, but is necessary for successfully matching
    // the PseudoLA_*_LARGE nodes.
    SDValue Tmp = DAG.getConstant(0, DL, Ty);
    if (IsLocal) {
      // This generates the pattern (PseudoLA_PCREL_LARGE tmp sym), that
      // eventually becomes the desired 5-insn code sequence.
      Load = SDValue(DAG.getMachineNode(LoongArch::PseudoLA_PCREL_LARGE, DL, Ty,
                                        Tmp, Addr),
                     0);
    } else {
      // This generates the pattern (PseudoLA_GOT_LARGE tmp sym), that
      // eventually becomes the desired 5-insn code sequence.
      Load = SDValue(
          DAG.getMachineNode(LoongArch::PseudoLA_GOT_LARGE, DL, Ty, Tmp, Addr),
          0);
    }
    break;
  }

  case CodeModel::Small:
  case CodeModel::Medium:
    if (IsLocal) {
      // This generates the pattern (PseudoLA_PCREL sym), which
      //
      // for la32r expands to:
      // (addi.w (pcaddu12i %pcadd_hi20(sym)) %pcadd_lo12(.Lpcadd_hi)).
      //
      // for la32s and la64 expands to:
      // (addi.w/d (pcalau12i %pc_hi20(sym)) %pc_lo12(sym)).
      Load = SDValue(
          DAG.getMachineNode(LoongArch::PseudoLA_PCREL, DL, Ty, Addr), 0);
    } else {
      // This generates the pattern (PseudoLA_GOT sym), which
      //
      // for la32r expands to:
      // (ld.w (pcaddu12i %got_pcadd_hi20(sym)) %pcadd_lo12(.Lpcadd_hi)).
      //
      // for la32s and la64 expands to:
      // (ld.w/d (pcalau12i %got_pc_hi20(sym)) %got_pc_lo12(sym)).
      Load =
          SDValue(DAG.getMachineNode(LoongArch::PseudoLA_GOT, DL, Ty, Addr), 0);
    }
  }

  if (!IsLocal) {
    // Mark the load instruction as invariant to enable hoisting in MachineLICM.
    MachineFunction &MF = DAG.getMachineFunction();
    MachineMemOperand *MemOp = MF.getMachineMemOperand(
        LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
    DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
  }

  return Load;
}
4119
4120SDValue LoongArchTargetLowering::lowerBlockAddress(SDValue Op,
4121 SelectionDAG &DAG) const {
4122 return getAddr(cast<BlockAddressSDNode>(Op), DAG,
4123 DAG.getTarget().getCodeModel());
4124}
4125
4126SDValue LoongArchTargetLowering::lowerJumpTable(SDValue Op,
4127 SelectionDAG &DAG) const {
4128 return getAddr(cast<JumpTableSDNode>(Op), DAG,
4129 DAG.getTarget().getCodeModel());
4130}
4131
4132SDValue LoongArchTargetLowering::lowerConstantPool(SDValue Op,
4133 SelectionDAG &DAG) const {
4134 return getAddr(cast<ConstantPoolSDNode>(Op), DAG,
4135 DAG.getTarget().getCodeModel());
4136}
4137
4138SDValue LoongArchTargetLowering::lowerGlobalAddress(SDValue Op,
4139 SelectionDAG &DAG) const {
4140 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
4141 assert(N->getOffset() == 0 && "unexpected offset in global node");
4142 auto CM = DAG.getTarget().getCodeModel();
4143 const GlobalValue *GV = N->getGlobal();
4144
4145 if (GV->isDSOLocal() && isa<GlobalVariable>(GV)) {
4146 if (auto GCM = dyn_cast<GlobalVariable>(GV)->getCodeModel())
4147 CM = *GCM;
4148 }
4149
4150 return getAddr(N, DAG, CM, GV->isDSOLocal());
4151}
4152
// Lower a static-TLS (LE/IE) access: emit the requested PseudoLA_TLS_*
// pseudo to compute the per-thread offset, tag the GOT load (IE) as
// invariant so MachineLICM can hoist it, and add the thread pointer ($tp).
SDValue LoongArchTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
                                                  SelectionDAG &DAG,
                                                  unsigned Opc, bool UseGOT,
                                                  bool Large) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());
  MVT GRLenVT = Subtarget.getGRLenVT();

  // This is not actually used, but is necessary for successfully matching the
  // PseudoLA_*_LARGE nodes.
  SDValue Tmp = DAG.getConstant(0, DL, Ty);
  SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);

  // Only IE needs an extra argument for large code model.
  SDValue Offset = Opc == LoongArch::PseudoLA_TLS_IE_LARGE
                       ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
                       : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);

  // If it is LE for normal/medium code model, the add tp operation will occur
  // during the pseudo-instruction expansion.
  if (Opc == LoongArch::PseudoLA_TLS_LE && !Large)
    return Offset;

  if (UseGOT) {
    // Mark the load instruction as invariant to enable hoisting in MachineLICM.
    MachineFunction &MF = DAG.getMachineFunction();
    MachineMemOperand *MemOp = MF.getMachineMemOperand(
        LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
    DAG.setNodeMemRefs(cast<MachineSDNode>(Offset.getNode()), {MemOp});
  }

  // Add the thread pointer.
  return DAG.getNode(ISD::ADD, DL, Ty, Offset,
                     DAG.getRegister(LoongArch::R2, GRLenVT));
}
4191
// Lower a dynamic-TLS (GD/LD) access: compute the GOT slot address with the
// given PseudoLA_TLS_* pseudo and pass it to a __tls_get_addr call, whose
// result is the final address.
SDValue LoongArchTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
                                                   SelectionDAG &DAG,
                                                   unsigned Opc,
                                                   bool Large) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());
  // __tls_get_addr takes and returns a pointer-sized integer.
  IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());

  // This is not actually used, but is necessary for successfully matching the
  // PseudoLA_*_LARGE nodes.
  SDValue Tmp = DAG.getConstant(0, DL, Ty);

  // Use a PC-relative addressing mode to access the dynamic GOT address.
  SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
  SDValue Load = Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
                       : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);

  // Prepare argument list to generate call.
  Args.emplace_back(Load, CallTy);

  // Setup call to __tls_get_addr.
  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(DL)
      .setChain(DAG.getEntryNode())
      .setLibCallee(CallingConv::C, CallTy,
                    DAG.getExternalSymbol("__tls_get_addr", Ty),
                    std::move(Args));

  return LowerCallTo(CLI).first;
}
4223
4224SDValue LoongArchTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
4225 SelectionDAG &DAG, unsigned Opc,
4226 bool Large) const {
4227 SDLoc DL(N);
4228 EVT Ty = getPointerTy(DAG.getDataLayout());
4229 const GlobalValue *GV = N->getGlobal();
4230
4231 // This is not actually used, but is necessary for successfully matching the
4232 // PseudoLA_*_LARGE nodes.
4233 SDValue Tmp = DAG.getConstant(0, DL, Ty);
4234
4235 // Use a PC-relative addressing mode to access the global dynamic GOT address.
4236 // This generates the pattern (PseudoLA_TLS_DESC_PC{,LARGE} sym).
4237 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
4238 return Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
4239 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
4240}
4241
// Dispatch a TLS global access to the routine matching its TLS model:
// GD/LD via __tls_get_addr (or TLSDESC when enabled), IE through the GOT,
// and LE computed directly against the thread pointer.
SDValue
LoongArchTargetLowering::lowerGlobalTLSAddress(SDValue Op,
                                               SelectionDAG &DAG) const {
    report_fatal_error("In GHC calling convention TLS is not supported");

  bool Large = DAG.getTarget().getCodeModel() == CodeModel::Large;
  assert((!Large || Subtarget.is64Bit()) && "Large code model requires LA64");

  GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
  assert(N->getOffset() == 0 && "unexpected offset in global node");

  if (DAG.getTarget().useEmulatedTLS())
    reportFatalUsageError("the emulated TLS is prohibited");

  // When TLSDESC is enabled, GD/LD accesses fall through the switch to the
  // getTLSDescAddr call below.
  bool IsDesc = DAG.getTarget().useTLSDESC();

  switch (getTargetMachine().getTLSModel(N->getGlobal())) {
    // In this model, application code calls the dynamic linker function
    // __tls_get_addr to locate TLS offsets into the dynamic thread vector at
    // runtime.
    if (!IsDesc)
      return getDynamicTLSAddr(N, DAG,
                               Large ? LoongArch::PseudoLA_TLS_GD_LARGE
                                     : LoongArch::PseudoLA_TLS_GD,
                               Large);
    break;
    // Same as GeneralDynamic, except for assembly modifiers and relocation
    // records.
    if (!IsDesc)
      return getDynamicTLSAddr(N, DAG,
                               Large ? LoongArch::PseudoLA_TLS_LD_LARGE
                                     : LoongArch::PseudoLA_TLS_LD,
                               Large);
    break;
    // This model uses the GOT to resolve TLS offsets.
    return getStaticTLSAddr(N, DAG,
                            Large ? LoongArch::PseudoLA_TLS_IE_LARGE
                                  : LoongArch::PseudoLA_TLS_IE,
                            /*UseGOT=*/true, Large);
    // This model is used when static linking as the TLS offsets are resolved
    // during program linking.
    //
    // This node doesn't need an extra argument for the large code model.
    return getStaticTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_LE,
                            /*UseGOT=*/false, Large);
  }

  return getTLSDescAddr(N, DAG,
                        Large ? LoongArch::PseudoLA_TLS_DESC_LARGE
                              : LoongArch::PseudoLA_TLS_DESC,
                        Large);
}
4300
// Verify that the immediate operand at index ImmOp of an intrinsic fits in
// N bits (signed or unsigned as requested). On failure, emit a diagnostic
// and return an UNDEF of the intrinsic's result type; on success return an
// empty SDValue so the caller can proceed with normal lowering.
template <unsigned N>
                                  SelectionDAG &DAG, bool IsSigned = false) {
  auto *CImm = cast<ConstantSDNode>(Op->getOperand(ImmOp));
  // Check the ImmArg.
  if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
      (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
    DAG.getContext()->emitError(Op->getOperationName(0) +
                                ": argument out of range.");
    return DAG.getNode(ISD::UNDEF, SDLoc(Op), Op.getValueType());
  }
  return SDValue();
}
4314
// Lower ISD::INTRINSIC_WO_CHAIN. Apart from thread_pointer, every custom
// case only range-checks the ImmArg operand of an LSX/LASX intrinsic via
// checkIntrinsicImmArg<Bits>(Op, OperandIdx, ...): an empty SDValue means
// the immediate is in range and normal selection continues; otherwise a
// diagnostic is emitted and UNDEF is returned.
SDValue
LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
                                                 SelectionDAG &DAG) const {
  switch (Op.getConstantOperandVal(0)) {
  default:
    return SDValue(); // Don't custom lower most intrinsics.
  case Intrinsic::thread_pointer: {
    // Reads the thread pointer register $tp (R2).
    EVT PtrVT = getPointerTy(DAG.getDataLayout());
    return DAG.getRegister(LoongArch::R2, PtrVT);
  }
  // uimm1 immediate at operand 2.
  case Intrinsic::loongarch_lsx_vpickve2gr_d:
  case Intrinsic::loongarch_lsx_vpickve2gr_du:
  case Intrinsic::loongarch_lsx_vreplvei_d:
  case Intrinsic::loongarch_lasx_xvrepl128vei_d:
    return checkIntrinsicImmArg<1>(Op, 2, DAG);
  // uimm2 immediate at operand 2.
  case Intrinsic::loongarch_lsx_vreplvei_w:
  case Intrinsic::loongarch_lasx_xvrepl128vei_w:
  case Intrinsic::loongarch_lasx_xvpickve2gr_d:
  case Intrinsic::loongarch_lasx_xvpickve2gr_du:
  case Intrinsic::loongarch_lasx_xvpickve_d:
  case Intrinsic::loongarch_lasx_xvpickve_d_f:
    return checkIntrinsicImmArg<2>(Op, 2, DAG);
  // uimm2 immediate at operand 3.
  case Intrinsic::loongarch_lasx_xvinsve0_d:
    return checkIntrinsicImmArg<2>(Op, 3, DAG);
  // uimm3 immediate at operand 2.
  case Intrinsic::loongarch_lsx_vsat_b:
  case Intrinsic::loongarch_lsx_vsat_bu:
  case Intrinsic::loongarch_lsx_vrotri_b:
  case Intrinsic::loongarch_lsx_vsllwil_h_b:
  case Intrinsic::loongarch_lsx_vsllwil_hu_bu:
  case Intrinsic::loongarch_lsx_vsrlri_b:
  case Intrinsic::loongarch_lsx_vsrari_b:
  case Intrinsic::loongarch_lsx_vreplvei_h:
  case Intrinsic::loongarch_lasx_xvsat_b:
  case Intrinsic::loongarch_lasx_xvsat_bu:
  case Intrinsic::loongarch_lasx_xvrotri_b:
  case Intrinsic::loongarch_lasx_xvsllwil_h_b:
  case Intrinsic::loongarch_lasx_xvsllwil_hu_bu:
  case Intrinsic::loongarch_lasx_xvsrlri_b:
  case Intrinsic::loongarch_lasx_xvsrari_b:
  case Intrinsic::loongarch_lasx_xvrepl128vei_h:
  case Intrinsic::loongarch_lasx_xvpickve_w:
  case Intrinsic::loongarch_lasx_xvpickve_w_f:
    return checkIntrinsicImmArg<3>(Op, 2, DAG);
  // uimm3 immediate at operand 3.
  case Intrinsic::loongarch_lasx_xvinsve0_w:
    return checkIntrinsicImmArg<3>(Op, 3, DAG);
  // uimm4 immediate at operand 2.
  case Intrinsic::loongarch_lsx_vsat_h:
  case Intrinsic::loongarch_lsx_vsat_hu:
  case Intrinsic::loongarch_lsx_vrotri_h:
  case Intrinsic::loongarch_lsx_vsllwil_w_h:
  case Intrinsic::loongarch_lsx_vsllwil_wu_hu:
  case Intrinsic::loongarch_lsx_vsrlri_h:
  case Intrinsic::loongarch_lsx_vsrari_h:
  case Intrinsic::loongarch_lsx_vreplvei_b:
  case Intrinsic::loongarch_lasx_xvsat_h:
  case Intrinsic::loongarch_lasx_xvsat_hu:
  case Intrinsic::loongarch_lasx_xvrotri_h:
  case Intrinsic::loongarch_lasx_xvsllwil_w_h:
  case Intrinsic::loongarch_lasx_xvsllwil_wu_hu:
  case Intrinsic::loongarch_lasx_xvsrlri_h:
  case Intrinsic::loongarch_lasx_xvsrari_h:
  case Intrinsic::loongarch_lasx_xvrepl128vei_b:
    return checkIntrinsicImmArg<4>(Op, 2, DAG);
  // uimm4 immediate at operand 3.
  case Intrinsic::loongarch_lsx_vsrlni_b_h:
  case Intrinsic::loongarch_lsx_vsrani_b_h:
  case Intrinsic::loongarch_lsx_vsrlrni_b_h:
  case Intrinsic::loongarch_lsx_vsrarni_b_h:
  case Intrinsic::loongarch_lsx_vssrlni_b_h:
  case Intrinsic::loongarch_lsx_vssrani_b_h:
  case Intrinsic::loongarch_lsx_vssrlni_bu_h:
  case Intrinsic::loongarch_lsx_vssrani_bu_h:
  case Intrinsic::loongarch_lsx_vssrlrni_b_h:
  case Intrinsic::loongarch_lsx_vssrarni_b_h:
  case Intrinsic::loongarch_lsx_vssrlrni_bu_h:
  case Intrinsic::loongarch_lsx_vssrarni_bu_h:
  case Intrinsic::loongarch_lasx_xvsrlni_b_h:
  case Intrinsic::loongarch_lasx_xvsrani_b_h:
  case Intrinsic::loongarch_lasx_xvsrlrni_b_h:
  case Intrinsic::loongarch_lasx_xvsrarni_b_h:
  case Intrinsic::loongarch_lasx_xvssrlni_b_h:
  case Intrinsic::loongarch_lasx_xvssrani_b_h:
  case Intrinsic::loongarch_lasx_xvssrlni_bu_h:
  case Intrinsic::loongarch_lasx_xvssrani_bu_h:
  case Intrinsic::loongarch_lasx_xvssrlrni_b_h:
  case Intrinsic::loongarch_lasx_xvssrarni_b_h:
  case Intrinsic::loongarch_lasx_xvssrlrni_bu_h:
  case Intrinsic::loongarch_lasx_xvssrarni_bu_h:
    return checkIntrinsicImmArg<4>(Op, 3, DAG);
  // uimm5 immediate at operand 2.
  case Intrinsic::loongarch_lsx_vsat_w:
  case Intrinsic::loongarch_lsx_vsat_wu:
  case Intrinsic::loongarch_lsx_vrotri_w:
  case Intrinsic::loongarch_lsx_vsllwil_d_w:
  case Intrinsic::loongarch_lsx_vsllwil_du_wu:
  case Intrinsic::loongarch_lsx_vsrlri_w:
  case Intrinsic::loongarch_lsx_vsrari_w:
  case Intrinsic::loongarch_lsx_vslei_bu:
  case Intrinsic::loongarch_lsx_vslei_hu:
  case Intrinsic::loongarch_lsx_vslei_wu:
  case Intrinsic::loongarch_lsx_vslei_du:
  case Intrinsic::loongarch_lsx_vslti_bu:
  case Intrinsic::loongarch_lsx_vslti_hu:
  case Intrinsic::loongarch_lsx_vslti_wu:
  case Intrinsic::loongarch_lsx_vslti_du:
  case Intrinsic::loongarch_lsx_vbsll_v:
  case Intrinsic::loongarch_lsx_vbsrl_v:
  case Intrinsic::loongarch_lasx_xvsat_w:
  case Intrinsic::loongarch_lasx_xvsat_wu:
  case Intrinsic::loongarch_lasx_xvrotri_w:
  case Intrinsic::loongarch_lasx_xvsllwil_d_w:
  case Intrinsic::loongarch_lasx_xvsllwil_du_wu:
  case Intrinsic::loongarch_lasx_xvsrlri_w:
  case Intrinsic::loongarch_lasx_xvsrari_w:
  case Intrinsic::loongarch_lasx_xvslei_bu:
  case Intrinsic::loongarch_lasx_xvslei_hu:
  case Intrinsic::loongarch_lasx_xvslei_wu:
  case Intrinsic::loongarch_lasx_xvslei_du:
  case Intrinsic::loongarch_lasx_xvslti_bu:
  case Intrinsic::loongarch_lasx_xvslti_hu:
  case Intrinsic::loongarch_lasx_xvslti_wu:
  case Intrinsic::loongarch_lasx_xvslti_du:
  case Intrinsic::loongarch_lasx_xvbsll_v:
  case Intrinsic::loongarch_lasx_xvbsrl_v:
    return checkIntrinsicImmArg<5>(Op, 2, DAG);
  // simm5 immediate at operand 2.
  case Intrinsic::loongarch_lsx_vseqi_b:
  case Intrinsic::loongarch_lsx_vseqi_h:
  case Intrinsic::loongarch_lsx_vseqi_w:
  case Intrinsic::loongarch_lsx_vseqi_d:
  case Intrinsic::loongarch_lsx_vslei_b:
  case Intrinsic::loongarch_lsx_vslei_h:
  case Intrinsic::loongarch_lsx_vslei_w:
  case Intrinsic::loongarch_lsx_vslei_d:
  case Intrinsic::loongarch_lsx_vslti_b:
  case Intrinsic::loongarch_lsx_vslti_h:
  case Intrinsic::loongarch_lsx_vslti_w:
  case Intrinsic::loongarch_lsx_vslti_d:
  case Intrinsic::loongarch_lasx_xvseqi_b:
  case Intrinsic::loongarch_lasx_xvseqi_h:
  case Intrinsic::loongarch_lasx_xvseqi_w:
  case Intrinsic::loongarch_lasx_xvseqi_d:
  case Intrinsic::loongarch_lasx_xvslei_b:
  case Intrinsic::loongarch_lasx_xvslei_h:
  case Intrinsic::loongarch_lasx_xvslei_w:
  case Intrinsic::loongarch_lasx_xvslei_d:
  case Intrinsic::loongarch_lasx_xvslti_b:
  case Intrinsic::loongarch_lasx_xvslti_h:
  case Intrinsic::loongarch_lasx_xvslti_w:
  case Intrinsic::loongarch_lasx_xvslti_d:
    return checkIntrinsicImmArg<5>(Op, 2, DAG, /*IsSigned=*/true);
  // uimm5 immediate at operand 3.
  case Intrinsic::loongarch_lsx_vsrlni_h_w:
  case Intrinsic::loongarch_lsx_vsrani_h_w:
  case Intrinsic::loongarch_lsx_vsrlrni_h_w:
  case Intrinsic::loongarch_lsx_vsrarni_h_w:
  case Intrinsic::loongarch_lsx_vssrlni_h_w:
  case Intrinsic::loongarch_lsx_vssrani_h_w:
  case Intrinsic::loongarch_lsx_vssrlni_hu_w:
  case Intrinsic::loongarch_lsx_vssrani_hu_w:
  case Intrinsic::loongarch_lsx_vssrlrni_h_w:
  case Intrinsic::loongarch_lsx_vssrarni_h_w:
  case Intrinsic::loongarch_lsx_vssrlrni_hu_w:
  case Intrinsic::loongarch_lsx_vssrarni_hu_w:
  case Intrinsic::loongarch_lsx_vfrstpi_b:
  case Intrinsic::loongarch_lsx_vfrstpi_h:
  case Intrinsic::loongarch_lasx_xvsrlni_h_w:
  case Intrinsic::loongarch_lasx_xvsrani_h_w:
  case Intrinsic::loongarch_lasx_xvsrlrni_h_w:
  case Intrinsic::loongarch_lasx_xvsrarni_h_w:
  case Intrinsic::loongarch_lasx_xvssrlni_h_w:
  case Intrinsic::loongarch_lasx_xvssrani_h_w:
  case Intrinsic::loongarch_lasx_xvssrlni_hu_w:
  case Intrinsic::loongarch_lasx_xvssrani_hu_w:
  case Intrinsic::loongarch_lasx_xvssrlrni_h_w:
  case Intrinsic::loongarch_lasx_xvssrarni_h_w:
  case Intrinsic::loongarch_lasx_xvssrlrni_hu_w:
  case Intrinsic::loongarch_lasx_xvssrarni_hu_w:
  case Intrinsic::loongarch_lasx_xvfrstpi_b:
  case Intrinsic::loongarch_lasx_xvfrstpi_h:
    return checkIntrinsicImmArg<5>(Op, 3, DAG);
  // uimm6 immediate at operand 2.
  case Intrinsic::loongarch_lsx_vsat_d:
  case Intrinsic::loongarch_lsx_vsat_du:
  case Intrinsic::loongarch_lsx_vrotri_d:
  case Intrinsic::loongarch_lsx_vsrlri_d:
  case Intrinsic::loongarch_lsx_vsrari_d:
  case Intrinsic::loongarch_lasx_xvsat_d:
  case Intrinsic::loongarch_lasx_xvsat_du:
  case Intrinsic::loongarch_lasx_xvrotri_d:
  case Intrinsic::loongarch_lasx_xvsrlri_d:
  case Intrinsic::loongarch_lasx_xvsrari_d:
    return checkIntrinsicImmArg<6>(Op, 2, DAG);
  // uimm6 immediate at operand 3.
  case Intrinsic::loongarch_lsx_vsrlni_w_d:
  case Intrinsic::loongarch_lsx_vsrani_w_d:
  case Intrinsic::loongarch_lsx_vsrlrni_w_d:
  case Intrinsic::loongarch_lsx_vsrarni_w_d:
  case Intrinsic::loongarch_lsx_vssrlni_w_d:
  case Intrinsic::loongarch_lsx_vssrani_w_d:
  case Intrinsic::loongarch_lsx_vssrlni_wu_d:
  case Intrinsic::loongarch_lsx_vssrani_wu_d:
  case Intrinsic::loongarch_lsx_vssrlrni_w_d:
  case Intrinsic::loongarch_lsx_vssrarni_w_d:
  case Intrinsic::loongarch_lsx_vssrlrni_wu_d:
  case Intrinsic::loongarch_lsx_vssrarni_wu_d:
  case Intrinsic::loongarch_lasx_xvsrlni_w_d:
  case Intrinsic::loongarch_lasx_xvsrani_w_d:
  case Intrinsic::loongarch_lasx_xvsrlrni_w_d:
  case Intrinsic::loongarch_lasx_xvsrarni_w_d:
  case Intrinsic::loongarch_lasx_xvssrlni_w_d:
  case Intrinsic::loongarch_lasx_xvssrani_w_d:
  case Intrinsic::loongarch_lasx_xvssrlni_wu_d:
  case Intrinsic::loongarch_lasx_xvssrani_wu_d:
  case Intrinsic::loongarch_lasx_xvssrlrni_w_d:
  case Intrinsic::loongarch_lasx_xvssrarni_w_d:
  case Intrinsic::loongarch_lasx_xvssrlrni_wu_d:
  case Intrinsic::loongarch_lasx_xvssrarni_wu_d:
    return checkIntrinsicImmArg<6>(Op, 3, DAG);
  // uimm7 immediate at operand 3.
  case Intrinsic::loongarch_lsx_vsrlni_d_q:
  case Intrinsic::loongarch_lsx_vsrani_d_q:
  case Intrinsic::loongarch_lsx_vsrlrni_d_q:
  case Intrinsic::loongarch_lsx_vsrarni_d_q:
  case Intrinsic::loongarch_lsx_vssrlni_d_q:
  case Intrinsic::loongarch_lsx_vssrani_d_q:
  case Intrinsic::loongarch_lsx_vssrlni_du_q:
  case Intrinsic::loongarch_lsx_vssrani_du_q:
  case Intrinsic::loongarch_lsx_vssrlrni_d_q:
  case Intrinsic::loongarch_lsx_vssrarni_d_q:
  case Intrinsic::loongarch_lsx_vssrlrni_du_q:
  case Intrinsic::loongarch_lsx_vssrarni_du_q:
  case Intrinsic::loongarch_lasx_xvsrlni_d_q:
  case Intrinsic::loongarch_lasx_xvsrani_d_q:
  case Intrinsic::loongarch_lasx_xvsrlrni_d_q:
  case Intrinsic::loongarch_lasx_xvsrarni_d_q:
  case Intrinsic::loongarch_lasx_xvssrlni_d_q:
  case Intrinsic::loongarch_lasx_xvssrani_d_q:
  case Intrinsic::loongarch_lasx_xvssrlni_du_q:
  case Intrinsic::loongarch_lasx_xvssrani_du_q:
  case Intrinsic::loongarch_lasx_xvssrlrni_d_q:
  case Intrinsic::loongarch_lasx_xvssrarni_d_q:
  case Intrinsic::loongarch_lasx_xvssrlrni_du_q:
  case Intrinsic::loongarch_lasx_xvssrarni_du_q:
    return checkIntrinsicImmArg<7>(Op, 3, DAG);
  // uimm8 immediate at operand 2.
  case Intrinsic::loongarch_lsx_vnori_b:
  case Intrinsic::loongarch_lsx_vshuf4i_b:
  case Intrinsic::loongarch_lsx_vshuf4i_h:
  case Intrinsic::loongarch_lsx_vshuf4i_w:
  case Intrinsic::loongarch_lasx_xvnori_b:
  case Intrinsic::loongarch_lasx_xvshuf4i_b:
  case Intrinsic::loongarch_lasx_xvshuf4i_h:
  case Intrinsic::loongarch_lasx_xvshuf4i_w:
  case Intrinsic::loongarch_lasx_xvpermi_d:
    return checkIntrinsicImmArg<8>(Op, 2, DAG);
  // uimm8 immediate at operand 3.
  case Intrinsic::loongarch_lsx_vshuf4i_d:
  case Intrinsic::loongarch_lsx_vpermi_w:
  case Intrinsic::loongarch_lsx_vbitseli_b:
  case Intrinsic::loongarch_lsx_vextrins_b:
  case Intrinsic::loongarch_lsx_vextrins_h:
  case Intrinsic::loongarch_lsx_vextrins_w:
  case Intrinsic::loongarch_lsx_vextrins_d:
  case Intrinsic::loongarch_lasx_xvshuf4i_d:
  case Intrinsic::loongarch_lasx_xvpermi_w:
  case Intrinsic::loongarch_lasx_xvpermi_q:
  case Intrinsic::loongarch_lasx_xvbitseli_b:
  case Intrinsic::loongarch_lasx_xvextrins_b:
  case Intrinsic::loongarch_lasx_xvextrins_h:
  case Intrinsic::loongarch_lasx_xvextrins_w:
  case Intrinsic::loongarch_lasx_xvextrins_d:
    return checkIntrinsicImmArg<8>(Op, 3, DAG);
  // simm10 immediate at operand 1.
  case Intrinsic::loongarch_lsx_vrepli_b:
  case Intrinsic::loongarch_lsx_vrepli_h:
  case Intrinsic::loongarch_lsx_vrepli_w:
  case Intrinsic::loongarch_lsx_vrepli_d:
  case Intrinsic::loongarch_lasx_xvrepli_b:
  case Intrinsic::loongarch_lasx_xvrepli_h:
  case Intrinsic::loongarch_lasx_xvrepli_w:
  case Intrinsic::loongarch_lasx_xvrepli_d:
    return checkIntrinsicImmArg<10>(Op, 1, DAG, /*IsSigned=*/true);
  // simm13 immediate at operand 1.
  case Intrinsic::loongarch_lsx_vldi:
  case Intrinsic::loongarch_lasx_xvldi:
    return checkIntrinsicImmArg<13>(Op, 1, DAG, /*IsSigned=*/true);
  }
}
4592
// Helper function that emits an error message for an intrinsic with a chain
// and returns the merge of an UNDEF result value with the incoming chain, so
// lowering can continue after the diagnostic.
                                               StringRef ErrorMsg,
                                               SelectionDAG &DAG) {
  DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
  return DAG.getMergeValues({DAG.getUNDEF(Op.getValueType()), Op.getOperand(0)},
                            SDLoc(Op));
}
4602
// Lower ISD::INTRINSIC_W_CHAIN: validate immediate operands and target
// requirements (LA64-only, 'f' feature) of chained LoongArch intrinsics,
// emitting a diagnostic plus UNDEF on failure, and expand CSR/IOCSR/CPUCFG
// accesses to their LoongArchISD machine nodes.
SDValue
LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT GRLenVT = Subtarget.getGRLenVT();
  EVT VT = Op.getValueType();
  SDValue Chain = Op.getOperand(0);
  const StringRef ErrorMsgOOR = "argument out of range";
  const StringRef ErrorMsgReqLA64 = "requires loongarch64";
  const StringRef ErrorMsgReqF = "requires basic 'f' target feature";

  switch (Op.getConstantOperandVal(1)) {
  default:
    return Op;
  // CRC intrinsics reaching here mean we are not on LA64: diagnose.
  case Intrinsic::loongarch_crc_w_b_w:
  case Intrinsic::loongarch_crc_w_h_w:
  case Intrinsic::loongarch_crc_w_w_w:
  case Intrinsic::loongarch_crc_w_d_w:
  case Intrinsic::loongarch_crcc_w_b_w:
  case Intrinsic::loongarch_crcc_w_h_w:
  case Intrinsic::loongarch_crcc_w_w_w:
  case Intrinsic::loongarch_crcc_w_d_w:
    return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqLA64, DAG);
  case Intrinsic::loongarch_csrrd_w:
  case Intrinsic::loongarch_csrrd_d: {
    // CSR number must fit in 14 unsigned bits.
    unsigned Imm = Op.getConstantOperandVal(2);
    return !isUInt<14>(Imm)
               ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
               : DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
                             {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
  }
  case Intrinsic::loongarch_csrwr_w:
  case Intrinsic::loongarch_csrwr_d: {
    // CSR number must fit in 14 unsigned bits.
    unsigned Imm = Op.getConstantOperandVal(3);
    return !isUInt<14>(Imm)
               ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
               : DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
                             {Chain, Op.getOperand(2),
                              DAG.getConstant(Imm, DL, GRLenVT)});
  }
  case Intrinsic::loongarch_csrxchg_w:
  case Intrinsic::loongarch_csrxchg_d: {
    // CSR number must fit in 14 unsigned bits.
    unsigned Imm = Op.getConstantOperandVal(4);
    return !isUInt<14>(Imm)
               ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
               : DAG.getNode(LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
                             {Chain, Op.getOperand(2), Op.getOperand(3),
                              DAG.getConstant(Imm, DL, GRLenVT)});
  }
  case Intrinsic::loongarch_iocsrrd_d: {
    // The 64-bit IOCSR read takes its address any-extended to i64.
    return DAG.getNode(
        LoongArchISD::IOCSRRD_D, DL, {GRLenVT, MVT::Other},
        {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2))});
  }
// Expand the narrower IOCSR reads uniformly to their machine nodes.
#define IOCSRRD_CASE(NAME, NODE)                                               \
  case Intrinsic::loongarch_##NAME: {                                          \
    return DAG.getNode(LoongArchISD::NODE, DL, {GRLenVT, MVT::Other},          \
                       {Chain, Op.getOperand(2)});                             \
  }
    IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
    IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
    IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
#undef IOCSRRD_CASE
  case Intrinsic::loongarch_cpucfg: {
    return DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
                       {Chain, Op.getOperand(2)});
  }
  case Intrinsic::loongarch_lddir_d: {
    // Level immediate must fit in 8 unsigned bits.
    unsigned Imm = Op.getConstantOperandVal(3);
    return !isUInt<8>(Imm)
               ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
               : Op;
  }
  case Intrinsic::loongarch_movfcsr2gr: {
    // Requires the 'f' feature; FCSR number must fit in 2 unsigned bits.
    if (!Subtarget.hasBasicF())
      return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqF, DAG);
    unsigned Imm = Op.getConstantOperandVal(2);
    return !isUInt<2>(Imm)
               ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
               : DAG.getNode(LoongArchISD::MOVFCSR2GR, DL, {VT, MVT::Other},
                             {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
  }
  // Vector loads: offsets must be properly aligned signed immediates.
  case Intrinsic::loongarch_lsx_vld:
  case Intrinsic::loongarch_lsx_vldrepl_b:
  case Intrinsic::loongarch_lasx_xvld:
  case Intrinsic::loongarch_lasx_xvldrepl_b:
    return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
               ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
               : SDValue();
  case Intrinsic::loongarch_lsx_vldrepl_h:
  case Intrinsic::loongarch_lasx_xvldrepl_h:
    return !isShiftedInt<11, 1>(
               cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
                   Op, "argument out of range or not a multiple of 2", DAG)
               : SDValue();
  case Intrinsic::loongarch_lsx_vldrepl_w:
  case Intrinsic::loongarch_lasx_xvldrepl_w:
    return !isShiftedInt<10, 2>(
               cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
                   Op, "argument out of range or not a multiple of 4", DAG)
               : SDValue();
  case Intrinsic::loongarch_lsx_vldrepl_d:
  case Intrinsic::loongarch_lasx_xvldrepl_d:
    return !isShiftedInt<9, 3>(
               cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
                   Op, "argument out of range or not a multiple of 8", DAG)
               : SDValue();
  }
}
4715
// Helper function that emits an error message for an intrinsic with a void
// result and returns the incoming chain unchanged so lowering can continue.
                                         SelectionDAG &DAG) {

  DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
  return Op.getOperand(0);
}
4724
4725SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op,
4726 SelectionDAG &DAG) const {
4727 SDLoc DL(Op);
4728 MVT GRLenVT = Subtarget.getGRLenVT();
4729 SDValue Chain = Op.getOperand(0);
4730 uint64_t IntrinsicEnum = Op.getConstantOperandVal(1);
4731 SDValue Op2 = Op.getOperand(2);
4732 const StringRef ErrorMsgOOR = "argument out of range";
4733 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
4734 const StringRef ErrorMsgReqLA32 = "requires loongarch32";
4735 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
4736
4737 switch (IntrinsicEnum) {
4738 default:
4739 // TODO: Add more Intrinsics.
4740 return SDValue();
4741 case Intrinsic::loongarch_cacop_d:
4742 case Intrinsic::loongarch_cacop_w: {
4743 if (IntrinsicEnum == Intrinsic::loongarch_cacop_d && !Subtarget.is64Bit())
4744 return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG);
4745 if (IntrinsicEnum == Intrinsic::loongarch_cacop_w && Subtarget.is64Bit())
4746 return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA32, DAG);
4747 // call void @llvm.loongarch.cacop.[d/w](uimm5, rj, simm12)
4748 unsigned Imm1 = Op2->getAsZExtVal();
4749 int Imm2 = cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue();
4750 if (!isUInt<5>(Imm1) || !isInt<12>(Imm2))
4751 return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
4752 return Op;
4753 }
4754 case Intrinsic::loongarch_dbar: {
4755 unsigned Imm = Op2->getAsZExtVal();
4756 return !isUInt<15>(Imm)
4757 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4758 : DAG.getNode(LoongArchISD::DBAR, DL, MVT::Other, Chain,
4759 DAG.getConstant(Imm, DL, GRLenVT));
4760 }
4761 case Intrinsic::loongarch_ibar: {
4762 unsigned Imm = Op2->getAsZExtVal();
4763 return !isUInt<15>(Imm)
4764 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4765 : DAG.getNode(LoongArchISD::IBAR, DL, MVT::Other, Chain,
4766 DAG.getConstant(Imm, DL, GRLenVT));
4767 }
4768 case Intrinsic::loongarch_break: {
4769 unsigned Imm = Op2->getAsZExtVal();
4770 return !isUInt<15>(Imm)
4771 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4772 : DAG.getNode(LoongArchISD::BREAK, DL, MVT::Other, Chain,
4773 DAG.getConstant(Imm, DL, GRLenVT));
4774 }
4775 case Intrinsic::loongarch_movgr2fcsr: {
4776 if (!Subtarget.hasBasicF())
4777 return emitIntrinsicErrorMessage(Op, ErrorMsgReqF, DAG);
4778 unsigned Imm = Op2->getAsZExtVal();
4779 return !isUInt<2>(Imm)
4780 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4781 : DAG.getNode(LoongArchISD::MOVGR2FCSR, DL, MVT::Other, Chain,
4782 DAG.getConstant(Imm, DL, GRLenVT),
4783 DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT,
4784 Op.getOperand(3)));
4785 }
4786 case Intrinsic::loongarch_syscall: {
4787 unsigned Imm = Op2->getAsZExtVal();
4788 return !isUInt<15>(Imm)
4789 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4790 : DAG.getNode(LoongArchISD::SYSCALL, DL, MVT::Other, Chain,
4791 DAG.getConstant(Imm, DL, GRLenVT));
4792 }
4793#define IOCSRWR_CASE(NAME, NODE) \
4794 case Intrinsic::loongarch_##NAME: { \
4795 SDValue Op3 = Op.getOperand(3); \
4796 return Subtarget.is64Bit() \
4797 ? DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, \
4798 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
4799 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op3)) \
4800 : DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, Op2, \
4801 Op3); \
4802 }
4803 IOCSRWR_CASE(iocsrwr_b, IOCSRWR_B);
4804 IOCSRWR_CASE(iocsrwr_h, IOCSRWR_H);
4805 IOCSRWR_CASE(iocsrwr_w, IOCSRWR_W);
4806#undef IOCSRWR_CASE
4807 case Intrinsic::loongarch_iocsrwr_d: {
4808 return !Subtarget.is64Bit()
4809 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
4810 : DAG.getNode(LoongArchISD::IOCSRWR_D, DL, MVT::Other, Chain,
4811 Op2,
4812 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
4813 Op.getOperand(3)));
4814 }
4815#define ASRT_LE_GT_CASE(NAME) \
4816 case Intrinsic::loongarch_##NAME: { \
4817 return !Subtarget.is64Bit() \
4818 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG) \
4819 : Op; \
4820 }
4821 ASRT_LE_GT_CASE(asrtle_d)
4822 ASRT_LE_GT_CASE(asrtgt_d)
4823#undef ASRT_LE_GT_CASE
4824 case Intrinsic::loongarch_ldpte_d: {
4825 unsigned Imm = Op.getConstantOperandVal(3);
4826 return !Subtarget.is64Bit()
4827 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
4828 : !isUInt<8>(Imm) ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4829 : Op;
4830 }
4831 case Intrinsic::loongarch_lsx_vst:
4832 case Intrinsic::loongarch_lasx_xvst:
4833 return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue())
4834 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4835 : SDValue();
4836 case Intrinsic::loongarch_lasx_xvstelm_b:
4837 return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4838 !isUInt<5>(Op.getConstantOperandVal(5)))
4839 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4840 : SDValue();
4841 case Intrinsic::loongarch_lsx_vstelm_b:
4842 return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4843 !isUInt<4>(Op.getConstantOperandVal(5)))
4844 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4845 : SDValue();
4846 case Intrinsic::loongarch_lasx_xvstelm_h:
4847 return (!isShiftedInt<8, 1>(
4848 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4849 !isUInt<4>(Op.getConstantOperandVal(5)))
4851 Op, "argument out of range or not a multiple of 2", DAG)
4852 : SDValue();
4853 case Intrinsic::loongarch_lsx_vstelm_h:
4854 return (!isShiftedInt<8, 1>(
4855 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4856 !isUInt<3>(Op.getConstantOperandVal(5)))
4858 Op, "argument out of range or not a multiple of 2", DAG)
4859 : SDValue();
4860 case Intrinsic::loongarch_lasx_xvstelm_w:
4861 return (!isShiftedInt<8, 2>(
4862 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4863 !isUInt<3>(Op.getConstantOperandVal(5)))
4865 Op, "argument out of range or not a multiple of 4", DAG)
4866 : SDValue();
4867 case Intrinsic::loongarch_lsx_vstelm_w:
4868 return (!isShiftedInt<8, 2>(
4869 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4870 !isUInt<2>(Op.getConstantOperandVal(5)))
4872 Op, "argument out of range or not a multiple of 4", DAG)
4873 : SDValue();
4874 case Intrinsic::loongarch_lasx_xvstelm_d:
4875 return (!isShiftedInt<8, 3>(
4876 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4877 !isUInt<2>(Op.getConstantOperandVal(5)))
4879 Op, "argument out of range or not a multiple of 8", DAG)
4880 : SDValue();
4881 case Intrinsic::loongarch_lsx_vstelm_d:
4882 return (!isShiftedInt<8, 3>(
4883 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4884 !isUInt<1>(Op.getConstantOperandVal(5)))
4886 Op, "argument out of range or not a multiple of 8", DAG)
4887 : SDValue();
4888 }
4889}
4890
4891SDValue LoongArchTargetLowering::lowerShiftLeftParts(SDValue Op,
4892 SelectionDAG &DAG) const {
4893 SDLoc DL(Op);
4894 SDValue Lo = Op.getOperand(0);
4895 SDValue Hi = Op.getOperand(1);
4896 SDValue Shamt = Op.getOperand(2);
4897 EVT VT = Lo.getValueType();
4898
4899 // if Shamt-GRLen < 0: // Shamt < GRLen
4900 // Lo = Lo << Shamt
4901 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (GRLen-1 ^ Shamt))
4902 // else:
4903 // Lo = 0
4904 // Hi = Lo << (Shamt-GRLen)
4905
4906 SDValue Zero = DAG.getConstant(0, DL, VT);
4907 SDValue One = DAG.getConstant(1, DL, VT);
4908 SDValue MinusGRLen =
4909 DAG.getSignedConstant(-(int)Subtarget.getGRLen(), DL, VT);
4910 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
4911 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
4912 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
4913
4914 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
4915 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
4916 SDValue ShiftRightLo =
4917 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, GRLenMinus1Shamt);
4918 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
4919 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
4920 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusGRLen);
4921
4922 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
4923
4924 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
4925 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
4926
4927 SDValue Parts[2] = {Lo, Hi};
4928 return DAG.getMergeValues(Parts, DL);
4929}
4930
4931SDValue LoongArchTargetLowering::lowerShiftRightParts(SDValue Op,
4932 SelectionDAG &DAG,
4933 bool IsSRA) const {
4934 SDLoc DL(Op);
4935 SDValue Lo = Op.getOperand(0);
4936 SDValue Hi = Op.getOperand(1);
4937 SDValue Shamt = Op.getOperand(2);
4938 EVT VT = Lo.getValueType();
4939
4940 // SRA expansion:
4941 // if Shamt-GRLen < 0: // Shamt < GRLen
4942 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
4943 // Hi = Hi >>s Shamt
4944 // else:
4945 // Lo = Hi >>s (Shamt-GRLen);
4946 // Hi = Hi >>s (GRLen-1)
4947 //
4948 // SRL expansion:
4949 // if Shamt-GRLen < 0: // Shamt < GRLen
4950 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
4951 // Hi = Hi >>u Shamt
4952 // else:
4953 // Lo = Hi >>u (Shamt-GRLen);
4954 // Hi = 0;
4955
4956 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
4957
4958 SDValue Zero = DAG.getConstant(0, DL, VT);
4959 SDValue One = DAG.getConstant(1, DL, VT);
4960 SDValue MinusGRLen =
4961 DAG.getSignedConstant(-(int)Subtarget.getGRLen(), DL, VT);
4962 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
4963 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
4964 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
4965
4966 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
4967 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
4968 SDValue ShiftLeftHi =
4969 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, GRLenMinus1Shamt);
4970 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
4971 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
4972 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusGRLen);
4973 SDValue HiFalse =
4974 IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, GRLenMinus1) : Zero;
4975
4976 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
4977
4978 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
4979 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
4980
4981 SDValue Parts[2] = {Lo, Hi};
4982 return DAG.getMergeValues(Parts, DL);
4983}
4984
4985// Returns the opcode of the target-specific SDNode that implements the 32-bit
4986// form of the given Opcode.
4987static unsigned getLoongArchWOpcode(unsigned Opcode) {
4988 switch (Opcode) {
4989 default:
4990 llvm_unreachable("Unexpected opcode");
4991 case ISD::SDIV:
4992 return LoongArchISD::DIV_W;
4993 case ISD::UDIV:
4994 return LoongArchISD::DIV_WU;
4995 case ISD::SREM:
4996 return LoongArchISD::MOD_W;
4997 case ISD::UREM:
4998 return LoongArchISD::MOD_WU;
4999 case ISD::SHL:
5000 return LoongArchISD::SLL_W;
5001 case ISD::SRA:
5002 return LoongArchISD::SRA_W;
5003 case ISD::SRL:
5004 return LoongArchISD::SRL_W;
5005 case ISD::ROTL:
5006 case ISD::ROTR:
5007 return LoongArchISD::ROTR_W;
5008 case ISD::CTTZ:
5009 return LoongArchISD::CTZ_W;
5010 case ISD::CTLZ:
5011 return LoongArchISD::CLZ_W;
5012 }
5013}
5014
5015// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
5016// node. Because i8/i16/i32 isn't a legal type for LA64, these operations would
5017// otherwise be promoted to i64, making it difficult to select the
5018// SLL_W/.../*W later one because the fact the operation was originally of
5019// type i8/i16/i32 is lost.
5021 unsigned ExtOpc = ISD::ANY_EXTEND) {
5022 SDLoc DL(N);
5023 unsigned WOpcode = getLoongArchWOpcode(N->getOpcode());
5024 SDValue NewOp0, NewRes;
5025
5026 switch (NumOp) {
5027 default:
5028 llvm_unreachable("Unexpected NumOp");
5029 case 1: {
5030 NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
5031 NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0);
5032 break;
5033 }
5034 case 2: {
5035 NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
5036 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
5037 if (N->getOpcode() == ISD::ROTL) {
5038 SDValue TmpOp = DAG.getConstant(32, DL, MVT::i64);
5039 NewOp1 = DAG.getNode(ISD::SUB, DL, MVT::i64, TmpOp, NewOp1);
5040 }
5041 NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
5042 break;
5043 }
5044 // TODO:Handle more NumOp.
5045 }
5046
5047 // ReplaceNodeResults requires we maintain the same type for the return
5048 // value.
5049 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
5050}
5051
5052// Converts the given 32-bit operation to a i64 operation with signed extension
5053// semantic to reduce the signed extension instructions.
5055 SDLoc DL(N);
5056 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
5057 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
5058 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
5059 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
5060 DAG.getValueType(MVT::i32));
5061 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
5062}
5063
5064// Helper function that emits error message for intrinsics with/without chain
5065// and return a UNDEF or and the chain as the results.
5068 StringRef ErrorMsg, bool WithChain = true) {
5069 DAG.getContext()->emitError(N->getOperationName(0) + ": " + ErrorMsg + ".");
5070 Results.push_back(DAG.getUNDEF(N->getValueType(0)));
5071 if (!WithChain)
5072 return;
5073 Results.push_back(N->getOperand(0));
5074}
5075
5076template <unsigned N>
5077static void
5079 SelectionDAG &DAG, const LoongArchSubtarget &Subtarget,
5080 unsigned ResOp) {
5081 const StringRef ErrorMsgOOR = "argument out of range";
5082 unsigned Imm = Node->getConstantOperandVal(2);
5083 if (!isUInt<N>(Imm)) {
5085 /*WithChain=*/false);
5086 return;
5087 }
5088 SDLoc DL(Node);
5089 SDValue Vec = Node->getOperand(1);
5090
5091 SDValue PickElt =
5092 DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec,
5093 DAG.getConstant(Imm, DL, Subtarget.getGRLenVT()),
5095 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, Node->getValueType(0),
5096 PickElt.getValue(0)));
5097}
5098
5101 SelectionDAG &DAG,
5102 const LoongArchSubtarget &Subtarget,
5103 unsigned ResOp) {
5104 SDLoc DL(N);
5105 SDValue Vec = N->getOperand(1);
5106
5107 SDValue CB = DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec);
5108 Results.push_back(
5109 DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), CB.getValue(0)));
5110}
5111
5112static void
5114 SelectionDAG &DAG,
5115 const LoongArchSubtarget &Subtarget) {
5116 switch (N->getConstantOperandVal(0)) {
5117 default:
5118 llvm_unreachable("Unexpected Intrinsic.");
5119 case Intrinsic::loongarch_lsx_vpickve2gr_b:
5120 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
5121 LoongArchISD::VPICK_SEXT_ELT);
5122 break;
5123 case Intrinsic::loongarch_lsx_vpickve2gr_h:
5124 case Intrinsic::loongarch_lasx_xvpickve2gr_w:
5125 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
5126 LoongArchISD::VPICK_SEXT_ELT);
5127 break;
5128 case Intrinsic::loongarch_lsx_vpickve2gr_w:
5129 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
5130 LoongArchISD::VPICK_SEXT_ELT);
5131 break;
5132 case Intrinsic::loongarch_lsx_vpickve2gr_bu:
5133 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
5134 LoongArchISD::VPICK_ZEXT_ELT);
5135 break;
5136 case Intrinsic::loongarch_lsx_vpickve2gr_hu:
5137 case Intrinsic::loongarch_lasx_xvpickve2gr_wu:
5138 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
5139 LoongArchISD::VPICK_ZEXT_ELT);
5140 break;
5141 case Intrinsic::loongarch_lsx_vpickve2gr_wu:
5142 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
5143 LoongArchISD::VPICK_ZEXT_ELT);
5144 break;
5145 case Intrinsic::loongarch_lsx_bz_b:
5146 case Intrinsic::loongarch_lsx_bz_h:
5147 case Intrinsic::loongarch_lsx_bz_w:
5148 case Intrinsic::loongarch_lsx_bz_d:
5149 case Intrinsic::loongarch_lasx_xbz_b:
5150 case Intrinsic::loongarch_lasx_xbz_h:
5151 case Intrinsic::loongarch_lasx_xbz_w:
5152 case Intrinsic::loongarch_lasx_xbz_d:
5153 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
5154 LoongArchISD::VALL_ZERO);
5155 break;
5156 case Intrinsic::loongarch_lsx_bz_v:
5157 case Intrinsic::loongarch_lasx_xbz_v:
5158 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
5159 LoongArchISD::VANY_ZERO);
5160 break;
5161 case Intrinsic::loongarch_lsx_bnz_b:
5162 case Intrinsic::loongarch_lsx_bnz_h:
5163 case Intrinsic::loongarch_lsx_bnz_w:
5164 case Intrinsic::loongarch_lsx_bnz_d:
5165 case Intrinsic::loongarch_lasx_xbnz_b:
5166 case Intrinsic::loongarch_lasx_xbnz_h:
5167 case Intrinsic::loongarch_lasx_xbnz_w:
5168 case Intrinsic::loongarch_lasx_xbnz_d:
5169 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
5170 LoongArchISD::VALL_NONZERO);
5171 break;
5172 case Intrinsic::loongarch_lsx_bnz_v:
5173 case Intrinsic::loongarch_lasx_xbnz_v:
5174 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
5175 LoongArchISD::VANY_NONZERO);
5176 break;
5177 }
5178}
5179
5182 SelectionDAG &DAG) {
5183 assert(N->getValueType(0) == MVT::i128 &&
5184 "AtomicCmpSwap on types less than 128 should be legal");
5185 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
5186
5187 unsigned Opcode;
5188 switch (MemOp->getMergedOrdering()) {
5192 Opcode = LoongArch::PseudoCmpXchg128Acquire;
5193 break;
5196 Opcode = LoongArch::PseudoCmpXchg128;
5197 break;
5198 default:
5199 llvm_unreachable("Unexpected ordering!");
5200 }
5201
5202 SDLoc DL(N);
5203 auto CmpVal = DAG.SplitScalar(N->getOperand(2), DL, MVT::i64, MVT::i64);
5204 auto NewVal = DAG.SplitScalar(N->getOperand(3), DL, MVT::i64, MVT::i64);
5205 SDValue Ops[] = {N->getOperand(1), CmpVal.first, CmpVal.second,
5206 NewVal.first, NewVal.second, N->getOperand(0)};
5207
5208 SDNode *CmpSwap = DAG.getMachineNode(
5209 Opcode, SDLoc(N), DAG.getVTList(MVT::i64, MVT::i64, MVT::i64, MVT::Other),
5210 Ops);
5211 DAG.setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
5212 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128,
5213 SDValue(CmpSwap, 0), SDValue(CmpSwap, 1)));
5214 Results.push_back(SDValue(CmpSwap, 3));
5215}
5216
5219 SDLoc DL(N);
5220 EVT VT = N->getValueType(0);
5221 switch (N->getOpcode()) {
5222 default:
5223 llvm_unreachable("Don't know how to legalize this operation");
5224 case ISD::ADD:
5225 case ISD::SUB:
5226 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
5227 "Unexpected custom legalisation");
5228 Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
5229 break;
5230 case ISD::SDIV:
5231 case ISD::UDIV:
5232 case ISD::SREM:
5233 case ISD::UREM:
5234 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
5235 "Unexpected custom legalisation");
5236 Results.push_back(customLegalizeToWOp(N, DAG, 2,
5237 Subtarget.hasDiv32() && VT == MVT::i32
5239 : ISD::SIGN_EXTEND));
5240 break;
5241 case ISD::SHL:
5242 case ISD::SRA:
5243 case ISD::SRL:
5244 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
5245 "Unexpected custom legalisation");
5246 if (N->getOperand(1).getOpcode() != ISD::Constant) {
5247 Results.push_back(customLegalizeToWOp(N, DAG, 2));
5248 break;
5249 }
5250 break;
5251 case ISD::ROTL:
5252 case ISD::ROTR:
5253 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
5254 "Unexpected custom legalisation");
5255 Results.push_back(customLegalizeToWOp(N, DAG, 2));
5256 break;
5257 case ISD::FP_TO_SINT: {
5258 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
5259 "Unexpected custom legalisation");
5260 SDValue Src = N->getOperand(0);
5261 EVT FVT = EVT::getFloatingPointVT(N->getValueSizeInBits(0));
5262 if (getTypeAction(*DAG.getContext(), Src.getValueType()) !=
5264 if (!isTypeLegal(Src.getValueType()))
5265 return;
5266 if (Src.getValueType() == MVT::f16)
5267 Src = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Src);
5268 SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, FVT, Src);
5269 Results.push_back(DAG.getNode(ISD::BITCAST, DL, VT, Dst));
5270 return;
5271 }
5272 // If the FP type needs to be softened, emit a library call using the 'si'
5273 // version. If we left it to default legalization we'd end up with 'di'.
5274 RTLIB::Libcall LC;
5275 LC = RTLIB::getFPTOSINT(Src.getValueType(), VT);
5276 MakeLibCallOptions CallOptions;
5277 EVT OpVT = Src.getValueType();
5278 CallOptions.setTypeListBeforeSoften(OpVT, VT);
5279 SDValue Chain = SDValue();
5280 SDValue Result;
5281 std::tie(Result, Chain) =
5282 makeLibCall(DAG, LC, VT, Src, CallOptions, DL, Chain);
5283 Results.push_back(Result);
5284 break;
5285 }
5286 case ISD::BITCAST: {
5287 SDValue Src = N->getOperand(0);
5288 EVT SrcVT = Src.getValueType();
5289 if (VT == MVT::i32 && SrcVT == MVT::f32 && Subtarget.is64Bit() &&
5290 Subtarget.hasBasicF()) {
5291 SDValue Dst =
5292 DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Src);
5293 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Dst));
5294 } else if (VT == MVT::i64 && SrcVT == MVT::f64 && !Subtarget.is64Bit()) {
5295 SDValue NewReg = DAG.getNode(LoongArchISD::SPLIT_PAIR_F64, DL,
5296 DAG.getVTList(MVT::i32, MVT::i32), Src);
5297 SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,
5298 NewReg.getValue(0), NewReg.getValue(1));
5299 Results.push_back(RetReg);
5300 }
5301 break;
5302 }
5303 case ISD::FP_TO_UINT: {
5304 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
5305 "Unexpected custom legalisation");
5306 auto &TLI = DAG.getTargetLoweringInfo();
5307 SDValue Tmp1, Tmp2;
5308 TLI.expandFP_TO_UINT(N, Tmp1, Tmp2, DAG);
5309 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Tmp1));
5310 break;
5311 }
5312 case ISD::FP_ROUND: {
5313 assert(VT == MVT::v2f32 && Subtarget.hasExtLSX() &&
5314 "Unexpected custom legalisation");
5315 // On LSX platforms, rounding from v2f64 to v4f32 (after legalization from
5316 // v2f32) is scalarized. Add a customized v2f32 widening to convert it into
5317 // a target-specific LoongArchISD::VFCVT to optimize it.
5318 SDValue Op0 = N->getOperand(0);
5319 EVT OpVT = Op0.getValueType();
5320 if (OpVT == MVT::v2f64) {
5321 SDValue Undef = DAG.getUNDEF(OpVT);
5322 SDValue Dst =
5323 DAG.getNode(LoongArchISD::VFCVT, DL, MVT::v4f32, Undef, Op0);
5324 Results.push_back(Dst);
5325 }
5326 break;
5327 }
5328 case ISD::BSWAP: {
5329 SDValue Src = N->getOperand(0);
5330 assert((VT == MVT::i16 || VT == MVT::i32) &&
5331 "Unexpected custom legalization");
5332 MVT GRLenVT = Subtarget.getGRLenVT();
5333 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
5334 SDValue Tmp;
5335 switch (VT.getSizeInBits()) {
5336 default:
5337 llvm_unreachable("Unexpected operand width");
5338 case 16:
5339 Tmp = DAG.getNode(LoongArchISD::REVB_2H, DL, GRLenVT, NewSrc);
5340 break;
5341 case 32:
5342 // Only LA64 will get to here due to the size mismatch between VT and
5343 // GRLenVT, LA32 lowering is directly defined in LoongArchInstrInfo.
5344 Tmp = DAG.getNode(LoongArchISD::REVB_2W, DL, GRLenVT, NewSrc);
5345 break;
5346 }
5347 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
5348 break;
5349 }
5350 case ISD::BITREVERSE: {
5351 SDValue Src = N->getOperand(0);
5352 assert((VT == MVT::i8 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
5353 "Unexpected custom legalization");
5354 MVT GRLenVT = Subtarget.getGRLenVT();
5355 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
5356 SDValue Tmp;
5357 switch (VT.getSizeInBits()) {
5358 default:
5359 llvm_unreachable("Unexpected operand width");
5360 case 8:
5361 Tmp = DAG.getNode(LoongArchISD::BITREV_4B, DL, GRLenVT, NewSrc);
5362 break;
5363 case 32:
5364 Tmp = DAG.getNode(LoongArchISD::BITREV_W, DL, GRLenVT, NewSrc);
5365 break;
5366 }
5367 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
5368 break;
5369 }
5370 case ISD::CTLZ:
5371 case ISD::CTTZ: {
5372 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
5373 "Unexpected custom legalisation");
5374 Results.push_back(customLegalizeToWOp(N, DAG, 1));
5375 break;
5376 }
5378 SDValue Chain = N->getOperand(0);
5379 SDValue Op2 = N->getOperand(2);
5380 MVT GRLenVT = Subtarget.getGRLenVT();
5381 const StringRef ErrorMsgOOR = "argument out of range";
5382 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
5383 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
5384
5385 switch (N->getConstantOperandVal(1)) {
5386 default:
5387 llvm_unreachable("Unexpected Intrinsic.");
5388 case Intrinsic::loongarch_movfcsr2gr: {
5389 if (!Subtarget.hasBasicF()) {
5390 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqF);
5391 return;
5392 }
5393 unsigned Imm = Op2->getAsZExtVal();
5394 if (!isUInt<2>(Imm)) {
5395 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
5396 return;
5397 }
5398 SDValue MOVFCSR2GRResults = DAG.getNode(
5399 LoongArchISD::MOVFCSR2GR, SDLoc(N), {MVT::i64, MVT::Other},
5400 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
5401 Results.push_back(
5402 DAG.getNode(ISD::TRUNCATE, DL, VT, MOVFCSR2GRResults.getValue(0)));
5403 Results.push_back(MOVFCSR2GRResults.getValue(1));
5404 break;
5405 }
5406#define CRC_CASE_EXT_BINARYOP(NAME, NODE) \
5407 case Intrinsic::loongarch_##NAME: { \
5408 SDValue NODE = DAG.getNode( \
5409 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
5410 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
5411 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
5412 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
5413 Results.push_back(NODE.getValue(1)); \
5414 break; \
5415 }
5416 CRC_CASE_EXT_BINARYOP(crc_w_b_w, CRC_W_B_W)
5417 CRC_CASE_EXT_BINARYOP(crc_w_h_w, CRC_W_H_W)
5418 CRC_CASE_EXT_BINARYOP(crc_w_w_w, CRC_W_W_W)
5419 CRC_CASE_EXT_BINARYOP(crcc_w_b_w, CRCC_W_B_W)
5420 CRC_CASE_EXT_BINARYOP(crcc_w_h_w, CRCC_W_H_W)
5421 CRC_CASE_EXT_BINARYOP(crcc_w_w_w, CRCC_W_W_W)
5422#undef CRC_CASE_EXT_BINARYOP
5423
5424#define CRC_CASE_EXT_UNARYOP(NAME, NODE) \
5425 case Intrinsic::loongarch_##NAME: { \
5426 SDValue NODE = DAG.getNode( \
5427 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
5428 {Chain, Op2, \
5429 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
5430 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
5431 Results.push_back(NODE.getValue(1)); \
5432 break; \
5433 }
5434 CRC_CASE_EXT_UNARYOP(crc_w_d_w, CRC_W_D_W)
5435 CRC_CASE_EXT_UNARYOP(crcc_w_d_w, CRCC_W_D_W)
5436#undef CRC_CASE_EXT_UNARYOP
5437#define CSR_CASE(ID) \
5438 case Intrinsic::loongarch_##ID: { \
5439 if (!Subtarget.is64Bit()) \
5440 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64); \
5441 break; \
5442 }
5443 CSR_CASE(csrrd_d);
5444 CSR_CASE(csrwr_d);
5445 CSR_CASE(csrxchg_d);
5446 CSR_CASE(iocsrrd_d);
5447#undef CSR_CASE
5448 case Intrinsic::loongarch_csrrd_w: {
5449 unsigned Imm = Op2->getAsZExtVal();
5450 if (!isUInt<14>(Imm)) {
5451 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
5452 return;
5453 }
5454 SDValue CSRRDResults =
5455 DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
5456 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
5457 Results.push_back(
5458 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRRDResults.getValue(0)));
5459 Results.push_back(CSRRDResults.getValue(1));
5460 break;
5461 }
5462 case Intrinsic::loongarch_csrwr_w: {
5463 unsigned Imm = N->getConstantOperandVal(3);
5464 if (!isUInt<14>(Imm)) {
5465 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
5466 return;
5467 }
5468 SDValue CSRWRResults =
5469 DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
5470 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
5471 DAG.getConstant(Imm, DL, GRLenVT)});
5472 Results.push_back(
5473 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRWRResults.getValue(0)));
5474 Results.push_back(CSRWRResults.getValue(1));
5475 break;
5476 }
5477 case Intrinsic::loongarch_csrxchg_w: {
5478 unsigned Imm = N->getConstantOperandVal(4);
5479 if (!isUInt<14>(Imm)) {
5480 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
5481 return;
5482 }
5483 SDValue CSRXCHGResults = DAG.getNode(
5484 LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
5485 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
5486 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3)),
5487 DAG.getConstant(Imm, DL, GRLenVT)});
5488 Results.push_back(
5489 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRXCHGResults.getValue(0)));
5490 Results.push_back(CSRXCHGResults.getValue(1));
5491 break;
5492 }
5493#define IOCSRRD_CASE(NAME, NODE) \
5494 case Intrinsic::loongarch_##NAME: { \
5495 SDValue IOCSRRDResults = \
5496 DAG.getNode(LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
5497 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)}); \
5498 Results.push_back( \
5499 DAG.getNode(ISD::TRUNCATE, DL, VT, IOCSRRDResults.getValue(0))); \
5500 Results.push_back(IOCSRRDResults.getValue(1)); \
5501 break; \
5502 }
5503 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
5504 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
5505 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
5506#undef IOCSRRD_CASE
5507 case Intrinsic::loongarch_cpucfg: {
5508 SDValue CPUCFGResults =
5509 DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
5510 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)});
5511 Results.push_back(
5512 DAG.getNode(ISD::TRUNCATE, DL, VT, CPUCFGResults.getValue(0)));
5513 Results.push_back(CPUCFGResults.getValue(1));
5514 break;
5515 }
5516 case Intrinsic::loongarch_lddir_d: {
5517 if (!Subtarget.is64Bit()) {
5518 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64);
5519 return;
5520 }
5521 break;
5522 }
5523 }
5524 break;
5525 }
5526 case ISD::READ_REGISTER: {
5527 if (Subtarget.is64Bit())
5528 DAG.getContext()->emitError(
5529 "On LA64, only 64-bit registers can be read.");
5530 else
5531 DAG.getContext()->emitError(
5532 "On LA32, only 32-bit registers can be read.");
5533 Results.push_back(DAG.getUNDEF(VT));
5534 Results.push_back(N->getOperand(0));
5535 break;
5536 }
5538 replaceINTRINSIC_WO_CHAINResults(N, Results, DAG, Subtarget);
5539 break;
5540 }
5541 case ISD::LROUND: {
5542 SDValue Op0 = N->getOperand(0);
5543 EVT OpVT = Op0.getValueType();
5544 RTLIB::Libcall LC =
5545 OpVT == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
5546 MakeLibCallOptions CallOptions;
5547 CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64);
5548 SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first;
5549 Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
5550 Results.push_back(Result);
5551 break;
5552 }
5553 case ISD::ATOMIC_CMP_SWAP: {
5555 break;
5556 }
5557 case ISD::TRUNCATE: {
5558 MVT VT = N->getSimpleValueType(0);
5559 if (getTypeAction(*DAG.getContext(), VT) != TypeWidenVector)
5560 return;
5561
5562 MVT WidenVT = getTypeToTransformTo(*DAG.getContext(), VT).getSimpleVT();
5563 SDValue In = N->getOperand(0);
5564 EVT InVT = In.getValueType();
5565 EVT InEltVT = InVT.getVectorElementType();
5566 EVT EltVT = VT.getVectorElementType();
5567 unsigned MinElts = VT.getVectorNumElements();
5568 unsigned WidenNumElts = WidenVT.getVectorNumElements();
5569 unsigned InBits = InVT.getSizeInBits();
5570
5571 if ((128 % InBits) == 0 && WidenVT.is128BitVector()) {
5572 if ((InEltVT.getSizeInBits() % EltVT.getSizeInBits()) == 0) {
5573 int Scale = InEltVT.getSizeInBits() / EltVT.getSizeInBits();
5574 SmallVector<int, 16> TruncMask(WidenNumElts, -1);
5575 for (unsigned I = 0; I < MinElts; ++I)
5576 TruncMask[I] = Scale * I;
5577
5578 unsigned WidenNumElts = 128 / In.getScalarValueSizeInBits();
5579 MVT SVT = In.getSimpleValueType().getScalarType();
5580 MVT VT = MVT::getVectorVT(SVT, WidenNumElts);
5581 SDValue WidenIn =
5582 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), In,
5583 DAG.getVectorIdxConstant(0, DL));
5584 assert(isTypeLegal(WidenVT) && isTypeLegal(WidenIn.getValueType()) &&
5585 "Illegal vector type in truncation");
5586 WidenIn = DAG.getBitcast(WidenVT, WidenIn);
5587 Results.push_back(
5588 DAG.getVectorShuffle(WidenVT, DL, WidenIn, WidenIn, TruncMask));
5589 return;
5590 }
5591 }
5592
5593 break;
5594 }
5595 }
5596}
5597
5598/// Try to fold: (and (xor X, -1), Y) -> (vandn X, Y).
5600 SelectionDAG &DAG) {
5601 assert(N->getOpcode() == ISD::AND && "Unexpected opcode combine into ANDN");
5602
5603 MVT VT = N->getSimpleValueType(0);
5604 if (!VT.is128BitVector() && !VT.is256BitVector())
5605 return SDValue();
5606
5607 SDValue X, Y;
5608 SDValue N0 = N->getOperand(0);
5609 SDValue N1 = N->getOperand(1);
5610
5611 if (SDValue Not = isNOT(N0, DAG)) {
5612 X = Not;
5613 Y = N1;
5614 } else if (SDValue Not = isNOT(N1, DAG)) {
5615 X = Not;
5616 Y = N0;
5617 } else
5618 return SDValue();
5619
5620 X = DAG.getBitcast(VT, X);
5621 Y = DAG.getBitcast(VT, Y);
5622 return DAG.getNode(LoongArchISD::VANDN, DL, VT, X, Y);
5623}
5624
5625static bool isConstantSplatVector(SDValue N, APInt &SplatValue,
5626 unsigned MinSizeInBits) {
5629
5630 if (!Node)
5631 return false;
5632
5633 APInt SplatUndef;
5634 unsigned SplatBitSize;
5635 bool HasAnyUndefs;
5636
5637 return Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
5638 HasAnyUndefs, MinSizeInBits,
5639 /*IsBigEndian=*/false);
5640}
5641
5644 const LoongArchSubtarget &Subtarget) {
5645 if (DCI.isBeforeLegalizeOps())
5646 return SDValue();
5647
5648 EVT VT = N->getValueType(0);
5649 if (!VT.isVector())
5650 return SDValue();
5651
5652 if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
5653 return SDValue();
5654
5655 EVT EltVT = VT.getVectorElementType();
5656 if (!EltVT.isInteger())
5657 return SDValue();
5658
5659 // match:
5660 //
5661 // add
5662 // (and
5663 // (srl X, shift-1) / X
5664 // 1)
5665 // (srl/sra X, shift)
5666
5667 SDValue Add0 = N->getOperand(0);
5668 SDValue Add1 = N->getOperand(1);
5669 SDValue And;
5670 SDValue Shr;
5671
5672 if (Add0.getOpcode() == ISD::AND) {
5673 And = Add0;
5674 Shr = Add1;
5675 } else if (Add1.getOpcode() == ISD::AND) {
5676 And = Add1;
5677 Shr = Add0;
5678 } else {
5679 return SDValue();
5680 }
5681
5682 // match:
5683 //
5684 // srl/sra X, shift
5685
5686 if (Shr.getOpcode() != ISD::SRL && Shr.getOpcode() != ISD::SRA)
5687 return SDValue();
5688
5689 SDValue X = Shr.getOperand(0);
5690 SDValue Shift = Shr.getOperand(1);
5691 APInt ShiftVal;
5692
5693 if (!isConstantSplatVector(Shift, ShiftVal, EltVT.getSizeInBits()))
5694 return SDValue();
5695
5696 if (ShiftVal == 0)
5697 return SDValue();
5698
5699 // match:
5700 //
5701 // and
5702 // (srl X, shift-1) / X
5703 // 1
5704
5705 SDValue One = And.getOperand(1);
5706 APInt SplatVal;
5707
5708 if (!isConstantSplatVector(One, SplatVal, EltVT.getSizeInBits()))
5709 return SDValue();
5710
5711 if (SplatVal != 1)
5712 return SDValue();
5713
5714 if (And.getOperand(0) == X) {
5715 // match:
5716 //
5717 // shift == 1
5718
5719 if (ShiftVal != 1)
5720 return SDValue();
5721 } else {
5722 // match:
5723 //
5724 // srl X, shift-1
5725
5726 SDValue Srl = And.getOperand(0);
5727
5728 if (Srl.getOpcode() != ISD::SRL)
5729 return SDValue();
5730
5731 if (Srl.getOperand(0) != X)
5732 return SDValue();
5733
5734 // match:
5735 //
5736 // shift-1
5737
5738 SDValue ShiftMinus1 = Srl.getOperand(1);
5739
5740 if (!isConstantSplatVector(ShiftMinus1, SplatVal, EltVT.getSizeInBits()))
5741 return SDValue();
5742
5743 if (ShiftVal != (SplatVal + 1))
5744 return SDValue();
5745 }
5746
5747 // We matched a rounded right shift pattern and can lower it
5748 // to a single vector rounded shift instruction.
5749
5750 SDLoc DL(N);
5751 return DAG.getNode(Shr.getOpcode() == ISD::SRL ? LoongArchISD::VSRLR
5752 : LoongArchISD::VSRAR,
5753 DL, VT, X, Shift);
5754}
5755
5758 const LoongArchSubtarget &Subtarget) {
5759 if (DCI.isBeforeLegalizeOps())
5760 return SDValue();
5761
5762 SDValue FirstOperand = N->getOperand(0);
5763 SDValue SecondOperand = N->getOperand(1);
5764 unsigned FirstOperandOpc = FirstOperand.getOpcode();
5765 EVT ValTy = N->getValueType(0);
5766 SDLoc DL(N);
5767 uint64_t lsb, msb;
5768 unsigned SMIdx, SMLen;
5769 ConstantSDNode *CN;
5770 SDValue NewOperand;
5771 MVT GRLenVT = Subtarget.getGRLenVT();
5772
5773 if (SDValue R = combineAndNotIntoVANDN(N, DL, DAG))
5774 return R;
5775
5776 // BSTRPICK requires the 32S feature.
5777 if (!Subtarget.has32S())
5778 return SDValue();
5779
5780 // Op's second operand must be a shifted mask.
5781 if (!(CN = dyn_cast<ConstantSDNode>(SecondOperand)) ||
5782 !isShiftedMask_64(CN->getZExtValue(), SMIdx, SMLen))
5783 return SDValue();
5784
5785 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL) {
5786 // Pattern match BSTRPICK.
5787 // $dst = and ((sra or srl) $src , lsb), (2**len - 1)
5788 // => BSTRPICK $dst, $src, msb, lsb
5789 // where msb = lsb + len - 1
5790
5791 // The second operand of the shift must be an immediate.
5792 if (!(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))))
5793 return SDValue();
5794
5795 lsb = CN->getZExtValue();
5796
5797 // Return if the shifted mask does not start at bit 0 or the sum of its
5798 // length and lsb exceeds the word's size.
5799 if (SMIdx != 0 || lsb + SMLen > ValTy.getSizeInBits())
5800 return SDValue();
5801
5802 NewOperand = FirstOperand.getOperand(0);
5803 } else {
5804 // Pattern match BSTRPICK.
5805 // $dst = and $src, (2**len- 1) , if len > 12
5806 // => BSTRPICK $dst, $src, msb, lsb
5807 // where lsb = 0 and msb = len - 1
5808
5809 // If the mask is <= 0xfff, andi can be used instead.
5810 if (CN->getZExtValue() <= 0xfff)
5811 return SDValue();
5812
5813 // Return if the MSB exceeds.
5814 if (SMIdx + SMLen > ValTy.getSizeInBits())
5815 return SDValue();
5816
5817 if (SMIdx > 0) {
5818 // Omit if the constant has more than 2 uses. This a conservative
5819 // decision. Whether it is a win depends on the HW microarchitecture.
5820 // However it should always be better for 1 and 2 uses.
5821 if (CN->use_size() > 2)
5822 return SDValue();
5823 // Return if the constant can be composed by a single LU12I.W.
5824 if ((CN->getZExtValue() & 0xfff) == 0)
5825 return SDValue();
5826 // Return if the constand can be composed by a single ADDI with
5827 // the zero register.
5828 if (CN->getSExtValue() >= -2048 && CN->getSExtValue() < 0)
5829 return SDValue();
5830 }
5831
5832 lsb = SMIdx;
5833 NewOperand = FirstOperand;
5834 }
5835
5836 msb = lsb + SMLen - 1;
5837 SDValue NR0 = DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy, NewOperand,
5838 DAG.getConstant(msb, DL, GRLenVT),
5839 DAG.getConstant(lsb, DL, GRLenVT));
5840 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL || lsb == 0)
5841 return NR0;
5842 // Try to optimize to
5843 // bstrpick $Rd, $Rs, msb, lsb
5844 // slli $Rd, $Rd, lsb
5845 return DAG.getNode(ISD::SHL, DL, ValTy, NR0,
5846 DAG.getConstant(lsb, DL, GRLenVT));
5847}
5848
5851 const LoongArchSubtarget &Subtarget) {
5852 // BSTRPICK requires the 32S feature.
5853 if (!Subtarget.has32S())
5854 return SDValue();
5855
5856 if (DCI.isBeforeLegalizeOps())
5857 return SDValue();
5858
5859 // $dst = srl (and $src, Mask), Shamt
5860 // =>
5861 // BSTRPICK $dst, $src, MaskIdx+MaskLen-1, Shamt
5862 // when Mask is a shifted mask, and MaskIdx <= Shamt <= MaskIdx+MaskLen-1
5863 //
5864
5865 SDValue FirstOperand = N->getOperand(0);
5866 ConstantSDNode *CN;
5867 EVT ValTy = N->getValueType(0);
5868 SDLoc DL(N);
5869 MVT GRLenVT = Subtarget.getGRLenVT();
5870 unsigned MaskIdx, MaskLen;
5871 uint64_t Shamt;
5872
5873 // The first operand must be an AND and the second operand of the AND must be
5874 // a shifted mask.
5875 if (FirstOperand.getOpcode() != ISD::AND ||
5876 !(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))) ||
5877 !isShiftedMask_64(CN->getZExtValue(), MaskIdx, MaskLen))
5878 return SDValue();
5879
5880 // The second operand (shift amount) must be an immediate.
5881 if (!(CN = dyn_cast<ConstantSDNode>(N->getOperand(1))))
5882 return SDValue();
5883
5884 Shamt = CN->getZExtValue();
5885 if (MaskIdx <= Shamt && Shamt <= MaskIdx + MaskLen - 1)
5886 return DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy,
5887 FirstOperand->getOperand(0),
5888 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
5889 DAG.getConstant(Shamt, DL, GRLenVT));
5890
5891 return SDValue();
5892}
5893
5894// Helper to peek through bitops/trunc/setcc to determine size of source vector.
5895// Allows BITCASTCombine to determine what size vector generated a <X x i1>.
5896static bool checkBitcastSrcVectorSize(SDValue Src, unsigned Size,
5897 unsigned Depth) {
5898 // Limit recursion.
5900 return false;
5901 switch (Src.getOpcode()) {
5902 case ISD::SETCC:
5903 case ISD::TRUNCATE:
5904 return Src.getOperand(0).getValueSizeInBits() == Size;
5905 case ISD::FREEZE:
5906 return checkBitcastSrcVectorSize(Src.getOperand(0), Size, Depth + 1);
5907 case ISD::AND:
5908 case ISD::XOR:
5909 case ISD::OR:
5910 return checkBitcastSrcVectorSize(Src.getOperand(0), Size, Depth + 1) &&
5911 checkBitcastSrcVectorSize(Src.getOperand(1), Size, Depth + 1);
5912 case ISD::SELECT:
5913 case ISD::VSELECT:
5914 return Src.getOperand(0).getScalarValueSizeInBits() == 1 &&
5915 checkBitcastSrcVectorSize(Src.getOperand(1), Size, Depth + 1) &&
5916 checkBitcastSrcVectorSize(Src.getOperand(2), Size, Depth + 1);
5917 case ISD::BUILD_VECTOR:
5918 return ISD::isBuildVectorAllZeros(Src.getNode()) ||
5919 ISD::isBuildVectorAllOnes(Src.getNode());
5920 }
5921 return false;
5922}
5923
5924// Helper to push sign extension of vXi1 SETCC result through bitops.
5926 SDValue Src, const SDLoc &DL) {
5927 switch (Src.getOpcode()) {
5928 case ISD::SETCC:
5929 case ISD::FREEZE:
5930 case ISD::TRUNCATE:
5931 case ISD::BUILD_VECTOR:
5932 return DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, Src);
5933 case ISD::AND:
5934 case ISD::XOR:
5935 case ISD::OR:
5936 return DAG.getNode(
5937 Src.getOpcode(), DL, SExtVT,
5938 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(0), DL),
5939 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(1), DL));
5940 case ISD::SELECT:
5941 case ISD::VSELECT:
5942 return DAG.getSelect(
5943 DL, SExtVT, Src.getOperand(0),
5944 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(1), DL),
5945 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(2), DL));
5946 }
5947 llvm_unreachable("Unexpected node type for vXi1 sign extension");
5948}
5949
5950static SDValue
5953 const LoongArchSubtarget &Subtarget) {
5954 SDLoc DL(N);
5955 EVT VT = N->getValueType(0);
5956 SDValue Src = N->getOperand(0);
5957 EVT SrcVT = Src.getValueType();
5958
5959 if (Src.getOpcode() != ISD::SETCC || !Src.hasOneUse())
5960 return SDValue();
5961
5962 bool UseLASX;
5963 unsigned Opc = ISD::DELETED_NODE;
5964 EVT CmpVT = Src.getOperand(0).getValueType();
5965 EVT EltVT = CmpVT.getVectorElementType();
5966
5967 if (Subtarget.hasExtLSX() && CmpVT.getSizeInBits() == 128)
5968 UseLASX = false;
5969 else if (Subtarget.has32S() && Subtarget.hasExtLASX() &&
5970 CmpVT.getSizeInBits() == 256)
5971 UseLASX = true;
5972 else
5973 return SDValue();
5974
5975 SDValue SrcN1 = Src.getOperand(1);
5976 switch (cast<CondCodeSDNode>(Src.getOperand(2))->get()) {
5977 default:
5978 break;
5979 case ISD::SETEQ:
5980 // x == 0 => not (vmsknez.b x)
5981 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
5982 Opc = UseLASX ? LoongArchISD::XVMSKEQZ : LoongArchISD::VMSKEQZ;
5983 break;
5984 case ISD::SETGT:
5985 // x > -1 => vmskgez.b x
5986 if (ISD::isBuildVectorAllOnes(SrcN1.getNode()) && EltVT == MVT::i8)
5987 Opc = UseLASX ? LoongArchISD::XVMSKGEZ : LoongArchISD::VMSKGEZ;
5988 break;
5989 case ISD::SETGE:
5990 // x >= 0 => vmskgez.b x
5991 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
5992 Opc = UseLASX ? LoongArchISD::XVMSKGEZ : LoongArchISD::VMSKGEZ;
5993 break;
5994 case ISD::SETLT:
5995 // x < 0 => vmskltz.{b,h,w,d} x
5996 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) &&
5997 (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
5998 EltVT == MVT::i64))
5999 Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
6000 break;
6001 case ISD::SETLE:
6002 // x <= -1 => vmskltz.{b,h,w,d} x
6003 if (ISD::isBuildVectorAllOnes(SrcN1.getNode()) &&
6004 (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
6005 EltVT == MVT::i64))
6006 Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
6007 break;
6008 case ISD::SETNE:
6009 // x != 0 => vmsknez.b x
6010 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
6011 Opc = UseLASX ? LoongArchISD::XVMSKNEZ : LoongArchISD::VMSKNEZ;
6012 break;
6013 }
6014
6015 if (Opc == ISD::DELETED_NODE)
6016 return SDValue();
6017
6018 SDValue V = DAG.getNode(Opc, DL, Subtarget.getGRLenVT(), Src.getOperand(0));
6020 V = DAG.getZExtOrTrunc(V, DL, T);
6021 return DAG.getBitcast(VT, V);
6022}
6023
6026 const LoongArchSubtarget &Subtarget) {
6027 SDLoc DL(N);
6028 EVT VT = N->getValueType(0);
6029 SDValue Src = N->getOperand(0);
6030 EVT SrcVT = Src.getValueType();
6031 MVT GRLenVT = Subtarget.getGRLenVT();
6032
6033 if (!DCI.isBeforeLegalizeOps())
6034 return SDValue();
6035
6036 if (!SrcVT.isSimple() || SrcVT.getScalarType() != MVT::i1)
6037 return SDValue();
6038
6039 // Combine SETCC and BITCAST into [X]VMSK{LT,GE,NE} when possible
6040 SDValue Res = performSETCC_BITCASTCombine(N, DAG, DCI, Subtarget);
6041 if (Res)
6042 return Res;
6043
6044 // Generate vXi1 using [X]VMSKLTZ
6045 MVT SExtVT;
6046 unsigned Opc;
6047 bool UseLASX = false;
6048 bool PropagateSExt = false;
6049
6050 if (Src.getOpcode() == ISD::SETCC && Src.hasOneUse()) {
6051 EVT CmpVT = Src.getOperand(0).getValueType();
6052 if (CmpVT.getSizeInBits() > 256)
6053 return SDValue();
6054 }
6055
6056 switch (SrcVT.getSimpleVT().SimpleTy) {
6057 default:
6058 return SDValue();
6059 case MVT::v2i1:
6060 SExtVT = MVT::v2i64;
6061 break;
6062 case MVT::v4i1:
6063 SExtVT = MVT::v4i32;
6064 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
6065 SExtVT = MVT::v4i64;
6066 UseLASX = true;
6067 PropagateSExt = true;
6068 }
6069 break;
6070 case MVT::v8i1:
6071 SExtVT = MVT::v8i16;
6072 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
6073 SExtVT = MVT::v8i32;
6074 UseLASX = true;
6075 PropagateSExt = true;
6076 }
6077 break;
6078 case MVT::v16i1:
6079 SExtVT = MVT::v16i8;
6080 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
6081 SExtVT = MVT::v16i16;
6082 UseLASX = true;
6083 PropagateSExt = true;
6084 }
6085 break;
6086 case MVT::v32i1:
6087 SExtVT = MVT::v32i8;
6088 UseLASX = true;
6089 break;
6090 };
6091 Src = PropagateSExt ? signExtendBitcastSrcVector(DAG, SExtVT, Src, DL)
6092 : DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, Src);
6093
6094 SDValue V;
6095 if (!Subtarget.has32S() || !Subtarget.hasExtLASX()) {
6096 if (Src.getSimpleValueType() == MVT::v32i8) {
6097 SDValue Lo, Hi;
6098 std::tie(Lo, Hi) = DAG.SplitVector(Src, DL);
6099 Lo = DAG.getNode(LoongArchISD::VMSKLTZ, DL, GRLenVT, Lo);
6100 Hi = DAG.getNode(LoongArchISD::VMSKLTZ, DL, GRLenVT, Hi);
6101 Hi = DAG.getNode(ISD::SHL, DL, GRLenVT, Hi,
6102 DAG.getShiftAmountConstant(16, GRLenVT, DL));
6103 V = DAG.getNode(ISD::OR, DL, GRLenVT, Lo, Hi);
6104 } else if (UseLASX) {
6105 return SDValue();
6106 }
6107 }
6108
6109 if (!V) {
6110 Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
6111 V = DAG.getNode(Opc, DL, GRLenVT, Src);
6112 }
6113
6115 V = DAG.getZExtOrTrunc(V, DL, T);
6116 return DAG.getBitcast(VT, V);
6117}
6118
6121 const LoongArchSubtarget &Subtarget) {
6122 MVT GRLenVT = Subtarget.getGRLenVT();
6123 EVT ValTy = N->getValueType(0);
6124 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
6125 ConstantSDNode *CN0, *CN1;
6126 SDLoc DL(N);
6127 unsigned ValBits = ValTy.getSizeInBits();
6128 unsigned MaskIdx0, MaskLen0, MaskIdx1, MaskLen1;
6129 unsigned Shamt;
6130 bool SwapAndRetried = false;
6131
6132 // BSTRPICK requires the 32S feature.
6133 if (!Subtarget.has32S())
6134 return SDValue();
6135
6136 if (DCI.isBeforeLegalizeOps())
6137 return SDValue();
6138
6139 if (ValBits != 32 && ValBits != 64)
6140 return SDValue();
6141
6142Retry:
6143 // 1st pattern to match BSTRINS:
6144 // R = or (and X, mask0), (and (shl Y, lsb), mask1)
6145 // where mask1 = (2**size - 1) << lsb, mask0 = ~mask1
6146 // =>
6147 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
6148 if (N0.getOpcode() == ISD::AND &&
6149 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
6150 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
6151 N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL &&
6152 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
6153 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
6154 MaskIdx0 == MaskIdx1 && MaskLen0 == MaskLen1 &&
6155 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
6156 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
6157 (MaskIdx0 + MaskLen0 <= ValBits)) {
6158 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 1\n");
6159 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
6160 N1.getOperand(0).getOperand(0),
6161 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
6162 DAG.getConstant(MaskIdx0, DL, GRLenVT));
6163 }
6164
6165 // 2nd pattern to match BSTRINS:
6166 // R = or (and X, mask0), (shl (and Y, mask1), lsb)
6167 // where mask1 = (2**size - 1), mask0 = ~(mask1 << lsb)
6168 // =>
6169 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
6170 if (N0.getOpcode() == ISD::AND &&
6171 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
6172 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
6173 N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
6174 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
6175 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
6176 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
6177 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
6178 MaskLen0 == MaskLen1 && MaskIdx1 == 0 &&
6179 (MaskIdx0 + MaskLen0 <= ValBits)) {
6180 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 2\n");
6181 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
6182 N1.getOperand(0).getOperand(0),
6183 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
6184 DAG.getConstant(MaskIdx0, DL, GRLenVT));
6185 }
6186
6187 // 3rd pattern to match BSTRINS:
6188 // R = or (and X, mask0), (and Y, mask1)
6189 // where ~mask0 = (2**size - 1) << lsb, mask0 & mask1 = 0
6190 // =>
6191 // R = BSTRINS X, (shr (and Y, mask1), lsb), msb, lsb
6192 // where msb = lsb + size - 1
6193 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
6194 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
6195 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
6196 (MaskIdx0 + MaskLen0 <= 64) &&
6197 (CN1 = dyn_cast<ConstantSDNode>(N1->getOperand(1))) &&
6198 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
6199 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 3\n");
6200 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
6201 DAG.getNode(ISD::SRL, DL, N1->getValueType(0), N1,
6202 DAG.getConstant(MaskIdx0, DL, GRLenVT)),
6203 DAG.getConstant(ValBits == 32
6204 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
6205 : (MaskIdx0 + MaskLen0 - 1),
6206 DL, GRLenVT),
6207 DAG.getConstant(MaskIdx0, DL, GRLenVT));
6208 }
6209
6210 // 4th pattern to match BSTRINS:
6211 // R = or (and X, mask), (shl Y, shamt)
6212 // where mask = (2**shamt - 1)
6213 // =>
6214 // R = BSTRINS X, Y, ValBits - 1, shamt
6215 // where ValBits = 32 or 64
6216 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::SHL &&
6217 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
6218 isShiftedMask_64(CN0->getZExtValue(), MaskIdx0, MaskLen0) &&
6219 MaskIdx0 == 0 && (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
6220 (Shamt = CN1->getZExtValue()) == MaskLen0 &&
6221 (MaskIdx0 + MaskLen0 <= ValBits)) {
6222 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 4\n");
6223 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
6224 N1.getOperand(0),
6225 DAG.getConstant((ValBits - 1), DL, GRLenVT),
6226 DAG.getConstant(Shamt, DL, GRLenVT));
6227 }
6228
6229 // 5th pattern to match BSTRINS:
6230 // R = or (and X, mask), const
6231 // where ~mask = (2**size - 1) << lsb, mask & const = 0
6232 // =>
6233 // R = BSTRINS X, (const >> lsb), msb, lsb
6234 // where msb = lsb + size - 1
6235 if (N0.getOpcode() == ISD::AND &&
6236 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
6237 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
6238 (CN1 = dyn_cast<ConstantSDNode>(N1)) &&
6239 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
6240 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 5\n");
6241 return DAG.getNode(
6242 LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
6243 DAG.getSignedConstant(CN1->getSExtValue() >> MaskIdx0, DL, ValTy),
6244 DAG.getConstant(ValBits == 32 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
6245 : (MaskIdx0 + MaskLen0 - 1),
6246 DL, GRLenVT),
6247 DAG.getConstant(MaskIdx0, DL, GRLenVT));
6248 }
6249
6250 // 6th pattern.
6251 // a = b | ((c & mask) << shamt), where all positions in b to be overwritten
6252 // by the incoming bits are known to be zero.
6253 // =>
6254 // a = BSTRINS b, c, shamt + MaskLen - 1, shamt
6255 //
6256 // Note that the 1st pattern is a special situation of the 6th, i.e. the 6th
6257 // pattern is more common than the 1st. So we put the 1st before the 6th in
6258 // order to match as many nodes as possible.
6259 ConstantSDNode *CNMask, *CNShamt;
6260 unsigned MaskIdx, MaskLen;
6261 if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
6262 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
6263 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
6264 MaskIdx == 0 && (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
6265 CNShamt->getZExtValue() + MaskLen <= ValBits) {
6266 Shamt = CNShamt->getZExtValue();
6267 APInt ShMask(ValBits, CNMask->getZExtValue() << Shamt);
6268 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
6269 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 6\n");
6270 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
6271 N1.getOperand(0).getOperand(0),
6272 DAG.getConstant(Shamt + MaskLen - 1, DL, GRLenVT),
6273 DAG.getConstant(Shamt, DL, GRLenVT));
6274 }
6275 }
6276
6277 // 7th pattern.
6278 // a = b | ((c << shamt) & shifted_mask), where all positions in b to be
6279 // overwritten by the incoming bits are known to be zero.
6280 // =>
6281 // a = BSTRINS b, c, MaskIdx + MaskLen - 1, MaskIdx
6282 //
6283 // Similarly, the 7th pattern is more common than the 2nd. So we put the 2nd
6284 // before the 7th in order to match as many nodes as possible.
6285 if (N1.getOpcode() == ISD::AND &&
6286 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
6287 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
6288 N1.getOperand(0).getOpcode() == ISD::SHL &&
6289 (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
6290 CNShamt->getZExtValue() == MaskIdx) {
6291 APInt ShMask(ValBits, CNMask->getZExtValue());
6292 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
6293 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 7\n");
6294 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
6295 N1.getOperand(0).getOperand(0),
6296 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
6297 DAG.getConstant(MaskIdx, DL, GRLenVT));
6298 }
6299 }
6300
6301 // (or a, b) and (or b, a) are equivalent, so swap the operands and retry.
6302 if (!SwapAndRetried) {
6303 std::swap(N0, N1);
6304 SwapAndRetried = true;
6305 goto Retry;
6306 }
6307
6308 SwapAndRetried = false;
6309Retry2:
6310 // 8th pattern.
6311 // a = b | (c & shifted_mask), where all positions in b to be overwritten by
6312 // the incoming bits are known to be zero.
6313 // =>
6314 // a = BSTRINS b, c >> MaskIdx, MaskIdx + MaskLen - 1, MaskIdx
6315 //
6316 // Similarly, the 8th pattern is more common than the 4th and 5th patterns. So
6317 // we put it here in order to match as many nodes as possible or generate less
6318 // instructions.
6319 if (N1.getOpcode() == ISD::AND &&
6320 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
6321 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen)) {
6322 APInt ShMask(ValBits, CNMask->getZExtValue());
6323 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
6324 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 8\n");
6325 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
6326 DAG.getNode(ISD::SRL, DL, N1->getValueType(0),
6327 N1->getOperand(0),
6328 DAG.getConstant(MaskIdx, DL, GRLenVT)),
6329 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
6330 DAG.getConstant(MaskIdx, DL, GRLenVT));
6331 }
6332 }
6333 // Swap N0/N1 and retry.
6334 if (!SwapAndRetried) {
6335 std::swap(N0, N1);
6336 SwapAndRetried = true;
6337 goto Retry2;
6338 }
6339
6340 return SDValue();
6341}
6342
6343static bool checkValueWidth(SDValue V, ISD::LoadExtType &ExtType) {
6344 ExtType = ISD::NON_EXTLOAD;
6345
6346 switch (V.getNode()->getOpcode()) {
6347 case ISD::LOAD: {
6348 LoadSDNode *LoadNode = cast<LoadSDNode>(V.getNode());
6349 if ((LoadNode->getMemoryVT() == MVT::i8) ||
6350 (LoadNode->getMemoryVT() == MVT::i16)) {
6351 ExtType = LoadNode->getExtensionType();
6352 return true;
6353 }
6354 return false;
6355 }
6356 case ISD::AssertSext: {
6357 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
6358 if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
6359 ExtType = ISD::SEXTLOAD;
6360 return true;
6361 }
6362 return false;
6363 }
6364 case ISD::AssertZext: {
6365 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
6366 if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
6367 ExtType = ISD::ZEXTLOAD;
6368 return true;
6369 }
6370 return false;
6371 }
6372 default:
6373 return false;
6374 }
6375
6376 return false;
6377}
6378
6379// Eliminate redundant truncation and zero-extension nodes.
6380// * Case 1:
6381// +------------+ +------------+ +------------+
6382// | Input1 | | Input2 | | CC |
6383// +------------+ +------------+ +------------+
6384// | | |
6385// V V +----+
6386// +------------+ +------------+ |
6387// | TRUNCATE | | TRUNCATE | |
6388// +------------+ +------------+ |
6389// | | |
6390// V V |
6391// +------------+ +------------+ |
6392// | ZERO_EXT | | ZERO_EXT | |
6393// +------------+ +------------+ |
6394// | | |
6395// | +-------------+ |
6396// V V | |
6397// +----------------+ | |
6398// | AND | | |
6399// +----------------+ | |
6400// | | |
6401// +---------------+ | |
6402// | | |
6403// V V V
6404// +-------------+
6405// | CMP |
6406// +-------------+
6407// * Case 2:
6408// +------------+ +------------+ +-------------+ +------------+ +------------+
6409// | Input1 | | Input2 | | Constant -1 | | Constant 0 | | CC |
6410// +------------+ +------------+ +-------------+ +------------+ +------------+
6411// | | | | |
6412// V | | | |
6413// +------------+ | | | |
6414// | XOR |<---------------------+ | |
6415// +------------+ | | |
6416// | | | |
6417// V V +---------------+ |
6418// +------------+ +------------+ | |
6419// | TRUNCATE | | TRUNCATE | | +-------------------------+
6420// +------------+ +------------+ | |
6421// | | | |
6422// V V | |
6423// +------------+ +------------+ | |
6424// | ZERO_EXT | | ZERO_EXT | | |
6425// +------------+ +------------+ | |
6426// | | | |
6427// V V | |
6428// +----------------+ | |
6429// | AND | | |
6430// +----------------+ | |
6431// | | |
6432// +---------------+ | |
6433// | | |
6434// V V V
6435// +-------------+
6436// | CMP |
6437// +-------------+
6440 const LoongArchSubtarget &Subtarget) {
6441 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
6442
6443 SDNode *AndNode = N->getOperand(0).getNode();
6444 if (AndNode->getOpcode() != ISD::AND)
6445 return SDValue();
6446
6447 SDValue AndInputValue2 = AndNode->getOperand(1);
6448 if (AndInputValue2.getOpcode() != ISD::ZERO_EXTEND)
6449 return SDValue();
6450
6451 SDValue CmpInputValue = N->getOperand(1);
6452 SDValue AndInputValue1 = AndNode->getOperand(0);
6453 if (AndInputValue1.getOpcode() == ISD::XOR) {
6454 if (CC != ISD::SETEQ && CC != ISD::SETNE)
6455 return SDValue();
6456 ConstantSDNode *CN = dyn_cast<ConstantSDNode>(AndInputValue1.getOperand(1));
6457 if (!CN || !CN->isAllOnes())
6458 return SDValue();
6459 CN = dyn_cast<ConstantSDNode>(CmpInputValue);
6460 if (!CN || !CN->isZero())
6461 return SDValue();
6462 AndInputValue1 = AndInputValue1.getOperand(0);
6463 if (AndInputValue1.getOpcode() != ISD::ZERO_EXTEND)
6464 return SDValue();
6465 } else if (AndInputValue1.getOpcode() == ISD::ZERO_EXTEND) {
6466 if (AndInputValue2 != CmpInputValue)
6467 return SDValue();
6468 } else {
6469 return SDValue();
6470 }
6471
6472 SDValue TruncValue1 = AndInputValue1.getNode()->getOperand(0);
6473 if (TruncValue1.getOpcode() != ISD::TRUNCATE)
6474 return SDValue();
6475
6476 SDValue TruncValue2 = AndInputValue2.getNode()->getOperand(0);
6477 if (TruncValue2.getOpcode() != ISD::TRUNCATE)
6478 return SDValue();
6479
6480 SDValue TruncInputValue1 = TruncValue1.getNode()->getOperand(0);
6481 SDValue TruncInputValue2 = TruncValue2.getNode()->getOperand(0);
6482 ISD::LoadExtType ExtType1;
6483 ISD::LoadExtType ExtType2;
6484
6485 if (!checkValueWidth(TruncInputValue1, ExtType1) ||
6486 !checkValueWidth(TruncInputValue2, ExtType2))
6487 return SDValue();
6488
6489 if (TruncInputValue1->getValueType(0) != TruncInputValue2->getValueType(0) ||
6490 AndNode->getValueType(0) != TruncInputValue1->getValueType(0))
6491 return SDValue();
6492
6493 if ((ExtType2 != ISD::ZEXTLOAD) &&
6494 ((ExtType2 != ISD::SEXTLOAD) && (ExtType1 != ISD::SEXTLOAD)))
6495 return SDValue();
6496
6497 // These truncation and zero-extension nodes are not necessary, remove them.
6498 SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N), AndNode->getValueType(0),
6499 TruncInputValue1, TruncInputValue2);
6500 SDValue NewSetCC =
6501 DAG.getSetCC(SDLoc(N), N->getValueType(0), NewAnd, TruncInputValue2, CC);
6502 DAG.ReplaceAllUsesWith(N, NewSetCC.getNode());
6503 return SDValue(N, 0);
6504}
6505
6506// Combine (loongarch_bitrev_w (loongarch_revb_2w X)) to loongarch_bitrev_4b.
6509 const LoongArchSubtarget &Subtarget) {
6510 if (DCI.isBeforeLegalizeOps())
6511 return SDValue();
6512
6513 SDValue Src = N->getOperand(0);
6514 if (Src.getOpcode() != LoongArchISD::REVB_2W)
6515 return SDValue();
6516
6517 return DAG.getNode(LoongArchISD::BITREV_4B, SDLoc(N), N->getValueType(0),
6518 Src.getOperand(0));
6519}
6520
6521// Perform common combines for BR_CC and SELECT_CC conditions.
6522static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL,
6523 SelectionDAG &DAG, const LoongArchSubtarget &Subtarget) {
6524 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
6525
6526 // As far as arithmetic right shift always saves the sign,
6527 // shift can be omitted.
6528 // Fold setlt (sra X, N), 0 -> setlt X, 0 and
6529 // setge (sra X, N), 0 -> setge X, 0
6530 if (isNullConstant(RHS) && (CCVal == ISD::SETGE || CCVal == ISD::SETLT) &&
6531 LHS.getOpcode() == ISD::SRA) {
6532 LHS = LHS.getOperand(0);
6533 return true;
6534 }
6535
6536 if (!ISD::isIntEqualitySetCC(CCVal))
6537 return false;
6538
6539 // Fold ((setlt X, Y), 0, ne) -> (X, Y, lt)
6540 // Sometimes the setcc is introduced after br_cc/select_cc has been formed.
6541 if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
6542 LHS.getOperand(0).getValueType() == Subtarget.getGRLenVT()) {
6543 // If we're looking for eq 0 instead of ne 0, we need to invert the
6544 // condition.
6545 bool Invert = CCVal == ISD::SETEQ;
6546 CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
6547 if (Invert)
6548 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
6549
6550 RHS = LHS.getOperand(1);
6551 LHS = LHS.getOperand(0);
6552 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
6553
6554 CC = DAG.getCondCode(CCVal);
6555 return true;
6556 }
6557
6558 // Fold ((srl (and X, 1<<C), C), 0, eq/ne) -> ((shl X, GRLen-1-C), 0, ge/lt)
6559 if (isNullConstant(RHS) && LHS.getOpcode() == ISD::SRL && LHS.hasOneUse() &&
6560 LHS.getOperand(1).getOpcode() == ISD::Constant) {
6561 SDValue LHS0 = LHS.getOperand(0);
6562 if (LHS0.getOpcode() == ISD::AND &&
6563 LHS0.getOperand(1).getOpcode() == ISD::Constant) {
6564 uint64_t Mask = LHS0.getConstantOperandVal(1);
6565 uint64_t ShAmt = LHS.getConstantOperandVal(1);
6566 if (isPowerOf2_64(Mask) && Log2_64(Mask) == ShAmt) {
6567 CCVal = CCVal == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
6568 CC = DAG.getCondCode(CCVal);
6569
6570 ShAmt = LHS.getValueSizeInBits() - 1 - ShAmt;
6571 LHS = LHS0.getOperand(0);
6572 if (ShAmt != 0)
6573 LHS =
6574 DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS0.getOperand(0),
6575 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
6576 return true;
6577 }
6578 }
6579 }
6580
6581 // (X, 1, setne) -> (X, 0, seteq) if we can prove X is 0/1.
6582 // This can occur when legalizing some floating point comparisons.
6583 APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
6584 if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
6585 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
6586 CC = DAG.getCondCode(CCVal);
6587 RHS = DAG.getConstant(0, DL, LHS.getValueType());
6588 return true;
6589 }
6590
6591 return false;
6592}
6593
6596 const LoongArchSubtarget &Subtarget) {
6597 SDValue LHS = N->getOperand(1);
6598 SDValue RHS = N->getOperand(2);
6599 SDValue CC = N->getOperand(3);
6600 SDLoc DL(N);
6601
6602 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
6603 return DAG.getNode(LoongArchISD::BR_CC, DL, N->getValueType(0),
6604 N->getOperand(0), LHS, RHS, CC, N->getOperand(4));
6605
6606 return SDValue();
6607}
6608
6611 const LoongArchSubtarget &Subtarget) {
6612 // Transform
6613 SDValue LHS = N->getOperand(0);
6614 SDValue RHS = N->getOperand(1);
6615 SDValue CC = N->getOperand(2);
6616 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
6617 SDValue TrueV = N->getOperand(3);
6618 SDValue FalseV = N->getOperand(4);
6619 SDLoc DL(N);
6620 EVT VT = N->getValueType(0);
6621
6622 // If the True and False values are the same, we don't need a select_cc.
6623 if (TrueV == FalseV)
6624 return TrueV;
6625
6626 // (select (x < 0), y, z) -> x >> (GRLEN - 1) & (y - z) + z
6627 // (select (x >= 0), y, z) -> x >> (GRLEN - 1) & (z - y) + y
6628 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
6630 (CCVal == ISD::CondCode::SETLT || CCVal == ISD::CondCode::SETGE)) {
6631 if (CCVal == ISD::CondCode::SETGE)
6632 std::swap(TrueV, FalseV);
6633
6634 int64_t TrueSImm = cast<ConstantSDNode>(TrueV)->getSExtValue();
6635 int64_t FalseSImm = cast<ConstantSDNode>(FalseV)->getSExtValue();
6636 // Only handle simm12, if it is not in this range, it can be considered as
6637 // register.
6638 if (isInt<12>(TrueSImm) && isInt<12>(FalseSImm) &&
6639 isInt<12>(TrueSImm - FalseSImm)) {
6640 SDValue SRA =
6641 DAG.getNode(ISD::SRA, DL, VT, LHS,
6642 DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT));
6643 SDValue AND =
6644 DAG.getNode(ISD::AND, DL, VT, SRA,
6645 DAG.getSignedConstant(TrueSImm - FalseSImm, DL, VT));
6646 return DAG.getNode(ISD::ADD, DL, VT, AND, FalseV);
6647 }
6648
6649 if (CCVal == ISD::CondCode::SETGE)
6650 std::swap(TrueV, FalseV);
6651 }
6652
6653 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
6654 return DAG.getNode(LoongArchISD::SELECT_CC, DL, N->getValueType(0),
6655 {LHS, RHS, CC, TrueV, FalseV});
6656
6657 return SDValue();
6658}
6659
6660template <unsigned N>
6662 SelectionDAG &DAG,
6663 const LoongArchSubtarget &Subtarget,
6664 bool IsSigned = false) {
6665 SDLoc DL(Node);
6666 auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
6667 // Check the ImmArg.
6668 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
6669 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
6670 DAG.getContext()->emitError(Node->getOperationName(0) +
6671 ": argument out of range.");
6672 return DAG.getNode(ISD::UNDEF, DL, Subtarget.getGRLenVT());
6673 }
6674 return DAG.getConstant(CImm->getZExtValue(), DL, Subtarget.getGRLenVT());
6675}
6676
6677template <unsigned N>
6678static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp,
6679 SelectionDAG &DAG, bool IsSigned = false) {
6680 SDLoc DL(Node);
6681 EVT ResTy = Node->getValueType(0);
6682 auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
6683
6684 // Check the ImmArg.
6685 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
6686 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
6687 DAG.getContext()->emitError(Node->getOperationName(0) +
6688 ": argument out of range.");
6689 return DAG.getNode(ISD::UNDEF, DL, ResTy);
6690 }
6691 return DAG.getConstant(
6693 IsSigned ? CImm->getSExtValue() : CImm->getZExtValue(), IsSigned),
6694 DL, ResTy);
6695}
6696
6698 SDLoc DL(Node);
6699 EVT ResTy = Node->getValueType(0);
6700 SDValue Vec = Node->getOperand(2);
6701 SDValue Mask = DAG.getConstant(Vec.getScalarValueSizeInBits() - 1, DL, ResTy);
6702 return DAG.getNode(ISD::AND, DL, ResTy, Vec, Mask);
6703}
6704
6706 SDLoc DL(Node);
6707 EVT ResTy = Node->getValueType(0);
6708 SDValue One = DAG.getConstant(1, DL, ResTy);
6709 SDValue Bit =
6710 DAG.getNode(ISD::SHL, DL, ResTy, One, truncateVecElts(Node, DAG));
6711
6712 return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1),
6713 DAG.getNOT(DL, Bit, ResTy));
6714}
6715
6716template <unsigned N>
6718 SDLoc DL(Node);
6719 EVT ResTy = Node->getValueType(0);
6720 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
6721 // Check the unsigned ImmArg.
6722 if (!isUInt<N>(CImm->getZExtValue())) {
6723 DAG.getContext()->emitError(Node->getOperationName(0) +
6724 ": argument out of range.");
6725 return DAG.getNode(ISD::UNDEF, DL, ResTy);
6726 }
6727
6728 APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
6729 SDValue Mask = DAG.getConstant(~BitImm, DL, ResTy);
6730
6731 return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1), Mask);
6732}
6733
6734template <unsigned N>
6736 SDLoc DL(Node);
6737 EVT ResTy = Node->getValueType(0);
6738 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
6739 // Check the unsigned ImmArg.
6740 if (!isUInt<N>(CImm->getZExtValue())) {
6741 DAG.getContext()->emitError(Node->getOperationName(0) +
6742 ": argument out of range.");
6743 return DAG.getNode(ISD::UNDEF, DL, ResTy);
6744 }
6745
6746 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
6747 SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
6748 return DAG.getNode(ISD::OR, DL, ResTy, Node->getOperand(1), BitImm);
6749}
6750
6751template <unsigned N>
6753 SDLoc DL(Node);
6754 EVT ResTy = Node->getValueType(0);
6755 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
6756 // Check the unsigned ImmArg.
6757 if (!isUInt<N>(CImm->getZExtValue())) {
6758 DAG.getContext()->emitError(Node->getOperationName(0) +
6759 ": argument out of range.");
6760 return DAG.getNode(ISD::UNDEF, DL, ResTy);
6761 }
6762
6763 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
6764 SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
6765 return DAG.getNode(ISD::XOR, DL, ResTy, Node->getOperand(1), BitImm);
6766}
6767
6768template <unsigned W>
6770 unsigned ResOp) {
6771 unsigned Imm = N->getConstantOperandVal(2);
6772 if (!isUInt<W>(Imm)) {
6773 const StringRef ErrorMsg = "argument out of range";
6774 DAG.getContext()->emitError(N->getOperationName(0) + ": " + ErrorMsg + ".");
6775 return DAG.getUNDEF(N->getValueType(0));
6776 }
6777 SDLoc DL(N);
6778 SDValue Vec = N->getOperand(1);
6779 SDValue Idx = DAG.getConstant(Imm, DL, MVT::i32);
6781 return DAG.getNode(ResOp, DL, N->getValueType(0), Vec, Idx, EltVT);
6782}
6783
6784static SDValue
6787 const LoongArchSubtarget &Subtarget) {
6788 SDLoc DL(N);
6789 switch (N->getConstantOperandVal(0)) {
6790 default:
6791 break;
6792 case Intrinsic::loongarch_lsx_vadd_b:
6793 case Intrinsic::loongarch_lsx_vadd_h:
6794 case Intrinsic::loongarch_lsx_vadd_w:
6795 case Intrinsic::loongarch_lsx_vadd_d:
6796 case Intrinsic::loongarch_lasx_xvadd_b:
6797 case Intrinsic::loongarch_lasx_xvadd_h:
6798 case Intrinsic::loongarch_lasx_xvadd_w:
6799 case Intrinsic::loongarch_lasx_xvadd_d:
6800 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
6801 N->getOperand(2));
6802 case Intrinsic::loongarch_lsx_vaddi_bu:
6803 case Intrinsic::loongarch_lsx_vaddi_hu:
6804 case Intrinsic::loongarch_lsx_vaddi_wu:
6805 case Intrinsic::loongarch_lsx_vaddi_du:
6806 case Intrinsic::loongarch_lasx_xvaddi_bu:
6807 case Intrinsic::loongarch_lasx_xvaddi_hu:
6808 case Intrinsic::loongarch_lasx_xvaddi_wu:
6809 case Intrinsic::loongarch_lasx_xvaddi_du:
6810 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
6811 lowerVectorSplatImm<5>(N, 2, DAG));
6812 case Intrinsic::loongarch_lsx_vsub_b:
6813 case Intrinsic::loongarch_lsx_vsub_h:
6814 case Intrinsic::loongarch_lsx_vsub_w:
6815 case Intrinsic::loongarch_lsx_vsub_d:
6816 case Intrinsic::loongarch_lasx_xvsub_b:
6817 case Intrinsic::loongarch_lasx_xvsub_h:
6818 case Intrinsic::loongarch_lasx_xvsub_w:
6819 case Intrinsic::loongarch_lasx_xvsub_d:
6820 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
6821 N->getOperand(2));
6822 case Intrinsic::loongarch_lsx_vsubi_bu:
6823 case Intrinsic::loongarch_lsx_vsubi_hu:
6824 case Intrinsic::loongarch_lsx_vsubi_wu:
6825 case Intrinsic::loongarch_lsx_vsubi_du:
6826 case Intrinsic::loongarch_lasx_xvsubi_bu:
6827 case Intrinsic::loongarch_lasx_xvsubi_hu:
6828 case Intrinsic::loongarch_lasx_xvsubi_wu:
6829 case Intrinsic::loongarch_lasx_xvsubi_du:
6830 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
6831 lowerVectorSplatImm<5>(N, 2, DAG));
6832 case Intrinsic::loongarch_lsx_vneg_b:
6833 case Intrinsic::loongarch_lsx_vneg_h:
6834 case Intrinsic::loongarch_lsx_vneg_w:
6835 case Intrinsic::loongarch_lsx_vneg_d:
6836 case Intrinsic::loongarch_lasx_xvneg_b:
6837 case Intrinsic::loongarch_lasx_xvneg_h:
6838 case Intrinsic::loongarch_lasx_xvneg_w:
6839 case Intrinsic::loongarch_lasx_xvneg_d:
6840 return DAG.getNode(
6841 ISD::SUB, DL, N->getValueType(0),
6842 DAG.getConstant(
6843 APInt(N->getValueType(0).getScalarType().getSizeInBits(), 0,
6844 /*isSigned=*/true),
6845 SDLoc(N), N->getValueType(0)),
6846 N->getOperand(1));
6847 case Intrinsic::loongarch_lsx_vmax_b:
6848 case Intrinsic::loongarch_lsx_vmax_h:
6849 case Intrinsic::loongarch_lsx_vmax_w:
6850 case Intrinsic::loongarch_lsx_vmax_d:
6851 case Intrinsic::loongarch_lasx_xvmax_b:
6852 case Intrinsic::loongarch_lasx_xvmax_h:
6853 case Intrinsic::loongarch_lasx_xvmax_w:
6854 case Intrinsic::loongarch_lasx_xvmax_d:
6855 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
6856 N->getOperand(2));
6857 case Intrinsic::loongarch_lsx_vmax_bu:
6858 case Intrinsic::loongarch_lsx_vmax_hu:
6859 case Intrinsic::loongarch_lsx_vmax_wu:
6860 case Intrinsic::loongarch_lsx_vmax_du:
6861 case Intrinsic::loongarch_lasx_xvmax_bu:
6862 case Intrinsic::loongarch_lasx_xvmax_hu:
6863 case Intrinsic::loongarch_lasx_xvmax_wu:
6864 case Intrinsic::loongarch_lasx_xvmax_du:
6865 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
6866 N->getOperand(2));
6867 case Intrinsic::loongarch_lsx_vmaxi_b:
6868 case Intrinsic::loongarch_lsx_vmaxi_h:
6869 case Intrinsic::loongarch_lsx_vmaxi_w:
6870 case Intrinsic::loongarch_lsx_vmaxi_d:
6871 case Intrinsic::loongarch_lasx_xvmaxi_b:
6872 case Intrinsic::loongarch_lasx_xvmaxi_h:
6873 case Intrinsic::loongarch_lasx_xvmaxi_w:
6874 case Intrinsic::loongarch_lasx_xvmaxi_d:
6875 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
6876 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
6877 case Intrinsic::loongarch_lsx_vmaxi_bu:
6878 case Intrinsic::loongarch_lsx_vmaxi_hu:
6879 case Intrinsic::loongarch_lsx_vmaxi_wu:
6880 case Intrinsic::loongarch_lsx_vmaxi_du:
6881 case Intrinsic::loongarch_lasx_xvmaxi_bu:
6882 case Intrinsic::loongarch_lasx_xvmaxi_hu:
6883 case Intrinsic::loongarch_lasx_xvmaxi_wu:
6884 case Intrinsic::loongarch_lasx_xvmaxi_du:
6885 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
6886 lowerVectorSplatImm<5>(N, 2, DAG));
6887 case Intrinsic::loongarch_lsx_vmin_b:
6888 case Intrinsic::loongarch_lsx_vmin_h:
6889 case Intrinsic::loongarch_lsx_vmin_w:
6890 case Intrinsic::loongarch_lsx_vmin_d:
6891 case Intrinsic::loongarch_lasx_xvmin_b:
6892 case Intrinsic::loongarch_lasx_xvmin_h:
6893 case Intrinsic::loongarch_lasx_xvmin_w:
6894 case Intrinsic::loongarch_lasx_xvmin_d:
6895 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
6896 N->getOperand(2));
6897 case Intrinsic::loongarch_lsx_vmin_bu:
6898 case Intrinsic::loongarch_lsx_vmin_hu:
6899 case Intrinsic::loongarch_lsx_vmin_wu:
6900 case Intrinsic::loongarch_lsx_vmin_du:
6901 case Intrinsic::loongarch_lasx_xvmin_bu:
6902 case Intrinsic::loongarch_lasx_xvmin_hu:
6903 case Intrinsic::loongarch_lasx_xvmin_wu:
6904 case Intrinsic::loongarch_lasx_xvmin_du:
6905 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
6906 N->getOperand(2));
6907 case Intrinsic::loongarch_lsx_vmini_b:
6908 case Intrinsic::loongarch_lsx_vmini_h:
6909 case Intrinsic::loongarch_lsx_vmini_w:
6910 case Intrinsic::loongarch_lsx_vmini_d:
6911 case Intrinsic::loongarch_lasx_xvmini_b:
6912 case Intrinsic::loongarch_lasx_xvmini_h:
6913 case Intrinsic::loongarch_lasx_xvmini_w:
6914 case Intrinsic::loongarch_lasx_xvmini_d:
6915 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
6916 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
6917 case Intrinsic::loongarch_lsx_vmini_bu:
6918 case Intrinsic::loongarch_lsx_vmini_hu:
6919 case Intrinsic::loongarch_lsx_vmini_wu:
6920 case Intrinsic::loongarch_lsx_vmini_du:
6921 case Intrinsic::loongarch_lasx_xvmini_bu:
6922 case Intrinsic::loongarch_lasx_xvmini_hu:
6923 case Intrinsic::loongarch_lasx_xvmini_wu:
6924 case Intrinsic::loongarch_lasx_xvmini_du:
6925 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
6926 lowerVectorSplatImm<5>(N, 2, DAG));
6927 case Intrinsic::loongarch_lsx_vmul_b:
6928 case Intrinsic::loongarch_lsx_vmul_h:
6929 case Intrinsic::loongarch_lsx_vmul_w:
6930 case Intrinsic::loongarch_lsx_vmul_d:
6931 case Intrinsic::loongarch_lasx_xvmul_b:
6932 case Intrinsic::loongarch_lasx_xvmul_h:
6933 case Intrinsic::loongarch_lasx_xvmul_w:
6934 case Intrinsic::loongarch_lasx_xvmul_d:
6935 return DAG.getNode(ISD::MUL, DL, N->getValueType(0), N->getOperand(1),
6936 N->getOperand(2));
6937 case Intrinsic::loongarch_lsx_vmadd_b:
6938 case Intrinsic::loongarch_lsx_vmadd_h:
6939 case Intrinsic::loongarch_lsx_vmadd_w:
6940 case Intrinsic::loongarch_lsx_vmadd_d:
6941 case Intrinsic::loongarch_lasx_xvmadd_b:
6942 case Intrinsic::loongarch_lasx_xvmadd_h:
6943 case Intrinsic::loongarch_lasx_xvmadd_w:
6944 case Intrinsic::loongarch_lasx_xvmadd_d: {
6945 EVT ResTy = N->getValueType(0);
6946 return DAG.getNode(ISD::ADD, SDLoc(N), ResTy, N->getOperand(1),
6947 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
6948 N->getOperand(3)));
6949 }
6950 case Intrinsic::loongarch_lsx_vmsub_b:
6951 case Intrinsic::loongarch_lsx_vmsub_h:
6952 case Intrinsic::loongarch_lsx_vmsub_w:
6953 case Intrinsic::loongarch_lsx_vmsub_d:
6954 case Intrinsic::loongarch_lasx_xvmsub_b:
6955 case Intrinsic::loongarch_lasx_xvmsub_h:
6956 case Intrinsic::loongarch_lasx_xvmsub_w:
6957 case Intrinsic::loongarch_lasx_xvmsub_d: {
6958 EVT ResTy = N->getValueType(0);
6959 return DAG.getNode(ISD::SUB, SDLoc(N), ResTy, N->getOperand(1),
6960 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
6961 N->getOperand(3)));
6962 }
6963 case Intrinsic::loongarch_lsx_vdiv_b:
6964 case Intrinsic::loongarch_lsx_vdiv_h:
6965 case Intrinsic::loongarch_lsx_vdiv_w:
6966 case Intrinsic::loongarch_lsx_vdiv_d:
6967 case Intrinsic::loongarch_lasx_xvdiv_b:
6968 case Intrinsic::loongarch_lasx_xvdiv_h:
6969 case Intrinsic::loongarch_lasx_xvdiv_w:
6970 case Intrinsic::loongarch_lasx_xvdiv_d:
6971 return DAG.getNode(ISD::SDIV, DL, N->getValueType(0), N->getOperand(1),
6972 N->getOperand(2));
6973 case Intrinsic::loongarch_lsx_vdiv_bu:
6974 case Intrinsic::loongarch_lsx_vdiv_hu:
6975 case Intrinsic::loongarch_lsx_vdiv_wu:
6976 case Intrinsic::loongarch_lsx_vdiv_du:
6977 case Intrinsic::loongarch_lasx_xvdiv_bu:
6978 case Intrinsic::loongarch_lasx_xvdiv_hu:
6979 case Intrinsic::loongarch_lasx_xvdiv_wu:
6980 case Intrinsic::loongarch_lasx_xvdiv_du:
6981 return DAG.getNode(ISD::UDIV, DL, N->getValueType(0), N->getOperand(1),
6982 N->getOperand(2));
6983 case Intrinsic::loongarch_lsx_vmod_b:
6984 case Intrinsic::loongarch_lsx_vmod_h:
6985 case Intrinsic::loongarch_lsx_vmod_w:
6986 case Intrinsic::loongarch_lsx_vmod_d:
6987 case Intrinsic::loongarch_lasx_xvmod_b:
6988 case Intrinsic::loongarch_lasx_xvmod_h:
6989 case Intrinsic::loongarch_lasx_xvmod_w:
6990 case Intrinsic::loongarch_lasx_xvmod_d:
6991 return DAG.getNode(ISD::SREM, DL, N->getValueType(0), N->getOperand(1),
6992 N->getOperand(2));
6993 case Intrinsic::loongarch_lsx_vmod_bu:
6994 case Intrinsic::loongarch_lsx_vmod_hu:
6995 case Intrinsic::loongarch_lsx_vmod_wu:
6996 case Intrinsic::loongarch_lsx_vmod_du:
6997 case Intrinsic::loongarch_lasx_xvmod_bu:
6998 case Intrinsic::loongarch_lasx_xvmod_hu:
6999 case Intrinsic::loongarch_lasx_xvmod_wu:
7000 case Intrinsic::loongarch_lasx_xvmod_du:
7001 return DAG.getNode(ISD::UREM, DL, N->getValueType(0), N->getOperand(1),
7002 N->getOperand(2));
7003 case Intrinsic::loongarch_lsx_vand_v:
7004 case Intrinsic::loongarch_lasx_xvand_v:
7005 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
7006 N->getOperand(2));
7007 case Intrinsic::loongarch_lsx_vor_v:
7008 case Intrinsic::loongarch_lasx_xvor_v:
7009 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
7010 N->getOperand(2));
7011 case Intrinsic::loongarch_lsx_vxor_v:
7012 case Intrinsic::loongarch_lasx_xvxor_v:
7013 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
7014 N->getOperand(2));
7015 case Intrinsic::loongarch_lsx_vnor_v:
7016 case Intrinsic::loongarch_lasx_xvnor_v: {
7017 SDValue Res = DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
7018 N->getOperand(2));
7019 return DAG.getNOT(DL, Res, Res->getValueType(0));
7020 }
7021 case Intrinsic::loongarch_lsx_vandi_b:
7022 case Intrinsic::loongarch_lasx_xvandi_b:
7023 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
7024 lowerVectorSplatImm<8>(N, 2, DAG));
7025 case Intrinsic::loongarch_lsx_vori_b:
7026 case Intrinsic::loongarch_lasx_xvori_b:
7027 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
7028 lowerVectorSplatImm<8>(N, 2, DAG));
7029 case Intrinsic::loongarch_lsx_vxori_b:
7030 case Intrinsic::loongarch_lasx_xvxori_b:
7031 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
7032 lowerVectorSplatImm<8>(N, 2, DAG));
7033 case Intrinsic::loongarch_lsx_vsll_b:
7034 case Intrinsic::loongarch_lsx_vsll_h:
7035 case Intrinsic::loongarch_lsx_vsll_w:
7036 case Intrinsic::loongarch_lsx_vsll_d:
7037 case Intrinsic::loongarch_lasx_xvsll_b:
7038 case Intrinsic::loongarch_lasx_xvsll_h:
7039 case Intrinsic::loongarch_lasx_xvsll_w:
7040 case Intrinsic::loongarch_lasx_xvsll_d:
7041 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
7042 truncateVecElts(N, DAG));
7043 case Intrinsic::loongarch_lsx_vslli_b:
7044 case Intrinsic::loongarch_lasx_xvslli_b:
7045 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
7046 lowerVectorSplatImm<3>(N, 2, DAG));
7047 case Intrinsic::loongarch_lsx_vslli_h:
7048 case Intrinsic::loongarch_lasx_xvslli_h:
7049 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
7050 lowerVectorSplatImm<4>(N, 2, DAG));
7051 case Intrinsic::loongarch_lsx_vslli_w:
7052 case Intrinsic::loongarch_lasx_xvslli_w:
7053 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
7054 lowerVectorSplatImm<5>(N, 2, DAG));
7055 case Intrinsic::loongarch_lsx_vslli_d:
7056 case Intrinsic::loongarch_lasx_xvslli_d:
7057 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
7058 lowerVectorSplatImm<6>(N, 2, DAG));
7059 case Intrinsic::loongarch_lsx_vsrl_b:
7060 case Intrinsic::loongarch_lsx_vsrl_h:
7061 case Intrinsic::loongarch_lsx_vsrl_w:
7062 case Intrinsic::loongarch_lsx_vsrl_d:
7063 case Intrinsic::loongarch_lasx_xvsrl_b:
7064 case Intrinsic::loongarch_lasx_xvsrl_h:
7065 case Intrinsic::loongarch_lasx_xvsrl_w:
7066 case Intrinsic::loongarch_lasx_xvsrl_d:
7067 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
7068 truncateVecElts(N, DAG));
7069 case Intrinsic::loongarch_lsx_vsrli_b:
7070 case Intrinsic::loongarch_lasx_xvsrli_b:
7071 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
7072 lowerVectorSplatImm<3>(N, 2, DAG));
7073 case Intrinsic::loongarch_lsx_vsrli_h:
7074 case Intrinsic::loongarch_lasx_xvsrli_h:
7075 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
7076 lowerVectorSplatImm<4>(N, 2, DAG));
7077 case Intrinsic::loongarch_lsx_vsrli_w:
7078 case Intrinsic::loongarch_lasx_xvsrli_w:
7079 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
7080 lowerVectorSplatImm<5>(N, 2, DAG));
7081 case Intrinsic::loongarch_lsx_vsrli_d:
7082 case Intrinsic::loongarch_lasx_xvsrli_d:
7083 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
7084 lowerVectorSplatImm<6>(N, 2, DAG));
7085 case Intrinsic::loongarch_lsx_vsra_b:
7086 case Intrinsic::loongarch_lsx_vsra_h:
7087 case Intrinsic::loongarch_lsx_vsra_w:
7088 case Intrinsic::loongarch_lsx_vsra_d:
7089 case Intrinsic::loongarch_lasx_xvsra_b:
7090 case Intrinsic::loongarch_lasx_xvsra_h:
7091 case Intrinsic::loongarch_lasx_xvsra_w:
7092 case Intrinsic::loongarch_lasx_xvsra_d:
7093 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
7094 truncateVecElts(N, DAG));
7095 case Intrinsic::loongarch_lsx_vsrai_b:
7096 case Intrinsic::loongarch_lasx_xvsrai_b:
7097 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
7098 lowerVectorSplatImm<3>(N, 2, DAG));
7099 case Intrinsic::loongarch_lsx_vsrai_h:
7100 case Intrinsic::loongarch_lasx_xvsrai_h:
7101 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
7102 lowerVectorSplatImm<4>(N, 2, DAG));
7103 case Intrinsic::loongarch_lsx_vsrai_w:
7104 case Intrinsic::loongarch_lasx_xvsrai_w:
7105 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
7106 lowerVectorSplatImm<5>(N, 2, DAG));
7107 case Intrinsic::loongarch_lsx_vsrai_d:
7108 case Intrinsic::loongarch_lasx_xvsrai_d:
7109 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
7110 lowerVectorSplatImm<6>(N, 2, DAG));
7111 case Intrinsic::loongarch_lsx_vclz_b:
7112 case Intrinsic::loongarch_lsx_vclz_h:
7113 case Intrinsic::loongarch_lsx_vclz_w:
7114 case Intrinsic::loongarch_lsx_vclz_d:
7115 case Intrinsic::loongarch_lasx_xvclz_b:
7116 case Intrinsic::loongarch_lasx_xvclz_h:
7117 case Intrinsic::loongarch_lasx_xvclz_w:
7118 case Intrinsic::loongarch_lasx_xvclz_d:
7119 return DAG.getNode(ISD::CTLZ, DL, N->getValueType(0), N->getOperand(1));
7120 case Intrinsic::loongarch_lsx_vpcnt_b:
7121 case Intrinsic::loongarch_lsx_vpcnt_h:
7122 case Intrinsic::loongarch_lsx_vpcnt_w:
7123 case Intrinsic::loongarch_lsx_vpcnt_d:
7124 case Intrinsic::loongarch_lasx_xvpcnt_b:
7125 case Intrinsic::loongarch_lasx_xvpcnt_h:
7126 case Intrinsic::loongarch_lasx_xvpcnt_w:
7127 case Intrinsic::loongarch_lasx_xvpcnt_d:
7128 return DAG.getNode(ISD::CTPOP, DL, N->getValueType(0), N->getOperand(1));
7129 case Intrinsic::loongarch_lsx_vbitclr_b:
7130 case Intrinsic::loongarch_lsx_vbitclr_h:
7131 case Intrinsic::loongarch_lsx_vbitclr_w:
7132 case Intrinsic::loongarch_lsx_vbitclr_d:
7133 case Intrinsic::loongarch_lasx_xvbitclr_b:
7134 case Intrinsic::loongarch_lasx_xvbitclr_h:
7135 case Intrinsic::loongarch_lasx_xvbitclr_w:
7136 case Intrinsic::loongarch_lasx_xvbitclr_d:
7137 return lowerVectorBitClear(N, DAG);
7138 case Intrinsic::loongarch_lsx_vbitclri_b:
7139 case Intrinsic::loongarch_lasx_xvbitclri_b:
7140 return lowerVectorBitClearImm<3>(N, DAG);
7141 case Intrinsic::loongarch_lsx_vbitclri_h:
7142 case Intrinsic::loongarch_lasx_xvbitclri_h:
7143 return lowerVectorBitClearImm<4>(N, DAG);
7144 case Intrinsic::loongarch_lsx_vbitclri_w:
7145 case Intrinsic::loongarch_lasx_xvbitclri_w:
7146 return lowerVectorBitClearImm<5>(N, DAG);
7147 case Intrinsic::loongarch_lsx_vbitclri_d:
7148 case Intrinsic::loongarch_lasx_xvbitclri_d:
7149 return lowerVectorBitClearImm<6>(N, DAG);
7150 case Intrinsic::loongarch_lsx_vbitset_b:
7151 case Intrinsic::loongarch_lsx_vbitset_h:
7152 case Intrinsic::loongarch_lsx_vbitset_w:
7153 case Intrinsic::loongarch_lsx_vbitset_d:
7154 case Intrinsic::loongarch_lasx_xvbitset_b:
7155 case Intrinsic::loongarch_lasx_xvbitset_h:
7156 case Intrinsic::loongarch_lasx_xvbitset_w:
7157 case Intrinsic::loongarch_lasx_xvbitset_d: {
7158 EVT VecTy = N->getValueType(0);
7159 SDValue One = DAG.getConstant(1, DL, VecTy);
7160 return DAG.getNode(
7161 ISD::OR, DL, VecTy, N->getOperand(1),
7162 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
7163 }
7164 case Intrinsic::loongarch_lsx_vbitseti_b:
7165 case Intrinsic::loongarch_lasx_xvbitseti_b:
7166 return lowerVectorBitSetImm<3>(N, DAG);
7167 case Intrinsic::loongarch_lsx_vbitseti_h:
7168 case Intrinsic::loongarch_lasx_xvbitseti_h:
7169 return lowerVectorBitSetImm<4>(N, DAG);
7170 case Intrinsic::loongarch_lsx_vbitseti_w:
7171 case Intrinsic::loongarch_lasx_xvbitseti_w:
7172 return lowerVectorBitSetImm<5>(N, DAG);
7173 case Intrinsic::loongarch_lsx_vbitseti_d:
7174 case Intrinsic::loongarch_lasx_xvbitseti_d:
7175 return lowerVectorBitSetImm<6>(N, DAG);
7176 case Intrinsic::loongarch_lsx_vbitrev_b:
7177 case Intrinsic::loongarch_lsx_vbitrev_h:
7178 case Intrinsic::loongarch_lsx_vbitrev_w:
7179 case Intrinsic::loongarch_lsx_vbitrev_d:
7180 case Intrinsic::loongarch_lasx_xvbitrev_b:
7181 case Intrinsic::loongarch_lasx_xvbitrev_h:
7182 case Intrinsic::loongarch_lasx_xvbitrev_w:
7183 case Intrinsic::loongarch_lasx_xvbitrev_d: {
7184 EVT VecTy = N->getValueType(0);
7185 SDValue One = DAG.getConstant(1, DL, VecTy);
7186 return DAG.getNode(
7187 ISD::XOR, DL, VecTy, N->getOperand(1),
7188 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
7189 }
7190 case Intrinsic::loongarch_lsx_vbitrevi_b:
7191 case Intrinsic::loongarch_lasx_xvbitrevi_b:
7192 return lowerVectorBitRevImm<3>(N, DAG);
7193 case Intrinsic::loongarch_lsx_vbitrevi_h:
7194 case Intrinsic::loongarch_lasx_xvbitrevi_h:
7195 return lowerVectorBitRevImm<4>(N, DAG);
7196 case Intrinsic::loongarch_lsx_vbitrevi_w:
7197 case Intrinsic::loongarch_lasx_xvbitrevi_w:
7198 return lowerVectorBitRevImm<5>(N, DAG);
7199 case Intrinsic::loongarch_lsx_vbitrevi_d:
7200 case Intrinsic::loongarch_lasx_xvbitrevi_d:
7201 return lowerVectorBitRevImm<6>(N, DAG);
7202 case Intrinsic::loongarch_lsx_vfadd_s:
7203 case Intrinsic::loongarch_lsx_vfadd_d:
7204 case Intrinsic::loongarch_lasx_xvfadd_s:
7205 case Intrinsic::loongarch_lasx_xvfadd_d:
7206 return DAG.getNode(ISD::FADD, DL, N->getValueType(0), N->getOperand(1),
7207 N->getOperand(2));
7208 case Intrinsic::loongarch_lsx_vfsub_s:
7209 case Intrinsic::loongarch_lsx_vfsub_d:
7210 case Intrinsic::loongarch_lasx_xvfsub_s:
7211 case Intrinsic::loongarch_lasx_xvfsub_d:
7212 return DAG.getNode(ISD::FSUB, DL, N->getValueType(0), N->getOperand(1),
7213 N->getOperand(2));
7214 case Intrinsic::loongarch_lsx_vfmul_s:
7215 case Intrinsic::loongarch_lsx_vfmul_d:
7216 case Intrinsic::loongarch_lasx_xvfmul_s:
7217 case Intrinsic::loongarch_lasx_xvfmul_d:
7218 return DAG.getNode(ISD::FMUL, DL, N->getValueType(0), N->getOperand(1),
7219 N->getOperand(2));
7220 case Intrinsic::loongarch_lsx_vfdiv_s:
7221 case Intrinsic::loongarch_lsx_vfdiv_d:
7222 case Intrinsic::loongarch_lasx_xvfdiv_s:
7223 case Intrinsic::loongarch_lasx_xvfdiv_d:
7224 return DAG.getNode(ISD::FDIV, DL, N->getValueType(0), N->getOperand(1),
7225 N->getOperand(2));
7226 case Intrinsic::loongarch_lsx_vfmadd_s:
7227 case Intrinsic::loongarch_lsx_vfmadd_d:
7228 case Intrinsic::loongarch_lasx_xvfmadd_s:
7229 case Intrinsic::loongarch_lasx_xvfmadd_d:
7230 return DAG.getNode(ISD::FMA, DL, N->getValueType(0), N->getOperand(1),
7231 N->getOperand(2), N->getOperand(3));
7232 case Intrinsic::loongarch_lsx_vinsgr2vr_b:
7233 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
7234 N->getOperand(1), N->getOperand(2),
7235 legalizeIntrinsicImmArg<4>(N, 3, DAG, Subtarget));
7236 case Intrinsic::loongarch_lsx_vinsgr2vr_h:
7237 case Intrinsic::loongarch_lasx_xvinsgr2vr_w:
7238 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
7239 N->getOperand(1), N->getOperand(2),
7240 legalizeIntrinsicImmArg<3>(N, 3, DAG, Subtarget));
7241 case Intrinsic::loongarch_lsx_vinsgr2vr_w:
7242 case Intrinsic::loongarch_lasx_xvinsgr2vr_d:
7243 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
7244 N->getOperand(1), N->getOperand(2),
7245 legalizeIntrinsicImmArg<2>(N, 3, DAG, Subtarget));
7246 case Intrinsic::loongarch_lsx_vinsgr2vr_d:
7247 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
7248 N->getOperand(1), N->getOperand(2),
7249 legalizeIntrinsicImmArg<1>(N, 3, DAG, Subtarget));
7250 case Intrinsic::loongarch_lsx_vreplgr2vr_b:
7251 case Intrinsic::loongarch_lsx_vreplgr2vr_h:
7252 case Intrinsic::loongarch_lsx_vreplgr2vr_w:
7253 case Intrinsic::loongarch_lsx_vreplgr2vr_d:
7254 case Intrinsic::loongarch_lasx_xvreplgr2vr_b:
7255 case Intrinsic::loongarch_lasx_xvreplgr2vr_h:
7256 case Intrinsic::loongarch_lasx_xvreplgr2vr_w:
7257 case Intrinsic::loongarch_lasx_xvreplgr2vr_d:
7258 return DAG.getNode(LoongArchISD::VREPLGR2VR, DL, N->getValueType(0),
7259 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
7260 N->getOperand(1)));
7261 case Intrinsic::loongarch_lsx_vreplve_b:
7262 case Intrinsic::loongarch_lsx_vreplve_h:
7263 case Intrinsic::loongarch_lsx_vreplve_w:
7264 case Intrinsic::loongarch_lsx_vreplve_d:
7265 case Intrinsic::loongarch_lasx_xvreplve_b:
7266 case Intrinsic::loongarch_lasx_xvreplve_h:
7267 case Intrinsic::loongarch_lasx_xvreplve_w:
7268 case Intrinsic::loongarch_lasx_xvreplve_d:
7269 return DAG.getNode(LoongArchISD::VREPLVE, DL, N->getValueType(0),
7270 N->getOperand(1),
7271 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
7272 N->getOperand(2)));
7273 case Intrinsic::loongarch_lsx_vpickve2gr_b:
7274 if (!Subtarget.is64Bit())
7275 return lowerVectorPickVE2GR<4>(N, DAG, LoongArchISD::VPICK_SEXT_ELT);
7276 break;
7277 case Intrinsic::loongarch_lsx_vpickve2gr_h:
7278 case Intrinsic::loongarch_lasx_xvpickve2gr_w:
7279 if (!Subtarget.is64Bit())
7280 return lowerVectorPickVE2GR<3>(N, DAG, LoongArchISD::VPICK_SEXT_ELT);
7281 break;
7282 case Intrinsic::loongarch_lsx_vpickve2gr_w:
7283 if (!Subtarget.is64Bit())
7284 return lowerVectorPickVE2GR<2>(N, DAG, LoongArchISD::VPICK_SEXT_ELT);
7285 break;
7286 case Intrinsic::loongarch_lsx_vpickve2gr_bu:
7287 if (!Subtarget.is64Bit())
7288 return lowerVectorPickVE2GR<4>(N, DAG, LoongArchISD::VPICK_ZEXT_ELT);
7289 break;
7290 case Intrinsic::loongarch_lsx_vpickve2gr_hu:
7291 case Intrinsic::loongarch_lasx_xvpickve2gr_wu:
7292 if (!Subtarget.is64Bit())
7293 return lowerVectorPickVE2GR<3>(N, DAG, LoongArchISD::VPICK_ZEXT_ELT);
7294 break;
7295 case Intrinsic::loongarch_lsx_vpickve2gr_wu:
7296 if (!Subtarget.is64Bit())
7297 return lowerVectorPickVE2GR<2>(N, DAG, LoongArchISD::VPICK_ZEXT_ELT);
7298 break;
7299 case Intrinsic::loongarch_lsx_bz_b:
7300 case Intrinsic::loongarch_lsx_bz_h:
7301 case Intrinsic::loongarch_lsx_bz_w:
7302 case Intrinsic::loongarch_lsx_bz_d:
7303 case Intrinsic::loongarch_lasx_xbz_b:
7304 case Intrinsic::loongarch_lasx_xbz_h:
7305 case Intrinsic::loongarch_lasx_xbz_w:
7306 case Intrinsic::loongarch_lasx_xbz_d:
7307 if (!Subtarget.is64Bit())
7308 return DAG.getNode(LoongArchISD::VALL_ZERO, DL, N->getValueType(0),
7309 N->getOperand(1));
7310 break;
7311 case Intrinsic::loongarch_lsx_bz_v:
7312 case Intrinsic::loongarch_lasx_xbz_v:
7313 if (!Subtarget.is64Bit())
7314 return DAG.getNode(LoongArchISD::VANY_ZERO, DL, N->getValueType(0),
7315 N->getOperand(1));
7316 break;
7317 case Intrinsic::loongarch_lsx_bnz_b:
7318 case Intrinsic::loongarch_lsx_bnz_h:
7319 case Intrinsic::loongarch_lsx_bnz_w:
7320 case Intrinsic::loongarch_lsx_bnz_d:
7321 case Intrinsic::loongarch_lasx_xbnz_b:
7322 case Intrinsic::loongarch_lasx_xbnz_h:
7323 case Intrinsic::loongarch_lasx_xbnz_w:
7324 case Intrinsic::loongarch_lasx_xbnz_d:
7325 if (!Subtarget.is64Bit())
7326 return DAG.getNode(LoongArchISD::VALL_NONZERO, DL, N->getValueType(0),
7327 N->getOperand(1));
7328 break;
7329 case Intrinsic::loongarch_lsx_bnz_v:
7330 case Intrinsic::loongarch_lasx_xbnz_v:
7331 if (!Subtarget.is64Bit())
7332 return DAG.getNode(LoongArchISD::VANY_NONZERO, DL, N->getValueType(0),
7333 N->getOperand(1));
7334 break;
7335 case Intrinsic::loongarch_lasx_concat_128_s:
7336 case Intrinsic::loongarch_lasx_concat_128_d:
7337 case Intrinsic::loongarch_lasx_concat_128:
7338 return DAG.getNode(ISD::CONCAT_VECTORS, DL, N->getValueType(0),
7339 N->getOperand(1), N->getOperand(2));
7340 }
7341 return SDValue();
7342}
7343
7346 const LoongArchSubtarget &Subtarget) {
7347 // If the input to MOVGR2FR_W_LA64 is just MOVFR2GR_S_LA64 the the
7348 // conversion is unnecessary and can be replaced with the
7349 // MOVFR2GR_S_LA64 operand.
7350 SDValue Op0 = N->getOperand(0);
7351 if (Op0.getOpcode() == LoongArchISD::MOVFR2GR_S_LA64)
7352 return Op0.getOperand(0);
7353 return SDValue();
7354}
7355
7358 const LoongArchSubtarget &Subtarget) {
7359 // If the input to MOVFR2GR_S_LA64 is just MOVGR2FR_W_LA64 then the
7360 // conversion is unnecessary and can be replaced with the MOVGR2FR_W_LA64
7361 // operand.
7362 SDValue Op0 = N->getOperand(0);
7363 if (Op0->getOpcode() == LoongArchISD::MOVGR2FR_W_LA64) {
7364 assert(Op0.getOperand(0).getValueType() == N->getSimpleValueType(0) &&
7365 "Unexpected value type!");
7366 return Op0.getOperand(0);
7367 }
7368 return SDValue();
7369}
7370
7373 const LoongArchSubtarget &Subtarget) {
7374 MVT VT = N->getSimpleValueType(0);
7375 unsigned NumBits = VT.getScalarSizeInBits();
7376
7377 // Simplify the inputs.
7378 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
7379 APInt DemandedMask(APInt::getAllOnes(NumBits));
7380 if (TLI.SimplifyDemandedBits(SDValue(N, 0), DemandedMask, DCI))
7381 return SDValue(N, 0);
7382
7383 return SDValue();
7384}
7385
7386static SDValue
7389 const LoongArchSubtarget &Subtarget) {
7390 SDValue Op0 = N->getOperand(0);
7391 SDLoc DL(N);
7392
7393 // If the input to SplitPairF64 is just BuildPairF64 then the operation is
7394 // redundant. Instead, use BuildPairF64's operands directly.
7395 if (Op0->getOpcode() == LoongArchISD::BUILD_PAIR_F64)
7396 return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
7397
7398 if (Op0->isUndef()) {
7399 SDValue Lo = DAG.getUNDEF(MVT::i32);
7400 SDValue Hi = DAG.getUNDEF(MVT::i32);
7401 return DCI.CombineTo(N, Lo, Hi);
7402 }
7403
7404 // It's cheaper to materialise two 32-bit integers than to load a double
7405 // from the constant pool and transfer it to integer registers through the
7406 // stack.
7408 APInt V = C->getValueAPF().bitcastToAPInt();
7409 SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
7410 SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
7411 return DCI.CombineTo(N, Lo, Hi);
7412 }
7413
7414 return SDValue();
7415}
7416
7417/// Do target-specific dag combines on LoongArchISD::VANDN nodes.
7420 const LoongArchSubtarget &Subtarget) {
7421 SDValue N0 = N->getOperand(0);
7422 SDValue N1 = N->getOperand(1);
7423 MVT VT = N->getSimpleValueType(0);
7424 SDLoc DL(N);
7425
7426 // VANDN(undef, x) -> 0
7427 // VANDN(x, undef) -> 0
7428 if (N0.isUndef() || N1.isUndef())
7429 return DAG.getConstant(0, DL, VT);
7430
7431 // VANDN(0, x) -> x
7433 return N1;
7434
7435 // VANDN(x, 0) -> 0
7437 return DAG.getConstant(0, DL, VT);
7438
7439 // VANDN(x, -1) -> NOT(x) -> XOR(x, -1)
7441 return DAG.getNOT(DL, N0, VT);
7442
7443 // Turn VANDN back to AND if input is inverted.
7444 if (SDValue Not = isNOT(N0, DAG))
7445 return DAG.getNode(ISD::AND, DL, VT, DAG.getBitcast(VT, Not), N1);
7446
7447 // Folds for better commutativity:
7448 if (N1->hasOneUse()) {
7449 // VANDN(x,NOT(y)) -> AND(NOT(x),NOT(y)) -> NOT(OR(X,Y)).
7450 if (SDValue Not = isNOT(N1, DAG))
7451 return DAG.getNOT(
7452 DL, DAG.getNode(ISD::OR, DL, VT, N0, DAG.getBitcast(VT, Not)), VT);
7453
7454 // VANDN(x, SplatVector(Imm)) -> AND(NOT(x), NOT(SplatVector(~Imm)))
7455 // -> NOT(OR(x, SplatVector(-Imm))
7456 // Combination is performed only when VT is v16i8/v32i8, using `vnori.b` to
7457 // gain benefits.
7458 if (!DCI.isBeforeLegalizeOps() && (VT == MVT::v16i8 || VT == MVT::v32i8) &&
7459 N1.getOpcode() == ISD::BUILD_VECTOR) {
7460 if (SDValue SplatValue =
7461 cast<BuildVectorSDNode>(N1.getNode())->getSplatValue()) {
7462 if (!N1->isOnlyUserOf(SplatValue.getNode()))
7463 return SDValue();
7464
7465 if (auto *C = dyn_cast<ConstantSDNode>(SplatValue)) {
7466 uint8_t NCVal = static_cast<uint8_t>(~(C->getSExtValue()));
7467 SDValue Not =
7468 DAG.getSplat(VT, DL, DAG.getTargetConstant(NCVal, DL, MVT::i8));
7469 return DAG.getNOT(
7470 DL, DAG.getNode(ISD::OR, DL, VT, N0, DAG.getBitcast(VT, Not)),
7471 VT);
7472 }
7473 }
7474 }
7475 }
7476
7477 return SDValue();
7478}
7479
7482 const LoongArchSubtarget &Subtarget) {
7483 SDLoc DL(N);
7484 EVT VT = N->getValueType(0);
7485
7486 if (VT != MVT::f32 && VT != MVT::f64)
7487 return SDValue();
7488 if (VT == MVT::f32 && !Subtarget.hasBasicF())
7489 return SDValue();
7490 if (VT == MVT::f64 && !Subtarget.hasBasicD())
7491 return SDValue();
7492
7493 // Only optimize when the source and destination types have the same width.
7494 if (VT.getSizeInBits() != N->getOperand(0).getValueSizeInBits())
7495 return SDValue();
7496
7497 SDValue Src = N->getOperand(0);
7498 // If the result of an integer load is only used by an integer-to-float
7499 // conversion, use a fp load instead. This eliminates an integer-to-float-move
7500 // (movgr2fr) instruction.
7501 if (ISD::isNormalLoad(Src.getNode()) && Src.hasOneUse() &&
7502 // Do not change the width of a volatile load. This condition check is
7503 // inspired by AArch64.
7504 !cast<LoadSDNode>(Src)->isVolatile()) {
7505 LoadSDNode *LN0 = cast<LoadSDNode>(Src);
7506 SDValue Load = DAG.getLoad(VT, DL, LN0->getChain(), LN0->getBasePtr(),
7507 LN0->getPointerInfo(), LN0->getAlign(),
7508 LN0->getMemOperand()->getFlags());
7509
7510 // Make sure successors of the original load stay after it by updating them
7511 // to use the new Chain.
7512 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), Load.getValue(1));
7513 return DAG.getNode(LoongArchISD::SITOF, SDLoc(N), VT, Load);
7514 }
7515
7516 return SDValue();
7517}
7518
7519// Try to widen AND, OR and XOR nodes to VT in order to remove casts around
7520// logical operations, like in the example below.
7521// or (and (truncate x, truncate y)),
7522// (xor (truncate z, build_vector (constants)))
7523// Given a target type \p VT, we generate
7524// or (and x, y), (xor z, zext(build_vector (constants)))
7525// given x, y and z are of type \p VT. We can do so, if operands are either
7526// truncates from VT types, the second operand is a vector of constants, can
7527// be recursively promoted or is an existing extension we can extend further.
7529 SelectionDAG &DAG,
7530 const LoongArchSubtarget &Subtarget,
7531 unsigned Depth) {
7532 // Limit recursion to avoid excessive compile times.
7534 return SDValue();
7535
7536 if (!ISD::isBitwiseLogicOp(N.getOpcode()))
7537 return SDValue();
7538
7539 SDValue N0 = N.getOperand(0);
7540 SDValue N1 = N.getOperand(1);
7541
7542 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
7543 if (!TLI.isOperationLegalOrPromote(N.getOpcode(), VT))
7544 return SDValue();
7545
7546 if (SDValue NN0 =
7547 PromoteMaskArithmetic(N0, DL, VT, DAG, Subtarget, Depth + 1))
7548 N0 = NN0;
7549 else {
7550 // The left side has to be a 'trunc'.
7551 bool LHSTrunc = N0.getOpcode() == ISD::TRUNCATE &&
7552 N0.getOperand(0).getValueType() == VT;
7553 if (LHSTrunc)
7554 N0 = N0.getOperand(0);
7555 else
7556 return SDValue();
7557 }
7558
7559 if (SDValue NN1 =
7560 PromoteMaskArithmetic(N1, DL, VT, DAG, Subtarget, Depth + 1))
7561 N1 = NN1;
7562 else {
7563 // The right side has to be a 'trunc', a (foldable) constant or an
7564 // existing extension we can extend further.
7565 bool RHSTrunc = N1.getOpcode() == ISD::TRUNCATE &&
7566 N1.getOperand(0).getValueType() == VT;
7567 if (RHSTrunc)
7568 N1 = N1.getOperand(0);
7569 else if (ISD::isExtVecInRegOpcode(N1.getOpcode()) && VT.is256BitVector() &&
7570 Subtarget.hasExtLASX() && N1.hasOneUse())
7571 N1 = DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0));
7572 // On 32-bit platform, i64 is an illegal integer scalar type, and
7573 // FoldConstantArithmetic will fail for v4i64. This may be optimized in the
7574 // future.
7575 else if (SDValue Cst =
7577 N1 = Cst;
7578 else
7579 return SDValue();
7580 }
7581
7582 return DAG.getNode(N.getOpcode(), DL, VT, N0, N1);
7583}
7584
7585// On LASX the type v4i1/v8i1/v16i1 may be legalized to v4i32/v8i16/v16i8, which
7586// is LSX-sized register. In most cases we actually compare or select LASX-sized
7587// registers and mixing the two types creates horrible code. This method
7588// optimizes some of the transition sequences.
7590 SelectionDAG &DAG,
7591 const LoongArchSubtarget &Subtarget) {
7592 EVT VT = N.getValueType();
7593 assert(VT.isVector() && "Expected vector type");
7594 assert((N.getOpcode() == ISD::ANY_EXTEND ||
7595 N.getOpcode() == ISD::ZERO_EXTEND ||
7596 N.getOpcode() == ISD::SIGN_EXTEND) &&
7597 "Invalid Node");
7598
7599 if (!Subtarget.hasExtLASX() || !VT.is256BitVector())
7600 return SDValue();
7601
7602 SDValue Narrow = N.getOperand(0);
7603 EVT NarrowVT = Narrow.getValueType();
7604
7605 // Generate the wide operation.
7606 SDValue Op = PromoteMaskArithmetic(Narrow, DL, VT, DAG, Subtarget, 0);
7607 if (!Op)
7608 return SDValue();
7609 switch (N.getOpcode()) {
7610 default:
7611 llvm_unreachable("Unexpected opcode");
7612 case ISD::ANY_EXTEND:
7613 return Op;
7614 case ISD::ZERO_EXTEND:
7615 return DAG.getZeroExtendInReg(Op, DL, NarrowVT);
7616 case ISD::SIGN_EXTEND:
7617 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
7618 DAG.getValueType(NarrowVT));
7619 }
7620}
7621
7624 const LoongArchSubtarget &Subtarget) {
7625 EVT VT = N->getValueType(0);
7626 SDLoc DL(N);
7627
7628 if (VT.isVector())
7629 if (SDValue R = PromoteMaskArithmetic(SDValue(N, 0), DL, DAG, Subtarget))
7630 return R;
7631
7632 return SDValue();
7633}
7634
7635static SDValue
7638 const LoongArchSubtarget &Subtarget) {
7639 SDLoc DL(N);
7640 EVT VT = N->getValueType(0);
7641
7642 if (VT.isVector() && N->getNumOperands() == 2)
7643 if (SDValue R = combineFP_ROUND(SDValue(N, 0), DL, DAG, Subtarget))
7644 return R;
7645
7646 return SDValue();
7647}
7648
7651 const LoongArchSubtarget &Subtarget) {
7652 if (DCI.isBeforeLegalizeOps())
7653 return SDValue();
7654
7655 EVT VT = N->getValueType(0);
7656 if (!VT.isVector())
7657 return SDValue();
7658
7659 if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
7660 return SDValue();
7661
7662 EVT EltVT = VT.getVectorElementType();
7663 if (!EltVT.isInteger())
7664 return SDValue();
7665
7666 SDValue Cond = N->getOperand(0);
7667 SDValue TrueVal = N->getOperand(1);
7668 SDValue FalseVal = N->getOperand(2);
7669
7670 // match:
7671 //
7672 // vselect (setcc shift, 0, seteq),
7673 // x,
7674 // rounded_shift
7675
7676 if (Cond.getOpcode() != ISD::SETCC)
7677 return SDValue();
7678
7679 if (!ISD::isConstantSplatVectorAllZeros(Cond.getOperand(1).getNode()))
7680 return SDValue();
7681
7682 auto *CC = cast<CondCodeSDNode>(Cond.getOperand(2));
7683 if (CC->get() != ISD::SETEQ)
7684 return SDValue();
7685
7686 SDValue Shift = Cond.getOperand(0);
7687
7688 // True branch must be original value:
7689 //
7690 // vselect cond, x, ...
7691
7692 SDValue X = TrueVal;
7693
7694 // Now match rounded shift pattern:
7695 //
7696 // add
7697 // (and
7698 // (srl X, shift-1)
7699 // 1)
7700 // (srl/sra X, shift)
7701
7702 if (FalseVal.getOpcode() != ISD::ADD)
7703 return SDValue();
7704
7705 SDValue Add0 = FalseVal.getOperand(0);
7706 SDValue Add1 = FalseVal.getOperand(1);
7707 SDValue And;
7708 SDValue Shr;
7709
7710 if (Add0.getOpcode() == ISD::AND) {
7711 And = Add0;
7712 Shr = Add1;
7713 } else if (Add1.getOpcode() == ISD::AND) {
7714 And = Add1;
7715 Shr = Add0;
7716 } else {
7717 return SDValue();
7718 }
7719
7720 // match:
7721 //
7722 // srl/sra X, shift
7723
7724 if (Shr.getOpcode() != ISD::SRL && Shr.getOpcode() != ISD::SRA)
7725 return SDValue();
7726
7727 if (Shr.getOperand(0) != X)
7728 return SDValue();
7729
7730 if (Shr.getOperand(1) != Shift)
7731 return SDValue();
7732
7733 // match:
7734 //
7735 // and
7736 // (srl X, shift-1)
7737 // 1
7738
7739 SDValue Srl = And.getOperand(0);
7740 SDValue One = And.getOperand(1);
7741 APInt SplatVal;
7742
7743 if (Srl.getOpcode() != ISD::SRL)
7744 return SDValue();
7745
7746 One = peekThroughBitcasts(One);
7747 if (!isConstantSplatVector(One, SplatVal, EltVT.getSizeInBits()))
7748 return SDValue();
7749
7750 if (SplatVal != 1)
7751 return SDValue();
7752
7753 if (Srl.getOperand(0) != X)
7754 return SDValue();
7755
7756 // match:
7757 //
7758 // shift-1
7759
7760 SDValue ShiftMinus1 = Srl.getOperand(1);
7761
7762 if (ShiftMinus1.getOpcode() != ISD::ADD)
7763 return SDValue();
7764
7765 if (ShiftMinus1.getOperand(0) != Shift)
7766 return SDValue();
7767
7769 return SDValue();
7770
7771 // We matched a rounded right shift pattern and can lower it
7772 // to a single vector rounded shift instruction.
7773
7774 SDLoc DL(N);
7775 return DAG.getNode(Shr.getOpcode() == ISD::SRL ? LoongArchISD::VSRLR
7776 : LoongArchISD::VSRAR,
7777 DL, VT, X, Shift);
7778}
7779
7781 DAGCombinerInfo &DCI) const {
7782 SelectionDAG &DAG = DCI.DAG;
7783 switch (N->getOpcode()) {
7784 default:
7785 break;
7786 case ISD::ADD:
7787 return performADDCombine(N, DAG, DCI, Subtarget);
7788 case ISD::AND:
7789 return performANDCombine(N, DAG, DCI, Subtarget);
7790 case ISD::OR:
7791 return performORCombine(N, DAG, DCI, Subtarget);
7792 case ISD::SETCC:
7793 return performSETCCCombine(N, DAG, DCI, Subtarget);
7794 case ISD::SRL:
7795 return performSRLCombine(N, DAG, DCI, Subtarget);
7796 case ISD::BITCAST:
7797 return performBITCASTCombine(N, DAG, DCI, Subtarget);
7798 case ISD::ANY_EXTEND:
7799 case ISD::ZERO_EXTEND:
7800 case ISD::SIGN_EXTEND:
7801 return performEXTENDCombine(N, DAG, DCI, Subtarget);
7802 case ISD::SINT_TO_FP:
7803 return performSINT_TO_FPCombine(N, DAG, DCI, Subtarget);
7804 case LoongArchISD::BITREV_W:
7805 return performBITREV_WCombine(N, DAG, DCI, Subtarget);
7806 case LoongArchISD::BR_CC:
7807 return performBR_CCCombine(N, DAG, DCI, Subtarget);
7808 case LoongArchISD::SELECT_CC:
7809 return performSELECT_CCCombine(N, DAG, DCI, Subtarget);
7811 return performINTRINSIC_WO_CHAINCombine(N, DAG, DCI, Subtarget);
7812 case LoongArchISD::MOVGR2FR_W_LA64:
7813 return performMOVGR2FR_WCombine(N, DAG, DCI, Subtarget);
7814 case LoongArchISD::MOVFR2GR_S_LA64:
7815 return performMOVFR2GR_SCombine(N, DAG, DCI, Subtarget);
7816 case LoongArchISD::VMSKLTZ:
7817 case LoongArchISD::XVMSKLTZ:
7818 return performVMSKLTZCombine(N, DAG, DCI, Subtarget);
7819 case LoongArchISD::SPLIT_PAIR_F64:
7820 return performSPLIT_PAIR_F64Combine(N, DAG, DCI, Subtarget);
7821 case LoongArchISD::VANDN:
7822 return performVANDNCombine(N, DAG, DCI, Subtarget);
7824 return performCONCAT_VECTORSCombine(N, DAG, DCI, Subtarget);
7825 case ISD::VSELECT:
7826 return performVSELECTCombine(N, DAG, DCI, Subtarget);
7827 case LoongArchISD::VPACKEV:
7828 if (SDValue Result =
7829 combineFP_ROUND(SDValue(N, 0), SDLoc(N), DAG, Subtarget))
7830 return Result;
7831 }
7832 return SDValue();
7833}
7834
7837 if (!ZeroDivCheck)
7838 return MBB;
7839
7840 // Build instructions:
7841 // MBB:
7842 // div(or mod) $dst, $dividend, $divisor
7843 // bne $divisor, $zero, SinkMBB
7844 // BreakMBB:
7845 // break 7 // BRK_DIVZERO
7846 // SinkMBB:
7847 // fallthrough
7848 const BasicBlock *LLVM_BB = MBB->getBasicBlock();
7849 MachineFunction::iterator It = ++MBB->getIterator();
7850 MachineFunction *MF = MBB->getParent();
7851 auto BreakMBB = MF->CreateMachineBasicBlock(LLVM_BB);
7852 auto SinkMBB = MF->CreateMachineBasicBlock(LLVM_BB);
7853 MF->insert(It, BreakMBB);
7854 MF->insert(It, SinkMBB);
7855
7856 // Transfer the remainder of MBB and its successor edges to SinkMBB.
7857 SinkMBB->splice(SinkMBB->end(), MBB, std::next(MI.getIterator()), MBB->end());
7858 SinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
7859
7860 const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
7861 DebugLoc DL = MI.getDebugLoc();
7862 MachineOperand &Divisor = MI.getOperand(2);
7863 Register DivisorReg = Divisor.getReg();
7864
7865 // MBB:
7866 BuildMI(MBB, DL, TII.get(LoongArch::BNE))
7867 .addReg(DivisorReg, getKillRegState(Divisor.isKill()))
7868 .addReg(LoongArch::R0)
7869 .addMBB(SinkMBB);
7870 MBB->addSuccessor(BreakMBB);
7871 MBB->addSuccessor(SinkMBB);
7872
7873 // BreakMBB:
7874 // See linux header file arch/loongarch/include/uapi/asm/break.h for the
7875 // definition of BRK_DIVZERO.
7876 BuildMI(BreakMBB, DL, TII.get(LoongArch::BREAK)).addImm(7 /*BRK_DIVZERO*/);
7877 BreakMBB->addSuccessor(SinkMBB);
7878
7879 // Clear Divisor's kill flag.
7880 Divisor.setIsKill(false);
7881
7882 return SinkMBB;
7883}
7884
7885static MachineBasicBlock *
7887 const LoongArchSubtarget &Subtarget) {
7888 unsigned CondOpc;
7889 switch (MI.getOpcode()) {
7890 default:
7891 llvm_unreachable("Unexpected opcode");
7892 case LoongArch::PseudoVBZ:
7893 CondOpc = LoongArch::VSETEQZ_V;
7894 break;
7895 case LoongArch::PseudoVBZ_B:
7896 CondOpc = LoongArch::VSETANYEQZ_B;
7897 break;
7898 case LoongArch::PseudoVBZ_H:
7899 CondOpc = LoongArch::VSETANYEQZ_H;
7900 break;
7901 case LoongArch::PseudoVBZ_W:
7902 CondOpc = LoongArch::VSETANYEQZ_W;
7903 break;
7904 case LoongArch::PseudoVBZ_D:
7905 CondOpc = LoongArch::VSETANYEQZ_D;
7906 break;
7907 case LoongArch::PseudoVBNZ:
7908 CondOpc = LoongArch::VSETNEZ_V;
7909 break;
7910 case LoongArch::PseudoVBNZ_B:
7911 CondOpc = LoongArch::VSETALLNEZ_B;
7912 break;
7913 case LoongArch::PseudoVBNZ_H:
7914 CondOpc = LoongArch::VSETALLNEZ_H;
7915 break;
7916 case LoongArch::PseudoVBNZ_W:
7917 CondOpc = LoongArch::VSETALLNEZ_W;
7918 break;
7919 case LoongArch::PseudoVBNZ_D:
7920 CondOpc = LoongArch::VSETALLNEZ_D;
7921 break;
7922 case LoongArch::PseudoXVBZ:
7923 CondOpc = LoongArch::XVSETEQZ_V;
7924 break;
7925 case LoongArch::PseudoXVBZ_B:
7926 CondOpc = LoongArch::XVSETANYEQZ_B;
7927 break;
7928 case LoongArch::PseudoXVBZ_H:
7929 CondOpc = LoongArch::XVSETANYEQZ_H;
7930 break;
7931 case LoongArch::PseudoXVBZ_W:
7932 CondOpc = LoongArch::XVSETANYEQZ_W;
7933 break;
7934 case LoongArch::PseudoXVBZ_D:
7935 CondOpc = LoongArch::XVSETANYEQZ_D;
7936 break;
7937 case LoongArch::PseudoXVBNZ:
7938 CondOpc = LoongArch::XVSETNEZ_V;
7939 break;
7940 case LoongArch::PseudoXVBNZ_B:
7941 CondOpc = LoongArch::XVSETALLNEZ_B;
7942 break;
7943 case LoongArch::PseudoXVBNZ_H:
7944 CondOpc = LoongArch::XVSETALLNEZ_H;
7945 break;
7946 case LoongArch::PseudoXVBNZ_W:
7947 CondOpc = LoongArch::XVSETALLNEZ_W;
7948 break;
7949 case LoongArch::PseudoXVBNZ_D:
7950 CondOpc = LoongArch::XVSETALLNEZ_D;
7951 break;
7952 }
7953
7954 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
7955 const BasicBlock *LLVM_BB = BB->getBasicBlock();
7956 DebugLoc DL = MI.getDebugLoc();
7959
7960 MachineFunction *F = BB->getParent();
7961 MachineBasicBlock *FalseBB = F->CreateMachineBasicBlock(LLVM_BB);
7962 MachineBasicBlock *TrueBB = F->CreateMachineBasicBlock(LLVM_BB);
7963 MachineBasicBlock *SinkBB = F->CreateMachineBasicBlock(LLVM_BB);
7964
7965 F->insert(It, FalseBB);
7966 F->insert(It, TrueBB);
7967 F->insert(It, SinkBB);
7968
7969 // Transfer the remainder of MBB and its successor edges to Sink.
7970 SinkBB->splice(SinkBB->end(), BB, std::next(MI.getIterator()), BB->end());
7972
7973 // Insert the real instruction to BB.
7974 Register FCC = MRI.createVirtualRegister(&LoongArch::CFRRegClass);
7975 BuildMI(BB, DL, TII->get(CondOpc), FCC).addReg(MI.getOperand(1).getReg());
7976
7977 // Insert branch.
7978 BuildMI(BB, DL, TII->get(LoongArch::BCNEZ)).addReg(FCC).addMBB(TrueBB);
7979 BB->addSuccessor(FalseBB);
7980 BB->addSuccessor(TrueBB);
7981
7982 // FalseBB.
7983 Register RD1 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
7984 BuildMI(FalseBB, DL, TII->get(LoongArch::ADDI_W), RD1)
7985 .addReg(LoongArch::R0)
7986 .addImm(0);
7987 BuildMI(FalseBB, DL, TII->get(LoongArch::PseudoBR)).addMBB(SinkBB);
7988 FalseBB->addSuccessor(SinkBB);
7989
7990 // TrueBB.
7991 Register RD2 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
7992 BuildMI(TrueBB, DL, TII->get(LoongArch::ADDI_W), RD2)
7993 .addReg(LoongArch::R0)
7994 .addImm(1);
7995 TrueBB->addSuccessor(SinkBB);
7996
7997 // SinkBB: merge the results.
7998 BuildMI(*SinkBB, SinkBB->begin(), DL, TII->get(LoongArch::PHI),
7999 MI.getOperand(0).getReg())
8000 .addReg(RD1)
8001 .addMBB(FalseBB)
8002 .addReg(RD2)
8003 .addMBB(TrueBB);
8004
8005 // The pseudo instruction is gone now.
8006 MI.eraseFromParent();
8007 return SinkBB;
8008}
8009
8010static MachineBasicBlock *
8012 const LoongArchSubtarget &Subtarget) {
8013 unsigned InsOp;
8014 unsigned BroadcastOp;
8015 unsigned HalfSize;
8016 switch (MI.getOpcode()) {
8017 default:
8018 llvm_unreachable("Unexpected opcode");
8019 case LoongArch::PseudoXVINSGR2VR_B:
8020 HalfSize = 16;
8021 BroadcastOp = LoongArch::XVREPLGR2VR_B;
8022 InsOp = LoongArch::XVEXTRINS_B;
8023 break;
8024 case LoongArch::PseudoXVINSGR2VR_H:
8025 HalfSize = 8;
8026 BroadcastOp = LoongArch::XVREPLGR2VR_H;
8027 InsOp = LoongArch::XVEXTRINS_H;
8028 break;
8029 }
8030 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
8031 const TargetRegisterClass *RC = &LoongArch::LASX256RegClass;
8032 const TargetRegisterClass *SubRC = &LoongArch::LSX128RegClass;
8033 DebugLoc DL = MI.getDebugLoc();
8035 // XDst = vector_insert XSrc, Elt, Idx
8036 Register XDst = MI.getOperand(0).getReg();
8037 Register XSrc = MI.getOperand(1).getReg();
8038 Register Elt = MI.getOperand(2).getReg();
8039 unsigned Idx = MI.getOperand(3).getImm();
8040
8041 if (XSrc.isVirtual() && MRI.getVRegDef(XSrc)->isImplicitDef() &&
8042 Idx < HalfSize) {
8043 Register ScratchSubReg1 = MRI.createVirtualRegister(SubRC);
8044 Register ScratchSubReg2 = MRI.createVirtualRegister(SubRC);
8045
8046 BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), ScratchSubReg1)
8047 .addReg(XSrc, {}, LoongArch::sub_128);
8048 BuildMI(*BB, MI, DL,
8049 TII->get(HalfSize == 8 ? LoongArch::VINSGR2VR_H
8050 : LoongArch::VINSGR2VR_B),
8051 ScratchSubReg2)
8052 .addReg(ScratchSubReg1)
8053 .addReg(Elt)
8054 .addImm(Idx);
8055
8056 BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), XDst)
8057 .addReg(ScratchSubReg2)
8058 .addImm(LoongArch::sub_128);
8059 } else {
8060 Register ScratchReg1 = MRI.createVirtualRegister(RC);
8061 Register ScratchReg2 = MRI.createVirtualRegister(RC);
8062
8063 BuildMI(*BB, MI, DL, TII->get(BroadcastOp), ScratchReg1).addReg(Elt);
8064
8065 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), ScratchReg2)
8066 .addReg(ScratchReg1)
8067 .addReg(XSrc)
8068 .addImm(Idx >= HalfSize ? 48 : 18);
8069
8070 BuildMI(*BB, MI, DL, TII->get(InsOp), XDst)
8071 .addReg(XSrc)
8072 .addReg(ScratchReg2)
8073 .addImm((Idx >= HalfSize ? Idx - HalfSize : Idx) * 17);
8074 }
8075
8076 MI.eraseFromParent();
8077 return BB;
8078}
8079
8082 const LoongArchSubtarget &Subtarget) {
8083 assert(Subtarget.hasExtLSX());
8084 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
8085 const TargetRegisterClass *RC = &LoongArch::LSX128RegClass;
8086 DebugLoc DL = MI.getDebugLoc();
8088 Register Dst = MI.getOperand(0).getReg();
8089 Register Src = MI.getOperand(1).getReg();
8090 Register ScratchReg1 = MRI.createVirtualRegister(RC);
8091 Register ScratchReg2 = MRI.createVirtualRegister(RC);
8092 Register ScratchReg3 = MRI.createVirtualRegister(RC);
8093
8094 BuildMI(*BB, MI, DL, TII->get(LoongArch::VLDI), ScratchReg1).addImm(0);
8095 BuildMI(*BB, MI, DL,
8096 TII->get(Subtarget.is64Bit() ? LoongArch::VINSGR2VR_D
8097 : LoongArch::VINSGR2VR_W),
8098 ScratchReg2)
8099 .addReg(ScratchReg1)
8100 .addReg(Src)
8101 .addImm(0);
8102 BuildMI(
8103 *BB, MI, DL,
8104 TII->get(Subtarget.is64Bit() ? LoongArch::VPCNT_D : LoongArch::VPCNT_W),
8105 ScratchReg3)
8106 .addReg(ScratchReg2);
8107 BuildMI(*BB, MI, DL,
8108 TII->get(Subtarget.is64Bit() ? LoongArch::VPICKVE2GR_D
8109 : LoongArch::VPICKVE2GR_W),
8110 Dst)
8111 .addReg(ScratchReg3)
8112 .addImm(0);
8113
8114 MI.eraseFromParent();
8115 return BB;
8116}
8117
8118static MachineBasicBlock *
8120 const LoongArchSubtarget &Subtarget) {
8121 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
8122 const TargetRegisterClass *RC = &LoongArch::LSX128RegClass;
8123 const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
8125 Register Dst = MI.getOperand(0).getReg();
8126 Register Src = MI.getOperand(1).getReg();
8127 DebugLoc DL = MI.getDebugLoc();
8128 unsigned EleBits = 8;
8129 unsigned NotOpc = 0;
8130 unsigned MskOpc;
8131
8132 switch (MI.getOpcode()) {
8133 default:
8134 llvm_unreachable("Unexpected opcode");
8135 case LoongArch::PseudoVMSKLTZ_B:
8136 MskOpc = LoongArch::VMSKLTZ_B;
8137 break;
8138 case LoongArch::PseudoVMSKLTZ_H:
8139 MskOpc = LoongArch::VMSKLTZ_H;
8140 EleBits = 16;
8141 break;
8142 case LoongArch::PseudoVMSKLTZ_W:
8143 MskOpc = LoongArch::VMSKLTZ_W;
8144 EleBits = 32;
8145 break;
8146 case LoongArch::PseudoVMSKLTZ_D:
8147 MskOpc = LoongArch::VMSKLTZ_D;
8148 EleBits = 64;
8149 break;
8150 case LoongArch::PseudoVMSKGEZ_B:
8151 MskOpc = LoongArch::VMSKGEZ_B;
8152 break;
8153 case LoongArch::PseudoVMSKEQZ_B:
8154 MskOpc = LoongArch::VMSKNZ_B;
8155 NotOpc = LoongArch::VNOR_V;
8156 break;
8157 case LoongArch::PseudoVMSKNEZ_B:
8158 MskOpc = LoongArch::VMSKNZ_B;
8159 break;
8160 case LoongArch::PseudoXVMSKLTZ_B:
8161 MskOpc = LoongArch::XVMSKLTZ_B;
8162 RC = &LoongArch::LASX256RegClass;
8163 break;
8164 case LoongArch::PseudoXVMSKLTZ_H:
8165 MskOpc = LoongArch::XVMSKLTZ_H;
8166 RC = &LoongArch::LASX256RegClass;
8167 EleBits = 16;
8168 break;
8169 case LoongArch::PseudoXVMSKLTZ_W:
8170 MskOpc = LoongArch::XVMSKLTZ_W;
8171 RC = &LoongArch::LASX256RegClass;
8172 EleBits = 32;
8173 break;
8174 case LoongArch::PseudoXVMSKLTZ_D:
8175 MskOpc = LoongArch::XVMSKLTZ_D;
8176 RC = &LoongArch::LASX256RegClass;
8177 EleBits = 64;
8178 break;
8179 case LoongArch::PseudoXVMSKGEZ_B:
8180 MskOpc = LoongArch::XVMSKGEZ_B;
8181 RC = &LoongArch::LASX256RegClass;
8182 break;
8183 case LoongArch::PseudoXVMSKEQZ_B:
8184 MskOpc = LoongArch::XVMSKNZ_B;
8185 NotOpc = LoongArch::XVNOR_V;
8186 RC = &LoongArch::LASX256RegClass;
8187 break;
8188 case LoongArch::PseudoXVMSKNEZ_B:
8189 MskOpc = LoongArch::XVMSKNZ_B;
8190 RC = &LoongArch::LASX256RegClass;
8191 break;
8192 }
8193
8194 Register Msk = MRI.createVirtualRegister(RC);
8195 if (NotOpc) {
8196 Register Tmp = MRI.createVirtualRegister(RC);
8197 BuildMI(*BB, MI, DL, TII->get(MskOpc), Tmp).addReg(Src);
8198 BuildMI(*BB, MI, DL, TII->get(NotOpc), Msk)
8199 .addReg(Tmp, RegState::Kill)
8200 .addReg(Tmp, RegState::Kill);
8201 } else {
8202 BuildMI(*BB, MI, DL, TII->get(MskOpc), Msk).addReg(Src);
8203 }
8204
8205 if (TRI->getRegSizeInBits(*RC) > 128) {
8206 Register Lo = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
8207 Register Hi = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
8208 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPICKVE2GR_WU), Lo)
8209 .addReg(Msk)
8210 .addImm(0);
8211 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPICKVE2GR_WU), Hi)
8212 .addReg(Msk, RegState::Kill)
8213 .addImm(4);
8214 BuildMI(*BB, MI, DL,
8215 TII->get(Subtarget.is64Bit() ? LoongArch::BSTRINS_D
8216 : LoongArch::BSTRINS_W),
8217 Dst)
8220 .addImm(256 / EleBits - 1)
8221 .addImm(128 / EleBits);
8222 } else {
8223 BuildMI(*BB, MI, DL, TII->get(LoongArch::VPICKVE2GR_HU), Dst)
8224 .addReg(Msk, RegState::Kill)
8225 .addImm(0);
8226 }
8227
8228 MI.eraseFromParent();
8229 return BB;
8230}
8231
8232static MachineBasicBlock *
8234 const LoongArchSubtarget &Subtarget) {
8235 assert(MI.getOpcode() == LoongArch::SplitPairF64Pseudo &&
8236 "Unexpected instruction");
8237
8238 MachineFunction &MF = *BB->getParent();
8239 DebugLoc DL = MI.getDebugLoc();
8241 Register LoReg = MI.getOperand(0).getReg();
8242 Register HiReg = MI.getOperand(1).getReg();
8243 Register SrcReg = MI.getOperand(2).getReg();
8244
8245 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVFR2GR_S_64), LoReg).addReg(SrcReg);
8246 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVFRH2GR_S), HiReg)
8247 .addReg(SrcReg, getKillRegState(MI.getOperand(2).isKill()));
8248 MI.eraseFromParent(); // The pseudo instruction is gone now.
8249 return BB;
8250}
8251
8252static MachineBasicBlock *
8254 const LoongArchSubtarget &Subtarget) {
8255 assert(MI.getOpcode() == LoongArch::BuildPairF64Pseudo &&
8256 "Unexpected instruction");
8257
8258 MachineFunction &MF = *BB->getParent();
8259 DebugLoc DL = MI.getDebugLoc();
8262 Register TmpReg = MRI.createVirtualRegister(&LoongArch::FPR64RegClass);
8263 Register DstReg = MI.getOperand(0).getReg();
8264 Register LoReg = MI.getOperand(1).getReg();
8265 Register HiReg = MI.getOperand(2).getReg();
8266
8267 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVGR2FR_W_64), TmpReg)
8268 .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()));
8269 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVGR2FRH_W), DstReg)
8270 .addReg(TmpReg, RegState::Kill)
8271 .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()));
8272 MI.eraseFromParent(); // The pseudo instruction is gone now.
8273 return BB;
8274}
8275
8277 switch (MI.getOpcode()) {
8278 default:
8279 return false;
8280 case LoongArch::Select_GPR_Using_CC_GPR:
8281 return true;
8282 }
8283}
8284
8285static MachineBasicBlock *
8287 const LoongArchSubtarget &Subtarget) {
8288 // To "insert" Select_* instructions, we actually have to insert the triangle
8289 // control-flow pattern. The incoming instructions know the destination vreg
8290 // to set, the condition code register to branch on, the true/false values to
8291 // select between, and the condcode to use to select the appropriate branch.
8292 //
8293 // We produce the following control flow:
8294 // HeadMBB
8295 // | \
8296 // | IfFalseMBB
8297 // | /
8298 // TailMBB
8299 //
8300 // When we find a sequence of selects we attempt to optimize their emission
8301 // by sharing the control flow. Currently we only handle cases where we have
8302 // multiple selects with the exact same condition (same LHS, RHS and CC).
8303 // The selects may be interleaved with other instructions if the other
8304 // instructions meet some requirements we deem safe:
8305 // - They are not pseudo instructions.
8306 // - They are debug instructions. Otherwise,
8307 // - They do not have side-effects, do not access memory and their inputs do
8308 // not depend on the results of the select pseudo-instructions.
8309 // The TrueV/FalseV operands of the selects cannot depend on the result of
8310 // previous selects in the sequence.
8311 // These conditions could be further relaxed. See the X86 target for a
8312 // related approach and more information.
8313
8314 Register LHS = MI.getOperand(1).getReg();
8315 Register RHS;
8316 if (MI.getOperand(2).isReg())
8317 RHS = MI.getOperand(2).getReg();
8318 auto CC = static_cast<unsigned>(MI.getOperand(3).getImm());
8319
8320 SmallVector<MachineInstr *, 4> SelectDebugValues;
8321 SmallSet<Register, 4> SelectDests;
8322 SelectDests.insert(MI.getOperand(0).getReg());
8323
8324 MachineInstr *LastSelectPseudo = &MI;
8325 for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
8326 SequenceMBBI != E; ++SequenceMBBI) {
    // Debug instructions are transparent to sequence formation.
    if (SequenceMBBI->isDebugInstr())
      continue;
    if (isSelectPseudo(*SequenceMBBI)) {
      // A following select may be folded into this diamond only when it tests
      // the exact same (LHS, RHS, CC) condition and does not read or redefine
      // a value produced by an earlier select in the sequence.
      if (SequenceMBBI->getOperand(1).getReg() != LHS ||
          !SequenceMBBI->getOperand(2).isReg() ||
          SequenceMBBI->getOperand(2).getReg() != RHS ||
          SequenceMBBI->getOperand(3).getImm() != CC ||
          SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
          SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
        break;
      LastSelectPseudo = &*SequenceMBBI;
      SequenceMBBI->collectDebugValues(SelectDebugValues);
      SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
      continue;
    }
    // Any instruction with unmodeled side effects, memory access, or its own
    // custom inserter terminates the foldable sequence.
    if (SequenceMBBI->hasUnmodeledSideEffects() ||
        SequenceMBBI->mayLoadOrStore() ||
        SequenceMBBI->usesCustomInsertionHook())
      break;
    // So does any use of a value defined by a select already in the sequence.
    if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
          return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
        }))
      break;
  }

  const LoongArchInstrInfo &TII = *Subtarget.getInstrInfo();
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  DebugLoc DL = MI.getDebugLoc();
  // NOTE(review): the declaration of the insertion iterator 'I' (used by the
  // F->insert calls below) appears to have been dropped from this listing —
  // confirm against upstream.

  MachineBasicBlock *HeadMBB = BB;
  MachineFunction *F = BB->getParent();
  MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);

  F->insert(I, IfFalseMBB);
  F->insert(I, TailMBB);

  // Set the call frame size on entry to the new basic blocks.
  unsigned CallFrameSize = TII.getCallFrameSizeAt(*LastSelectPseudo);
  IfFalseMBB->setCallFrameSize(CallFrameSize);
  TailMBB->setCallFrameSize(CallFrameSize);

  // Transfer debug instructions associated with the selects to TailMBB.
  for (MachineInstr *DebugInstr : SelectDebugValues) {
    TailMBB->push_back(DebugInstr->removeFromParent());
  }

  // Move all instructions after the sequence to TailMBB.
  TailMBB->splice(TailMBB->end(), HeadMBB,
                  std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
  // Update machine-CFG edges by transferring all successors of the current
  // block to the new block which will contain the Phi nodes for the selects.
  TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
  // Set the successors for HeadMBB: the diamond's two arms.
  HeadMBB->addSuccessor(IfFalseMBB);
  HeadMBB->addSuccessor(TailMBB);

  // Insert appropriate branch. CC here is the branch opcode; an immediate
  // second operand selects the reg-imm form, otherwise the reg-reg form.
  if (MI.getOperand(2).isImm())
    BuildMI(HeadMBB, DL, TII.get(CC))
        .addReg(LHS)
        .addImm(MI.getOperand(2).getImm())
        .addMBB(TailMBB);
  else
    BuildMI(HeadMBB, DL, TII.get(CC)).addReg(LHS).addReg(RHS).addMBB(TailMBB);

  // IfFalseMBB just falls through to TailMBB.
  IfFalseMBB->addSuccessor(TailMBB);

  // Create PHIs for all of the select pseudo-instructions.
  auto SelectMBBI = MI.getIterator();
  auto SelectEnd = std::next(LastSelectPseudo->getIterator());
  auto InsertionPoint = TailMBB->begin();
  while (SelectMBBI != SelectEnd) {
    auto Next = std::next(SelectMBBI);
    if (isSelectPseudo(*SelectMBBI)) {
      // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
      BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
              TII.get(LoongArch::PHI), SelectMBBI->getOperand(0).getReg())
          .addReg(SelectMBBI->getOperand(4).getReg())
          .addMBB(HeadMBB)
          .addReg(SelectMBBI->getOperand(5).getReg())
          .addMBB(IfFalseMBB);
      SelectMBBI->eraseFromParent();
    }
    SelectMBBI = Next;
  }

  // PHI nodes were introduced above, so the NoPHIs property no longer holds.
  F->getProperties().resetNoPHIs();
  return TailMBB;
}
8419
// Expand pseudo-instructions that were marked with 'usesCustomInserter'
// during instruction selection. Dispatches on MI's opcode and returns the
// basic block into which subsequent instructions should be inserted.
MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter(
    MachineInstr &MI, MachineBasicBlock *BB) const {
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  DebugLoc DL = MI.getDebugLoc();

  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("Unexpected instr type to insert");
  case LoongArch::DIV_W:
  case LoongArch::DIV_WU:
  case LoongArch::MOD_W:
  case LoongArch::MOD_WU:
  case LoongArch::DIV_D:
  case LoongArch::DIV_DU:
  case LoongArch::MOD_D:
  case LoongArch::MOD_DU:
    // Optionally guard integer division with a divide-by-zero trap.
    return insertDivByZeroTrap(MI, BB);
    break; // NOTE(review): unreachable — the 'return' above already exits.
  case LoongArch::WRFCSR: {
    // Write a GPR into the FCSR selected by the immediate operand.
    BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVGR2FCSR),
            LoongArch::FCSR0 + MI.getOperand(0).getImm())
        .addReg(MI.getOperand(1).getReg());
    MI.eraseFromParent();
    return BB;
  }
  case LoongArch::RDFCSR: {
    // Read the FCSR selected by the immediate operand into a GPR. The FCSR
    // source is marked undef because it is not tracked as a register value.
    MachineInstr *ReadFCSR =
        BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVFCSR2GR),
                MI.getOperand(0).getReg())
            .addReg(LoongArch::FCSR0 + MI.getOperand(1).getImm());
    ReadFCSR->getOperand(1).setIsUndef();
    MI.eraseFromParent();
    return BB;
  }
  case LoongArch::Select_GPR_Using_CC_GPR:
    return emitSelectPseudo(MI, BB, Subtarget);
  case LoongArch::BuildPairF64Pseudo:
    return emitBuildPairF64Pseudo(MI, BB, Subtarget);
  case LoongArch::SplitPairF64Pseudo:
    return emitSplitPairF64Pseudo(MI, BB, Subtarget);
  case LoongArch::PseudoVBZ:
  case LoongArch::PseudoVBZ_B:
  case LoongArch::PseudoVBZ_H:
  case LoongArch::PseudoVBZ_W:
  case LoongArch::PseudoVBZ_D:
  case LoongArch::PseudoVBNZ:
  case LoongArch::PseudoVBNZ_B:
  case LoongArch::PseudoVBNZ_H:
  case LoongArch::PseudoVBNZ_W:
  case LoongArch::PseudoVBNZ_D:
  case LoongArch::PseudoXVBZ:
  case LoongArch::PseudoXVBZ_B:
  case LoongArch::PseudoXVBZ_H:
  case LoongArch::PseudoXVBZ_W:
  case LoongArch::PseudoXVBZ_D:
  case LoongArch::PseudoXVBNZ:
  case LoongArch::PseudoXVBNZ_B:
  case LoongArch::PseudoXVBNZ_H:
  case LoongArch::PseudoXVBNZ_W:
  case LoongArch::PseudoXVBNZ_D:
    return emitVecCondBranchPseudo(MI, BB, Subtarget);
  case LoongArch::PseudoXVINSGR2VR_B:
  case LoongArch::PseudoXVINSGR2VR_H:
    return emitPseudoXVINSGR2VR(MI, BB, Subtarget);
  case LoongArch::PseudoCTPOP:
    return emitPseudoCTPOP(MI, BB, Subtarget);
  case LoongArch::PseudoVMSKLTZ_B:
  case LoongArch::PseudoVMSKLTZ_H:
  case LoongArch::PseudoVMSKLTZ_W:
  case LoongArch::PseudoVMSKLTZ_D:
  case LoongArch::PseudoVMSKGEZ_B:
  case LoongArch::PseudoVMSKEQZ_B:
  case LoongArch::PseudoVMSKNEZ_B:
  case LoongArch::PseudoXVMSKLTZ_B:
  case LoongArch::PseudoXVMSKLTZ_H:
  case LoongArch::PseudoXVMSKLTZ_W:
  case LoongArch::PseudoXVMSKLTZ_D:
  case LoongArch::PseudoXVMSKGEZ_B:
  case LoongArch::PseudoXVMSKEQZ_B:
  case LoongArch::PseudoXVMSKNEZ_B:
    return emitPseudoVMSKCOND(MI, BB, Subtarget);
  case TargetOpcode::STATEPOINT:
    // STATEPOINT is a pseudo instruction which has no implicit defs/uses
    // while bl call instruction (where statepoint will be lowered at the
    // end) has implicit def. This def is early-clobber as it will be set at
    // the moment of the call and earlier than any use is read.
    // Add this implicit dead def here as a workaround.
    // NOTE(review): the 'MachineOperand::CreateReg(' line appears to have
    // been dropped from this listing — confirm against upstream.
    MI.addOperand(*MI.getMF(),
                  LoongArch::R1, /*isDef*/ true,
                  /*isImp*/ true, /*isKill*/ false, /*isDead*/ true,
                  /*isUndef*/ false, /*isEarlyClobber*/ true));
    if (!Subtarget.is64Bit())
      report_fatal_error("STATEPOINT is only supported on 64-bit targets");
    return emitPatchPoint(MI, BB);
  }
}
8517
    // NOTE(review): the first line of this definition (the function name,
    // allowsMisalignedMemoryAccesses) was dropped from this listing; these
    // are its trailing parameters.
    EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
    unsigned *Fast) const {
  // Misaligned accesses are only permitted when the subtarget advertises
  // hardware unaligned-access support (the UAL feature).
  if (!Subtarget.hasUAL())
    return false;

  // TODO: set reasonable speed number.
  if (Fast)
    *Fast = 1;
  return true;
}
8529
8530//===----------------------------------------------------------------------===//
8531// Calling Convention Implementation
8532//===----------------------------------------------------------------------===//
8533
// Eight general-purpose registers a0-a7 used for passing integer arguments,
// with a0-a1 reused to return values. Generally, the GPRs are used to pass
// fixed-point arguments, and floating-point arguments when no FPR is available
// or with soft float ABI. (R4-R11 are a0-a7 in the ABI register naming.)
const MCPhysReg ArgGPRs[] = {LoongArch::R4, LoongArch::R5, LoongArch::R6,
                             LoongArch::R7, LoongArch::R8, LoongArch::R9,
                             LoongArch::R10, LoongArch::R11};
8541
// PreserveNone calling convention:
// Arguments may be passed in any general-purpose registers except:
//   - R1  : return address register
//   - R22 : frame pointer
//   - R31 : base pointer
//
// All general-purpose registers are treated as caller-saved,
// except R1 (RA) and R22 (FP).
//
// Non-volatile registers are allocated first so that a function
// can call normal functions without having to spill and reload
// argument registers.
// NOTE(review): the 'const MCPhysReg PreserveNoneArgGPRs[] = {' declaration
// line appears to have been dropped from this listing.
    LoongArch::R23, LoongArch::R24, LoongArch::R25, LoongArch::R26,
    LoongArch::R27, LoongArch::R28, LoongArch::R29, LoongArch::R30,
    LoongArch::R4, LoongArch::R5, LoongArch::R6, LoongArch::R7,
    LoongArch::R8, LoongArch::R9, LoongArch::R10, LoongArch::R11,
    LoongArch::R12, LoongArch::R13, LoongArch::R14, LoongArch::R15,
    LoongArch::R16, LoongArch::R17, LoongArch::R18, LoongArch::R19,
    LoongArch::R20};
8562
// Eight floating-point registers fa0-fa7 used for passing floating-point
// arguments, and fa0-fa1 are also used to return values.
const MCPhysReg ArgFPR32s[] = {LoongArch::F0, LoongArch::F1, LoongArch::F2,
                               LoongArch::F3, LoongArch::F4, LoongArch::F5,
                               LoongArch::F6, LoongArch::F7};
// FPR32 and FPR64 alias each other.
// NOTE(review): the 'const MCPhysReg ArgFPR64s[] = {' declaration line
// appears to have been dropped from this listing.
    LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, LoongArch::F3_64,
    LoongArch::F4_64, LoongArch::F5_64, LoongArch::F6_64, LoongArch::F7_64};
8572
// Vector argument registers: vr0-vr7 for 128-bit values and xr0-xr7 for
// 256-bit values (see the 128/256-bit vector cases in CC_LoongArch).
const MCPhysReg ArgVRs[] = {LoongArch::VR0, LoongArch::VR1, LoongArch::VR2,
                            LoongArch::VR3, LoongArch::VR4, LoongArch::VR5,
                            LoongArch::VR6, LoongArch::VR7};

const MCPhysReg ArgXRs[] = {LoongArch::XR0, LoongArch::XR1, LoongArch::XR2,
                            LoongArch::XR3, LoongArch::XR4, LoongArch::XR5,
                            LoongArch::XR6, LoongArch::XR7};
8580
  // Allocate the next free argument GPR, honouring the calling convention.
  // NOTE(review): this function's signature line and a case label (likely
  // 'case CallingConv::PreserveNone:') appear to have been dropped from this
  // listing — confirm against upstream.
  switch (State.getCallingConv()) {
    if (!State.isVarArg())
      // Non-variadic PreserveNone: hand out callee-saved registers first so
      // the callee can make ordinary calls without spilling its arguments.
      return State.AllocateReg(PreserveNoneArgGPRs);
    // Variadic calls fall back to the standard a0-a7 list.
    [[fallthrough]];
  default:
    return State.AllocateReg(ArgGPRs);
  }
}
8591
// Pass a 2*GRLen argument that has been split into two GRLen values through
// registers or the stack as necessary. Returns false on success (locations
// were assigned for both halves).
static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State,
                                     CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1,
                                     unsigned ValNo2, MVT ValVT2, MVT LocVT2,
                                     ISD::ArgFlagsTy ArgFlags2) {
  unsigned GRLenInBytes = GRLen / 8;
  if (Register Reg = allocateArgGPR(State)) {
    // At least one half can be passed via register.
    State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
                                     VA1.getLocVT(), CCValAssign::Full));
  } else {
    // Both halves must be passed on the stack, with proper alignment.
    Align StackAlign =
        std::max(Align(GRLenInBytes), ArgFlags1.getNonZeroOrigAlign());
    // NOTE(review): a 'CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),'
    // line appears to have been dropped from this listing.
    State.addLoc(
            State.AllocateStack(GRLenInBytes, StackAlign),
        VA1.getLocVT(), CCValAssign::Full));
    State.addLoc(CCValAssign::getMem(
        ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
        LocVT2, CCValAssign::Full));
    return false;
  }
  if (Register Reg = allocateArgGPR(State)) {
    // The second half can also be passed via register.
    State.addLoc(
        CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
  } else {
    // The second half is passed via the stack, without additional alignment.
    State.addLoc(CCValAssign::getMem(
        ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
        LocVT2, CCValAssign::Full));
  }
  return false;
}
8628
// Implements the LoongArch calling convention. Returns true upon failure.
// NOTE(review): the signature line ('static bool CC_LoongArch(const
// DataLayout &DL, LoongArchABI::ABI ABI,') appears to have been dropped from
// this listing — confirm against upstream.
                         unsigned ValNo, MVT ValVT,
                         CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
                         CCState &State, bool IsRet, Type *OrigTy) {
  // GRLen (32 or 64) is derived from the datalayout's largest legal integer.
  unsigned GRLen = DL.getLargestLegalIntTypeSizeInBits();
  assert((GRLen == 32 || GRLen == 64) && "Unspport GRLen");
  MVT GRLenVT = GRLen == 32 ? MVT::i32 : MVT::i64;
  MVT LocVT = ValVT;

  // Any return value split into more than two values can't be returned
  // directly.
  if (IsRet && ValNo > 1)
    return true;

  // If passing a variadic argument, or if no FPR is available.
  bool UseGPRForFloat = true;

  switch (ABI) {
  default:
    llvm_unreachable("Unexpected ABI");
    break;
  // NOTE(review): the 'case LoongArchABI::...' labels of this switch appear
  // to have been dropped from this listing — confirm against upstream.
    UseGPRForFloat = ArgFlags.isVarArg();
    break;
    break;
  }

  // If this is a variadic argument, the LoongArch calling convention requires
  // that it is assigned an 'even' or 'aligned' register if it has (2*GRLen)/8
  // byte alignment. An aligned register should be used regardless of whether
  // the original argument was split during legalisation or not. The argument
  // will not be passed by registers if the original type is larger than
  // 2*GRLen, so the register alignment rule does not apply.
  unsigned TwoGRLenInBytes = (2 * GRLen) / 8;
  if (ArgFlags.isVarArg() &&
      ArgFlags.getNonZeroOrigAlign() == TwoGRLenInBytes &&
      DL.getTypeAllocSize(OrigTy) == TwoGRLenInBytes) {
    unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
    // Skip 'odd' register if necessary.
    if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1)
      State.AllocateReg(ArgGPRs);
  }

  SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
  SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
      State.getPendingArgFlags();

  assert(PendingLocs.size() == PendingArgFlags.size() &&
         "PendingLocs and PendingArgFlags out of sync");

  // FPR32 and FPR64 alias each other, so exhausting the FPR32 list means no
  // FPRs remain at all.
  if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s))
    UseGPRForFloat = true;

  if (UseGPRForFloat && ValVT == MVT::f32) {
    LocVT = GRLenVT;
    LocInfo = CCValAssign::BCvt;
  } else if (UseGPRForFloat && GRLen == 64 && ValVT == MVT::f64) {
    LocVT = MVT::i64;
    LocInfo = CCValAssign::BCvt;
  } else if (UseGPRForFloat && GRLen == 32 && ValVT == MVT::f64) {
    // Handle passing f64 on LA32D with a soft float ABI or when floating point
    // registers are exhausted.
    assert(PendingLocs.empty() && "Can't lower f64 if it is split");
    // Depending on available argument GPRS, f64 may be passed in a pair of
    // GPRs, split between a GPR and the stack, or passed completely on the
    // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
    // cases.
    MCRegister Reg = allocateArgGPR(State);
    if (!Reg) {
      // No GPRs left: the whole f64 goes on the stack.
      int64_t StackOffset = State.AllocateStack(8, Align(8));
      State.addLoc(
          CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
      return false;
    }
    LocVT = MVT::i32;
    State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
    MCRegister HiReg = allocateArgGPR(State);
    if (HiReg) {
      // Second half also fits in a GPR.
      State.addLoc(
          CCValAssign::getCustomReg(ValNo, ValVT, HiReg, LocVT, LocInfo));
    } else {
      // Second half spills to the stack.
      int64_t StackOffset = State.AllocateStack(4, Align(4));
      State.addLoc(
          CCValAssign::getCustomMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
    }
    return false;
  }

  // Split arguments might be passed indirectly, so keep track of the pending
  // values.
  if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
    LocVT = GRLenVT;
    LocInfo = CCValAssign::Indirect;
    PendingLocs.push_back(
        CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
    PendingArgFlags.push_back(ArgFlags);
    if (!ArgFlags.isSplitEnd()) {
      return false;
    }
  }

  // If the split argument only had two elements, it should be passed directly
  // in registers or on the stack.
  if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&
      PendingLocs.size() <= 2) {
    assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
    // Apply the normal calling convention rules to the first half of the
    // split argument.
    CCValAssign VA = PendingLocs[0];
    ISD::ArgFlagsTy AF = PendingArgFlags[0];
    PendingLocs.clear();
    PendingArgFlags.clear();
    return CC_LoongArchAssign2GRLen(GRLen, State, VA, AF, ValNo, ValVT, LocVT,
                                    ArgFlags);
  }

  // Allocate to a register if possible, or else a stack slot.
  Register Reg;
  unsigned StoreSizeBytes = GRLen / 8;
  Align StackAlign = Align(GRLen / 8);

  if (ValVT == MVT::f32 && !UseGPRForFloat) {
    Reg = State.AllocateReg(ArgFPR32s);
  } else if (ValVT == MVT::f64 && !UseGPRForFloat) {
    Reg = State.AllocateReg(ArgFPR64s);
  } else if (ValVT.is128BitVector()) {
    Reg = State.AllocateReg(ArgVRs);
    UseGPRForFloat = false;
    StoreSizeBytes = 16;
    StackAlign = Align(16);
  } else if (ValVT.is256BitVector()) {
    Reg = State.AllocateReg(ArgXRs);
    UseGPRForFloat = false;
    StoreSizeBytes = 32;
    StackAlign = Align(32);
  } else {
    Reg = allocateArgGPR(State);
  }

  unsigned StackOffset =
      Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign);

  // If we reach this point and PendingLocs is non-empty, we must be at the
  // end of a split argument that must be passed indirectly.
  if (!PendingLocs.empty()) {
    assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
    assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
    for (auto &It : PendingLocs) {
      if (Reg)
        It.convertToReg(Reg);
      else
        It.convertToMem(StackOffset);
      State.addLoc(It);
    }
    PendingLocs.clear();
    PendingArgFlags.clear();
    return false;
  }
  assert((!UseGPRForFloat || LocVT == GRLenVT) &&
         "Expected an GRLenVT at this stage");

  if (Reg) {
    State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
    return false;
  }

  // When a floating-point value is passed on the stack, no bit-cast is needed.
  if (ValVT.isFloatingPoint()) {
    LocVT = ValVT;
    LocInfo = CCValAssign::Full;
  }

  State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
  return false;
}
8811
// Run the calling-convention assignment function Fn over every incoming
// argument (or, when IsRet, over the values returned to this function),
// recording locations into CCInfo. An unhandled type is a fatal error.
void LoongArchTargetLowering::analyzeInputArgs(
    MachineFunction &MF, CCState &CCInfo,
    const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
    LoongArchCCAssignFn Fn) const {
  FunctionType *FType = MF.getFunction().getFunctionType();
  for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
    MVT ArgVT = Ins[i].VT;
    Type *ArgTy = nullptr;
    if (IsRet)
      ArgTy = FType->getReturnType();
    else if (Ins[i].isOrigArg())
      ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
    // NOTE(review): a 'LoongArchABI::ABI ABI =' declaration line appears to
    // have been dropped from this listing.
        MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
    if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Ins[i].Flags,
           CCInfo, IsRet, ArgTy)) {
      LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " << ArgVT
                        << '\n');
      llvm_unreachable("");
    }
  }
}
8834
// Run the calling-convention assignment function Fn over every outgoing
// argument (or return value), recording locations into CCInfo. CLI, when
// non-null, supplies the original IR argument types.
void LoongArchTargetLowering::analyzeOutputArgs(
    MachineFunction &MF, CCState &CCInfo,
    const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
    CallLoweringInfo *CLI, LoongArchCCAssignFn Fn) const {
  for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
    MVT ArgVT = Outs[i].VT;
    Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
    // NOTE(review): a 'LoongArchABI::ABI ABI =' declaration line appears to
    // have been dropped from this listing.
        MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
    if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Outs[i].Flags,
           CCInfo, IsRet, OrigTy)) {
      LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " << ArgVT
                        << "\n");
      llvm_unreachable("");
    }
  }
}
8852
// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
// values.
// NOTE(review): the signature line ('static SDValue convertLocVTToValVT(...')
// appears to have been dropped from this listing.
                                const CCValAssign &VA, const SDLoc &DL) {
  switch (VA.getLocInfo()) {
  default:
    llvm_unreachable("Unexpected CCValAssign::LocInfo");
  case CCValAssign::Full:
    break;
  case CCValAssign::BCvt:
    // An f32 carried in a 64-bit GPR needs the dedicated GPR->FPR move node;
    // a plain bitcast requires matching bit widths.
    if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
      Val = DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Val);
    else
      Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
    break;
  }
  return Val;
}
8872
// Materialize a register-assigned argument as an SDValue, creating a live-in
// virtual register and converting to the value type.
// NOTE(review): the signature line and the declarations of 'MF', 'RegInfo'
// and 'LAFI' appear to have been dropped from this listing — confirm against
// upstream.
                                const CCValAssign &VA, const SDLoc &DL,
                                const ISD::InputArg &In,
                                const LoongArchTargetLowering &TLI) {
  EVT LocVT = VA.getLocVT();
  SDValue Val;
  const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
  Register VReg = RegInfo.createVirtualRegister(RC);
  RegInfo.addLiveIn(VA.getLocReg(), VReg);
  Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);

  // If input is sign extended from 32 bits, note it for the OptW pass.
  if (In.isOrigArg()) {
    Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex());
    if (OrigArg->getType()->isIntegerTy()) {
      unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
      // An input zero extended from i31 can also be considered sign extended.
      if ((BitWidth <= 32 && In.Flags.isSExt()) ||
          (BitWidth < 32 && In.Flags.isZExt())) {
        LAFI->addSExt32Register(VReg);
      }
    }
  }

  return convertLocVTToValVT(DAG, Val, VA, DL);
}
8903
// The caller is responsible for loading the full value if the argument is
// passed with CCValAssign::Indirect.
// NOTE(review): the signature line, the 'MF' declaration, the
// getFrameIndex continuation and the final getExtLoad arguments appear to
// have been dropped from this listing — confirm against upstream.
                               const CCValAssign &VA, const SDLoc &DL) {
  MachineFrameInfo &MFI = MF.getFrameInfo();
  EVT ValVT = VA.getValVT();
  // The argument lives in the caller's frame at the assigned offset.
  int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
                                 /*IsImmutable=*/true);
  SDValue FIN = DAG.getFrameIndex(

  ISD::LoadExtType ExtType;
  switch (VA.getLocInfo()) {
  default:
    llvm_unreachable("Unexpected CCValAssign::LocInfo");
  case CCValAssign::Full:
  case CCValAssign::BCvt:
    ExtType = ISD::NON_EXTLOAD;
    break;
  }
  return DAG.getExtLoad(
      ExtType, DL, VA.getLocVT(), Chain, FIN,
}
8930
// Reassemble an f64 that was passed as two 32-bit halves on LA32 (soft-float
// path): the low half arrives in a GPR, the high half either in a second GPR
// or on the stack.
// NOTE(review): the signature line and the declarations of 'MF' / 'RegInfo'
// appear to have been dropped from this listing — confirm against upstream.
                                       const CCValAssign &VA,
                                       const CCValAssign &HiVA,
                                       const SDLoc &DL) {
  assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
         "Unexpected VA");
  MachineFrameInfo &MFI = MF.getFrameInfo();

  assert(VA.isRegLoc() && "Expected register VA assignment");

  Register LoVReg = RegInfo.createVirtualRegister(&LoongArch::GPRRegClass);
  RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
  SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
  SDValue Hi;
  if (HiVA.isMemLoc()) {
    // Second half of f64 is passed on the stack.
    int FI = MFI.CreateFixedObject(4, HiVA.getLocMemOffset(),
                                   /*IsImmutable=*/true);
    SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
    Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
  } else {
    // Second half of f64 is passed in another GPR.
    Register HiVReg = RegInfo.createVirtualRegister(&LoongArch::GPRRegClass);
    RegInfo.addLiveIn(HiVA.getLocReg(), HiVReg);
    Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
  }
  // Glue the two halves back together into one f64 value.
  return DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64, Lo, Hi);
}
8962
// Convert Val (in its value type) to the location type required by VA; the
// inverse of convertLocVTToValVT.
// NOTE(review): the signature line ('static SDValue convertValVTToLocVT(...')
// appears to have been dropped from this listing.
                                const CCValAssign &VA, const SDLoc &DL) {
  EVT LocVT = VA.getLocVT();

  switch (VA.getLocInfo()) {
  default:
    llvm_unreachable("Unexpected CCValAssign::LocInfo");
  case CCValAssign::Full:
    break;
  case CCValAssign::BCvt:
    // Moving an f32 into a 64-bit GPR needs the dedicated FPR->GPR move node;
    // a plain bitcast requires matching bit widths.
    if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
      Val = DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Val);
    else
      Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
    break;
  }
  return Val;
}
8981
8982static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
8983 CCValAssign::LocInfo LocInfo,
8984 ISD::ArgFlagsTy ArgFlags, Type *OrigTy,
8985 CCState &State) {
8986 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
8987 // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, SpLim
8988 // s0 s1 s2 s3 s4 s5 s6 s7 s8
8989 static const MCPhysReg GPRList[] = {
8990 LoongArch::R23, LoongArch::R24, LoongArch::R25,
8991 LoongArch::R26, LoongArch::R27, LoongArch::R28,
8992 LoongArch::R29, LoongArch::R30, LoongArch::R31};
8993 if (MCRegister Reg = State.AllocateReg(GPRList)) {
8994 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
8995 return false;
8996 }
8997 }
8998
8999 if (LocVT == MVT::f32) {
9000 // Pass in STG registers: F1, F2, F3, F4
9001 // fs0,fs1,fs2,fs3
9002 static const MCPhysReg FPR32List[] = {LoongArch::F24, LoongArch::F25,
9003 LoongArch::F26, LoongArch::F27};
9004 if (MCRegister Reg = State.AllocateReg(FPR32List)) {
9005 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
9006 return false;
9007 }
9008 }
9009
9010 if (LocVT == MVT::f64) {
9011 // Pass in STG registers: D1, D2, D3, D4
9012 // fs4,fs5,fs6,fs7
9013 static const MCPhysReg FPR64List[] = {LoongArch::F28_64, LoongArch::F29_64,
9014 LoongArch::F30_64, LoongArch::F31_64};
9015 if (MCRegister Reg = State.AllocateReg(FPR64List)) {
9016 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
9017 return false;
9018 }
9019 }
9020
9021 report_fatal_error("No registers left in GHC calling convention");
9022 return true;
9023}
9024
// Transform physical registers into virtual registers.
// NOTE(review): several lines appear to have been dropped from this listing
// (the function-name line, 'MachineFunction &MF = ...', some calling
// convention case labels, 'report_fatal_error(', 'SmallVector<CCValAssign>
// ArgLocs;', the GHC AnalyzeFormalArguments call, MachinePointerInfo
// arguments, and 'ArrayRef<MCPhysReg> ArgRegs(ArgGPRs);') — confirm against
// upstream before relying on this text.
    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {

  switch (CallConv) {
  default:
    llvm_unreachable("Unsupported calling convention");
  case CallingConv::C:
  case CallingConv::Fast:
    break;
  case CallingConv::GHC:
    // GHC lowering places arguments in FPRs unconditionally, so both basic
    // FP features must be present.
    if (!MF.getSubtarget().hasFeature(LoongArch::FeatureBasicF) ||
        !MF.getSubtarget().hasFeature(LoongArch::FeatureBasicD))
      "GHC calling convention requires the F and D extensions");
  }

  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  MVT GRLenVT = Subtarget.getGRLenVT();
  unsigned GRLenInBytes = Subtarget.getGRLen() / 8;
  // Used with varargs to accumulate store chains.
  std::vector<SDValue> OutChains;

  // Assign locations to all of the incoming arguments.
  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());

  if (CallConv == CallingConv::GHC)
  else
    analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false, CC_LoongArch);

  for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) {
    CCValAssign &VA = ArgLocs[i];
    SDValue ArgValue;
    // Passing f64 on LA32D with a soft float ABI must be handled as a special
    // case.
    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
      assert(VA.needsCustom());
      // Consumes two CCValAssign entries (lo and hi halves).
      ArgValue = unpackF64OnLA32DSoftABI(DAG, Chain, VA, ArgLocs[++i], DL);
    } else if (VA.isRegLoc())
      ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[InsIdx], *this);
    else
      ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
    if (VA.getLocInfo() == CCValAssign::Indirect) {
      // If the original argument was split and passed by reference, we need to
      // load all parts of it here (using the same address).
      InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
      unsigned ArgIndex = Ins[InsIdx].OrigArgIndex;
      unsigned ArgPartOffset = Ins[InsIdx].PartOffset;
      assert(ArgPartOffset == 0);
      while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) {
        CCValAssign &PartVA = ArgLocs[i + 1];
        unsigned PartOffset = Ins[InsIdx + 1].PartOffset - ArgPartOffset;
        SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
        SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
        InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
        ++i;
        ++InsIdx;
      }
      continue;
    }
    InVals.push_back(ArgValue);
  }

  if (IsVarArg) {
    unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
    const TargetRegisterClass *RC = &LoongArch::GPRRegClass;
    MachineFrameInfo &MFI = MF.getFrameInfo();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();
    auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();

    // Offset of the first variable argument from stack pointer, and size of
    // the vararg save area. For now, the varargs save area is either zero or
    // large enough to hold a0-a7.
    int VaArgOffset, VarArgsSaveSize;

    // If all registers are allocated, then all varargs must be passed on the
    // stack and we don't need to save any argregs.
    if (ArgRegs.size() == Idx) {
      VaArgOffset = CCInfo.getStackSize();
      VarArgsSaveSize = 0;
    } else {
      VarArgsSaveSize = GRLenInBytes * (ArgRegs.size() - Idx);
      VaArgOffset = -VarArgsSaveSize;
    }

    // Record the frame index of the first variable argument
    // which is a value necessary to VASTART.
    int FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
    LoongArchFI->setVarArgsFrameIndex(FI);

    // If saving an odd number of registers then create an extra stack slot to
    // ensure that the frame pointer is 2*GRLen-aligned, which in turn ensures
    // offsets to even-numbered registers remain 2*GRLen-aligned.
    if (Idx % 2) {
      MFI.CreateFixedObject(GRLenInBytes, VaArgOffset - (int)GRLenInBytes,
                            true);
      VarArgsSaveSize += GRLenInBytes;
    }

    // Copy the integer registers that may have been used for passing varargs
    // to the vararg save area.
    for (unsigned I = Idx; I < ArgRegs.size();
         ++I, VaArgOffset += GRLenInBytes) {
      const Register Reg = RegInfo.createVirtualRegister(RC);
      RegInfo.addLiveIn(ArgRegs[I], Reg);
      SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, GRLenVT);
      FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
      SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
      SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
      cast<StoreSDNode>(Store.getNode())
          ->getMemOperand()
          ->setValue((Value *)nullptr);
      OutChains.push_back(Store);
    }
    LoongArchFI->setVarArgsSaveSize(VarArgsSaveSize);
  }

  // All stores are grouped in one node to allow the matching between
  // the size of Ins and InVals. This only happens for vararg functions.
  if (!OutChains.empty()) {
    OutChains.push_back(Chain);
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
  }

  return Chain;
}
9163
  // An IR call marked 'tail' is a candidate for emission as a machine tail
  // call (the final decision is made in isEligibleForTailCallOptimization).
  return CI->isTailCall();
}
9167
// Check if the return value is used as only a return value, as otherwise
// we can't perform a tail-call.
// NOTE(review): the first line of this member function's signature appears to
// have been dropped from this listing.
                                             SDValue &Chain) const {
  // The node must produce exactly one value, used exactly once.
  if (N->getNumValues() != 1)
    return false;
  if (!N->hasNUsesOfValue(1, 0))
    return false;

  // That single use must be a copy into the return register.
  SDNode *Copy = *N->user_begin();
  if (Copy->getOpcode() != ISD::CopyToReg)
    return false;

  // If the ISD::CopyToReg has a glue operand, we conservatively assume it
  // isn't safe to perform a tail call.
  if (Copy->getGluedNode())
    return false;

  // The copy must be used by a LoongArchISD::RET, and nothing else.
  bool HasRet = false;
  for (SDNode *Node : Copy->users()) {
    if (Node->getOpcode() != LoongArchISD::RET)
      return false;
    HasRet = true;
  }

  if (!HasRet)
    return false;

  // Hand the caller the chain feeding the copy so it can be rethreaded.
  Chain = Copy->getOperand(0);
  return true;
}
9200
9201// Check whether the call is eligible for tail call optimization.
9202bool LoongArchTargetLowering::isEligibleForTailCallOptimization(
9203 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
9204 const SmallVectorImpl<CCValAssign> &ArgLocs) const {
9205
9206 auto CalleeCC = CLI.CallConv;
9207 auto &Outs = CLI.Outs;
9208 auto &Caller = MF.getFunction();
9209 auto CallerCC = Caller.getCallingConv();
9210
9211 // Do not tail call opt if the stack is used to pass parameters.
9212 if (CCInfo.getStackSize() != 0)
9213 return false;
9214
9215 // Do not tail call opt if any parameters need to be passed indirectly.
9216 for (auto &VA : ArgLocs)
9217 if (VA.getLocInfo() == CCValAssign::Indirect)
9218 return false;
9219
9220 // Do not tail call opt if either caller or callee uses struct return
9221 // semantics.
9222 auto IsCallerStructRet = Caller.hasStructRetAttr();
9223 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
9224 if (IsCallerStructRet || IsCalleeStructRet)
9225 return false;
9226
9227 // Do not tail call opt if either the callee or caller has a byval argument.
9228 for (auto &Arg : Outs)
9229 if (Arg.Flags.isByVal())
9230 return false;
9231
9232 // The callee has to preserve all registers the caller needs to preserve.
9233 const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
9234 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
9235 if (CalleeCC != CallerCC) {
9236 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
9237 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
9238 return false;
9239 }
9240 return true;
9241}
9242
  // Ask the datalayout for the preferred alignment of the IR type that
  // corresponds to VT.
  return DAG.getDataLayout().getPrefTypeAlign(
      VT.getTypeForEVT(*DAG.getContext()));
}
9247
9248// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
9249// and output parameter nodes.
9250SDValue
9252 SmallVectorImpl<SDValue> &InVals) const {
9253 SelectionDAG &DAG = CLI.DAG;
9254 SDLoc &DL = CLI.DL;
9256 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
9258 SDValue Chain = CLI.Chain;
9259 SDValue Callee = CLI.Callee;
9260 CallingConv::ID CallConv = CLI.CallConv;
9261 bool IsVarArg = CLI.IsVarArg;
9262 EVT PtrVT = getPointerTy(DAG.getDataLayout());
9263 MVT GRLenVT = Subtarget.getGRLenVT();
9264 bool &IsTailCall = CLI.IsTailCall;
9265
9267
9268 // Analyze the operands of the call, assigning locations to each operand.
9270 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
9271
9272 if (CallConv == CallingConv::GHC)
9273 ArgCCInfo.AnalyzeCallOperands(Outs, CC_LoongArch_GHC);
9274 else
9275 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI, CC_LoongArch);
9276
9277 // Check if it's really possible to do a tail call.
9278 if (IsTailCall)
9279 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
9280
9281 if (IsTailCall)
9282 ++NumTailCalls;
9283 else if (CLI.CB && CLI.CB->isMustTailCall())
9284 report_fatal_error("failed to perform tail call elimination on a call "
9285 "site marked musttail");
9286
9287 // Get a count of how many bytes are to be pushed on the stack.
9288 unsigned NumBytes = ArgCCInfo.getStackSize();
9289
9290 // Create local copies for byval args.
9291 SmallVector<SDValue> ByValArgs;
9292 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
9293 ISD::ArgFlagsTy Flags = Outs[i].Flags;
9294 if (!Flags.isByVal())
9295 continue;
9296
9297 SDValue Arg = OutVals[i];
9298 unsigned Size = Flags.getByValSize();
9299 Align Alignment = Flags.getNonZeroByValAlign();
9300
9301 int FI =
9302 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
9303 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
9304 SDValue SizeNode = DAG.getConstant(Size, DL, GRLenVT);
9305
9306 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
9307 /*IsVolatile=*/false,
9308 /*AlwaysInline=*/false, /*CI=*/nullptr, std::nullopt,
9310 ByValArgs.push_back(FIPtr);
9311 }
9312
9313 if (!IsTailCall)
9314 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
9315
9316 // Copy argument values to their designated locations.
9318 SmallVector<SDValue> MemOpChains;
9319 SDValue StackPtr;
9320 for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e;
9321 ++i, ++OutIdx) {
9322 CCValAssign &VA = ArgLocs[i];
9323 SDValue ArgValue = OutVals[OutIdx];
9324 ISD::ArgFlagsTy Flags = Outs[OutIdx].Flags;
9325
9326 // Handle passing f64 on LA32D with a soft float ABI as a special case.
9327 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
9328 assert(VA.isRegLoc() && "Expected register VA assignment");
9329 assert(VA.needsCustom());
9330 SDValue SplitF64 =
9331 DAG.getNode(LoongArchISD::SPLIT_PAIR_F64, DL,
9332 DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
9333 SDValue Lo = SplitF64.getValue(0);
9334 SDValue Hi = SplitF64.getValue(1);
9335
9336 Register RegLo = VA.getLocReg();
9337 RegsToPass.push_back(std::make_pair(RegLo, Lo));
9338
9339 // Get the CCValAssign for the Hi part.
9340 CCValAssign &HiVA = ArgLocs[++i];
9341
9342 if (HiVA.isMemLoc()) {
9343 // Second half of f64 is passed on the stack.
9344 if (!StackPtr.getNode())
9345 StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT);
9347 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
9348 DAG.getIntPtrConstant(HiVA.getLocMemOffset(), DL));
9349 // Emit the store.
9350 MemOpChains.push_back(DAG.getStore(
9351 Chain, DL, Hi, Address,
9353 } else {
9354 // Second half of f64 is passed in another GPR.
9355 Register RegHigh = HiVA.getLocReg();
9356 RegsToPass.push_back(std::make_pair(RegHigh, Hi));
9357 }
9358 continue;
9359 }
9360
9361 // Promote the value if needed.
9362 // For now, only handle fully promoted and indirect arguments.
9363 if (VA.getLocInfo() == CCValAssign::Indirect) {
9364 // Store the argument in a stack slot and pass its address.
9365 Align StackAlign =
9366 std::max(getPrefTypeAlign(Outs[OutIdx].ArgVT, DAG),
9367 getPrefTypeAlign(ArgValue.getValueType(), DAG));
9368 TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
9369 // If the original argument was split and passed by reference, we need to
9370 // store the required parts of it here (and pass just one address).
9371 unsigned ArgIndex = Outs[OutIdx].OrigArgIndex;
9372 unsigned ArgPartOffset = Outs[OutIdx].PartOffset;
9373 assert(ArgPartOffset == 0);
9374 // Calculate the total size to store. We don't have access to what we're
9375 // actually storing other than performing the loop and collecting the
9376 // info.
9378 while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) {
9379 SDValue PartValue = OutVals[OutIdx + 1];
9380 unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset;
9381 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
9382 EVT PartVT = PartValue.getValueType();
9383
9384 StoredSize += PartVT.getStoreSize();
9385 StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
9386 Parts.push_back(std::make_pair(PartValue, Offset));
9387 ++i;
9388 ++OutIdx;
9389 }
9390 SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
9391 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
9392 MemOpChains.push_back(
9393 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
9395 for (const auto &Part : Parts) {
9396 SDValue PartValue = Part.first;
9397 SDValue PartOffset = Part.second;
9399 DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
9400 MemOpChains.push_back(
9401 DAG.getStore(Chain, DL, PartValue, Address,
9403 }
9404 ArgValue = SpillSlot;
9405 } else {
9406 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL);
9407 }
9408
9409 // Use local copy if it is a byval arg.
9410 if (Flags.isByVal())
9411 ArgValue = ByValArgs[j++];
9412
9413 if (VA.isRegLoc()) {
9414 // Queue up the argument copies and emit them at the end.
9415 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
9416 } else {
9417 assert(VA.isMemLoc() && "Argument not register or memory");
9418 assert(!IsTailCall && "Tail call not allowed if stack is used "
9419 "for passing parameters");
9420
9421 // Work out the address of the stack slot.
9422 if (!StackPtr.getNode())
9423 StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT);
9425 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
9427
9428 // Emit the store.
9429 MemOpChains.push_back(
9430 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
9431 }
9432 }
9433
9434 // Join the stores, which are independent of one another.
9435 if (!MemOpChains.empty())
9436 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
9437
9438 SDValue Glue;
9439
9440 // Build a sequence of copy-to-reg nodes, chained and glued together.
9441 for (auto &Reg : RegsToPass) {
9442 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
9443 Glue = Chain.getValue(1);
9444 }
9445
9446 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
9447 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
9448 // split it and then direct call can be matched by PseudoCALL_SMALL.
9450 const GlobalValue *GV = S->getGlobal();
9451 unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(GV)
9454 Callee = DAG.getTargetGlobalAddress(S->getGlobal(), DL, PtrVT, 0, OpFlags);
9455 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
9456 unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(nullptr)
9459 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);
9460 }
9461
9462 // The first call operand is the chain and the second is the target address.
9464 Ops.push_back(Chain);
9465 Ops.push_back(Callee);
9466
9467 // Add argument registers to the end of the list so that they are
9468 // known live into the call.
9469 for (auto &Reg : RegsToPass)
9470 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
9471
9472 if (!IsTailCall) {
9473 // Add a register mask operand representing the call-preserved registers.
9474 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
9475 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
9476 assert(Mask && "Missing call preserved mask for calling convention");
9477 Ops.push_back(DAG.getRegisterMask(Mask));
9478 }
9479
9480 // Glue the call to the argument copies, if any.
9481 if (Glue.getNode())
9482 Ops.push_back(Glue);
9483
9484 // Emit the call.
9485 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
9486 unsigned Op;
9487 switch (DAG.getTarget().getCodeModel()) {
9488 default:
9489 report_fatal_error("Unsupported code model");
9490 case CodeModel::Small:
9491 Op = IsTailCall ? LoongArchISD::TAIL : LoongArchISD::CALL;
9492 break;
9493 case CodeModel::Medium:
9494 Op = IsTailCall ? LoongArchISD::TAIL_MEDIUM : LoongArchISD::CALL_MEDIUM;
9495 break;
9496 case CodeModel::Large:
9497 assert(Subtarget.is64Bit() && "Large code model requires LA64");
9498 Op = IsTailCall ? LoongArchISD::TAIL_LARGE : LoongArchISD::CALL_LARGE;
9499 break;
9500 }
9501
9502 if (IsTailCall) {
9504 SDValue Ret = DAG.getNode(Op, DL, NodeTys, Ops);
9505 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
9506 return Ret;
9507 }
9508
9509 Chain = DAG.getNode(Op, DL, NodeTys, Ops);
9510 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
9511 Glue = Chain.getValue(1);
9512
9513 // Mark the end of the call, which is glued to the call itself.
9514 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
9515 Glue = Chain.getValue(1);
9516
9517 // Assign locations to each value returned by this call.
9519 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
9520 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_LoongArch);
9521
9522 // Copy all of the result registers out of their specified physreg.
9523 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
9524 auto &VA = RVLocs[i];
9525 // Copy the value out.
9526 SDValue RetValue =
9527 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
9528 // Glue the RetValue to the end of the call sequence.
9529 Chain = RetValue.getValue(1);
9530 Glue = RetValue.getValue(2);
9531
9532 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
9533 assert(VA.needsCustom());
9534 SDValue RetValue2 = DAG.getCopyFromReg(Chain, DL, RVLocs[++i].getLocReg(),
9535 MVT::i32, Glue);
9536 Chain = RetValue2.getValue(1);
9537 Glue = RetValue2.getValue(2);
9538 RetValue = DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64,
9539 RetValue, RetValue2);
9540 } else
9541 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL);
9542
9543 InVals.push_back(RetValue);
9544 }
9545
9546 return Chain;
9547}
9548
9550 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
9551 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context,
9552 const Type *RetTy) const {
9554 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
9555
9556 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
9557 LoongArchABI::ABI ABI =
9558 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
9559 if (CC_LoongArch(MF.getDataLayout(), ABI, i, Outs[i].VT, CCValAssign::Full,
9560 Outs[i].Flags, CCInfo, /*IsRet=*/true, nullptr))
9561 return false;
9562 }
9563 return true;
9564}
9565
9567 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
9569 const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
9570 SelectionDAG &DAG) const {
9571 // Stores the assignment of the return value to a location.
9573
9574 // Info about the registers and stack slot.
9575 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
9576 *DAG.getContext());
9577
9578 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
9579 nullptr, CC_LoongArch);
9580 if (CallConv == CallingConv::GHC && !RVLocs.empty())
9581 report_fatal_error("GHC functions return void only");
9582 SDValue Glue;
9583 SmallVector<SDValue, 4> RetOps(1, Chain);
9584
9585 // Copy the result values into the output registers.
9586 for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) {
9587 SDValue Val = OutVals[OutIdx];
9588 CCValAssign &VA = RVLocs[i];
9589 assert(VA.isRegLoc() && "Can only return in registers!");
9590
9591 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
9592 // Handle returning f64 on LA32D with a soft float ABI.
9593 assert(VA.isRegLoc() && "Expected return via registers");
9594 assert(VA.needsCustom());
9595 SDValue SplitF64 = DAG.getNode(LoongArchISD::SPLIT_PAIR_F64, DL,
9596 DAG.getVTList(MVT::i32, MVT::i32), Val);
9597 SDValue Lo = SplitF64.getValue(0);
9598 SDValue Hi = SplitF64.getValue(1);
9599 Register RegLo = VA.getLocReg();
9600 Register RegHi = RVLocs[++i].getLocReg();
9601
9602 Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
9603 Glue = Chain.getValue(1);
9604 RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
9605 Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
9606 Glue = Chain.getValue(1);
9607 RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
9608 } else {
9609 // Handle a 'normal' return.
9610 Val = convertValVTToLocVT(DAG, Val, VA, DL);
9611 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
9612
9613 // Guarantee that all emitted copies are stuck together.
9614 Glue = Chain.getValue(1);
9615 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
9616 }
9617 }
9618
9619 RetOps[0] = Chain; // Update chain.
9620
9621 // Add the glue node if we have it.
9622 if (Glue.getNode())
9623 RetOps.push_back(Glue);
9624
9625 return DAG.getNode(LoongArchISD::RET, DL, MVT::Other, RetOps);
9626}
9627
9628// Check if a constant splat can be generated using [x]vldi, where imm[12] == 1.
9629// Note: The following prefixes are excluded:
9630// imm[11:8] == 4'b0000, 4'b0100, 4'b1000
9631// as they can be represented using [x]vrepli.[whb]
9633 const APInt &SplatValue, const unsigned SplatBitSize) const {
9634 uint64_t RequiredImm = 0;
9635 uint64_t V = SplatValue.getZExtValue();
9636 if (SplatBitSize == 16 && !(V & 0x00FF)) {
9637 // 4'b0101
9638 RequiredImm = (0b10101 << 8) | (V >> 8);
9639 return {true, RequiredImm};
9640 } else if (SplatBitSize == 32) {
9641 // 4'b0001
9642 if (!(V & 0xFFFF00FF)) {
9643 RequiredImm = (0b10001 << 8) | (V >> 8);
9644 return {true, RequiredImm};
9645 }
9646 // 4'b0010
9647 if (!(V & 0xFF00FFFF)) {
9648 RequiredImm = (0b10010 << 8) | (V >> 16);
9649 return {true, RequiredImm};
9650 }
9651 // 4'b0011
9652 if (!(V & 0x00FFFFFF)) {
9653 RequiredImm = (0b10011 << 8) | (V >> 24);
9654 return {true, RequiredImm};
9655 }
9656 // 4'b0110
9657 if ((V & 0xFFFF00FF) == 0xFF) {
9658 RequiredImm = (0b10110 << 8) | (V >> 8);
9659 return {true, RequiredImm};
9660 }
9661 // 4'b0111
9662 if ((V & 0xFF00FFFF) == 0xFFFF) {
9663 RequiredImm = (0b10111 << 8) | (V >> 16);
9664 return {true, RequiredImm};
9665 }
9666 // 4'b1010
9667 if ((V & 0x7E07FFFF) == 0x3E000000 || (V & 0x7E07FFFF) == 0x40000000) {
9668 RequiredImm =
9669 (0b11010 << 8) | (((V >> 24) & 0xC0) ^ 0x40) | ((V >> 19) & 0x3F);
9670 return {true, RequiredImm};
9671 }
9672 } else if (SplatBitSize == 64) {
9673 // 4'b1011
9674 if ((V & 0xFFFFFFFF7E07FFFFULL) == 0x3E000000ULL ||
9675 (V & 0xFFFFFFFF7E07FFFFULL) == 0x40000000ULL) {
9676 RequiredImm =
9677 (0b11011 << 8) | (((V >> 24) & 0xC0) ^ 0x40) | ((V >> 19) & 0x3F);
9678 return {true, RequiredImm};
9679 }
9680 // 4'b1100
9681 if ((V & 0x7FC0FFFFFFFFFFFFULL) == 0x4000000000000000ULL ||
9682 (V & 0x7FC0FFFFFFFFFFFFULL) == 0x3FC0000000000000ULL) {
9683 RequiredImm =
9684 (0b11100 << 8) | (((V >> 56) & 0xC0) ^ 0x40) | ((V >> 48) & 0x3F);
9685 return {true, RequiredImm};
9686 }
9687 // 4'b1001
9688 auto sameBitsPreByte = [](uint64_t x) -> std::pair<bool, uint8_t> {
9689 uint8_t res = 0;
9690 for (int i = 0; i < 8; ++i) {
9691 uint8_t byte = x & 0xFF;
9692 if (byte == 0 || byte == 0xFF)
9693 res |= ((byte & 1) << i);
9694 else
9695 return {false, 0};
9696 x >>= 8;
9697 }
9698 return {true, res};
9699 };
9700 auto [IsSame, Suffix] = sameBitsPreByte(V);
9701 if (IsSame) {
9702 RequiredImm = (0b11001 << 8) | Suffix;
9703 return {true, RequiredImm};
9704 }
9705 }
9706 return {false, RequiredImm};
9707}
9708
9710 EVT VT) const {
9711 if (!Subtarget.hasExtLSX())
9712 return false;
9713
9714 if (VT == MVT::f32) {
9715 uint64_t masked = Imm.bitcastToAPInt().getZExtValue() & 0x7e07ffff;
9716 return (masked == 0x3e000000 || masked == 0x40000000);
9717 }
9718
9719 if (VT == MVT::f64) {
9720 uint64_t masked = Imm.bitcastToAPInt().getZExtValue() & 0x7fc0ffffffffffff;
9721 return (masked == 0x3fc0000000000000 || masked == 0x4000000000000000);
9722 }
9723
9724 return false;
9725}
9726
9727bool LoongArchTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
9728 bool ForCodeSize) const {
9729 // TODO: Maybe need more checks here after vector extension is supported.
9730 if (VT == MVT::f32 && !Subtarget.hasBasicF())
9731 return false;
9732 if (VT == MVT::f64 && !Subtarget.hasBasicD())
9733 return false;
9734 return (Imm.isZero() || Imm.isExactlyValue(1.0) || isFPImmVLDILegal(Imm, VT));
9735}
9736
9738 return true;
9739}
9740
9742 return true;
9743}
9744
9745bool LoongArchTargetLowering::shouldInsertFencesForAtomic(
9746 const Instruction *I) const {
9747 if (!Subtarget.is64Bit())
9748 return isa<LoadInst>(I) || isa<StoreInst>(I);
9749
9750 if (isa<LoadInst>(I))
9751 return true;
9752
9753 // On LA64, atomic store operations with IntegerBitWidth of 32 and 64 do not
9754 // require fences beacuse we can use amswap_db.[w/d].
9755 Type *Ty = I->getOperand(0)->getType();
9756 if (isa<StoreInst>(I) && Ty->isIntegerTy()) {
9757 unsigned Size = Ty->getIntegerBitWidth();
9758 return (Size == 8 || Size == 16);
9759 }
9760
9761 return false;
9762}
9763
9765 LLVMContext &Context,
9766 EVT VT) const {
9767 if (!VT.isVector())
9768 return getPointerTy(DL);
9770}
9771
9773 unsigned AddressSpace, EVT MemVT, const MachineFunction &MF) const {
9774 // Do not merge to float value size (128 or 256 bits) if no implicit
9775 // float attribute is set.
9776 bool NoFloat = MF.getFunction().hasFnAttribute(Attribute::NoImplicitFloat);
9777 unsigned MaxIntSize = Subtarget.is64Bit() ? 64 : 32;
9778 if (NoFloat)
9779 return MemVT.getSizeInBits() <= MaxIntSize;
9780
9781 // Make sure we don't merge greater than our maximum supported vector width.
9782 if (Subtarget.hasExtLASX())
9783 MaxIntSize = 256;
9784 else if (Subtarget.hasExtLSX())
9785 MaxIntSize = 128;
9786
9787 return MemVT.getSizeInBits() <= MaxIntSize;
9788}
9789
9791 EVT VT = Y.getValueType();
9792
9793 if (VT.isVector())
9794 return Subtarget.hasExtLSX() && VT.isInteger();
9795
9796 return VT.isScalarInteger() && !isa<ConstantSDNode>(Y);
9797}
9798
9801 MachineFunction &MF, unsigned Intrinsic) const {
9802 switch (Intrinsic) {
9803 default:
9804 return;
9805 case Intrinsic::loongarch_masked_atomicrmw_xchg_i32:
9806 case Intrinsic::loongarch_masked_atomicrmw_add_i32:
9807 case Intrinsic::loongarch_masked_atomicrmw_sub_i32:
9808 case Intrinsic::loongarch_masked_atomicrmw_nand_i32: {
9809 IntrinsicInfo Info;
9811 Info.memVT = MVT::i32;
9812 Info.ptrVal = I.getArgOperand(0);
9813 Info.offset = 0;
9814 Info.align = Align(4);
9817 Infos.push_back(Info);
9818 return;
9819 // TODO: Add more Intrinsics later.
9820 }
9821 }
9822}
9823
9824// When -mlamcas is enabled, MinCmpXchgSizeInBits will be set to 8,
9825// atomicrmw and/or/xor operations with operands less than 32 bits cannot be
9826// expanded to am{and/or/xor}[_db].w through AtomicExpandPass. To prevent
9827// regression, we need to implement it manually.
9830
9832 Op == AtomicRMWInst::And) &&
9833 "Unable to expand");
9834 unsigned MinWordSize = 4;
9835
9836 IRBuilder<> Builder(AI);
9837 LLVMContext &Ctx = Builder.getContext();
9838 const DataLayout &DL = AI->getDataLayout();
9839 Type *ValueType = AI->getType();
9840 Type *WordType = Type::getIntNTy(Ctx, MinWordSize * 8);
9841
9842 Value *Addr = AI->getPointerOperand();
9843 PointerType *PtrTy = cast<PointerType>(Addr->getType());
9844 IntegerType *IntTy = DL.getIndexType(Ctx, PtrTy->getAddressSpace());
9845
9846 Value *AlignedAddr = Builder.CreateIntrinsic(
9847 Intrinsic::ptrmask, {PtrTy, IntTy},
9848 {Addr, ConstantInt::get(IntTy, ~(uint64_t)(MinWordSize - 1))}, nullptr,
9849 "AlignedAddr");
9850
9851 Value *AddrInt = Builder.CreatePtrToInt(Addr, IntTy);
9852 Value *PtrLSB = Builder.CreateAnd(AddrInt, MinWordSize - 1, "PtrLSB");
9853 Value *ShiftAmt = Builder.CreateShl(PtrLSB, 3);
9854 ShiftAmt = Builder.CreateTrunc(ShiftAmt, WordType, "ShiftAmt");
9855 Value *Mask = Builder.CreateShl(
9856 ConstantInt::get(WordType,
9857 (1 << (DL.getTypeStoreSize(ValueType) * 8)) - 1),
9858 ShiftAmt, "Mask");
9859 Value *Inv_Mask = Builder.CreateNot(Mask, "Inv_Mask");
9860 Value *ValOperand_Shifted =
9861 Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), WordType),
9862 ShiftAmt, "ValOperand_Shifted");
9863 Value *NewOperand;
9864 if (Op == AtomicRMWInst::And)
9865 NewOperand = Builder.CreateOr(ValOperand_Shifted, Inv_Mask, "AndOperand");
9866 else
9867 NewOperand = ValOperand_Shifted;
9868
9869 AtomicRMWInst *NewAI =
9870 Builder.CreateAtomicRMW(Op, AlignedAddr, NewOperand, Align(MinWordSize),
9871 AI->getOrdering(), AI->getSyncScopeID());
9872
9873 Value *Shift = Builder.CreateLShr(NewAI, ShiftAmt, "shifted");
9874 Value *Trunc = Builder.CreateTrunc(Shift, ValueType, "extracted");
9875 Value *FinalOldResult = Builder.CreateBitCast(Trunc, ValueType);
9876 AI->replaceAllUsesWith(FinalOldResult);
9877 AI->eraseFromParent();
9878}
9879
9882 const AtomicRMWInst *AI) const {
9883 // TODO: Add more AtomicRMWInst that needs to be extended.
9884
9885 // Since floating-point operation requires a non-trivial set of data
9886 // operations, use CmpXChg to expand.
9887 if (AI->isFloatingPointOperation() ||
9893
9894 if (Subtarget.hasLAM_BH() && Subtarget.is64Bit() &&
9897 AI->getOperation() == AtomicRMWInst::Sub)) {
9899 }
9900
9901 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
9902 if (Subtarget.hasLAMCAS()) {
9903 if (Size < 32 && (AI->getOperation() == AtomicRMWInst::And ||
9907 if (AI->getOperation() == AtomicRMWInst::Nand || Size < 32)
9909 }
9910
9911 if (Size == 8 || Size == 16)
9914}
9915
9916static Intrinsic::ID
9918 AtomicRMWInst::BinOp BinOp) {
9919 if (GRLen == 64) {
9920 switch (BinOp) {
9921 default:
9922 llvm_unreachable("Unexpected AtomicRMW BinOp");
9924 return Intrinsic::loongarch_masked_atomicrmw_xchg_i64;
9925 case AtomicRMWInst::Add:
9926 return Intrinsic::loongarch_masked_atomicrmw_add_i64;
9927 case AtomicRMWInst::Sub:
9928 return Intrinsic::loongarch_masked_atomicrmw_sub_i64;
9930 return Intrinsic::loongarch_masked_atomicrmw_nand_i64;
9932 return Intrinsic::loongarch_masked_atomicrmw_umax_i64;
9934 return Intrinsic::loongarch_masked_atomicrmw_umin_i64;
9935 case AtomicRMWInst::Max:
9936 return Intrinsic::loongarch_masked_atomicrmw_max_i64;
9937 case AtomicRMWInst::Min:
9938 return Intrinsic::loongarch_masked_atomicrmw_min_i64;
9939 // TODO: support other AtomicRMWInst.
9940 }
9941 }
9942
9943 if (GRLen == 32) {
9944 switch (BinOp) {
9945 default:
9946 llvm_unreachable("Unexpected AtomicRMW BinOp");
9948 return Intrinsic::loongarch_masked_atomicrmw_xchg_i32;
9949 case AtomicRMWInst::Add:
9950 return Intrinsic::loongarch_masked_atomicrmw_add_i32;
9951 case AtomicRMWInst::Sub:
9952 return Intrinsic::loongarch_masked_atomicrmw_sub_i32;
9954 return Intrinsic::loongarch_masked_atomicrmw_nand_i32;
9956 return Intrinsic::loongarch_masked_atomicrmw_umax_i32;
9958 return Intrinsic::loongarch_masked_atomicrmw_umin_i32;
9959 case AtomicRMWInst::Max:
9960 return Intrinsic::loongarch_masked_atomicrmw_max_i32;
9961 case AtomicRMWInst::Min:
9962 return Intrinsic::loongarch_masked_atomicrmw_min_i32;
9963 // TODO: support other AtomicRMWInst.
9964 }
9965 }
9966
9967 llvm_unreachable("Unexpected GRLen\n");
9968}
9969
9972 const AtomicCmpXchgInst *CI) const {
9973
9974 if (Subtarget.hasLAMCAS())
9976
9978 if (Size == 8 || Size == 16)
9981}
9982
9984 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
9985 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
9986 unsigned GRLen = Subtarget.getGRLen();
9987 AtomicOrdering FailOrd = CI->getFailureOrdering();
9988 Value *FailureOrdering =
9989 Builder.getIntN(Subtarget.getGRLen(), static_cast<uint64_t>(FailOrd));
9990 Intrinsic::ID CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i32;
9991 if (GRLen == 64) {
9992 CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i64;
9993 CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
9994 NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
9995 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
9996 }
9997 Type *Tys[] = {AlignedAddr->getType()};
9998 Value *Result = Builder.CreateIntrinsic(
9999 CmpXchgIntrID, Tys, {AlignedAddr, CmpVal, NewVal, Mask, FailureOrdering});
10000 if (GRLen == 64)
10001 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
10002 return Result;
10003}
10004
10006 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
10007 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
10008 // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
10009 // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
10010 // mask, as this produces better code than the LL/SC loop emitted by
10011 // int_loongarch_masked_atomicrmw_xchg.
10012 if (AI->getOperation() == AtomicRMWInst::Xchg &&
10015 if (CVal->isZero())
10016 return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,
10017 Builder.CreateNot(Mask, "Inv_Mask"),
10018 AI->getAlign(), Ord);
10019 if (CVal->isMinusOne())
10020 return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,
10021 AI->getAlign(), Ord);
10022 }
10023
10024 unsigned GRLen = Subtarget.getGRLen();
10025 Value *Ordering =
10026 Builder.getIntN(GRLen, static_cast<uint64_t>(AI->getOrdering()));
10027 Type *Tys[] = {AlignedAddr->getType()};
10029 AI->getModule(),
10031
10032 if (GRLen == 64) {
10033 Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
10034 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
10035 ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
10036 }
10037
10038 Value *Result;
10039
10040 // Must pass the shift amount needed to sign extend the loaded value prior
10041 // to performing a signed comparison for min/max. ShiftAmt is the number of
10042 // bits to shift the value into position. Pass GRLen-ShiftAmt-ValWidth, which
10043 // is the number of bits to left+right shift the value in order to
10044 // sign-extend.
10045 if (AI->getOperation() == AtomicRMWInst::Min ||
10047 const DataLayout &DL = AI->getDataLayout();
10048 unsigned ValWidth =
10049 DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
10050 Value *SextShamt =
10051 Builder.CreateSub(Builder.getIntN(GRLen, GRLen - ValWidth), ShiftAmt);
10052 Result = Builder.CreateCall(LlwOpScwLoop,
10053 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
10054 } else {
10055 Result =
10056 Builder.CreateCall(LlwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
10057 }
10058
10059 if (GRLen == 64)
10060 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
10061 return Result;
10062}
10063
10065 const MachineFunction &MF, EVT VT) const {
10066 VT = VT.getScalarType();
10067
10068 if (!VT.isSimple())
10069 return false;
10070
10071 switch (VT.getSimpleVT().SimpleTy) {
10072 case MVT::f32:
10073 case MVT::f64:
10074 return true;
10075 default:
10076 break;
10077 }
10078
10079 return false;
10080}
10081
10083 const Constant *PersonalityFn) const {
10084 return LoongArch::R4;
10085}
10086
10088 const Constant *PersonalityFn) const {
10089 return LoongArch::R5;
10090}
10091
10092//===----------------------------------------------------------------------===//
10093// Target Optimization Hooks
10094//===----------------------------------------------------------------------===//
10095
10097 const LoongArchSubtarget &Subtarget) {
  // The FRECIPE feature's instructions have a relative accuracy of 2^-14.
10099 // IEEE float has 23 digits and double has 52 digits.
10100 int RefinementSteps = VT.getScalarType() == MVT::f64 ? 2 : 1;
10101 return RefinementSteps;
10102}
10103
10104static bool
10106 assert(Subtarget.hasFrecipe() &&
10107 "Reciprocal estimate queried on unsupported target");
10108
10109 if (!VT.isSimple())
10110 return false;
10111
10112 switch (VT.getSimpleVT().SimpleTy) {
10113 case MVT::f32:
10114 // f32 is the base type for reciprocal estimate instructions.
10115 return true;
10116
10117 case MVT::f64:
10118 return Subtarget.hasBasicD();
10119
10120 case MVT::v4f32:
10121 case MVT::v2f64:
10122 return Subtarget.hasExtLSX();
10123
10124 case MVT::v8f32:
10125 case MVT::v4f64:
10126 return Subtarget.hasExtLASX();
10127
10128 default:
10129 return false;
10130 }
10131}
10132
10134 SelectionDAG &DAG, int Enabled,
10135 int &RefinementSteps,
10136 bool &UseOneConstNR,
10137 bool Reciprocal) const {
10139 "Enabled should never be Disabled here");
10140
10141 if (!Subtarget.hasFrecipe())
10142 return SDValue();
10143
10144 SDLoc DL(Operand);
10145 EVT VT = Operand.getValueType();
10146
10147 // Check supported types.
10148 if (!isSupportedReciprocalEstimateType(VT, Subtarget))
10149 return SDValue();
10150
10151 // Handle refinement steps.
10152 if (RefinementSteps == ReciprocalEstimate::Unspecified)
10153 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
10154
10155 // LoongArch only has FRSQRTE which is 1.0 / sqrt(x).
10156 UseOneConstNR = false;
10157 SDValue Rsqrt = DAG.getNode(LoongArchISD::FRSQRTE, DL, VT, Operand);
10158
10159 // If the caller wants 1.0 / sqrt(x), or if further refinement steps
10160 // are needed (which rely on the reciprocal form), return the raw reciprocal
10161 // estimate.
10162 if (Reciprocal || RefinementSteps > 0)
10163 return Rsqrt;
10164
10165 // Otherwise, return sqrt(x) by multiplying with the operand.
10166 return DAG.getNode(ISD::FMUL, DL, VT, Operand, Rsqrt);
10167}
10168
10170 SelectionDAG &DAG,
10171 int Enabled,
10172 int &RefinementSteps) const {
10174 "Enabled should never be Disabled here");
10175
10176 if (!Subtarget.hasFrecipe())
10177 return SDValue();
10178
10179 SDLoc DL(Operand);
10180 EVT VT = Operand.getValueType();
10181
10182 // Check supported types.
10183 if (!isSupportedReciprocalEstimateType(VT, Subtarget))
10184 return SDValue();
10185
10186 if (RefinementSteps == ReciprocalEstimate::Unspecified)
10187 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
10188
10189 // FRECIPE computes 1.0 / x.
10190 return DAG.getNode(LoongArchISD::FRECIPE, DL, VT, Operand);
10191}
10192
10193//===----------------------------------------------------------------------===//
10194// LoongArch Inline Assembly Support
10195//===----------------------------------------------------------------------===//
10196
10198LoongArchTargetLowering::getConstraintType(StringRef Constraint) const {
10199 // LoongArch specific constraints in GCC: config/loongarch/constraints.md
10200 //
10201 // 'f': A floating-point register (if available).
10202 // 'k': A memory operand whose address is formed by a base register and
10203 // (optionally scaled) index register.
10204 // 'l': A signed 16-bit constant.
10205 // 'm': A memory operand whose address is formed by a base register and
10206 // offset that is suitable for use in instructions with the same
10207 // addressing mode as st.w and ld.w.
10208 // 'q': A general-purpose register except for $r0 and $r1 (for the csrxchg
10209 // instruction)
10210 // 'I': A signed 12-bit constant (for arithmetic instructions).
10211 // 'J': Integer zero.
10212 // 'K': An unsigned 12-bit constant (for logic instructions).
10213 // "ZB": An address that is held in a general-purpose register. The offset is
10214 // zero.
10215 // "ZC": A memory operand whose address is formed by a base register and
10216 // offset that is suitable for use in instructions with the same
10217 // addressing mode as ll.w and sc.w.
10218 if (Constraint.size() == 1) {
10219 switch (Constraint[0]) {
10220 default:
10221 break;
10222 case 'f':
10223 case 'q':
10224 return C_RegisterClass;
10225 case 'l':
10226 case 'I':
10227 case 'J':
10228 case 'K':
10229 return C_Immediate;
10230 case 'k':
10231 return C_Memory;
10232 }
10233 }
10234
10235 if (Constraint == "ZC" || Constraint == "ZB")
10236 return C_Memory;
10237
10238 // 'm' is handled here.
10239 return TargetLowering::getConstraintType(Constraint);
10240}
10241
InlineAsm::ConstraintCode LoongArchTargetLowering::getInlineAsmMemConstraint(
    StringRef ConstraintCode) const {
  // Translate LoongArch memory-constraint strings into InlineAsm constraint
  // codes; anything unrecognized defers to the generic implementation.
  // NOTE(review): the explicit StringSwitch .Case entries (e.g. for "k",
  // "ZB", "ZC") appear to be missing from this copy — verify against
  // upstream before relying on this function.
  return StringSwitch<InlineAsm::ConstraintCode>(ConstraintCode)
      .Default(TargetLowering::getInlineAsmMemConstraint(ConstraintCode));
}
10250
std::pair<unsigned, const TargetRegisterClass *>
LoongArchTargetLowering::getRegForInlineAsmConstraint(
    const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
  // Map an inline-asm register constraint (a single-letter class like 'r',
  // 'q', 'f', or an explicit "{$reg}" name) to a (register, register class)
  // pair.
  // First, see if this is a constraint that directly corresponds to a LoongArch
  // register class.
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'r':
      // TODO: Support fixed vectors up to GRLen?
      if (VT.isVector())
        break;
      return std::make_pair(0U, &LoongArch::GPRRegClass);
    case 'q':
      // Any GPR except $r0 and $r1 (per the 'q' constraint documented in
      // getConstraintType).
      return std::make_pair(0U, &LoongArch::GPRNoR0R1RegClass);
    case 'f':
      // Pick the narrowest FP/vector class that can legally hold VT, gated
      // on the corresponding subtarget feature.
      if (Subtarget.hasBasicF() && VT == MVT::f32)
        return std::make_pair(0U, &LoongArch::FPR32RegClass);
      if (Subtarget.hasBasicD() && VT == MVT::f64)
        return std::make_pair(0U, &LoongArch::FPR64RegClass);
      if (Subtarget.hasExtLSX() &&
          TRI->isTypeLegalForClass(LoongArch::LSX128RegClass, VT))
        return std::make_pair(0U, &LoongArch::LSX128RegClass);
      if (Subtarget.hasExtLASX() &&
          TRI->isTypeLegalForClass(LoongArch::LASX256RegClass, VT))
        return std::make_pair(0U, &LoongArch::LASX256RegClass);
      break;
    default:
      break;
    }
  }

  // TargetLowering::getRegForInlineAsmConstraint uses the name of the TableGen
  // record (e.g. the "R0" in `def R0`) to choose registers for InlineAsm
  // constraints while the official register name is prefixed with a '$'. So we
  // clip the '$' from the original constraint string (e.g. {$r0} to {r0}.)
  // before it being parsed. And TargetLowering::getRegForInlineAsmConstraint is
  // case insensitive, so no need to convert the constraint to upper case here.
  //
  // For now, no need to support ABI names (e.g. `$a0`) as clang will correctly
  // decode the usage of register name aliases into their official names. And
  // AFAIK, the not yet upstreamed `rustc` for LoongArch will always use
  // official register names.
  if (Constraint.starts_with("{$r") || Constraint.starts_with("{$f") ||
      Constraint.starts_with("{$vr") || Constraint.starts_with("{$xr")) {
    bool IsFP = Constraint[2] == 'f';
    std::pair<StringRef, StringRef> Temp = Constraint.split('$');
    std::pair<unsigned, const TargetRegisterClass *> R;
    // NOTE(review): the line assigning R (apparently a recursive call to
    // getRegForInlineAsmConstraint with the '$'-stripped name) is missing
    // from this copy — verify against upstream.
        TRI, join_items("", Temp.first, Temp.second), VT);
    // Match those names to the widest floating point register type available.
    if (IsFP) {
      unsigned RegNo = R.first;
      if (LoongArch::F0 <= RegNo && RegNo <= LoongArch::F31) {
        if (Subtarget.hasBasicD() && (VT == MVT::f64 || VT == MVT::Other)) {
          // Remap the 32-bit FPR to its 64-bit counterpart.
          unsigned DReg = RegNo - LoongArch::F0 + LoongArch::F0_64;
          return std::make_pair(DReg, &LoongArch::FPR64RegClass);
        }
      }
    }
    return R;
  }

  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}
10315
void LoongArchTargetLowering::LowerAsmOperandForConstraint(
    SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
    SelectionDAG &DAG) const {
  // Validate immediate-constraint operands ('l', 'I', 'J', 'K') and, when in
  // range, append the corresponding target constant to Ops. Appending nothing
  // causes the operand to fail to match the constraint.
  // Currently only support length 1 constraints.
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'l':
      // Validate & create a 16-bit signed immediate operand.
      if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
        uint64_t CVal = C->getSExtValue();
        if (isInt<16>(CVal))
          Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op),
                                                    Subtarget.getGRLenVT()));
      }
      return;
    case 'I':
      // Validate & create a 12-bit signed immediate operand.
      if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
        uint64_t CVal = C->getSExtValue();
        if (isInt<12>(CVal))
          Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op),
                                                    Subtarget.getGRLenVT()));
      }
      return;
    case 'J':
      // Validate & create an integer zero operand.
      if (auto *C = dyn_cast<ConstantSDNode>(Op))
        if (C->getZExtValue() == 0)
          Ops.push_back(
              DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getGRLenVT()));
      return;
    case 'K':
      // Validate & create a 12-bit unsigned immediate operand.
      if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
        uint64_t CVal = C->getZExtValue();
        if (isUInt<12>(CVal))
          Ops.push_back(
              DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT()));
      }
      return;
    default:
      break;
    }
  }
  // NOTE(review): the fallback call to
  // TargetLowering::LowerAsmOperandForConstraint appears to be missing from
  // this copy — verify against upstream.
}
10362
10363#define GET_REGISTER_MATCHER
10364#include "LoongArchGenAsmMatcher.inc"
10365
    const MachineFunction &MF) const {
  // NOTE(review): the first signature line (return type, function name and
  // the leading RegName/VT parameters) is missing from this copy — verify
  // against upstream.
  // Resolve a '$'-prefixed register name to a physical register. Strip the
  // leading '$' before matching against the TableGen-generated name tables.
  std::pair<StringRef, StringRef> Name = StringRef(RegName).split('$');
  std::string NewRegName = Name.second.str();
  // Try alternative (alias) names first, then official names.
  Register Reg = MatchRegisterAltName(NewRegName);
  if (!Reg)
    Reg = MatchRegisterName(NewRegName);
  if (!Reg)
    return Reg;
  // Only reserved registers may be obtained by name; anything else could be
  // clobbered by the register allocator.
  BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
  if (!ReservedRegs.test(Reg))
    report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
                             StringRef(RegName) + "\"."));
  return Reg;
}
10382
    EVT VT, SDValue C) const {
  // NOTE(review): the first signature line (return type, class name and
  // leading parameters) is missing from this copy — verify against upstream.
  // Decide whether multiplying by the constant C is worth decomposing into
  // shift/add/sub/ALSL sequences instead of emitting a hardware multiply.
  // TODO: Support vectors.
  if (!VT.isScalarInteger())
    return false;

  // Omit the optimization if the data size exceeds GRLen.
  if (VT.getSizeInBits() > Subtarget.getGRLen())
    return false;

  if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
    const APInt &Imm = ConstNode->getAPIntValue();
    // Break MUL into (SLLI + ADD/SUB) or ALSL.
    if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
        (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
      return true;
    // Break MUL into (ALSL x, (SLLI x, imm0), imm1).
    if (ConstNode->hasOneUse() &&
        ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
         (Imm - 8).isPowerOf2() || (Imm - 16).isPowerOf2()))
      return true;
    // Break (MUL x, imm) into (ADD (SLLI x, s0), (SLLI x, s1)),
    // in which the immediate has two set bits. Or Break (MUL x, imm)
    // into (SUB (SLLI x, s0), (SLLI x, s1)), in which the immediate
    // equals to (1 << s0) - (1 << s1).
    // The range check excludes immediates already reachable by a single
    // 12-bit add/logic immediate.
    if (ConstNode->hasOneUse() && !(Imm.sge(-2048) && Imm.sle(4095))) {
      unsigned Shifts = Imm.countr_zero();
      // Reject immediates which can be composed via a single LUI.
      if (Shifts >= 12)
        return false;
      // Reject multiplications can be optimized to
      // (SLLI (ALSL x, x, 1/2/3/4), s).
      APInt ImmPop = Imm.ashr(Shifts);
      if (ImmPop == 3 || ImmPop == 5 || ImmPop == 9 || ImmPop == 17)
        return false;
      // We do not consider the case `(-Imm - ImmSmall).isPowerOf2()`,
      // since it needs one more instruction than other 3 cases.
      APInt ImmSmall = APInt(Imm.getBitWidth(), 1ULL << Shifts, true);
      if ((Imm - ImmSmall).isPowerOf2() || (Imm + ImmSmall).isPowerOf2() ||
          (ImmSmall - Imm).isPowerOf2())
        return true;
    }
  }

  return false;
}
10429
    const AddrMode &AM,
    Type *Ty, unsigned AS,
    Instruction *I) const {
  // NOTE(review): the first signature line (return type, function name and
  // leading DataLayout parameter) is missing from this copy — verify against
  // upstream.
  // LoongArch has four basic addressing modes:
  //  1. reg
  //  2. reg + 12-bit signed offset
  //  3. reg + 14-bit signed offset left-shifted by 2
  //  4. reg1 + reg2
  // TODO: Add more checks after support vector extension.

  // No global is ever allowed as a base.
  if (AM.BaseGV)
    return false;

  // Require a 12-bit signed offset or 14-bit signed offset left-shifted by 2
  // with `UAL` feature.
  if (!isInt<12>(AM.BaseOffs) &&
      !(isShiftedInt<14, 2>(AM.BaseOffs) && Subtarget.hasUAL()))
    return false;

  switch (AM.Scale) {
  case 0:
    // "r+i" or just "i", depending on HasBaseReg.
    break;
  case 1:
    // "r+r+i" is not allowed.
    if (AM.HasBaseReg && AM.BaseOffs)
      return false;
    // Otherwise we have "r+r" or "r+i".
    break;
  case 2:
    // "2*r+r" or "2*r+i" is not allowed.
    if (AM.HasBaseReg || AM.BaseOffs)
      return false;
    // Allow "2*r" as "r+r".
    break;
  default:
    // No larger scales are representable.
    return false;
  }

  return true;
}
10473
  // Immediates are legal here iff they fit in a signed 12-bit field.
  // NOTE(review): this function's signature line is missing from this copy —
  // verify against upstream.
  return isInt<12>(Imm);
}
10477
  // Immediates are legal here iff they fit in a signed 12-bit field.
  // NOTE(review): this function's signature line is missing from this copy —
  // verify against upstream.
  return isInt<12>(Imm);
}
10481
  // NOTE(review): this function's signature line is missing from this copy
  // (isZExtFree-style predicate) — verify against upstream.
  // Zexts are free if they can be combined with a load.
  // Don't advertise i32->i64 zextload as being free for LA64. It interacts
  // poorly with type legalization of compares preferring sext.
  if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
    EVT MemVT = LD->getMemoryVT();
    // i8/i16 loads (plain or zero-extending) give the zext for free.
    if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
        (LD->getExtensionType() == ISD::NON_EXTLOAD ||
         LD->getExtensionType() == ISD::ZEXTLOAD))
      return true;
  }

  return TargetLowering::isZExtFree(Val, VT2);
}
10496
    EVT DstVT) const {
  // Extending i32 to i64 by sign is cheap on LA64 (32-bit ops sign-extend
  // their results into the full register).
  // NOTE(review): the first signature line is missing from this copy —
  // verify against upstream.
  return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
}
10501
  // 32-bit integer constants take the special treatment on LA64.
  // NOTE(review): this function's signature line is missing from this copy —
  // verify against upstream.
  return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
}
10505
  // NOTE(review): this function's signature line is missing from this copy —
  // verify against upstream.
  // TODO: Support vectors.
  if (Y.getValueType().isVector())
    return false;

  // Only profitable for non-constant operands; a constant operand is handled
  // by other folds.
  return !isa<ConstantSDNode>(Y);
}
10513
  // LAMCAS will use amcas[_DB].{b/h/w/d} which does not require extension.
  // Without LAMCAS, the comparison argument must be sign-extended.
  // NOTE(review): this function's signature line is missing from this copy —
  // verify against upstream.
  return Subtarget.hasLAMCAS() ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND;
}
10518
    Type *Ty, bool IsSigned) const {
  // NOTE(review): the first signature line is missing from this copy —
  // verify against upstream.
  // On LA64, 32-bit integer libcall arguments are always sign-extended.
  if (Subtarget.is64Bit() && Ty->isIntegerTy(32))
    return true;

  // Otherwise, extend only when the caller asked for a signed extension.
  return IsSigned;
}
10526
  // NOTE(review): this function's signature line is missing from this copy —
  // verify against upstream.
  // Return false to suppress the unnecessary extensions if the LibCall
  // arguments or return value is a float narrower than GRLEN on a soft FP ABI.
  if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
                                  Type.getSizeInBits() < Subtarget.getGRLen()))
    return false;
  return true;
}
10535
// memcpy, and other memory intrinsics, typically tries to use wider load/store
// if the source/dest is aligned and the copy size is large enough. We therefore
// want to align such objects passed to memory intrinsics.
    unsigned &MinSize,
    Align &PrefAlign) const {
  // NOTE(review): the first signature line (return type, name and the leading
  // CallInst parameter) is missing from this copy — verify against upstream.
  // Only memory intrinsics benefit from this alignment hint.
  if (!isa<MemIntrinsic>(CI))
    return false;

  // Prefer native-register-sized accesses: 8 bytes on LA64, 4 on LA32.
  if (Subtarget.is64Bit()) {
    MinSize = 8;
    PrefAlign = Align(8);
  } else {
    MinSize = 4;
    PrefAlign = Align(4);
  }

  return true;
}
10555
  // NOTE(review): this function's signature line is missing from this copy
  // (getPreferredVectorAction) — verify against upstream.
  // Prefer widening sub-legal fixed vectors, except single-element vectors
  // and i1-element (mask) vectors.
  if (!VT.isScalableVector() && VT.getVectorNumElements() != 1 &&
      VT.getVectorElementType() != MVT::i1)
    return TypeWidenVector;

  // NOTE(review): the fallback return (presumably the base-class
  // getPreferredVectorAction) is missing from this copy.
}
10564
10565bool LoongArchTargetLowering::splitValueIntoRegisterParts(
10566 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
10567 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
10568 bool IsABIRegCopy = CC.has_value();
10569 EVT ValueVT = Val.getValueType();
10570
10571 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
10572 PartVT == MVT::f32) {
10573 // Cast the [b]f16 to i16, extend to i32, pad with ones to make a float
10574 // nan, and cast to f32.
10575 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);
10576 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);
10577 Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val,
10578 DAG.getConstant(0xFFFF0000, DL, MVT::i32));
10579 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
10580 Parts[0] = Val;
10581 return true;
10582 }
10583
10584 return false;
10585}
10586
10587SDValue LoongArchTargetLowering::joinRegisterPartsIntoValue(
10588 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
10589 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
10590 bool IsABIRegCopy = CC.has_value();
10591
10592 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
10593 PartVT == MVT::f32) {
10594 SDValue Val = Parts[0];
10595
10596 // Cast the f32 to i32, truncate to i16, and cast back to [b]f16.
10597 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
10598 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);
10599 Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
10600 return Val;
10601 }
10602
10603 return SDValue();
10604}
10605
MVT LoongArchTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
                                                           CallingConv::ID CC,
                                                           EVT VT) const {
  // f16 values travel in f32 registers when basic FP is available.
  // Use f32 to pass f16.
  if (VT == MVT::f16 && Subtarget.hasBasicF())
    return MVT::f32;

  // NOTE(review): the fallback return (presumably the base-class
  // getRegisterTypeForCallingConv) is missing from this copy.
}
10615
unsigned LoongArchTargetLowering::getNumRegistersForCallingConv(
    LLVMContext &Context, CallingConv::ID CC, EVT VT) const {
  // An f16 occupies exactly one f32 register when basic FP is available.
  // Use f32 to pass f16.
  if (VT == MVT::f16 && Subtarget.hasBasicF())
    return 1;

  // NOTE(review): the fallback return (presumably the base-class
  // getNumRegistersForCallingConv) is missing from this copy.
}
10624
    const SDValue Op, KnownBits &Known, const APInt &DemandedElts,
    const SelectionDAG &DAG, unsigned Depth) const {
  // NOTE(review): the first signature line is missing from this copy
  // (computeKnownBitsForTargetNode) — verify against upstream.
  // Compute known-bits information for LoongArch-specific DAG nodes.
  unsigned Opc = Op.getOpcode();
  Known.resetAll();
  switch (Opc) {
  default:
    break;
  case LoongArchISD::VPICK_ZEXT_ELT: {
    // The extracted element is zero-extended from the VT recorded in
    // operand 2, so every bit at or above that width is known zero.
    assert(isa<VTSDNode>(Op->getOperand(2)) && "Unexpected operand!");
    EVT VT = cast<VTSDNode>(Op->getOperand(2))->getVT();
    unsigned VTBits = VT.getScalarSizeInBits();
    assert(Known.getBitWidth() >= VTBits && "Unexpected width!");
    Known.Zero.setBitsFrom(VTBits);
    break;
  }
  }
}
10643
    SDValue Op, const APInt &OriginalDemandedBits,
    const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
    unsigned Depth) const {
  // NOTE(review): the first signature line is missing from this copy
  // (SimplifyDemandedBitsForTargetNode) — verify against upstream.
  EVT VT = Op.getValueType();
  unsigned BitWidth = OriginalDemandedBits.getBitWidth();
  unsigned Opc = Op.getOpcode();
  switch (Opc) {
  default:
    break;
  case LoongArchISD::VMSKLTZ:
  case LoongArchISD::XVMSKLTZ: {
    // [X]VMSKLTZ packs the sign bit of each source element into the low
    // NumElts bits of the scalar result.
    SDValue Src = Op.getOperand(0);
    MVT SrcVT = Src.getSimpleValueType();
    unsigned SrcBits = SrcVT.getScalarSizeInBits();
    unsigned NumElts = SrcVT.getVectorNumElements();

    // If we don't need the sign bits at all just return zero.
    if (OriginalDemandedBits.countr_zero() >= NumElts)
      return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));

    // Only demand the vector elements of the sign bits we need.
    APInt KnownUndef, KnownZero;
    APInt DemandedElts = OriginalDemandedBits.zextOrTrunc(NumElts);
    if (SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef, KnownZero,
                                   TLO, Depth + 1))
      return true;

    // Only the low NumElts result bits can be nonzero.
    Known.Zero = KnownZero.zext(BitWidth);
    Known.Zero.setHighBits(BitWidth - NumElts);

    // [X]VMSKLTZ only uses the MSB from each vector element.
    KnownBits KnownSrc;
    APInt DemandedSrcBits = APInt::getSignMask(SrcBits);
    if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, KnownSrc, TLO,
                             Depth + 1))
      return true;

    // If the demanded elements' sign bit is uniformly known, so is the mask.
    if (KnownSrc.One[SrcBits - 1])
      Known.One.setLowBits(NumElts);
    else if (KnownSrc.Zero[SrcBits - 1])
      Known.Zero.setLowBits(NumElts);

    // Attempt to avoid multi-use ops if we don't need anything from it.
    // NOTE(review): the line introducing NewSrc (apparently a call to
    // SimplifyMultipleUseDemandedBits) is missing from this copy — verify
    // against upstream.
        Src, DemandedSrcBits, DemandedElts, TLO.DAG, Depth + 1))
      return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewSrc));
    return false;
  }
  }

  // NOTE(review): the fallback call (presumably the base-class
  // SimplifyDemandedBitsForTargetNode) is truncated in this copy.
      Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth);
}
10698
  // NOTE(review): this function's signature line is missing from this copy
  // (shouldScalarizeBinop-style predicate) — verify against upstream.
  unsigned Opc = VecOp.getOpcode();

  // Assume target opcodes can't be scalarized.
  // TODO - do we have any exceptions?
  if (Opc >= ISD::BUILTIN_OP_END || !isBinOp(Opc))
    return false;

  // If the vector op is not supported, try to convert to scalar.
  EVT VecVT = VecOp.getValueType();
  // NOTE(review): the legality check guarding this `return true` is missing
  // from this copy — verify against upstream.
    return true;

  // If the vector op is supported, but the scalar op is not, the transform may
  // not be worthwhile.
  EVT ScalarVT = VecVT.getScalarType();
  return isOperationLegalOrCustomOrPromote(Opc, ScalarVT);
}
10717
    unsigned Index) const {
  // NOTE(review): the first signature line and the subtarget-feature guard
  // preceding this `return false` are missing from this copy — verify
  // against upstream.
    return false;

  // Extract a 128-bit subvector from index 0 of a 256-bit vector is free.
  return Index == 0;
}
10726
    unsigned Index) const {
  // NOTE(review): the first signature line is missing from this copy
  // (isExtractVecEltCheap) — verify against upstream.
  EVT EltVT = VT.getScalarType();

  // Extract a scalar FP value from index 0 of a vector is free.
  return (EltVT == MVT::f32 || EltVT == MVT::f64) && Index == 0;
}
static MCRegister MatchRegisterName(StringRef Name)
static bool checkValueWidth(SDValue V, unsigned width, ISD::LoadExtType &ExtType)
static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
return SDValue()
static SDValue performANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue performSETCCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static MCRegister MatchRegisterAltName(StringRef Name)
Maps from the set of all alternative registernames to a register number.
Function Alias Analysis Results
static uint64_t getConstant(const Value *IndexValue)
static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG, unsigned Flags)
#define X(NUM, ENUM, NAME)
Definition ELF.h:851
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static MachineBasicBlock * emitSelectPseudo(MachineInstr &MI, MachineBasicBlock *BB, unsigned Opcode)
static SDValue unpackFromRegLoc(const CSKYSubtarget &Subtarget, SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define RegName(no)
static SDValue performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
const MCPhysReg ArgFPR32s[]
static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Dispatching routine to lower various 128-bit LoongArch vector shuffles.
static SDValue lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVSHUF4I (if possible).
const MCPhysReg ArgVRs[]
static SDValue lowerVECTOR_SHUFFLE_VPICKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPICKEV (if possible).
static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_XVPICKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPICKOD (if possible).
static SDValue unpackF64OnLA32DSoftABI(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const CCValAssign &HiVA, const SDLoc &DL)
static bool fitsRegularPattern(typename SmallVectorImpl< ValType >::const_iterator Begin, unsigned CheckStride, typename SmallVectorImpl< ValType >::const_iterator End, ValType ExpectedIndex, unsigned ExpectedIndexStride)
Determine whether a range fits a regular pattern of values.
static SDValue lowerVECTOR_SHUFFLE_IsReverse(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE whose result is the reversed source vector.
static SDValue PromoteMaskArithmetic(SDValue N, const SDLoc &DL, EVT VT, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, unsigned Depth)
static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg, SelectionDAG &DAG)
static cl::opt< bool > ZeroDivCheck("loongarch-check-zero-division", cl::Hidden, cl::desc("Trap on integer division by zero."), cl::init(false))
static SDValue lowerVECTOR_SHUFFLE_VSHUF(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into VSHUF.
static int getEstimateRefinementSteps(EVT VT, const LoongArchSubtarget &Subtarget)
static bool isSupportedReciprocalEstimateType(EVT VT, const LoongArchSubtarget &Subtarget)
static void emitErrorAndReplaceIntrinsicResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, StringRef ErrorMsg, bool WithChain=true)
static SDValue lowerVECTOR_SHUFFLEAsByteRotate(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE as byte rotate (if possible).
static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp, SelectionDAG &DAG, bool IsSigned=false)
static SDValue lowerVECTOR_SHUFFLE_XVINSVE0(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVINSVE0 (if possible).
static SDValue performMOVFR2GR_SCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_VILVH(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VILVH (if possible).
static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI, unsigned ValNo, MVT ValVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsRet, Type *OrigTy)
static SDValue performVSELECTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG)
static SDValue performSPLIT_PAIR_F64Combine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue performBITCASTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static MachineBasicBlock * emitSplitPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue lowerVectorBitSetImm(SDNode *Node, SelectionDAG &DAG)
static SDValue performSETCC_BITCASTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
@ NoMaterializeFPImm
@ MaterializeFPImm2Ins
@ MaterializeFPImm5Ins
@ MaterializeFPImm6Ins
@ MaterializeFPImm3Ins
@ MaterializeFPImm4Ins
static SDValue performEXTENDCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_XVPACKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPACKOD (if possible).
static std::optional< bool > matchSetCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue Val)
static SDValue combineAndNotIntoVANDN(SDNode *N, const SDLoc &DL, SelectionDAG &DAG)
Try to fold: (and (xor X, -1), Y) -> (vandn X, Y).
static SDValue lowerBUILD_VECTORAsBroadCastLoad(BuildVectorSDNode *BVOp, const SDLoc &DL, SelectionDAG &DAG)
#define CRC_CASE_EXT_BINARYOP(NAME, NODE)
static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG)
static bool checkBitcastSrcVectorSize(SDValue Src, unsigned Size, unsigned Depth)
static bool isConstantSplatVector(SDValue N, APInt &SplatValue, unsigned MinSizeInBits)
static SDValue lowerVECTOR_SHUFFLEAsShift(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, const APInt &Zeroable)
Lower VECTOR_SHUFFLE as shift (if possible).
static SDValue lowerVECTOR_SHUFFLE_VSHUF4I(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into VSHUF4I (if possible).
static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG)
static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
static MachineBasicBlock * insertDivByZeroTrap(MachineInstr &MI, MachineBasicBlock *MBB)
static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG)
static SDValue lowerVECTOR_SHUFFLE_VEXTRINS(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into VEXTRINS (if possible).
static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG)
static SDValue lowerVECTOR_SHUFFLE_VPACKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPACKEV (if possible).
static MachineBasicBlock * emitPseudoVMSKCOND(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue performSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue performSELECT_CCCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue performVANDNCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
Do target-specific dag combines on LoongArchISD::VANDN nodes.
static void replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, unsigned ResOp)
static SDValue lowerVECTOR_SHUFFLEAsZeroOrAnyExtend(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const APInt &Zeroable)
Lower VECTOR_SHUFFLE as ZERO_EXTEND Or ANY_EXTEND (if possible).
static SDValue legalizeIntrinsicImmArg(SDNode *Node, unsigned ImmOp, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, bool IsSigned=false)
static cl::opt< MaterializeFPImm > MaterializeFPImmInsNum("loongarch-materialize-float-imm", cl::Hidden, cl::desc("Maximum number of instructions used (including code sequence " "to generate the value and moving the value to FPR) when " "materializing floating-point immediates (default = 3)"), cl::init(MaterializeFPImm3Ins), cl::values(clEnumValN(NoMaterializeFPImm, "0", "Use constant pool"), clEnumValN(MaterializeFPImm2Ins, "2", "Materialize FP immediate within 2 instructions"), clEnumValN(MaterializeFPImm3Ins, "3", "Materialize FP immediate within 3 instructions"), clEnumValN(MaterializeFPImm4Ins, "4", "Materialize FP immediate within 4 instructions"), clEnumValN(MaterializeFPImm5Ins, "5", "Materialize FP immediate within 5 instructions"), clEnumValN(MaterializeFPImm6Ins, "6", "Materialize FP immediate within 6 instructions " "(behaves same as 5 on loongarch64)")))
static SDValue lowerVECTOR_SHUFFLE_XVPERMI(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVPERMI (if possible).
static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op, StringRef ErrorMsg, SelectionDAG &DAG)
const MCPhysReg ArgXRs[]
static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State, CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2, MVT ValVT2, MVT LocVT2, ISD::ArgFlagsTy ArgFlags2)
static unsigned getLoongArchWOpcode(unsigned Opcode)
const MCPhysReg ArgFPR64s[]
static MachineBasicBlock * emitPseudoCTPOP(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue performMOVGR2FR_WCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
#define IOCSRWR_CASE(NAME, NODE)
#define CRC_CASE_EXT_UNARYOP(NAME, NODE)
static SDValue lowerVECTOR_SHUFFLE_VPACKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPACKOD (if possible).
static SDValue signExtendBitcastSrcVector(SelectionDAG &DAG, EVT SExtVT, SDValue Src, const SDLoc &DL)
static SDValue isNOT(SDValue V, SelectionDAG &DAG)
static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Dispatching routine to lower various 256-bit LoongArch vector shuffles.
static SDValue lowerVECTOR_SHUFFLE_VREPLVEI(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into VREPLVEI (if possible).
static MachineBasicBlock * emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
const MCPhysReg PreserveNoneArgGPRs[]
static void fillVector(ArrayRef< SDValue > Ops, SelectionDAG &DAG, SDLoc DL, const LoongArchSubtarget &Subtarget, SDValue &Vector, EVT ResTy)
static SDValue fillSubVectorFromBuildVector(BuildVectorSDNode *Node, SelectionDAG &DAG, SDLoc DL, const LoongArchSubtarget &Subtarget, EVT ResTy, unsigned first)
static bool isSelectPseudo(MachineInstr &MI)
static SDValue foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp, SelectionDAG &DAG, bool IsSigned=false)
const MCPhysReg ArgGPRs[]
static SDValue lowerVECTOR_SHUFFLE_XVPERM(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVPERM (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVILVL(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVILVL (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVEXTRINS(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVEXTRINS (if possible).
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp, unsigned ExtOpc=ISD::ANY_EXTEND)
static void replaceVecCondBranchResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, unsigned ResOp)
#define ASRT_LE_GT_CASE(NAME)
static SDValue lowerVECTOR_SHUFFLE_XVPACKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPACKEV (if possible).
static SDValue performBR_CCCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static void computeZeroableShuffleElements(ArrayRef< int > Mask, SDValue V1, SDValue V2, APInt &KnownUndef, APInt &KnownZero)
Compute whether each element of a shuffle is zeroable.
static SDValue combineFP_ROUND(SDValue N, const SDLoc &DL, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue widenShuffleMask(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
static MachineBasicBlock * emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static bool canonicalizeShuffleVectorByLane(const SDLoc &DL, MutableArrayRef< int > Mask, MVT VT, SDValue &V1, SDValue &V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Shuffle vectors by lane to generate more optimized instructions.
static SDValue lowerVECTOR_SHUFFLE_XVILVH(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVILVH (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVSHUF (if possible).
static void replaceCMP_XCHG_128Results(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG)
static SDValue lowerVectorPickVE2GR(SDNode *N, SelectionDAG &DAG, unsigned ResOp)
static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
#define IOCSRRD_CASE(NAME, NODE)
static int matchShuffleAsByteRotate(MVT VT, SDValue &V1, SDValue &V2, ArrayRef< int > Mask)
Attempts to match vector shuffle as byte rotation.
static SDValue lowerVECTOR_SHUFFLE_XVPICKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPICKEV (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVREPLVEI (if possible).
static int matchShuffleAsShift(MVT &ShiftVT, unsigned &Opcode, unsigned ScalarSizeInBits, ArrayRef< int > Mask, int MaskOffset, const APInt &Zeroable)
Attempts to match a shuffle mask against the VBSLL, VBSRL, VSLLI and VSRLI instruction.
static SDValue lowerVECTOR_SHUFFLE_VILVL(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VILVL (if possible).
static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG)
static MachineBasicBlock * emitBuildPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE as lane permute and then shuffle (if possible).
static SDValue performVMSKLTZCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static void replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
#define CSR_CASE(ID)
static SDValue lowerVECTOR_SHUFFLE_VPICKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPICKOD (if possible).
static Intrinsic::ID getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen, AtomicRMWInst::BinOp BinOp)
static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS, ISD::CondCode &CC, SelectionDAG &DAG)
static Register allocateArgGPR(CCState &State)
static bool isRepeatedShuffleMask(unsigned LaneSizeInBits, MVT VT, ArrayRef< int > Mask, SmallVectorImpl< int > &RepeatedMask)
Test whether a shuffle mask is equivalent within each sub-lane.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define T
static CodeModel::Model getCodeModel(const PPCSubtarget &S, const TargetMachine &TM, const MachineOperand &MO)
const SmallVectorImpl< MachineOperand > & Cond
This file defines the SmallSet class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
This file contains some functions that are useful when dealing with strings.
#define LLVM_DEBUG(...)
Definition Debug.h:114
static bool inRange(const MCExpr *Expr, int64_t MinValue, int64_t MaxValue, bool AllowSymbol=false)
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static bool isSequentialOrUndefInRange(ArrayRef< int > Mask, unsigned Pos, unsigned Size, int Low, int Step=1)
Return true if every element in Mask, beginning from position Pos and ending in Pos + Size,...
Value * RHS
Value * LHS
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
Definition APFloat.h:1499
bool isZero() const
Definition APFloat.h:1512
APInt bitcastToAPInt() const
Definition APFloat.h:1408
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1054
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:230
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1563
void setHighBits(unsigned hiBits)
Set the top hiBits bits.
Definition APInt.h:1414
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition APInt.h:1408
LLVM_ABI APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition APInt.cpp:1075
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:967
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition APInt.h:1353
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:372
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition APInt.cpp:1708
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1511
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1662
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition APInt.h:436
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1264
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:201
void setLowBits(unsigned loBits)
Set the bottom loBits bits.
Definition APInt.h:1411
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition APInt.h:287
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1585
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:858
This class represents an incoming formal argument to a Function.
Definition Argument.h:32
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
An instruction that atomically checks whether a specified value is in a memory location,...
AtomicOrdering getFailureOrdering() const
Returns the failure ordering constraint of this cmpxchg instruction.
an instruction that atomically reads a memory location, combines it with another value,...
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ Add
*p = old + v
@ USubCond
Subtract only if no unsigned overflow.
@ Min
*p = old <signed v ? old : v
@ Sub
*p = old - v
@ And
*p = old & v
@ Xor
*p = old ^ v
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ UIncWrap
Increment one up to a maximum value.
@ Max
*p = old >signed v ? old : v
@ UMin
*p = old <unsigned v ? old : v
@ UMax
*p = old >unsigned v ? old : v
@ UDecWrap
Decrement one until a minimum value or zero.
@ Nand
*p = ~(old & v)
Value * getPointerOperand()
bool isFloatingPointOperation() const
BinOp getOperation() const
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this rmw instruction.
AtomicOrdering getOrdering() const
Returns the ordering constraint of this rmw instruction.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
bool test(unsigned Idx) const
Definition BitVector.h:480
size_type count() const
count - Returns the number of bits which are set.
Definition BitVector.h:181
A "pseudo-class" with methods for operating on BUILD_VECTORs.
CCState - This class holds information needed while lowering arguments and return values.
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
LLVM_ABI void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeCallOperands - Analyze the outgoing arguments to a call, incorporating info about the passed v...
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
LLVM_ABI void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
CCValAssign - Represent assignment of one arg/retval to a location.
static CCValAssign getPending(unsigned ValNo, MVT ValVT, MVT LocVT, LocInfo HTP, unsigned ExtraInfo=0)
Register getLocReg() const
LocInfo getLocInfo() const
static CCValAssign getReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP, bool IsCustom=false)
static CCValAssign getCustomReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP)
static CCValAssign getMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP, bool IsCustom=false)
bool needsCustom() const
int64_t getLocMemOffset() const
unsigned getValNo() const
static CCValAssign getCustomMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP)
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
LLVM_ABI bool isMustTailCall() const
Tests if this call site must be tail call optimized.
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
const APFloat & getValueAPF() const
This is the shared class of boolean and integer constants.
Definition Constants.h:87
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition Constants.h:231
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition Constants.h:219
uint64_t getZExtValue() const
int64_t getSExtValue() const
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
unsigned getPointerSizeInBits(unsigned AS=0) const
The size in bits of the pointer representation in a given address space.
Definition DataLayout.h:494
LLVM_ABI Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
A debug info location.
Definition DebugLoc.h:123
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition Function.h:211
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition Function.h:272
Argument * getArg(unsigned i) const
Definition Function.h:886
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:728
bool isDSOLocal() const
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2847
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Class to represent integer types.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI void emitError(const Instruction *I, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location inf...
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
LoongArchMachineFunctionInfo - This class is derived from MachineFunctionInfo and contains private Lo...
const LoongArchRegisterInfo * getRegisterInfo() const override
const LoongArchInstrInfo * getInstrInfo() const override
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override
Return true if result of the specified node is used by a return node only.
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps, bool &UseOneConstNR, bool Reciprocal) const override
Hooks for building estimates in place of slower divisions and square roots.
bool isLegalICmpImmediate(int64_t Imm) const override
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(const AtomicCmpXchgInst *CI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
Return the ValueType of the result of SETCC operations.
std::pair< bool, uint64_t > isImmVLDILegalForMode1(const APInt &SplatValue, const unsigned SplatBitSize) const
Check if a constant splat can be generated using [x]vldi, where imm[12] is 1.
void getTgtMemIntrinsic(SmallVectorImpl< IntrinsicInfo > &Infos, const CallBase &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
bool isExtractVecEltCheap(EVT VT, unsigned Index) const override
Return true if extraction of a scalar element from the given vector type at the given index is cheap.
LegalizeTypeAction getPreferredVectorAction(MVT VT) const override
Return the preferred vector type legalization action.
bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Determine if the target supports unaligned memory accesses.
bool isCheapToSpeculateCtlz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic ctlz.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
bool shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize, Align &PrefAlign) const override
Return true if the pointer arguments to CI should be aligned by aligning the object whose address is ...
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const override
Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type from this source type with ...
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
bool signExtendConstant(const ConstantInt *CI) const override
Return true if this constant should be sign extended when promoting to a larger type.
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(const AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool isLegalAddImmediate(int64_t Imm) const override
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
bool isCheapToSpeculateCttz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic cttz.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const override
Returns true if arguments should be sign-extended in lib calls.
bool shouldScalarizeBinop(SDValue VecOp) const override
Try to convert an extract element of a vector binary operation into an extract element followed by a ...
bool isFPImmVLDILegal(const APFloat &Imm, EVT VT) const
bool shouldExtendTypeInLibCall(EVT Type) const override
Returns true if arguments should be extended in lib calls.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
bool hasAndNot(SDValue Y) const override
Return true if the target has a bitwise and-not operation: X = ~A & B This can be used to simplify se...
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const override
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
void emitExpandAtomicRMW(AtomicRMWInst *AI) const override
Perform a atomicrmw expansion using a target-specific way.
ISD::NodeType getExtendForAtomicCmpSwapArg() const override
Returns how the platform's atomic compare and swap expects its comparison value to be extended (ZERO_...
LoongArchTargetLowering(const TargetMachine &TM, const LoongArchSubtarget &STI)
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
bool hasAndNotCompare(SDValue Y) const override
Return true if the target should transform: (X & Y) == Y ---> (~X & Y) == 0 (X & Y) !...
SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps) const override
Return a reciprocal estimate value for the input operand.
bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT, const MachineFunction &MF) const override
Returns if it's reasonable to merge stores to MemVT size.
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context, const Type *RetTy) const override
This hook should be implemented to check whether the return values described by the Outs array can fi...
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array,...
bool mayBeEmittedAsTailCall(const CallInst *CI) const override
Return true if the target may be able emit the call instruction as a tail call.
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:41
bool hasFeature(unsigned Feature) const
Machine Value Type.
static MVT getFloatingPointVT(unsigned BitWidth)
bool is128BitVector() const
Return true if this is a 128-bit vector type.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isScalableVector() const
Return true if this is a vector value type where the runtime length is machine dependent.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
bool is256BitVector() const
Return true if this is a 256-bit vector type.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
MVT getDoubleNumVectorElementsVT() const
MVT getHalfNumVectorElementsVT() const
Return a VT for a vector type with the same element type but half the number of elements.
MVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
void push_back(MachineInstr *MI)
void setCallFrameSize(unsigned N)
Set the call frame size on entry to this basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineInstr - Allocate a new MachineInstr.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
Representation of each machine instruction.
bool isImplicitDef() const
LLVM_ABI void collectDebugValues(SmallVectorImpl< MachineInstr * > &DbgValues)
Scan instructions immediately following MI and collect any matching DBG_VALUEs.
const MachineOperand & getOperand(unsigned i) const
LLVM_ABI MachineInstrBundleIterator< MachineInstr > eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
A description of a memory reference used in the backend.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
Flags getFlags() const
Return the raw flags of the source value,.
MachineOperand class - Representation of each machine instruction operand.
void setIsKill(bool Val=true)
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
Align getAlign() const
MachineMemOperand * getMemOperand() const
Return the unique MachineMemOperand object describing the memory reference performed by operation.
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition ArrayRef.h:298
Class to represent pointers.
unsigned getAddressSpace() const
Return the address space of the Pointer type.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:79
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
LLVM_ABI bool isOnlyUserOf(const SDNode *N) const
Return true if this node is the only use of N.
size_t use_size() const
Return the number of uses of this node.
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumOperands() const
Return the number of values used by this operation.
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
bool isUndef() const
Returns true if the node type is UNDEF or POISON.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
uint64_t getScalarValueSizeInBits() const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
LLVM_ABI SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
bool isSafeToSpeculativelyExecute(unsigned Opcode) const
Some opcodes may create immediate undefined behavior when used with some values (integer division-by-...
SDValue getExtractSubvector(const SDLoc &DL, EVT VT, SDValue Vec, unsigned Idx)
Return the VT typed sub-vector of Vec at Idx.
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getInsertSubvector(const SDLoc &DL, SDValue Vec, SDValue SubVec, unsigned Idx)
Insert SubVec at the Idx element of Vec.
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false, SDNodeFlags Flags={})
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
LLVM_ABI SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), BatchAAResults *BatchAA=nullptr)
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
static constexpr unsigned MaxRecursionDepth
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
LLVM_ABI SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
LLVM_ABI void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
LLVM_ABI SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
LLVM_ABI SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
LLVM_ABI std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
LLVM_ABI SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
LLVM_ABI SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
LLVM_ABI SDValue WidenVector(const SDValue &N, const SDLoc &DL)
Widen the vector up to the next power of two using INSERT_SUBVECTOR.
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
LLVM_ABI SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
LLVM_ABI SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getRegisterMask(const uint32_t *RegMask)
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVM_ABI SDValue getCondCode(ISD::CondCode Cond)
LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
LLVMContext * getContext() const
LLVM_ABI SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
LLVM_ABI SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
LLVM_ABI std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
static LLVM_ABI bool isReverseMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask swaps the order of elements from exactly one source vector.
ArrayRef< int > getMask() const
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:134
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition SmallSet.h:176
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:184
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void assign(size_type NumElts, ValueParamT Elt)
typename SuperClass::const_iterator const_iterator
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StackOffset holds a fixed and a scalable offset in bytes.
Definition TypeSize.h:30
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition StringRef.h:730
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:258
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:143
TargetInstrInfo - Interface to description of machine instruction set.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
const TargetMachine & getTargetMachine() const
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
LegalizeTypeAction
This enum indicates whether types are legal for a target, and if not, what action should be used to...
void setMaxBytesForAlignment(unsigned MaxBytes)
bool isOperationLegalOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal using promotion.
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
virtual TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT) const
Return the preferred vector type legalization action.
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
void setMinCmpXchgSizeInBits(unsigned SizeInBits)
Sets the minimum cmpxchg or ll/sc size supported by the backend.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.stacksave/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
std::vector< ArgListEntry > ArgListTy
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Vector Op.
virtual InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const
SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
More limited version of SimplifyDemandedBits that can be used to "lookthrough" ops that don't contrib...
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
TargetLowering(const TargetLowering &)=delete
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::LibcallImpl LibcallImpl, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
Primary interface to the complete machine description for the target machine.
bool useTLSDESC() const
Returns true if this target uses TLS Descriptors.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
bool shouldAssumeDSOLocal(const GlobalValue *GV) const
CodeModel::Model getCodeModel() const
Returns the code model.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetInstrInfo * getInstrInfo() const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
LLVM_ABI unsigned getIntegerBitWidth() const
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:201
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:257
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:317
This class is used to represent EVT's, which are used to parameterize some operations.
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:549
self_iterator getIterator()
Definition ilist_node.h:123
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ PreserveMost
Used for runtime calls that preserves most registers.
Definition CallingConv.h:63
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition CallingConv.h:50
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ PreserveNone
Used for runtime calls that preserves none general registers.
Definition CallingConv.h:90
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
LLVM_ABI bool isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are ~0 ...
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition ISDOpcodes.h:41
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:819
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain.
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition ISDOpcodes.h:511
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition ISDOpcodes.h:45
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:275
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:600
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:779
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:264
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:853
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:518
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:220
@ GlobalAddress
Definition ISDOpcodes.h:88
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:880
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:584
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:417
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:747
@ MEMBARRIER
MEMBARRIER - Compiler barrier only; generate a no-op.
@ ATOMIC_FENCE
OUTCHAIN = ATOMIC_FENCE(INCHAIN, ordering, scope) This corresponds to the fence instruction.
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition ISDOpcodes.h:993
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:254
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
@ GlobalTLSAddress
Definition ISDOpcodes.h:89
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:844
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:665
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ BR_CC
BR_CC - Conditional branch.
@ BR_JT
BR_JT - Jumptable branch.
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition ISDOpcodes.h:541
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition ISDOpcodes.h:548
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:374
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:796
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:233
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:704
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:765
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:649
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:614
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimumNumber or maximumNumber on two values,...
@ READ_REGISTER
READ_REGISTER, WRITE_REGISTER - This node represents llvm.register on the DAG, which implements the n...
Definition ISDOpcodes.h:139
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:576
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition ISDOpcodes.h:224
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:850
@ DEBUGTRAP
DEBUGTRAP - Trap intended to get the attention of a debugger.
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:811
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values, following IEEE-754 definition...
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:888
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:727
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:978
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:805
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition ISDOpcodes.h:150
@ BF16_TO_FP
BF16_TO_FP, FP_TO_BF16 - These operators are used to perform promotions and truncation for bfloat16.
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition ISDOpcodes.h:110
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:926
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:739
@ TRAP
TRAP - Trapping instruction.
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:205
@ FREEZE
FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or is evaluated to UNDEF),...
Definition ISDOpcodes.h:241
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:565
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:959
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:856
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
@ BRCOND
BRCOND - Conditional branch.
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition ISDOpcodes.h:833
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition ISDOpcodes.h:62
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:365
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition ISDOpcodes.h:722
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:213
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:556
bool isExtVecInRegOpcode(unsigned Opcode)
LLVM_ABI bool isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are 0 o...
LLVM_ABI CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
bool isBitwiseLogicOp(unsigned Opcode)
Whether this is bitwise logic opcode.
LLVM_ABI bool isFreezeUndef(const SDNode *N)
Return true if the specified node is FREEZE(UNDEF).
LLVM_ABI CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LLVM_ABI bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
LLVM_ABI NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode)
Get underlying scalar opcode for VECREDUCE opcode.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > OverloadTys={})
Look up the Function declaration of the intrinsic id in the Module M.
ABI getTargetABI(StringRef ABIName)
InstSeq generateInstSeq(int64_t Val)
LLVM_ABI Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition LLVMContext.h:55
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
Sequence
A sequence of states that a pointer may go through in which an objc_retain and objc_release are actua...
Definition PtrState.h:41
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
This is an optimization pass for GlobalISel generic memory operations.
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
Definition Threading.h:280
@ Offset
Definition DWP.cpp:532
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1739
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
constexpr RegState getKillRegState(bool B)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
bool isIntOrFPConstant(SDValue V)
Return true if V is either a integer or FP constant.
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
Definition bit.h:325
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
LLVM_ABI bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:337
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
Definition MathExtras.h:273
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1746
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition MathExtras.h:261
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Other
Any other memory.
Definition ModRef.h:68
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr bool isShiftedInt(int64_t x)
Checks if a signed integer is an N bit number shifted left by S.
Definition MathExtras.h:182
constexpr unsigned BitWidth
std::string join_items(Sep Separator, Args &&... Items)
Joins the strings in the parameter pack Items, adding Separator between the elements....
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
PointerUnion< const Value *, const PseudoSourceValue * > ValueType
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:177
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Extended Value Type.
Definition ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition ValueTypes.h:90
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:403
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:145
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition ValueTypes.h:292
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition ValueTypes.h:308
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:155
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:381
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:393
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:324
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition ValueTypes.h:215
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:61
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:389
static EVT getFloatingPointVT(unsigned BitWidth)
Returns the EVT that represents a floating-point type with the given number of bits.
Definition ValueTypes.h:55
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:176
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:331
bool is256BitVector() const
Return true if this is a 256-bit vector type.
Definition ValueTypes.h:220
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:336
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:165
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:344
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:469
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:160
Align getNonZeroOrigAlign() const
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
void resetAll()
Resets the known state of all bits.
Definition KnownBits.h:72
Matching combinators.
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
static LLVM_ABI MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
SmallVector< ISD::OutputArg, 32 > Outs
LLVM_ABI SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
This structure is used to pass arguments to makeLibCall function.
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...