LLVM 23.0.0git
LoongArchISelLowering.cpp
Go to the documentation of this file.
1//=- LoongArchISelLowering.cpp - LoongArch DAG Lowering Implementation ---===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that LoongArch uses to lower LLVM code into
10// a selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
15#include "LoongArch.h"
19#include "LoongArchSubtarget.h"
23#include "llvm/ADT/SmallSet.h"
24#include "llvm/ADT/Statistic.h"
30#include "llvm/IR/IRBuilder.h"
32#include "llvm/IR/IntrinsicsLoongArch.h"
34#include "llvm/Support/Debug.h"
39
40using namespace llvm;
41
42#define DEBUG_TYPE "loongarch-isel-lowering"
43
44STATISTIC(NumTailCalls, "Number of tail calls");
45
54
56 "loongarch-materialize-float-imm", cl::Hidden,
57 cl::desc("Maximum number of instructions used (including code sequence "
58 "to generate the value and moving the value to FPR) when "
59 "materializing floating-point immediates (default = 3)"),
61 cl::values(clEnumValN(NoMaterializeFPImm, "0", "Use constant pool"),
63 "Materialize FP immediate within 2 instructions"),
65 "Materialize FP immediate within 3 instructions"),
67 "Materialize FP immediate within 4 instructions"),
69 "Materialize FP immediate within 5 instructions"),
71 "Materialize FP immediate within 6 instructions "
72 "(behaves same as 5 on loongarch64)")));
73
74static cl::opt<bool> ZeroDivCheck("loongarch-check-zero-division", cl::Hidden,
75 cl::desc("Trap on integer division by zero."),
76 cl::init(false));
77
79 const LoongArchSubtarget &STI)
80 : TargetLowering(TM, STI), Subtarget(STI) {
81
82 MVT GRLenVT = Subtarget.getGRLenVT();
83
84 // Set up the register classes.
85
86 addRegisterClass(GRLenVT, &LoongArch::GPRRegClass);
87 if (Subtarget.hasBasicF())
88 addRegisterClass(MVT::f32, &LoongArch::FPR32RegClass);
89 if (Subtarget.hasBasicD())
90 addRegisterClass(MVT::f64, &LoongArch::FPR64RegClass);
91
92 static const MVT::SimpleValueType LSXVTs[] = {
93 MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32, MVT::v2f64};
94 static const MVT::SimpleValueType LASXVTs[] = {
95 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, MVT::v8f32, MVT::v4f64};
96
97 if (Subtarget.hasExtLSX())
98 for (MVT VT : LSXVTs)
99 addRegisterClass(VT, &LoongArch::LSX128RegClass);
100
101 if (Subtarget.hasExtLASX())
102 for (MVT VT : LASXVTs)
103 addRegisterClass(VT, &LoongArch::LASX256RegClass);
104
105 // Set operations for LA32 and LA64.
106
108 MVT::i1, Promote);
109
116
119 GRLenVT, Custom);
120
122
127
129 setOperationAction(ISD::TRAP, MVT::Other, Legal);
130
134
136
137 // BITREV/REVB requires the 32S feature.
138 if (STI.has32S()) {
139 // Expand bitreverse.i16 with native-width bitrev and shift for now, before
140 // we get to know which of sll and revb.2h is faster.
143
144 // LA32 does not have REVB.2W and REVB.D due to the 64-bit operands, and
145 // the narrower REVB.W does not exist. But LA32 does have REVB.2H, so i16
146 // and i32 could still be byte-swapped relatively cheaply.
148 } else {
156 }
157
164
167
168 // Set operations for LA64 only.
169
170 if (Subtarget.is64Bit()) {
188
192 Custom);
194 }
195
196 // Set operations for LA32 only.
197
198 if (!Subtarget.is64Bit()) {
204 if (Subtarget.hasBasicD())
206 }
207
209
210 static const ISD::CondCode FPCCToExpand[] = {
213
214 // Set operations for 'F' feature.
215
216 if (Subtarget.hasBasicF()) {
217 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
218 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
219 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
220 setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
221 setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
222
241 Subtarget.isSoftFPABI() ? LibCall : Custom);
243 Subtarget.isSoftFPABI() ? LibCall : Custom);
246 Subtarget.isSoftFPABI() ? LibCall : Custom);
247
248 if (Subtarget.is64Bit())
250
251 if (!Subtarget.hasBasicD()) {
253 if (Subtarget.is64Bit()) {
256 }
257 }
258 }
259
260 // Set operations for 'D' feature.
261
262 if (Subtarget.hasBasicD()) {
263 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
264 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
265 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
266 setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
267 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
268 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
269 setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
270
290 Subtarget.isSoftFPABI() ? LibCall : Custom);
293 Subtarget.isSoftFPABI() ? LibCall : Custom);
294
295 if (Subtarget.is64Bit())
297 }
298
299 // Set operations for 'LSX' feature.
300
301 if (Subtarget.hasExtLSX()) {
303 // Expand all truncating stores and extending loads.
304 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
305 setTruncStoreAction(VT, InnerVT, Expand);
308 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
309 }
310 // By default everything must be expanded. Then we will selectively turn
311 // on ones that can be effectively codegen'd.
312 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
314 }
315
316 for (MVT VT : LSXVTs) {
320
324
329 }
330 for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
333 Legal);
335 VT, Legal);
342 Expand);
357 }
358 for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
360 for (MVT VT : {MVT::v8i16, MVT::v4i32, MVT::v2i64})
362 for (MVT VT : {MVT::v4i32, MVT::v2i64}) {
365 }
366 for (MVT VT : {MVT::v4f32, MVT::v2f64}) {
374 VT, Expand);
382 }
384 setOperationAction(ISD::FCEIL, {MVT::f32, MVT::f64}, Legal);
385 setOperationAction(ISD::FFLOOR, {MVT::f32, MVT::f64}, Legal);
386 setOperationAction(ISD::FTRUNC, {MVT::f32, MVT::f64}, Legal);
387 setOperationAction(ISD::FROUNDEVEN, {MVT::f32, MVT::f64}, Legal);
388
389 for (MVT VT :
390 {MVT::v16i8, MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v8i16, MVT::v4i16,
391 MVT::v2i16, MVT::v4i32, MVT::v2i32, MVT::v2i64}) {
401 }
404 // We want to legalize this to an f64 load rather than an i64 load.
405 setOperationAction(ISD::LOAD, MVT::v2f32, Custom);
406 for (MVT VT : {MVT::v2i64, MVT::v4i32, MVT::v8i16})
408 for (MVT VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64, MVT::v16i32, MVT::v8i64,
409 MVT::v16i64})
411 }
412
413 // Set operations for 'LASX' feature.
414
415 if (Subtarget.hasExtLASX()) {
416 for (MVT VT : LASXVTs) {
420
426
430 }
431 for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) {
434 Legal);
436 VT, Legal);
443 Expand);
459 }
460 for (MVT VT : {MVT::v32i8, MVT::v16i16, MVT::v8i32})
462 for (MVT VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64})
464 for (MVT VT : {MVT::v8i32, MVT::v4i32, MVT::v4i64}) {
467 }
468 for (MVT VT : {MVT::v8f32, MVT::v4f64}) {
476 VT, Expand);
484 }
487 for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16}) {
490 }
491 for (MVT VT :
492 {MVT::v2i64, MVT::v4i32, MVT::v4i64, MVT::v8i16, MVT::v8i32}) {
495 }
496 }
497
498 // Set DAG combine for LA32 and LA64.
499 if (Subtarget.hasBasicF()) {
501 }
502
507
508 // Set DAG combine for 'LSX' feature.
509
510 if (Subtarget.hasExtLSX()) {
519 }
520
521 // Set DAG combine for 'LASX' feature.
522 if (Subtarget.hasExtLASX()) {
527 }
528
529 // Compute derived properties from the register classes.
530 computeRegisterProperties(Subtarget.getRegisterInfo());
531
533
536
537 setMaxAtomicSizeInBitsSupported(Subtarget.getGRLen());
538
540
541 // Function alignments.
543 // Set preferred alignments.
544 setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment());
545 setPrefLoopAlignment(Subtarget.getPrefLoopAlignment());
546 setMaxBytesForAlignment(Subtarget.getMaxBytesForAlignment());
547
548 // cmpxchg sizes down to 8 bits become legal if LAMCAS is available.
549 if (Subtarget.hasLAMCAS())
551
552 if (Subtarget.hasSCQ()) {
555 }
556
557 // Disable strict node mutation.
558 IsStrictFPEnabled = true;
559}
560
562 const GlobalAddressSDNode *GA) const {
563 // In order to maximise the opportunity for common subexpression elimination,
564 // keep a separate ADD node for the global address offset instead of folding
565 // it in the global address node. Later peephole optimisations may choose to
566 // fold it back in when profitable.
567 return false;
568}
569
571 SelectionDAG &DAG) const {
572 switch (Op.getOpcode()) {
574 return lowerATOMIC_FENCE(Op, DAG);
576 return lowerEH_DWARF_CFA(Op, DAG);
578 return lowerGlobalAddress(Op, DAG);
580 return lowerGlobalTLSAddress(Op, DAG);
582 return lowerINTRINSIC_WO_CHAIN(Op, DAG);
584 return lowerINTRINSIC_W_CHAIN(Op, DAG);
586 return lowerINTRINSIC_VOID(Op, DAG);
588 return lowerBlockAddress(Op, DAG);
589 case ISD::JumpTable:
590 return lowerJumpTable(Op, DAG);
591 case ISD::SHL_PARTS:
592 return lowerShiftLeftParts(Op, DAG);
593 case ISD::SRA_PARTS:
594 return lowerShiftRightParts(Op, DAG, true);
595 case ISD::SRL_PARTS:
596 return lowerShiftRightParts(Op, DAG, false);
598 return lowerConstantPool(Op, DAG);
599 case ISD::FP_TO_SINT:
600 return lowerFP_TO_SINT(Op, DAG);
601 case ISD::BITCAST:
602 return lowerBITCAST(Op, DAG);
603 case ISD::UINT_TO_FP:
604 return lowerUINT_TO_FP(Op, DAG);
605 case ISD::SINT_TO_FP:
606 return lowerSINT_TO_FP(Op, DAG);
607 case ISD::VASTART:
608 return lowerVASTART(Op, DAG);
609 case ISD::FRAMEADDR:
610 return lowerFRAMEADDR(Op, DAG);
611 case ISD::RETURNADDR:
612 return lowerRETURNADDR(Op, DAG);
614 return lowerWRITE_REGISTER(Op, DAG);
616 return lowerINSERT_VECTOR_ELT(Op, DAG);
618 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
620 return lowerBUILD_VECTOR(Op, DAG);
622 return lowerCONCAT_VECTORS(Op, DAG);
624 return lowerVECTOR_SHUFFLE(Op, DAG);
625 case ISD::BITREVERSE:
626 return lowerBITREVERSE(Op, DAG);
628 return lowerSCALAR_TO_VECTOR(Op, DAG);
629 case ISD::PREFETCH:
630 return lowerPREFETCH(Op, DAG);
631 case ISD::SELECT:
632 return lowerSELECT(Op, DAG);
633 case ISD::BRCOND:
634 return lowerBRCOND(Op, DAG);
635 case ISD::FP_TO_FP16:
636 return lowerFP_TO_FP16(Op, DAG);
637 case ISD::FP16_TO_FP:
638 return lowerFP16_TO_FP(Op, DAG);
639 case ISD::FP_TO_BF16:
640 return lowerFP_TO_BF16(Op, DAG);
641 case ISD::BF16_TO_FP:
642 return lowerBF16_TO_FP(Op, DAG);
644 return lowerVECREDUCE_ADD(Op, DAG);
645 case ISD::ROTL:
646 case ISD::ROTR:
647 return lowerRotate(Op, DAG);
655 return lowerVECREDUCE(Op, DAG);
656 case ISD::ConstantFP:
657 return lowerConstantFP(Op, DAG);
658 case ISD::SETCC:
659 return lowerSETCC(Op, DAG);
660 case ISD::FP_ROUND:
661 return lowerFP_ROUND(Op, DAG);
662 case ISD::FP_EXTEND:
663 return lowerFP_EXTEND(Op, DAG);
665 return lowerSIGN_EXTEND_VECTOR_INREG(Op, DAG);
667 return lowerDYNAMIC_STACKALLOC(Op, DAG);
668 }
669 return SDValue();
670}
671
672// Helper to attempt to return a cheaper, bit-inverted version of \p V.
674 // TODO: don't always ignore oneuse constraints.
675 V = peekThroughBitcasts(V);
676 EVT VT = V.getValueType();
677
678 // Match not(xor X, -1) -> X.
679 if (V.getOpcode() == ISD::XOR &&
680 (ISD::isBuildVectorAllOnes(V.getOperand(1).getNode()) ||
681 isAllOnesConstant(V.getOperand(1))))
682 return V.getOperand(0);
683
684 // Match not(extract_subvector(not(X))) -> extract_subvector(X).
685 if (V.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
686 (isNullConstant(V.getOperand(1)) || V.getOperand(0).hasOneUse())) {
687 if (SDValue Not = isNOT(V.getOperand(0), DAG)) {
688 Not = DAG.getBitcast(V.getOperand(0).getValueType(), Not);
689 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(Not), VT, Not,
690 V.getOperand(1));
691 }
692 }
693
694 // Match not(SplatVector(not(X))) -> SplatVector(X).
695 if (V.getOpcode() == ISD::BUILD_VECTOR) {
696 if (SDValue SplatValue =
697 cast<BuildVectorSDNode>(V.getNode())->getSplatValue()) {
698 if (!V->isOnlyUserOf(SplatValue.getNode()))
699 return SDValue();
700
701 if (SDValue Not = isNOT(SplatValue, DAG)) {
702 Not = DAG.getBitcast(V.getOperand(0).getValueType(), Not);
703 return DAG.getSplat(VT, SDLoc(Not), Not);
704 }
705 }
706 }
707
708 // Match not(or(not(X),not(Y))) -> and(X, Y).
709 if (V.getOpcode() == ISD::OR && DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
710 V.getOperand(0).hasOneUse() && V.getOperand(1).hasOneUse()) {
711 // TODO: Handle cases with single NOT operand -> VANDN
712 if (SDValue Op1 = isNOT(V.getOperand(1), DAG))
713 if (SDValue Op0 = isNOT(V.getOperand(0), DAG))
714 return DAG.getNode(ISD::AND, SDLoc(V), VT, DAG.getBitcast(VT, Op0),
715 DAG.getBitcast(VT, Op1));
716 }
717
718 // TODO: Add more matching patterns. Such as,
719 // not(concat_vectors(not(X), not(Y))) -> concat_vectors(X, Y).
720 // not(slt(C, X)) -> slt(X - 1, C)
721 return SDValue();
722}
723
724// Combine two ISD::FP_ROUND / LoongArchISD::VFCVT nodes with same type to
725// LoongArchISD::VFCVT. For example:
726// x1 = fp_round x, 0
727// y1 = fp_round y, 0
728// z = concat_vectors x1, y1
729// Or
730// x1 = LoongArch::VFCVT undef, x
731// y1 = LoongArch::VFCVT undef, y
732// z = LoongArchISD::VPACKEV y1, x1; or LoongArchISD::VPERMI y1, x1, 68
733// can be combined to:
734// z = LoongArch::VFCVT y, x
736 const LoongArchSubtarget &Subtarget) {
737 assert(((N->getOpcode() == ISD::CONCAT_VECTORS && N->getNumOperands() == 2) ||
738 (N->getOpcode() == LoongArchISD::VPACKEV) ||
739 (N->getOpcode() == LoongArchISD::VPERMI)) &&
740 "Invalid Node");
741
742 SDValue Op0 = peekThroughBitcasts(N->getOperand(0));
743 SDValue Op1 = peekThroughBitcasts(N->getOperand(1));
744 unsigned Opcode0 = Op0.getOpcode();
745 unsigned Opcode1 = Op1.getOpcode();
746 if (Opcode0 != Opcode1)
747 return SDValue();
748
749 if (Opcode0 != ISD::FP_ROUND && Opcode0 != LoongArchISD::VFCVT)
750 return SDValue();
751
752 // Check if two nodes have only one use.
753 if (!Op0.hasOneUse() || !Op1.hasOneUse())
754 return SDValue();
755
756 EVT VT = N.getValueType();
757 EVT SVT0 = Op0.getValueType();
758 EVT SVT1 = Op1.getValueType();
759 // Check if two nodes have the same result type.
760 if (SVT0 != SVT1)
761 return SDValue();
762
763 // Check if two nodes have the same operand type.
764 EVT SSVT0 = Op0.getOperand(0).getValueType();
765 EVT SSVT1 = Op1.getOperand(0).getValueType();
766 if (SSVT0 != SSVT1)
767 return SDValue();
768
769 if (N->getOpcode() == ISD::CONCAT_VECTORS && Opcode0 == ISD::FP_ROUND) {
770 if (Subtarget.hasExtLASX() && VT.is256BitVector() && SVT0 == MVT::v4f32 &&
771 SSVT0 == MVT::v4f64) {
772 // A vector_shuffle is required in the final step, as xvfcvt instruction
773 // operates on each 128-bit segment as a lane.
774 SDValue Res = DAG.getNode(LoongArchISD::VFCVT, DL, MVT::v8f32,
775 Op1.getOperand(0), Op0.getOperand(0));
776 SDValue Undef = DAG.getUNDEF(Res.getValueType());
777 // After VFCVT, the high part of Res comes from the high parts of Op0 and
778 // Op1, and the low part comes from the low parts of Op0 and Op1. However,
779 // the desired order requires Op0 to fully occupy the lower half and Op1
780 // the upper half of Res. The Mask reorders the elements of Res to achieve
781 // this:
782 // - The first four elements (0, 1, 4, 5) come from Op0.
783 // - The next four elements (2, 3, 6, 7) come from Op1.
784 SmallVector<int, 8> Mask = {0, 1, 4, 5, 2, 3, 6, 7};
785 Res = DAG.getVectorShuffle(Res.getValueType(), DL, Res, Undef, Mask);
786 return DAG.getBitcast(VT, Res);
787 }
788 }
789
790 if ((N->getOpcode() == LoongArchISD::VPACKEV ||
791 N->getOpcode() == LoongArchISD::VPERMI) &&
792 Opcode0 == LoongArchISD::VFCVT) {
793 // For VPACKEV or VPERMI, check if the first operand of each VFCVT is undef.
794 if (!Op0.getOperand(0).isUndef() || !Op1.getOperand(0).isUndef())
795 return SDValue();
796
797 if (!Subtarget.hasExtLSX() || SVT0 != MVT::v4f32 || SSVT0 != MVT::v2f64)
798 return SDValue();
799
800 if (N->getOpcode() == LoongArchISD::VPACKEV &&
801 (VT == MVT::v2i64 || VT == MVT::v2f64)) {
802 SDValue Res = DAG.getNode(LoongArchISD::VFCVT, DL, MVT::v4f32,
803 Op0.getOperand(1), Op1.getOperand(1));
804 return DAG.getBitcast(VT, Res);
805 }
806
807 if (N->getOpcode() == LoongArchISD::VPERMI && VT == MVT::v4f32) {
808 int64_t Imm = cast<ConstantSDNode>(N->getOperand(2))->getSExtValue();
809 if (Imm != 68)
810 return SDValue();
811 return DAG.getNode(LoongArchISD::VFCVT, DL, MVT::v4f32, Op0.getOperand(1),
812 Op1.getOperand(1));
813 }
814 }
815
816 return SDValue();
817}
818
819SDValue LoongArchTargetLowering::lowerFP_ROUND(SDValue Op,
820 SelectionDAG &DAG) const {
821 SDLoc DL(Op);
822 SDValue In = Op.getOperand(0);
823 MVT VT = Op.getSimpleValueType();
824 MVT SVT = In.getSimpleValueType();
825
826 if (VT == MVT::v4f32 && SVT == MVT::v4f64) {
827 SDValue Lo, Hi;
828 std::tie(Lo, Hi) = DAG.SplitVector(In, DL);
829 return DAG.getNode(LoongArchISD::VFCVT, DL, VT, Hi, Lo);
830 }
831
832 return SDValue();
833}
834
835SDValue LoongArchTargetLowering::lowerFP_EXTEND(SDValue Op,
836 SelectionDAG &DAG) const {
837
838 SDLoc DL(Op);
839 EVT VT = Op.getValueType();
840 SDValue Src = Op->getOperand(0);
841 EVT SVT = Src.getValueType();
842
843 bool V2F32ToV2F64 =
844 VT == MVT::v2f64 && SVT == MVT::v2f32 && Subtarget.hasExtLSX();
845 bool V4F32ToV4F64 =
846 VT == MVT::v4f64 && SVT == MVT::v4f32 && Subtarget.hasExtLASX();
847 if (!V2F32ToV2F64 && !V4F32ToV4F64)
848 return SDValue();
849
850 // Check if Op is the high part of vector.
851 auto CheckVecHighPart = [](SDValue Op) {
853 if (Op.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
854 SDValue SOp = Op.getOperand(0);
855 EVT SVT = SOp.getValueType();
856 if (!SVT.isVector() || (SVT.getVectorNumElements() % 2 != 0))
857 return SDValue();
858
859 const uint64_t Imm = Op.getConstantOperandVal(1);
860 if (Imm == SVT.getVectorNumElements() / 2)
861 return SOp;
862 return SDValue();
863 }
864 return SDValue();
865 };
866
867 unsigned Opcode;
868 SDValue VFCVTOp;
869 EVT WideOpVT = SVT.getSimpleVT().getDoubleNumVectorElementsVT();
870 SDValue ZeroIdx = DAG.getVectorIdxConstant(0, DL);
871
872 // If the operand of ISD::FP_EXTEND comes from the high part of vector,
873 // generate LoongArchISD::VFCVTH, otherwise LoongArchISD::VFCVTL.
874 if (SDValue V = CheckVecHighPart(Src)) {
875 assert(V.getValueSizeInBits() == WideOpVT.getSizeInBits() &&
876 "Unexpected wide vector");
877 Opcode = LoongArchISD::VFCVTH;
878 VFCVTOp = DAG.getBitcast(WideOpVT, V);
879 } else {
880 Opcode = LoongArchISD::VFCVTL;
881 VFCVTOp = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideOpVT,
882 DAG.getUNDEF(WideOpVT), Src, ZeroIdx);
883 }
884
885 // v2f64 = fp_extend v2f32
886 if (V2F32ToV2F64)
887 return DAG.getNode(Opcode, DL, VT, VFCVTOp);
888
889 // v4f64 = fp_extend v4f32
890 if (V4F32ToV4F64) {
891 // XVFCVT instruction operates on each 128-bit segment as a lane, so a
892 // vector_shuffle is required firstly.
893 SmallVector<int, 8> Mask = {0, 1, 4, 5, 2, 3, 6, 7};
894 SDValue Res = DAG.getVectorShuffle(WideOpVT, DL, VFCVTOp,
895 DAG.getUNDEF(WideOpVT), Mask);
896 Res = DAG.getNode(Opcode, DL, VT, Res);
897 return Res;
898 }
899
900 return SDValue();
901}
902
903SDValue LoongArchTargetLowering::lowerConstantFP(SDValue Op,
904 SelectionDAG &DAG) const {
905 EVT VT = Op.getValueType();
906 ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op);
907 const APFloat &FPVal = CFP->getValueAPF();
908 SDLoc DL(CFP);
909
910 assert((VT == MVT::f32 && Subtarget.hasBasicF()) ||
911 (VT == MVT::f64 && Subtarget.hasBasicD()));
912
913 // If value is 0.0 or -0.0, just ignore it.
914 if (FPVal.isZero())
915 return SDValue();
916
917 // If lsx enabled, use cheaper 'vldi' instruction if possible.
918 if (isFPImmVLDILegal(FPVal, VT))
919 return SDValue();
920
921 // Construct as integer, and move to float register.
922 APInt INTVal = FPVal.bitcastToAPInt();
923
924 // If more than MaterializeFPImmInsNum instructions will be used to
925 // generate the INTVal and move it to float register, fallback to
926 // use floating point load from the constant pool.
928 int InsNum = Seq.size() + ((VT == MVT::f64 && !Subtarget.is64Bit()) ? 2 : 1);
929 if (InsNum > MaterializeFPImmInsNum && !FPVal.isExactlyValue(+1.0))
930 return SDValue();
931
932 switch (VT.getSimpleVT().SimpleTy) {
933 default:
934 llvm_unreachable("Unexpected floating point type!");
935 break;
936 case MVT::f32: {
937 SDValue NewVal = DAG.getConstant(INTVal, DL, MVT::i32);
938 if (Subtarget.is64Bit())
939 NewVal = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, NewVal);
940 return DAG.getNode(Subtarget.is64Bit() ? LoongArchISD::MOVGR2FR_W_LA64
941 : LoongArchISD::MOVGR2FR_W,
942 DL, VT, NewVal);
943 }
944 case MVT::f64: {
945 if (Subtarget.is64Bit()) {
946 SDValue NewVal = DAG.getConstant(INTVal, DL, MVT::i64);
947 return DAG.getNode(LoongArchISD::MOVGR2FR_D, DL, VT, NewVal);
948 }
949 SDValue Lo = DAG.getConstant(INTVal.trunc(32), DL, MVT::i32);
950 SDValue Hi = DAG.getConstant(INTVal.lshr(32).trunc(32), DL, MVT::i32);
951 return DAG.getNode(LoongArchISD::MOVGR2FR_D_LO_HI, DL, VT, Lo, Hi);
952 }
953 }
954
955 return SDValue();
956}
957
958// Ensure SETCC result and operand have the same bit width; isel does not
959// support mismatched widths.
960SDValue LoongArchTargetLowering::lowerSETCC(SDValue Op,
961 SelectionDAG &DAG) const {
962 SDLoc DL(Op);
963 EVT ResultVT = Op.getValueType();
964 EVT OperandVT = Op.getOperand(0).getValueType();
965
966 EVT SetCCResultVT =
967 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), OperandVT);
968
969 if (ResultVT == SetCCResultVT)
970 return Op;
971
972 assert(Op.getOperand(0).getValueType() == Op.getOperand(1).getValueType() &&
973 "SETCC operands must have the same type!");
974
975 SDValue SetCCNode =
976 DAG.getNode(ISD::SETCC, DL, SetCCResultVT, Op.getOperand(0),
977 Op.getOperand(1), Op.getOperand(2));
978
979 if (ResultVT.bitsGT(SetCCResultVT))
980 SetCCNode = DAG.getNode(ISD::SIGN_EXTEND, DL, ResultVT, SetCCNode);
981 else if (ResultVT.bitsLT(SetCCResultVT))
982 SetCCNode = DAG.getNode(ISD::TRUNCATE, DL, ResultVT, SetCCNode);
983
984 return SetCCNode;
985}
986
987// Lower sext_invec using vslti instructions.
988// For example:
989// %b = sext <4 x i16> %a to <4 x i32>
990// can be lowered to:
991// VSLTI_H vr2, vr1, 0
992// VILVL.H vr1, vr2, vr1
993SDValue LoongArchTargetLowering::lowerSIGN_EXTEND_VECTOR_INREG(
994 SDValue Op, SelectionDAG &DAG) const {
995 SDLoc DL(Op);
996 SDValue Src = Op.getOperand(0);
997 MVT SrcVT = Src.getSimpleValueType();
998 MVT DstVT = Op.getSimpleValueType();
999
1000 if (!SrcVT.is128BitVector())
1001 return SDValue();
1002
1003 // lower to VSLTI + VILVL if extend could be done in single step.
1004 if (DstVT.getScalarSizeInBits() / SrcVT.getScalarSizeInBits() == 2) {
1005 SDValue Zero = DAG.getConstant(0, DL, SrcVT);
1006 SDValue Mask = DAG.getNode(ISD::SETCC, DL, SrcVT, Src, Zero,
1007 DAG.getCondCode(ISD::SETLT));
1008 SDValue LoInterleaved =
1009 DAG.getNode(LoongArchISD::VILVL, DL, SrcVT, Mask, Src);
1010
1011 return DAG.getBitcast(DstVT, LoInterleaved);
1012 }
1013
1014 return SDValue();
1015}
1016
1017// Lower vecreduce_add using vhaddw instructions.
1018// For Example:
1019// call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a)
1020// can be lowered to:
1021// VHADDW_D_W vr0, vr0, vr0
1022// VHADDW_Q_D vr0, vr0, vr0
1023// VPICKVE2GR_D a0, vr0, 0
1024// ADDI_W a0, a0, 0
1025SDValue LoongArchTargetLowering::lowerVECREDUCE_ADD(SDValue Op,
1026 SelectionDAG &DAG) const {
1027
1028 SDLoc DL(Op);
1029 MVT OpVT = Op.getSimpleValueType();
1030 SDValue Val = Op.getOperand(0);
1031
1032 unsigned NumEles = Val.getSimpleValueType().getVectorNumElements();
1033 unsigned EleBits = Val.getSimpleValueType().getScalarSizeInBits();
1034 unsigned ResBits = OpVT.getScalarSizeInBits();
1035
1036 unsigned LegalVecSize = 128;
1037 bool isLASX256Vector =
1038 Subtarget.hasExtLASX() && Val.getValueSizeInBits() == 256;
1039
1040 // Ensure operand type legal or enable it legal.
1041 while (!isTypeLegal(Val.getSimpleValueType())) {
1042 Val = DAG.WidenVector(Val, DL);
1043 }
1044
1045 // NumEles is designed for iterations count, v4i32 for LSX
1046 // and v8i32 for LASX should have the same count.
1047 if (isLASX256Vector) {
1048 NumEles /= 2;
1049 LegalVecSize = 256;
1050 }
1051
1052 EleBits *= 2;
1053 for (unsigned i = 1; i < NumEles; i *= 2, EleBits *= 2) {
1054 EleBits = std::min(EleBits, 64u);
1055 MVT IntTy = MVT::getIntegerVT(EleBits);
1056 MVT VecTy = MVT::getVectorVT(IntTy, LegalVecSize / EleBits);
1057 Val = DAG.getNode(LoongArchISD::VHADDW, DL, VecTy, Val, Val);
1058 }
1059
1060 if (isLASX256Vector) {
1061 SDValue Tmp = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, Val,
1062 DAG.getConstant(2, DL, Subtarget.getGRLenVT()));
1063 Val = DAG.getNode(ISD::ADD, DL, MVT::v4i64, Tmp, Val);
1064 }
1065
1066 Val = DAG.getBitcast(MVT::getVectorVT(OpVT, LegalVecSize / ResBits), Val);
1067 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Val,
1068 DAG.getConstant(0, DL, Subtarget.getGRLenVT()));
1069}
1070
1071// Lower vecreduce_and/or/xor/[s/u]max/[s/u]min.
1072// For Example:
1073// call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %a)
1074// can be lowered to:
1075// VBSRL_V vr1, vr0, 8
1076// VMAX_W vr0, vr1, vr0
1077// VBSRL_V vr1, vr0, 4
1078// VMAX_W vr0, vr1, vr0
1079// VPICKVE2GR_W a0, vr0, 0
1080// For 256 bit vector, it is illegal and will be spilt into
1081// two 128 bit vector by default then processed by this.
1082SDValue LoongArchTargetLowering::lowerVECREDUCE(SDValue Op,
1083 SelectionDAG &DAG) const {
1084 SDLoc DL(Op);
1085
1086 MVT OpVT = Op.getSimpleValueType();
1087 SDValue Val = Op.getOperand(0);
1088
1089 unsigned NumEles = Val.getSimpleValueType().getVectorNumElements();
1090 unsigned EleBits = Val.getSimpleValueType().getScalarSizeInBits();
1091
1092 // Ensure operand type legal or enable it legal.
1093 while (!isTypeLegal(Val.getSimpleValueType())) {
1094 Val = DAG.WidenVector(Val, DL);
1095 }
1096
1097 unsigned Opcode = ISD::getVecReduceBaseOpcode(Op.getOpcode());
1098 MVT VecTy = Val.getSimpleValueType();
1099 MVT GRLenVT = Subtarget.getGRLenVT();
1100
1101 for (int i = NumEles; i > 1; i /= 2) {
1102 SDValue ShiftAmt = DAG.getConstant(i * EleBits / 16, DL, GRLenVT);
1103 SDValue Tmp = DAG.getNode(LoongArchISD::VBSRL, DL, VecTy, Val, ShiftAmt);
1104 Val = DAG.getNode(Opcode, DL, VecTy, Tmp, Val);
1105 }
1106
1107 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Val,
1108 DAG.getConstant(0, DL, GRLenVT));
1109}
1110
1111SDValue LoongArchTargetLowering::lowerPREFETCH(SDValue Op,
1112 SelectionDAG &DAG) const {
1113 unsigned IsData = Op.getConstantOperandVal(4);
1114
1115 // We don't support non-data prefetch.
1116 // Just preserve the chain.
1117 if (!IsData)
1118 return Op.getOperand(0);
1119
1120 return Op;
1121}
1122
1123SDValue LoongArchTargetLowering::lowerRotate(SDValue Op,
1124 SelectionDAG &DAG) const {
1125 MVT VT = Op.getSimpleValueType();
1126 assert(VT.isVector() && "Unexpected type");
1127
1128 SDLoc DL(Op);
1129 SDValue R = Op.getOperand(0);
1130 SDValue Amt = Op.getOperand(1);
1131 unsigned Opcode = Op.getOpcode();
1132 unsigned EltSizeInBits = VT.getScalarSizeInBits();
1133
1134 auto checkCstSplat = [](SDValue V, APInt &CstSplatValue) {
1135 if (V.getOpcode() != ISD::BUILD_VECTOR)
1136 return false;
1137 if (SDValue SplatValue =
1138 cast<BuildVectorSDNode>(V.getNode())->getSplatValue()) {
1139 if (auto *C = dyn_cast<ConstantSDNode>(SplatValue)) {
1140 CstSplatValue = C->getAPIntValue();
1141 return true;
1142 }
1143 }
1144 return false;
1145 };
1146
1147 // Check for constant splat rotation amount.
1148 APInt CstSplatValue;
1149 bool IsCstSplat = checkCstSplat(Amt, CstSplatValue);
1150 bool isROTL = Opcode == ISD::ROTL;
1151
1152 // Check for splat rotate by zero.
1153 if (IsCstSplat && CstSplatValue.urem(EltSizeInBits) == 0)
1154 return R;
1155
1156 // LoongArch targets always prefer ISD::ROTR.
1157 if (isROTL) {
1158 SDValue Zero = DAG.getConstant(0, DL, VT);
1159 return DAG.getNode(ISD::ROTR, DL, VT, R,
1160 DAG.getNode(ISD::SUB, DL, VT, Zero, Amt));
1161 }
1162
1163 // Rotate by a immediate.
1164 if (IsCstSplat) {
1165 // ISD::ROTR: Attemp to rotate by a positive immediate.
1166 SDValue Bits = DAG.getConstant(EltSizeInBits, DL, VT);
1167 if (SDValue Urem =
1168 DAG.FoldConstantArithmetic(ISD::UREM, DL, VT, {Amt, Bits}))
1169 return DAG.getNode(Opcode, DL, VT, R, Urem);
1170 }
1171
1172 return Op;
1173}
1174
1175// Return true if Val is equal to (setcc LHS, RHS, CC).
1176// Return false if Val is the inverse of (setcc LHS, RHS, CC).
1177// Otherwise, return std::nullopt.
1178static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS,
1179 ISD::CondCode CC, SDValue Val) {
1180 assert(Val->getOpcode() == ISD::SETCC);
1181 SDValue LHS2 = Val.getOperand(0);
1182 SDValue RHS2 = Val.getOperand(1);
1183 ISD::CondCode CC2 = cast<CondCodeSDNode>(Val.getOperand(2))->get();
1184
1185 if (LHS == LHS2 && RHS == RHS2) {
1186 if (CC == CC2)
1187 return true;
1188 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
1189 return false;
1190 } else if (LHS == RHS2 && RHS == LHS2) {
1192 if (CC == CC2)
1193 return true;
1194 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
1195 return false;
1196 }
1197
1198 return std::nullopt;
1199}
1200
1202 const LoongArchSubtarget &Subtarget) {
1203 SDValue CondV = N->getOperand(0);
1204 SDValue TrueV = N->getOperand(1);
1205 SDValue FalseV = N->getOperand(2);
1206 MVT VT = N->getSimpleValueType(0);
1207 SDLoc DL(N);
1208
1209 // (select c, -1, y) -> -c | y
1210 if (isAllOnesConstant(TrueV)) {
1211 SDValue Neg = DAG.getNegative(CondV, DL, VT);
1212 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(FalseV));
1213 }
1214 // (select c, y, -1) -> (c-1) | y
1215 if (isAllOnesConstant(FalseV)) {
1216 SDValue Neg =
1217 DAG.getNode(ISD::ADD, DL, VT, CondV, DAG.getAllOnesConstant(DL, VT));
1218 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(TrueV));
1219 }
1220
1221 // (select c, 0, y) -> (c-1) & y
1222 if (isNullConstant(TrueV)) {
1223 SDValue Neg =
1224 DAG.getNode(ISD::ADD, DL, VT, CondV, DAG.getAllOnesConstant(DL, VT));
1225 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(FalseV));
1226 }
1227 // (select c, y, 0) -> -c & y
1228 if (isNullConstant(FalseV)) {
1229 SDValue Neg = DAG.getNegative(CondV, DL, VT);
1230 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(TrueV));
1231 }
1232
1233 // select c, ~x, x --> xor -c, x
1234 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
1235 const APInt &TrueVal = TrueV->getAsAPIntVal();
1236 const APInt &FalseVal = FalseV->getAsAPIntVal();
1237 if (~TrueVal == FalseVal) {
1238 SDValue Neg = DAG.getNegative(CondV, DL, VT);
1239 return DAG.getNode(ISD::XOR, DL, VT, Neg, FalseV);
1240 }
1241 }
1242
1243 // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops
1244 // when both truev and falsev are also setcc.
1245 if (CondV.getOpcode() == ISD::SETCC && TrueV.getOpcode() == ISD::SETCC &&
1246 FalseV.getOpcode() == ISD::SETCC) {
1247 SDValue LHS = CondV.getOperand(0);
1248 SDValue RHS = CondV.getOperand(1);
1249 ISD::CondCode CC = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
1250
1251 // (select x, x, y) -> x | y
1252 // (select !x, x, y) -> x & y
1253 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, TrueV)) {
1254 return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, TrueV,
1255 DAG.getFreeze(FalseV));
1256 }
1257 // (select x, y, x) -> x & y
1258 // (select !x, y, x) -> x | y
1259 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, FalseV)) {
1260 return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT,
1261 DAG.getFreeze(TrueV), FalseV);
1262 }
1263 }
1264
1265 return SDValue();
1266}
1267
1268// Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants
1269// into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable.
1270// For now we only consider transformation profitable if `binOp(c0, c1)` ends up
1271// being `0` or `-1`. In such cases we can replace `select` with `and`.
1272// TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize
1273// than `c0`?
1274static SDValue
1276 const LoongArchSubtarget &Subtarget) {
1277 unsigned SelOpNo = 0;
1278 SDValue Sel = BO->getOperand(0);
1279 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
1280 SelOpNo = 1;
1281 Sel = BO->getOperand(1);
1282 }
1283
1284 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
1285 return SDValue();
1286
1287 unsigned ConstSelOpNo = 1;
1288 unsigned OtherSelOpNo = 2;
1289 if (!isa<ConstantSDNode>(Sel->getOperand(ConstSelOpNo))) {
1290 ConstSelOpNo = 2;
1291 OtherSelOpNo = 1;
1292 }
1293 SDValue ConstSelOp = Sel->getOperand(ConstSelOpNo);
1294 ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(ConstSelOp);
1295 if (!ConstSelOpNode || ConstSelOpNode->isOpaque())
1296 return SDValue();
1297
1298 SDValue ConstBinOp = BO->getOperand(SelOpNo ^ 1);
1299 ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(ConstBinOp);
1300 if (!ConstBinOpNode || ConstBinOpNode->isOpaque())
1301 return SDValue();
1302
1303 SDLoc DL(Sel);
1304 EVT VT = BO->getValueType(0);
1305
1306 SDValue NewConstOps[2] = {ConstSelOp, ConstBinOp};
1307 if (SelOpNo == 1)
1308 std::swap(NewConstOps[0], NewConstOps[1]);
1309
1310 SDValue NewConstOp =
1311 DAG.FoldConstantArithmetic(BO->getOpcode(), DL, VT, NewConstOps);
1312 if (!NewConstOp)
1313 return SDValue();
1314
1315 const APInt &NewConstAPInt = NewConstOp->getAsAPIntVal();
1316 if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes())
1317 return SDValue();
1318
1319 SDValue OtherSelOp = Sel->getOperand(OtherSelOpNo);
1320 SDValue NewNonConstOps[2] = {OtherSelOp, ConstBinOp};
1321 if (SelOpNo == 1)
1322 std::swap(NewNonConstOps[0], NewNonConstOps[1]);
1323 SDValue NewNonConstOp = DAG.getNode(BO->getOpcode(), DL, VT, NewNonConstOps);
1324
1325 SDValue NewT = (ConstSelOpNo == 1) ? NewConstOp : NewNonConstOp;
1326 SDValue NewF = (ConstSelOpNo == 1) ? NewNonConstOp : NewConstOp;
1327 return DAG.getSelect(DL, VT, Sel.getOperand(0), NewT, NewF);
1328}
1329
1330// Changes the condition code and swaps operands if necessary, so the SetCC
1331// operation matches one of the comparisons supported directly by branches
1332// in the LoongArch ISA. May adjust compares to favor compare with 0 over
1333// compare with 1/-1.
1335 ISD::CondCode &CC, SelectionDAG &DAG) {
1336 // If this is a single bit test that can't be handled by ANDI, shift the
1337 // bit to be tested to the MSB and perform a signed compare with 0.
1338 if (isIntEqualitySetCC(CC) && isNullConstant(RHS) &&
1339 LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
1340 isa<ConstantSDNode>(LHS.getOperand(1))) {
1341 uint64_t Mask = LHS.getConstantOperandVal(1);
1342 if ((isPowerOf2_64(Mask) || isMask_64(Mask)) && !isInt<12>(Mask)) {
1343 unsigned ShAmt = 0;
1344 if (isPowerOf2_64(Mask)) {
1345 CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
1346 ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);
1347 } else {
1348 ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Mask);
1349 }
1350
1351 LHS = LHS.getOperand(0);
1352 if (ShAmt != 0)
1353 LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS,
1354 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
1355 return;
1356 }
1357 }
1358
1359 if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
1360 int64_t C = RHSC->getSExtValue();
1361 switch (CC) {
1362 default:
1363 break;
1364 case ISD::SETGT:
1365 // Convert X > -1 to X >= 0.
1366 if (C == -1) {
1367 RHS = DAG.getConstant(0, DL, RHS.getValueType());
1368 CC = ISD::SETGE;
1369 return;
1370 }
1371 break;
1372 case ISD::SETLT:
1373 // Convert X < 1 to 0 >= X.
1374 if (C == 1) {
1375 RHS = LHS;
1376 LHS = DAG.getConstant(0, DL, RHS.getValueType());
1377 CC = ISD::SETGE;
1378 return;
1379 }
1380 break;
1381 }
1382 }
1383
1384 switch (CC) {
1385 default:
1386 break;
1387 case ISD::SETGT:
1388 case ISD::SETLE:
1389 case ISD::SETUGT:
1390 case ISD::SETULE:
1392 std::swap(LHS, RHS);
1393 break;
1394 }
1395}
1396
1397SDValue LoongArchTargetLowering::lowerSELECT(SDValue Op,
1398 SelectionDAG &DAG) const {
1399 SDValue CondV = Op.getOperand(0);
1400 SDValue TrueV = Op.getOperand(1);
1401 SDValue FalseV = Op.getOperand(2);
1402 SDLoc DL(Op);
1403 MVT VT = Op.getSimpleValueType();
1404 MVT GRLenVT = Subtarget.getGRLenVT();
1405
1406 if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
1407 return V;
1408
1409 if (Op.hasOneUse()) {
1410 unsigned UseOpc = Op->user_begin()->getOpcode();
1411 if (isBinOp(UseOpc) && DAG.isSafeToSpeculativelyExecute(UseOpc)) {
1412 SDNode *BinOp = *Op->user_begin();
1413 if (SDValue NewSel = foldBinOpIntoSelectIfProfitable(*Op->user_begin(),
1414 DAG, Subtarget)) {
1415 DAG.ReplaceAllUsesWith(BinOp, &NewSel);
1416 // Opcode check is necessary because foldBinOpIntoSelectIfProfitable
1417 // may return a constant node and cause crash in lowerSELECT.
1418 if (NewSel.getOpcode() == ISD::SELECT)
1419 return lowerSELECT(NewSel, DAG);
1420 return NewSel;
1421 }
1422 }
1423 }
1424
1425 // If the condition is not an integer SETCC which operates on GRLenVT, we need
1426 // to emit a LoongArchISD::SELECT_CC comparing the condition to zero. i.e.:
1427 // (select condv, truev, falsev)
1428 // -> (loongarchisd::select_cc condv, zero, setne, truev, falsev)
1429 if (CondV.getOpcode() != ISD::SETCC ||
1430 CondV.getOperand(0).getSimpleValueType() != GRLenVT) {
1431 SDValue Zero = DAG.getConstant(0, DL, GRLenVT);
1432 SDValue SetNE = DAG.getCondCode(ISD::SETNE);
1433
1434 SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
1435
1436 return DAG.getNode(LoongArchISD::SELECT_CC, DL, VT, Ops);
1437 }
1438
1439 // If the CondV is the output of a SETCC node which operates on GRLenVT
1440 // inputs, then merge the SETCC node into the lowered LoongArchISD::SELECT_CC
1441 // to take advantage of the integer compare+branch instructions. i.e.: (select
1442 // (setcc lhs, rhs, cc), truev, falsev)
1443 // -> (loongarchisd::select_cc lhs, rhs, cc, truev, falsev)
1444 SDValue LHS = CondV.getOperand(0);
1445 SDValue RHS = CondV.getOperand(1);
1446 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
1447
1448 // Special case for a select of 2 constants that have a difference of 1.
1449 // Normally this is done by DAGCombine, but if the select is introduced by
1450 // type legalization or op legalization, we miss it. Restricting to SETLT
1451 // case for now because that is what signed saturating add/sub need.
1452 // FIXME: We don't need the condition to be SETLT or even a SETCC,
1453 // but we would probably want to swap the true/false values if the condition
1454 // is SETGE/SETLE to avoid an XORI.
1455 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
1456 CCVal == ISD::SETLT) {
1457 const APInt &TrueVal = TrueV->getAsAPIntVal();
1458 const APInt &FalseVal = FalseV->getAsAPIntVal();
1459 if (TrueVal - 1 == FalseVal)
1460 return DAG.getNode(ISD::ADD, DL, VT, CondV, FalseV);
1461 if (TrueVal + 1 == FalseVal)
1462 return DAG.getNode(ISD::SUB, DL, VT, FalseV, CondV);
1463 }
1464
1465 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
1466 // 1 < x ? x : 1 -> 0 < x ? x : 1
1467 if (isOneConstant(LHS) && (CCVal == ISD::SETLT || CCVal == ISD::SETULT) &&
1468 RHS == TrueV && LHS == FalseV) {
1469 LHS = DAG.getConstant(0, DL, VT);
1470 // 0 <u x is the same as x != 0.
1471 if (CCVal == ISD::SETULT) {
1472 std::swap(LHS, RHS);
1473 CCVal = ISD::SETNE;
1474 }
1475 }
1476
1477 // x <s -1 ? x : -1 -> x <s 0 ? x : -1
1478 if (isAllOnesConstant(RHS) && CCVal == ISD::SETLT && LHS == TrueV &&
1479 RHS == FalseV) {
1480 RHS = DAG.getConstant(0, DL, VT);
1481 }
1482
1483 SDValue TargetCC = DAG.getCondCode(CCVal);
1484
1485 if (isa<ConstantSDNode>(TrueV) && !isa<ConstantSDNode>(FalseV)) {
1486 // (select (setcc lhs, rhs, CC), constant, falsev)
1487 // -> (select (setcc lhs, rhs, InverseCC), falsev, constant)
1488 std::swap(TrueV, FalseV);
1489 TargetCC = DAG.getCondCode(ISD::getSetCCInverse(CCVal, LHS.getValueType()));
1490 }
1491
1492 SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
1493 return DAG.getNode(LoongArchISD::SELECT_CC, DL, VT, Ops);
1494}
1495
1496SDValue LoongArchTargetLowering::lowerBRCOND(SDValue Op,
1497 SelectionDAG &DAG) const {
1498 SDValue CondV = Op.getOperand(1);
1499 SDLoc DL(Op);
1500 MVT GRLenVT = Subtarget.getGRLenVT();
1501
1502 if (CondV.getOpcode() == ISD::SETCC) {
1503 if (CondV.getOperand(0).getValueType() == GRLenVT) {
1504 SDValue LHS = CondV.getOperand(0);
1505 SDValue RHS = CondV.getOperand(1);
1506 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
1507
1508 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
1509
1510 SDValue TargetCC = DAG.getCondCode(CCVal);
1511 return DAG.getNode(LoongArchISD::BR_CC, DL, Op.getValueType(),
1512 Op.getOperand(0), LHS, RHS, TargetCC,
1513 Op.getOperand(2));
1514 } else if (CondV.getOperand(0).getValueType().isFloatingPoint()) {
1515 return DAG.getNode(LoongArchISD::BRCOND, DL, Op.getValueType(),
1516 Op.getOperand(0), CondV, Op.getOperand(2));
1517 }
1518 }
1519
1520 return DAG.getNode(LoongArchISD::BR_CC, DL, Op.getValueType(),
1521 Op.getOperand(0), CondV, DAG.getConstant(0, DL, GRLenVT),
1522 DAG.getCondCode(ISD::SETNE), Op.getOperand(2));
1523}
1524
1525SDValue
1526LoongArchTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
1527 SelectionDAG &DAG) const {
1528 SDLoc DL(Op);
1529 MVT OpVT = Op.getSimpleValueType();
1530
1531 SDValue Vector = DAG.getUNDEF(OpVT);
1532 SDValue Val = Op.getOperand(0);
1533 SDValue Idx = DAG.getConstant(0, DL, Subtarget.getGRLenVT());
1534
1535 return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, OpVT, Vector, Val, Idx);
1536}
1537
1538SDValue LoongArchTargetLowering::lowerBITREVERSE(SDValue Op,
1539 SelectionDAG &DAG) const {
1540 EVT ResTy = Op->getValueType(0);
1541 SDValue Src = Op->getOperand(0);
1542 SDLoc DL(Op);
1543
1544 // LoongArchISD::BITREV_8B is not supported on LA32.
1545 if (!Subtarget.is64Bit() && (ResTy == MVT::v16i8 || ResTy == MVT::v32i8))
1546 return SDValue();
1547
1548 EVT NewVT = ResTy.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
1549 unsigned int OrigEltNum = ResTy.getVectorNumElements();
1550 unsigned int NewEltNum = NewVT.getVectorNumElements();
1551
1552 SDValue NewSrc = DAG.getNode(ISD::BITCAST, DL, NewVT, Src);
1553
1555 for (unsigned int i = 0; i < NewEltNum; i++) {
1556 SDValue Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, NewSrc,
1557 DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
1558 unsigned RevOp = (ResTy == MVT::v16i8 || ResTy == MVT::v32i8)
1559 ? (unsigned)LoongArchISD::BITREV_8B
1560 : (unsigned)ISD::BITREVERSE;
1561 Ops.push_back(DAG.getNode(RevOp, DL, MVT::i64, Op));
1562 }
1563 SDValue Res =
1564 DAG.getNode(ISD::BITCAST, DL, ResTy, DAG.getBuildVector(NewVT, DL, Ops));
1565
1566 switch (ResTy.getSimpleVT().SimpleTy) {
1567 default:
1568 return SDValue();
1569 case MVT::v16i8:
1570 case MVT::v32i8:
1571 return Res;
1572 case MVT::v8i16:
1573 case MVT::v16i16:
1574 case MVT::v4i32:
1575 case MVT::v8i32: {
1577 for (unsigned int i = 0; i < NewEltNum; i++)
1578 for (int j = OrigEltNum / NewEltNum - 1; j >= 0; j--)
1579 Mask.push_back(j + (OrigEltNum / NewEltNum) * i);
1580 return DAG.getVectorShuffle(ResTy, DL, Res, DAG.getUNDEF(ResTy), Mask);
1581 }
1582 }
1583}
1584
1585// Widen element type to get a new mask value (if possible).
1586// For example:
1587// shufflevector <4 x i32> %a, <4 x i32> %b,
1588// <4 x i32> <i32 6, i32 7, i32 2, i32 3>
1589// is equivalent to:
1590// shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
1591// can be lowered to:
1592// VPACKOD_D vr0, vr0, vr1
1594 SDValue V1, SDValue V2, SelectionDAG &DAG) {
1595 unsigned EltBits = VT.getScalarSizeInBits();
1596
1597 if (EltBits > 32 || EltBits == 1)
1598 return SDValue();
1599
1600 SmallVector<int, 8> NewMask;
1601 if (widenShuffleMaskElts(Mask, NewMask)) {
1602 MVT NewEltVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(EltBits * 2)
1603 : MVT::getIntegerVT(EltBits * 2);
1604 MVT NewVT = MVT::getVectorVT(NewEltVT, VT.getVectorNumElements() / 2);
1605 if (DAG.getTargetLoweringInfo().isTypeLegal(NewVT)) {
1606 SDValue NewV1 = DAG.getBitcast(NewVT, V1);
1607 SDValue NewV2 = DAG.getBitcast(NewVT, V2);
1608 return DAG.getBitcast(
1609 VT, DAG.getVectorShuffle(NewVT, DL, NewV1, NewV2, NewMask));
1610 }
1611 }
1612
1613 return SDValue();
1614}
1615
1616/// Attempts to match a shuffle mask against the VBSLL, VBSRL, VSLLI and VSRLI
1617/// instruction.
1618// The funciton matches elements from one of the input vector shuffled to the
1619// left or right with zeroable elements 'shifted in'. It handles both the
1620// strictly bit-wise element shifts and the byte shfit across an entire 128-bit
1621// lane.
1622// Mostly copied from X86.
1623static int matchShuffleAsShift(MVT &ShiftVT, unsigned &Opcode,
1624 unsigned ScalarSizeInBits, ArrayRef<int> Mask,
1625 int MaskOffset, const APInt &Zeroable) {
1626 int Size = Mask.size();
1627 unsigned SizeInBits = Size * ScalarSizeInBits;
1628
1629 auto CheckZeros = [&](int Shift, int Scale, bool Left) {
1630 for (int i = 0; i < Size; i += Scale)
1631 for (int j = 0; j < Shift; ++j)
1632 if (!Zeroable[i + j + (Left ? 0 : (Scale - Shift))])
1633 return false;
1634
1635 return true;
1636 };
1637
1638 auto isSequentialOrUndefInRange = [&](unsigned Pos, unsigned Size, int Low,
1639 int Step = 1) {
1640 for (unsigned i = Pos, e = Pos + Size; i != e; ++i, Low += Step)
1641 if (!(Mask[i] == -1 || Mask[i] == Low))
1642 return false;
1643 return true;
1644 };
1645
1646 auto MatchShift = [&](int Shift, int Scale, bool Left) {
1647 for (int i = 0; i != Size; i += Scale) {
1648 unsigned Pos = Left ? i + Shift : i;
1649 unsigned Low = Left ? i : i + Shift;
1650 unsigned Len = Scale - Shift;
1651 if (!isSequentialOrUndefInRange(Pos, Len, Low + MaskOffset))
1652 return -1;
1653 }
1654
1655 int ShiftEltBits = ScalarSizeInBits * Scale;
1656 bool ByteShift = ShiftEltBits > 64;
1657 Opcode = Left ? (ByteShift ? LoongArchISD::VBSLL : LoongArchISD::VSLLI)
1658 : (ByteShift ? LoongArchISD::VBSRL : LoongArchISD::VSRLI);
1659 int ShiftAmt = Shift * ScalarSizeInBits / (ByteShift ? 8 : 1);
1660
1661 // Normalize the scale for byte shifts to still produce an i64 element
1662 // type.
1663 Scale = ByteShift ? Scale / 2 : Scale;
1664
1665 // We need to round trip through the appropriate type for the shift.
1666 MVT ShiftSVT = MVT::getIntegerVT(ScalarSizeInBits * Scale);
1667 ShiftVT = ByteShift ? MVT::getVectorVT(MVT::i8, SizeInBits / 8)
1668 : MVT::getVectorVT(ShiftSVT, Size / Scale);
1669 return (int)ShiftAmt;
1670 };
1671
1672 unsigned MaxWidth = 128;
1673 for (int Scale = 2; Scale * ScalarSizeInBits <= MaxWidth; Scale *= 2)
1674 for (int Shift = 1; Shift != Scale; ++Shift)
1675 for (bool Left : {true, false})
1676 if (CheckZeros(Shift, Scale, Left)) {
1677 int ShiftAmt = MatchShift(Shift, Scale, Left);
1678 if (0 < ShiftAmt)
1679 return ShiftAmt;
1680 }
1681
1682 // no match
1683 return -1;
1684}
1685
1686/// Lower VECTOR_SHUFFLE as shift (if possible).
1687///
1688/// For example:
1689/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
1690/// <4 x i32> <i32 4, i32 0, i32 1, i32 2>
1691/// is lowered to:
1692/// (VBSLL_V $v0, $v0, 4)
1693///
1694/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
1695/// <4 x i32> <i32 4, i32 0, i32 4, i32 2>
1696/// is lowered to:
1697/// (VSLLI_D $v0, $v0, 32)
1699 MVT VT, SDValue V1, SDValue V2,
1700 SelectionDAG &DAG,
1701 const LoongArchSubtarget &Subtarget,
1702 const APInt &Zeroable) {
1703 int Size = Mask.size();
1704 assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");
1705
1706 MVT ShiftVT;
1707 SDValue V = V1;
1708 unsigned Opcode;
1709
1710 // Try to match shuffle against V1 shift.
1711 int ShiftAmt = matchShuffleAsShift(ShiftVT, Opcode, VT.getScalarSizeInBits(),
1712 Mask, 0, Zeroable);
1713
1714 // If V1 failed, try to match shuffle against V2 shift.
1715 if (ShiftAmt < 0) {
1716 ShiftAmt = matchShuffleAsShift(ShiftVT, Opcode, VT.getScalarSizeInBits(),
1717 Mask, Size, Zeroable);
1718 V = V2;
1719 }
1720
1721 if (ShiftAmt < 0)
1722 return SDValue();
1723
1724 assert(DAG.getTargetLoweringInfo().isTypeLegal(ShiftVT) &&
1725 "Illegal integer vector type");
1726 V = DAG.getBitcast(ShiftVT, V);
1727 V = DAG.getNode(Opcode, DL, ShiftVT, V,
1728 DAG.getConstant(ShiftAmt, DL, Subtarget.getGRLenVT()));
1729 return DAG.getBitcast(VT, V);
1730}
1731
1732/// Determine whether a range fits a regular pattern of values.
1733/// This function accounts for the possibility of jumping over the End iterator.
1734template <typename ValType>
1735static bool
1737 unsigned CheckStride,
1739 ValType ExpectedIndex, unsigned ExpectedIndexStride) {
1740 auto &I = Begin;
1741
1742 while (I != End) {
1743 if (*I != -1 && *I != ExpectedIndex)
1744 return false;
1745 ExpectedIndex += ExpectedIndexStride;
1746
1747 // Incrementing past End is undefined behaviour so we must increment one
1748 // step at a time and check for End at each step.
1749 for (unsigned n = 0; n < CheckStride && I != End; ++n, ++I)
1750 ; // Empty loop body.
1751 }
1752 return true;
1753}
1754
1755/// Compute whether each element of a shuffle is zeroable.
1756///
1757/// A "zeroable" vector shuffle element is one which can be lowered to zero.
1759 SDValue V2, APInt &KnownUndef,
1760 APInt &KnownZero) {
1761 int Size = Mask.size();
1762 KnownUndef = KnownZero = APInt::getZero(Size);
1763
1765 V2 = peekThroughBitcasts(V2);
1766
1767 bool V1IsZero = ISD::isBuildVectorAllZeros(V1.getNode());
1768 bool V2IsZero = ISD::isBuildVectorAllZeros(V2.getNode());
1769
1770 int VectorSizeInBits = V1.getValueSizeInBits();
1771 int ScalarSizeInBits = VectorSizeInBits / Size;
1772 assert(!(VectorSizeInBits % ScalarSizeInBits) && "Illegal shuffle mask size");
1773 (void)ScalarSizeInBits;
1774
1775 for (int i = 0; i < Size; ++i) {
1776 int M = Mask[i];
1777 if (M < 0) {
1778 KnownUndef.setBit(i);
1779 continue;
1780 }
1781 if ((M >= 0 && M < Size && V1IsZero) || (M >= Size && V2IsZero)) {
1782 KnownZero.setBit(i);
1783 continue;
1784 }
1785 }
1786}
1787
1788/// Test whether a shuffle mask is equivalent within each sub-lane.
1789///
1790/// The specific repeated shuffle mask is populated in \p RepeatedMask, as it is
1791/// non-trivial to compute in the face of undef lanes. The representation is
1792/// suitable for use with existing 128-bit shuffles as entries from the second
1793/// vector have been remapped to [LaneSize, 2*LaneSize).
1794static bool isRepeatedShuffleMask(unsigned LaneSizeInBits, MVT VT,
1795 ArrayRef<int> Mask,
1796 SmallVectorImpl<int> &RepeatedMask) {
1797 auto LaneSize = LaneSizeInBits / VT.getScalarSizeInBits();
1798 RepeatedMask.assign(LaneSize, -1);
1799 int Size = Mask.size();
1800 for (int i = 0; i < Size; ++i) {
1801 assert(Mask[i] == -1 || Mask[i] >= 0);
1802 if (Mask[i] < 0)
1803 continue;
1804 if ((Mask[i] % Size) / LaneSize != i / LaneSize)
1805 // This entry crosses lanes, so there is no way to model this shuffle.
1806 return false;
1807
1808 // Ok, handle the in-lane shuffles by detecting if and when they repeat.
1809 // Adjust second vector indices to start at LaneSize instead of Size.
1810 int LocalM =
1811 Mask[i] < Size ? Mask[i] % LaneSize : Mask[i] % LaneSize + LaneSize;
1812 if (RepeatedMask[i % LaneSize] < 0)
1813 // This is the first non-undef entry in this slot of a 128-bit lane.
1814 RepeatedMask[i % LaneSize] = LocalM;
1815 else if (RepeatedMask[i % LaneSize] != LocalM)
1816 // Found a mismatch with the repeated mask.
1817 return false;
1818 }
1819 return true;
1820}
1821
1822/// Attempts to match vector shuffle as byte rotation.
1824 ArrayRef<int> Mask) {
1825
1826 SDValue Lo, Hi;
1827 SmallVector<int, 16> RepeatedMask;
1828
1829 if (!isRepeatedShuffleMask(128, VT, Mask, RepeatedMask))
1830 return -1;
1831
1832 int NumElts = RepeatedMask.size();
1833 int Rotation = 0;
1834 int Scale = 16 / NumElts;
1835
1836 for (int i = 0; i < NumElts; ++i) {
1837 int M = RepeatedMask[i];
1838 assert((M == -1 || (0 <= M && M < (2 * NumElts))) &&
1839 "Unexpected mask index.");
1840 if (M < 0)
1841 continue;
1842
1843 // Determine where a rotated vector would have started.
1844 int StartIdx = i - (M % NumElts);
1845 if (StartIdx == 0)
1846 return -1;
1847
1848 // If we found the tail of a vector the rotation must be the missing
1849 // front. If we found the head of a vector, it must be how much of the
1850 // head.
1851 int CandidateRotation = StartIdx < 0 ? -StartIdx : NumElts - StartIdx;
1852
1853 if (Rotation == 0)
1854 Rotation = CandidateRotation;
1855 else if (Rotation != CandidateRotation)
1856 return -1;
1857
1858 // Compute which value this mask is pointing at.
1859 SDValue MaskV = M < NumElts ? V1 : V2;
1860
1861 // Compute which of the two target values this index should be assigned
1862 // to. This reflects whether the high elements are remaining or the low
1863 // elements are remaining.
1864 SDValue &TargetV = StartIdx < 0 ? Hi : Lo;
1865
1866 // Either set up this value if we've not encountered it before, or check
1867 // that it remains consistent.
1868 if (!TargetV)
1869 TargetV = MaskV;
1870 else if (TargetV != MaskV)
1871 return -1;
1872 }
1873
1874 // Check that we successfully analyzed the mask, and normalize the results.
1875 assert(Rotation != 0 && "Failed to locate a viable rotation!");
1876 assert((Lo || Hi) && "Failed to find a rotated input vector!");
1877 if (!Lo)
1878 Lo = Hi;
1879 else if (!Hi)
1880 Hi = Lo;
1881
1882 V1 = Lo;
1883 V2 = Hi;
1884
1885 return Rotation * Scale;
1886}
1887
1888/// Lower VECTOR_SHUFFLE as byte rotate (if possible).
1889///
1890/// For example:
1891/// %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b,
1892/// <2 x i32> <i32 3, i32 0>
1893/// is lowered to:
1894/// (VBSRL_V $v1, $v1, 8)
1895/// (VBSLL_V $v0, $v0, 8)
1896/// (VOR_V $v0, $V0, $v1)
1897static SDValue
1899 SDValue V1, SDValue V2, SelectionDAG &DAG,
1900 const LoongArchSubtarget &Subtarget) {
1901
1902 SDValue Lo = V1, Hi = V2;
1903 int ByteRotation = matchShuffleAsByteRotate(VT, Lo, Hi, Mask);
1904 if (ByteRotation <= 0)
1905 return SDValue();
1906
1907 MVT ByteVT = MVT::getVectorVT(MVT::i8, VT.getSizeInBits() / 8);
1908 Lo = DAG.getBitcast(ByteVT, Lo);
1909 Hi = DAG.getBitcast(ByteVT, Hi);
1910
1911 int LoByteShift = 16 - ByteRotation;
1912 int HiByteShift = ByteRotation;
1913 MVT GRLenVT = Subtarget.getGRLenVT();
1914
1915 SDValue LoShift = DAG.getNode(LoongArchISD::VBSLL, DL, ByteVT, Lo,
1916 DAG.getConstant(LoByteShift, DL, GRLenVT));
1917 SDValue HiShift = DAG.getNode(LoongArchISD::VBSRL, DL, ByteVT, Hi,
1918 DAG.getConstant(HiByteShift, DL, GRLenVT));
1919 return DAG.getBitcast(VT, DAG.getNode(ISD::OR, DL, ByteVT, LoShift, HiShift));
1920}
1921
1922/// Lower VECTOR_SHUFFLE as ZERO_EXTEND Or ANY_EXTEND (if possible).
1923///
1924/// For example:
1925/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
1926/// <4 x i32> <i32 0, i32 4, i32 1, i32 4>
1927/// %3 = bitcast <4 x i32> %2 to <2 x i64>
1928/// is lowered to:
1929/// (VREPLI $v1, 0)
1930/// (VILVL $v0, $v1, $v0)
1932 ArrayRef<int> Mask, MVT VT,
1933 SDValue V1, SDValue V2,
1934 SelectionDAG &DAG,
1935 const APInt &Zeroable) {
1936 int Bits = VT.getSizeInBits();
1937 int EltBits = VT.getScalarSizeInBits();
1938 int NumElements = VT.getVectorNumElements();
1939
1940 if (Zeroable.isAllOnes())
1941 return DAG.getConstant(0, DL, VT);
1942
1943 // Define a helper function to check a particular ext-scale and lower to it if
1944 // valid.
1945 auto Lower = [&](int Scale) -> SDValue {
1946 SDValue InputV;
1947 bool AnyExt = true;
1948 int Offset = 0;
1949 for (int i = 0; i < NumElements; i++) {
1950 int M = Mask[i];
1951 if (M < 0)
1952 continue;
1953 if (i % Scale != 0) {
1954 // Each of the extended elements need to be zeroable.
1955 if (!Zeroable[i])
1956 return SDValue();
1957
1958 AnyExt = false;
1959 continue;
1960 }
1961
1962 // Each of the base elements needs to be consecutive indices into the
1963 // same input vector.
1964 SDValue V = M < NumElements ? V1 : V2;
1965 M = M % NumElements;
1966 if (!InputV) {
1967 InputV = V;
1968 Offset = M - (i / Scale);
1969
1970 // These offset can't be handled
1971 if (Offset % (NumElements / Scale))
1972 return SDValue();
1973 } else if (InputV != V)
1974 return SDValue();
1975
1976 if (M != (Offset + (i / Scale)))
1977 return SDValue(); // Non-consecutive strided elements.
1978 }
1979
1980 // If we fail to find an input, we have a zero-shuffle which should always
1981 // have already been handled.
1982 if (!InputV)
1983 return SDValue();
1984
1985 do {
1986 unsigned VilVLoHi = LoongArchISD::VILVL;
1987 if (Offset >= (NumElements / 2)) {
1988 VilVLoHi = LoongArchISD::VILVH;
1989 Offset -= (NumElements / 2);
1990 }
1991
1992 MVT InputVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits), NumElements);
1993 SDValue Ext =
1994 AnyExt ? DAG.getFreeze(InputV) : DAG.getConstant(0, DL, InputVT);
1995 InputV = DAG.getBitcast(InputVT, InputV);
1996 InputV = DAG.getNode(VilVLoHi, DL, InputVT, Ext, InputV);
1997 Scale /= 2;
1998 EltBits *= 2;
1999 NumElements /= 2;
2000 } while (Scale > 1);
2001 return DAG.getBitcast(VT, InputV);
2002 };
2003
2004 // Each iteration, try extending the elements half as much, but into twice as
2005 // many elements.
2006 for (int NumExtElements = Bits / 64; NumExtElements < NumElements;
2007 NumExtElements *= 2) {
2008 if (SDValue V = Lower(NumElements / NumExtElements))
2009 return V;
2010 }
2011 return SDValue();
2012}
2013
2014/// Lower VECTOR_SHUFFLE into VREPLVEI (if possible).
2015///
2016/// VREPLVEI performs vector broadcast based on an element specified by an
2017/// integer immediate, with its mask being similar to:
2018/// <x, x, x, ...>
2019/// where x is any valid index.
2020///
2021/// When undef's appear in the mask they are treated as if they were whatever
2022/// value is necessary in order to fit the above form.
2023static SDValue
2025 SDValue V1, SelectionDAG &DAG,
2026 const LoongArchSubtarget &Subtarget) {
2027 int SplatIndex = -1;
2028 for (const auto &M : Mask) {
2029 if (M != -1) {
2030 SplatIndex = M;
2031 break;
2032 }
2033 }
2034
2035 if (SplatIndex == -1)
2036 return DAG.getUNDEF(VT);
2037
2038 assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
2039 if (fitsRegularPattern<int>(Mask.begin(), 1, Mask.end(), SplatIndex, 0)) {
2040 return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
2041 DAG.getConstant(SplatIndex, DL, Subtarget.getGRLenVT()));
2042 }
2043
2044 return SDValue();
2045}
2046
2047/// Lower VECTOR_SHUFFLE into VSHUF4I (if possible).
2048///
2049/// VSHUF4I splits the vector into blocks of four elements, then shuffles these
2050/// elements according to a <4 x i2> constant (encoded as an integer immediate).
2051///
2052/// It is therefore possible to lower into VSHUF4I when the mask takes the form:
2053/// <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...>
2054/// When undef's appear they are treated as if they were whatever value is
2055/// necessary in order to fit the above forms.
2056///
2057/// For example:
2058/// %2 = shufflevector <8 x i16> %0, <8 x i16> undef,
2059/// <8 x i32> <i32 3, i32 2, i32 1, i32 0,
2060/// i32 7, i32 6, i32 5, i32 4>
2061/// is lowered to:
2062/// (VSHUF4I_H $v0, $v1, 27)
2063/// where the 27 comes from:
2064/// 3 + (2 << 2) + (1 << 4) + (0 << 6)
2065static SDValue
2067 SDValue V1, SDValue V2, SelectionDAG &DAG,
2068 const LoongArchSubtarget &Subtarget) {
2069
2070 unsigned SubVecSize = 4;
2071 if (VT == MVT::v2f64 || VT == MVT::v2i64)
2072 SubVecSize = 2;
2073
2074 int SubMask[4] = {-1, -1, -1, -1};
2075 for (unsigned i = 0; i < SubVecSize; ++i) {
2076 for (unsigned j = i; j < Mask.size(); j += SubVecSize) {
2077 int M = Mask[j];
2078
2079 // Convert from vector index to 4-element subvector index
2080 // If an index refers to an element outside of the subvector then give up
2081 if (M != -1) {
2082 M -= 4 * (j / SubVecSize);
2083 if (M < 0 || M >= 4)
2084 return SDValue();
2085 }
2086
2087 // If the mask has an undef, replace it with the current index.
2088 // Note that it might still be undef if the current index is also undef
2089 if (SubMask[i] == -1)
2090 SubMask[i] = M;
2091 // Check that non-undef values are the same as in the mask. If they
2092 // aren't then give up
2093 else if (M != -1 && M != SubMask[i])
2094 return SDValue();
2095 }
2096 }
2097
2098 // Calculate the immediate. Replace any remaining undefs with zero
2099 int Imm = 0;
2100 for (int i = SubVecSize - 1; i >= 0; --i) {
2101 int M = SubMask[i];
2102
2103 if (M == -1)
2104 M = 0;
2105
2106 Imm <<= 2;
2107 Imm |= M & 0x3;
2108 }
2109
2110 MVT GRLenVT = Subtarget.getGRLenVT();
2111
2112 // Return vshuf4i.d
2113 if (VT == MVT::v2f64 || VT == MVT::v2i64)
2114 return DAG.getNode(LoongArchISD::VSHUF4I_D, DL, VT, V1, V2,
2115 DAG.getConstant(Imm, DL, GRLenVT));
2116
2117 return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT, V1,
2118 DAG.getConstant(Imm, DL, GRLenVT));
2119}
2120
2121/// Lower VECTOR_SHUFFLE whose result is the reversed source vector.
2122///
2123/// It is possible to do optimization for VECTOR_SHUFFLE performing vector
2124/// reverse whose mask likes:
2125/// <7, 6, 5, 4, 3, 2, 1, 0>
2126///
2127/// When undef's appear in the mask they are treated as if they were whatever
2128/// value is necessary in order to fit the above forms.
2129static SDValue
2131 SDValue V1, SelectionDAG &DAG,
2132 const LoongArchSubtarget &Subtarget) {
2133 // Only vectors with i8/i16 elements which cannot match other patterns
2134 // directly needs to do this.
2135 if (VT != MVT::v16i8 && VT != MVT::v8i16 && VT != MVT::v32i8 &&
2136 VT != MVT::v16i16)
2137 return SDValue();
2138
2139 if (!ShuffleVectorInst::isReverseMask(Mask, Mask.size()))
2140 return SDValue();
2141
2142 int WidenNumElts = VT.getVectorNumElements() / 4;
2143 SmallVector<int, 16> WidenMask(WidenNumElts, -1);
2144 for (int i = 0; i < WidenNumElts; ++i)
2145 WidenMask[i] = WidenNumElts - 1 - i;
2146
2147 MVT WidenVT = MVT::getVectorVT(
2148 VT.getVectorElementType() == MVT::i8 ? MVT::i32 : MVT::i64, WidenNumElts);
2149 SDValue NewV1 = DAG.getBitcast(WidenVT, V1);
2150 SDValue WidenRev = DAG.getVectorShuffle(WidenVT, DL, NewV1,
2151 DAG.getUNDEF(WidenVT), WidenMask);
2152
2153 return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT,
2154 DAG.getBitcast(VT, WidenRev),
2155 DAG.getConstant(27, DL, Subtarget.getGRLenVT()));
2156}
2157
2158/// Lower VECTOR_SHUFFLE into VPACKEV (if possible).
2159///
2160/// VPACKEV interleaves the even elements from each vector.
2161///
2162/// It is possible to lower into VPACKEV when the mask consists of two of the
2163/// following forms interleaved:
2164/// <0, 2, 4, ...>
2165/// <n, n+2, n+4, ...>
2166/// where n is the number of elements in the vector.
2167/// For example:
2168/// <0, 0, 2, 2, 4, 4, ...>
2169/// <0, n, 2, n+2, 4, n+4, ...>
2170///
2171/// When undef's appear in the mask they are treated as if they were whatever
2172/// value is necessary in order to fit the above forms.
2174 MVT VT, SDValue V1, SDValue V2,
2175 SelectionDAG &DAG) {
2176
2177 const auto &Begin = Mask.begin();
2178 const auto &End = Mask.end();
2179 SDValue OriV1 = V1, OriV2 = V2;
2180
2181 if (fitsRegularPattern<int>(Begin, 2, End, 0, 2))
2182 V1 = OriV1;
2183 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 2))
2184 V1 = OriV2;
2185 else
2186 return SDValue();
2187
2188 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 2))
2189 V2 = OriV1;
2190 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 2))
2191 V2 = OriV2;
2192 else
2193 return SDValue();
2194
2195 return DAG.getNode(LoongArchISD::VPACKEV, DL, VT, V2, V1);
2196}
2197
2198/// Lower VECTOR_SHUFFLE into VPACKOD (if possible).
2199///
2200/// VPACKOD interleaves the odd elements from each vector.
2201///
2202/// It is possible to lower into VPACKOD when the mask consists of two of the
2203/// following forms interleaved:
2204/// <1, 3, 5, ...>
2205/// <n+1, n+3, n+5, ...>
2206/// where n is the number of elements in the vector.
2207/// For example:
2208/// <1, 1, 3, 3, 5, 5, ...>
2209/// <1, n+1, 3, n+3, 5, n+5, ...>
2210///
2211/// When undef's appear in the mask they are treated as if they were whatever
2212/// value is necessary in order to fit the above forms.
///
/// Returns the VPACKOD node on a match and an empty SDValue otherwise.
2214 MVT VT, SDValue V1, SDValue V2,
2215 SelectionDAG &DAG) {
2216
2217 const auto &Begin = Mask.begin();
2218 const auto &End = Mask.end();
2219 SDValue OriV1 = V1, OriV2 = V2;
2220
 // Choose the source for mask positions 0, 2, 4, ...: the expected run starts
 // at 1 for the first input and at Mask.size() + 1 for the second.
2221 if (fitsRegularPattern<int>(Begin, 2, End, 1, 2))
2222 V1 = OriV1;
2223 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + 1, 2))
2224 V1 = OriV2;
2225 else
2226 return SDValue();
2227
 // Same test for mask positions 1, 3, 5, ... (scan starts at Begin + 1).
2228 if (fitsRegularPattern<int>(Begin + 1, 2, End, 1, 2))
2229 V2 = OriV1;
2230 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + 1, 2))
2231 V2 = OriV2;
2232 else
2233 return SDValue();
2234
 // The node takes the odd-position source first, then the even-position one.
2235 return DAG.getNode(LoongArchISD::VPACKOD, DL, VT, V2, V1);
2236}
2237
2238/// Lower VECTOR_SHUFFLE into VILVH (if possible).
2239///
2240/// VILVH interleaves consecutive elements from the left (highest-indexed) half
2241/// of each vector.
2242///
2243/// It is possible to lower into VILVH when the mask consists of two of the
2244/// following forms interleaved:
2245/// <x, x+1, x+2, ...>
2246/// <n+x, n+x+1, n+x+2, ...>
2247/// where n is the number of elements in the vector and x is half n.
2248/// For example:
2249/// <x, x, x+1, x+1, x+2, x+2, ...>
2250/// <x, n+x, x+1, n+x+1, x+2, n+x+2, ...>
2251///
2252/// When undef's appear in the mask they are treated as if they were whatever
2253/// value is necessary in order to fit the above forms.
///
/// Returns the VILVH node on a match and an empty SDValue otherwise.
2255 MVT VT, SDValue V1, SDValue V2,
2256 SelectionDAG &DAG) {
2257
2258 const auto &Begin = Mask.begin();
2259 const auto &End = Mask.end();
 // HalfSize is the "x" of the header comment: the first index of the high half.
2260 unsigned HalfSize = Mask.size() / 2;
2261 SDValue OriV1 = V1, OriV2 = V2;
2262
 // Choose the source for mask positions 0, 2, 4, ...: a consecutive run that
 // starts at HalfSize (first input) or Mask.size() + HalfSize (second input).
2263 if (fitsRegularPattern<int>(Begin, 2, End, HalfSize, 1))
2264 V1 = OriV1;
2265 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + HalfSize, 1))
2266 V1 = OriV2;
2267 else
2268 return SDValue();
2269
 // Same test for mask positions 1, 3, 5, ... (scan starts at Begin + 1).
2270 if (fitsRegularPattern<int>(Begin + 1, 2, End, HalfSize, 1))
2271 V2 = OriV1;
2272 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + HalfSize,
2273 1))
2274 V2 = OriV2;
2275 else
2276 return SDValue();
2277
 // The node takes the odd-position source first, then the even-position one.
2278 return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
2279}
2280
2281/// Lower VECTOR_SHUFFLE into VILVL (if possible).
2282///
2283/// VILVL interleaves consecutive elements from the right (lowest-indexed) half
2284/// of each vector.
2285///
2286/// It is possible to lower into VILVL when the mask consists of two of the
2287/// following forms interleaved:
2288/// <0, 1, 2, ...>
2289/// <n, n+1, n+2, ...>
2290/// where n is the number of elements in the vector.
2291/// For example:
2292/// <0, 0, 1, 1, 2, 2, ...>
2293/// <0, n, 1, n+1, 2, n+2, ...>
2294///
2295/// When undef's appear in the mask they are treated as if they were whatever
2296/// value is necessary in order to fit the above forms.
///
/// Returns the VILVL node on a match and an empty SDValue otherwise.
2298 MVT VT, SDValue V1, SDValue V2,
2299 SelectionDAG &DAG) {
2300
2301 const auto &Begin = Mask.begin();
2302 const auto &End = Mask.end();
2303 SDValue OriV1 = V1, OriV2 = V2;
2304
 // Choose the source for mask positions 0, 2, 4, ...: a consecutive run that
 // starts at 0 (first input) or Mask.size() (second input).
2305 if (fitsRegularPattern<int>(Begin, 2, End, 0, 1))
2306 V1 = OriV1;
2307 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 1))
2308 V1 = OriV2;
2309 else
2310 return SDValue();
2311
 // Same test for mask positions 1, 3, 5, ... (scan starts at Begin + 1).
2312 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 1))
2313 V2 = OriV1;
2314 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 1))
2315 V2 = OriV2;
2316 else
2317 return SDValue();
2318
 // The node takes the odd-position source first, then the even-position one.
2319 return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
2320}
2321
2322/// Lower VECTOR_SHUFFLE into VPICKEV (if possible).
2323///
2324/// VPICKEV copies the even elements of each vector into the result vector.
2325///
2326/// It is possible to lower into VPICKEV when the mask consists of two of the
2327/// following forms concatenated:
2328/// <0, 2, 4, ...>
2329/// <n, n+2, n+4, ...>
2330/// where n is the number of elements in the vector.
2331/// For example:
2332/// <0, 2, 4, ..., 0, 2, 4, ...>
2333/// <0, 2, 4, ..., n, n+2, n+4, ...>
2334///
2335/// When undef's appear in the mask they are treated as if they were whatever
2336/// value is necessary in order to fit the above forms.
///
/// Returns the VPICKEV node on a match and an empty SDValue otherwise.
2338 MVT VT, SDValue V1, SDValue V2,
2339 SelectionDAG &DAG) {
2340
2341 const auto &Begin = Mask.begin();
2342 const auto &Mid = Mask.begin() + Mask.size() / 2;
2343 const auto &End = Mask.end();
2344 SDValue OriV1 = V1, OriV2 = V2;
2345
 // Choose the source for the first half of the mask, [Begin, Mid).
2346 if (fitsRegularPattern<int>(Begin, 1, Mid, 0, 2))
2347 V1 = OriV1;
2348 else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size(), 2))
2349 V1 = OriV2;
2350 else
2351 return SDValue();
2352
 // Choose the source for the second half of the mask, [Mid, End).
2353 if (fitsRegularPattern<int>(Mid, 1, End, 0, 2))
2354 V2 = OriV1;
2355 else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size(), 2))
2356 V2 = OriV2;
2357
2358 else
2359 return SDValue();
2360
 // The node takes the second-half source first, then the first-half one.
2361 return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
2362}
2363
2364/// Lower VECTOR_SHUFFLE into VPICKOD (if possible).
2365///
2366/// VPICKOD copies the odd elements of each vector into the result vector.
2367///
2368/// It is possible to lower into VPICKOD when the mask consists of two of the
2369/// following forms concatenated:
2370/// <1, 3, 5, ...>
2371/// <n+1, n+3, n+5, ...>
2372/// where n is the number of elements in the vector.
2373/// For example:
2374/// <1, 3, 5, ..., 1, 3, 5, ...>
2375/// <1, 3, 5, ..., n+1, n+3, n+5, ...>
2376///
2377/// When undef's appear in the mask they are treated as if they were whatever
2378/// value is necessary in order to fit the above forms.
///
/// Returns the VPICKOD node on a match and an empty SDValue otherwise.
2380 MVT VT, SDValue V1, SDValue V2,
2381 SelectionDAG &DAG) {
2382
2383 const auto &Begin = Mask.begin();
2384 const auto &Mid = Mask.begin() + Mask.size() / 2;
2385 const auto &End = Mask.end();
2386 SDValue OriV1 = V1, OriV2 = V2;
2387
 // Choose the source for the first half of the mask, [Begin, Mid).
2388 if (fitsRegularPattern<int>(Begin, 1, Mid, 1, 2))
2389 V1 = OriV1;
2390 else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size() + 1, 2))
2391 V1 = OriV2;
2392 else
2393 return SDValue();
2394
 // Choose the source for the second half of the mask, [Mid, End).
2395 if (fitsRegularPattern<int>(Mid, 1, End, 1, 2))
2396 V2 = OriV1;
2397 else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size() + 1, 2))
2398 V2 = OriV2;
2399 else
2400 return SDValue();
2401
 // The node takes the second-half source first, then the first-half one.
2402 return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
2403}
2404
2405/// Lower VECTOR_SHUFFLE into VEXTRINS (if possible).
2406///
2407/// VEXTRINS copies one element of a vector into any place of the result
2408/// vector and leaves the remaining elements of the result vector unchanged.
2409///
2410/// It is possible to lower into VEXTRINS when the mask takes the form:
2411/// <0, 1, 2, ..., n+i, ..., n-1> or <n, n+1, n+2, ..., i, ..., 2n-1> or
2412/// <0, 1, 2, ..., i, ..., n-1> or <n, n+1, n+2, ..., n+i, ..., 2n-1>
2413/// where n is the number of elements in the vector and i is in [0, n).
2414/// For example:
2415/// <0, 1, 2, 3, 4, 5, 6, 8> , <2, 9, 10, 11, 12, 13, 14, 15> ,
2416/// <0, 1, 2, 6, 4, 5, 6, 7> , <8, 9, 10, 11, 12, 9, 14, 15>
2417///
2418/// When undef's appear in the mask they are treated as if they were whatever
2419/// value is necessary in order to fit the above forms.
///
/// The result is expressed as EXTRACT_VECTOR_ELT + INSERT_VECTOR_ELT rather
/// than a target node. An empty SDValue is returned when the mask does not fit.
2420static SDValue
2422 SDValue V1, SDValue V2, SelectionDAG &DAG,
2423 const LoongArchSubtarget &Subtarget) {
2424 unsigned NumElts = VT.getVectorNumElements();
2425 MVT EltVT = VT.getVectorElementType();
2426 MVT GRLenVT = Subtarget.getGRLenVT();
2427
2428 if (Mask.size() != NumElts)
2429 return SDValue();
2430
 // Base selects which input is treated as the unchanged vector:
 // Base == 0 means V1 and any other value (NumElts below) means V2.
2431 auto tryLowerToExtrAndIns = [&](unsigned Base) -> SDValue {
2432 int DiffCount = 0;
2433 int DiffPos = -1;
 // Count the defined mask entries that deviate from the identity run
 // Base, Base+1, ...; undef entries (-1) never count as a deviation.
2434 for (unsigned i = 0; i < NumElts; ++i) {
2435 if (Mask[i] == -1)
2436 continue;
2437 if (Mask[i] != int(Base + i)) {
2438 ++DiffCount;
2439 DiffPos = int(i);
2440 if (DiffCount > 1)
2441 return SDValue();
2442 }
2443 }
2444
2445 // Need exactly one differing element to lower into VEXTRINS.
2446 if (DiffCount != 1)
2447 return SDValue();
2448
2449 // DiffMask must be in [0, 2N).
2450 int DiffMask = Mask[DiffPos];
2451 if (DiffMask < 0 || DiffMask >= int(2 * NumElts))
2452 return SDValue();
2453
2454 // Determine source vector and source index.
2455 SDValue SrcVec;
2456 unsigned SrcIdx;
2457 if (unsigned(DiffMask) < NumElts) {
2458 SrcVec = V1;
2459 SrcIdx = unsigned(DiffMask);
2460 } else {
2461 SrcVec = V2;
2462 SrcIdx = unsigned(DiffMask) - NumElts;
2463 }
2464
2465 // Replace with EXTRACT_VECTOR_ELT + INSERT_VECTOR_ELT, it will match the
2466 // patterns of VEXTRINS in tablegen.
 // Floating-point elements are extracted as EltVT; all other element types
 // are extracted as GRLenVT.
2467 SDValue Extracted = DAG.getNode(
2468 ISD::EXTRACT_VECTOR_ELT, DL, EltVT.isFloatingPoint() ? EltVT : GRLenVT,
2469 SrcVec, DAG.getConstant(SrcIdx, DL, GRLenVT));
2470 SDValue Result =
2471 DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, (Base == 0) ? V1 : V2,
2472 Extracted, DAG.getConstant(DiffPos, DL, GRLenVT));
2473
2474 return Result;
2475 };
2476
2477 // Try V1 as the unchanged base vector first (Base = 0), then V2 (Base = n).
2478 if (SDValue Result = tryLowerToExtrAndIns(0))
2479 return Result;
2480 return tryLowerToExtrAndIns(NumElts);
2481}
2482
2483// Check the Mask and, on success, fill in SrcVec and MaskImm, which are
2484// used to build the LoongArchISD node for VPERMI_W or XVPERMI_W.
2485// On success, return true. Otherwise, return false.
//
// Only Mask[0..3] are encoded (plus Mask[4..7] as a consistency check when the
// mask has 8 entries). SrcVec receives one entry per element pair, in the
// order (elements 0-1, elements 2-3). MaskImm is only OR-ed into, so the
// caller is expected to pass it in zeroed (both visible callers do). On a
// false return SrcVec may already hold an entry for the first pair.
2488 unsigned &MaskImm) {
2489 unsigned MaskSize = Mask.size();
2490
 // A mask entry is acceptable for a source whose indices start at Off when it
 // is undef or falls inside [Off, Off + 4).
2491 auto isValid = [&](int M, int Off) {
2492 return (M == -1) || (M >= Off && M < Off + 4);
2493 };
2494
 // Encode two mask entries as 2-bit fields at bit offsets I*2 and (I+1)*2;
 // undef entries are encoded as 0.
2495 auto buildImm = [&](int MLo, int MHi, unsigned Off, unsigned I) {
2496 auto immPart = [&](int M, unsigned Off) {
2497 return (M == -1 ? 0 : (M - Off)) & 0x3;
2498 };
2499 MaskImm |= immPart(MLo, Off) << (I * 2);
2500 MaskImm |= immPart(MHi, Off) << ((I + 1) * 2);
2501 };
2502
2503 for (unsigned i = 0; i < 4; i += 2) {
2504 int MLo = Mask[i];
2505 int MHi = Mask[i + 1];
2506
2507 if (MaskSize == 8) { // Only v8i32/v8f32 need this check.
 // The upper four mask entries must repeat the lower four, shifted by 4.
 // NOTE(review): the comparison is on raw values, so an undef (-1) in
 // either half is not treated as a wildcard here: confirm this is the
 // intended behaviour for masks such as <-1, ..., 3, ...>.
2508 int M2Lo = Mask[i + 4];
2509 int M2Hi = Mask[i + 5];
2510 if (M2Lo != MLo + 4 || M2Hi != MHi + 4)
2511 return false;
2512 }
2513
 // Both entries of a pair must come from the same input: V1 is tried first,
 // then V2 (whose indices start at MaskSize).
2514 if (isValid(MLo, 0) && isValid(MHi, 0)) {
2515 SrcVec.push_back(V1);
2516 buildImm(MLo, MHi, 0, i);
2517 } else if (isValid(MLo, MaskSize) && isValid(MHi, MaskSize)) {
2518 SrcVec.push_back(V2);
2519 buildImm(MLo, MHi, MaskSize, i);
2520 } else {
2521 return false;
2522 }
2523 }
2524
2525 return true;
2526}
2527
2528/// Lower VECTOR_SHUFFLE into VPERMI (if possible).
2529///
2530/// VPERMI selects two elements from each of the two vectors based on the
2531/// mask and places them in the corresponding positions of the result vector
2532/// in order. Only v4i32 and v4f32 types are allowed.
2533///
2534/// It is possible to lower into VPERMI when the mask consists of two of the
2535/// following forms concatenated:
2536/// <i, j, u, v>
2537/// <u, v, i, j>
2538/// where i,j are in [0,4) and u,v are in [4, 8).
2539/// For example:
2540/// <2, 3, 4, 5>
2541/// <5, 7, 0, 2>
2542///
2543/// When undef's appear in the mask they are treated as if they were whatever
2544/// value is necessary in order to fit the above forms.
///
/// Returns an empty SDValue for any other type, for a mask whose size does not
/// match the vector, or when buildVPERMIInfo rejects the mask.
2546 MVT VT, SDValue V1, SDValue V2,
2547 SelectionDAG &DAG,
2548 const LoongArchSubtarget &Subtarget) {
2549 if ((VT != MVT::v4i32 && VT != MVT::v4f32) ||
2550 Mask.size() != VT.getVectorNumElements())
2551 return SDValue();
2552
 // MaskImm must start at zero because buildVPERMIInfo only ORs bits into it.
2554 unsigned MaskImm = 0;
2555 if (!buildVPERMIInfo(Mask, V1, V2, SrcVec, MaskImm))
2556 return SDValue();
2557
 // SrcVec[0] is the source chosen for result elements 0-1 and SrcVec[1] the
 // one for elements 2-3; the node receives them in reverse order.
2558 return DAG.getNode(LoongArchISD::VPERMI, DL, VT, SrcVec[1], SrcVec[0],
2559 DAG.getConstant(MaskImm, DL, Subtarget.getGRLenVT()));
2560}
2561
2562/// Lower VECTOR_SHUFFLE into VSHUF.
2563///
2564/// This mostly consists of converting the shuffle mask into a BUILD_VECTOR and
2565/// adding it as an operand to the resulting VSHUF.
///
/// Unlike the other lowering helpers in this group there is no pattern check:
/// a VSHUF node is always returned.
2567 MVT VT, SDValue V1, SDValue V2,
2568 SelectionDAG &DAG,
2569 const LoongArchSubtarget &Subtarget) {
2570
 // Every mask entry, including undef (-1), becomes a signed GRLenVT constant.
2572 for (auto M : Mask)
2573 Ops.push_back(DAG.getSignedConstant(M, DL, Subtarget.getGRLenVT()));
2574
2575 EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
2576 SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, Ops);
2577
2578 // VECTOR_SHUFFLE concatenates the vectors in a vectorwise fashion.
2579 // <0b00, 0b01> + <0b10, 0b11> -> <0b00, 0b01, 0b10, 0b11>
2580 // VSHUF concatenates the vectors in a bitwise fashion:
2581 // <0b00, 0b01> + <0b10, 0b11> ->
2582 // 0b0100 + 0b1110 -> 0b01001110
2583 // <0b10, 0b11, 0b00, 0b01>
2584 // We must therefore swap the operands to get the correct result.
2585 return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
2586}
2587
2588/// Dispatching routine to lower various 128-bit LoongArch vector shuffles.
2589///
2590/// This routine breaks down the specific type of 128-bit shuffle and
2591/// dispatches to the lowering routines accordingly.
///
/// The helpers are tried in a fixed order and the first non-empty result is
/// returned. lowerVECTOR_SHUFFLE_VSHUF is the last one tried.
2593 SDValue V1, SDValue V2, SelectionDAG &DAG,
2594 const LoongArchSubtarget &Subtarget) {
2595 assert((VT.SimpleTy == MVT::v16i8 || VT.SimpleTy == MVT::v8i16 ||
2596 VT.SimpleTy == MVT::v4i32 || VT.SimpleTy == MVT::v2i64 ||
2597 VT.SimpleTy == MVT::v4f32 || VT.SimpleTy == MVT::v2f64) &&
2598 "Vector type is unsupported for lsx!");
2599 assert(V1.getSimpleValueType() == V2.getSimpleValueType() &&
2600 "Two operands have different types!");
2601 assert(VT.getVectorNumElements() == Mask.size() &&
2602 "Unexpected mask size for shuffle!");
2603 assert(Mask.size() % 2 == 0 && "Expected even mask size.");
2604
 // Zeroable marks the result elements reported as undef or zero by
 // computeZeroableShuffleElements; it is consumed by the shift and
 // zero/any-extend lowerings below.
2605 APInt KnownUndef, KnownZero;
2606 computeZeroableShuffleElements(Mask, V1, V2, KnownUndef, KnownZero);
2607 APInt Zeroable = KnownUndef | KnownZero;
2608
2609 SDValue Result;
2610 // TODO: Add more comparison patterns.
 // Single-input patterns are only attempted when the second operand is undef.
2611 if (V2.isUndef()) {
2612 if ((Result =
2613 lowerVECTOR_SHUFFLE_VREPLVEI(DL, Mask, VT, V1, DAG, Subtarget)))
2614 return Result;
2615 if ((Result =
2616 lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2617 return Result;
2618 if ((Result =
2619 lowerVECTOR_SHUFFLE_IsReverse(DL, Mask, VT, V1, DAG, Subtarget)))
2620 return Result;
2621
2622 // TODO: This comment may be enabled in the future to better match the
2623 // pattern for instruction selection.
2624 /* V2 = V1; */
2625 }
2626
2627 // It is recommended not to change the pattern comparison order for better
2628 // performance.
2629 if ((Result = lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG)))
2630 return Result;
2631 if ((Result = lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG)))
2632 return Result;
2633 if ((Result = lowerVECTOR_SHUFFLE_VILVH(DL, Mask, VT, V1, V2, DAG)))
2634 return Result;
2635 if ((Result = lowerVECTOR_SHUFFLE_VILVL(DL, Mask, VT, V1, V2, DAG)))
2636 return Result;
2637 if ((Result = lowerVECTOR_SHUFFLE_VPICKEV(DL, Mask, VT, V1, V2, DAG)))
2638 return Result;
2639 if ((Result = lowerVECTOR_SHUFFLE_VPICKOD(DL, Mask, VT, V1, V2, DAG)))
2640 return Result;
 // With two inputs, VSHUF4I is only attempted for the 2 x 64-bit types.
2641 if ((VT.SimpleTy == MVT::v2i64 || VT.SimpleTy == MVT::v2f64) &&
2642 (Result =
2643 lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2644 return Result;
2645 if ((Result =
2646 lowerVECTOR_SHUFFLE_VEXTRINS(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2647 return Result;
2648 if ((Result = lowerVECTOR_SHUFFLEAsShift(DL, Mask, VT, V1, V2, DAG, Subtarget,
2649 Zeroable)))
2650 return Result;
2651 if ((Result =
2652 lowerVECTOR_SHUFFLE_VPERMI(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2653 return Result;
2654 if ((Result = lowerVECTOR_SHUFFLEAsZeroOrAnyExtend(DL, Mask, VT, V1, V2, DAG,
2655 Zeroable)))
2656 return Result;
2657 if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, Mask, VT, V1, V2, DAG,
2658 Subtarget)))
2659 return Result;
2660 if (SDValue NewShuffle = widenShuffleMask(DL, Mask, VT, V1, V2, DAG))
2661 return NewShuffle;
2662 if ((Result =
2663 lowerVECTOR_SHUFFLE_VSHUF(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2664 return Result;
2665 return SDValue();
2666}
2667
2668/// Lower VECTOR_SHUFFLE into XVREPLVEI (if possible).
2669///
2670/// It is a XVREPLVEI when the mask is:
2671/// <x, x, x, ..., x+n, x+n, x+n, ...>
2672/// where the number of x is equal to n and n is half the length of vector.
2673///
2674/// When undef's appear in the mask they are treated as if they were whatever
2675/// value is necessary in order to fit the above form.
///
/// An all-undef mask yields UNDEF. A mask that does not fit yields an empty
/// SDValue.
2676static SDValue
2678 SDValue V1, SelectionDAG &DAG,
2679 const LoongArchSubtarget &Subtarget) {
 // The first defined mask entry is the candidate splat index "x".
2680 int SplatIndex = -1;
2681 for (const auto &M : Mask) {
2682 if (M != -1) {
2683 SplatIndex = M;
2684 break;
2685 }
2686 }
2687
2688 if (SplatIndex == -1)
2689 return DAG.getUNDEF(VT);
2690
2691 const auto &Begin = Mask.begin();
2692 const auto &End = Mask.end();
2693 int HalfSize = Mask.size() / 2;
2694
 // The candidate must index into the first half of the vector.
2695 if (SplatIndex >= HalfSize)
2696 return SDValue();
2697
2698 assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
 // First half must be all SplatIndex and second half all SplatIndex + HalfSize
 // (a step of 0 asks for a constant run).
2699 if (fitsRegularPattern<int>(Begin, 1, End - HalfSize, SplatIndex, 0) &&
2700 fitsRegularPattern<int>(Begin + HalfSize, 1, End, SplatIndex + HalfSize,
2701 0)) {
2702 return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
2703 DAG.getConstant(SplatIndex, DL, Subtarget.getGRLenVT()));
2704 }
2705
2706 return SDValue();
2707}
2708
2709/// Lower VECTOR_SHUFFLE into XVSHUF4I (if possible).
///
/// Four-element masks are handled here and produce a VSHUF4I_D node. Every
/// other mask size is forwarded to lowerVECTOR_SHUFFLE_VSHUF4I.
2710static SDValue
2712 SDValue V1, SDValue V2, SelectionDAG &DAG,
2713 const LoongArchSubtarget &Subtarget) {
2714 // XVSHUF4I_D must be handled separately because it is different from other
2715 // types of [X]VSHUF4I instructions.
2716 if (Mask.size() == 4) {
2717 unsigned MaskImm = 0;
 // Visit i = 1 then i = 0, so that after the shifts the field for i = 1 sits
 // in bits 3:2 of the immediate and the field for i = 0 in bits 1:0.
2718 for (int i = 1; i >= 0; --i) {
2719 int MLo = Mask[i];
2720 int MHi = Mask[i + 2];
 // Mask[i] must be undef or one of {0, 1, 4, 5}; Mask[i + 2] must be
 // undef or one of {2, 3, 6, 7}.
2721 if (!(MLo == -1 || (MLo >= 0 && MLo <= 1) || (MLo >= 4 && MLo <= 5)) ||
2722 !(MHi == -1 || (MHi >= 2 && MHi <= 3) || (MHi >= 6 && MHi <= 7)))
2723 return SDValue();
 // When both are defined they must select matching elements, 2 apart.
2724 if (MHi != -1 && MLo != -1 && MHi != MLo + 2)
2725 return SDValue();
2726
2727 MaskImm <<= 2;
 // Field value: 0/1 for indices taken from V1's range and 2/3 for indices
 // taken from V2's range; it stays 0 when both entries are undef.
2728 if (MLo != -1)
2729 MaskImm |= ((MLo <= 1) ? MLo : (MLo - 2)) & 0x3;
2730 else if (MHi != -1)
2731 MaskImm |= ((MHi <= 3) ? (MHi - 2) : (MHi - 4)) & 0x3;
2732 }
2733
2734 return DAG.getNode(LoongArchISD::VSHUF4I_D, DL, VT, V1, V2,
2735 DAG.getConstant(MaskImm, DL, Subtarget.getGRLenVT()));
2736 }
2737
2738 return lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget);
2739}
2740
2741/// Lower VECTOR_SHUFFLE into XVPERMI (if possible).
///
/// v8i32/v8f32 masks go through buildVPERMIInfo and produce a VPERMI node.
/// v4i64/v4f64 masks that only reference V1 produce an XVPERMI node. Any other
/// type, or a mask that does not fit, yields an empty SDValue.
2742static SDValue
2744 SDValue V1, SDValue V2, SelectionDAG &DAG,
2745 const LoongArchSubtarget &Subtarget) {
2746 MVT GRLenVT = Subtarget.getGRLenVT();
2747 unsigned MaskSize = Mask.size();
2748 if (MaskSize != VT.getVectorNumElements())
2749 return SDValue();
2750
2751 // Consider XVPERMI_W.
2752 if (VT == MVT::v8i32 || VT == MVT::v8f32) {
 // MaskImm must start at zero because buildVPERMIInfo only ORs bits into it.
2754 unsigned MaskImm = 0;
2755 if (!buildVPERMIInfo(Mask, V1, V2, SrcVec, MaskImm))
2756 return SDValue();
2757
 // SrcVec[0] covers elements 0-1 of each half and SrcVec[1] elements 2-3;
 // the node receives them in reverse order.
2758 return DAG.getNode(LoongArchISD::VPERMI, DL, VT, SrcVec[1], SrcVec[0],
2759 DAG.getConstant(MaskImm, DL, GRLenVT));
2760 }
2761
2762 // Consider XVPERMI_D.
2763 if (VT == MVT::v4i64 || VT == MVT::v4f64) {
 // Each defined mask entry becomes a 2-bit field at bit offset i * 2; undef
 // entries leave their field at 0. Entries that refer to V2 are rejected.
2764 unsigned MaskImm = 0;
2765 for (unsigned i = 0; i < MaskSize; ++i) {
2766 if (Mask[i] == -1)
2767 continue;
2768 if (Mask[i] >= (int)MaskSize)
2769 return SDValue();
2770 MaskImm |= Mask[i] << (i * 2);
2771 }
2772
2773 return DAG.getNode(LoongArchISD::XVPERMI, DL, VT, V1,
2774 DAG.getConstant(MaskImm, DL, GRLenVT));
2775 }
2776
2777 return SDValue();
2778}
2779
2780/// Lower VECTOR_SHUFFLE into XVPERM (if possible).
///
/// Only V1 is used as a data operand. Returns an empty SDValue for types other
/// than v8i32/v8f32, or when each half of the mask reads from a single half of
/// the vector.
2782 MVT VT, SDValue V1, SelectionDAG &DAG,
2783 const LoongArchSubtarget &Subtarget) {
2784 // LoongArch LASX only has XVPERM_W.
2785 if (Mask.size() != 8 || (VT != MVT::v8i32 && VT != MVT::v8f32))
2786 return SDValue();
2787
2788 unsigned NumElts = VT.getVectorNumElements();
2789 unsigned HalfSize = NumElts / 2;
 // Front*/Back* track whether the first/second half of the mask reads only
 // from the low half [0, HalfSize) or only from the high half
 // [HalfSize, NumElts) of the vector.
2790 bool FrontLo = true, FrontHi = true;
2791 bool BackLo = true, BackHi = true;
2792
 // Undef entries (-1) are accepted for any range.
2793 auto inRange = [](int val, int low, int high) {
2794 return (val == -1) || (val >= low && val < high);
2795 };
2796
2797 for (unsigned i = 0; i < HalfSize; ++i) {
2798 int Fronti = Mask[i];
2799 int Backi = Mask[i + HalfSize];
2800
2801 FrontLo &= inRange(Fronti, 0, HalfSize);
2802 FrontHi &= inRange(Fronti, HalfSize, NumElts);
2803 BackLo &= inRange(Backi, 0, HalfSize);
2804 BackHi &= inRange(Backi, HalfSize, NumElts);
2805 }
2806
2807 // If both the lower and upper 128-bit parts access only one half of the
2808 // vector (either lower or upper), avoid using xvperm.w. The latency of
2809 // xvperm.w(3) is higher than using xvshuf(1) and xvori(1).
2810 if ((FrontLo || FrontHi) && (BackLo || BackHi))
2811 return SDValue();
2812
 // Build the index vector operand: undef mask entries become UNDEF nodes,
 // all others become GRLenVT constants.
2814 MVT GRLenVT = Subtarget.getGRLenVT();
2815 for (unsigned i = 0; i < NumElts; ++i)
2816 Masks.push_back(Mask[i] == -1 ? DAG.getUNDEF(GRLenVT)
2817 : DAG.getConstant(Mask[i], DL, GRLenVT));
2818 SDValue MaskVec = DAG.getBuildVector(MVT::v8i32, DL, Masks);
2819
2820 return DAG.getNode(LoongArchISD::XVPERM, DL, VT, V1, MaskVec);
2821}
2822
2823/// Lower VECTOR_SHUFFLE into XVPACKEV (if possible).
///
/// Forwards all arguments unchanged to lowerVECTOR_SHUFFLE_VPACKEV and returns
/// its result.
2825 MVT VT, SDValue V1, SDValue V2,
2826 SelectionDAG &DAG) {
2827 return lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG);
2828}
2829
2830/// Lower VECTOR_SHUFFLE into XVPACKOD (if possible).
///
/// Forwards all arguments unchanged to lowerVECTOR_SHUFFLE_VPACKOD and returns
/// its result.
2832 MVT VT, SDValue V1, SDValue V2,
2833 SelectionDAG &DAG) {
2834 return lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG);
2835}
2836
2837/// Lower VECTOR_SHUFFLE into XVILVH (if possible).
///
/// The two halves of the mask are checked separately. The first half must
/// interleave consecutive runs starting at HalfSize - LeftSize and the second
/// half runs starting at HalfSize + LeftSize, each taken from either input
/// (second-input indices are offset by Mask.size()). Returns an empty SDValue
/// when the mask does not fit.
2839 MVT VT, SDValue V1, SDValue V2,
2840 SelectionDAG &DAG) {
2841
2842 const auto &Begin = Mask.begin();
2843 const auto &End = Mask.end();
2844 unsigned HalfSize = Mask.size() / 2;
2845 unsigned LeftSize = HalfSize / 2;
2846 SDValue OriV1 = V1, OriV2 = V2;
2847
 // Choose the source for the even mask positions of both halves.
2848 if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, HalfSize - LeftSize,
2849 1) &&
2850 fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize + LeftSize, 1))
2851 V1 = OriV1;
2852 else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize,
2853 Mask.size() + HalfSize - LeftSize, 1) &&
2854 fitsRegularPattern<int>(Begin + HalfSize, 2, End,
2855 Mask.size() + HalfSize + LeftSize, 1))
2856 V1 = OriV2;
2857 else
2858 return SDValue();
2859
 // Choose the source for the odd mask positions of both halves.
2860 if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, HalfSize - LeftSize,
2861 1) &&
2862 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize + LeftSize,
2863 1))
2864 V2 = OriV1;
2865 else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize,
2866 Mask.size() + HalfSize - LeftSize, 1) &&
2867 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
2868 Mask.size() + HalfSize + LeftSize, 1))
2869 V2 = OriV2;
2870 else
2871 return SDValue();
2872
 // Same node kind as the 128-bit lowering; odd-position source goes first.
2873 return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
2874}
2875
2876/// Lower VECTOR_SHUFFLE into XVILVL (if possible).
///
/// The two halves of the mask are checked separately. The first half must
/// interleave consecutive runs starting at 0 and the second half runs starting
/// at HalfSize, each taken from either input (second-input indices are offset
/// by Mask.size()). Returns an empty SDValue when the mask does not fit.
2878 MVT VT, SDValue V1, SDValue V2,
2879 SelectionDAG &DAG) {
2880
2881 const auto &Begin = Mask.begin();
2882 const auto &End = Mask.end();
2883 unsigned HalfSize = Mask.size() / 2;
2884 SDValue OriV1 = V1, OriV2 = V2;
2885
 // Choose the source for the even mask positions of both halves.
2886 if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, 0, 1) &&
2887 fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize, 1))
2888 V1 = OriV1;
2889 else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, Mask.size(), 1) &&
2890 fitsRegularPattern<int>(Begin + HalfSize, 2, End,
2891 Mask.size() + HalfSize, 1))
2892 V1 = OriV2;
2893 else
2894 return SDValue();
2895
 // Choose the source for the odd mask positions of both halves.
2896 if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, 0, 1) &&
2897 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize, 1))
2898 V2 = OriV1;
2899 else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, Mask.size(),
2900 1) &&
2901 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
2902 Mask.size() + HalfSize, 1))
2903 V2 = OriV2;
2904 else
2905 return SDValue();
2906
 // Same node kind as the 128-bit lowering; odd-position source goes first.
2907 return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
2908}
2909
2910/// Lower VECTOR_SHUFFLE into XVPICKEV (if possible).
///
/// The mask is split into four quarters. The first and third quarters must be
/// step-2 runs starting at 0 and HalfSize from one input. The second and
/// fourth quarters must be the same runs from one (possibly different) input.
/// Second-input indices are offset by Mask.size(). Returns an empty SDValue
/// when the mask does not fit.
2912 MVT VT, SDValue V1, SDValue V2,
2913 SelectionDAG &DAG) {
2914
2915 const auto &Begin = Mask.begin();
2916 const auto &LeftMid = Mask.begin() + Mask.size() / 4;
2917 const auto &Mid = Mask.begin() + Mask.size() / 2;
2918 const auto &RightMid = Mask.end() - Mask.size() / 4;
2919 const auto &End = Mask.end();
2920 unsigned HalfSize = Mask.size() / 2;
2921 SDValue OriV1 = V1, OriV2 = V2;
2922
 // Quarters [Begin, LeftMid) and [Mid, RightMid) select the first source.
2923 if (fitsRegularPattern<int>(Begin, 1, LeftMid, 0, 2) &&
2924 fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize, 2))
2925 V1 = OriV1;
2926 else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size(), 2) &&
2927 fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize, 2))
2928 V1 = OriV2;
2929 else
2930 return SDValue();
2931
 // Quarters [LeftMid, Mid) and [RightMid, End) select the second source.
2932 if (fitsRegularPattern<int>(LeftMid, 1, Mid, 0, 2) &&
2933 fitsRegularPattern<int>(RightMid, 1, End, HalfSize, 2))
2934 V2 = OriV1;
2935 else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size(), 2) &&
2936 fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize, 2))
2937 V2 = OriV2;
2938
2939 else
2940 return SDValue();
2941
 // Same node kind as the 128-bit lowering.
2942 return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
2943}
2944
2945/// Lower VECTOR_SHUFFLE into XVPICKOD (if possible).
///
/// The mask is split into four quarters. The first and third quarters must be
/// step-2 runs starting at 1 and HalfSize + 1 from one input. The second and
/// fourth quarters must be the same runs from one (possibly different) input.
/// Second-input indices are offset by Mask.size(). Returns an empty SDValue
/// when the mask does not fit.
2947 MVT VT, SDValue V1, SDValue V2,
2948 SelectionDAG &DAG) {
2949
2950 const auto &Begin = Mask.begin();
2951 const auto &LeftMid = Mask.begin() + Mask.size() / 4;
2952 const auto &Mid = Mask.begin() + Mask.size() / 2;
2953 const auto &RightMid = Mask.end() - Mask.size() / 4;
2954 const auto &End = Mask.end();
2955 unsigned HalfSize = Mask.size() / 2;
2956 SDValue OriV1 = V1, OriV2 = V2;
2957
 // Quarters [Begin, LeftMid) and [Mid, RightMid) select the first source.
2958 if (fitsRegularPattern<int>(Begin, 1, LeftMid, 1, 2) &&
2959 fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize + 1, 2))
2960 V1 = OriV1;
2961 else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size() + 1, 2) &&
2962 fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize + 1,
2963 2))
2964 V1 = OriV2;
2965 else
2966 return SDValue();
2967
 // Quarters [LeftMid, Mid) and [RightMid, End) select the second source.
2968 if (fitsRegularPattern<int>(LeftMid, 1, Mid, 1, 2) &&
2969 fitsRegularPattern<int>(RightMid, 1, End, HalfSize + 1, 2))
2970 V2 = OriV1;
2971 else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size() + 1, 2) &&
2972 fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize + 1,
2973 2))
2974 V2 = OriV2;
2975 else
2976 return SDValue();
2977
 // Same node kind as the 128-bit lowering.
2978 return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
2979}
2980
2981/// Lower VECTOR_SHUFFLE into XVEXTRINS (if possible).
///
/// The mask must be an identity run over one input, except for one replaced
/// position p in the first half and the matching position p + HalfSize in the
/// second half. The two replacement indices must also be HalfSize apart and
/// come from the same input. The result is expressed as two
/// EXTRACT_VECTOR_ELT + INSERT_VECTOR_ELT pairs. An empty SDValue is returned
/// when the mask does not fit.
2982static SDValue
2984 SDValue V1, SDValue V2, SelectionDAG &DAG,
2985 const LoongArchSubtarget &Subtarget) {
2986 int NumElts = VT.getVectorNumElements();
2987 int HalfSize = NumElts / 2;
2988 MVT EltVT = VT.getVectorElementType();
2989 MVT GRLenVT = Subtarget.getGRLenVT();
2990
2991 if ((int)Mask.size() != NumElts)
2992 return SDValue();
2993
 // Base selects which input is treated as the unchanged vector:
 // Base == 0 means V1 and any other value (NumElts below) means V2.
2994 auto tryLowerToExtrAndIns = [&](int Base) -> SDValue {
 // Collect the positions of defined mask entries that deviate from the
 // identity run Base, Base+1, ...; more than two is an immediate reject.
2995 SmallVector<int> DiffPos;
2996 for (int i = 0; i < NumElts; ++i) {
2997 if (Mask[i] == -1)
2998 continue;
2999 if (Mask[i] != Base + i) {
3000 DiffPos.push_back(i);
3001 if (DiffPos.size() > 2)
3002 return SDValue();
3003 }
3004 }
3005
3006 // Need exactly two differing elements to lower into XVEXTRINS.
3007 // If only one differing element, the element at a distance of
3008 // HalfSize from it must be undef.
3009 if (DiffPos.size() == 1) {
3010 if (DiffPos[0] < HalfSize && Mask[DiffPos[0] + HalfSize] == -1)
3011 DiffPos.push_back(DiffPos[0] + HalfSize);
3012 else if (DiffPos[0] >= HalfSize && Mask[DiffPos[0] - HalfSize] == -1)
3013 DiffPos.insert(DiffPos.begin(), DiffPos[0] - HalfSize);
3014 else
3015 return SDValue();
3016 }
 // The two positions must be exactly HalfSize apart, lower one first.
3017 if (DiffPos.size() != 2 || DiffPos[1] != DiffPos[0] + HalfSize)
3018 return SDValue();
3019
3020 // DiffMask must be in its low or high part.
3021 int DiffMaskLo = Mask[DiffPos[0]];
3022 int DiffMaskHi = Mask[DiffPos[1]];
 // An undef entry is derived from its partner at a distance of HalfSize.
3023 DiffMaskLo = DiffMaskLo == -1 ? DiffMaskHi - HalfSize : DiffMaskLo;
3024 DiffMaskHi = DiffMaskHi == -1 ? DiffMaskLo + HalfSize : DiffMaskHi;
3025 if (!(DiffMaskLo >= 0 && DiffMaskLo < HalfSize) &&
3026 !(DiffMaskLo >= NumElts && DiffMaskLo < NumElts + HalfSize))
3027 return SDValue();
3028 if (!(DiffMaskHi >= HalfSize && DiffMaskHi < NumElts) &&
3029 !(DiffMaskHi >= NumElts + HalfSize && DiffMaskHi < 2 * NumElts))
3030 return SDValue();
3031 if (DiffMaskHi != DiffMaskLo + HalfSize)
3032 return SDValue();
3033
3034 // Determine source vector and source index.
3035 SDValue SrcVec = (DiffMaskLo < HalfSize) ? V1 : V2;
3036 int SrcIdxLo =
3037 (DiffMaskLo < HalfSize) ? DiffMaskLo : (DiffMaskLo - NumElts);
3038 bool IsEltFP = EltVT.isFloatingPoint();
3039
3040 // Replace with 2*EXTRACT_VECTOR_ELT + 2*INSERT_VECTOR_ELT, it will match
3041 // the patterns of XVEXTRINS in tablegen.
3042 SDValue BaseVec = (Base == 0) ? V1 : V2;
3043 SDValue EltLo =
3044 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IsEltFP ? EltVT : GRLenVT,
3045 SrcVec, DAG.getConstant(SrcIdxLo, DL, GRLenVT));
3046 SDValue InsLo = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, BaseVec, EltLo,
3047 DAG.getConstant(DiffPos[0], DL, GRLenVT));
3048 SDValue EltHi =
3049 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IsEltFP ? EltVT : GRLenVT,
3050 SrcVec, DAG.getConstant(SrcIdxLo + HalfSize, DL, GRLenVT));
3051 SDValue Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, InsLo, EltHi,
3052 DAG.getConstant(DiffPos[1], DL, GRLenVT));
3053
3054 return Result;
3055 };
3056
3057 // Try V1 as the unchanged base vector first (Base = 0), then V2 (Base = n).
3058 if (SDValue Result = tryLowerToExtrAndIns(0))
3059 return Result;
3060 return tryLowerToExtrAndIns(NumElts);
3061}
3062
3063/// Lower VECTOR_SHUFFLE into XVINSVE0 (if possible).
///
/// Matches masks that are an identity run over one input with exactly one
/// position replaced by element 0 of the other input. Returns an empty SDValue
/// for unsupported types or when the mask does not fit.
3064static SDValue
3066 SDValue V1, SDValue V2, SelectionDAG &DAG,
3067 const LoongArchSubtarget &Subtarget) {
3068 // LoongArch LASX only supports xvinsve0.{w/d}.
3069 if (VT != MVT::v8i32 && VT != MVT::v8f32 && VT != MVT::v4i64 &&
3070 VT != MVT::v4f64)
3071 return SDValue();
3072
3073 MVT GRLenVT = Subtarget.getGRLenVT();
3074 int MaskSize = Mask.size();
3075 assert(MaskSize == (int)VT.getVectorNumElements() && "Unexpected mask size");
3076
3077 // Check if exactly one element of the Mask is replaced by 'Replaced', while
3078 // all other elements are either 'Base + i' or undef (-1). On success, return
3079 // the index of the replaced element. Otherwise, just return -1.
 // A mask with no replaced element at all also returns -1.
3080 auto checkReplaceOne = [&](int Base, int Replaced) -> int {
3081 int Idx = -1;
3082 for (int i = 0; i < MaskSize; ++i) {
3083 if (Mask[i] == Base + i || Mask[i] == -1)
3084 continue;
3085 if (Mask[i] != Replaced)
3086 return -1;
3087 if (Idx == -1)
3088 Idx = i;
3089 else
3090 return -1;
3091 }
3092 return Idx;
3093 };
3094
3095 // Case 1: the lowest element of V2 replaces one element in V1.
3096 int Idx = checkReplaceOne(0, MaskSize);
3097 if (Idx != -1)
3098 return DAG.getNode(LoongArchISD::XVINSVE0, DL, VT, V1, V2,
3099 DAG.getConstant(Idx, DL, GRLenVT));
3100
3101 // Case 2: the lowest element of V1 replaces one element in V2.
3102 Idx = checkReplaceOne(MaskSize, 0);
3103 if (Idx != -1)
3104 return DAG.getNode(LoongArchISD::XVINSVE0, DL, VT, V2, V1,
3105 DAG.getConstant(Idx, DL, GRLenVT));
3106
3107 return SDValue();
3108}
3109
3110/// Lower VECTOR_SHUFFLE into XVSHUF (if possible).
///
/// Entries in the first half of the mask may only refer to the low half of
/// either input, and entries in the second half only to the high half of
/// either input. Any other mask yields an empty SDValue. Accepted indices are
/// rewritten to the per-half numbering described below and passed to a VSHUF
/// node as a constant vector.
3112 MVT VT, SDValue V1, SDValue V2,
3113 SelectionDAG &DAG) {
3114
3115 int MaskSize = Mask.size();
3116 int HalfSize = Mask.size() / 2;
3117 const auto &Begin = Mask.begin();
3118 const auto &Mid = Mask.begin() + HalfSize;
3119 const auto &End = Mask.end();
3120
3121 // VECTOR_SHUFFLE concatenates the vectors:
3122 // <0, 1, 2, 3, 4, 5, 6, 7> + <8, 9, 10, 11, 12, 13, 14, 15>
3123 // shuffling ->
3124 // <0, 1, 2, 3, 8, 9, 10, 11> <4, 5, 6, 7, 12, 13, 14, 15>
3125 //
3126 // XVSHUF concatenates the vectors:
3127 // <a0, a1, a2, a3, b0, b1, b2, b3> + <a4, a5, a6, a7, b4, b5, b6, b7>
3128 // shuffling ->
3129 // <a0, a1, a2, a3, a4, a5, a6, a7> + <b0, b1, b2, b3, b4, b5, b6, b7>
3130 SmallVector<SDValue, 8> MaskAlloc;
 // First half: indices into the low half of the first input are kept as-is;
 // indices into the low half of the second input are reduced by HalfSize.
 // Undef entries are emitted as 0.
3131 for (auto it = Begin; it < Mid; it++) {
3132 if (*it < 0) // UNDEF
3133 MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
3134 else if ((*it >= 0 && *it < HalfSize) ||
3135 (*it >= MaskSize && *it < MaskSize + HalfSize)) {
3136 int M = *it < HalfSize ? *it : *it - HalfSize;
3137 MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
3138 } else
3139 return SDValue();
3140 }
3141 assert((int)MaskAlloc.size() == HalfSize && "xvshuf convert failed!");
3142
 // Second half: indices into the high half of the first input are reduced by
 // HalfSize; indices into the high half of the second input by MaskSize.
 // Undef entries are emitted as 0.
3143 for (auto it = Mid; it < End; it++) {
3144 if (*it < 0) // UNDEF
3145 MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
3146 else if ((*it >= HalfSize && *it < MaskSize) ||
3147 (*it >= MaskSize + HalfSize && *it < MaskSize * 2)) {
3148 int M = *it < MaskSize ? *it - HalfSize : *it - MaskSize;
3149 MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
3150 } else
3151 return SDValue();
3152 }
3153 assert((int)MaskAlloc.size() == MaskSize && "xvshuf convert failed!");
3154
3155 EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
3156 SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, MaskAlloc);
 // The data operands are passed as (V2, V1), as in lowerVECTOR_SHUFFLE_VSHUF.
3157 return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
3158}
3159
3160/// Shuffle vectors by lane to generate more optimized instructions.
3161/// 256-bit shuffles are always considered as 2-lane 128-bit shuffles.
3162///
3163/// Therefore, except for the following four cases, other cases are regarded
3164/// as cross-lane shuffles, where optimization is relatively limited.
3165///
3166/// - Shuffle high, low lanes of two inputs vector
3167/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 3, 6>
3168/// - Shuffle low, high lanes of two inputs vector
3169/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 0, 5>
3170/// - Shuffle low, low lanes of two inputs vector
3171/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 3, 6>
3172/// - Shuffle high, high lanes of two inputs vector
3173/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 0, 5>
3174///
3175/// The first case is the closest to LoongArch instructions and the other
3176/// cases need to be converted to it for processing.
3177///
3178/// This function will return true for the last three cases above and will
3179/// modify V1, V2 and Mask. Otherwise, return false for the first case and
3180/// cross-lane shuffle cases.
3182 const SDLoc &DL, MutableArrayRef<int> Mask, MVT VT, SDValue &V1,
3183 SDValue &V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget) {
3184
3185 enum HalfMaskType { HighLaneTy, LowLaneTy, None };
3186
3187 int MaskSize = Mask.size();
3188 int HalfSize = Mask.size() / 2;
3189 MVT GRLenVT = Subtarget.getGRLenVT();
3190
3191 HalfMaskType preMask = None, postMask = None;
3192
3193 if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
3194 return M < 0 || (M >= 0 && M < HalfSize) ||
3195 (M >= MaskSize && M < MaskSize + HalfSize);
3196 }))
3197 preMask = HighLaneTy;
3198 else if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
3199 return M < 0 || (M >= HalfSize && M < MaskSize) ||
3200 (M >= MaskSize + HalfSize && M < MaskSize * 2);
3201 }))
3202 preMask = LowLaneTy;
3203
3204 if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
3205 return M < 0 || (M >= HalfSize && M < MaskSize) ||
3206 (M >= MaskSize + HalfSize && M < MaskSize * 2);
3207 }))
3208 postMask = LowLaneTy;
3209 else if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
3210 return M < 0 || (M >= 0 && M < HalfSize) ||
3211 (M >= MaskSize && M < MaskSize + HalfSize);
3212 }))
3213 postMask = HighLaneTy;
3214
3215 // The pre-half of mask is high lane type, and the post-half of mask
3216 // is low lane type, which is closest to the LoongArch instructions.
3217 //
3218 // Note: In the LoongArch architecture, the high lane of mask corresponds
3219 // to the lower 128-bit of vector register, and the low lane of mask
3220 // corresponds the higher 128-bit of vector register.
3221 if (preMask == HighLaneTy && postMask == LowLaneTy) {
3222 return false;
3223 }
3224 if (preMask == LowLaneTy && postMask == HighLaneTy) {
3225 V1 = DAG.getBitcast(MVT::v4i64, V1);
3226 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
3227 DAG.getConstant(0b01001110, DL, GRLenVT));
3228 V1 = DAG.getBitcast(VT, V1);
3229
3230 if (!V2.isUndef()) {
3231 V2 = DAG.getBitcast(MVT::v4i64, V2);
3232 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
3233 DAG.getConstant(0b01001110, DL, GRLenVT));
3234 V2 = DAG.getBitcast(VT, V2);
3235 }
3236
3237 for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
3238 *it = *it < 0 ? *it : *it - HalfSize;
3239 }
3240 for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
3241 *it = *it < 0 ? *it : *it + HalfSize;
3242 }
3243 } else if (preMask == LowLaneTy && postMask == LowLaneTy) {
3244 V1 = DAG.getBitcast(MVT::v4i64, V1);
3245 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
3246 DAG.getConstant(0b11101110, DL, GRLenVT));
3247 V1 = DAG.getBitcast(VT, V1);
3248
3249 if (!V2.isUndef()) {
3250 V2 = DAG.getBitcast(MVT::v4i64, V2);
3251 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
3252 DAG.getConstant(0b11101110, DL, GRLenVT));
3253 V2 = DAG.getBitcast(VT, V2);
3254 }
3255
3256 for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
3257 *it = *it < 0 ? *it : *it - HalfSize;
3258 }
3259 } else if (preMask == HighLaneTy && postMask == HighLaneTy) {
3260 V1 = DAG.getBitcast(MVT::v4i64, V1);
3261 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
3262 DAG.getConstant(0b01000100, DL, GRLenVT));
3263 V1 = DAG.getBitcast(VT, V1);
3264
3265 if (!V2.isUndef()) {
3266 V2 = DAG.getBitcast(MVT::v4i64, V2);
3267 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
3268 DAG.getConstant(0b01000100, DL, GRLenVT));
3269 V2 = DAG.getBitcast(VT, V2);
3270 }
3271
3272 for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
3273 *it = *it < 0 ? *it : *it + HalfSize;
3274 }
3275 } else { // cross-lane
3276 return false;
3277 }
3278
3279 return true;
3280}
3281
3282/// Lower VECTOR_SHUFFLE as lane permute and then shuffle (if possible).
3283/// Only for 256-bit vector.
3284///
3285/// For example:
3286/// %2 = shufflevector <4 x i64> %0, <4 x i64> posion,
3287/// <4 x i64> <i32 0, i32 3, i32 2, i32 0>
3288/// is lowerded to:
3289/// (XVPERMI $xr2, $xr0, 78)
3290/// (XVSHUF $xr1, $xr2, $xr0)
3291/// (XVORI $xr0, $xr1, 0)
3293 ArrayRef<int> Mask,
3294 MVT VT, SDValue V1,
3295 SDValue V2,
3296 SelectionDAG &DAG) {
3297 assert(VT.is256BitVector() && "Only for 256-bit vector shuffles!");
3298 int Size = Mask.size();
3299 int LaneSize = Size / 2;
3300
3301 bool LaneCrossing[2] = {false, false};
3302 for (int i = 0; i < Size; ++i)
3303 if (Mask[i] >= 0 && ((Mask[i] % Size) / LaneSize) != (i / LaneSize))
3304 LaneCrossing[(Mask[i] % Size) / LaneSize] = true;
3305
3306 // Ensure that all lanes ared involved.
3307 if (!LaneCrossing[0] && !LaneCrossing[1])
3308 return SDValue();
3309
3310 SmallVector<int> InLaneMask;
3311 InLaneMask.assign(Mask.begin(), Mask.end());
3312 for (int i = 0; i < Size; ++i) {
3313 int &M = InLaneMask[i];
3314 if (M < 0)
3315 continue;
3316 if (((M % Size) / LaneSize) != (i / LaneSize))
3317 M = (M % LaneSize) + ((i / LaneSize) * LaneSize) + Size;
3318 }
3319
3320 SDValue Flipped = DAG.getBitcast(MVT::v4i64, V1);
3321 Flipped = DAG.getVectorShuffle(MVT::v4i64, DL, Flipped,
3322 DAG.getUNDEF(MVT::v4i64), {2, 3, 0, 1});
3323 Flipped = DAG.getBitcast(VT, Flipped);
3324 return DAG.getVectorShuffle(VT, DL, V1, Flipped, InLaneMask);
3325}
3326
3327/// Dispatching routine to lower various 256-bit LoongArch vector shuffles.
3328///
3329/// This routine breaks down the specific type of 256-bit shuffle and
3330/// dispatches to the lowering routines accordingly.
3332 SDValue V1, SDValue V2, SelectionDAG &DAG,
3333 const LoongArchSubtarget &Subtarget) {
3334 assert((VT.SimpleTy == MVT::v32i8 || VT.SimpleTy == MVT::v16i16 ||
3335 VT.SimpleTy == MVT::v8i32 || VT.SimpleTy == MVT::v4i64 ||
3336 VT.SimpleTy == MVT::v8f32 || VT.SimpleTy == MVT::v4f64) &&
3337 "Vector type is unsupported for lasx!");
3338 assert(V1.getSimpleValueType() == V2.getSimpleValueType() &&
3339 "Two operands have different types!");
3340 assert(VT.getVectorNumElements() == Mask.size() &&
3341 "Unexpected mask size for shuffle!");
3342 assert(Mask.size() % 2 == 0 && "Expected even mask size.");
3343 assert(Mask.size() >= 4 && "Mask size is less than 4.");
3344
3345 APInt KnownUndef, KnownZero;
3346 computeZeroableShuffleElements(Mask, V1, V2, KnownUndef, KnownZero);
3347 APInt Zeroable = KnownUndef | KnownZero;
3348
3349 SDValue Result;
3350 // TODO: Add more comparison patterns.
3351 if (V2.isUndef()) {
3352 if ((Result =
3353 lowerVECTOR_SHUFFLE_XVREPLVEI(DL, Mask, VT, V1, DAG, Subtarget)))
3354 return Result;
3355 if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, Mask, VT, V1, V2, DAG,
3356 Subtarget)))
3357 return Result;
3358 // Try to widen vectors to gain more optimization opportunities.
3359 if (SDValue NewShuffle = widenShuffleMask(DL, Mask, VT, V1, V2, DAG))
3360 return NewShuffle;
3361 if ((Result =
3362 lowerVECTOR_SHUFFLE_XVPERMI(DL, Mask, VT, V1, V2, DAG, Subtarget)))
3363 return Result;
3364 if ((Result = lowerVECTOR_SHUFFLE_XVPERM(DL, Mask, VT, V1, DAG, Subtarget)))
3365 return Result;
3366 if ((Result =
3367 lowerVECTOR_SHUFFLE_IsReverse(DL, Mask, VT, V1, DAG, Subtarget)))
3368 return Result;
3369
3370 // TODO: This comment may be enabled in the future to better match the
3371 // pattern for instruction selection.
3372 /* V2 = V1; */
3373 }
3374
3375 // It is recommended not to change the pattern comparison order for better
3376 // performance.
3377 if ((Result = lowerVECTOR_SHUFFLE_XVPACKEV(DL, Mask, VT, V1, V2, DAG)))
3378 return Result;
3379 if ((Result = lowerVECTOR_SHUFFLE_XVPACKOD(DL, Mask, VT, V1, V2, DAG)))
3380 return Result;
3381 if ((Result = lowerVECTOR_SHUFFLE_XVILVH(DL, Mask, VT, V1, V2, DAG)))
3382 return Result;
3383 if ((Result = lowerVECTOR_SHUFFLE_XVILVL(DL, Mask, VT, V1, V2, DAG)))
3384 return Result;
3385 if ((Result = lowerVECTOR_SHUFFLE_XVPICKEV(DL, Mask, VT, V1, V2, DAG)))
3386 return Result;
3387 if ((Result = lowerVECTOR_SHUFFLE_XVPICKOD(DL, Mask, VT, V1, V2, DAG)))
3388 return Result;
3389 if ((VT.SimpleTy == MVT::v4i64 || VT.SimpleTy == MVT::v4f64) &&
3390 (Result =
3391 lowerVECTOR_SHUFFLE_XVSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget)))
3392 return Result;
3393 if ((Result =
3394 lowerVECTOR_SHUFFLE_XVEXTRINS(DL, Mask, VT, V1, V2, DAG, Subtarget)))
3395 return Result;
3396 if ((Result = lowerVECTOR_SHUFFLEAsShift(DL, Mask, VT, V1, V2, DAG, Subtarget,
3397 Zeroable)))
3398 return Result;
3399 if ((Result =
3400 lowerVECTOR_SHUFFLE_XVPERMI(DL, Mask, VT, V1, V2, DAG, Subtarget)))
3401 return Result;
3402 if ((Result =
3403 lowerVECTOR_SHUFFLE_XVINSVE0(DL, Mask, VT, V1, V2, DAG, Subtarget)))
3404 return Result;
3405 if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, Mask, VT, V1, V2, DAG,
3406 Subtarget)))
3407 return Result;
3408
3409 // canonicalize non cross-lane shuffle vector
3410 SmallVector<int> NewMask(Mask);
3411 if (canonicalizeShuffleVectorByLane(DL, NewMask, VT, V1, V2, DAG, Subtarget))
3412 return lower256BitShuffle(DL, NewMask, VT, V1, V2, DAG, Subtarget);
3413
3414 // FIXME: Handling the remaining cases earlier can degrade performance
3415 // in some situations. Further analysis is required to enable more
3416 // effective optimizations.
3417 if (V2.isUndef()) {
3418 if ((Result = lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(DL, NewMask, VT,
3419 V1, V2, DAG)))
3420 return Result;
3421 }
3422
3423 if (SDValue NewShuffle = widenShuffleMask(DL, NewMask, VT, V1, V2, DAG))
3424 return NewShuffle;
3425 if ((Result = lowerVECTOR_SHUFFLE_XVSHUF(DL, NewMask, VT, V1, V2, DAG)))
3426 return Result;
3427
3428 return SDValue();
3429}
3430
3431SDValue LoongArchTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
3432 SelectionDAG &DAG) const {
3433 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
3434 ArrayRef<int> OrigMask = SVOp->getMask();
3435 SDValue V1 = Op.getOperand(0);
3436 SDValue V2 = Op.getOperand(1);
3437 MVT VT = Op.getSimpleValueType();
3438 int NumElements = VT.getVectorNumElements();
3439 SDLoc DL(Op);
3440
3441 bool V1IsUndef = V1.isUndef();
3442 bool V2IsUndef = V2.isUndef();
3443 if (V1IsUndef && V2IsUndef)
3444 return DAG.getUNDEF(VT);
3445
3446 // When we create a shuffle node we put the UNDEF node to second operand,
3447 // but in some cases the first operand may be transformed to UNDEF.
3448 // In this case we should just commute the node.
3449 if (V1IsUndef)
3450 return DAG.getCommutedVectorShuffle(*SVOp);
3451
3452 // Check for non-undef masks pointing at an undef vector and make the masks
3453 // undef as well. This makes it easier to match the shuffle based solely on
3454 // the mask.
3455 if (V2IsUndef &&
3456 any_of(OrigMask, [NumElements](int M) { return M >= NumElements; })) {
3457 SmallVector<int, 8> NewMask(OrigMask);
3458 for (int &M : NewMask)
3459 if (M >= NumElements)
3460 M = -1;
3461 return DAG.getVectorShuffle(VT, DL, V1, V2, NewMask);
3462 }
3463
3464 // Check for illegal shuffle mask element index values.
3465 int MaskUpperLimit = OrigMask.size() * (V2IsUndef ? 1 : 2);
3466 (void)MaskUpperLimit;
3467 assert(llvm::all_of(OrigMask,
3468 [&](int M) { return -1 <= M && M < MaskUpperLimit; }) &&
3469 "Out of bounds shuffle index");
3470
3471 // For each vector width, delegate to a specialized lowering routine.
3472 if (VT.is128BitVector())
3473 return lower128BitShuffle(DL, OrigMask, VT, V1, V2, DAG, Subtarget);
3474
3475 if (VT.is256BitVector())
3476 return lower256BitShuffle(DL, OrigMask, VT, V1, V2, DAG, Subtarget);
3477
3478 return SDValue();
3479}
3480
3481SDValue LoongArchTargetLowering::lowerFP_TO_FP16(SDValue Op,
3482 SelectionDAG &DAG) const {
3483 // Custom lower to ensure the libcall return is passed in an FPR on hard
3484 // float ABIs.
3485 SDLoc DL(Op);
3486 MakeLibCallOptions CallOptions;
3487 SDValue Op0 = Op.getOperand(0);
3488 SDValue Chain = SDValue();
3489 RTLIB::Libcall LC = RTLIB::getFPROUND(Op0.getValueType(), MVT::f16);
3490 SDValue Res;
3491 std::tie(Res, Chain) =
3492 makeLibCall(DAG, LC, MVT::f32, Op0, CallOptions, DL, Chain);
3493 if (Subtarget.is64Bit())
3494 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Res);
3495 return DAG.getBitcast(MVT::i32, Res);
3496}
3497
3498SDValue LoongArchTargetLowering::lowerFP16_TO_FP(SDValue Op,
3499 SelectionDAG &DAG) const {
3500 // Custom lower to ensure the libcall argument is passed in an FPR on hard
3501 // float ABIs.
3502 SDLoc DL(Op);
3503 MakeLibCallOptions CallOptions;
3504 SDValue Op0 = Op.getOperand(0);
3505 SDValue Chain = SDValue();
3506 SDValue Arg = Subtarget.is64Bit() ? DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64,
3507 DL, MVT::f32, Op0)
3508 : DAG.getBitcast(MVT::f32, Op0);
3509 SDValue Res;
3510 std::tie(Res, Chain) = makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Arg,
3511 CallOptions, DL, Chain);
3512 return Res;
3513}
3514
3515SDValue LoongArchTargetLowering::lowerFP_TO_BF16(SDValue Op,
3516 SelectionDAG &DAG) const {
3517 assert(Subtarget.hasBasicF() && "Unexpected custom legalization");
3518 SDLoc DL(Op);
3519 MakeLibCallOptions CallOptions;
3520 RTLIB::Libcall LC =
3521 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16);
3522 SDValue Res =
3523 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
3524 if (Subtarget.is64Bit())
3525 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Res);
3526 return DAG.getBitcast(MVT::i32, Res);
3527}
3528
3529SDValue LoongArchTargetLowering::lowerBF16_TO_FP(SDValue Op,
3530 SelectionDAG &DAG) const {
3531 assert(Subtarget.hasBasicF() && "Unexpected custom legalization");
3532 MVT VT = Op.getSimpleValueType();
3533 SDLoc DL(Op);
3534 Op = DAG.getNode(
3535 ISD::SHL, DL, Op.getOperand(0).getValueType(), Op.getOperand(0),
3536 DAG.getShiftAmountConstant(16, Op.getOperand(0).getValueType(), DL));
3537 SDValue Res = Subtarget.is64Bit() ? DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64,
3538 DL, MVT::f32, Op)
3539 : DAG.getBitcast(MVT::f32, Op);
3540 if (VT != MVT::f32)
3541 return DAG.getNode(ISD::FP_EXTEND, DL, VT, Res);
3542 return Res;
3543}
3544
3545// Lower BUILD_VECTOR as broadcast load (if possible).
3546// For example:
3547// %a = load i8, ptr %ptr
3548// %b = build_vector %a, %a, %a, %a
3549// is lowered to :
3550// (VLDREPL_B $a0, 0)
3552 const SDLoc &DL,
3553 SelectionDAG &DAG) {
3554 MVT VT = BVOp->getSimpleValueType(0);
3555 int NumOps = BVOp->getNumOperands();
3556
3557 assert((VT.is128BitVector() || VT.is256BitVector()) &&
3558 "Unsupported vector type for broadcast.");
3559
3560 SDValue IdentitySrc;
3561 bool IsIdeneity = true;
3562
3563 for (int i = 0; i != NumOps; i++) {
3564 SDValue Op = BVOp->getOperand(i);
3565 if (Op.getOpcode() != ISD::LOAD || (IdentitySrc && Op != IdentitySrc)) {
3566 IsIdeneity = false;
3567 break;
3568 }
3569 IdentitySrc = BVOp->getOperand(0);
3570 }
3571
3572 // make sure that this load is valid and only has one user.
3573 if (!IsIdeneity || !IdentitySrc || !BVOp->isOnlyUserOf(IdentitySrc.getNode()))
3574 return SDValue();
3575
3576 auto *LN = cast<LoadSDNode>(IdentitySrc);
3577 auto ExtType = LN->getExtensionType();
3578
3579 if ((ExtType == ISD::EXTLOAD || ExtType == ISD::NON_EXTLOAD) &&
3580 VT.getScalarSizeInBits() == LN->getMemoryVT().getScalarSizeInBits()) {
3581 // Indexed loads and stores are not supported on LoongArch.
3582 assert(LN->isUnindexed() && "Unexpected indexed load.");
3583
3584 SDVTList Tys = DAG.getVTList(VT, MVT::Other);
3585 // The offset operand of unindexed load is always undefined, so there is
3586 // no need to pass it to VLDREPL.
3587 SDValue Ops[] = {LN->getChain(), LN->getBasePtr()};
3588 SDValue BCast = DAG.getNode(LoongArchISD::VLDREPL, DL, Tys, Ops);
3589 DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BCast.getValue(1));
3590 return BCast;
3591 }
3592 return SDValue();
3593}
3594
3595// Sequentially insert elements from Ops into Vector, from low to high indices.
3596// Note: Ops can have fewer elements than Vector.
3598 const LoongArchSubtarget &Subtarget, SDValue &Vector,
3599 EVT ResTy) {
3600 assert(Ops.size() <= ResTy.getVectorNumElements());
3601
3602 SDValue Op0 = Ops[0];
3603 if (!Op0.isUndef())
3604 Vector = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, ResTy, Op0);
3605 for (unsigned i = 1; i < Ops.size(); ++i) {
3606 SDValue Opi = Ops[i];
3607 if (Opi.isUndef())
3608 continue;
3609 Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector, Opi,
3610 DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
3611 }
3612}
3613
3614// Build a ResTy subvector from Node, taking NumElts elements starting at index
3615// 'first'.
3617 SelectionDAG &DAG, SDLoc DL,
3618 const LoongArchSubtarget &Subtarget,
3619 EVT ResTy, unsigned first) {
3620 unsigned NumElts = ResTy.getVectorNumElements();
3621
3622 assert(first + NumElts <= Node->getSimpleValueType(0).getVectorNumElements());
3623
3624 SmallVector<SDValue, 16> Ops(Node->op_begin() + first,
3625 Node->op_begin() + first + NumElts);
3626 SDValue Vector = DAG.getUNDEF(ResTy);
3627 fillVector(Ops, DAG, DL, Subtarget, Vector, ResTy);
3628 return Vector;
3629}
3630
3631SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
3632 SelectionDAG &DAG) const {
3633 BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op);
3634 MVT VT = Node->getSimpleValueType(0);
3635 EVT ResTy = Op->getValueType(0);
3636 unsigned NumElts = ResTy.getVectorNumElements();
3637 SDLoc DL(Op);
3638 APInt SplatValue, SplatUndef;
3639 unsigned SplatBitSize;
3640 bool HasAnyUndefs;
3641 bool IsConstant = false;
3642 bool UseSameConstant = true;
3643 SDValue ConstantValue;
3644 bool Is128Vec = ResTy.is128BitVector();
3645 bool Is256Vec = ResTy.is256BitVector();
3646
3647 if ((!Subtarget.hasExtLSX() || !Is128Vec) &&
3648 (!Subtarget.hasExtLASX() || !Is256Vec))
3649 return SDValue();
3650
3651 if (SDValue Result = lowerBUILD_VECTORAsBroadCastLoad(Node, DL, DAG))
3652 return Result;
3653
3654 if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
3655 /*MinSplatBits=*/8) &&
3656 SplatBitSize <= 64) {
3657 // We can only cope with 8, 16, 32, or 64-bit elements.
3658 if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 &&
3659 SplatBitSize != 64)
3660 return SDValue();
3661
3662 if (SplatBitSize == 64 && !Subtarget.is64Bit()) {
3663 // We can only handle 64-bit elements that are within
3664 // the signed 10-bit range or match vldi patterns on 32-bit targets.
3665 // See the BUILD_VECTOR case in LoongArchDAGToDAGISel::Select().
3666 if (!SplatValue.isSignedIntN(10) &&
3667 !isImmVLDILegalForMode1(SplatValue, SplatBitSize).first)
3668 return SDValue();
3669 if ((Is128Vec && ResTy == MVT::v4i32) ||
3670 (Is256Vec && ResTy == MVT::v8i32))
3671 return Op;
3672 }
3673
3674 EVT ViaVecTy;
3675
3676 switch (SplatBitSize) {
3677 default:
3678 return SDValue();
3679 case 8:
3680 ViaVecTy = Is128Vec ? MVT::v16i8 : MVT::v32i8;
3681 break;
3682 case 16:
3683 ViaVecTy = Is128Vec ? MVT::v8i16 : MVT::v16i16;
3684 break;
3685 case 32:
3686 ViaVecTy = Is128Vec ? MVT::v4i32 : MVT::v8i32;
3687 break;
3688 case 64:
3689 ViaVecTy = Is128Vec ? MVT::v2i64 : MVT::v4i64;
3690 break;
3691 }
3692
3693 // SelectionDAG::getConstant will promote SplatValue appropriately.
3694 SDValue Result = DAG.getConstant(SplatValue, DL, ViaVecTy);
3695
3696 // Bitcast to the type we originally wanted.
3697 if (ViaVecTy != ResTy)
3698 Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result);
3699
3700 return Result;
3701 }
3702
3703 if (DAG.isSplatValue(Op, /*AllowUndefs=*/false))
3704 return Op;
3705
3706 for (unsigned i = 0; i < NumElts; ++i) {
3707 SDValue Opi = Node->getOperand(i);
3708 if (isIntOrFPConstant(Opi)) {
3709 IsConstant = true;
3710 if (!ConstantValue.getNode())
3711 ConstantValue = Opi;
3712 else if (ConstantValue != Opi)
3713 UseSameConstant = false;
3714 }
3715 }
3716
3717 // If the type of BUILD_VECTOR is v2f64, custom legalizing it has no benefits.
3718 if (IsConstant && UseSameConstant && ResTy != MVT::v2f64) {
3719 SDValue Result = DAG.getSplatBuildVector(ResTy, DL, ConstantValue);
3720 for (unsigned i = 0; i < NumElts; ++i) {
3721 SDValue Opi = Node->getOperand(i);
3722 if (!isIntOrFPConstant(Opi))
3723 Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Result, Opi,
3724 DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
3725 }
3726 return Result;
3727 }
3728
3729 if (!IsConstant) {
3730 // If the BUILD_VECTOR has a repeated pattern, use INSERT_VECTOR_ELT to fill
3731 // the sub-sequence of the vector and then broadcast the sub-sequence.
3732 //
3733 // TODO: If the BUILD_VECTOR contains undef elements, consider falling
3734 // back to use INSERT_VECTOR_ELT to materialize the vector, because it
3735 // generates worse code in some cases. This could be further optimized
3736 // with more consideration.
3738 BitVector UndefElements;
3739 if (Node->getRepeatedSequence(Sequence, &UndefElements) &&
3740 UndefElements.count() == 0) {
3741 // Using LSX instructions to fill the sub-sequence of 256-bits vector,
3742 // because the high part can be simply treated as undef.
3743 SDValue Vector = DAG.getUNDEF(ResTy);
3744 EVT FillTy = Is256Vec
3746 : ResTy;
3747 SDValue FillVec =
3748 Is256Vec ? DAG.getExtractSubvector(DL, FillTy, Vector, 0) : Vector;
3749
3750 fillVector(Sequence, DAG, DL, Subtarget, FillVec, FillTy);
3751
3752 unsigned SeqLen = Sequence.size();
3753 unsigned SplatLen = NumElts / SeqLen;
3754 MVT SplatEltTy = MVT::getIntegerVT(VT.getScalarSizeInBits() * SeqLen);
3755 MVT SplatTy = MVT::getVectorVT(SplatEltTy, SplatLen);
3756
3757 // If size of the sub-sequence is half of a 256-bits vector, bitcast the
3758 // vector to v4i64 type in order to match the pattern of XVREPLVE0Q.
3759 if (SplatEltTy == MVT::i128)
3760 SplatTy = MVT::v4i64;
3761
3762 SDValue SplatVec;
3763 SDValue SrcVec = DAG.getBitcast(
3764 SplatTy,
3765 Is256Vec ? DAG.getInsertSubvector(DL, Vector, FillVec, 0) : FillVec);
3766 if (Is256Vec) {
3767 SplatVec =
3768 DAG.getNode((SplatEltTy == MVT::i128) ? LoongArchISD::XVREPLVE0Q
3769 : LoongArchISD::XVREPLVE0,
3770 DL, SplatTy, SrcVec);
3771 } else {
3772 SplatVec = DAG.getNode(LoongArchISD::VREPLVEI, DL, SplatTy, SrcVec,
3773 DAG.getConstant(0, DL, Subtarget.getGRLenVT()));
3774 }
3775
3776 return DAG.getBitcast(ResTy, SplatVec);
3777 }
3778
3779 // Use INSERT_VECTOR_ELT operations rather than expand to stores, because
3780 // using memory operations is much lower.
3781 //
3782 // For 256-bit vectors, normally split into two halves and concatenate.
3783 // Special case: for v8i32/v8f32/v4i64/v4f64, if the upper half has only
3784 // one non-undef element, skip spliting to avoid a worse result.
3785 if (ResTy == MVT::v8i32 || ResTy == MVT::v8f32 || ResTy == MVT::v4i64 ||
3786 ResTy == MVT::v4f64) {
3787 unsigned NonUndefCount = 0;
3788 for (unsigned i = NumElts / 2; i < NumElts; ++i) {
3789 if (!Node->getOperand(i).isUndef()) {
3790 ++NonUndefCount;
3791 if (NonUndefCount > 1)
3792 break;
3793 }
3794 }
3795 if (NonUndefCount == 1)
3796 return fillSubVectorFromBuildVector(Node, DAG, DL, Subtarget, ResTy, 0);
3797 }
3798
3799 EVT VecTy =
3800 Is256Vec ? ResTy.getHalfNumVectorElementsVT(*DAG.getContext()) : ResTy;
3801 SDValue Vector =
3802 fillSubVectorFromBuildVector(Node, DAG, DL, Subtarget, VecTy, 0);
3803
3804 if (Is128Vec)
3805 return Vector;
3806
3807 SDValue VectorHi = fillSubVectorFromBuildVector(Node, DAG, DL, Subtarget,
3808 VecTy, NumElts / 2);
3809
3810 return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResTy, Vector, VectorHi);
3811 }
3812
3813 return SDValue();
3814}
3815
3816SDValue LoongArchTargetLowering::lowerCONCAT_VECTORS(SDValue Op,
3817 SelectionDAG &DAG) const {
3818 SDLoc DL(Op);
3819 MVT ResVT = Op.getSimpleValueType();
3820 assert(ResVT.is256BitVector() && Op.getNumOperands() == 2);
3821
3822 unsigned NumOperands = Op.getNumOperands();
3823 unsigned NumFreezeUndef = 0;
3824 unsigned NumZero = 0;
3825 unsigned NumNonZero = 0;
3826 unsigned NonZeros = 0;
3827 SmallSet<SDValue, 4> Undefs;
3828 for (unsigned i = 0; i != NumOperands; ++i) {
3829 SDValue SubVec = Op.getOperand(i);
3830 if (SubVec.isUndef())
3831 continue;
3832 if (ISD::isFreezeUndef(SubVec.getNode())) {
3833 // If the freeze(undef) has multiple uses then we must fold to zero.
3834 if (SubVec.hasOneUse()) {
3835 ++NumFreezeUndef;
3836 } else {
3837 ++NumZero;
3838 Undefs.insert(SubVec);
3839 }
3840 } else if (ISD::isBuildVectorAllZeros(SubVec.getNode()))
3841 ++NumZero;
3842 else {
3843 assert(i < sizeof(NonZeros) * CHAR_BIT); // Ensure the shift is in range.
3844 NonZeros |= 1 << i;
3845 ++NumNonZero;
3846 }
3847 }
3848
3849 // If we have more than 2 non-zeros, build each half separately.
3850 if (NumNonZero > 2) {
3851 MVT HalfVT = ResVT.getHalfNumVectorElementsVT();
3852 ArrayRef<SDUse> Ops = Op->ops();
3853 SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
3854 Ops.slice(0, NumOperands / 2));
3855 SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
3856 Ops.slice(NumOperands / 2));
3857 return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi);
3858 }
3859
3860 // Otherwise, build it up through insert_subvectors.
3861 SDValue Vec = NumZero ? DAG.getConstant(0, DL, ResVT)
3862 : (NumFreezeUndef ? DAG.getFreeze(DAG.getUNDEF(ResVT))
3863 : DAG.getUNDEF(ResVT));
3864
3865 // Replace Undef operands with ZeroVector.
3866 for (SDValue U : Undefs)
3867 DAG.ReplaceAllUsesWith(U, DAG.getConstant(0, DL, U.getSimpleValueType()));
3868
3869 MVT SubVT = Op.getOperand(0).getSimpleValueType();
3870 unsigned NumSubElems = SubVT.getVectorNumElements();
3871 for (unsigned i = 0; i != NumOperands; ++i) {
3872 if ((NonZeros & (1 << i)) == 0)
3873 continue;
3874
3875 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ResVT, Vec, Op.getOperand(i),
3876 DAG.getVectorIdxConstant(i * NumSubElems, DL));
3877 }
3878
3879 return Vec;
3880}
3881
3882SDValue
3883LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
3884 SelectionDAG &DAG) const {
3885 MVT EltVT = Op.getSimpleValueType();
3886 SDValue Vec = Op->getOperand(0);
3887 EVT VecTy = Vec->getValueType(0);
3888 SDValue Idx = Op->getOperand(1);
3889 SDLoc DL(Op);
3890 MVT GRLenVT = Subtarget.getGRLenVT();
3891
3892 assert(VecTy.is256BitVector() && "Unexpected EXTRACT_VECTOR_ELT vector type");
3893
3894 if (isa<ConstantSDNode>(Idx))
3895 return Op;
3896
3897 switch (VecTy.getSimpleVT().SimpleTy) {
3898 default:
3899 llvm_unreachable("Unexpected type");
3900 case MVT::v32i8:
3901 case MVT::v16i16:
3902 case MVT::v4i64:
3903 case MVT::v4f64: {
3904 // Extract the high half subvector and place it to the low half of a new
3905 // vector. It doesn't matter what the high half of the new vector is.
3906 EVT HalfTy = VecTy.getHalfNumVectorElementsVT(*DAG.getContext());
3907 SDValue VecHi =
3908 DAG.getExtractSubvector(DL, HalfTy, Vec, HalfTy.getVectorNumElements());
3909 SDValue TmpVec =
3910 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecTy, DAG.getUNDEF(VecTy),
3911 VecHi, DAG.getConstant(0, DL, GRLenVT));
3912
3913 // Shuffle the origin Vec and the TmpVec using MaskVec, the lowest element
3914 // of MaskVec is Idx, the rest do not matter. ResVec[0] will hold the
3915 // desired element.
3916 SDValue IdxCp =
3917 Subtarget.is64Bit()
3918 ? DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Idx)
3919 : DAG.getBitcast(MVT::f32, Idx);
3920 SDValue IdxVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v8f32, IdxCp);
3921 SDValue MaskVec =
3922 DAG.getBitcast((VecTy == MVT::v4f64) ? MVT::v4i64 : VecTy, IdxVec);
3923 SDValue ResVec =
3924 DAG.getNode(LoongArchISD::VSHUF, DL, VecTy, MaskVec, TmpVec, Vec);
3925
3926 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, ResVec,
3927 DAG.getConstant(0, DL, GRLenVT));
3928 }
3929 case MVT::v8i32:
3930 case MVT::v8f32: {
3931 SDValue SplatIdx = DAG.getSplatBuildVector(MVT::v8i32, DL, Idx);
3932 SDValue SplatValue =
3933 DAG.getNode(LoongArchISD::XVPERM, DL, VecTy, Vec, SplatIdx);
3934
3935 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, SplatValue,
3936 DAG.getConstant(0, DL, GRLenVT));
3937 }
3938 }
3939}
3940
3941SDValue
3942LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
3943 SelectionDAG &DAG) const {
3944 MVT VT = Op.getSimpleValueType();
3945 MVT EltVT = VT.getVectorElementType();
3946 unsigned NumElts = VT.getVectorNumElements();
3947 unsigned EltSizeInBits = EltVT.getScalarSizeInBits();
3948 SDLoc DL(Op);
3949 SDValue Op0 = Op.getOperand(0);
3950 SDValue Op1 = Op.getOperand(1);
3951 SDValue Op2 = Op.getOperand(2);
3952
3953 if (isa<ConstantSDNode>(Op2))
3954 return Op;
3955
3956 MVT IdxTy = MVT::getIntegerVT(EltSizeInBits);
3957 MVT IdxVTy = MVT::getVectorVT(IdxTy, NumElts);
3958
3959 if (!isTypeLegal(VT) || !isTypeLegal(IdxVTy))
3960 return SDValue();
3961
3962 SDValue SplatElt = DAG.getSplatBuildVector(VT, DL, Op1);
3963 SmallVector<SDValue, 32> RawIndices;
3964 SDValue SplatIdx;
3965 SDValue Indices;
3966
3967 if (!Subtarget.is64Bit() && IdxTy == MVT::i64) {
3968 MVT PairVTy = MVT::getVectorVT(MVT::i32, NumElts * 2);
3969 for (unsigned i = 0; i < NumElts; ++i) {
3970 RawIndices.push_back(Op2);
3971 RawIndices.push_back(DAG.getConstant(0, DL, MVT::i32));
3972 }
3973 SplatIdx = DAG.getBuildVector(PairVTy, DL, RawIndices);
3974 SplatIdx = DAG.getBitcast(IdxVTy, SplatIdx);
3975
3976 RawIndices.clear();
3977 for (unsigned i = 0; i < NumElts; ++i) {
3978 RawIndices.push_back(DAG.getConstant(i, DL, MVT::i32));
3979 RawIndices.push_back(DAG.getConstant(0, DL, MVT::i32));
3980 }
3981 Indices = DAG.getBuildVector(PairVTy, DL, RawIndices);
3982 Indices = DAG.getBitcast(IdxVTy, Indices);
3983 } else {
3984 SplatIdx = DAG.getSplatBuildVector(IdxVTy, DL, Op2);
3985
3986 for (unsigned i = 0; i < NumElts; ++i)
3987 RawIndices.push_back(DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
3988 Indices = DAG.getBuildVector(IdxVTy, DL, RawIndices);
3989 }
3990
3991 // insert vec, elt, idx
3992 // =>
3993 // select (splatidx == {0,1,2...}) ? splatelt : vec
3994 SDValue SelectCC =
3995 DAG.getSetCC(DL, IdxVTy, SplatIdx, Indices, ISD::CondCode::SETEQ);
3996 return DAG.getNode(ISD::VSELECT, DL, VT, SelectCC, SplatElt, Op0);
3997}
3998
3999SDValue LoongArchTargetLowering::lowerATOMIC_FENCE(SDValue Op,
4000 SelectionDAG &DAG) const {
4001 SDLoc DL(Op);
4002 SyncScope::ID FenceSSID =
4003 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
4004
4005 // singlethread fences only synchronize with signal handlers on the same
4006 // thread and thus only need to preserve instruction order, not actually
4007 // enforce memory ordering.
4008 if (FenceSSID == SyncScope::SingleThread)
4009 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
4010 return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
4011
4012 return Op;
4013}
4014
4015SDValue LoongArchTargetLowering::lowerWRITE_REGISTER(SDValue Op,
4016 SelectionDAG &DAG) const {
4017
4018 if (Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i32) {
4019 DAG.getContext()->emitError(
4020 "On LA64, only 64-bit registers can be written.");
4021 return Op.getOperand(0);
4022 }
4023
4024 if (!Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i64) {
4025 DAG.getContext()->emitError(
4026 "On LA32, only 32-bit registers can be written.");
4027 return Op.getOperand(0);
4028 }
4029
4030 return Op;
4031}
4032
// Custom lowering for a frame-address query.
//
// Operand 0 is the requested depth and must be a constant; otherwise an error
// is emitted and an empty SDValue is returned. Depth 0 yields a copy of the
// frame register; each further level loads the next frame address from memory.
//
// NOTE(review): the listing numbers embedded in this copy skip 4042, so one
// statement appears to be missing right after `MF` is obtained -- confirm
// against the upstream file before editing this function.
4033SDValue LoongArchTargetLowering::lowerFRAMEADDR(SDValue Op,
4034 SelectionDAG &DAG) const {
4035 if (!isa<ConstantSDNode>(Op.getOperand(0))) {
4036 DAG.getContext()->emitError("argument to '__builtin_frame_address' must "
4037 "be a constant integer");
4038 return SDValue();
4039 }
4040
4041 MachineFunction &MF = DAG.getMachineFunction();
4043 Register FrameReg = Subtarget.getRegisterInfo()->getFrameRegister(MF);
4044 EVT VT = Op.getValueType();
4045 SDLoc DL(Op);
// Start from the current value of the frame register.
4046 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
4047 unsigned Depth = Op.getConstantOperandVal(0);
4048 int GRLenInBytes = Subtarget.getGRLen() / 8;
4049
// Walk up `Depth` frames: the next frame address is loaded from the slot
// located two GRLen-sized words below the current frame address.
4050 while (Depth--) {
4051 int Offset = -(GRLenInBytes * 2);
4052 SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
4053 DAG.getSignedConstant(Offset, DL, VT));
4054 FrameAddr =
4055 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
4056 }
4057 return FrameAddr;
4058}
4059
// Custom lowering for a return-address query.
//
// Only depth 0 (operand 0 == 0) is supported; any other depth emits an error
// and returns an empty SDValue. For depth 0 the result is a copy of the return
// address register, which is registered as a live-in of the function.
//
// NOTE(review): the listing numbers embedded in this copy skip 4070, so one
// statement appears to be missing right after `MF` is obtained -- confirm
// against the upstream file before editing this function.
4060SDValue LoongArchTargetLowering::lowerRETURNADDR(SDValue Op,
4061 SelectionDAG &DAG) const {
4062 // Currently only support lowering return address for current frame.
4063 if (Op.getConstantOperandVal(0) != 0) {
4064 DAG.getContext()->emitError(
4065 "return address can only be determined for the current frame");
4066 return SDValue();
4067 }
4068
4069 MachineFunction &MF = DAG.getMachineFunction();
4071 MVT GRLenVT = Subtarget.getGRLenVT();
4072
4073 // Return the value of the return address register, marking it an implicit
4074 // live-in.
4075 Register Reg = MF.addLiveIn(Subtarget.getRegisterInfo()->getRARegister(),
4076 getRegClassFor(GRLenVT));
4077 return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), Reg, GRLenVT);
4078}
4079
4080SDValue LoongArchTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
4081 SelectionDAG &DAG) const {
4082 MachineFunction &MF = DAG.getMachineFunction();
4083 auto Size = Subtarget.getGRLen() / 8;
4084 auto FI = MF.getFrameInfo().CreateFixedObject(Size, 0, false);
4085 return DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
4086}
4087
// Custom lowering for va_start.
//
// Stores the address of the function's varargs frame-index slot to the memory
// location given by operand 1, chained on operand 0. Operand 2 supplies the IR
// value used for the store's pointer info.
//
// NOTE(review): the listing numbers embedded in this copy skip 4095, which
// leaves the `DAG.getFrameIndex(` call below without its final argument and
// closing parenthesis -- confirm against the upstream file.
4088SDValue LoongArchTargetLowering::lowerVASTART(SDValue Op,
4089 SelectionDAG &DAG) const {
4090 MachineFunction &MF = DAG.getMachineFunction();
4091 auto *FuncInfo = MF.getInfo<LoongArchMachineFunctionInfo>();
4092
4093 SDLoc DL(Op);
4094 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
4096
4097 // vastart just stores the address of the VarArgsFrameIndex slot into the
4098 // memory location argument.
4099 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4100 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
4101 MachinePointerInfo(SV));
4102}
4103
// Custom lowering for an unsigned-integer to floating-point conversion.
//
// Asserts that the subtarget is 64-bit with the basic F feature and without
// the basic D feature. The node is returned unchanged for three operand
// shapes (see below); otherwise the conversion is emitted as the library call
// selected by RTLIB::getUINTTOFP and the call's result is returned.
//
// NOTE(review): the listing numbers embedded in this copy skip 4132; `Result`
// is used below without a visible declaration, so that line appears to be
// missing here -- confirm against the upstream file.
4104SDValue LoongArchTargetLowering::lowerUINT_TO_FP(SDValue Op,
4105 SelectionDAG &DAG) const {
4106 assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
4107 !Subtarget.hasBasicD() && "unexpected target features");
4108
4109 SDLoc DL(Op);
4110 SDValue Op0 = Op.getOperand(0);
// Shape 1: the source is an AND with a constant strictly below 0xFFFFFFFF.
4111 if (Op0->getOpcode() == ISD::AND) {
4112 auto *C = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
4113 if (C && C->getZExtValue() < UINT64_C(0xFFFFFFFF))
4114 return Op;
4115 }
4116
// Shape 2: the source is a BSTRPICK whose operand 1 is below 0x1F and whose
// operand 2 is 0.
4117 if (Op0->getOpcode() == LoongArchISD::BSTRPICK &&
4118 Op0.getConstantOperandVal(1) < UINT64_C(0X1F) &&
4119 Op0.getConstantOperandVal(2) == UINT64_C(0))
4120 return Op;
4121
// Shape 3: the source is an AssertZext from a type narrower than i32.
// NOTE(review): the dyn_cast result is dereferenced without a null check;
// cast<VTSDNode> would state the intent and assert on a mismatch.
4122 if (Op0.getOpcode() == ISD::AssertZext &&
4123 dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLT(MVT::i32))
4124 return Op;
4125
// Fallback: emit the conversion as a runtime library call.
4126 EVT OpVT = Op0.getValueType();
4127 EVT RetVT = Op.getValueType();
4128 RTLIB::Libcall LC = RTLIB::getUINTTOFP(OpVT, RetVT);
4129 MakeLibCallOptions CallOptions;
4130 CallOptions.setTypeListBeforeSoften(OpVT, RetVT);
4131 SDValue Chain = SDValue();
4133 std::tie(Result, Chain) =
4134 makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
4135 return Result;
4136}
4137
// Custom lowering for a signed-integer to floating-point conversion.
//
// Asserts that the subtarget is 64-bit with the basic F feature and without
// the basic D feature. When the source is an assert node whose asserted type
// is at most 32 bits wide, the node is returned unchanged; otherwise the
// conversion is emitted as the library call selected by RTLIB::getSINTTOFP.
//
// NOTE(review): the listing numbers embedded in this copy skip 4147 and 4157.
// The `if` condition below is therefore missing its second alternative and
// closing parenthesis, and `Result` has no visible declaration -- confirm
// against the upstream file before editing this function.
4138SDValue LoongArchTargetLowering::lowerSINT_TO_FP(SDValue Op,
4139 SelectionDAG &DAG) const {
4140 assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
4141 !Subtarget.hasBasicD() && "unexpected target features");
4142
4143 SDLoc DL(Op);
4144 SDValue Op0 = Op.getOperand(0);
4145
// NOTE(review): the dyn_cast result is dereferenced without a null check;
// cast<VTSDNode> would state the intent and assert on a mismatch.
4146 if ((Op0.getOpcode() == ISD::AssertSext ||
4148 dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLE(MVT::i32))
4149 return Op;
4150
// Fallback: emit the conversion as a runtime library call.
4151 EVT OpVT = Op0.getValueType();
4152 EVT RetVT = Op.getValueType();
4153 RTLIB::Libcall LC = RTLIB::getSINTTOFP(OpVT, RetVT);
4154 MakeLibCallOptions CallOptions;
4155 CallOptions.setTypeListBeforeSoften(OpVT, RetVT);
4156 SDValue Chain = SDValue();
4158 std::tie(Result, Chain) =
4159 makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
4160 return Result;
4161}
4162
4163SDValue LoongArchTargetLowering::lowerBITCAST(SDValue Op,
4164 SelectionDAG &DAG) const {
4165
4166 SDLoc DL(Op);
4167 EVT VT = Op.getValueType();
4168 SDValue Op0 = Op.getOperand(0);
4169 EVT Op0VT = Op0.getValueType();
4170
4171 if (Op.getValueType() == MVT::f32 && Op0VT == MVT::i32 &&
4172 Subtarget.is64Bit() && Subtarget.hasBasicF()) {
4173 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
4174 return DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, NewOp0);
4175 }
4176 if (VT == MVT::f64 && Op0VT == MVT::i64 && !Subtarget.is64Bit()) {
4177 SDValue Lo, Hi;
4178 std::tie(Lo, Hi) = DAG.SplitScalar(Op0, DL, MVT::i32, MVT::i32);
4179 return DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64, Lo, Hi);
4180 }
4181 return Op;
4182}
4183
4184SDValue LoongArchTargetLowering::lowerFP_TO_SINT(SDValue Op,
4185 SelectionDAG &DAG) const {
4186
4187 SDLoc DL(Op);
4188 SDValue Op0 = Op.getOperand(0);
4189
4190 if (Op0.getValueType() == MVT::f16)
4191 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
4192
4193 if (Op.getValueSizeInBits() > 32 && Subtarget.hasBasicF() &&
4194 !Subtarget.hasBasicD()) {
4195 SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, MVT::f32, Op0);
4196 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Dst);
4197 }
4198
4199 EVT FPTy = EVT::getFloatingPointVT(Op.getValueSizeInBits());
4200 SDValue Trunc = DAG.getNode(LoongArchISD::FTINT, DL, FPTy, Op0);
4201 return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Trunc);
4202}
4203
// Builds the target global-address node for N's global value with offset 0 and
// the given Flags.
// NOTE(review): the first line of this signature (listing line 4204) is absent
// from this copy; presumably this is the GlobalAddressSDNode overload of
// getTargetNode, which getAddr calls -- confirm against the upstream file.
4205 SelectionDAG &DAG, unsigned Flags) {
4206 return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
4207}
4208
// Builds the target block-address node for N's block address, keeping N's
// offset and applying the given Flags.
// NOTE(review): the first line of this signature (listing line 4209) is absent
// from this copy; presumably this is the BlockAddressSDNode overload of
// getTargetNode -- confirm against the upstream file.
4210 SelectionDAG &DAG, unsigned Flags) {
4211 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
4212 Flags);
4213}
4214
// Builds the target constant-pool node for N's constant, keeping N's alignment
// and offset and applying the given Flags.
// NOTE(review): the first line of this signature (listing line 4215) is absent
// from this copy; presumably this is the ConstantPoolSDNode overload of
// getTargetNode -- confirm against the upstream file.
4216 SelectionDAG &DAG, unsigned Flags) {
4217 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
4218 N->getOffset(), Flags);
4219}
4220
// Builds the target jump-table node for N's jump-table index with the given
// Flags.
// NOTE(review): the first line of this signature (listing line 4221) is absent
// from this copy; presumably this is the JumpTableSDNode overload of
// getTargetNode -- confirm against the upstream file.
4222 SelectionDAG &DAG, unsigned Flags) {
4223 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
4224}
4225
// Builds the pseudo-instruction node that materializes the address of N.
//
// The pseudo is chosen from the code model `M` and from `IsLocal`:
//   Large        : PseudoLA_PCREL_LARGE (local) or PseudoLA_GOT_LARGE
//   Small/Medium : PseudoLA_PCREL (local) or PseudoLA_GOT
// Any other code model is a fatal error. For the non-local (GOT) forms a
// memory operand is attached to the resulting machine node.
//
// NOTE(review): the listing numbers embedded in this copy skip 4228 (part of
// the parameter list; `M` is used below without a visible declaration) and
// 4290-4292 (the leading arguments of the getMachineMemOperand call) --
// confirm against the upstream file before editing this function.
4226template <class NodeTy>
4227SDValue LoongArchTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
4229 bool IsLocal) const {
4230 SDLoc DL(N);
4231 EVT Ty = getPointerTy(DAG.getDataLayout());
// Target-specific symbol node for N, created with flags value 0.
4232 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
4233 SDValue Load;
4234
4235 switch (M) {
4236 default:
4237 report_fatal_error("Unsupported code model");
4238
4239 case CodeModel::Large: {
4240 assert(Subtarget.is64Bit() && "Large code model requires LA64");
4241
4242 // This is not actually used, but is necessary for successfully matching
4243 // the PseudoLA_*_LARGE nodes.
4244 SDValue Tmp = DAG.getConstant(0, DL, Ty);
4245 if (IsLocal) {
4246 // This generates the pattern (PseudoLA_PCREL_LARGE tmp sym), that
4247 // eventually becomes the desired 5-insn code sequence.
4248 Load = SDValue(DAG.getMachineNode(LoongArch::PseudoLA_PCREL_LARGE, DL, Ty,
4249 Tmp, Addr),
4250 0);
4251 } else {
4252 // This generates the pattern (PseudoLA_GOT_LARGE tmp sym), that
4253 // eventually becomes the desired 5-insn code sequence.
4254 Load = SDValue(
4255 DAG.getMachineNode(LoongArch::PseudoLA_GOT_LARGE, DL, Ty, Tmp, Addr),
4256 0);
4257 }
4258 break;
4259 }
4260
4261 case CodeModel::Small:
4262 case CodeModel::Medium:
4263 if (IsLocal) {
4264 // This generates the pattern (PseudoLA_PCREL sym), which
4265 //
4266 // for la32r expands to:
4267 // (addi.w (pcaddu12i %pcadd_hi20(sym)) %pcadd_lo12(.Lpcadd_hi)).
4268 //
4269 // for la32s and la64 expands to:
4270 // (addi.w/d (pcalau12i %pc_hi20(sym)) %pc_lo12(sym)).
4271 Load = SDValue(
4272 DAG.getMachineNode(LoongArch::PseudoLA_PCREL, DL, Ty, Addr), 0);
4273 } else {
4274 // This generates the pattern (PseudoLA_GOT sym), which
4275 //
4276 // for la32r expands to:
4277 // (ld.w (pcaddu12i %got_pcadd_hi20(sym)) %pcadd_lo12(.Lpcadd_hi)).
4278 //
4279 // for la32s and la64 expands to:
4280 // (ld.w/d (pcalau12i %got_pc_hi20(sym)) %got_pc_lo12(sym)).
4281 Load =
4282 SDValue(DAG.getMachineNode(LoongArch::PseudoLA_GOT, DL, Ty, Addr), 0);
4283 }
4284 }
4285
4286 if (!IsLocal) {
4287 // Mark the load instruction as invariant to enable hoisting in MachineLICM.
4288 MachineFunction &MF = DAG.getMachineFunction();
4289 MachineMemOperand *MemOp = MF.getMachineMemOperand(
4293 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
4294 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
4295 }
4296
4297 return Load;
4298}
4299
4300SDValue LoongArchTargetLowering::lowerBlockAddress(SDValue Op,
4301 SelectionDAG &DAG) const {
4302 return getAddr(cast<BlockAddressSDNode>(Op), DAG,
4303 DAG.getTarget().getCodeModel());
4304}
4305
4306SDValue LoongArchTargetLowering::lowerJumpTable(SDValue Op,
4307 SelectionDAG &DAG) const {
4308 return getAddr(cast<JumpTableSDNode>(Op), DAG,
4309 DAG.getTarget().getCodeModel());
4310}
4311
4312SDValue LoongArchTargetLowering::lowerConstantPool(SDValue Op,
4313 SelectionDAG &DAG) const {
4314 return getAddr(cast<ConstantPoolSDNode>(Op), DAG,
4315 DAG.getTarget().getCodeModel());
4316}
4317
4318SDValue LoongArchTargetLowering::lowerGlobalAddress(SDValue Op,
4319 SelectionDAG &DAG) const {
4320 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
4321 assert(N->getOffset() == 0 && "unexpected offset in global node");
4322 auto CM = DAG.getTarget().getCodeModel();
4323 const GlobalValue *GV = N->getGlobal();
4324
4325 if (GV->isDSOLocal() && isa<GlobalVariable>(GV)) {
4326 if (auto GCM = dyn_cast<GlobalVariable>(GV)->getCodeModel())
4327 CM = *GCM;
4328 }
4329
4330 return getAddr(N, DAG, CM, GV->isDSOLocal());
4331}
4332
// Builds the address of a TLS global using the pseudo-instruction `Opc`.
//
// The pseudo yields an offset. For PseudoLA_TLS_LE outside the large code
// model that offset is returned directly; in all other cases the thread
// pointer register (R2) is added to it. When `UseGOT` is set, a memory operand
// is attached to the pseudo's machine node first.
//
// NOTE(review): the listing numbers embedded in this copy skip 4360-4362, the
// leading arguments of the getMachineMemOperand call below -- confirm against
// the upstream file before editing this function.
4333SDValue LoongArchTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
4334 SelectionDAG &DAG,
4335 unsigned Opc, bool UseGOT,
4336 bool Large) const {
4337 SDLoc DL(N);
4338 EVT Ty = getPointerTy(DAG.getDataLayout());
4339 MVT GRLenVT = Subtarget.getGRLenVT();
4340
4341 // This is not actually used, but is necessary for successfully matching the
4342 // PseudoLA_*_LARGE nodes.
4343 SDValue Tmp = DAG.getConstant(0, DL, Ty);
4344 SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
4345
4346 // Only IE needs an extra argument for large code model.
4347 SDValue Offset = Opc == LoongArch::PseudoLA_TLS_IE_LARGE
4348 ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
4349 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
4350
4351 // If it is LE for normal/medium code model, the add tp operation will occur
4352 // during the pseudo-instruction expansion.
4353 if (Opc == LoongArch::PseudoLA_TLS_LE && !Large)
4354 return Offset;
4355
4356 if (UseGOT) {
4357 // Mark the load instruction as invariant to enable hoisting in MachineLICM.
4358 MachineFunction &MF = DAG.getMachineFunction();
4359 MachineMemOperand *MemOp = MF.getMachineMemOperand(
4363 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
4364 DAG.setNodeMemRefs(cast<MachineSDNode>(Offset.getNode()), {MemOp});
4365 }
4366
4367 // Add the thread pointer.
4368 return DAG.getNode(ISD::ADD, DL, Ty, Offset,
4369 DAG.getRegister(LoongArch::R2, GRLenVT));
4370}
4371
// Builds the address of a TLS global by calling __tls_get_addr.
//
// The pseudo-instruction `Opc` produces the single call argument; with `Large`
// set, the pseudo takes an extra placeholder operand. The call uses the C
// calling convention and an integer type as wide as a pointer for both the
// argument and the result. The call's result value is returned.
//
// NOTE(review): the listing numbers embedded in this copy skip 4390; `Args` is
// used below without a visible declaration, so that line appears to be missing
// here -- confirm against the upstream file.
4372SDValue LoongArchTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
4373 SelectionDAG &DAG,
4374 unsigned Opc,
4375 bool Large) const {
4376 SDLoc DL(N);
4377 EVT Ty = getPointerTy(DAG.getDataLayout());
4378 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
4379
4380 // This is not actually used, but is necessary for successfully matching the
4381 // PseudoLA_*_LARGE nodes.
4382 SDValue Tmp = DAG.getConstant(0, DL, Ty);
4383
4384 // Use a PC-relative addressing mode to access the dynamic GOT address.
4385 SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
4386 SDValue Load = Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
4387 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
4388
4389 // Prepare argument list to generate call.
4391 Args.emplace_back(Load, CallTy);
4392
4393 // Setup call to __tls_get_addr.
4394 TargetLowering::CallLoweringInfo CLI(DAG);
4395 CLI.setDebugLoc(DL)
4396 .setChain(DAG.getEntryNode())
4397 .setLibCallee(CallingConv::C, CallTy,
4398 DAG.getExternalSymbol("__tls_get_addr", Ty),
4399 std::move(Args));
4400
// Only the call's result value is returned; the output chain is dropped.
4401 return LowerCallTo(CLI).first;
4402}
4403
4404SDValue LoongArchTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
4405 SelectionDAG &DAG, unsigned Opc,
4406 bool Large) const {
4407 SDLoc DL(N);
4408 EVT Ty = getPointerTy(DAG.getDataLayout());
4409 const GlobalValue *GV = N->getGlobal();
4410
4411 // This is not actually used, but is necessary for successfully matching the
4412 // PseudoLA_*_LARGE nodes.
4413 SDValue Tmp = DAG.getConstant(0, DL, Ty);
4414
4415 // Use a PC-relative addressing mode to access the global dynamic GOT address.
4416 // This generates the pattern (PseudoLA_TLS_DESC_PC{,LARGE} sym).
4417 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
4418 return Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
4419 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
4420}
4421
// Custom lowering for a TLS global-address node.
//
// Dispatches on the TLS model of the referenced global. The two dynamic
// branches call getDynamicTLSAddr unless TLS descriptors are enabled; when
// they are, control leaves the switch and the getTLSDescAddr call at the end
// is used instead. The two static branches call getStaticTLSAddr, with and
// without the GOT. Emulated TLS is rejected with a fatal usage error.
//
// NOTE(review): the listing numbers embedded in this copy skip 4425-4426 (the
// condition guarding the first report_fatal_error) and 4441, 4451, 4460 and
// 4466 (the `case` labels of the switch). The branch descriptions above are
// taken from the surviving comments and calls -- confirm against the upstream
// file before editing this function.
4422SDValue
4423LoongArchTargetLowering::lowerGlobalTLSAddress(SDValue Op,
4424 SelectionDAG &DAG) const {
4427 report_fatal_error("In GHC calling convention TLS is not supported");
4428
4429 bool Large = DAG.getTarget().getCodeModel() == CodeModel::Large;
4430 assert((!Large || Subtarget.is64Bit()) && "Large code model requires LA64");
4431
4432 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
4433 assert(N->getOffset() == 0 && "unexpected offset in global node");
4434
4435 if (DAG.getTarget().useEmulatedTLS())
4436 reportFatalUsageError("the emulated TLS is prohibited");
4437
4438 bool IsDesc = DAG.getTarget().useTLSDESC();
4439
4440 switch (getTargetMachine().getTLSModel(N->getGlobal())) {
4442 // In this model, application code calls the dynamic linker function
4443 // __tls_get_addr to locate TLS offsets into the dynamic thread vector at
4444 // runtime.
4445 if (!IsDesc)
4446 return getDynamicTLSAddr(N, DAG,
4447 Large ? LoongArch::PseudoLA_TLS_GD_LARGE
4448 : LoongArch::PseudoLA_TLS_GD,
4449 Large);
4450 break;
4452 // Same as GeneralDynamic, except for assembly modifiers and relocation
4453 // records.
4454 if (!IsDesc)
4455 return getDynamicTLSAddr(N, DAG,
4456 Large ? LoongArch::PseudoLA_TLS_LD_LARGE
4457 : LoongArch::PseudoLA_TLS_LD,
4458 Large);
4459 break;
4461 // This model uses the GOT to resolve TLS offsets.
4462 return getStaticTLSAddr(N, DAG,
4463 Large ? LoongArch::PseudoLA_TLS_IE_LARGE
4464 : LoongArch::PseudoLA_TLS_IE,
4465 /*UseGOT=*/true, Large);
4467 // This model is used when static linking as the TLS offsets are resolved
4468 // during program linking.
4469 //
4470 // This node doesn't need an extra argument for the large code model.
4471 return getStaticTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_LE,
4472 /*UseGOT=*/false, Large);
4473 }
4474
// Reached only via the `break`s above, i.e. when TLS descriptors are in use.
4475 return getTLSDescAddr(N, DAG,
4476 Large ? LoongArch::PseudoLA_TLS_DESC_LARGE
4477 : LoongArch::PseudoLA_TLS_DESC,
4478 Large);
4479}
4480
// Checks that operand `ImmOp` of the intrinsic node `Op` is a constant that
// fits in N bits: signed when `IsSigned` is true, unsigned otherwise.
//
// Returns an empty SDValue when the immediate is in range. Otherwise an
// "argument out of range" error is emitted on the LLVM context and an UNDEF
// node of Op's value type is returned.
//
// NOTE(review): the listing numbers embedded in this copy skip 4482, the first
// line of the signature (return type, name and leading parameters) -- confirm
// against the upstream file.
4481template <unsigned N>
4483 SelectionDAG &DAG, bool IsSigned = false) {
// cast<> asserts that the operand really is a ConstantSDNode.
4484 auto *CImm = cast<ConstantSDNode>(Op->getOperand(ImmOp));
4485 // Check the ImmArg.
4486 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
4487 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
4488 DAG.getContext()->emitError(Op->getOperationName(0) +
4489 ": argument out of range.");
4490 return DAG.getNode(ISD::UNDEF, SDLoc(Op), Op.getValueType());
4491 }
4492 return SDValue();
4493}
4494
4495SDValue
4496LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
4497 SelectionDAG &DAG) const {
4498 switch (Op.getConstantOperandVal(0)) {
4499 default:
4500 return SDValue(); // Don't custom lower most intrinsics.
4501 case Intrinsic::thread_pointer: {
4502 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4503 return DAG.getRegister(LoongArch::R2, PtrVT);
4504 }
4505 case Intrinsic::loongarch_lsx_vpickve2gr_d:
4506 case Intrinsic::loongarch_lsx_vpickve2gr_du:
4507 case Intrinsic::loongarch_lsx_vreplvei_d:
4508 case Intrinsic::loongarch_lasx_xvrepl128vei_d:
4509 return checkIntrinsicImmArg<1>(Op, 2, DAG);
4510 case Intrinsic::loongarch_lsx_vreplvei_w:
4511 case Intrinsic::loongarch_lasx_xvrepl128vei_w:
4512 case Intrinsic::loongarch_lasx_xvpickve2gr_d:
4513 case Intrinsic::loongarch_lasx_xvpickve2gr_du:
4514 case Intrinsic::loongarch_lasx_xvpickve_d:
4515 case Intrinsic::loongarch_lasx_xvpickve_d_f:
4516 return checkIntrinsicImmArg<2>(Op, 2, DAG);
4517 case Intrinsic::loongarch_lasx_xvinsve0_d:
4518 return checkIntrinsicImmArg<2>(Op, 3, DAG);
4519 case Intrinsic::loongarch_lsx_vsat_b:
4520 case Intrinsic::loongarch_lsx_vsat_bu:
4521 case Intrinsic::loongarch_lsx_vrotri_b:
4522 case Intrinsic::loongarch_lsx_vsllwil_h_b:
4523 case Intrinsic::loongarch_lsx_vsllwil_hu_bu:
4524 case Intrinsic::loongarch_lsx_vsrlri_b:
4525 case Intrinsic::loongarch_lsx_vsrari_b:
4526 case Intrinsic::loongarch_lsx_vreplvei_h:
4527 case Intrinsic::loongarch_lasx_xvsat_b:
4528 case Intrinsic::loongarch_lasx_xvsat_bu:
4529 case Intrinsic::loongarch_lasx_xvrotri_b:
4530 case Intrinsic::loongarch_lasx_xvsllwil_h_b:
4531 case Intrinsic::loongarch_lasx_xvsllwil_hu_bu:
4532 case Intrinsic::loongarch_lasx_xvsrlri_b:
4533 case Intrinsic::loongarch_lasx_xvsrari_b:
4534 case Intrinsic::loongarch_lasx_xvrepl128vei_h:
4535 case Intrinsic::loongarch_lasx_xvpickve_w:
4536 case Intrinsic::loongarch_lasx_xvpickve_w_f:
4537 return checkIntrinsicImmArg<3>(Op, 2, DAG);
4538 case Intrinsic::loongarch_lasx_xvinsve0_w:
4539 return checkIntrinsicImmArg<3>(Op, 3, DAG);
4540 case Intrinsic::loongarch_lsx_vsat_h:
4541 case Intrinsic::loongarch_lsx_vsat_hu:
4542 case Intrinsic::loongarch_lsx_vrotri_h:
4543 case Intrinsic::loongarch_lsx_vsllwil_w_h:
4544 case Intrinsic::loongarch_lsx_vsllwil_wu_hu:
4545 case Intrinsic::loongarch_lsx_vsrlri_h:
4546 case Intrinsic::loongarch_lsx_vsrari_h:
4547 case Intrinsic::loongarch_lsx_vreplvei_b:
4548 case Intrinsic::loongarch_lasx_xvsat_h:
4549 case Intrinsic::loongarch_lasx_xvsat_hu:
4550 case Intrinsic::loongarch_lasx_xvrotri_h:
4551 case Intrinsic::loongarch_lasx_xvsllwil_w_h:
4552 case Intrinsic::loongarch_lasx_xvsllwil_wu_hu:
4553 case Intrinsic::loongarch_lasx_xvsrlri_h:
4554 case Intrinsic::loongarch_lasx_xvsrari_h:
4555 case Intrinsic::loongarch_lasx_xvrepl128vei_b:
4556 return checkIntrinsicImmArg<4>(Op, 2, DAG);
4557 case Intrinsic::loongarch_lsx_vsrlni_b_h:
4558 case Intrinsic::loongarch_lsx_vsrani_b_h:
4559 case Intrinsic::loongarch_lsx_vsrlrni_b_h:
4560 case Intrinsic::loongarch_lsx_vsrarni_b_h:
4561 case Intrinsic::loongarch_lsx_vssrlni_b_h:
4562 case Intrinsic::loongarch_lsx_vssrani_b_h:
4563 case Intrinsic::loongarch_lsx_vssrlni_bu_h:
4564 case Intrinsic::loongarch_lsx_vssrani_bu_h:
4565 case Intrinsic::loongarch_lsx_vssrlrni_b_h:
4566 case Intrinsic::loongarch_lsx_vssrarni_b_h:
4567 case Intrinsic::loongarch_lsx_vssrlrni_bu_h:
4568 case Intrinsic::loongarch_lsx_vssrarni_bu_h:
4569 case Intrinsic::loongarch_lasx_xvsrlni_b_h:
4570 case Intrinsic::loongarch_lasx_xvsrani_b_h:
4571 case Intrinsic::loongarch_lasx_xvsrlrni_b_h:
4572 case Intrinsic::loongarch_lasx_xvsrarni_b_h:
4573 case Intrinsic::loongarch_lasx_xvssrlni_b_h:
4574 case Intrinsic::loongarch_lasx_xvssrani_b_h:
4575 case Intrinsic::loongarch_lasx_xvssrlni_bu_h:
4576 case Intrinsic::loongarch_lasx_xvssrani_bu_h:
4577 case Intrinsic::loongarch_lasx_xvssrlrni_b_h:
4578 case Intrinsic::loongarch_lasx_xvssrarni_b_h:
4579 case Intrinsic::loongarch_lasx_xvssrlrni_bu_h:
4580 case Intrinsic::loongarch_lasx_xvssrarni_bu_h:
4581 return checkIntrinsicImmArg<4>(Op, 3, DAG);
4582 case Intrinsic::loongarch_lsx_vsat_w:
4583 case Intrinsic::loongarch_lsx_vsat_wu:
4584 case Intrinsic::loongarch_lsx_vrotri_w:
4585 case Intrinsic::loongarch_lsx_vsllwil_d_w:
4586 case Intrinsic::loongarch_lsx_vsllwil_du_wu:
4587 case Intrinsic::loongarch_lsx_vsrlri_w:
4588 case Intrinsic::loongarch_lsx_vsrari_w:
4589 case Intrinsic::loongarch_lsx_vslei_bu:
4590 case Intrinsic::loongarch_lsx_vslei_hu:
4591 case Intrinsic::loongarch_lsx_vslei_wu:
4592 case Intrinsic::loongarch_lsx_vslei_du:
4593 case Intrinsic::loongarch_lsx_vslti_bu:
4594 case Intrinsic::loongarch_lsx_vslti_hu:
4595 case Intrinsic::loongarch_lsx_vslti_wu:
4596 case Intrinsic::loongarch_lsx_vslti_du:
4597 case Intrinsic::loongarch_lsx_vbsll_v:
4598 case Intrinsic::loongarch_lsx_vbsrl_v:
4599 case Intrinsic::loongarch_lasx_xvsat_w:
4600 case Intrinsic::loongarch_lasx_xvsat_wu:
4601 case Intrinsic::loongarch_lasx_xvrotri_w:
4602 case Intrinsic::loongarch_lasx_xvsllwil_d_w:
4603 case Intrinsic::loongarch_lasx_xvsllwil_du_wu:
4604 case Intrinsic::loongarch_lasx_xvsrlri_w:
4605 case Intrinsic::loongarch_lasx_xvsrari_w:
4606 case Intrinsic::loongarch_lasx_xvslei_bu:
4607 case Intrinsic::loongarch_lasx_xvslei_hu:
4608 case Intrinsic::loongarch_lasx_xvslei_wu:
4609 case Intrinsic::loongarch_lasx_xvslei_du:
4610 case Intrinsic::loongarch_lasx_xvslti_bu:
4611 case Intrinsic::loongarch_lasx_xvslti_hu:
4612 case Intrinsic::loongarch_lasx_xvslti_wu:
4613 case Intrinsic::loongarch_lasx_xvslti_du:
4614 case Intrinsic::loongarch_lasx_xvbsll_v:
4615 case Intrinsic::loongarch_lasx_xvbsrl_v:
4616 return checkIntrinsicImmArg<5>(Op, 2, DAG);
4617 case Intrinsic::loongarch_lsx_vseqi_b:
4618 case Intrinsic::loongarch_lsx_vseqi_h:
4619 case Intrinsic::loongarch_lsx_vseqi_w:
4620 case Intrinsic::loongarch_lsx_vseqi_d:
4621 case Intrinsic::loongarch_lsx_vslei_b:
4622 case Intrinsic::loongarch_lsx_vslei_h:
4623 case Intrinsic::loongarch_lsx_vslei_w:
4624 case Intrinsic::loongarch_lsx_vslei_d:
4625 case Intrinsic::loongarch_lsx_vslti_b:
4626 case Intrinsic::loongarch_lsx_vslti_h:
4627 case Intrinsic::loongarch_lsx_vslti_w:
4628 case Intrinsic::loongarch_lsx_vslti_d:
4629 case Intrinsic::loongarch_lasx_xvseqi_b:
4630 case Intrinsic::loongarch_lasx_xvseqi_h:
4631 case Intrinsic::loongarch_lasx_xvseqi_w:
4632 case Intrinsic::loongarch_lasx_xvseqi_d:
4633 case Intrinsic::loongarch_lasx_xvslei_b:
4634 case Intrinsic::loongarch_lasx_xvslei_h:
4635 case Intrinsic::loongarch_lasx_xvslei_w:
4636 case Intrinsic::loongarch_lasx_xvslei_d:
4637 case Intrinsic::loongarch_lasx_xvslti_b:
4638 case Intrinsic::loongarch_lasx_xvslti_h:
4639 case Intrinsic::loongarch_lasx_xvslti_w:
4640 case Intrinsic::loongarch_lasx_xvslti_d:
4641 return checkIntrinsicImmArg<5>(Op, 2, DAG, /*IsSigned=*/true);
4642 case Intrinsic::loongarch_lsx_vsrlni_h_w:
4643 case Intrinsic::loongarch_lsx_vsrani_h_w:
4644 case Intrinsic::loongarch_lsx_vsrlrni_h_w:
4645 case Intrinsic::loongarch_lsx_vsrarni_h_w:
4646 case Intrinsic::loongarch_lsx_vssrlni_h_w:
4647 case Intrinsic::loongarch_lsx_vssrani_h_w:
4648 case Intrinsic::loongarch_lsx_vssrlni_hu_w:
4649 case Intrinsic::loongarch_lsx_vssrani_hu_w:
4650 case Intrinsic::loongarch_lsx_vssrlrni_h_w:
4651 case Intrinsic::loongarch_lsx_vssrarni_h_w:
4652 case Intrinsic::loongarch_lsx_vssrlrni_hu_w:
4653 case Intrinsic::loongarch_lsx_vssrarni_hu_w:
4654 case Intrinsic::loongarch_lsx_vfrstpi_b:
4655 case Intrinsic::loongarch_lsx_vfrstpi_h:
4656 case Intrinsic::loongarch_lasx_xvsrlni_h_w:
4657 case Intrinsic::loongarch_lasx_xvsrani_h_w:
4658 case Intrinsic::loongarch_lasx_xvsrlrni_h_w:
4659 case Intrinsic::loongarch_lasx_xvsrarni_h_w:
4660 case Intrinsic::loongarch_lasx_xvssrlni_h_w:
4661 case Intrinsic::loongarch_lasx_xvssrani_h_w:
4662 case Intrinsic::loongarch_lasx_xvssrlni_hu_w:
4663 case Intrinsic::loongarch_lasx_xvssrani_hu_w:
4664 case Intrinsic::loongarch_lasx_xvssrlrni_h_w:
4665 case Intrinsic::loongarch_lasx_xvssrarni_h_w:
4666 case Intrinsic::loongarch_lasx_xvssrlrni_hu_w:
4667 case Intrinsic::loongarch_lasx_xvssrarni_hu_w:
4668 case Intrinsic::loongarch_lasx_xvfrstpi_b:
4669 case Intrinsic::loongarch_lasx_xvfrstpi_h:
4670 return checkIntrinsicImmArg<5>(Op, 3, DAG);
4671 case Intrinsic::loongarch_lsx_vsat_d:
4672 case Intrinsic::loongarch_lsx_vsat_du:
4673 case Intrinsic::loongarch_lsx_vrotri_d:
4674 case Intrinsic::loongarch_lsx_vsrlri_d:
4675 case Intrinsic::loongarch_lsx_vsrari_d:
4676 case Intrinsic::loongarch_lasx_xvsat_d:
4677 case Intrinsic::loongarch_lasx_xvsat_du:
4678 case Intrinsic::loongarch_lasx_xvrotri_d:
4679 case Intrinsic::loongarch_lasx_xvsrlri_d:
4680 case Intrinsic::loongarch_lasx_xvsrari_d:
4681 return checkIntrinsicImmArg<6>(Op, 2, DAG);
4682 case Intrinsic::loongarch_lsx_vsrlni_w_d:
4683 case Intrinsic::loongarch_lsx_vsrani_w_d:
4684 case Intrinsic::loongarch_lsx_vsrlrni_w_d:
4685 case Intrinsic::loongarch_lsx_vsrarni_w_d:
4686 case Intrinsic::loongarch_lsx_vssrlni_w_d:
4687 case Intrinsic::loongarch_lsx_vssrani_w_d:
4688 case Intrinsic::loongarch_lsx_vssrlni_wu_d:
4689 case Intrinsic::loongarch_lsx_vssrani_wu_d:
4690 case Intrinsic::loongarch_lsx_vssrlrni_w_d:
4691 case Intrinsic::loongarch_lsx_vssrarni_w_d:
4692 case Intrinsic::loongarch_lsx_vssrlrni_wu_d:
4693 case Intrinsic::loongarch_lsx_vssrarni_wu_d:
4694 case Intrinsic::loongarch_lasx_xvsrlni_w_d:
4695 case Intrinsic::loongarch_lasx_xvsrani_w_d:
4696 case Intrinsic::loongarch_lasx_xvsrlrni_w_d:
4697 case Intrinsic::loongarch_lasx_xvsrarni_w_d:
4698 case Intrinsic::loongarch_lasx_xvssrlni_w_d:
4699 case Intrinsic::loongarch_lasx_xvssrani_w_d:
4700 case Intrinsic::loongarch_lasx_xvssrlni_wu_d:
4701 case Intrinsic::loongarch_lasx_xvssrani_wu_d:
4702 case Intrinsic::loongarch_lasx_xvssrlrni_w_d:
4703 case Intrinsic::loongarch_lasx_xvssrarni_w_d:
4704 case Intrinsic::loongarch_lasx_xvssrlrni_wu_d:
4705 case Intrinsic::loongarch_lasx_xvssrarni_wu_d:
4706 return checkIntrinsicImmArg<6>(Op, 3, DAG);
4707 case Intrinsic::loongarch_lsx_vsrlni_d_q:
4708 case Intrinsic::loongarch_lsx_vsrani_d_q:
4709 case Intrinsic::loongarch_lsx_vsrlrni_d_q:
4710 case Intrinsic::loongarch_lsx_vsrarni_d_q:
4711 case Intrinsic::loongarch_lsx_vssrlni_d_q:
4712 case Intrinsic::loongarch_lsx_vssrani_d_q:
4713 case Intrinsic::loongarch_lsx_vssrlni_du_q:
4714 case Intrinsic::loongarch_lsx_vssrani_du_q:
4715 case Intrinsic::loongarch_lsx_vssrlrni_d_q:
4716 case Intrinsic::loongarch_lsx_vssrarni_d_q:
4717 case Intrinsic::loongarch_lsx_vssrlrni_du_q:
4718 case Intrinsic::loongarch_lsx_vssrarni_du_q:
4719 case Intrinsic::loongarch_lasx_xvsrlni_d_q:
4720 case Intrinsic::loongarch_lasx_xvsrani_d_q:
4721 case Intrinsic::loongarch_lasx_xvsrlrni_d_q:
4722 case Intrinsic::loongarch_lasx_xvsrarni_d_q:
4723 case Intrinsic::loongarch_lasx_xvssrlni_d_q:
4724 case Intrinsic::loongarch_lasx_xvssrani_d_q:
4725 case Intrinsic::loongarch_lasx_xvssrlni_du_q:
4726 case Intrinsic::loongarch_lasx_xvssrani_du_q:
4727 case Intrinsic::loongarch_lasx_xvssrlrni_d_q:
4728 case Intrinsic::loongarch_lasx_xvssrarni_d_q:
4729 case Intrinsic::loongarch_lasx_xvssrlrni_du_q:
4730 case Intrinsic::loongarch_lasx_xvssrarni_du_q:
4731 return checkIntrinsicImmArg<7>(Op, 3, DAG);
4732 case Intrinsic::loongarch_lsx_vnori_b:
4733 case Intrinsic::loongarch_lsx_vshuf4i_b:
4734 case Intrinsic::loongarch_lsx_vshuf4i_h:
4735 case Intrinsic::loongarch_lsx_vshuf4i_w:
4736 case Intrinsic::loongarch_lasx_xvnori_b:
4737 case Intrinsic::loongarch_lasx_xvshuf4i_b:
4738 case Intrinsic::loongarch_lasx_xvshuf4i_h:
4739 case Intrinsic::loongarch_lasx_xvshuf4i_w:
4740 case Intrinsic::loongarch_lasx_xvpermi_d:
4741 return checkIntrinsicImmArg<8>(Op, 2, DAG);
4742 case Intrinsic::loongarch_lsx_vshuf4i_d:
4743 case Intrinsic::loongarch_lsx_vpermi_w:
4744 case Intrinsic::loongarch_lsx_vbitseli_b:
4745 case Intrinsic::loongarch_lsx_vextrins_b:
4746 case Intrinsic::loongarch_lsx_vextrins_h:
4747 case Intrinsic::loongarch_lsx_vextrins_w:
4748 case Intrinsic::loongarch_lsx_vextrins_d:
4749 case Intrinsic::loongarch_lasx_xvshuf4i_d:
4750 case Intrinsic::loongarch_lasx_xvpermi_w:
4751 case Intrinsic::loongarch_lasx_xvpermi_q:
4752 case Intrinsic::loongarch_lasx_xvbitseli_b:
4753 case Intrinsic::loongarch_lasx_xvextrins_b:
4754 case Intrinsic::loongarch_lasx_xvextrins_h:
4755 case Intrinsic::loongarch_lasx_xvextrins_w:
4756 case Intrinsic::loongarch_lasx_xvextrins_d:
4757 return checkIntrinsicImmArg<8>(Op, 3, DAG);
4758 case Intrinsic::loongarch_lsx_vrepli_b:
4759 case Intrinsic::loongarch_lsx_vrepli_h:
4760 case Intrinsic::loongarch_lsx_vrepli_w:
4761 case Intrinsic::loongarch_lsx_vrepli_d:
4762 case Intrinsic::loongarch_lasx_xvrepli_b:
4763 case Intrinsic::loongarch_lasx_xvrepli_h:
4764 case Intrinsic::loongarch_lasx_xvrepli_w:
4765 case Intrinsic::loongarch_lasx_xvrepli_d:
4766 return checkIntrinsicImmArg<10>(Op, 1, DAG, /*IsSigned=*/true);
4767 case Intrinsic::loongarch_lsx_vldi:
4768 case Intrinsic::loongarch_lasx_xvldi:
4769 return checkIntrinsicImmArg<13>(Op, 1, DAG, /*IsSigned=*/true);
4770 }
4771}
4772
4773// Helper for intrinsics with a chain: emits "<operation name>: <ErrorMsg>."
4774// as an error, then returns the merged pair {UNDEF of Op's type, Op's chain}.
// NOTE(review): the listing numbers embedded in this copy skip 4775, the first
// line of the signature (return type, name and leading parameter) -- confirm
// against the upstream file.
4776 StringRef ErrorMsg,
4777 SelectionDAG &DAG) {
4778 DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
// Operand 0 of Op is the incoming chain; it is passed through unchanged.
4779 return DAG.getMergeValues({DAG.getUNDEF(Op.getValueType()), Op.getOperand(0)},
4780 SDLoc(Op));
4781}
4782
4783SDValue
4784LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
4785 SelectionDAG &DAG) const {
4786 SDLoc DL(Op);
4787 MVT GRLenVT = Subtarget.getGRLenVT();
4788 EVT VT = Op.getValueType();
4789 SDValue Chain = Op.getOperand(0);
4790 const StringRef ErrorMsgOOR = "argument out of range";
4791 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
4792 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
4793
4794 switch (Op.getConstantOperandVal(1)) {
4795 default:
4796 return Op;
4797 case Intrinsic::loongarch_crc_w_b_w:
4798 case Intrinsic::loongarch_crc_w_h_w:
4799 case Intrinsic::loongarch_crc_w_w_w:
4800 case Intrinsic::loongarch_crc_w_d_w:
4801 case Intrinsic::loongarch_crcc_w_b_w:
4802 case Intrinsic::loongarch_crcc_w_h_w:
4803 case Intrinsic::loongarch_crcc_w_w_w:
4804 case Intrinsic::loongarch_crcc_w_d_w:
4805 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqLA64, DAG);
4806 case Intrinsic::loongarch_csrrd_w:
4807 case Intrinsic::loongarch_csrrd_d: {
4808 unsigned Imm = Op.getConstantOperandVal(2);
4809 return !isUInt<14>(Imm)
4810 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4811 : DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
4812 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
4813 }
4814 case Intrinsic::loongarch_csrwr_w:
4815 case Intrinsic::loongarch_csrwr_d: {
4816 unsigned Imm = Op.getConstantOperandVal(3);
4817 return !isUInt<14>(Imm)
4818 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4819 : DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
4820 {Chain, Op.getOperand(2),
4821 DAG.getConstant(Imm, DL, GRLenVT)});
4822 }
4823 case Intrinsic::loongarch_csrxchg_w:
4824 case Intrinsic::loongarch_csrxchg_d: {
4825 unsigned Imm = Op.getConstantOperandVal(4);
4826 return !isUInt<14>(Imm)
4827 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4828 : DAG.getNode(LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
4829 {Chain, Op.getOperand(2), Op.getOperand(3),
4830 DAG.getConstant(Imm, DL, GRLenVT)});
4831 }
4832 case Intrinsic::loongarch_iocsrrd_d: {
4833 return DAG.getNode(
4834 LoongArchISD::IOCSRRD_D, DL, {GRLenVT, MVT::Other},
4835 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2))});
4836 }
4837#define IOCSRRD_CASE(NAME, NODE) \
4838 case Intrinsic::loongarch_##NAME: { \
4839 return DAG.getNode(LoongArchISD::NODE, DL, {GRLenVT, MVT::Other}, \
4840 {Chain, Op.getOperand(2)}); \
4841 }
4842 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
4843 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
4844 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
4845#undef IOCSRRD_CASE
4846 case Intrinsic::loongarch_cpucfg: {
4847 return DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
4848 {Chain, Op.getOperand(2)});
4849 }
4850 case Intrinsic::loongarch_lddir_d: {
4851 unsigned Imm = Op.getConstantOperandVal(3);
4852 return !isUInt<8>(Imm)
4853 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4854 : Op;
4855 }
4856 case Intrinsic::loongarch_movfcsr2gr: {
4857 if (!Subtarget.hasBasicF())
4858 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqF, DAG);
4859 unsigned Imm = Op.getConstantOperandVal(2);
4860 return !isUInt<2>(Imm)
4861 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4862 : DAG.getNode(LoongArchISD::MOVFCSR2GR, DL, {VT, MVT::Other},
4863 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
4864 }
4865 case Intrinsic::loongarch_lsx_vld:
4866 case Intrinsic::loongarch_lsx_vldrepl_b:
4867 case Intrinsic::loongarch_lasx_xvld:
4868 case Intrinsic::loongarch_lasx_xvldrepl_b:
4869 return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
4870 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4871 : SDValue();
4872 case Intrinsic::loongarch_lsx_vldrepl_h:
4873 case Intrinsic::loongarch_lasx_xvldrepl_h:
4874 return !isShiftedInt<11, 1>(
4875 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
4877 Op, "argument out of range or not a multiple of 2", DAG)
4878 : SDValue();
4879 case Intrinsic::loongarch_lsx_vldrepl_w:
4880 case Intrinsic::loongarch_lasx_xvldrepl_w:
4881 return !isShiftedInt<10, 2>(
4882 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
4884 Op, "argument out of range or not a multiple of 4", DAG)
4885 : SDValue();
4886 case Intrinsic::loongarch_lsx_vldrepl_d:
4887 case Intrinsic::loongarch_lasx_xvldrepl_d:
4888 return !isShiftedInt<9, 3>(
4889 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
4891 Op, "argument out of range or not a multiple of 8", DAG)
4892 : SDValue();
4893 }
4894}
4895
4896// Helper function that emits error message for intrinsics with void return
4897// value and return the chain.
4899 SelectionDAG &DAG) {
4900
4901 DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
4902 return Op.getOperand(0);
4903}
4904
4905SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op,
4906 SelectionDAG &DAG) const {
4907 SDLoc DL(Op);
4908 MVT GRLenVT = Subtarget.getGRLenVT();
4909 SDValue Chain = Op.getOperand(0);
4910 uint64_t IntrinsicEnum = Op.getConstantOperandVal(1);
4911 SDValue Op2 = Op.getOperand(2);
4912 const StringRef ErrorMsgOOR = "argument out of range";
4913 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
4914 const StringRef ErrorMsgReqLA32 = "requires loongarch32";
4915 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
4916
4917 switch (IntrinsicEnum) {
4918 default:
4919 // TODO: Add more Intrinsics.
4920 return SDValue();
4921 case Intrinsic::loongarch_cacop_d:
4922 case Intrinsic::loongarch_cacop_w: {
4923 if (IntrinsicEnum == Intrinsic::loongarch_cacop_d && !Subtarget.is64Bit())
4924 return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG);
4925 if (IntrinsicEnum == Intrinsic::loongarch_cacop_w && Subtarget.is64Bit())
4926 return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA32, DAG);
4927 // call void @llvm.loongarch.cacop.[d/w](uimm5, rj, simm12)
4928 unsigned Imm1 = Op2->getAsZExtVal();
4929 int Imm2 = cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue();
4930 if (!isUInt<5>(Imm1) || !isInt<12>(Imm2))
4931 return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
4932 return Op;
4933 }
4934 case Intrinsic::loongarch_dbar: {
4935 unsigned Imm = Op2->getAsZExtVal();
4936 return !isUInt<15>(Imm)
4937 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4938 : DAG.getNode(LoongArchISD::DBAR, DL, MVT::Other, Chain,
4939 DAG.getConstant(Imm, DL, GRLenVT));
4940 }
4941 case Intrinsic::loongarch_ibar: {
4942 unsigned Imm = Op2->getAsZExtVal();
4943 return !isUInt<15>(Imm)
4944 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4945 : DAG.getNode(LoongArchISD::IBAR, DL, MVT::Other, Chain,
4946 DAG.getConstant(Imm, DL, GRLenVT));
4947 }
4948 case Intrinsic::loongarch_break: {
4949 unsigned Imm = Op2->getAsZExtVal();
4950 return !isUInt<15>(Imm)
4951 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4952 : DAG.getNode(LoongArchISD::BREAK, DL, MVT::Other, Chain,
4953 DAG.getConstant(Imm, DL, GRLenVT));
4954 }
4955 case Intrinsic::loongarch_movgr2fcsr: {
4956 if (!Subtarget.hasBasicF())
4957 return emitIntrinsicErrorMessage(Op, ErrorMsgReqF, DAG);
4958 unsigned Imm = Op2->getAsZExtVal();
4959 return !isUInt<2>(Imm)
4960 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4961 : DAG.getNode(LoongArchISD::MOVGR2FCSR, DL, MVT::Other, Chain,
4962 DAG.getConstant(Imm, DL, GRLenVT),
4963 DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT,
4964 Op.getOperand(3)));
4965 }
4966 case Intrinsic::loongarch_syscall: {
4967 unsigned Imm = Op2->getAsZExtVal();
4968 return !isUInt<15>(Imm)
4969 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4970 : DAG.getNode(LoongArchISD::SYSCALL, DL, MVT::Other, Chain,
4971 DAG.getConstant(Imm, DL, GRLenVT));
4972 }
4973#define IOCSRWR_CASE(NAME, NODE) \
4974 case Intrinsic::loongarch_##NAME: { \
4975 SDValue Op3 = Op.getOperand(3); \
4976 return Subtarget.is64Bit() \
4977 ? DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, \
4978 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
4979 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op3)) \
4980 : DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, Op2, \
4981 Op3); \
4982 }
4983 IOCSRWR_CASE(iocsrwr_b, IOCSRWR_B);
4984 IOCSRWR_CASE(iocsrwr_h, IOCSRWR_H);
4985 IOCSRWR_CASE(iocsrwr_w, IOCSRWR_W);
4986#undef IOCSRWR_CASE
4987 case Intrinsic::loongarch_iocsrwr_d: {
4988 return !Subtarget.is64Bit()
4989 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
4990 : DAG.getNode(LoongArchISD::IOCSRWR_D, DL, MVT::Other, Chain,
4991 Op2,
4992 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
4993 Op.getOperand(3)));
4994 }
4995#define ASRT_LE_GT_CASE(NAME) \
4996 case Intrinsic::loongarch_##NAME: { \
4997 return !Subtarget.is64Bit() \
4998 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG) \
4999 : Op; \
5000 }
5001 ASRT_LE_GT_CASE(asrtle_d)
5002 ASRT_LE_GT_CASE(asrtgt_d)
5003#undef ASRT_LE_GT_CASE
5004 case Intrinsic::loongarch_ldpte_d: {
5005 unsigned Imm = Op.getConstantOperandVal(3);
5006 return !Subtarget.is64Bit()
5007 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
5008 : !isUInt<8>(Imm) ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
5009 : Op;
5010 }
5011 case Intrinsic::loongarch_lsx_vst:
5012 case Intrinsic::loongarch_lasx_xvst:
5013 return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue())
5014 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
5015 : SDValue();
5016 case Intrinsic::loongarch_lasx_xvstelm_b:
5017 return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
5018 !isUInt<5>(Op.getConstantOperandVal(5)))
5019 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
5020 : SDValue();
5021 case Intrinsic::loongarch_lsx_vstelm_b:
5022 return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
5023 !isUInt<4>(Op.getConstantOperandVal(5)))
5024 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
5025 : SDValue();
5026 case Intrinsic::loongarch_lasx_xvstelm_h:
5027 return (!isShiftedInt<8, 1>(
5028 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
5029 !isUInt<4>(Op.getConstantOperandVal(5)))
5031 Op, "argument out of range or not a multiple of 2", DAG)
5032 : SDValue();
5033 case Intrinsic::loongarch_lsx_vstelm_h:
5034 return (!isShiftedInt<8, 1>(
5035 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
5036 !isUInt<3>(Op.getConstantOperandVal(5)))
5038 Op, "argument out of range or not a multiple of 2", DAG)
5039 : SDValue();
5040 case Intrinsic::loongarch_lasx_xvstelm_w:
5041 return (!isShiftedInt<8, 2>(
5042 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
5043 !isUInt<3>(Op.getConstantOperandVal(5)))
5045 Op, "argument out of range or not a multiple of 4", DAG)
5046 : SDValue();
5047 case Intrinsic::loongarch_lsx_vstelm_w:
5048 return (!isShiftedInt<8, 2>(
5049 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
5050 !isUInt<2>(Op.getConstantOperandVal(5)))
5052 Op, "argument out of range or not a multiple of 4", DAG)
5053 : SDValue();
5054 case Intrinsic::loongarch_lasx_xvstelm_d:
5055 return (!isShiftedInt<8, 3>(
5056 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
5057 !isUInt<2>(Op.getConstantOperandVal(5)))
5059 Op, "argument out of range or not a multiple of 8", DAG)
5060 : SDValue();
5061 case Intrinsic::loongarch_lsx_vstelm_d:
5062 return (!isShiftedInt<8, 3>(
5063 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
5064 !isUInt<1>(Op.getConstantOperandVal(5)))
5066 Op, "argument out of range or not a multiple of 8", DAG)
5067 : SDValue();
5068 }
5069}
5070
5071SDValue LoongArchTargetLowering::lowerShiftLeftParts(SDValue Op,
5072 SelectionDAG &DAG) const {
5073 SDLoc DL(Op);
5074 SDValue Lo = Op.getOperand(0);
5075 SDValue Hi = Op.getOperand(1);
5076 SDValue Shamt = Op.getOperand(2);
5077 EVT VT = Lo.getValueType();
5078
5079 // if Shamt-GRLen < 0: // Shamt < GRLen
5080 // Lo = Lo << Shamt
5081 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (GRLen-1 ^ Shamt))
5082 // else:
5083 // Lo = 0
5084 // Hi = Lo << (Shamt-GRLen)
5085
5086 SDValue Zero = DAG.getConstant(0, DL, VT);
5087 SDValue One = DAG.getConstant(1, DL, VT);
5088 SDValue MinusGRLen =
5089 DAG.getSignedConstant(-(int)Subtarget.getGRLen(), DL, VT);
5090 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
5091 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
5092 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
5093
5094 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
5095 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
5096 SDValue ShiftRightLo =
5097 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, GRLenMinus1Shamt);
5098 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
5099 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
5100 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusGRLen);
5101
5102 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
5103
5104 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
5105 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
5106
5107 SDValue Parts[2] = {Lo, Hi};
5108 return DAG.getMergeValues(Parts, DL);
5109}
5110
5111SDValue LoongArchTargetLowering::lowerShiftRightParts(SDValue Op,
5112 SelectionDAG &DAG,
5113 bool IsSRA) const {
5114 SDLoc DL(Op);
5115 SDValue Lo = Op.getOperand(0);
5116 SDValue Hi = Op.getOperand(1);
5117 SDValue Shamt = Op.getOperand(2);
5118 EVT VT = Lo.getValueType();
5119
5120 // SRA expansion:
5121 // if Shamt-GRLen < 0: // Shamt < GRLen
5122 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
5123 // Hi = Hi >>s Shamt
5124 // else:
5125 // Lo = Hi >>s (Shamt-GRLen);
5126 // Hi = Hi >>s (GRLen-1)
5127 //
5128 // SRL expansion:
5129 // if Shamt-GRLen < 0: // Shamt < GRLen
5130 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
5131 // Hi = Hi >>u Shamt
5132 // else:
5133 // Lo = Hi >>u (Shamt-GRLen);
5134 // Hi = 0;
5135
5136 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
5137
5138 SDValue Zero = DAG.getConstant(0, DL, VT);
5139 SDValue One = DAG.getConstant(1, DL, VT);
5140 SDValue MinusGRLen =
5141 DAG.getSignedConstant(-(int)Subtarget.getGRLen(), DL, VT);
5142 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
5143 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
5144 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
5145
5146 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
5147 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
5148 SDValue ShiftLeftHi =
5149 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, GRLenMinus1Shamt);
5150 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
5151 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
5152 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusGRLen);
5153 SDValue HiFalse =
5154 IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, GRLenMinus1) : Zero;
5155
5156 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
5157
5158 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
5159 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
5160
5161 SDValue Parts[2] = {Lo, Hi};
5162 return DAG.getMergeValues(Parts, DL);
5163}
5164
5165// Returns the opcode of the target-specific SDNode that implements the 32-bit
5166// form of the given Opcode.
5167static unsigned getLoongArchWOpcode(unsigned Opcode) {
5168 switch (Opcode) {
5169 default:
5170 llvm_unreachable("Unexpected opcode");
5171 case ISD::SDIV:
5172 return LoongArchISD::DIV_W;
5173 case ISD::UDIV:
5174 return LoongArchISD::DIV_WU;
5175 case ISD::SREM:
5176 return LoongArchISD::MOD_W;
5177 case ISD::UREM:
5178 return LoongArchISD::MOD_WU;
5179 case ISD::SHL:
5180 return LoongArchISD::SLL_W;
5181 case ISD::SRA:
5182 return LoongArchISD::SRA_W;
5183 case ISD::SRL:
5184 return LoongArchISD::SRL_W;
5185 case ISD::ROTL:
5186 case ISD::ROTR:
5187 return LoongArchISD::ROTR_W;
5188 case ISD::CTTZ:
5189 return LoongArchISD::CTZ_W;
5190 case ISD::CTLZ:
5191 return LoongArchISD::CLZ_W;
5192 }
5193}
5194
5195// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
5196// node. Because i8/i16/i32 isn't a legal type for LA64, these operations would
5197// otherwise be promoted to i64, making it difficult to select the
5198// SLL_W/.../*W later one because the fact the operation was originally of
5199// type i8/i16/i32 is lost.
5201 unsigned ExtOpc = ISD::ANY_EXTEND) {
5202 SDLoc DL(N);
5203 unsigned WOpcode = getLoongArchWOpcode(N->getOpcode());
5204 SDValue NewOp0, NewRes;
5205
5206 switch (NumOp) {
5207 default:
5208 llvm_unreachable("Unexpected NumOp");
5209 case 1: {
5210 NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
5211 NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0);
5212 break;
5213 }
5214 case 2: {
5215 NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
5216 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
5217 if (N->getOpcode() == ISD::ROTL) {
5218 SDValue TmpOp = DAG.getConstant(32, DL, MVT::i64);
5219 NewOp1 = DAG.getNode(ISD::SUB, DL, MVT::i64, TmpOp, NewOp1);
5220 }
5221 NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
5222 break;
5223 }
5224 // TODO:Handle more NumOp.
5225 }
5226
5227 // ReplaceNodeResults requires we maintain the same type for the return
5228 // value.
5229 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
5230}
5231
5232// Converts the given 32-bit operation to a i64 operation with signed extension
5233// semantic to reduce the signed extension instructions.
5235 SDLoc DL(N);
5236 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
5237 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
5238 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
5239 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
5240 DAG.getValueType(MVT::i32));
5241 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
5242}
5243
5244// Helper function that emits error message for intrinsics with/without chain
5245// and return a UNDEF or and the chain as the results.
5248 StringRef ErrorMsg, bool WithChain = true) {
5249 DAG.getContext()->emitError(N->getOperationName(0) + ": " + ErrorMsg + ".");
5250 Results.push_back(DAG.getUNDEF(N->getValueType(0)));
5251 if (!WithChain)
5252 return;
5253 Results.push_back(N->getOperand(0));
5254}
5255
5256template <unsigned N>
5257static void
5259 SelectionDAG &DAG, const LoongArchSubtarget &Subtarget,
5260 unsigned ResOp) {
5261 const StringRef ErrorMsgOOR = "argument out of range";
5262 unsigned Imm = Node->getConstantOperandVal(2);
5263 if (!isUInt<N>(Imm)) {
5265 /*WithChain=*/false);
5266 return;
5267 }
5268 SDLoc DL(Node);
5269 SDValue Vec = Node->getOperand(1);
5270
5271 SDValue PickElt =
5272 DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec,
5273 DAG.getConstant(Imm, DL, Subtarget.getGRLenVT()),
5275 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, Node->getValueType(0),
5276 PickElt.getValue(0)));
5277}
5278
5281 SelectionDAG &DAG,
5282 const LoongArchSubtarget &Subtarget,
5283 unsigned ResOp) {
5284 SDLoc DL(N);
5285 SDValue Vec = N->getOperand(1);
5286
5287 SDValue CB = DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec);
5288 Results.push_back(
5289 DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), CB.getValue(0)));
5290}
5291
5292static void
5294 SelectionDAG &DAG,
5295 const LoongArchSubtarget &Subtarget) {
5296 switch (N->getConstantOperandVal(0)) {
5297 default:
5298 llvm_unreachable("Unexpected Intrinsic.");
5299 case Intrinsic::loongarch_lsx_vpickve2gr_b:
5300 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
5301 LoongArchISD::VPICK_SEXT_ELT);
5302 break;
5303 case Intrinsic::loongarch_lsx_vpickve2gr_h:
5304 case Intrinsic::loongarch_lasx_xvpickve2gr_w:
5305 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
5306 LoongArchISD::VPICK_SEXT_ELT);
5307 break;
5308 case Intrinsic::loongarch_lsx_vpickve2gr_w:
5309 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
5310 LoongArchISD::VPICK_SEXT_ELT);
5311 break;
5312 case Intrinsic::loongarch_lsx_vpickve2gr_bu:
5313 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
5314 LoongArchISD::VPICK_ZEXT_ELT);
5315 break;
5316 case Intrinsic::loongarch_lsx_vpickve2gr_hu:
5317 case Intrinsic::loongarch_lasx_xvpickve2gr_wu:
5318 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
5319 LoongArchISD::VPICK_ZEXT_ELT);
5320 break;
5321 case Intrinsic::loongarch_lsx_vpickve2gr_wu:
5322 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
5323 LoongArchISD::VPICK_ZEXT_ELT);
5324 break;
5325 case Intrinsic::loongarch_lsx_bz_b:
5326 case Intrinsic::loongarch_lsx_bz_h:
5327 case Intrinsic::loongarch_lsx_bz_w:
5328 case Intrinsic::loongarch_lsx_bz_d:
5329 case Intrinsic::loongarch_lasx_xbz_b:
5330 case Intrinsic::loongarch_lasx_xbz_h:
5331 case Intrinsic::loongarch_lasx_xbz_w:
5332 case Intrinsic::loongarch_lasx_xbz_d:
5333 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
5334 LoongArchISD::VALL_ZERO);
5335 break;
5336 case Intrinsic::loongarch_lsx_bz_v:
5337 case Intrinsic::loongarch_lasx_xbz_v:
5338 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
5339 LoongArchISD::VANY_ZERO);
5340 break;
5341 case Intrinsic::loongarch_lsx_bnz_b:
5342 case Intrinsic::loongarch_lsx_bnz_h:
5343 case Intrinsic::loongarch_lsx_bnz_w:
5344 case Intrinsic::loongarch_lsx_bnz_d:
5345 case Intrinsic::loongarch_lasx_xbnz_b:
5346 case Intrinsic::loongarch_lasx_xbnz_h:
5347 case Intrinsic::loongarch_lasx_xbnz_w:
5348 case Intrinsic::loongarch_lasx_xbnz_d:
5349 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
5350 LoongArchISD::VALL_NONZERO);
5351 break;
5352 case Intrinsic::loongarch_lsx_bnz_v:
5353 case Intrinsic::loongarch_lasx_xbnz_v:
5354 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
5355 LoongArchISD::VANY_NONZERO);
5356 break;
5357 }
5358}
5359
5362 SelectionDAG &DAG) {
5363 assert(N->getValueType(0) == MVT::i128 &&
5364 "AtomicCmpSwap on types less than 128 should be legal");
5365 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
5366
5367 unsigned Opcode;
5368 switch (MemOp->getMergedOrdering()) {
5372 Opcode = LoongArch::PseudoCmpXchg128Acquire;
5373 break;
5376 Opcode = LoongArch::PseudoCmpXchg128;
5377 break;
5378 default:
5379 llvm_unreachable("Unexpected ordering!");
5380 }
5381
5382 SDLoc DL(N);
5383 auto CmpVal = DAG.SplitScalar(N->getOperand(2), DL, MVT::i64, MVT::i64);
5384 auto NewVal = DAG.SplitScalar(N->getOperand(3), DL, MVT::i64, MVT::i64);
5385 SDValue Ops[] = {N->getOperand(1), CmpVal.first, CmpVal.second,
5386 NewVal.first, NewVal.second, N->getOperand(0)};
5387
5388 SDNode *CmpSwap = DAG.getMachineNode(
5389 Opcode, SDLoc(N), DAG.getVTList(MVT::i64, MVT::i64, MVT::i64, MVT::Other),
5390 Ops);
5391 DAG.setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
5392 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128,
5393 SDValue(CmpSwap, 0), SDValue(CmpSwap, 1)));
5394 Results.push_back(SDValue(CmpSwap, 3));
5395}
5396
5399 SDLoc DL(N);
5400 EVT VT = N->getValueType(0);
5401 switch (N->getOpcode()) {
5402 default:
5403 llvm_unreachable("Don't know how to legalize this operation");
5404 case ISD::ADD:
5405 case ISD::SUB:
5406 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
5407 "Unexpected custom legalisation");
5408 Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
5409 break;
5410 case ISD::SDIV:
5411 case ISD::UDIV:
5412 case ISD::SREM:
5413 case ISD::UREM:
5414 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
5415 "Unexpected custom legalisation");
5416 Results.push_back(customLegalizeToWOp(N, DAG, 2,
5417 Subtarget.hasDiv32() && VT == MVT::i32
5419 : ISD::SIGN_EXTEND));
5420 break;
5421 case ISD::SHL:
5422 case ISD::SRA:
5423 case ISD::SRL:
5424 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
5425 "Unexpected custom legalisation");
5426 if (N->getOperand(1).getOpcode() != ISD::Constant) {
5427 Results.push_back(customLegalizeToWOp(N, DAG, 2));
5428 break;
5429 }
5430 break;
5431 case ISD::ROTL:
5432 case ISD::ROTR:
5433 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
5434 "Unexpected custom legalisation");
5435 Results.push_back(customLegalizeToWOp(N, DAG, 2));
5436 break;
5437 case ISD::LOAD: {
5438 // Use an f64 load and a scalar_to_vector for v2f32 loads. This avoids
5439 // scalarizing in 32-bit mode. In 64-bit mode this avoids a int->fp
5440 // cast since type legalization will try to use an i64 load.
5441 MVT VT = N->getSimpleValueType(0);
5442 assert(VT == MVT::v2f32 && Subtarget.hasExtLSX() &&
5443 "Unexpected custom legalisation");
5445 "Unexpected type action!");
5446 if (!ISD::isNON_EXTLoad(N))
5447 return;
5448 auto *Ld = cast<LoadSDNode>(N);
5449 SDValue Res = DAG.getLoad(MVT::f64, DL, Ld->getChain(), Ld->getBasePtr(),
5450 Ld->getPointerInfo(), Ld->getBaseAlign(),
5451 Ld->getMemOperand()->getFlags());
5452 SDValue Chain = Res.getValue(1);
5453 MVT VecVT = MVT::getVectorVT(MVT::f64, 2);
5454 Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, Res);
5455 EVT WideVT = getTypeToTransformTo(*DAG.getContext(), VT);
5456 Res = DAG.getBitcast(WideVT, Res);
5457 Results.push_back(Res);
5458 Results.push_back(Chain);
5459 break;
5460 }
5461 case ISD::FP_TO_SINT: {
5462 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
5463 "Unexpected custom legalisation");
5464 SDValue Src = N->getOperand(0);
5465 EVT FVT = EVT::getFloatingPointVT(N->getValueSizeInBits(0));
5466 if (getTypeAction(*DAG.getContext(), Src.getValueType()) !=
5468 if (!isTypeLegal(Src.getValueType()))
5469 return;
5470 if (Src.getValueType() == MVT::f16)
5471 Src = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Src);
5472 SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, FVT, Src);
5473 Results.push_back(DAG.getNode(ISD::BITCAST, DL, VT, Dst));
5474 return;
5475 }
5476 // If the FP type needs to be softened, emit a library call using the 'si'
5477 // version. If we left it to default legalization we'd end up with 'di'.
5478 RTLIB::Libcall LC;
5479 LC = RTLIB::getFPTOSINT(Src.getValueType(), VT);
5480 MakeLibCallOptions CallOptions;
5481 EVT OpVT = Src.getValueType();
5482 CallOptions.setTypeListBeforeSoften(OpVT, VT);
5483 SDValue Chain = SDValue();
5484 SDValue Result;
5485 std::tie(Result, Chain) =
5486 makeLibCall(DAG, LC, VT, Src, CallOptions, DL, Chain);
5487 Results.push_back(Result);
5488 break;
5489 }
5490 case ISD::BITCAST: {
5491 SDValue Src = N->getOperand(0);
5492 EVT SrcVT = Src.getValueType();
5493 if (VT == MVT::i32 && SrcVT == MVT::f32 && Subtarget.is64Bit() &&
5494 Subtarget.hasBasicF()) {
5495 SDValue Dst =
5496 DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Src);
5497 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Dst));
5498 } else if (VT == MVT::i64 && SrcVT == MVT::f64 && !Subtarget.is64Bit()) {
5499 SDValue NewReg = DAG.getNode(LoongArchISD::SPLIT_PAIR_F64, DL,
5500 DAG.getVTList(MVT::i32, MVT::i32), Src);
5501 SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,
5502 NewReg.getValue(0), NewReg.getValue(1));
5503 Results.push_back(RetReg);
5504 }
5505 break;
5506 }
5507 case ISD::FP_TO_UINT: {
5508 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
5509 "Unexpected custom legalisation");
5510 auto &TLI = DAG.getTargetLoweringInfo();
5511 SDValue Tmp1, Tmp2;
5512 TLI.expandFP_TO_UINT(N, Tmp1, Tmp2, DAG);
5513 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Tmp1));
5514 break;
5515 }
5516 case ISD::FP_ROUND: {
5517 assert(VT == MVT::v2f32 && Subtarget.hasExtLSX() &&
5518 "Unexpected custom legalisation");
5519 // On LSX platforms, rounding from v2f64 to v4f32 (after legalization from
5520 // v2f32) is scalarized. Add a customized v2f32 widening to convert it into
5521 // a target-specific LoongArchISD::VFCVT to optimize it.
5522 SDValue Op0 = N->getOperand(0);
5523 EVT OpVT = Op0.getValueType();
5524 if (OpVT == MVT::v2f64) {
5525 SDValue Undef = DAG.getUNDEF(OpVT);
5526 SDValue Dst =
5527 DAG.getNode(LoongArchISD::VFCVT, DL, MVT::v4f32, Undef, Op0);
5528 Results.push_back(Dst);
5529 }
5530 break;
5531 }
5532 case ISD::BSWAP: {
5533 SDValue Src = N->getOperand(0);
5534 assert((VT == MVT::i16 || VT == MVT::i32) &&
5535 "Unexpected custom legalization");
5536 MVT GRLenVT = Subtarget.getGRLenVT();
5537 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
5538 SDValue Tmp;
5539 switch (VT.getSizeInBits()) {
5540 default:
5541 llvm_unreachable("Unexpected operand width");
5542 case 16:
5543 Tmp = DAG.getNode(LoongArchISD::REVB_2H, DL, GRLenVT, NewSrc);
5544 break;
5545 case 32:
5546 // Only LA64 will get to here due to the size mismatch between VT and
5547 // GRLenVT, LA32 lowering is directly defined in LoongArchInstrInfo.
5548 Tmp = DAG.getNode(LoongArchISD::REVB_2W, DL, GRLenVT, NewSrc);
5549 break;
5550 }
5551 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
5552 break;
5553 }
5554 case ISD::BITREVERSE: {
5555 SDValue Src = N->getOperand(0);
5556 assert((VT == MVT::i8 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
5557 "Unexpected custom legalization");
5558 MVT GRLenVT = Subtarget.getGRLenVT();
5559 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
5560 SDValue Tmp;
5561 switch (VT.getSizeInBits()) {
5562 default:
5563 llvm_unreachable("Unexpected operand width");
5564 case 8:
5565 Tmp = DAG.getNode(LoongArchISD::BITREV_4B, DL, GRLenVT, NewSrc);
5566 break;
5567 case 32:
5568 Tmp = DAG.getNode(LoongArchISD::BITREV_W, DL, GRLenVT, NewSrc);
5569 break;
5570 }
5571 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
5572 break;
5573 }
5574 case ISD::CTLZ:
5575 case ISD::CTTZ: {
5576 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
5577 "Unexpected custom legalisation");
5578 Results.push_back(customLegalizeToWOp(N, DAG, 1));
5579 break;
5580 }
5582 SDValue Chain = N->getOperand(0);
5583 SDValue Op2 = N->getOperand(2);
5584 MVT GRLenVT = Subtarget.getGRLenVT();
5585 const StringRef ErrorMsgOOR = "argument out of range";
5586 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
5587 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
5588
5589 switch (N->getConstantOperandVal(1)) {
5590 default:
5591 llvm_unreachable("Unexpected Intrinsic.");
5592 case Intrinsic::loongarch_movfcsr2gr: {
5593 if (!Subtarget.hasBasicF()) {
5594 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqF);
5595 return;
5596 }
5597 unsigned Imm = Op2->getAsZExtVal();
5598 if (!isUInt<2>(Imm)) {
5599 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
5600 return;
5601 }
5602 SDValue MOVFCSR2GRResults = DAG.getNode(
5603 LoongArchISD::MOVFCSR2GR, SDLoc(N), {MVT::i64, MVT::Other},
5604 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
5605 Results.push_back(
5606 DAG.getNode(ISD::TRUNCATE, DL, VT, MOVFCSR2GRResults.getValue(0)));
5607 Results.push_back(MOVFCSR2GRResults.getValue(1));
5608 break;
5609 }
5610#define CRC_CASE_EXT_BINARYOP(NAME, NODE) \
5611 case Intrinsic::loongarch_##NAME: { \
5612 SDValue NODE = DAG.getNode( \
5613 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
5614 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
5615 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
5616 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
5617 Results.push_back(NODE.getValue(1)); \
5618 break; \
5619 }
5620 CRC_CASE_EXT_BINARYOP(crc_w_b_w, CRC_W_B_W)
5621 CRC_CASE_EXT_BINARYOP(crc_w_h_w, CRC_W_H_W)
5622 CRC_CASE_EXT_BINARYOP(crc_w_w_w, CRC_W_W_W)
5623 CRC_CASE_EXT_BINARYOP(crcc_w_b_w, CRCC_W_B_W)
5624 CRC_CASE_EXT_BINARYOP(crcc_w_h_w, CRCC_W_H_W)
5625 CRC_CASE_EXT_BINARYOP(crcc_w_w_w, CRCC_W_W_W)
5626#undef CRC_CASE_EXT_BINARYOP
5627
5628#define CRC_CASE_EXT_UNARYOP(NAME, NODE) \
5629 case Intrinsic::loongarch_##NAME: { \
5630 SDValue NODE = DAG.getNode( \
5631 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
5632 {Chain, Op2, \
5633 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
5634 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
5635 Results.push_back(NODE.getValue(1)); \
5636 break; \
5637 }
5638 CRC_CASE_EXT_UNARYOP(crc_w_d_w, CRC_W_D_W)
5639 CRC_CASE_EXT_UNARYOP(crcc_w_d_w, CRCC_W_D_W)
5640#undef CRC_CASE_EXT_UNARYOP
5641#define CSR_CASE(ID) \
5642 case Intrinsic::loongarch_##ID: { \
5643 if (!Subtarget.is64Bit()) \
5644 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64); \
5645 break; \
5646 }
5647 CSR_CASE(csrrd_d);
5648 CSR_CASE(csrwr_d);
5649 CSR_CASE(csrxchg_d);
5650 CSR_CASE(iocsrrd_d);
5651#undef CSR_CASE
5652 case Intrinsic::loongarch_csrrd_w: {
5653 unsigned Imm = Op2->getAsZExtVal();
5654 if (!isUInt<14>(Imm)) {
5655 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
5656 return;
5657 }
5658 SDValue CSRRDResults =
5659 DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
5660 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
5661 Results.push_back(
5662 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRRDResults.getValue(0)));
5663 Results.push_back(CSRRDResults.getValue(1));
5664 break;
5665 }
5666 case Intrinsic::loongarch_csrwr_w: {
5667 unsigned Imm = N->getConstantOperandVal(3);
5668 if (!isUInt<14>(Imm)) {
5669 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
5670 return;
5671 }
5672 SDValue CSRWRResults =
5673 DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
5674 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
5675 DAG.getConstant(Imm, DL, GRLenVT)});
5676 Results.push_back(
5677 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRWRResults.getValue(0)));
5678 Results.push_back(CSRWRResults.getValue(1));
5679 break;
5680 }
5681 case Intrinsic::loongarch_csrxchg_w: {
5682 unsigned Imm = N->getConstantOperandVal(4);
5683 if (!isUInt<14>(Imm)) {
5684 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
5685 return;
5686 }
5687 SDValue CSRXCHGResults = DAG.getNode(
5688 LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
5689 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
5690 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3)),
5691 DAG.getConstant(Imm, DL, GRLenVT)});
5692 Results.push_back(
5693 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRXCHGResults.getValue(0)));
5694 Results.push_back(CSRXCHGResults.getValue(1));
5695 break;
5696 }
5697#define IOCSRRD_CASE(NAME, NODE) \
5698 case Intrinsic::loongarch_##NAME: { \
5699 SDValue IOCSRRDResults = \
5700 DAG.getNode(LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
5701 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)}); \
5702 Results.push_back( \
5703 DAG.getNode(ISD::TRUNCATE, DL, VT, IOCSRRDResults.getValue(0))); \
5704 Results.push_back(IOCSRRDResults.getValue(1)); \
5705 break; \
5706 }
5707 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
5708 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
5709 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
5710#undef IOCSRRD_CASE
5711 case Intrinsic::loongarch_cpucfg: {
5712 SDValue CPUCFGResults =
5713 DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
5714 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)});
5715 Results.push_back(
5716 DAG.getNode(ISD::TRUNCATE, DL, VT, CPUCFGResults.getValue(0)));
5717 Results.push_back(CPUCFGResults.getValue(1));
5718 break;
5719 }
5720 case Intrinsic::loongarch_lddir_d: {
5721 if (!Subtarget.is64Bit()) {
5722 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64);
5723 return;
5724 }
5725 break;
5726 }
5727 }
5728 break;
5729 }
5730 case ISD::READ_REGISTER: {
5731 if (Subtarget.is64Bit())
5732 DAG.getContext()->emitError(
5733 "On LA64, only 64-bit registers can be read.");
5734 else
5735 DAG.getContext()->emitError(
5736 "On LA32, only 32-bit registers can be read.");
5737 Results.push_back(DAG.getUNDEF(VT));
5738 Results.push_back(N->getOperand(0));
5739 break;
5740 }
5742 replaceINTRINSIC_WO_CHAINResults(N, Results, DAG, Subtarget);
5743 break;
5744 }
5745 case ISD::LROUND: {
5746 SDValue Op0 = N->getOperand(0);
5747 EVT OpVT = Op0.getValueType();
5748 RTLIB::Libcall LC =
5749 OpVT == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
5750 MakeLibCallOptions CallOptions;
5751 CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64);
5752 SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first;
5753 Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
5754 Results.push_back(Result);
5755 break;
5756 }
5757 case ISD::ATOMIC_CMP_SWAP: {
5759 break;
5760 }
5761 case ISD::TRUNCATE: {
5762 MVT VT = N->getSimpleValueType(0);
5763 if (getTypeAction(*DAG.getContext(), VT) != TypeWidenVector)
5764 return;
5765
5766 MVT WidenVT = getTypeToTransformTo(*DAG.getContext(), VT).getSimpleVT();
5767 SDValue In = N->getOperand(0);
5768 EVT InVT = In.getValueType();
5769 EVT InEltVT = InVT.getVectorElementType();
5770 EVT EltVT = VT.getVectorElementType();
5771 unsigned MinElts = VT.getVectorNumElements();
5772 unsigned WidenNumElts = WidenVT.getVectorNumElements();
5773 unsigned InBits = InVT.getSizeInBits();
5774
5775 if ((128 % InBits) == 0 && WidenVT.is128BitVector()) {
5776 if ((InEltVT.getSizeInBits() % EltVT.getSizeInBits()) == 0) {
5777 int Scale = InEltVT.getSizeInBits() / EltVT.getSizeInBits();
5778 SmallVector<int, 16> TruncMask(WidenNumElts, -1);
5779 for (unsigned I = 0; I < MinElts; ++I)
5780 TruncMask[I] = Scale * I;
5781
5782 unsigned WidenNumElts = 128 / In.getScalarValueSizeInBits();
5783 MVT SVT = In.getSimpleValueType().getScalarType();
5784 MVT VT = MVT::getVectorVT(SVT, WidenNumElts);
5785 SDValue WidenIn =
5786 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), In,
5787 DAG.getVectorIdxConstant(0, DL));
5788 assert(isTypeLegal(WidenVT) && isTypeLegal(WidenIn.getValueType()) &&
5789 "Illegal vector type in truncation");
5790 WidenIn = DAG.getBitcast(WidenVT, WidenIn);
5791 Results.push_back(
5792 DAG.getVectorShuffle(WidenVT, DL, WidenIn, WidenIn, TruncMask));
5793 return;
5794 }
5795 }
5796
5797 break;
5798 }
5799 case ISD::SIGN_EXTEND: {
5800 // LASX has native VEXT2XV_* for sign extension.
5801 if (!Subtarget.hasExtLSX() || Subtarget.hasExtLASX())
5802 return;
5803
5804 EVT DstVT = N->getValueType(0);
5805 SDValue Src = N->getOperand(0);
5806 MVT SrcVT = Src.getSimpleValueType();
5807
5808 unsigned SrcEltBits = SrcVT.getScalarSizeInBits();
5809 unsigned DstEltBits = DstVT.getScalarSizeInBits();
5810 unsigned NumElts = DstVT.getVectorNumElements();
5811
5812 if (SrcVT.getSizeInBits() > 128)
5813 return;
5814
5815 if (!DstVT.isVector() || DstVT.getSizeInBits() <= 128)
5816 return;
5817
5818 // Legalize and extend the src to 128-bit first.
5819 if (SrcVT.getSizeInBits() < 128) {
5820 unsigned WidenSrcElts = 128 / SrcEltBits;
5821 MVT WidenSrcVT = MVT::getVectorVT(SrcVT.getScalarType(), WidenSrcElts);
5822 Src = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WidenSrcVT,
5823 DAG.getUNDEF(WidenSrcVT), Src,
5824 DAG.getVectorIdxConstant(0, DL));
5825 SrcVT = WidenSrcVT;
5826
5827 unsigned FirstStageEltBits = 128 / NumElts;
5828 MVT FirstStageEltVT = MVT::getIntegerVT(FirstStageEltBits);
5829 MVT FirstStageVT = MVT::getVectorVT(FirstStageEltVT, NumElts);
5830 Src = DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, DL, FirstStageVT, Src);
5831 SrcVT = FirstStageVT;
5832 SrcEltBits = FirstStageEltBits;
5833 }
5834
5836 Blocks.push_back(Src);
5837
5838 // Sign-extend the src by using SLTI + VILVL + VILVH recursively.
5839 while (SrcEltBits < DstEltBits) {
5840 unsigned NextEltBits = SrcEltBits * 2;
5841 MVT NextEltVT = MVT::getIntegerVT(NextEltBits);
5842 unsigned CurEltsPerBlock = SrcVT.getVectorNumElements();
5843 unsigned NextEltsPerBlock = CurEltsPerBlock / 2;
5844 MVT NextBlockVT = MVT::getVectorVT(NextEltVT, NextEltsPerBlock);
5845
5846 SmallVector<SDValue, 8> NextBlocks;
5847 NextBlocks.reserve(Blocks.size() * 2);
5848 for (SDValue Block : Blocks) {
5849 SDValue Zero = DAG.getConstant(0, DL, SrcVT);
5850 SDValue Mask = DAG.getNode(ISD::SETCC, DL, SrcVT, Block, Zero,
5851 DAG.getCondCode(ISD::SETLT));
5852 SDValue LoInterleaved =
5853 DAG.getNode(LoongArchISD::VILVL, DL, SrcVT, Mask, Block);
5854 SDValue HiInterleaved =
5855 DAG.getNode(LoongArchISD::VILVH, DL, SrcVT, Mask, Block);
5856
5857 NextBlocks.push_back(DAG.getBitcast(NextBlockVT, LoInterleaved));
5858 NextBlocks.push_back(DAG.getBitcast(NextBlockVT, HiInterleaved));
5859 }
5860
5861 Blocks = std::move(NextBlocks);
5862 SrcVT = NextBlockVT;
5863 SrcEltBits = NextEltBits;
5864 }
5865
5866 Results.push_back(DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Blocks));
5867 break;
5868 }
5869 case ISD::FP_EXTEND:
5870 // FP_EXTEND may reach here due to the Custom action for v2f32 results, but
5871 // no target-specific lowering is required. Leave it unchanged and rely on
5872 // the default type legalization.
5873 break;
5874 }
5875}
5876
5877/// Try to fold: (and (xor X, -1), Y) -> (vandn X, Y).
/// Only 128-bit and 256-bit vector result types are considered. Returns an
/// empty SDValue when the type does not qualify or when isNOT() matches
/// neither operand of the AND.
5879 SelectionDAG &DAG) {
5880 assert(N->getOpcode() == ISD::AND && "Unexpected opcode combine into ANDN");
5881
5882 MVT VT = N->getSimpleValueType(0);
5883 if (!VT.is128BitVector() && !VT.is256BitVector())
5884 return SDValue();
5885
5886 SDValue X, Y;
5887 SDValue N0 = N->getOperand(0);
5888 SDValue N1 = N->getOperand(1);
5889
 // The inverted value may be either operand: whichever one isNOT() matches
 // supplies X (the value isNOT() returns), and the other operand becomes Y.
5890 if (SDValue Not = isNOT(N0, DAG)) {
5891 X = Not;
5892 Y = N1;
5893 } else if (SDValue Not = isNOT(N1, DAG)) {
5894 X = Not;
5895 Y = N0;
5896 } else
5897 return SDValue();
5898
 // Bitcast both inputs to the result type before building the VANDN node.
5899 X = DAG.getBitcast(VT, X);
5900 Y = DAG.getBitcast(VT, Y);
5901 return DAG.getNode(LoongArchISD::VANDN, DL, VT, X, Y);
5902}
5903
/// Query whether a node is a constant splat of at least \p MinSizeInBits bits.
///
/// On success the splat value is written to \p SplatValue by
/// isConstantSplat(); the undef mask, splat bit size and any-undef flag it
/// also reports are discarded. Returns false when `Node` is null.
/// NOTE(review): `Node` is presumably obtained from \p N as a
/// BuildVectorSDNode -- confirm against its definition.
5904static bool isConstantSplatVector(SDValue N, APInt &SplatValue,
5905 unsigned MinSizeInBits) {
5908
5909 if (!Node)
5910 return false;
5911
 // Out-parameters required by isConstantSplat(); not used by this helper.
5912 APInt SplatUndef;
5913 unsigned SplatBitSize;
5914 bool HasAnyUndefs;
5915
5916 return Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
5917 HasAnyUndefs, MinSizeInBits,
5918 /*IsBigEndian=*/false);
5919}
5920
5921static SDValue matchDeinterleaveBuildVector(SDValue N, unsigned &StartIndex) {
5922 auto *BV = dyn_cast<BuildVectorSDNode>(N);
5923 if (!BV)
5924 return SDValue();
5925
5926 SDValue Src;
5927 int Start = -1;
5928
5929 for (unsigned i = 0, NumElts = BV->getNumOperands(); i < NumElts; ++i) {
5930 SDValue Op = BV->getOperand(i);
5931 if (Op.isUndef())
5932 continue;
5933 if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
5934 return SDValue();
5935
5936 auto *IdxC = dyn_cast<ConstantSDNode>(Op.getOperand(1));
5937 if (!IdxC)
5938 return SDValue();
5939
5940 unsigned EltIdx = IdxC->getZExtValue();
5941 if (Start < 0)
5942 Start = (int)EltIdx - (int)(i * 2);
5943 if (Start < 0 || Start > 1 || EltIdx != (unsigned)(Start + (int)(i * 2)))
5944 return SDValue();
5945
5946 SDValue CurSrc = Op.getOperand(0);
5947 if (!Src)
5948 Src = CurSrc;
5949 else if (Src != CurSrc)
5950 return SDValue();
5951 }
5952
5953 if (!Src || Start < 0)
5954 return SDValue();
5955
5956 StartIndex = (unsigned)Start;
5957 return Src;
5958}
5959
/// Fold an ADD/SUB of two extended, deinterleaved vectors into a horizontal
/// widening node.
///
/// The fold applies when LSX is available and both operands of \p N are
/// single-use extends of the same kind (both SIGN_EXTEND or both ZERO_EXTEND).
/// The LHS extend input must match matchDeinterleaveBuildVector() with start
/// index 1 and the RHS input with start index 0, and the two matched source
/// vectors must have the same type. The result is VHADDW/VHSUBW for sign
/// extension and VHADDW_U/VHSUBW_U for zero extension; otherwise an empty
/// SDValue is returned.
5960static SDValue
5962 const LoongArchSubtarget &Subtarget) {
5963 if (!Subtarget.hasExtLSX())
5964 return SDValue();
5965
5966 unsigned Opc = N->getOpcode();
5967 assert((Opc == ISD::ADD || Opc == ISD::SUB) && "Unexpected opcode");
5968
5969 EVT VT = N->getValueType(0);
5970 SDLoc DL(N);
5971
5972 SDValue LHS = N->getOperand(0);
5973 SDValue RHS = N->getOperand(1);
5974
 // The LHS extend kind selects the signed or unsigned node variant.
5975 bool isSigned;
5976 unsigned ExtOpc = LHS.getOpcode();
5977 if (ExtOpc == ISD::SIGN_EXTEND)
5978 isSigned = true;
5979 else if (ExtOpc == ISD::ZERO_EXTEND)
5980 isSigned = false;
5981 else
5982 return SDValue();
5983
 // Both sides must use the same extend opcode.
5984 if (ExtOpc != RHS.getOpcode())
5985 return SDValue();
5986
5987 if (!LHS.hasOneUse() || !RHS.hasOneUse())
5988 return SDValue();
5989
5990 unsigned OddIdx, EvenIdx;
5991 SDValue LHSVec = matchDeinterleaveBuildVector(LHS.getOperand(0), OddIdx);
5992 SDValue RHSVec = matchDeinterleaveBuildVector(RHS.getOperand(0), EvenIdx);
5993
5994 if (!LHSVec || !RHSVec)
5995 return SDValue();
 // LHS must take the odd-indexed elements and RHS the even-indexed ones.
5996 if (OddIdx != 1 || EvenIdx != 0)
5997 return SDValue();
5998 if (LHSVec.getValueType() != RHSVec.getValueType())
5999 return SDValue();
6000
6001 EVT SrcVT = LHSVec.getValueType();
6002 EVT SrcEltVT = SrcVT.getVectorElementType();
6003 EVT DstEltVT = VT.getVectorElementType();
6004 auto &TLI = DAG.getTargetLoweringInfo();
6005
 // Type constraints: both types legal, both vectors of equal total width,
 // destination elements exactly twice as wide as integer source elements of
 // at most 32 bits.
6006 if (!TLI.isTypeLegal(VT) || !TLI.isTypeLegal(SrcVT))
6007 return SDValue();
6008 if (!SrcVT.isVector() || !VT.isVector())
6009 return SDValue();
6010 if (SrcVT.getSizeInBits() != VT.getSizeInBits())
6011 return SDValue();
6012 if (DstEltVT.getSizeInBits() != SrcEltVT.getSizeInBits() * 2)
6013 return SDValue();
6014 if (!SrcEltVT.isInteger() || SrcEltVT.getSizeInBits() > 32)
6015 return SDValue();
6016
6017 unsigned TargetOpc;
6018 if (Opc == ISD::ADD)
6019 TargetOpc = isSigned ? LoongArchISD::VHADDW : LoongArchISD::VHADDW_U;
6020 else
6021 TargetOpc = isSigned ? LoongArchISD::VHSUBW : LoongArchISD::VHSUBW_U;
6022
6023 return DAG.getNode(TargetOpc, DL, VT, LHSVec, RHSVec);
6024}
6025
6028 const LoongArchSubtarget &Subtarget) {
 // Two folds are attempted for this node. The horizontal widening fold is
 // tried first, in every combine phase. The second fold runs only after
 // operation legalization, on legal integer vector types: it recognises a
 // "rounded right shift" and replaces it with a single VSRLR (logical) or
 // VSRAR (arithmetic) node. An empty SDValue means nothing matched.
6029 if (SDValue V = performHorizWideningCombine(N, DAG, Subtarget))
6030 return V;
6031
6032 if (DCI.isBeforeLegalizeOps())
6033 return SDValue();
6034
6035 EVT VT = N->getValueType(0);
6036 if (!VT.isVector())
6037 return SDValue();
6038
6039 if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
6040 return SDValue();
6041
6042 EVT EltVT = VT.getVectorElementType();
6043 if (!EltVT.isInteger())
6044 return SDValue();
6045
6046 // match:
6047 //
6048 // add
6049 // (and
6050 // (srl X, shift-1) / X
6051 // 1)
6052 // (srl/sra X, shift)
6053
6054 SDValue Add0 = N->getOperand(0);
6055 SDValue Add1 = N->getOperand(1);
6056 SDValue And;
6057 SDValue Shr;
6058
 // The AND may be either operand of the add; the other is the shift.
6059 if (Add0.getOpcode() == ISD::AND) {
6060 And = Add0;
6061 Shr = Add1;
6062 } else if (Add1.getOpcode() == ISD::AND) {
6063 And = Add1;
6064 Shr = Add0;
6065 } else {
6066 return SDValue();
6067 }
6068
6069 // match:
6070 //
6071 // srl/sra X, shift
6072
6073 if (Shr.getOpcode() != ISD::SRL && Shr.getOpcode() != ISD::SRA)
6074 return SDValue();
6075
6076 SDValue X = Shr.getOperand(0);
6077 SDValue Shift = Shr.getOperand(1);
6078 APInt ShiftVal;
6079
6080 if (!isConstantSplatVector(Shift, ShiftVal, EltVT.getSizeInBits()))
6081 return SDValue();
6082
 // A zero shift has no "shift-1" rounding bit, so the pattern cannot apply.
6083 if (ShiftVal == 0)
6084 return SDValue();
6085
6086 // match:
6087 //
6088 // and
6089 // (srl X, shift-1) / X
6090 // 1
6091
6092 SDValue One = And.getOperand(1);
6093 APInt SplatVal;
6094
6095 if (!isConstantSplatVector(One, SplatVal, EltVT.getSizeInBits()))
6096 return SDValue();
6097
6098 if (SplatVal != 1)
6099 return SDValue();
6100
6101 if (And.getOperand(0) == X) {
6102 // match:
6103 //
6104 // shift == 1
6105
6106 if (ShiftVal != 1)
6107 return SDValue();
6108 } else {
6109 // match:
6110 //
6111 // srl X, shift-1
6112
6113 SDValue Srl = And.getOperand(0);
6114
6115 if (Srl.getOpcode() != ISD::SRL)
6116 return SDValue();
6117
6118 if (Srl.getOperand(0) != X)
6119 return SDValue();
6120
6121 // match:
6122 //
6123 // shift-1
6124
6125 SDValue ShiftMinus1 = Srl.getOperand(1);
6126
 // SplatVal is reused here to hold the inner shift amount.
6127 if (!isConstantSplatVector(ShiftMinus1, SplatVal, EltVT.getSizeInBits()))
6128 return SDValue();
6129
6130 if (ShiftVal != (SplatVal + 1))
6131 return SDValue();
6132 }
6133
6134 // We matched a rounded right shift pattern and can lower it
6135 // to a single vector rounded shift instruction.
6136
6137 SDLoc DL(N);
6138 return DAG.getNode(Shr.getOpcode() == ISD::SRL ? LoongArchISD::VSRLR
6139 : LoongArchISD::VSRAR,
6140 DL, VT, X, Shift);
6141}
6142
6145 const LoongArchSubtarget &Subtarget) {
 // Runs only after operation legalization. The vector and-not fold is tried
 // first. Otherwise, when the 32S feature is present and the second operand
 // is a constant shifted mask, the AND is rewritten as a BSTRPICK node,
 // followed by a left shift when the mask does not start at bit 0.
 // An empty SDValue means nothing matched.
6146 if (DCI.isBeforeLegalizeOps())
6147 return SDValue();
6148
6149 SDValue FirstOperand = N->getOperand(0);
6150 SDValue SecondOperand = N->getOperand(1);
6151 unsigned FirstOperandOpc = FirstOperand.getOpcode();
6152 EVT ValTy = N->getValueType(0);
6153 SDLoc DL(N);
6154 uint64_t lsb, msb;
6155 unsigned SMIdx, SMLen;
6156 ConstantSDNode *CN;
6157 SDValue NewOperand;
6158 MVT GRLenVT = Subtarget.getGRLenVT();
6159
6160 if (SDValue R = combineAndNotIntoVANDN(N, DL, DAG))
6161 return R;
6162
6163 // BSTRPICK requires the 32S feature.
6164 if (!Subtarget.has32S())
6165 return SDValue();
6166
6167 // Op's second operand must be a shifted mask.
6168 if (!(CN = dyn_cast<ConstantSDNode>(SecondOperand)) ||
6169 !isShiftedMask_64(CN->getZExtValue(), SMIdx, SMLen))
6170 return SDValue();
6171
6172 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL) {
6173 // Pattern match BSTRPICK.
6174 // $dst = and ((sra or srl) $src , lsb), (2**len - 1)
6175 // => BSTRPICK $dst, $src, msb, lsb
6176 // where msb = lsb + len - 1
6177
6178 // The second operand of the shift must be an immediate.
 // Note: CN is reassigned here and no longer refers to the mask constant.
6179 if (!(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))))
6180 return SDValue();
6181
6182 lsb = CN->getZExtValue();
6183
6184 // Return if the shifted mask does not start at bit 0 or the sum of its
6185 // length and lsb exceeds the word's size.
6186 if (SMIdx != 0 || lsb + SMLen > ValTy.getSizeInBits())
6187 return SDValue();
6188
6189 NewOperand = FirstOperand.getOperand(0);
6190 } else {
6191 // Pattern match BSTRPICK.
6192 // $dst = and $src, (2**len- 1) , if len > 12
6193 // => BSTRPICK $dst, $src, msb, lsb
6194 // where lsb = 0 and msb = len - 1
6195
6196 // If the mask is <= 0xfff, andi can be used instead.
6197 if (CN->getZExtValue() <= 0xfff)
6198 return SDValue();
6199
6200 // Return if the MSB exceeds the width of the value type.
6201 if (SMIdx + SMLen > ValTy.getSizeInBits())
6202 return SDValue();
6203
6204 if (SMIdx > 0) {
6205 // Omit if the constant has more than 2 uses. This is a conservative
6206 // decision. Whether it is a win depends on the HW microarchitecture.
6207 // However it should always be better for 1 and 2 uses.
6208 if (CN->use_size() > 2)
6209 return SDValue();
6210 // Return if the constant can be composed by a single LU12I.W.
6211 if ((CN->getZExtValue() & 0xfff) == 0)
6212 return SDValue();
6213 // Return if the constant can be composed by a single ADDI with
6214 // the zero register.
6215 if (CN->getSExtValue() >= -2048 && CN->getSExtValue() < 0)
6216 return SDValue();
6217 }
6218
6219 lsb = SMIdx;
6220 NewOperand = FirstOperand;
6221 }
6222
6223 msb = lsb + SMLen - 1;
6224 SDValue NR0 = DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy, NewOperand,
6225 DAG.getConstant(msb, DL, GRLenVT),
6226 DAG.getConstant(lsb, DL, GRLenVT));
 // For the shift form, or when the field starts at bit 0, the BSTRPICK result
 // is already in place and no re-shift is needed.
6227 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL || lsb == 0)
6228 return NR0;
6229 // Try to optimize to
6230 // bstrpick $Rd, $Rs, msb, lsb
6231 // slli $Rd, $Rd, lsb
6232 return DAG.getNode(ISD::SHL, DL, ValTy, NR0,
6233 DAG.getConstant(lsb, DL, GRLenVT));
6234}
6235
6236// Return the original source vector if N consists of the low half
6237// of each 128-bit lane.
// Two shapes are accepted: an EXTRACT_SUBVECTOR at index 0 of a 128-bit
// source, or a BUILD_VECTOR of constant-index element extracts from a single
// 256-bit source. In both cases the source must have twice the bit width and
// twice the element count of N. Returns an empty SDValue on any mismatch.
6240
6241 EVT DstVT = N.getValueType();
6242 if (!DstVT.isVector())
6243 return SDValue();
6244
6245 // LSX canonical form:
6246 if (N.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
6247 SDValue Src = N.getOperand(0);
6248 EVT SrcVT = Src.getValueType();
6249
6250 if (!SrcVT.isVector() || !SrcVT.is128BitVector())
6251 return SDValue();
 // The subvector must start at element 0, i.e. be the low half.
6252 if (N.getConstantOperandVal(1) != 0)
6253 return SDValue();
6254 if (SrcVT.getSizeInBits() != DstVT.getSizeInBits() * 2)
6255 return SDValue();
6256 if (SrcVT.getVectorNumElements() != DstVT.getVectorNumElements() * 2)
6257 return SDValue();
6258
6259 return Src;
6260 }
6261
6262 // LASX canonical form:
6263 auto *BV = dyn_cast<BuildVectorSDNode>(N);
6264 if (!BV)
6265 return SDValue();
6266
6267 unsigned NumElts = DstVT.getVectorNumElements();
6268 if (NumElts % 2 != 0)
6269 return SDValue();
6270
6271 SDValue Src;
6272 EVT SrcVT;
6273
6274 for (unsigned I = 0; I != NumElts; ++I) {
6275 SDValue Elt = BV->getOperand(I);
6277 return SDValue();
6278
6279 SDValue ThisSrc = Elt.getOperand(0);
6280 SDValue Idx = Elt.getOperand(1);
6281 auto *CI = dyn_cast<ConstantSDNode>(Idx);
6282 if (!CI)
6283 return SDValue();
6284
 // The first element fixes the source and validates its type; every later
 // element must read from that same source.
6285 if (!Src) {
6286 Src = ThisSrc;
6287 SrcVT = Src.getValueType();
6288 if (!SrcVT.isVector())
6289 return SDValue();
6290
6291 if (SrcVT.getSizeInBits() != DstVT.getSizeInBits() * 2)
6292 return SDValue();
6293 if (SrcVT.getVectorNumElements() != NumElts * 2)
6294 return SDValue();
6295 if (!SrcVT.is256BitVector())
6296 return SDValue();
6297 } else if (ThisSrc != Src) {
6298 return SDValue();
6299 }
6300
 // The first Half results read source elements [0, Half); the remaining
 // results read source elements [NumElts, NumElts + Half).
6301 unsigned Half = NumElts / 2;
6302 unsigned ExpectedIdx = (I < Half) ? I : (I + Half);
6303 if (CI->getZExtValue() != ExpectedIdx)
6304 return SDValue();
6305 }
6306
6307 return Src;
6308}
6309
6312 const LoongArchSubtarget &Subtarget) {
 // Fold a SHL whose LHS is a single-use sign/zero extend of the low half of
 // each 128-bit lane (see matchLowHalfOf128BitLanes) and whose RHS is a
 // constant splat into VSLLWIL (signed) or VSLLWIL_U (unsigned).
 // Requires LSX. An empty SDValue means nothing matched.
6313 if (!Subtarget.hasExtLSX())
6314 return SDValue();
6315
6316 assert(N->getOpcode() == ISD::SHL && "Unexpected opcode");
6317
6318 EVT VT = N->getValueType(0);
6319 SDLoc DL(N);
6320
6321 SDValue LHS = N->getOperand(0);
6322 SDValue RHS = N->getOperand(1);
6323
6324 bool isSigned;
6325 unsigned ExtOpc = LHS.getOpcode();
6326 if (ExtOpc == ISD::SIGN_EXTEND)
6327 isSigned = true;
6328 else if (ExtOpc == ISD::ZERO_EXTEND)
6329 isSigned = false;
6330 else
6331 return SDValue();
6332
6333 if (!LHS.hasOneUse())
6334 return SDValue();
6335
6336 SDValue Vec = matchLowHalfOf128BitLanes(LHS.getOperand(0));
6337 if (!Vec)
6338 return SDValue();
6339
6340 EVT SrcVT = Vec.getValueType();
6341 EVT SrcEltVT = SrcVT.getVectorElementType();
6342 EVT DstEltVT = VT.getVectorElementType();
6343
 // Source and result must be vectors of equal total width, with result
 // elements exactly twice as wide as integer source elements of <= 32 bits.
6344 if (!SrcVT.isVector() || !VT.isVector())
6345 return SDValue();
6346 if (SrcVT.getSizeInBits() != VT.getSizeInBits())
6347 return SDValue();
6348 if (DstEltVT.getSizeInBits() != SrcEltVT.getSizeInBits() * 2)
6349 return SDValue();
6350 if (!SrcEltVT.isInteger() || SrcEltVT.getSizeInBits() > 32)
6351 return SDValue();
6352
6353 APInt Imm;
6354 if (!isConstantSplatVector(RHS, Imm, DstEltVT.getSizeInBits()))
6355 return SDValue();
 // The shift amount must be smaller than the source element width.
6356 if (!Imm.ult(SrcEltVT.getSizeInBits()))
6357 return SDValue();
6358
6359 unsigned Opc = isSigned ? LoongArchISD::VSLLWIL : LoongArchISD::VSLLWIL_U;
6360 SDValue Sht = DAG.getConstant(Imm.getZExtValue(), DL, Subtarget.getGRLenVT());
6361 return DAG.getNode(Opc, DL, VT, Vec, Sht);
6362}
6363
6366 const LoongArchSubtarget &Subtarget) {
 // Fold a logical right shift of a masked value into a single BSTRPICK node
 // (pattern below). Runs only with the 32S feature and after operation
 // legalization. An empty SDValue means nothing matched.
6367 // BSTRPICK requires the 32S feature.
6368 if (!Subtarget.has32S())
6369 return SDValue();
6370
6371 if (DCI.isBeforeLegalizeOps())
6372 return SDValue();
6373
6374 // $dst = srl (and $src, Mask), Shamt
6375 // =>
6376 // BSTRPICK $dst, $src, MaskIdx+MaskLen-1, Shamt
6377 // when Mask is a shifted mask, and MaskIdx <= Shamt <= MaskIdx+MaskLen-1
6378 //
6379
6380 SDValue FirstOperand = N->getOperand(0);
6381 ConstantSDNode *CN;
6382 EVT ValTy = N->getValueType(0);
6383 SDLoc DL(N);
6384 MVT GRLenVT = Subtarget.getGRLenVT();
6385 unsigned MaskIdx, MaskLen;
6386 uint64_t Shamt;
6387
6388 // The first operand must be an AND and the second operand of the AND must be
6389 // a shifted mask.
6390 if (FirstOperand.getOpcode() != ISD::AND ||
6391 !(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))) ||
6392 !isShiftedMask_64(CN->getZExtValue(), MaskIdx, MaskLen))
6393 return SDValue();
6394
6395 // The second operand (shift amount) must be an immediate.
 // CN is reused: it now refers to the shift amount, not the mask.
6396 if (!(CN = dyn_cast<ConstantSDNode>(N->getOperand(1))))
6397 return SDValue();
6398
6399 Shamt = CN->getZExtValue();
 // The shift amount must land inside the mask's bit range.
6400 if (MaskIdx <= Shamt && Shamt <= MaskIdx + MaskLen - 1)
6401 return DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy,
6402 FirstOperand->getOperand(0),
6403 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
6404 DAG.getConstant(Shamt, DL, GRLenVT));
6405
6406 return SDValue();
6407}
6408
6411 const LoongArchSubtarget &Subtarget) {
 // The horizontal widening fold is the only combine attempted for this node;
 // an empty SDValue is returned when it does not apply.
6412 if (SDValue V = performHorizWideningCombine(N, DAG, Subtarget))
6413 return V;
6414
6415 return SDValue();
6416}
6417
6418// Helper to peek through bitops/trunc/setcc to determine size of source vector.
6419// Allows BITCASTCombine to determine what size vector generated a <X x i1>.
// Returns true when every leaf reached from Src is a SETCC/TRUNCATE whose
// input is exactly Size bits wide, or an all-zeros/all-ones BUILD_VECTOR.
// Any opcode not listed below yields false. Depth is the current recursion
// depth and is incremented on each recursive call.
6420static bool checkBitcastSrcVectorSize(SDValue Src, unsigned Size,
6421 unsigned Depth) {
6422 // Limit recursion.
6424 return false;
6425 switch (Src.getOpcode()) {
6426 case ISD::SETCC:
6427 case ISD::TRUNCATE:
 // Leaf: the total bit width of the input operand decides the answer.
6428 return Src.getOperand(0).getValueSizeInBits() == Size;
6429 case ISD::FREEZE:
6430 return checkBitcastSrcVectorSize(Src.getOperand(0), Size, Depth + 1);
6431 case ISD::AND:
6432 case ISD::XOR:
6433 case ISD::OR:
 // Both inputs of a bitwise op must qualify.
6434 return checkBitcastSrcVectorSize(Src.getOperand(0), Size, Depth + 1) &&
6435 checkBitcastSrcVectorSize(Src.getOperand(1), Size, Depth + 1);
6436 case ISD::SELECT:
6437 case ISD::VSELECT:
 // The condition must have 1-bit scalars and both selected values must
 // qualify.
6438 return Src.getOperand(0).getScalarValueSizeInBits() == 1 &&
6439 checkBitcastSrcVectorSize(Src.getOperand(1), Size, Depth + 1) &&
6440 checkBitcastSrcVectorSize(Src.getOperand(2), Size, Depth + 1);
6441 case ISD::BUILD_VECTOR:
 // All-zeros / all-ones constants are accepted without a size check.
6442 return ISD::isBuildVectorAllZeros(Src.getNode()) ||
6443 ISD::isBuildVectorAllOnes(Src.getNode());
6444 }
6445 return false;
6446}
6447
6448// Helper to push sign extension of vXi1 SETCC result through bitops.
// Leaf opcodes (SETCC, FREEZE, TRUNCATE, BUILD_VECTOR) are sign-extended to
// SExtVT directly. Bitwise ops and selects are rebuilt in SExtVT from
// recursively extended value operands; a select's condition is passed through
// unchanged. Any other opcode reaches llvm_unreachable.
6450 SDValue Src, const SDLoc &DL) {
6451 switch (Src.getOpcode()) {
6452 case ISD::SETCC:
6453 case ISD::FREEZE:
6454 case ISD::TRUNCATE:
6455 case ISD::BUILD_VECTOR:
6456 return DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, Src);
6457 case ISD::AND:
6458 case ISD::XOR:
6459 case ISD::OR:
 // Re-create the same bitwise opcode on the extended operands.
6460 return DAG.getNode(
6461 Src.getOpcode(), DL, SExtVT,
6462 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(0), DL),
6463 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(1), DL));
6464 case ISD::SELECT:
6465 case ISD::VSELECT:
 // Only the two selected values are extended; operand 0 stays as-is.
6466 return DAG.getSelect(
6467 DL, SExtVT, Src.getOperand(0),
6468 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(1), DL),
6469 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(2), DL));
6470 }
6471 llvm_unreachable("Unexpected node type for vXi1 sign extension");
6472}
6473
/// Fold a bitcast of a single-use SETCC into one [X]VMSK* mask node.
///
/// The compared vector must be 128 bits wide with LSX, or 256 bits wide with
/// both LASX and the 32S feature. Only comparisons against an all-zeros or
/// all-ones build vector with the condition codes handled in the switch below
/// are folded. The mask node yields a GRLenVT value, which is zero-extended or
/// truncated to `T` and then bitcast to the result type.
/// NOTE(review): `T` is presumably an integer type sized to the result type
/// -- confirm against its definition.
/// Returns an empty SDValue when the pattern does not match.
6474static SDValue
6477 const LoongArchSubtarget &Subtarget) {
6478 SDLoc DL(N);
6479 EVT VT = N->getValueType(0);
6480 SDValue Src = N->getOperand(0);
6481 EVT SrcVT = Src.getValueType();
6482
6483 if (Src.getOpcode() != ISD::SETCC || !Src.hasOneUse())
6484 return SDValue();
6485
6486 bool UseLASX;
 // DELETED_NODE doubles as the "no opcode selected" sentinel.
6487 unsigned Opc = ISD::DELETED_NODE;
6488 EVT CmpVT = Src.getOperand(0).getValueType();
6489 EVT EltVT = CmpVT.getVectorElementType();
6490
6491 if (Subtarget.hasExtLSX() && CmpVT.getSizeInBits() == 128)
6492 UseLASX = false;
6493 else if (Subtarget.has32S() && Subtarget.hasExtLASX() &&
6494 CmpVT.getSizeInBits() == 256)
6495 UseLASX = true;
6496 else
6497 return SDValue();
6498
6499 SDValue SrcN1 = Src.getOperand(1);
6500 switch (cast<CondCodeSDNode>(Src.getOperand(2))->get()) {
6501 default:
6502 break;
6503 case ISD::SETEQ:
6504 // x == 0 => not (vmsknez.b x)
6505 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
6506 Opc = UseLASX ? LoongArchISD::XVMSKEQZ : LoongArchISD::VMSKEQZ;
6507 break;
6508 case ISD::SETGT:
6509 // x > -1 => vmskgez.b x
6510 if (ISD::isBuildVectorAllOnes(SrcN1.getNode()) && EltVT == MVT::i8)
6511 Opc = UseLASX ? LoongArchISD::XVMSKGEZ : LoongArchISD::VMSKGEZ;
6512 break;
6513 case ISD::SETGE:
6514 // x >= 0 => vmskgez.b x
6515 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
6516 Opc = UseLASX ? LoongArchISD::XVMSKGEZ : LoongArchISD::VMSKGEZ;
6517 break;
6518 case ISD::SETLT:
6519 // x < 0 => vmskltz.{b,h,w,d} x
6520 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) &&
6521 (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
6522 EltVT == MVT::i64))
6523 Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
6524 break;
6525 case ISD::SETLE:
6526 // x <= -1 => vmskltz.{b,h,w,d} x
6527 if (ISD::isBuildVectorAllOnes(SrcN1.getNode()) &&
6528 (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
6529 EltVT == MVT::i64))
6530 Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
6531 break;
6532 case ISD::SETNE:
6533 // x != 0 => vmsknez.b x
6534 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
6535 Opc = UseLASX ? LoongArchISD::XVMSKNEZ : LoongArchISD::VMSKNEZ;
6536 break;
6537 }
6538
 // No case above selected an opcode: nothing to fold.
6539 if (Opc == ISD::DELETED_NODE)
6540 return SDValue();
6541
6542 SDValue V = DAG.getNode(Opc, DL, Subtarget.getGRLenVT(), Src.getOperand(0));
6544 V = DAG.getZExtOrTrunc(V, DL, T);
6545 return DAG.getBitcast(VT, V);
6546}
6547
6550 const LoongArchSubtarget &Subtarget) {
 // Lower a bitcast from a vXi1 vector by building a mask with [X]VMSK* nodes.
 // Runs only before operation legalization and only for simple source types
 // whose scalar type is i1. The SETCC-specific fold is tried first; otherwise
 // the source is sign-extended to a full-width vector and [X]VMSKLTZ collects
 // the sign bits. An empty SDValue means nothing matched.
 // NOTE(review): `T` used at the end is presumably an integer type sized to
 // the result type -- confirm against its definition.
6551 SDLoc DL(N);
6552 EVT VT = N->getValueType(0);
6553 SDValue Src = N->getOperand(0);
6554 EVT SrcVT = Src.getValueType();
6555 MVT GRLenVT = Subtarget.getGRLenVT();
6556
6557 if (!DCI.isBeforeLegalizeOps())
6558 return SDValue();
6559
6560 if (!SrcVT.isSimple() || SrcVT.getScalarType() != MVT::i1)
6561 return SDValue();
6562
6563 // Combine SETCC and BITCAST into [X]VMSK{LT,GE,EQ,NE}Z when possible
6564 SDValue Res = performSETCC_BITCASTCombine(N, DAG, DCI, Subtarget);
6565 if (Res)
6566 return Res;
6567
6568 // Generate vXi1 using [X]VMSKLTZ
6569 MVT SExtVT;
6570 unsigned Opc;
6571 bool UseLASX = false;
6572 bool PropagateSExt = false;
6573
 // Give up on single-use compares of vectors wider than 256 bits.
6574 if (Src.getOpcode() == ISD::SETCC && Src.hasOneUse()) {
6575 EVT CmpVT = Src.getOperand(0).getValueType();
6576 if (CmpVT.getSizeInBits() > 256)
6577 return SDValue();
6578 }
6579
 // Choose the vector type to sign-extend into. The default is the 128-bit
 // type with the same element count. With LASX, when the i1 vector is derived
 // from 256-bit values, the 256-bit type is used and the extension is pushed
 // through the producing bitops.
6580 switch (SrcVT.getSimpleVT().SimpleTy) {
6581 default:
6582 return SDValue();
6583 case MVT::v2i1:
6584 SExtVT = MVT::v2i64;
6585 break;
6586 case MVT::v4i1:
6587 SExtVT = MVT::v4i32;
6588 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
6589 SExtVT = MVT::v4i64;
6590 UseLASX = true;
6591 PropagateSExt = true;
6592 }
6593 break;
6594 case MVT::v8i1:
6595 SExtVT = MVT::v8i16;
6596 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
6597 SExtVT = MVT::v8i32;
6598 UseLASX = true;
6599 PropagateSExt = true;
6600 }
6601 break;
6602 case MVT::v16i1:
6603 SExtVT = MVT::v16i8;
6604 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
6605 SExtVT = MVT::v16i16;
6606 UseLASX = true;
6607 PropagateSExt = true;
6608 }
6609 break;
6610 case MVT::v32i1:
6611 SExtVT = MVT::v32i8;
6612 UseLASX = true;
6613 break;
6614 };
6615 Src = PropagateSExt ? signExtendBitcastSrcVector(DAG, SExtVT, Src, DL)
6616 : DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, Src);
6617
6618 SDValue V;
 // Without 32S or LASX, a v32i8 source is split in two: each half gets its
 // own VMSKLTZ and the high half's mask is shifted left by 16 and OR-ed in.
 // Any other source that needs LASX cannot be handled on this path.
6619 if (!Subtarget.has32S() || !Subtarget.hasExtLASX()) {
6620 if (Src.getSimpleValueType() == MVT::v32i8) {
6621 SDValue Lo, Hi;
6622 std::tie(Lo, Hi) = DAG.SplitVector(Src, DL);
6623 Lo = DAG.getNode(LoongArchISD::VMSKLTZ, DL, GRLenVT, Lo);
6624 Hi = DAG.getNode(LoongArchISD::VMSKLTZ, DL, GRLenVT, Hi);
6625 Hi = DAG.getNode(ISD::SHL, DL, GRLenVT, Hi,
6626 DAG.getShiftAmountConstant(16, GRLenVT, DL));
6627 V = DAG.getNode(ISD::OR, DL, GRLenVT, Lo, Hi);
6628 } else if (UseLASX) {
6629 return SDValue();
6630 }
6631 }
6632
 // Common path: a single mask node over the whole sign-extended vector.
6633 if (!V) {
6634 Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
6635 V = DAG.getNode(Opc, DL, GRLenVT, Src);
6636 }
6637
6639 V = DAG.getZExtOrTrunc(V, DL, T);
6640 return DAG.getBitcast(VT, V);
6641}
6642
6645 const LoongArchSubtarget &Subtarget) {
6646 MVT GRLenVT = Subtarget.getGRLenVT();
6647 EVT ValTy = N->getValueType(0);
6648 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
6649 ConstantSDNode *CN0, *CN1;
6650 SDLoc DL(N);
6651 unsigned ValBits = ValTy.getSizeInBits();
6652 unsigned MaskIdx0, MaskLen0, MaskIdx1, MaskLen1;
6653 unsigned Shamt;
6654 bool SwapAndRetried = false;
6655
6656 // BSTRPICK requires the 32S feature.
6657 if (!Subtarget.has32S())
6658 return SDValue();
6659
6660 if (DCI.isBeforeLegalizeOps())
6661 return SDValue();
6662
6663 if (ValBits != 32 && ValBits != 64)
6664 return SDValue();
6665
6666Retry:
6667 // 1st pattern to match BSTRINS:
6668 // R = or (and X, mask0), (and (shl Y, lsb), mask1)
6669 // where mask1 = (2**size - 1) << lsb, mask0 = ~mask1
6670 // =>
6671 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
6672 if (N0.getOpcode() == ISD::AND &&
6673 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
6674 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
6675 N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL &&
6676 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
6677 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
6678 MaskIdx0 == MaskIdx1 && MaskLen0 == MaskLen1 &&
6679 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
6680 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
6681 (MaskIdx0 + MaskLen0 <= ValBits)) {
6682 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 1\n");
6683 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
6684 N1.getOperand(0).getOperand(0),
6685 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
6686 DAG.getConstant(MaskIdx0, DL, GRLenVT));
6687 }
6688
6689 // 2nd pattern to match BSTRINS:
6690 // R = or (and X, mask0), (shl (and Y, mask1), lsb)
6691 // where mask1 = (2**size - 1), mask0 = ~(mask1 << lsb)
6692 // =>
6693 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
6694 if (N0.getOpcode() == ISD::AND &&
6695 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
6696 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
6697 N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
6698 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
6699 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
6700 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
6701 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
6702 MaskLen0 == MaskLen1 && MaskIdx1 == 0 &&
6703 (MaskIdx0 + MaskLen0 <= ValBits)) {
6704 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 2\n");
6705 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
6706 N1.getOperand(0).getOperand(0),
6707 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
6708 DAG.getConstant(MaskIdx0, DL, GRLenVT));
6709 }
6710
6711 // 3rd pattern to match BSTRINS:
6712 // R = or (and X, mask0), (and Y, mask1)
6713 // where ~mask0 = (2**size - 1) << lsb, mask0 & mask1 = 0
6714 // =>
6715 // R = BSTRINS X, (shr (and Y, mask1), lsb), msb, lsb
6716 // where msb = lsb + size - 1
6717 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
6718 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
6719 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
6720 (MaskIdx0 + MaskLen0 <= 64) &&
6721 (CN1 = dyn_cast<ConstantSDNode>(N1->getOperand(1))) &&
6722 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
6723 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 3\n");
6724 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
6725 DAG.getNode(ISD::SRL, DL, N1->getValueType(0), N1,
6726 DAG.getConstant(MaskIdx0, DL, GRLenVT)),
6727 DAG.getConstant(ValBits == 32
6728 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
6729 : (MaskIdx0 + MaskLen0 - 1),
6730 DL, GRLenVT),
6731 DAG.getConstant(MaskIdx0, DL, GRLenVT));
6732 }
6733
6734 // 4th pattern to match BSTRINS:
6735 // R = or (and X, mask), (shl Y, shamt)
6736 // where mask = (2**shamt - 1)
6737 // =>
6738 // R = BSTRINS X, Y, ValBits - 1, shamt
6739 // where ValBits = 32 or 64
6740 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::SHL &&
6741 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
6742 isShiftedMask_64(CN0->getZExtValue(), MaskIdx0, MaskLen0) &&
6743 MaskIdx0 == 0 && (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
6744 (Shamt = CN1->getZExtValue()) == MaskLen0 &&
6745 (MaskIdx0 + MaskLen0 <= ValBits)) {
6746 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 4\n");
6747 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
6748 N1.getOperand(0),
6749 DAG.getConstant((ValBits - 1), DL, GRLenVT),
6750 DAG.getConstant(Shamt, DL, GRLenVT));
6751 }
6752
6753 // 5th pattern to match BSTRINS:
6754 // R = or (and X, mask), const
6755 // where ~mask = (2**size - 1) << lsb, mask & const = 0
6756 // =>
6757 // R = BSTRINS X, (const >> lsb), msb, lsb
6758 // where msb = lsb + size - 1
6759 if (N0.getOpcode() == ISD::AND &&
6760 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
6761 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
6762 (CN1 = dyn_cast<ConstantSDNode>(N1)) &&
6763 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
6764 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 5\n");
6765 return DAG.getNode(
6766 LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
6767 DAG.getSignedConstant(CN1->getSExtValue() >> MaskIdx0, DL, ValTy),
6768 DAG.getConstant(ValBits == 32 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
6769 : (MaskIdx0 + MaskLen0 - 1),
6770 DL, GRLenVT),
6771 DAG.getConstant(MaskIdx0, DL, GRLenVT));
6772 }
6773
6774 // 6th pattern.
6775 // a = b | ((c & mask) << shamt), where all positions in b to be overwritten
6776 // by the incoming bits are known to be zero.
6777 // =>
6778 // a = BSTRINS b, c, shamt + MaskLen - 1, shamt
6779 //
6780 // Note that the 1st pattern is a special situation of the 6th, i.e. the 6th
6781 // pattern is more common than the 1st. So we put the 1st before the 6th in
6782 // order to match as many nodes as possible.
6783 ConstantSDNode *CNMask, *CNShamt;
6784 unsigned MaskIdx, MaskLen;
6785 if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
6786 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
6787 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
6788 MaskIdx == 0 && (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
6789 CNShamt->getZExtValue() + MaskLen <= ValBits) {
6790 Shamt = CNShamt->getZExtValue();
6791 APInt ShMask(ValBits, CNMask->getZExtValue() << Shamt);
6792 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
6793 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 6\n");
6794 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
6795 N1.getOperand(0).getOperand(0),
6796 DAG.getConstant(Shamt + MaskLen - 1, DL, GRLenVT),
6797 DAG.getConstant(Shamt, DL, GRLenVT));
6798 }
6799 }
6800
6801 // 7th pattern.
6802 // a = b | ((c << shamt) & shifted_mask), where all positions in b to be
6803 // overwritten by the incoming bits are known to be zero.
6804 // =>
6805 // a = BSTRINS b, c, MaskIdx + MaskLen - 1, MaskIdx
6806 //
6807 // Similarly, the 7th pattern is more common than the 2nd. So we put the 2nd
6808 // before the 7th in order to match as many nodes as possible.
6809 if (N1.getOpcode() == ISD::AND &&
6810 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
6811 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
6812 N1.getOperand(0).getOpcode() == ISD::SHL &&
6813 (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
6814 CNShamt->getZExtValue() == MaskIdx) {
6815 APInt ShMask(ValBits, CNMask->getZExtValue());
6816 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
6817 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 7\n");
6818 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
6819 N1.getOperand(0).getOperand(0),
6820 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
6821 DAG.getConstant(MaskIdx, DL, GRLenVT));
6822 }
6823 }
6824
6825 // (or a, b) and (or b, a) are equivalent, so swap the operands and retry.
6826 if (!SwapAndRetried) {
6827 std::swap(N0, N1);
6828 SwapAndRetried = true;
6829 goto Retry;
6830 }
6831
6832 SwapAndRetried = false;
6833Retry2:
6834 // 8th pattern.
6835 // a = b | (c & shifted_mask), where all positions in b to be overwritten by
6836 // the incoming bits are known to be zero.
6837 // =>
6838 // a = BSTRINS b, c >> MaskIdx, MaskIdx + MaskLen - 1, MaskIdx
6839 //
6840 // Similarly, the 8th pattern is more common than the 4th and 5th patterns. So
6841 // we put it here in order to match as many nodes as possible or generate less
6842 // instructions.
6843 if (N1.getOpcode() == ISD::AND &&
6844 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
6845 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen)) {
6846 APInt ShMask(ValBits, CNMask->getZExtValue());
6847 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
6848 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 8\n");
6849 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
6850 DAG.getNode(ISD::SRL, DL, N1->getValueType(0),
6851 N1->getOperand(0),
6852 DAG.getConstant(MaskIdx, DL, GRLenVT)),
6853 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
6854 DAG.getConstant(MaskIdx, DL, GRLenVT));
6855 }
6856 }
6857 // Swap N0/N1 and retry.
6858 if (!SwapAndRetried) {
6859 std::swap(N0, N1);
6860 SwapAndRetried = true;
6861 goto Retry2;
6862 }
6863
6864 return SDValue();
6865}
6866
6867static bool checkValueWidth(SDValue V, ISD::LoadExtType &ExtType) {
6868 ExtType = ISD::NON_EXTLOAD;
6869
6870 switch (V.getNode()->getOpcode()) {
6871 case ISD::LOAD: {
6872 LoadSDNode *LoadNode = cast<LoadSDNode>(V.getNode());
6873 if ((LoadNode->getMemoryVT() == MVT::i8) ||
6874 (LoadNode->getMemoryVT() == MVT::i16)) {
6875 ExtType = LoadNode->getExtensionType();
6876 return true;
6877 }
6878 return false;
6879 }
6880 case ISD::AssertSext: {
6881 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
6882 if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
6883 ExtType = ISD::SEXTLOAD;
6884 return true;
6885 }
6886 return false;
6887 }
6888 case ISD::AssertZext: {
6889 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
6890 if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
6891 ExtType = ISD::ZEXTLOAD;
6892 return true;
6893 }
6894 return false;
6895 }
6896 default:
6897 return false;
6898 }
6899
6900 return false;
6901}
6902
6903// Eliminate redundant truncation and zero-extension nodes.
6904// * Case 1:
6905// +------------+ +------------+ +------------+
6906// | Input1 | | Input2 | | CC |
6907// +------------+ +------------+ +------------+
6908// | | |
6909// V V +----+
6910// +------------+ +------------+ |
6911// | TRUNCATE | | TRUNCATE | |
6912// +------------+ +------------+ |
6913// | | |
6914// V V |
6915// +------------+ +------------+ |
6916// | ZERO_EXT | | ZERO_EXT | |
6917// +------------+ +------------+ |
6918// | | |
6919// | +-------------+ |
6920// V V | |
6921// +----------------+ | |
6922// | AND | | |
6923// +----------------+ | |
6924// | | |
6925// +---------------+ | |
6926// | | |
6927// V V V
6928// +-------------+
6929// | CMP |
6930// +-------------+
6931// * Case 2:
6932// +------------+ +------------+ +-------------+ +------------+ +------------+
6933// | Input1 | | Input2 | | Constant -1 | | Constant 0 | | CC |
6934// +------------+ +------------+ +-------------+ +------------+ +------------+
6935// | | | | |
6936// V | | | |
6937// +------------+ | | | |
6938// | XOR |<---------------------+ | |
6939// +------------+ | | |
6940// | | | |
6941// V V +---------------+ |
6942// +------------+ +------------+ | |
6943// | TRUNCATE | | TRUNCATE | | +-------------------------+
6944// +------------+ +------------+ | |
6945// | | | |
6946// V V | |
6947// +------------+ +------------+ | |
6948// | ZERO_EXT | | ZERO_EXT | | |
6949// +------------+ +------------+ | |
6950// | | | |
6951// V V | |
6952// +----------------+ | |
6953// | AND | | |
6954// +----------------+ | |
6955// | | |
6956// +---------------+ | |
6957// | | |
6958// V V V
6959// +-------------+
6960// | CMP |
6961// +-------------+
6964 const LoongArchSubtarget &Subtarget) {
6965 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
6966
6967 SDNode *AndNode = N->getOperand(0).getNode();
6968 if (AndNode->getOpcode() != ISD::AND)
6969 return SDValue();
6970
6971 SDValue AndInputValue2 = AndNode->getOperand(1);
6972 if (AndInputValue2.getOpcode() != ISD::ZERO_EXTEND)
6973 return SDValue();
6974
6975 SDValue CmpInputValue = N->getOperand(1);
6976 SDValue AndInputValue1 = AndNode->getOperand(0);
6977 if (AndInputValue1.getOpcode() == ISD::XOR) {
6978 if (CC != ISD::SETEQ && CC != ISD::SETNE)
6979 return SDValue();
6980 ConstantSDNode *CN = dyn_cast<ConstantSDNode>(AndInputValue1.getOperand(1));
6981 if (!CN || !CN->isAllOnes())
6982 return SDValue();
6983 CN = dyn_cast<ConstantSDNode>(CmpInputValue);
6984 if (!CN || !CN->isZero())
6985 return SDValue();
6986 AndInputValue1 = AndInputValue1.getOperand(0);
6987 if (AndInputValue1.getOpcode() != ISD::ZERO_EXTEND)
6988 return SDValue();
6989 } else if (AndInputValue1.getOpcode() == ISD::ZERO_EXTEND) {
6990 if (AndInputValue2 != CmpInputValue)
6991 return SDValue();
6992 } else {
6993 return SDValue();
6994 }
6995
6996 SDValue TruncValue1 = AndInputValue1.getNode()->getOperand(0);
6997 if (TruncValue1.getOpcode() != ISD::TRUNCATE)
6998 return SDValue();
6999
7000 SDValue TruncValue2 = AndInputValue2.getNode()->getOperand(0);
7001 if (TruncValue2.getOpcode() != ISD::TRUNCATE)
7002 return SDValue();
7003
7004 SDValue TruncInputValue1 = TruncValue1.getNode()->getOperand(0);
7005 SDValue TruncInputValue2 = TruncValue2.getNode()->getOperand(0);
7006 ISD::LoadExtType ExtType1;
7007 ISD::LoadExtType ExtType2;
7008
7009 if (!checkValueWidth(TruncInputValue1, ExtType1) ||
7010 !checkValueWidth(TruncInputValue2, ExtType2))
7011 return SDValue();
7012
7013 if (TruncInputValue1->getValueType(0) != TruncInputValue2->getValueType(0) ||
7014 AndNode->getValueType(0) != TruncInputValue1->getValueType(0))
7015 return SDValue();
7016
7017 if ((ExtType2 != ISD::ZEXTLOAD) &&
7018 ((ExtType2 != ISD::SEXTLOAD) && (ExtType1 != ISD::SEXTLOAD)))
7019 return SDValue();
7020
7021 // These truncation and zero-extension nodes are not necessary, remove them.
7022 SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N), AndNode->getValueType(0),
7023 TruncInputValue1, TruncInputValue2);
7024 SDValue NewSetCC =
7025 DAG.getSetCC(SDLoc(N), N->getValueType(0), NewAnd, TruncInputValue2, CC);
7026 DAG.ReplaceAllUsesWith(N, NewSetCC.getNode());
7027 return SDValue(N, 0);
7028}
7029
7030// Combine (loongarch_bitrev_w (loongarch_revb_2w X)) to loongarch_bitrev_4b.
7033 const LoongArchSubtarget &Subtarget) {
7034 if (DCI.isBeforeLegalizeOps())
7035 return SDValue();
7036
7037 SDValue Src = N->getOperand(0);
7038 if (Src.getOpcode() != LoongArchISD::REVB_2W)
7039 return SDValue();
7040
7041 return DAG.getNode(LoongArchISD::BITREV_4B, SDLoc(N), N->getValueType(0),
7042 Src.getOperand(0));
7043}
7044
7045// Perform common combines for BR_CC and SELECT_CC conditions.
7046static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL,
7047 SelectionDAG &DAG, const LoongArchSubtarget &Subtarget) {
7048 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
7049
7050 // As far as arithmetic right shift always saves the sign,
7051 // shift can be omitted.
7052 // Fold setlt (sra X, N), 0 -> setlt X, 0 and
7053 // setge (sra X, N), 0 -> setge X, 0
7054 if (isNullConstant(RHS) && (CCVal == ISD::SETGE || CCVal == ISD::SETLT) &&
7055 LHS.getOpcode() == ISD::SRA) {
7056 LHS = LHS.getOperand(0);
7057 return true;
7058 }
7059
7060 if (!ISD::isIntEqualitySetCC(CCVal))
7061 return false;
7062
7063 // Fold ((setlt X, Y), 0, ne) -> (X, Y, lt)
7064 // Sometimes the setcc is introduced after br_cc/select_cc has been formed.
7065 if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
7066 LHS.getOperand(0).getValueType() == Subtarget.getGRLenVT()) {
7067 // If we're looking for eq 0 instead of ne 0, we need to invert the
7068 // condition.
7069 bool Invert = CCVal == ISD::SETEQ;
7070 CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
7071 if (Invert)
7072 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
7073
7074 RHS = LHS.getOperand(1);
7075 LHS = LHS.getOperand(0);
7076 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
7077
7078 CC = DAG.getCondCode(CCVal);
7079 return true;
7080 }
7081
7082 // Fold ((srl (and X, 1<<C), C), 0, eq/ne) -> ((shl X, GRLen-1-C), 0, ge/lt)
7083 if (isNullConstant(RHS) && LHS.getOpcode() == ISD::SRL && LHS.hasOneUse() &&
7084 LHS.getOperand(1).getOpcode() == ISD::Constant) {
7085 SDValue LHS0 = LHS.getOperand(0);
7086 if (LHS0.getOpcode() == ISD::AND &&
7087 LHS0.getOperand(1).getOpcode() == ISD::Constant) {
7088 uint64_t Mask = LHS0.getConstantOperandVal(1);
7089 uint64_t ShAmt = LHS.getConstantOperandVal(1);
7090 if (isPowerOf2_64(Mask) && Log2_64(Mask) == ShAmt) {
7091 CCVal = CCVal == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
7092 CC = DAG.getCondCode(CCVal);
7093
7094 ShAmt = LHS.getValueSizeInBits() - 1 - ShAmt;
7095 LHS = LHS0.getOperand(0);
7096 if (ShAmt != 0)
7097 LHS =
7098 DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS0.getOperand(0),
7099 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
7100 return true;
7101 }
7102 }
7103 }
7104
7105 // (X, 1, setne) -> (X, 0, seteq) if we can prove X is 0/1.
7106 // This can occur when legalizing some floating point comparisons.
7107 APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
7108 if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
7109 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
7110 CC = DAG.getCondCode(CCVal);
7111 RHS = DAG.getConstant(0, DL, LHS.getValueType());
7112 return true;
7113 }
7114
7115 return false;
7116}
7117
7120 const LoongArchSubtarget &Subtarget) {
7121 SDValue LHS = N->getOperand(1);
7122 SDValue RHS = N->getOperand(2);
7123 SDValue CC = N->getOperand(3);
7124 SDLoc DL(N);
7125
7126 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
7127 return DAG.getNode(LoongArchISD::BR_CC, DL, N->getValueType(0),
7128 N->getOperand(0), LHS, RHS, CC, N->getOperand(4));
7129
7130 return SDValue();
7131}
7132
7135 const LoongArchSubtarget &Subtarget) {
7136 // Transform
7137 SDValue LHS = N->getOperand(0);
7138 SDValue RHS = N->getOperand(1);
7139 SDValue CC = N->getOperand(2);
7140 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
7141 SDValue TrueV = N->getOperand(3);
7142 SDValue FalseV = N->getOperand(4);
7143 SDLoc DL(N);
7144 EVT VT = N->getValueType(0);
7145
7146 // If the True and False values are the same, we don't need a select_cc.
7147 if (TrueV == FalseV)
7148 return TrueV;
7149
7150 // (select (x < 0), y, z) -> x >> (GRLEN - 1) & (y - z) + z
7151 // (select (x >= 0), y, z) -> x >> (GRLEN - 1) & (z - y) + y
7152 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
7154 (CCVal == ISD::CondCode::SETLT || CCVal == ISD::CondCode::SETGE)) {
7155 if (CCVal == ISD::CondCode::SETGE)
7156 std::swap(TrueV, FalseV);
7157
7158 int64_t TrueSImm = cast<ConstantSDNode>(TrueV)->getSExtValue();
7159 int64_t FalseSImm = cast<ConstantSDNode>(FalseV)->getSExtValue();
7160 // Only handle simm12, if it is not in this range, it can be considered as
7161 // register.
7162 if (isInt<12>(TrueSImm) && isInt<12>(FalseSImm) &&
7163 isInt<12>(TrueSImm - FalseSImm)) {
7164 SDValue SRA =
7165 DAG.getNode(ISD::SRA, DL, VT, LHS,
7166 DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT));
7167 SDValue AND =
7168 DAG.getNode(ISD::AND, DL, VT, SRA,
7169 DAG.getSignedConstant(TrueSImm - FalseSImm, DL, VT));
7170 return DAG.getNode(ISD::ADD, DL, VT, AND, FalseV);
7171 }
7172
7173 if (CCVal == ISD::CondCode::SETGE)
7174 std::swap(TrueV, FalseV);
7175 }
7176
7177 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
7178 return DAG.getNode(LoongArchISD::SELECT_CC, DL, N->getValueType(0),
7179 {LHS, RHS, CC, TrueV, FalseV});
7180
7181 return SDValue();
7182}
7183
7184template <unsigned N>
7186 SelectionDAG &DAG,
7187 const LoongArchSubtarget &Subtarget,
7188 bool IsSigned = false) {
7189 SDLoc DL(Node);
7190 auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
7191 // Check the ImmArg.
7192 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
7193 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
7194 DAG.getContext()->emitError(Node->getOperationName(0) +
7195 ": argument out of range.");
7196 return DAG.getNode(ISD::UNDEF, DL, Subtarget.getGRLenVT());
7197 }
7198 return DAG.getConstant(CImm->getZExtValue(), DL, Subtarget.getGRLenVT());
7199}
7200
7201template <unsigned N>
7202static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp,
7203 SelectionDAG &DAG, bool IsSigned = false) {
7204 SDLoc DL(Node);
7205 EVT ResTy = Node->getValueType(0);
7206 auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
7207
7208 // Check the ImmArg.
7209 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
7210 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
7211 DAG.getContext()->emitError(Node->getOperationName(0) +
7212 ": argument out of range.");
7213 return DAG.getNode(ISD::UNDEF, DL, ResTy);
7214 }
7215 return DAG.getConstant(
7217 IsSigned ? CImm->getSExtValue() : CImm->getZExtValue(), IsSigned),
7218 DL, ResTy);
7219}
7220
7222 SDLoc DL(Node);
7223 EVT ResTy = Node->getValueType(0);
7224 SDValue Vec = Node->getOperand(2);
7225 SDValue Mask = DAG.getConstant(Vec.getScalarValueSizeInBits() - 1, DL, ResTy);
7226 return DAG.getNode(ISD::AND, DL, ResTy, Vec, Mask);
7227}
7228
7230 SDLoc DL(Node);
7231 EVT ResTy = Node->getValueType(0);
7232 SDValue One = DAG.getConstant(1, DL, ResTy);
7233 SDValue Bit =
7234 DAG.getNode(ISD::SHL, DL, ResTy, One, truncateVecElts(Node, DAG));
7235
7236 return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1),
7237 DAG.getNOT(DL, Bit, ResTy));
7238}
7239
7240template <unsigned N>
7242 SDLoc DL(Node);
7243 EVT ResTy = Node->getValueType(0);
7244 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
7245 // Check the unsigned ImmArg.
7246 if (!isUInt<N>(CImm->getZExtValue())) {
7247 DAG.getContext()->emitError(Node->getOperationName(0) +
7248 ": argument out of range.");
7249 return DAG.getNode(ISD::UNDEF, DL, ResTy);
7250 }
7251
7252 APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
7253 SDValue Mask = DAG.getConstant(~BitImm, DL, ResTy);
7254
7255 return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1), Mask);
7256}
7257
7258template <unsigned N>
7260 SDLoc DL(Node);
7261 EVT ResTy = Node->getValueType(0);
7262 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
7263 // Check the unsigned ImmArg.
7264 if (!isUInt<N>(CImm->getZExtValue())) {
7265 DAG.getContext()->emitError(Node->getOperationName(0) +
7266 ": argument out of range.");
7267 return DAG.getNode(ISD::UNDEF, DL, ResTy);
7268 }
7269
7270 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
7271 SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
7272 return DAG.getNode(ISD::OR, DL, ResTy, Node->getOperand(1), BitImm);
7273}
7274
7275template <unsigned N>
7277 SDLoc DL(Node);
7278 EVT ResTy = Node->getValueType(0);
7279 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
7280 // Check the unsigned ImmArg.
7281 if (!isUInt<N>(CImm->getZExtValue())) {
7282 DAG.getContext()->emitError(Node->getOperationName(0) +
7283 ": argument out of range.");
7284 return DAG.getNode(ISD::UNDEF, DL, ResTy);
7285 }
7286
7287 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
7288 SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
7289 return DAG.getNode(ISD::XOR, DL, ResTy, Node->getOperand(1), BitImm);
7290}
7291
7292template <unsigned W>
7294 unsigned ResOp) {
7295 unsigned Imm = N->getConstantOperandVal(2);
7296 if (!isUInt<W>(Imm)) {
7297 const StringRef ErrorMsg = "argument out of range";
7298 DAG.getContext()->emitError(N->getOperationName(0) + ": " + ErrorMsg + ".");
7299 return DAG.getUNDEF(N->getValueType(0));
7300 }
7301 SDLoc DL(N);
7302 SDValue Vec = N->getOperand(1);
7303 SDValue Idx = DAG.getConstant(Imm, DL, MVT::i32);
7305 return DAG.getNode(ResOp, DL, N->getValueType(0), Vec, Idx, EltVT);
7306}
7307
7308static SDValue
7311 const LoongArchSubtarget &Subtarget) {
7312 SDLoc DL(N);
7313 switch (N->getConstantOperandVal(0)) {
7314 default:
7315 break;
7316 case Intrinsic::loongarch_lsx_vadd_b:
7317 case Intrinsic::loongarch_lsx_vadd_h:
7318 case Intrinsic::loongarch_lsx_vadd_w:
7319 case Intrinsic::loongarch_lsx_vadd_d:
7320 case Intrinsic::loongarch_lasx_xvadd_b:
7321 case Intrinsic::loongarch_lasx_xvadd_h:
7322 case Intrinsic::loongarch_lasx_xvadd_w:
7323 case Intrinsic::loongarch_lasx_xvadd_d:
7324 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
7325 N->getOperand(2));
7326 case Intrinsic::loongarch_lsx_vaddi_bu:
7327 case Intrinsic::loongarch_lsx_vaddi_hu:
7328 case Intrinsic::loongarch_lsx_vaddi_wu:
7329 case Intrinsic::loongarch_lsx_vaddi_du:
7330 case Intrinsic::loongarch_lasx_xvaddi_bu:
7331 case Intrinsic::loongarch_lasx_xvaddi_hu:
7332 case Intrinsic::loongarch_lasx_xvaddi_wu:
7333 case Intrinsic::loongarch_lasx_xvaddi_du:
7334 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
7335 lowerVectorSplatImm<5>(N, 2, DAG));
7336 case Intrinsic::loongarch_lsx_vsub_b:
7337 case Intrinsic::loongarch_lsx_vsub_h:
7338 case Intrinsic::loongarch_lsx_vsub_w:
7339 case Intrinsic::loongarch_lsx_vsub_d:
7340 case Intrinsic::loongarch_lasx_xvsub_b:
7341 case Intrinsic::loongarch_lasx_xvsub_h:
7342 case Intrinsic::loongarch_lasx_xvsub_w:
7343 case Intrinsic::loongarch_lasx_xvsub_d:
7344 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
7345 N->getOperand(2));
7346 case Intrinsic::loongarch_lsx_vsubi_bu:
7347 case Intrinsic::loongarch_lsx_vsubi_hu:
7348 case Intrinsic::loongarch_lsx_vsubi_wu:
7349 case Intrinsic::loongarch_lsx_vsubi_du:
7350 case Intrinsic::loongarch_lasx_xvsubi_bu:
7351 case Intrinsic::loongarch_lasx_xvsubi_hu:
7352 case Intrinsic::loongarch_lasx_xvsubi_wu:
7353 case Intrinsic::loongarch_lasx_xvsubi_du:
7354 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
7355 lowerVectorSplatImm<5>(N, 2, DAG));
7356 case Intrinsic::loongarch_lsx_vneg_b:
7357 case Intrinsic::loongarch_lsx_vneg_h:
7358 case Intrinsic::loongarch_lsx_vneg_w:
7359 case Intrinsic::loongarch_lsx_vneg_d:
7360 case Intrinsic::loongarch_lasx_xvneg_b:
7361 case Intrinsic::loongarch_lasx_xvneg_h:
7362 case Intrinsic::loongarch_lasx_xvneg_w:
7363 case Intrinsic::loongarch_lasx_xvneg_d:
7364 return DAG.getNode(
7365 ISD::SUB, DL, N->getValueType(0),
7366 DAG.getConstant(
7367 APInt(N->getValueType(0).getScalarType().getSizeInBits(), 0,
7368 /*isSigned=*/true),
7369 SDLoc(N), N->getValueType(0)),
7370 N->getOperand(1));
7371 case Intrinsic::loongarch_lsx_vmax_b:
7372 case Intrinsic::loongarch_lsx_vmax_h:
7373 case Intrinsic::loongarch_lsx_vmax_w:
7374 case Intrinsic::loongarch_lsx_vmax_d:
7375 case Intrinsic::loongarch_lasx_xvmax_b:
7376 case Intrinsic::loongarch_lasx_xvmax_h:
7377 case Intrinsic::loongarch_lasx_xvmax_w:
7378 case Intrinsic::loongarch_lasx_xvmax_d:
7379 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
7380 N->getOperand(2));
7381 case Intrinsic::loongarch_lsx_vmax_bu:
7382 case Intrinsic::loongarch_lsx_vmax_hu:
7383 case Intrinsic::loongarch_lsx_vmax_wu:
7384 case Intrinsic::loongarch_lsx_vmax_du:
7385 case Intrinsic::loongarch_lasx_xvmax_bu:
7386 case Intrinsic::loongarch_lasx_xvmax_hu:
7387 case Intrinsic::loongarch_lasx_xvmax_wu:
7388 case Intrinsic::loongarch_lasx_xvmax_du:
7389 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
7390 N->getOperand(2));
7391 case Intrinsic::loongarch_lsx_vmaxi_b:
7392 case Intrinsic::loongarch_lsx_vmaxi_h:
7393 case Intrinsic::loongarch_lsx_vmaxi_w:
7394 case Intrinsic::loongarch_lsx_vmaxi_d:
7395 case Intrinsic::loongarch_lasx_xvmaxi_b:
7396 case Intrinsic::loongarch_lasx_xvmaxi_h:
7397 case Intrinsic::loongarch_lasx_xvmaxi_w:
7398 case Intrinsic::loongarch_lasx_xvmaxi_d:
7399 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
7400 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
7401 case Intrinsic::loongarch_lsx_vmaxi_bu:
7402 case Intrinsic::loongarch_lsx_vmaxi_hu:
7403 case Intrinsic::loongarch_lsx_vmaxi_wu:
7404 case Intrinsic::loongarch_lsx_vmaxi_du:
7405 case Intrinsic::loongarch_lasx_xvmaxi_bu:
7406 case Intrinsic::loongarch_lasx_xvmaxi_hu:
7407 case Intrinsic::loongarch_lasx_xvmaxi_wu:
7408 case Intrinsic::loongarch_lasx_xvmaxi_du:
7409 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
7410 lowerVectorSplatImm<5>(N, 2, DAG));
7411 case Intrinsic::loongarch_lsx_vmin_b:
7412 case Intrinsic::loongarch_lsx_vmin_h:
7413 case Intrinsic::loongarch_lsx_vmin_w:
7414 case Intrinsic::loongarch_lsx_vmin_d:
7415 case Intrinsic::loongarch_lasx_xvmin_b:
7416 case Intrinsic::loongarch_lasx_xvmin_h:
7417 case Intrinsic::loongarch_lasx_xvmin_w:
7418 case Intrinsic::loongarch_lasx_xvmin_d:
7419 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
7420 N->getOperand(2));
7421 case Intrinsic::loongarch_lsx_vmin_bu:
7422 case Intrinsic::loongarch_lsx_vmin_hu:
7423 case Intrinsic::loongarch_lsx_vmin_wu:
7424 case Intrinsic::loongarch_lsx_vmin_du:
7425 case Intrinsic::loongarch_lasx_xvmin_bu:
7426 case Intrinsic::loongarch_lasx_xvmin_hu:
7427 case Intrinsic::loongarch_lasx_xvmin_wu:
7428 case Intrinsic::loongarch_lasx_xvmin_du:
7429 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
7430 N->getOperand(2));
7431 case Intrinsic::loongarch_lsx_vmini_b:
7432 case Intrinsic::loongarch_lsx_vmini_h:
7433 case Intrinsic::loongarch_lsx_vmini_w:
7434 case Intrinsic::loongarch_lsx_vmini_d:
7435 case Intrinsic::loongarch_lasx_xvmini_b:
7436 case Intrinsic::loongarch_lasx_xvmini_h:
7437 case Intrinsic::loongarch_lasx_xvmini_w:
7438 case Intrinsic::loongarch_lasx_xvmini_d:
7439 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
7440 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
7441 case Intrinsic::loongarch_lsx_vmini_bu:
7442 case Intrinsic::loongarch_lsx_vmini_hu:
7443 case Intrinsic::loongarch_lsx_vmini_wu:
7444 case Intrinsic::loongarch_lsx_vmini_du:
7445 case Intrinsic::loongarch_lasx_xvmini_bu:
7446 case Intrinsic::loongarch_lasx_xvmini_hu:
7447 case Intrinsic::loongarch_lasx_xvmini_wu:
7448 case Intrinsic::loongarch_lasx_xvmini_du:
7449 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
7450 lowerVectorSplatImm<5>(N, 2, DAG));
7451 case Intrinsic::loongarch_lsx_vmul_b:
7452 case Intrinsic::loongarch_lsx_vmul_h:
7453 case Intrinsic::loongarch_lsx_vmul_w:
7454 case Intrinsic::loongarch_lsx_vmul_d:
7455 case Intrinsic::loongarch_lasx_xvmul_b:
7456 case Intrinsic::loongarch_lasx_xvmul_h:
7457 case Intrinsic::loongarch_lasx_xvmul_w:
7458 case Intrinsic::loongarch_lasx_xvmul_d:
7459 return DAG.getNode(ISD::MUL, DL, N->getValueType(0), N->getOperand(1),
7460 N->getOperand(2));
7461 case Intrinsic::loongarch_lsx_vmadd_b:
7462 case Intrinsic::loongarch_lsx_vmadd_h:
7463 case Intrinsic::loongarch_lsx_vmadd_w:
7464 case Intrinsic::loongarch_lsx_vmadd_d:
7465 case Intrinsic::loongarch_lasx_xvmadd_b:
7466 case Intrinsic::loongarch_lasx_xvmadd_h:
7467 case Intrinsic::loongarch_lasx_xvmadd_w:
7468 case Intrinsic::loongarch_lasx_xvmadd_d: {
7469 EVT ResTy = N->getValueType(0);
7470 return DAG.getNode(ISD::ADD, SDLoc(N), ResTy, N->getOperand(1),
7471 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
7472 N->getOperand(3)));
7473 }
7474 case Intrinsic::loongarch_lsx_vmsub_b:
7475 case Intrinsic::loongarch_lsx_vmsub_h:
7476 case Intrinsic::loongarch_lsx_vmsub_w:
7477 case Intrinsic::loongarch_lsx_vmsub_d:
7478 case Intrinsic::loongarch_lasx_xvmsub_b:
7479 case Intrinsic::loongarch_lasx_xvmsub_h:
7480 case Intrinsic::loongarch_lasx_xvmsub_w:
7481 case Intrinsic::loongarch_lasx_xvmsub_d: {
7482 EVT ResTy = N->getValueType(0);
7483 return DAG.getNode(ISD::SUB, SDLoc(N), ResTy, N->getOperand(1),
7484 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
7485 N->getOperand(3)));
7486 }
7487 case Intrinsic::loongarch_lsx_vdiv_b:
7488 case Intrinsic::loongarch_lsx_vdiv_h:
7489 case Intrinsic::loongarch_lsx_vdiv_w:
7490 case Intrinsic::loongarch_lsx_vdiv_d:
7491 case Intrinsic::loongarch_lasx_xvdiv_b:
7492 case Intrinsic::loongarch_lasx_xvdiv_h:
7493 case Intrinsic::loongarch_lasx_xvdiv_w:
7494 case Intrinsic::loongarch_lasx_xvdiv_d:
7495 return DAG.getNode(ISD::SDIV, DL, N->getValueType(0), N->getOperand(1),
7496 N->getOperand(2));
7497 case Intrinsic::loongarch_lsx_vdiv_bu:
7498 case Intrinsic::loongarch_lsx_vdiv_hu:
7499 case Intrinsic::loongarch_lsx_vdiv_wu:
7500 case Intrinsic::loongarch_lsx_vdiv_du:
7501 case Intrinsic::loongarch_lasx_xvdiv_bu:
7502 case Intrinsic::loongarch_lasx_xvdiv_hu:
7503 case Intrinsic::loongarch_lasx_xvdiv_wu:
7504 case Intrinsic::loongarch_lasx_xvdiv_du:
7505 return DAG.getNode(ISD::UDIV, DL, N->getValueType(0), N->getOperand(1),
7506 N->getOperand(2));
7507 case Intrinsic::loongarch_lsx_vmod_b:
7508 case Intrinsic::loongarch_lsx_vmod_h:
7509 case Intrinsic::loongarch_lsx_vmod_w:
7510 case Intrinsic::loongarch_lsx_vmod_d:
7511 case Intrinsic::loongarch_lasx_xvmod_b:
7512 case Intrinsic::loongarch_lasx_xvmod_h:
7513 case Intrinsic::loongarch_lasx_xvmod_w:
7514 case Intrinsic::loongarch_lasx_xvmod_d:
7515 return DAG.getNode(ISD::SREM, DL, N->getValueType(0), N->getOperand(1),
7516 N->getOperand(2));
7517 case Intrinsic::loongarch_lsx_vmod_bu:
7518 case Intrinsic::loongarch_lsx_vmod_hu:
7519 case Intrinsic::loongarch_lsx_vmod_wu:
7520 case Intrinsic::loongarch_lsx_vmod_du:
7521 case Intrinsic::loongarch_lasx_xvmod_bu:
7522 case Intrinsic::loongarch_lasx_xvmod_hu:
7523 case Intrinsic::loongarch_lasx_xvmod_wu:
7524 case Intrinsic::loongarch_lasx_xvmod_du:
7525 return DAG.getNode(ISD::UREM, DL, N->getValueType(0), N->getOperand(1),
7526 N->getOperand(2));
7527 case Intrinsic::loongarch_lsx_vand_v:
7528 case Intrinsic::loongarch_lasx_xvand_v:
7529 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
7530 N->getOperand(2));
7531 case Intrinsic::loongarch_lsx_vor_v:
7532 case Intrinsic::loongarch_lasx_xvor_v:
7533 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
7534 N->getOperand(2));
7535 case Intrinsic::loongarch_lsx_vxor_v:
7536 case Intrinsic::loongarch_lasx_xvxor_v:
7537 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
7538 N->getOperand(2));
7539 case Intrinsic::loongarch_lsx_vnor_v:
7540 case Intrinsic::loongarch_lasx_xvnor_v: {
7541 SDValue Res = DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
7542 N->getOperand(2));
7543 return DAG.getNOT(DL, Res, Res->getValueType(0));
7544 }
7545 case Intrinsic::loongarch_lsx_vandi_b:
7546 case Intrinsic::loongarch_lasx_xvandi_b:
7547 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
7548 lowerVectorSplatImm<8>(N, 2, DAG));
7549 case Intrinsic::loongarch_lsx_vori_b:
7550 case Intrinsic::loongarch_lasx_xvori_b:
7551 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
7552 lowerVectorSplatImm<8>(N, 2, DAG));
7553 case Intrinsic::loongarch_lsx_vxori_b:
7554 case Intrinsic::loongarch_lasx_xvxori_b:
7555 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
7556 lowerVectorSplatImm<8>(N, 2, DAG));
7557 case Intrinsic::loongarch_lsx_vsll_b:
7558 case Intrinsic::loongarch_lsx_vsll_h:
7559 case Intrinsic::loongarch_lsx_vsll_w:
7560 case Intrinsic::loongarch_lsx_vsll_d:
7561 case Intrinsic::loongarch_lasx_xvsll_b:
7562 case Intrinsic::loongarch_lasx_xvsll_h:
7563 case Intrinsic::loongarch_lasx_xvsll_w:
7564 case Intrinsic::loongarch_lasx_xvsll_d:
7565 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
7566 truncateVecElts(N, DAG));
7567 case Intrinsic::loongarch_lsx_vslli_b:
7568 case Intrinsic::loongarch_lasx_xvslli_b:
7569 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
7570 lowerVectorSplatImm<3>(N, 2, DAG));
7571 case Intrinsic::loongarch_lsx_vslli_h:
7572 case Intrinsic::loongarch_lasx_xvslli_h:
7573 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
7574 lowerVectorSplatImm<4>(N, 2, DAG));
7575 case Intrinsic::loongarch_lsx_vslli_w:
7576 case Intrinsic::loongarch_lasx_xvslli_w:
7577 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
7578 lowerVectorSplatImm<5>(N, 2, DAG));
7579 case Intrinsic::loongarch_lsx_vslli_d:
7580 case Intrinsic::loongarch_lasx_xvslli_d:
7581 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
7582 lowerVectorSplatImm<6>(N, 2, DAG));
7583 case Intrinsic::loongarch_lsx_vsrl_b:
7584 case Intrinsic::loongarch_lsx_vsrl_h:
7585 case Intrinsic::loongarch_lsx_vsrl_w:
7586 case Intrinsic::loongarch_lsx_vsrl_d:
7587 case Intrinsic::loongarch_lasx_xvsrl_b:
7588 case Intrinsic::loongarch_lasx_xvsrl_h:
7589 case Intrinsic::loongarch_lasx_xvsrl_w:
7590 case Intrinsic::loongarch_lasx_xvsrl_d:
7591 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
7592 truncateVecElts(N, DAG));
7593 case Intrinsic::loongarch_lsx_vsrli_b:
7594 case Intrinsic::loongarch_lasx_xvsrli_b:
7595 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
7596 lowerVectorSplatImm<3>(N, 2, DAG));
7597 case Intrinsic::loongarch_lsx_vsrli_h:
7598 case Intrinsic::loongarch_lasx_xvsrli_h:
7599 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
7600 lowerVectorSplatImm<4>(N, 2, DAG));
7601 case Intrinsic::loongarch_lsx_vsrli_w:
7602 case Intrinsic::loongarch_lasx_xvsrli_w:
7603 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
7604 lowerVectorSplatImm<5>(N, 2, DAG));
7605 case Intrinsic::loongarch_lsx_vsrli_d:
7606 case Intrinsic::loongarch_lasx_xvsrli_d:
7607 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
7608 lowerVectorSplatImm<6>(N, 2, DAG));
7609 case Intrinsic::loongarch_lsx_vsra_b:
7610 case Intrinsic::loongarch_lsx_vsra_h:
7611 case Intrinsic::loongarch_lsx_vsra_w:
7612 case Intrinsic::loongarch_lsx_vsra_d:
7613 case Intrinsic::loongarch_lasx_xvsra_b:
7614 case Intrinsic::loongarch_lasx_xvsra_h:
7615 case Intrinsic::loongarch_lasx_xvsra_w:
7616 case Intrinsic::loongarch_lasx_xvsra_d:
7617 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
7618 truncateVecElts(N, DAG));
7619 case Intrinsic::loongarch_lsx_vsrai_b:
7620 case Intrinsic::loongarch_lasx_xvsrai_b:
7621 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
7622 lowerVectorSplatImm<3>(N, 2, DAG));
7623 case Intrinsic::loongarch_lsx_vsrai_h:
7624 case Intrinsic::loongarch_lasx_xvsrai_h:
7625 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
7626 lowerVectorSplatImm<4>(N, 2, DAG));
7627 case Intrinsic::loongarch_lsx_vsrai_w:
7628 case Intrinsic::loongarch_lasx_xvsrai_w:
7629 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
7630 lowerVectorSplatImm<5>(N, 2, DAG));
7631 case Intrinsic::loongarch_lsx_vsrai_d:
7632 case Intrinsic::loongarch_lasx_xvsrai_d:
7633 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
7634 lowerVectorSplatImm<6>(N, 2, DAG));
7635 case Intrinsic::loongarch_lsx_vclz_b:
7636 case Intrinsic::loongarch_lsx_vclz_h:
7637 case Intrinsic::loongarch_lsx_vclz_w:
7638 case Intrinsic::loongarch_lsx_vclz_d:
7639 case Intrinsic::loongarch_lasx_xvclz_b:
7640 case Intrinsic::loongarch_lasx_xvclz_h:
7641 case Intrinsic::loongarch_lasx_xvclz_w:
7642 case Intrinsic::loongarch_lasx_xvclz_d:
7643 return DAG.getNode(ISD::CTLZ, DL, N->getValueType(0), N->getOperand(1));
7644 case Intrinsic::loongarch_lsx_vpcnt_b:
7645 case Intrinsic::loongarch_lsx_vpcnt_h:
7646 case Intrinsic::loongarch_lsx_vpcnt_w:
7647 case Intrinsic::loongarch_lsx_vpcnt_d:
7648 case Intrinsic::loongarch_lasx_xvpcnt_b:
7649 case Intrinsic::loongarch_lasx_xvpcnt_h:
7650 case Intrinsic::loongarch_lasx_xvpcnt_w:
7651 case Intrinsic::loongarch_lasx_xvpcnt_d:
7652 return DAG.getNode(ISD::CTPOP, DL, N->getValueType(0), N->getOperand(1));
7653 case Intrinsic::loongarch_lsx_vbitclr_b:
7654 case Intrinsic::loongarch_lsx_vbitclr_h:
7655 case Intrinsic::loongarch_lsx_vbitclr_w:
7656 case Intrinsic::loongarch_lsx_vbitclr_d:
7657 case Intrinsic::loongarch_lasx_xvbitclr_b:
7658 case Intrinsic::loongarch_lasx_xvbitclr_h:
7659 case Intrinsic::loongarch_lasx_xvbitclr_w:
7660 case Intrinsic::loongarch_lasx_xvbitclr_d:
7661 return lowerVectorBitClear(N, DAG);
7662 case Intrinsic::loongarch_lsx_vbitclri_b:
7663 case Intrinsic::loongarch_lasx_xvbitclri_b:
7664 return lowerVectorBitClearImm<3>(N, DAG);
7665 case Intrinsic::loongarch_lsx_vbitclri_h:
7666 case Intrinsic::loongarch_lasx_xvbitclri_h:
7667 return lowerVectorBitClearImm<4>(N, DAG);
7668 case Intrinsic::loongarch_lsx_vbitclri_w:
7669 case Intrinsic::loongarch_lasx_xvbitclri_w:
7670 return lowerVectorBitClearImm<5>(N, DAG);
7671 case Intrinsic::loongarch_lsx_vbitclri_d:
7672 case Intrinsic::loongarch_lasx_xvbitclri_d:
7673 return lowerVectorBitClearImm<6>(N, DAG);
7674 case Intrinsic::loongarch_lsx_vbitset_b:
7675 case Intrinsic::loongarch_lsx_vbitset_h:
7676 case Intrinsic::loongarch_lsx_vbitset_w:
7677 case Intrinsic::loongarch_lsx_vbitset_d:
7678 case Intrinsic::loongarch_lasx_xvbitset_b:
7679 case Intrinsic::loongarch_lasx_xvbitset_h:
7680 case Intrinsic::loongarch_lasx_xvbitset_w:
7681 case Intrinsic::loongarch_lasx_xvbitset_d: {
7682 EVT VecTy = N->getValueType(0);
7683 SDValue One = DAG.getConstant(1, DL, VecTy);
7684 return DAG.getNode(
7685 ISD::OR, DL, VecTy, N->getOperand(1),
7686 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
7687 }
7688 case Intrinsic::loongarch_lsx_vbitseti_b:
7689 case Intrinsic::loongarch_lasx_xvbitseti_b:
7690 return lowerVectorBitSetImm<3>(N, DAG);
7691 case Intrinsic::loongarch_lsx_vbitseti_h:
7692 case Intrinsic::loongarch_lasx_xvbitseti_h:
7693 return lowerVectorBitSetImm<4>(N, DAG);
7694 case Intrinsic::loongarch_lsx_vbitseti_w:
7695 case Intrinsic::loongarch_lasx_xvbitseti_w:
7696 return lowerVectorBitSetImm<5>(N, DAG);
7697 case Intrinsic::loongarch_lsx_vbitseti_d:
7698 case Intrinsic::loongarch_lasx_xvbitseti_d:
7699 return lowerVectorBitSetImm<6>(N, DAG);
7700 case Intrinsic::loongarch_lsx_vbitrev_b:
7701 case Intrinsic::loongarch_lsx_vbitrev_h:
7702 case Intrinsic::loongarch_lsx_vbitrev_w:
7703 case Intrinsic::loongarch_lsx_vbitrev_d:
7704 case Intrinsic::loongarch_lasx_xvbitrev_b:
7705 case Intrinsic::loongarch_lasx_xvbitrev_h:
7706 case Intrinsic::loongarch_lasx_xvbitrev_w:
7707 case Intrinsic::loongarch_lasx_xvbitrev_d: {
7708 EVT VecTy = N->getValueType(0);
7709 SDValue One = DAG.getConstant(1, DL, VecTy);
7710 return DAG.getNode(
7711 ISD::XOR, DL, VecTy, N->getOperand(1),
7712 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
7713 }
7714 case Intrinsic::loongarch_lsx_vbitrevi_b:
7715 case Intrinsic::loongarch_lasx_xvbitrevi_b:
7716 return lowerVectorBitRevImm<3>(N, DAG);
7717 case Intrinsic::loongarch_lsx_vbitrevi_h:
7718 case Intrinsic::loongarch_lasx_xvbitrevi_h:
7719 return lowerVectorBitRevImm<4>(N, DAG);
7720 case Intrinsic::loongarch_lsx_vbitrevi_w:
7721 case Intrinsic::loongarch_lasx_xvbitrevi_w:
7722 return lowerVectorBitRevImm<5>(N, DAG);
7723 case Intrinsic::loongarch_lsx_vbitrevi_d:
7724 case Intrinsic::loongarch_lasx_xvbitrevi_d:
7725 return lowerVectorBitRevImm<6>(N, DAG);
7726 case Intrinsic::loongarch_lsx_vfadd_s:
7727 case Intrinsic::loongarch_lsx_vfadd_d:
7728 case Intrinsic::loongarch_lasx_xvfadd_s:
7729 case Intrinsic::loongarch_lasx_xvfadd_d:
7730 return DAG.getNode(ISD::FADD, DL, N->getValueType(0), N->getOperand(1),
7731 N->getOperand(2));
7732 case Intrinsic::loongarch_lsx_vfsub_s:
7733 case Intrinsic::loongarch_lsx_vfsub_d:
7734 case Intrinsic::loongarch_lasx_xvfsub_s:
7735 case Intrinsic::loongarch_lasx_xvfsub_d:
7736 return DAG.getNode(ISD::FSUB, DL, N->getValueType(0), N->getOperand(1),
7737 N->getOperand(2));
7738 case Intrinsic::loongarch_lsx_vfmul_s:
7739 case Intrinsic::loongarch_lsx_vfmul_d:
7740 case Intrinsic::loongarch_lasx_xvfmul_s:
7741 case Intrinsic::loongarch_lasx_xvfmul_d:
7742 return DAG.getNode(ISD::FMUL, DL, N->getValueType(0), N->getOperand(1),
7743 N->getOperand(2));
7744 case Intrinsic::loongarch_lsx_vfdiv_s:
7745 case Intrinsic::loongarch_lsx_vfdiv_d:
7746 case Intrinsic::loongarch_lasx_xvfdiv_s:
7747 case Intrinsic::loongarch_lasx_xvfdiv_d:
7748 return DAG.getNode(ISD::FDIV, DL, N->getValueType(0), N->getOperand(1),
7749 N->getOperand(2));
7750 case Intrinsic::loongarch_lsx_vfmadd_s:
7751 case Intrinsic::loongarch_lsx_vfmadd_d:
7752 case Intrinsic::loongarch_lasx_xvfmadd_s:
7753 case Intrinsic::loongarch_lasx_xvfmadd_d:
7754 return DAG.getNode(ISD::FMA, DL, N->getValueType(0), N->getOperand(1),
7755 N->getOperand(2), N->getOperand(3));
7756 case Intrinsic::loongarch_lsx_vinsgr2vr_b:
7757 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
7758 N->getOperand(1), N->getOperand(2),
7759 legalizeIntrinsicImmArg<4>(N, 3, DAG, Subtarget));
7760 case Intrinsic::loongarch_lsx_vinsgr2vr_h:
7761 case Intrinsic::loongarch_lasx_xvinsgr2vr_w:
7762 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
7763 N->getOperand(1), N->getOperand(2),
7764 legalizeIntrinsicImmArg<3>(N, 3, DAG, Subtarget));
7765 case Intrinsic::loongarch_lsx_vinsgr2vr_w:
7766 case Intrinsic::loongarch_lasx_xvinsgr2vr_d:
7767 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
7768 N->getOperand(1), N->getOperand(2),
7769 legalizeIntrinsicImmArg<2>(N, 3, DAG, Subtarget));
7770 case Intrinsic::loongarch_lsx_vinsgr2vr_d:
7771 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
7772 N->getOperand(1), N->getOperand(2),
7773 legalizeIntrinsicImmArg<1>(N, 3, DAG, Subtarget));
7774 case Intrinsic::loongarch_lsx_vreplgr2vr_b:
7775 case Intrinsic::loongarch_lsx_vreplgr2vr_h:
7776 case Intrinsic::loongarch_lsx_vreplgr2vr_w:
7777 case Intrinsic::loongarch_lsx_vreplgr2vr_d:
7778 case Intrinsic::loongarch_lasx_xvreplgr2vr_b:
7779 case Intrinsic::loongarch_lasx_xvreplgr2vr_h:
7780 case Intrinsic::loongarch_lasx_xvreplgr2vr_w:
7781 case Intrinsic::loongarch_lasx_xvreplgr2vr_d:
7782 return DAG.getNode(LoongArchISD::VREPLGR2VR, DL, N->getValueType(0),
7783 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
7784 N->getOperand(1)));
7785 case Intrinsic::loongarch_lsx_vreplve_b:
7786 case Intrinsic::loongarch_lsx_vreplve_h:
7787 case Intrinsic::loongarch_lsx_vreplve_w:
7788 case Intrinsic::loongarch_lsx_vreplve_d:
7789 case Intrinsic::loongarch_lasx_xvreplve_b:
7790 case Intrinsic::loongarch_lasx_xvreplve_h:
7791 case Intrinsic::loongarch_lasx_xvreplve_w:
7792 case Intrinsic::loongarch_lasx_xvreplve_d:
7793 return DAG.getNode(LoongArchISD::VREPLVE, DL, N->getValueType(0),
7794 N->getOperand(1),
7795 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
7796 N->getOperand(2)));
7797 case Intrinsic::loongarch_lsx_vpickve2gr_b:
7798 if (!Subtarget.is64Bit())
7799 return lowerVectorPickVE2GR<4>(N, DAG, LoongArchISD::VPICK_SEXT_ELT);
7800 break;
7801 case Intrinsic::loongarch_lsx_vpickve2gr_h:
7802 case Intrinsic::loongarch_lasx_xvpickve2gr_w:
7803 if (!Subtarget.is64Bit())
7804 return lowerVectorPickVE2GR<3>(N, DAG, LoongArchISD::VPICK_SEXT_ELT);
7805 break;
7806 case Intrinsic::loongarch_lsx_vpickve2gr_w:
7807 if (!Subtarget.is64Bit())
7808 return lowerVectorPickVE2GR<2>(N, DAG, LoongArchISD::VPICK_SEXT_ELT);
7809 break;
7810 case Intrinsic::loongarch_lsx_vpickve2gr_bu:
7811 if (!Subtarget.is64Bit())
7812 return lowerVectorPickVE2GR<4>(N, DAG, LoongArchISD::VPICK_ZEXT_ELT);
7813 break;
7814 case Intrinsic::loongarch_lsx_vpickve2gr_hu:
7815 case Intrinsic::loongarch_lasx_xvpickve2gr_wu:
7816 if (!Subtarget.is64Bit())
7817 return lowerVectorPickVE2GR<3>(N, DAG, LoongArchISD::VPICK_ZEXT_ELT);
7818 break;
7819 case Intrinsic::loongarch_lsx_vpickve2gr_wu:
7820 if (!Subtarget.is64Bit())
7821 return lowerVectorPickVE2GR<2>(N, DAG, LoongArchISD::VPICK_ZEXT_ELT);
7822 break;
7823 case Intrinsic::loongarch_lsx_bz_b:
7824 case Intrinsic::loongarch_lsx_bz_h:
7825 case Intrinsic::loongarch_lsx_bz_w:
7826 case Intrinsic::loongarch_lsx_bz_d:
7827 case Intrinsic::loongarch_lasx_xbz_b:
7828 case Intrinsic::loongarch_lasx_xbz_h:
7829 case Intrinsic::loongarch_lasx_xbz_w:
7830 case Intrinsic::loongarch_lasx_xbz_d:
7831 if (!Subtarget.is64Bit())
7832 return DAG.getNode(LoongArchISD::VALL_ZERO, DL, N->getValueType(0),
7833 N->getOperand(1));
7834 break;
7835 case Intrinsic::loongarch_lsx_bz_v:
7836 case Intrinsic::loongarch_lasx_xbz_v:
7837 if (!Subtarget.is64Bit())
7838 return DAG.getNode(LoongArchISD::VANY_ZERO, DL, N->getValueType(0),
7839 N->getOperand(1));
7840 break;
7841 case Intrinsic::loongarch_lsx_bnz_b:
7842 case Intrinsic::loongarch_lsx_bnz_h:
7843 case Intrinsic::loongarch_lsx_bnz_w:
7844 case Intrinsic::loongarch_lsx_bnz_d:
7845 case Intrinsic::loongarch_lasx_xbnz_b:
7846 case Intrinsic::loongarch_lasx_xbnz_h:
7847 case Intrinsic::loongarch_lasx_xbnz_w:
7848 case Intrinsic::loongarch_lasx_xbnz_d:
7849 if (!Subtarget.is64Bit())
7850 return DAG.getNode(LoongArchISD::VALL_NONZERO, DL, N->getValueType(0),
7851 N->getOperand(1));
7852 break;
7853 case Intrinsic::loongarch_lsx_bnz_v:
7854 case Intrinsic::loongarch_lasx_xbnz_v:
7855 if (!Subtarget.is64Bit())
7856 return DAG.getNode(LoongArchISD::VANY_NONZERO, DL, N->getValueType(0),
7857 N->getOperand(1));
7858 break;
7859 case Intrinsic::loongarch_lasx_concat_128_s:
7860 case Intrinsic::loongarch_lasx_concat_128_d:
7861 case Intrinsic::loongarch_lasx_concat_128:
7862 return DAG.getNode(ISD::CONCAT_VECTORS, DL, N->getValueType(0),
7863 N->getOperand(1), N->getOperand(2));
7864 }
7865 return SDValue();
7866}
7867
7870 const LoongArchSubtarget &Subtarget) {
7871 // If the input to MOVGR2FR_W_LA64 is just MOVFR2GR_S_LA64 then the
7872 // conversion is unnecessary and can be replaced with the
7873 // MOVFR2GR_S_LA64 operand.
7874 SDValue Op0 = N->getOperand(0);
7875 if (Op0.getOpcode() == LoongArchISD::MOVFR2GR_S_LA64)
7876 return Op0.getOperand(0);
// The operand is some other node: return an empty SDValue.
7877 return SDValue();
7878}
7879
7882 const LoongArchSubtarget &Subtarget) {
7883 // If the input to MOVFR2GR_S_LA64 is just MOVGR2FR_W_LA64 then the
7884 // conversion is unnecessary and can be replaced with the MOVGR2FR_W_LA64
7885 // operand.
7886 SDValue Op0 = N->getOperand(0);
7887 if (Op0->getOpcode() == LoongArchISD::MOVGR2FR_W_LA64) {
// The value forwarded in place of N must already have N's result type.
7888 assert(Op0.getOperand(0).getValueType() == N->getSimpleValueType(0) &&
7889 "Unexpected value type!");
7890 return Op0.getOperand(0);
7891 }
// The operand is some other node: return an empty SDValue.
7892 return SDValue();
7893}
7894
// Runs demanded-bits simplification on N with every bit of its scalar result
// type demanded.
7895static SDValue
7898 MVT VT = N->getSimpleValueType(0);
7899 unsigned NumBits = VT.getScalarSizeInBits();
7900
7901 // Simplify the inputs.
7902 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// The mask is all-ones and NumBits wide, i.e. no result bit is treated as dead.
7903 APInt DemandedMask(APInt::getAllOnes(NumBits));
// If SimplifyDemandedBits returns true, hand back N itself rather than a new
// node.
7904 if (TLI.SimplifyDemandedBits(SDValue(N, 0), DemandedMask, DCI))
7905 return SDValue(N, 0);
7906
7907 return SDValue();
7908}
7910static SDValue
7913 const LoongArchSubtarget &Subtarget) {
7914 SDValue Op0 = N->getOperand(0);
7915 SDLoc DL(N);
7916
7917 // If the input to SplitPairF64 is just BuildPairF64 then the operation is
7918 // redundant. Instead, use BuildPairF64's operands directly.
7919 if (Op0->getOpcode() == LoongArchISD::BUILD_PAIR_F64)
7920 return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
7921
// An undef input is replaced by two undef i32 results.
7922 if (Op0->isUndef()) {
7923 SDValue Lo = DAG.getUNDEF(MVT::i32);
7924 SDValue Hi = DAG.getUNDEF(MVT::i32);
7925 return DCI.CombineTo(N, Lo, Hi);
7926 }
7927
7928 // It's cheaper to materialise two 32-bit integers than to load a double
7929 // from the constant pool and transfer it to integer registers through the
7930 // stack.
// V is the raw bit pattern of the FP constant; Lo takes bits [31:0] and Hi
// takes bits [63:32].
7932 APInt V = C->getValueAPF().bitcastToAPInt();
7933 SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
7934 SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
7935 return DCI.CombineTo(N, Lo, Hi);
7936 }
7937
7938 return SDValue();
7939}
7940
7941/// Do target-specific dag combines on LoongArchISD::VANDN nodes.
7944 const LoongArchSubtarget &Subtarget) {
7945 SDValue N0 = N->getOperand(0);
7946 SDValue N1 = N->getOperand(1);
7947 MVT VT = N->getSimpleValueType(0);
7948 SDLoc DL(N);
7949
7950 // VANDN(undef, x) -> 0
7951 // VANDN(x, undef) -> 0
7952 if (N0.isUndef() || N1.isUndef())
7953 return DAG.getConstant(0, DL, VT);
7954
7955 // VANDN(0, x) -> x
7957 return N1;
7958
7959 // VANDN(x, 0) -> 0
7961 return DAG.getConstant(0, DL, VT);
7962
7963 // VANDN(x, -1) -> NOT(x) -> XOR(x, -1)
7965 return DAG.getNOT(DL, N0, VT);
7966
7967 // Turn VANDN back to AND if input is inverted.
7968 if (SDValue Not = isNOT(N0, DAG))
7969 return DAG.getNode(ISD::AND, DL, VT, DAG.getBitcast(VT, Not), N1);
7970
7971 // Folds for better commutativity:
7972 if (N1->hasOneUse()) {
7973 // VANDN(x,NOT(y)) -> AND(NOT(x),NOT(y)) -> NOT(OR(X,Y)).
7974 if (SDValue Not = isNOT(N1, DAG))
7975 return DAG.getNOT(
7976 DL, DAG.getNode(ISD::OR, DL, VT, N0, DAG.getBitcast(VT, Not)), VT);
7977
7978 // VANDN(x, SplatVector(Imm)) -> AND(NOT(x), NOT(SplatVector(~Imm)))
7979 // -> NOT(OR(x, SplatVector(~Imm)))
7980 // Combination is performed only when VT is v16i8/v32i8, using `vnori.b` to
7981 // gain benefits.
7982 if (!DCI.isBeforeLegalizeOps() && (VT == MVT::v16i8 || VT == MVT::v32i8) &&
7983 N1.getOpcode() == ISD::BUILD_VECTOR) {
7984 if (SDValue SplatValue =
7985 cast<BuildVectorSDNode>(N1.getNode())->getSplatValue()) {
// Give up unless N1 is the only user of the splatted value.
7986 if (!N1->isOnlyUserOf(SplatValue.getNode()))
7987 return SDValue();
7988
7989 if (auto *C = dyn_cast<ConstantSDNode>(SplatValue)) {
// Complement the splat constant and keep only its low 8 bits (~Imm).
7990 uint8_t NCVal = static_cast<uint8_t>(~(C->getSExtValue()));
7991 SDValue Not =
7992 DAG.getSplat(VT, DL, DAG.getTargetConstant(NCVal, DL, MVT::i8));
7993 return DAG.getNOT(
7994 DL, DAG.getNode(ISD::OR, DL, VT, N0, DAG.getBitcast(VT, Not)),
7995 VT);
7996 }
7997 }
7998 }
7999 }
8000
8001 return SDValue();
8002}
8003
8006 const LoongArchSubtarget &Subtarget) {
8007 SDLoc DL(N);
8008 EVT VT = N->getValueType(0);
8009
// Only scalar f32/f64 results are handled, and only when the subtarget has
// the matching basic FP feature (hasBasicF for f32, hasBasicD for f64).
8010 if (VT != MVT::f32 && VT != MVT::f64)
8011 return SDValue();
8012 if (VT == MVT::f32 && !Subtarget.hasBasicF())
8013 return SDValue();
8014 if (VT == MVT::f64 && !Subtarget.hasBasicD())
8015 return SDValue();
8016
8017 // Only optimize when the source and destination types have the same width.
8018 if (VT.getSizeInBits() != N->getOperand(0).getValueSizeInBits())
8019 return SDValue();
8020
8021 SDValue Src = N->getOperand(0);
8022 // If the result of an integer load is only used by an integer-to-float
8023 // conversion, use a fp load instead. This eliminates an integer-to-float-move
8024 // (movgr2fr) instruction.
8025 if (ISD::isNormalLoad(Src.getNode()) && Src.hasOneUse() &&
8026 // Do not change the width of a volatile load. This condition check is
8027 // inspired by AArch64.
8028 !cast<LoadSDNode>(Src)->isVolatile()) {
8029 LoadSDNode *LN0 = cast<LoadSDNode>(Src);
// Re-issue the load with the FP result type, reusing the original chain,
// base pointer, pointer info, alignment and memory-operand flags.
8030 SDValue Load = DAG.getLoad(VT, DL, LN0->getChain(), LN0->getBasePtr(),
8031 LN0->getPointerInfo(), LN0->getAlign(),
8032 LN0->getMemOperand()->getFlags());
8033
8034 // Make sure successors of the original load stay after it by updating them
8035 // to use the new Chain.
8036 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), Load.getValue(1));
// The conversion is then emitted as a LoongArchISD::SITOF node on the FP load.
8037 return DAG.getNode(LoongArchISD::SITOF, SDLoc(N), VT, Load);
8038 }
8039
8040 return SDValue();
8041}
8042
8043// Using [X]VFTINTRZ_W_D for double to signed 32-bit integer conversion.
8044// For example:
8045// v4i32 = fp_to_sint (concat_vectors v2f64, v2f64)
8046// Can be combined into:
8047// v4i32 = VFTINTRZ_W_D v2f64, v2f64
8050 const LoongArchSubtarget &Subtarget) {
8051 if (!Subtarget.hasExtLSX())
8052 return SDValue();
8053
8054 SDLoc DL(N);
8055 EVT DstVT = N->getValueType(0);
8056 SDValue Src = N->getOperand(0);
8057 EVT SrcVT = Src.getValueType();
8058 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
8059
8060 if (!DstVT.isVector() || !DstVT.isSimple() || !SrcVT.isSimple())
8061 return SDValue();
8062
8063 unsigned SrcEltBits = SrcVT.getScalarSizeInBits();
8064 unsigned SrcBits = SrcVT.getSizeInBits();
8065 unsigned DstEltBits = DstVT.getScalarSizeInBits();
8066 unsigned NumElts = DstVT.getVectorNumElements();
// Block width used to split the source: 256 bits with LASX, otherwise 128.
8067 unsigned BlockBits = Subtarget.hasExtLASX() ? 256 : 128;
8068
8069 if (!isPowerOf2_32(NumElts) || !isPowerOf2_32(DstEltBits))
8070 return SDValue();
8071
// The source must be a whole number of blocks, or exactly 128 bits wide.
8072 if (SrcBits % BlockBits != 0 && SrcBits != 128)
8073 return SDValue();
8074
// Destination elements narrower than 32 bits: redo the same conversion to a
// vector of i32 with the same element count, then truncate to DstVT.
8075 if (DstEltBits < 32) {
8076 MVT PromoteVT = MVT::getVectorVT(MVT::getIntegerVT(32), NumElts);
8077 SDValue Conv = DAG.getNode(N->getOpcode(), DL, PromoteVT, Src);
8078 return DAG.getNode(ISD::TRUNCATE, DL, DstVT, Conv);
8079 }
8080
// From here on only 64-bit source elements to 32-bit destination elements.
8081 if (SrcEltBits != 64 || DstEltBits != 32)
8082 return SDValue();
8083
8084 if (!IsSigned) {
8085 // LASX already has pattern for double convert to uint32.
8086 if (Subtarget.hasExtLASX())
8087 return SDValue();
// Without LASX, the unsigned case goes through a signed conversion to i64
// elements followed by a truncate to DstVT.
8088 MVT TmpVT = MVT::getVectorVT(MVT::i64, NumElts);
8089 SDValue Tmp = DAG.getNode(ISD::FP_TO_SINT, DL, TmpVT, Src);
8090 return DAG.getNode(ISD::TRUNCATE, DL, DstVT, Tmp);
8091 }
8092
// Collect the f64 blocks to convert: the operands of a CONCAT_VECTORS whose
// first operand already has BlockVT, else BlockBits-wide sub-vectors, else the
// source itself.
8094 unsigned BlockNumElts = BlockBits / 64;
8095 MVT BlockVT = MVT::getVectorVT(MVT::f64, BlockNumElts);
8096 if (Src.getOpcode() == ISD::CONCAT_VECTORS &&
8097 Src.getOperand(0).getValueType() == BlockVT) {
8098 for (unsigned i = 0; i < Src.getNumOperands(); i++)
8099 Blocks.push_back(Src.getOperand(i));
8100 } else if (SrcBits > BlockBits) {
8101 // Wider than one register: extract each BlockBits-wide sub-vector.
8102 for (unsigned i = 0; i < SrcBits / BlockBits; i++)
8103 Blocks.push_back(
8104 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, BlockVT, Src,
8105 DAG.getVectorIdxConstant(i * BlockNumElts, DL)));
8106 } else {
// The source is no wider than one block: use it whole and shrink BlockBits to
// its width.
8107 BlockBits = SrcBits;
8108 Blocks.push_back(Src);
8109 }
8110
8111 MVT NativeVT = BlockBits == 256 ? MVT::v8i32 : MVT::v4i32;
// Consume the blocks pairwise. When there is only one block it is passed as
// both operands of VFTINTRZ.
8113 for (unsigned i = 0; i < Blocks.size(); i += 2) {
8114 SDValue Lo = Blocks[i];
8115 SDValue Hi = Blocks.size() > 1 ? Blocks[i + 1] : Lo;
8116 SDValue Res = DAG.getNode(LoongArchISD::VFTINTRZ, DL, NativeVT, Hi, Lo);
8117
// For 256-bit blocks the i32 lanes are reordered with mask
// {0, 1, 4, 5, 2, 3, 6, 7}.
// NOTE(review): presumably needed because the instruction works per 128-bit
// half - confirm against the ISA manual.
8118 if (BlockBits == 256) {
8119 SDValue Undef = DAG.getUNDEF(Res.getValueType());
8120 SmallVector<int, 8> Mask = {0, 1, 4, 5, 2, 3, 6, 7};
8121 Res = DAG.getVectorShuffle(Res.getValueType(), DL, Res, Undef, Mask);
8122 Res = DAG.getBitcast(NativeVT, Res);
8123 }
8124
8125 Parts.push_back(Res);
8126 }
8127
// A single block: take DstVT from index 0 of the one result. Otherwise
// concatenate all partial results into DstVT.
8128 if (Blocks.size() == 1)
8129 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, DstVT, Parts[0],
8130 DAG.getVectorIdxConstant(0, DL));
8131 return DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Parts);
8132}
8133
8134// Try to widen AND, OR and XOR nodes to VT in order to remove casts around
8135// logical operations, like in the example below.
8136// or (and (truncate x, truncate y)),
8137// (xor (truncate z, build_vector (constants)))
8138// Given a target type \p VT, we generate
8139// or (and x, y), (xor z, zext(build_vector (constants)))
8140// given x, y and z are of type \p VT. We can do so, if operands are either
8141// truncates from VT types, the second operand is a vector of constants, can
8142// be recursively promoted or is an existing extension we can extend further.
8144 SelectionDAG &DAG,
8145 const LoongArchSubtarget &Subtarget,
8146 unsigned Depth) {
8147 // Limit recursion to avoid excessive compile times.
8149 return SDValue();
8150
// Only bitwise logic nodes are widened.
8151 if (!ISD::isBitwiseLogicOp(N.getOpcode()))
8152 return SDValue();
8153
8154 SDValue N0 = N.getOperand(0);
8155 SDValue N1 = N.getOperand(1);
8156
// The same logic opcode must be legal or promotable at the wider type VT.
8157 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
8158 if (!TLI.isOperationLegalOrPromote(N.getOpcode(), VT))
8159 return SDValue();
8160
// First try to widen the left operand recursively; otherwise it must be a
// truncate from VT, which is then stripped.
8161 if (SDValue NN0 =
8162 PromoteMaskArithmetic(N0, DL, VT, DAG, Subtarget, Depth + 1))
8163 N0 = NN0;
8164 else {
8165 // The left side has to be a 'trunc'.
8166 bool LHSTrunc = N0.getOpcode() == ISD::TRUNCATE &&
8167 N0.getOperand(0).getValueType() == VT;
8168 if (LHSTrunc)
8169 N0 = N0.getOperand(0);
8170 else
8171 return SDValue();
8172 }
8173
// Same for the right operand, which has more accepted forms (see below).
8174 if (SDValue NN1 =
8175 PromoteMaskArithmetic(N1, DL, VT, DAG, Subtarget, Depth + 1))
8176 N1 = NN1;
8177 else {
8178 // The right side has to be a 'trunc', a (foldable) constant or an
8179 // existing extension we can extend further.
8180 bool RHSTrunc = N1.getOpcode() == ISD::TRUNCATE &&
8181 N1.getOperand(0).getValueType() == VT;
8182 if (RHSTrunc)
8183 N1 = N1.getOperand(0);
// A single-use *_EXTEND_VECTOR_INREG node is rebuilt to produce VT directly.
// This is only done for 256-bit VT on LASX.
8184 else if (ISD::isExtVecInRegOpcode(N1.getOpcode()) && VT.is256BitVector() &&
8185 Subtarget.hasExtLASX() && N1.hasOneUse())
8186 N1 = DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0));
8187 // On 32-bit platform, i64 is an illegal integer scalar type, and
8188 // FoldConstantArithmetic will fail for v4i64. This may be optimized in the
8189 // future.
8190 else if (SDValue Cst =
8192 N1 = Cst;
8193 else
8194 return SDValue();
8195 }
8196
// Both operands now have type VT: rebuild the logic op at that type.
8197 return DAG.getNode(N.getOpcode(), DL, VT, N0, N1);
8198}
8199
8200// On LASX the type v4i1/v8i1/v16i1 may be legalized to v4i32/v8i16/v16i8, which
8201// is LSX-sized register. In most cases we actually compare or select LASX-sized
8202// registers and mixing the two types creates horrible code. This method
8203// optimizes some of the transition sequences.
8205 SelectionDAG &DAG,
8206 const LoongArchSubtarget &Subtarget) {
8207 EVT VT = N.getValueType();
8208 assert(VT.isVector() && "Expected vector type");
8209 assert((N.getOpcode() == ISD::ANY_EXTEND ||
8210 N.getOpcode() == ISD::ZERO_EXTEND ||
8211 N.getOpcode() == ISD::SIGN_EXTEND) &&
8212 "Invalid Node");
8213
// Only applies to 256-bit results when LASX is available.
8214 if (!Subtarget.hasExtLASX() || !VT.is256BitVector())
8215 return SDValue();
8216
8217 SDValue Narrow = N.getOperand(0);
8218 EVT NarrowVT = Narrow.getValueType();
8219
8220 // Generate the wide operation.
8221 SDValue Op = PromoteMaskArithmetic(Narrow, DL, VT, DAG, Subtarget, 0);
8222 if (!Op)
8223 return SDValue();
// Re-apply the semantics of the original extend on top of the widened op:
// any_extend returns it unchanged, zero_extend uses a zero-extend-in-reg from
// NarrowVT, and sign_extend uses SIGN_EXTEND_INREG from NarrowVT.
8224 switch (N.getOpcode()) {
8225 default:
8226 llvm_unreachable("Unexpected opcode");
8227 case ISD::ANY_EXTEND:
8228 return Op;
8229 case ISD::ZERO_EXTEND:
8230 return DAG.getZeroExtendInReg(Op, DL, NarrowVT);
8231 case ISD::SIGN_EXTEND:
8232 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
8233 DAG.getValueType(NarrowVT));
8234 }
8235}
8236
8239 const LoongArchSubtarget &Subtarget) {
8240 EVT VT = N->getValueType(0);
8241 SDLoc DL(N);
8242
// Only vector results are considered; they are handed to PromoteMaskArithmetic
// and its result is returned when non-empty.
8243 if (VT.isVector())
8244 if (SDValue R = PromoteMaskArithmetic(SDValue(N, 0), DL, DAG, Subtarget))
8245 return R;
8246
8247 return SDValue();
8248}
8249
8250static SDValue
8253 const LoongArchSubtarget &Subtarget) {
8254 SDLoc DL(N);
8255 EVT VT = N->getValueType(0);
8256
// Only two-operand nodes with a vector result are forwarded to
// combineFP_ROUND; anything else yields an empty SDValue.
8257 if (VT.isVector() && N->getNumOperands() == 2)
8258 if (SDValue R = combineFP_ROUND(SDValue(N, 0), DL, DAG, Subtarget))
8259 return R;
8260
8261 return SDValue();
8262}
8263
8266 const LoongArchSubtarget &Subtarget) {
// Recognises a vselect that implements a rounded right shift and replaces it
// with a single LoongArchISD::VSRLR / VSRAR node. Only attempted once
// operation legalization has started.
8267 if (DCI.isBeforeLegalizeOps())
8268 return SDValue();
8269
8270 EVT VT = N->getValueType(0);
8271 if (!VT.isVector())
8272 return SDValue();
8273
8274 if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
8275 return SDValue();
8276
// Integer element types only.
8277 EVT EltVT = VT.getVectorElementType();
8278 if (!EltVT.isInteger())
8279 return SDValue();
8280
8281 SDValue Cond = N->getOperand(0);
8282 SDValue TrueVal = N->getOperand(1);
8283 SDValue FalseVal = N->getOperand(2);
8284
8285 // match:
8286 //
8287 // vselect (setcc shift, 0, seteq),
8288 // x,
8289 // rounded_shift
8290
8291 if (Cond.getOpcode() != ISD::SETCC)
8292 return SDValue();
8293
8294 if (!ISD::isConstantSplatVectorAllZeros(Cond.getOperand(1).getNode()))
8295 return SDValue();
8296
8297 auto *CC = cast<CondCodeSDNode>(Cond.getOperand(2));
8298 if (CC->get() != ISD::SETEQ)
8299 return SDValue();
8300
8301 SDValue Shift = Cond.getOperand(0);
8302
8303 // True branch must be original value:
8304 //
8305 // vselect cond, x, ...
8306
8307 SDValue X = TrueVal;
8308
8309 // Now match rounded shift pattern:
8310 //
8311 // add
8312 // (and
8313 // (srl X, shift-1)
8314 // 1)
8315 // (srl/sra X, shift)
8316
8317 if (FalseVal.getOpcode() != ISD::ADD)
8318 return SDValue();
8319
8320 SDValue Add0 = FalseVal.getOperand(0);
8321 SDValue Add1 = FalseVal.getOperand(1);
8322 SDValue And;
8323 SDValue Shr;
8324
// The AND may be either operand of the ADD; the other operand is then taken as
// the plain shift.
8325 if (Add0.getOpcode() == ISD::AND) {
8326 And = Add0;
8327 Shr = Add1;
8328 } else if (Add1.getOpcode() == ISD::AND) {
8329 And = Add1;
8330 Shr = Add0;
8331 } else {
8332 return SDValue();
8333 }
8334
8335 // match:
8336 //
8337 // srl/sra X, shift
8338
8339 if (Shr.getOpcode() != ISD::SRL && Shr.getOpcode() != ISD::SRA)
8340 return SDValue();
8341
8342 if (Shr.getOperand(0) != X)
8343 return SDValue();
8344
8345 if (Shr.getOperand(1) != Shift)
8346 return SDValue();
8347
8348 // match:
8349 //
8350 // and
8351 // (srl X, shift-1)
8352 // 1
8353
// Note: only operand 0 of the AND is accepted as the SRL and operand 1 as the
// constant; the swapped order is not matched here.
8354 SDValue Srl = And.getOperand(0);
8355 SDValue One = And.getOperand(1);
8356 APInt SplatVal;
8357
8358 if (Srl.getOpcode() != ISD::SRL)
8359 return SDValue();
8360
// The mask must be a constant splat of 1 at element width, looking through
// bitcasts.
8361 One = peekThroughBitcasts(One);
8362 if (!isConstantSplatVector(One, SplatVal, EltVT.getSizeInBits()))
8363 return SDValue();
8364
8365 if (SplatVal != 1)
8366 return SDValue();
8367
8368 if (Srl.getOperand(0) != X)
8369 return SDValue();
8370
8371 // match:
8372 //
8373 // shift-1
8374
8375 SDValue ShiftMinus1 = Srl.getOperand(1);
8376
8377 if (ShiftMinus1.getOpcode() != ISD::ADD)
8378 return SDValue();
8379
8380 if (ShiftMinus1.getOperand(0) != Shift)
8381 return SDValue();
8382
8384 return SDValue();
8385
8386 // We matched a rounded right shift pattern and can lower it
8387 // to a single vector rounded shift instruction.
8388
// A logical inner shift (SRL) selects VSRLR; an arithmetic one (SRA) selects
// VSRAR.
8389 SDLoc DL(N);
8390 return DAG.getNode(Shr.getOpcode() == ISD::SRL ? LoongArchISD::VSRLR
8391 : LoongArchISD::VSRAR,
8392 DL, VT, X, Shift);
8393}
8394
8396 DAGCombinerInfo &DCI) const {
// Dispatches on N's opcode to the matching perform*Combine helper. Opcodes
// without a case fall through to the empty SDValue at the end.
8397 SelectionDAG &DAG = DCI.DAG;
8398 switch (N->getOpcode()) {
8399 default:
8400 break;
8401 case ISD::ADD:
8402 return performADDCombine(N, DAG, DCI, Subtarget);
8403 case ISD::AND:
8404 return performANDCombine(N, DAG, DCI, Subtarget);
8405 case ISD::OR:
8406 return performORCombine(N, DAG, DCI, Subtarget);
8407 case ISD::SETCC:
8408 return performSETCCCombine(N, DAG, DCI, Subtarget);
8409 case ISD::SHL:
8410 return performSHLCombine(N, DAG, DCI, Subtarget);
8411 case ISD::SRL:
8412 return performSRLCombine(N, DAG, DCI, Subtarget);
8413 case ISD::SUB:
8414 return performSUBCombine(N, DAG, DCI, Subtarget);
8415 case ISD::BITCAST:
8416 return performBITCASTCombine(N, DAG, DCI, Subtarget);
// All three integer extend opcodes share one combine.
8417 case ISD::ANY_EXTEND:
8418 case ISD::ZERO_EXTEND:
8419 case ISD::SIGN_EXTEND:
8420 return performEXTENDCombine(N, DAG, DCI, Subtarget);
8421 case ISD::SINT_TO_FP:
8422 return performSINT_TO_FPCombine(N, DAG, DCI, Subtarget);
8423 case ISD::FP_TO_SINT:
8424 case ISD::FP_TO_UINT:
8425 return performFP_TO_INTCombine(N, DAG, DCI, Subtarget);
8426 case LoongArchISD::BITREV_W:
8427 return performBITREV_WCombine(N, DAG, DCI, Subtarget);
8428 case LoongArchISD::BR_CC:
8429 return performBR_CCCombine(N, DAG, DCI, Subtarget);
8430 case LoongArchISD::SELECT_CC:
8431 return performSELECT_CCCombine(N, DAG, DCI, Subtarget);
8433 return performINTRINSIC_WO_CHAINCombine(N, DAG, DCI, Subtarget);
8434 case LoongArchISD::MOVGR2FR_W_LA64:
8435 return performMOVGR2FR_WCombine(N, DAG, DCI, Subtarget);
8436 case LoongArchISD::MOVFR2GR_S_LA64:
8437 return performMOVFR2GR_SCombine(N, DAG, DCI, Subtarget);
// These nodes all go through the demanded-bits combine.
8438 case LoongArchISD::CRC_W_B_W:
8439 case LoongArchISD::CRC_W_H_W:
8440 case LoongArchISD::CRCC_W_B_W:
8441 case LoongArchISD::CRCC_W_H_W:
8442 case LoongArchISD::VMSKLTZ:
8443 case LoongArchISD::XVMSKLTZ:
8444 return performDemandedBitsCombine(N, DAG, DCI);
8445 case LoongArchISD::SPLIT_PAIR_F64:
8446 return performSPLIT_PAIR_F64Combine(N, DAG, DCI, Subtarget);
8447 case LoongArchISD::VANDN:
8448 return performVANDNCombine(N, DAG, DCI, Subtarget);
8450 return performCONCAT_VECTORSCombine(N, DAG, DCI, Subtarget);
8451 case ISD::VSELECT:
8452 return performVSELECTCombine(N, DAG, DCI, Subtarget);
// VPACKEV/VPERMI: return combineFP_ROUND's result only when it is non-empty;
// otherwise control leaves the switch and the empty SDValue below is returned.
8453 case LoongArchISD::VPACKEV:
8454 case LoongArchISD::VPERMI:
8455 if (SDValue Result =
8456 combineFP_ROUND(SDValue(N, 0), SDLoc(N), DAG, Subtarget))
8457 return Result;
8458 }
8459 return SDValue();
8460}
8461
// Body of the divide-by-zero check expansion. With the ZeroDivCheck option
// off, MBB is returned untouched. Otherwise the block is split right after
// MI: a BNE comparing the divisor (MI operand 2) with R0 branches to a new
// SinkMBB, a new BreakMBB in between holds `break 7`, and SinkMBB -- which
// receives the rest of the original block and its successors -- is returned.
// NOTE(review): embedded lines 8462-8463 (the signature) are absent from this
// listing; presumably this is insertDivByZeroTrap(MI, MBB), the helper that
// EmitInstrWithCustomInserter calls for the DIV/MOD opcodes -- confirm.
8464 if (!ZeroDivCheck)
8465 return MBB;
8466
8467 // Build instructions:
8468 // MBB:
8469 // div(or mod) $dst, $dividend, $divisor
8470 // bne $divisor, $zero, SinkMBB
8471 // BreakMBB:
8472 // break 7 // BRK_DIVZERO
8473 // SinkMBB:
8474 // fallthrough
8475 const BasicBlock *LLVM_BB = MBB->getBasicBlock();
8476 MachineFunction::iterator It = ++MBB->getIterator();
8477 MachineFunction *MF = MBB->getParent();
8478 auto BreakMBB = MF->CreateMachineBasicBlock(LLVM_BB);
8479 auto SinkMBB = MF->CreateMachineBasicBlock(LLVM_BB);
// Both new blocks are inserted before the same iterator, so the resulting
// layout order is MBB, BreakMBB, SinkMBB.
8480 MF->insert(It, BreakMBB);
8481 MF->insert(It, SinkMBB);
8482
8483 // Transfer the remainder of MBB and its successor edges to SinkMBB.
8484 SinkMBB->splice(SinkMBB->end(), MBB, std::next(MI.getIterator()), MBB->end());
8485 SinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
8486
8487 const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
8488 DebugLoc DL = MI.getDebugLoc();
8489 MachineOperand &Divisor = MI.getOperand(2);
8490 Register DivisorReg = Divisor.getReg();
8491
8492 // MBB:
// The branch inherits the divisor's kill state from MI; the flag on MI itself
// is cleared further down.
8493 BuildMI(MBB, DL, TII.get(LoongArch::BNE))
8494 .addReg(DivisorReg, getKillRegState(Divisor.isKill()))
8495 .addReg(LoongArch::R0)
8496 .addMBB(SinkMBB);
8497 MBB->addSuccessor(BreakMBB);
8498 MBB->addSuccessor(SinkMBB);
8499
8500 // BreakMBB:
8501 // See linux header file arch/loongarch/include/uapi/asm/break.h for the
8502 // definition of BRK_DIVZERO.
8503 BuildMI(BreakMBB, DL, TII.get(LoongArch::BREAK)).addImm(7 /*BRK_DIVZERO*/);
8504 BreakMBB->addSuccessor(SinkMBB);
8505
8506 // Clear Divisor's kill flag.
// (The register is read again by the BNE built above.)
8507 Divisor.setIsKill(false);
8508
8509 return SinkMBB;
8510 }
8511
// Expands a PseudoVBZ*/PseudoVBNZ*/PseudoXVBZ*/PseudoXVBNZ* instruction.
// The pseudo opcode selects a vector "set condition" instruction (CondOpc)
// that writes a fresh CFR virtual register from the pseudo's operand 1. A
// BCNEZ on that register branches to TrueBB; FalseBB and TrueBB materialise 0
// and 1 respectively with ADDI_W from R0, and a PHI at the start of SinkBB
// merges them into the pseudo's operand 0. The pseudo is erased and SinkBB,
// which holds the rest of the original block, is returned.
// NOTE(review): embedded lines 8513, 8584-8585 and 8598 are absent from this
// listing. The signature's first parameters, the definitions of `It` and
// `MRI`, and presumably the successor transfer to SinkBB live there -- confirm
// against upstream.
8512 static MachineBasicBlock *
8514 const LoongArchSubtarget &Subtarget) {
8515 unsigned CondOpc;
// Map each pseudo to its condition-setting instruction: the whole-vector
// forms use SETEQZ_V / SETNEZ_V, the element-wise BZ forms use SETANYEQZ_*,
// and the element-wise BNZ forms use SETALLNEZ_*.
8516 switch (MI.getOpcode()) {
8517 default:
8518 llvm_unreachable("Unexpected opcode");
8519 case LoongArch::PseudoVBZ:
8520 CondOpc = LoongArch::VSETEQZ_V;
8521 break;
8522 case LoongArch::PseudoVBZ_B:
8523 CondOpc = LoongArch::VSETANYEQZ_B;
8524 break;
8525 case LoongArch::PseudoVBZ_H:
8526 CondOpc = LoongArch::VSETANYEQZ_H;
8527 break;
8528 case LoongArch::PseudoVBZ_W:
8529 CondOpc = LoongArch::VSETANYEQZ_W;
8530 break;
8531 case LoongArch::PseudoVBZ_D:
8532 CondOpc = LoongArch::VSETANYEQZ_D;
8533 break;
8534 case LoongArch::PseudoVBNZ:
8535 CondOpc = LoongArch::VSETNEZ_V;
8536 break;
8537 case LoongArch::PseudoVBNZ_B:
8538 CondOpc = LoongArch::VSETALLNEZ_B;
8539 break;
8540 case LoongArch::PseudoVBNZ_H:
8541 CondOpc = LoongArch::VSETALLNEZ_H;
8542 break;
8543 case LoongArch::PseudoVBNZ_W:
8544 CondOpc = LoongArch::VSETALLNEZ_W;
8545 break;
8546 case LoongArch::PseudoVBNZ_D:
8547 CondOpc = LoongArch::VSETALLNEZ_D;
8548 break;
8549 case LoongArch::PseudoXVBZ:
8550 CondOpc = LoongArch::XVSETEQZ_V;
8551 break;
8552 case LoongArch::PseudoXVBZ_B:
8553 CondOpc = LoongArch::XVSETANYEQZ_B;
8554 break;
8555 case LoongArch::PseudoXVBZ_H:
8556 CondOpc = LoongArch::XVSETANYEQZ_H;
8557 break;
8558 case LoongArch::PseudoXVBZ_W:
8559 CondOpc = LoongArch::XVSETANYEQZ_W;
8560 break;
8561 case LoongArch::PseudoXVBZ_D:
8562 CondOpc = LoongArch::XVSETANYEQZ_D;
8563 break;
8564 case LoongArch::PseudoXVBNZ:
8565 CondOpc = LoongArch::XVSETNEZ_V;
8566 break;
8567 case LoongArch::PseudoXVBNZ_B:
8568 CondOpc = LoongArch::XVSETALLNEZ_B;
8569 break;
8570 case LoongArch::PseudoXVBNZ_H:
8571 CondOpc = LoongArch::XVSETALLNEZ_H;
8572 break;
8573 case LoongArch::PseudoXVBNZ_W:
8574 CondOpc = LoongArch::XVSETALLNEZ_W;
8575 break;
8576 case LoongArch::PseudoXVBNZ_D:
8577 CondOpc = LoongArch::XVSETALLNEZ_D;
8578 break;
8579 }
8580
8581 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
8582 const BasicBlock *LLVM_BB = BB->getBasicBlock();
8583 DebugLoc DL = MI.getDebugLoc();
8586
8587 MachineFunction *F = BB->getParent();
8588 MachineBasicBlock *FalseBB = F->CreateMachineBasicBlock(LLVM_BB);
8589 MachineBasicBlock *TrueBB = F->CreateMachineBasicBlock(LLVM_BB);
8590 MachineBasicBlock *SinkBB = F->CreateMachineBasicBlock(LLVM_BB);
8591
// Inserted before the same iterator, so the layout order is FalseBB, TrueBB,
// SinkBB.
8592 F->insert(It, FalseBB);
8593 F->insert(It, TrueBB);
8594 F->insert(It, SinkBB);
8595
8596 // Transfer the remainder of MBB and its successor edges to Sink.
8597 SinkBB->splice(SinkBB->end(), BB, std::next(MI.getIterator()), BB->end());
8599
8600 // Insert the real instruction to BB.
8601 Register FCC = MRI.createVirtualRegister(&LoongArch::CFRRegClass);
8602 BuildMI(BB, DL, TII->get(CondOpc), FCC).addReg(MI.getOperand(1).getReg());
8603
8604 // Insert branch.
8605 BuildMI(BB, DL, TII->get(LoongArch::BCNEZ)).addReg(FCC).addMBB(TrueBB);
8606 BB->addSuccessor(FalseBB);
8607 BB->addSuccessor(TrueBB);
8608
8609 // FalseBB.
// Produces 0 and jumps over TrueBB with an explicit PseudoBR.
8610 Register RD1 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
8611 BuildMI(FalseBB, DL, TII->get(LoongArch::ADDI_W), RD1)
8612 .addReg(LoongArch::R0)
8613 .addImm(0);
8614 BuildMI(FalseBB, DL, TII->get(LoongArch::PseudoBR)).addMBB(SinkBB);
8615 FalseBB->addSuccessor(SinkBB);
8616
8617 // TrueBB.
// Produces 1; no branch is emitted here, only the SinkBB successor edge.
8618 Register RD2 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
8619 BuildMI(TrueBB, DL, TII->get(LoongArch::ADDI_W), RD2)
8620 .addReg(LoongArch::R0)
8621 .addImm(1);
8622 TrueBB->addSuccessor(SinkBB);
8623
8624 // SinkBB: merge the results.
8625 BuildMI(*SinkBB, SinkBB->begin(), DL, TII->get(LoongArch::PHI),
8626 MI.getOperand(0).getReg())
8627 .addReg(RD1)
8628 .addMBB(FalseBB)
8629 .addReg(RD2)
8630 .addMBB(TrueBB);
8631
8632 // The pseudo instruction is gone now.
8633 MI.eraseFromParent();
8634 return SinkBB;
8635 }
8636
// Expands PseudoXVINSGR2VR_B / PseudoXVINSGR2VR_H, i.e.
//   XDst = vector_insert XSrc, Elt, Idx
// Two strategies are used:
//  * If XSrc is a virtual register whose definition is an IMPLICIT_DEF and
//    Idx < HalfSize, the insert is done on the sub_128 part with
//    VINSGR2VR_B/_H and the result is widened with SUBREG_TO_REG.
//  * Otherwise Elt is broadcast (XVREPLGR2VR_*), combined with XSrc through
//    XVPERMI_Q, and inserted with XVEXTRINS_*.
// The pseudo is erased and the same BB is returned (no control flow is added).
// NOTE(review): embedded lines 8638 and 8661 are absent from this listing;
// the signature start and presumably the definition of `MRI` live there.
8637 static MachineBasicBlock *
8639 const LoongArchSubtarget &Subtarget) {
8640 unsigned InsOp;
8641 unsigned BroadcastOp;
// HalfSize is 16 for the byte form and 8 for the halfword form -- presumably
// the element count of one 128-bit half (128/8 and 128/16).
8642 unsigned HalfSize;
8643 switch (MI.getOpcode()) {
8644 default:
8645 llvm_unreachable("Unexpected opcode");
8646 case LoongArch::PseudoXVINSGR2VR_B:
8647 HalfSize = 16;
8648 BroadcastOp = LoongArch::XVREPLGR2VR_B;
8649 InsOp = LoongArch::XVEXTRINS_B;
8650 break;
8651 case LoongArch::PseudoXVINSGR2VR_H:
8652 HalfSize = 8;
8653 BroadcastOp = LoongArch::XVREPLGR2VR_H;
8654 InsOp = LoongArch::XVEXTRINS_H;
8655 break;
8656 }
8657 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
8658 const TargetRegisterClass *RC = &LoongArch::LASX256RegClass;
8659 const TargetRegisterClass *SubRC = &LoongArch::LSX128RegClass;
8660 DebugLoc DL = MI.getDebugLoc();
8662 // XDst = vector_insert XSrc, Elt, Idx
8663 Register XDst = MI.getOperand(0).getReg();
8664 Register XSrc = MI.getOperand(1).getReg();
8665 Register Elt = MI.getOperand(2).getReg();
8666 unsigned Idx = MI.getOperand(3).getImm();
8667
// Fast path: the source vector is undefined and the index addresses the low
// half, so only the 128-bit subregister needs to be written.
8668 if (XSrc.isVirtual() && MRI.getVRegDef(XSrc)->isImplicitDef() &&
8669 Idx < HalfSize) {
8670 Register ScratchSubReg1 = MRI.createVirtualRegister(SubRC);
8671 Register ScratchSubReg2 = MRI.createVirtualRegister(SubRC);
8672
8673 BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), ScratchSubReg1)
8674 .addReg(XSrc, {}, LoongArch::sub_128);
8675 BuildMI(*BB, MI, DL,
8676 TII->get(HalfSize == 8 ? LoongArch::VINSGR2VR_H
8677 : LoongArch::VINSGR2VR_B),
8678 ScratchSubReg2)
8679 .addReg(ScratchSubReg1)
8680 .addReg(Elt)
8681 .addImm(Idx);
8682
8683 BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), XDst)
8684 .addReg(ScratchSubReg2)
8685 .addImm(LoongArch::sub_128);
8686 } else {
8687 Register ScratchReg1 = MRI.createVirtualRegister(RC);
8688 Register ScratchReg2 = MRI.createVirtualRegister(RC);
8689
8690 BuildMI(*BB, MI, DL, TII->get(BroadcastOp), ScratchReg1).addReg(Elt);
8691
// The XVPERMI_Q immediate depends on which half Idx addresses (48 for the
// high half, 18 for the low half).
// NOTE(review): the exact lane selection encoded by 48/18 is not derivable
// from this file -- check the LASX manual.
8692 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), ScratchReg2)
8693 .addReg(ScratchReg1)
8694 .addReg(XSrc)
8695 .addImm(Idx >= HalfSize ? 48 : 18);
8696
// The index is first reduced to its position within a half; multiplying by 17
// (= 16 + 1) repeats that value in both 4-bit fields of the immediate.
8697 BuildMI(*BB, MI, DL, TII->get(InsOp), XDst)
8698 .addReg(XSrc)
8699 .addReg(ScratchReg2)
8700 .addImm((Idx >= HalfSize ? Idx - HalfSize : Idx) * 17);
8701 }
8702
8703 MI.eraseFromParent();
8704 return BB;
8705 }
8706
// Expands the PseudoCTPOP_* pseudos (Dst = operand 0, Src = operand 1) into a
// three-instruction LSX sequence: broadcast Src into a vector register
// (VREPLGR2VR_*), count bits per element (VPCNT_*), then move element 0 back
// to Dst (VPICKVE2GR_*). Asserts that the subtarget has LSX. The pseudo is
// erased and the same BB is returned.
// NOTE(review): embedded lines 8707-8708 and 8714 are absent from this
// listing; the signature start and presumably the definition of `MRI` live
// there.
8709 const LoongArchSubtarget &Subtarget) {
8710 assert(Subtarget.hasExtLSX());
8711 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
8712 const TargetRegisterClass *RC = &LoongArch::LSX128RegClass;
8713 DebugLoc DL = MI.getDebugLoc();
8715 Register Dst = MI.getOperand(0).getReg();
8716 Register Src = MI.getOperand(1).getReg();
8717
// Pick the element-width-specific opcodes; the _LA32 pseudo variants reuse the
// same instructions as their plain counterparts.
8718 unsigned BroadcastOp, CTOp, PickOp;
8719 switch (MI.getOpcode()) {
8720 default:
8721 llvm_unreachable("Unexpected opcode");
8722 case LoongArch::PseudoCTPOP_B:
8723 BroadcastOp = LoongArch::VREPLGR2VR_B;
8724 CTOp = LoongArch::VPCNT_B;
8725 PickOp = LoongArch::VPICKVE2GR_B;
8726 break;
8727 case LoongArch::PseudoCTPOP_H:
8728 case LoongArch::PseudoCTPOP_H_LA32:
8729 BroadcastOp = LoongArch::VREPLGR2VR_H;
8730 CTOp = LoongArch::VPCNT_H;
8731 PickOp = LoongArch::VPICKVE2GR_H;
8732 break;
8733 case LoongArch::PseudoCTPOP_W:
8734 case LoongArch::PseudoCTPOP_W_LA32:
8735 BroadcastOp = LoongArch::VREPLGR2VR_W;
8736 CTOp = LoongArch::VPCNT_W;
8737 PickOp = LoongArch::VPICKVE2GR_W;
8738 break;
8739 case LoongArch::PseudoCTPOP_D:
8740 BroadcastOp = LoongArch::VREPLGR2VR_D;
8741 CTOp = LoongArch::VPCNT_D;
8742 PickOp = LoongArch::VPICKVE2GR_D;
8743 break;
8744 }
8745
8746 Register ScratchReg1 = MRI.createVirtualRegister(RC);
8747 Register ScratchReg2 = MRI.createVirtualRegister(RC);
8748 BuildMI(*BB, MI, DL, TII->get(BroadcastOp), ScratchReg1).addReg(Src);
8749 BuildMI(*BB, MI, DL, TII->get(CTOp), ScratchReg2).addReg(ScratchReg1);
8750 BuildMI(*BB, MI, DL, TII->get(PickOp), Dst).addReg(ScratchReg2).addImm(0);
8751
8752 MI.eraseFromParent();
8753 return BB;
8754 }
8755
// Expands the PseudoVMSK*/PseudoXVMSK* pseudos (Dst = operand 0, Src =
// operand 1):
//  1. Run the selected mask instruction (MskOpc) on Src. The EQZ variants use
//     the NZ mask and invert it by NOR-ing the result with itself.
//  2. Move the mask to the GPR Dst. For the 256-bit register class, words 0
//     and 4 are read with XVPICKVE2GR_WU and combined with BSTRINS_D/_W;
//     otherwise halfword 0 is read with VPICKVE2GR_HU.
// The pseudo is erased and the same BB is returned.
// NOTE(review): embedded lines 8757, 8762 and 8856-8857 are absent from this
// listing; the signature start, presumably the definition of `MRI`, and the
// register operands of the BSTRINS instruction live there.
8756 static MachineBasicBlock *
8758 const LoongArchSubtarget &Subtarget) {
8759 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
8760 const TargetRegisterClass *RC = &LoongArch::LSX128RegClass;
8761 const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
8763 Register Dst = MI.getOperand(0).getReg();
8764 Register Src = MI.getOperand(1).getReg();
8765 DebugLoc DL = MI.getDebugLoc();
// Defaults describe the 128-bit byte-element case; the switch overrides them
// per opcode. NotOpc == 0 means "no inversion needed".
8766 unsigned EleBits = 8;
8767 unsigned NotOpc = 0;
8768 unsigned MskOpc;
8769
8770 switch (MI.getOpcode()) {
8771 default:
8772 llvm_unreachable("Unexpected opcode");
8773 case LoongArch::PseudoVMSKLTZ_B:
8774 MskOpc = LoongArch::VMSKLTZ_B;
8775 break;
8776 case LoongArch::PseudoVMSKLTZ_H:
8777 MskOpc = LoongArch::VMSKLTZ_H;
8778 EleBits = 16;
8779 break;
8780 case LoongArch::PseudoVMSKLTZ_W:
8781 MskOpc = LoongArch::VMSKLTZ_W;
8782 EleBits = 32;
8783 break;
8784 case LoongArch::PseudoVMSKLTZ_D:
8785 MskOpc = LoongArch::VMSKLTZ_D;
8786 EleBits = 64;
8787 break;
8788 case LoongArch::PseudoVMSKGEZ_B:
8789 MskOpc = LoongArch::VMSKGEZ_B;
8790 break;
8791 case LoongArch::PseudoVMSKEQZ_B:
8792 MskOpc = LoongArch::VMSKNZ_B;
8793 NotOpc = LoongArch::VNOR_V;
8794 break;
8795 case LoongArch::PseudoVMSKNEZ_B:
8796 MskOpc = LoongArch::VMSKNZ_B;
8797 break;
8798 case LoongArch::PseudoXVMSKLTZ_B:
8799 MskOpc = LoongArch::XVMSKLTZ_B;
8800 RC = &LoongArch::LASX256RegClass;
8801 break;
8802 case LoongArch::PseudoXVMSKLTZ_H:
8803 MskOpc = LoongArch::XVMSKLTZ_H;
8804 RC = &LoongArch::LASX256RegClass;
8805 EleBits = 16;
8806 break;
8807 case LoongArch::PseudoXVMSKLTZ_W:
8808 MskOpc = LoongArch::XVMSKLTZ_W;
8809 RC = &LoongArch::LASX256RegClass;
8810 EleBits = 32;
8811 break;
8812 case LoongArch::PseudoXVMSKLTZ_D:
8813 MskOpc = LoongArch::XVMSKLTZ_D;
8814 RC = &LoongArch::LASX256RegClass;
8815 EleBits = 64;
8816 break;
8817 case LoongArch::PseudoXVMSKGEZ_B:
8818 MskOpc = LoongArch::XVMSKGEZ_B;
8819 RC = &LoongArch::LASX256RegClass;
8820 break;
8821 case LoongArch::PseudoXVMSKEQZ_B:
8822 MskOpc = LoongArch::XVMSKNZ_B;
8823 NotOpc = LoongArch::XVNOR_V;
8824 RC = &LoongArch::LASX256RegClass;
8825 break;
8826 case LoongArch::PseudoXVMSKNEZ_B:
8827 MskOpc = LoongArch::XVMSKNZ_B;
8828 RC = &LoongArch::LASX256RegClass;
8829 break;
8830 }
8831
8832 Register Msk = MRI.createVirtualRegister(RC);
8833 if (NotOpc) {
// NOR of Tmp with itself yields the bitwise complement of the NZ mask.
8834 Register Tmp = MRI.createVirtualRegister(RC);
8835 BuildMI(*BB, MI, DL, TII->get(MskOpc), Tmp).addReg(Src);
8836 BuildMI(*BB, MI, DL, TII->get(NotOpc), Msk)
8837 .addReg(Tmp, RegState::Kill)
8838 .addReg(Tmp, RegState::Kill);
8839 } else {
8840 BuildMI(*BB, MI, DL, TII->get(MskOpc), Msk).addReg(Src);
8841 }
8842
// 256-bit case: the mask is read from word 0 and word 4 and stitched together.
// The two BSTRINS immediates evaluate to 256/EleBits - 1 and 128/EleBits
// (e.g. 31 and 16 for byte elements).
// NOTE(review): presumably these are the msb/lsb of the bit field that
// receives the high-half mask -- confirm.
8843 if (TRI->getRegSizeInBits(*RC) > 128) {
8844 Register Lo = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
8845 Register Hi = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
8846 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPICKVE2GR_WU), Lo)
8847 .addReg(Msk)
8848 .addImm(0);
8849 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPICKVE2GR_WU), Hi)
8850 .addReg(Msk, RegState::Kill)
8851 .addImm(4);
8852 BuildMI(*BB, MI, DL,
8853 TII->get(Subtarget.is64Bit() ? LoongArch::BSTRINS_D
8854 : LoongArch::BSTRINS_W),
8855 Dst)
8858 .addImm(256 / EleBits - 1)
8859 .addImm(128 / EleBits);
8860 } else {
8861 BuildMI(*BB, MI, DL, TII->get(LoongArch::VPICKVE2GR_HU), Dst)
8862 .addReg(Msk, RegState::Kill)
8863 .addImm(0);
8864 }
8865
8866 MI.eraseFromParent();
8867 return BB;
8868 }
8869
// Expands SplitPairF64Pseudo: operand 2 is the source register and operands
// 0/1 receive its low and high parts via MOVFR2GR_S_64 and MOVFRH2GR_S
// respectively. The second (last) read carries the source's original kill
// state. The pseudo is erased and the same BB is returned.
// NOTE(review): embedded lines 8871 and 8878 are absent from this listing;
// the signature start and presumably the definition of `TII` live there.
8870 static MachineBasicBlock *
8872 const LoongArchSubtarget &Subtarget) {
8873 assert(MI.getOpcode() == LoongArch::SplitPairF64Pseudo &&
8874 "Unexpected instruction");
8875
8876 MachineFunction &MF = *BB->getParent();
8877 DebugLoc DL = MI.getDebugLoc();
8879 Register LoReg = MI.getOperand(0).getReg();
8880 Register HiReg = MI.getOperand(1).getReg();
8881 Register SrcReg = MI.getOperand(2).getReg();
8882
8883 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVFR2GR_S_64), LoReg).addReg(SrcReg);
8884 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVFRH2GR_S), HiReg)
8885 .addReg(SrcReg, getKillRegState(MI.getOperand(2).isKill()));
8886 MI.eraseFromParent(); // The pseudo instruction is gone now.
8887 return BB;
8888 }
8889
// Expands BuildPairF64Pseudo: operand 0 is the destination and operands 1/2
// are the low and high source registers. MOVGR2FR_W_64 first moves the low
// part into a fresh FPR64 temporary, then MOVGR2FRH_W combines that temporary
// with the high part into the destination. Kill states of the sources are
// carried over. The pseudo is erased and the same BB is returned.
// NOTE(review): embedded lines 8891 and 8898-8899 are absent from this
// listing; the signature start and presumably the definitions of `TII` and
// `MRI` live there.
8890 static MachineBasicBlock *
8892 const LoongArchSubtarget &Subtarget) {
8893 assert(MI.getOpcode() == LoongArch::BuildPairF64Pseudo &&
8894 "Unexpected instruction");
8895
8896 MachineFunction &MF = *BB->getParent();
8897 DebugLoc DL = MI.getDebugLoc();
8900 Register TmpReg = MRI.createVirtualRegister(&LoongArch::FPR64RegClass);
8901 Register DstReg = MI.getOperand(0).getReg();
8902 Register LoReg = MI.getOperand(1).getReg();
8903 Register HiReg = MI.getOperand(2).getReg();
8904
8905 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVGR2FR_W_64), TmpReg)
8906 .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()));
8907 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVGR2FRH_W), DstReg)
8908 .addReg(TmpReg, RegState::Kill)
8909 .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()));
8910 MI.eraseFromParent(); // The pseudo instruction is gone now.
8911 return BB;
8912 }
8913
// Predicate body: true only when MI's opcode is Select_GPR_Using_CC_GPR,
// false for every other opcode.
// NOTE(review): embedded line 8914 (the signature) is absent from this
// listing; presumably this is the isSelectPseudo(MI) helper called from
// emitSelectPseudo -- confirm.
8915 switch (MI.getOpcode()) {
8916 default:
8917 return false;
8918 case LoongArch::Select_GPR_Using_CC_GPR:
8919 return true;
8920 }
8921 }
8922
// Expands one or more select pseudos into a branch plus PHIs.
// Operand layout as read below: 0 = result, 1 = LHS, 2 = RHS (register or
// immediate), 3 = branch opcode, 4 = true value, 5 = false value.
// Returns TailMBB, the new block that holds the PHIs followed by everything
// that came after the last merged select.
// NOTE(review): embedded lines 8924 and 8993 are absent from this listing;
// the signature start and presumably the definition of the insertion
// iterator `I` live there.
8923 static MachineBasicBlock *
8925 const LoongArchSubtarget &Subtarget) {
8926 // To "insert" Select_* instructions, we actually have to insert the triangle
8927 // control-flow pattern. The incoming instructions know the destination vreg
8928 // to set, the condition code register to branch on, the true/false values to
8929 // select between, and the condcode to use to select the appropriate branch.
8930 //
8931 // We produce the following control flow:
8932 // HeadMBB
8933 // | \
8934 // | IfFalseMBB
8935 // | /
8936 // TailMBB
8937 //
8938 // When we find a sequence of selects we attempt to optimize their emission
8939 // by sharing the control flow. Currently we only handle cases where we have
8940 // multiple selects with the exact same condition (same LHS, RHS and CC).
8941 // The selects may be interleaved with other instructions if the other
8942 // instructions meet some requirements we deem safe:
8943 // - They are not pseudo instructions.
8944 // - They are debug instructions. Otherwise,
8945 // - They do not have side-effects, do not access memory and their inputs do
8946 // not depend on the results of the select pseudo-instructions.
8947 // The TrueV/FalseV operands of the selects cannot depend on the result of
8948 // previous selects in the sequence.
8949 // These conditions could be further relaxed. See the X86 target for a
8950 // related approach and more information.
8951
8952 Register LHS = MI.getOperand(1).getReg();
// RHS stays default-constructed when operand 2 is an immediate.
8953 Register RHS;
8954 if (MI.getOperand(2).isReg())
8955 RHS = MI.getOperand(2).getReg();
8956 auto CC = static_cast<unsigned>(MI.getOperand(3).getImm());
8957
8958 SmallVector<MachineInstr *, 4> SelectDebugValues;
8959 SmallSet<Register, 4> SelectDests;
8960 SelectDests.insert(MI.getOperand(0).getReg());
8961
// Scan forward from MI (inclusive) to find the last select that can share the
// branch. Because a non-register operand 2 ends the scan, a select with an
// immediate RHS is never merged with later selects.
8962 MachineInstr *LastSelectPseudo = &MI;
8963 for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
8964 SequenceMBBI != E; ++SequenceMBBI) {
8965 if (SequenceMBBI->isDebugInstr())
8966 continue;
8967 if (isSelectPseudo(*SequenceMBBI)) {
8968 if (SequenceMBBI->getOperand(1).getReg() != LHS ||
8969 !SequenceMBBI->getOperand(2).isReg() ||
8970 SequenceMBBI->getOperand(2).getReg() != RHS ||
8971 SequenceMBBI->getOperand(3).getImm() != CC ||
8972 SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
8973 SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
8974 break;
8975 LastSelectPseudo = &*SequenceMBBI;
8976 SequenceMBBI->collectDebugValues(SelectDebugValues);
8977 SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
8978 continue;
8979 }
// Interleaved non-select instructions must be side-effect free, must not touch
// memory, must not need custom insertion, and must not read any select result.
8980 if (SequenceMBBI->hasUnmodeledSideEffects() ||
8981 SequenceMBBI->mayLoadOrStore() ||
8982 SequenceMBBI->usesCustomInsertionHook())
8983 break;
8984 if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
8985 return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
8986 }))
8987 break;
8988 }
8989
8990 const LoongArchInstrInfo &TII = *Subtarget.getInstrInfo();
8991 const BasicBlock *LLVM_BB = BB->getBasicBlock();
8992 DebugLoc DL = MI.getDebugLoc();
8994
8995 MachineBasicBlock *HeadMBB = BB;
8996 MachineFunction *F = BB->getParent();
8997 MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
8998 MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
8999
9000 F->insert(I, IfFalseMBB);
9001 F->insert(I, TailMBB);
9002
9003 // Set the call frame size on entry to the new basic blocks.
9004 unsigned CallFrameSize = TII.getCallFrameSizeAt(*LastSelectPseudo);
9005 IfFalseMBB->setCallFrameSize(CallFrameSize);
9006 TailMBB->setCallFrameSize(CallFrameSize);
9007
9008 // Transfer debug instructions associated with the selects to TailMBB.
9009 for (MachineInstr *DebugInstr : SelectDebugValues) {
9010 TailMBB->push_back(DebugInstr->removeFromParent());
9011 }
9012
9013 // Move all instructions after the sequence to TailMBB.
9014 TailMBB->splice(TailMBB->end(), HeadMBB,
9015 std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
9016 // Update machine-CFG edges by transferring all successors of the current
9017 // block to the new block which will contain the Phi nodes for the selects.
9018 TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
9019 // Set the successors for HeadMBB.
9020 HeadMBB->addSuccessor(IfFalseMBB);
9021 HeadMBB->addSuccessor(TailMBB);
9022
9023 // Insert appropriate branch.
// CC (operand 3) is used directly as the opcode of the branch instruction; the
// branch targets TailMBB.
9024 if (MI.getOperand(2).isImm())
9025 BuildMI(HeadMBB, DL, TII.get(CC))
9026 .addReg(LHS)
9027 .addImm(MI.getOperand(2).getImm())
9028 .addMBB(TailMBB);
9029 else
9030 BuildMI(HeadMBB, DL, TII.get(CC)).addReg(LHS).addReg(RHS).addMBB(TailMBB);
9031
9032 // IfFalseMBB just falls through to TailMBB.
9033 IfFalseMBB->addSuccessor(TailMBB);
9034
9035 // Create PHIs for all of the select pseudo-instructions.
// `Next` is computed before the current select is erased so the walk stays
// valid; non-select instructions in the range are left in HeadMBB.
9036 auto SelectMBBI = MI.getIterator();
9037 auto SelectEnd = std::next(LastSelectPseudo->getIterator());
9038 auto InsertionPoint = TailMBB->begin();
9039 while (SelectMBBI != SelectEnd) {
9040 auto Next = std::next(SelectMBBI);
9041 if (isSelectPseudo(*SelectMBBI)) {
9042 // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
9043 BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
9044 TII.get(LoongArch::PHI), SelectMBBI->getOperand(0).getReg())
9045 .addReg(SelectMBBI->getOperand(4).getReg())
9046 .addMBB(HeadMBB)
9047 .addReg(SelectMBBI->getOperand(5).getReg())
9048 .addMBB(IfFalseMBB);
9049 SelectMBBI->eraseFromParent();
9050 }
9051 SelectMBBI = Next;
9052 }
9053
// PHIs were just created, so the function can no longer claim the NoPHIs
// property.
9054 F->getProperties().resetNoPHIs();
9055 return TailMBB;
9056 }
9057
// Custom-inserter entry point: dispatches on MI's opcode to the helper that
// expands it and returns the block in which instruction emission should
// continue (the same BB for straight-line expansions, a new block when the
// helper splits control flow). Any opcode not listed is a hard error via
// llvm_unreachable.
9058 MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter(
9059 MachineInstr &MI, MachineBasicBlock *BB) const {
9060 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
9061 DebugLoc DL = MI.getDebugLoc();
9062
9063 switch (MI.getOpcode()) {
9064 default:
9065 llvm_unreachable("Unexpected instr type to insert");
// Integer division/remainder: optionally guarded by a divide-by-zero trap.
9066 case LoongArch::DIV_W:
9067 case LoongArch::DIV_WU:
9068 case LoongArch::MOD_W:
9069 case LoongArch::MOD_WU:
9070 case LoongArch::DIV_D:
9071 case LoongArch::DIV_DU:
9072 case LoongArch::MOD_D:
9073 case LoongArch::MOD_DU:
9074 return insertDivByZeroTrap(MI, BB);
// NOTE(review): this break is unreachable after the return above.
9075 break;
// WRFCSR: operand 0 is an immediate index added to FCSR0 to pick the
// destination register; operand 1 is the GPR source.
9076 case LoongArch::WRFCSR: {
9077 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVGR2FCSR),
9078 LoongArch::FCSR0 + MI.getOperand(0).getImm())
9079 .addReg(MI.getOperand(1).getReg());
9080 MI.eraseFromParent();
9081 return BB;
9082 }
// RDFCSR: operand 1 is the immediate index added to FCSR0; the FCSR source
// operand of the new instruction is flagged undef.
9083 case LoongArch::RDFCSR: {
9084 MachineInstr *ReadFCSR =
9085 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVFCSR2GR),
9086 MI.getOperand(0).getReg())
9087 .addReg(LoongArch::FCSR0 + MI.getOperand(1).getImm());
9088 ReadFCSR->getOperand(1).setIsUndef();
9089 MI.eraseFromParent();
9090 return BB;
9091 }
9092 case LoongArch::Select_GPR_Using_CC_GPR:
9093 return emitSelectPseudo(MI, BB, Subtarget);
9094 case LoongArch::BuildPairF64Pseudo:
9095 return emitBuildPairF64Pseudo(MI, BB, Subtarget);
9096 case LoongArch::SplitPairF64Pseudo:
9097 return emitSplitPairF64Pseudo(MI, BB, Subtarget);
9098 case LoongArch::PseudoVBZ:
9099 case LoongArch::PseudoVBZ_B:
9100 case LoongArch::PseudoVBZ_H:
9101 case LoongArch::PseudoVBZ_W:
9102 case LoongArch::PseudoVBZ_D:
9103 case LoongArch::PseudoVBNZ:
9104 case LoongArch::PseudoVBNZ_B:
9105 case LoongArch::PseudoVBNZ_H:
9106 case LoongArch::PseudoVBNZ_W:
9107 case LoongArch::PseudoVBNZ_D:
9108 case LoongArch::PseudoXVBZ:
9109 case LoongArch::PseudoXVBZ_B:
9110 case LoongArch::PseudoXVBZ_H:
9111 case LoongArch::PseudoXVBZ_W:
9112 case LoongArch::PseudoXVBZ_D:
9113 case LoongArch::PseudoXVBNZ:
9114 case LoongArch::PseudoXVBNZ_B:
9115 case LoongArch::PseudoXVBNZ_H:
9116 case LoongArch::PseudoXVBNZ_W:
9117 case LoongArch::PseudoXVBNZ_D:
9118 return emitVecCondBranchPseudo(MI, BB, Subtarget);
9119 case LoongArch::PseudoXVINSGR2VR_B:
9120 case LoongArch::PseudoXVINSGR2VR_H:
9121 return emitPseudoXVINSGR2VR(MI, BB, Subtarget);
9122 case LoongArch::PseudoCTPOP_B:
9123 case LoongArch::PseudoCTPOP_H:
9124 case LoongArch::PseudoCTPOP_W:
9125 case LoongArch::PseudoCTPOP_D:
9126 case LoongArch::PseudoCTPOP_H_LA32:
9127 case LoongArch::PseudoCTPOP_W_LA32:
9128 return emitPseudoCTPOP(MI, BB, Subtarget);
9129 case LoongArch::PseudoVMSKLTZ_B:
9130 case LoongArch::PseudoVMSKLTZ_H:
9131 case LoongArch::PseudoVMSKLTZ_W:
9132 case LoongArch::PseudoVMSKLTZ_D:
9133 case LoongArch::PseudoVMSKGEZ_B:
9134 case LoongArch::PseudoVMSKEQZ_B:
9135 case LoongArch::PseudoVMSKNEZ_B:
9136 case LoongArch::PseudoXVMSKLTZ_B:
9137 case LoongArch::PseudoXVMSKLTZ_H:
9138 case LoongArch::PseudoXVMSKLTZ_W:
9139 case LoongArch::PseudoXVMSKLTZ_D:
9140 case LoongArch::PseudoXVMSKGEZ_B:
9141 case LoongArch::PseudoXVMSKEQZ_B:
9142 case LoongArch::PseudoXVMSKNEZ_B:
9143 return emitPseudoVMSKCOND(MI, BB, Subtarget);
9144 case TargetOpcode::STATEPOINT:
9145 // STATEPOINT is a pseudo instruction which has no implicit defs/uses
9146 // while bl call instruction (where statepoint will be lowered at the
9147 // end) has implicit def. This def is early-clobber as it will be set at
9148 // the moment of the call and earlier than any use is read.
9149 // Add this implicit dead def here as a workaround.
// NOTE(review): embedded line 9151 is absent from this listing; presumably it
// opens the call that builds the R1 register operand from the flags below.
9150 MI.addOperand(*MI.getMF(),
9152 LoongArch::R1, /*isDef*/ true,
9153 /*isImp*/ true, /*isKill*/ false, /*isDead*/ true,
9154 /*isUndef*/ false, /*isEarlyClobber*/ true));
// The 64-bit check runs after the operand has been added; on failure
// report_fatal_error is called.
9155 if (!Subtarget.is64Bit())
9156 report_fatal_error("STATEPOINT is only supported on 64-bit targets");
9157 return emitPatchPoint(MI, BB);
9158 case LoongArch::PROBED_STACKALLOC_DYN:
9159 return emitDynamicProbedAlloc(MI, BB);
9160 }
9161 }
9162
// Misaligned-access query: returns false when the subtarget lacks UAL;
// otherwise returns true and, if the optional out-parameter Fast is non-null,
// sets *Fast to 1. VT, AddrSpace, Alignment and Flags are not consulted.
// NOTE(review): embedded line 9163 (the start of the signature) is absent
// from this listing; presumably this is
// LoongArchTargetLowering::allowsMisalignedMemoryAccesses -- confirm.
9164 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
9165 unsigned *Fast) const {
9166 if (!Subtarget.hasUAL())
9167 return false;
9168
9169 // TODO: set reasonable speed number.
9170 if (Fast)
9171 *Fast = 1;
9172 return true;
9173 }
9174
9175//===----------------------------------------------------------------------===//
9176// Calling Convention Implementation
9177//===----------------------------------------------------------------------===//
9178
// Argument-register tables consumed by the calling-convention code below.
// Each table lists physical registers in allocation order.
9179 // Eight general-purpose registers a0-a7 used for passing integer arguments,
9180 // with a0-a1 reused to return values. Generally, the GPRs are used to pass
9181 // fixed-point arguments, and floating-point arguments when no FPR is available
9182 // or with soft float ABI.
9183 const MCPhysReg ArgGPRs[] = {LoongArch::R4, LoongArch::R5, LoongArch::R6,
9184 LoongArch::R7, LoongArch::R8, LoongArch::R9,
9185 LoongArch::R10, LoongArch::R11};
9186
9187 // PreserveNone calling convention:
9188 // Arguments may be passed in any general-purpose registers except:
9189 // - R1 : return address register
9190 // - R22 : frame pointer
9191 // - R31 : base pointer
9192 //
9193 // All general-purpose registers are treated as caller-saved,
9194 // except R1 (RA) and R22 (FP).
9195 //
9196 // Non-volatile registers are allocated first so that a function
9197 // can call normal functions without having to spill and reload
9198 // argument registers.
// NOTE(review): embedded line 9199 (the declaration head of this table) is
// absent from this listing; presumably it declares PreserveNoneArgGPRs, the
// name used by allocateArgGPR -- confirm. The list holds R23-R30 first, then
// R4-R20.
9200 LoongArch::R23, LoongArch::R24, LoongArch::R25, LoongArch::R26,
9201 LoongArch::R27, LoongArch::R28, LoongArch::R29, LoongArch::R30,
9202 LoongArch::R4, LoongArch::R5, LoongArch::R6, LoongArch::R7,
9203 LoongArch::R8, LoongArch::R9, LoongArch::R10, LoongArch::R11,
9204 LoongArch::R12, LoongArch::R13, LoongArch::R14, LoongArch::R15,
9205 LoongArch::R16, LoongArch::R17, LoongArch::R18, LoongArch::R19,
9206 LoongArch::R20};
9207
9208 // Eight floating-point registers fa0-fa7 used for passing floating-point
9209 // arguments, and fa0-fa1 are also used to return values.
9210 const MCPhysReg ArgFPR32s[] = {LoongArch::F0, LoongArch::F1, LoongArch::F2,
9211 LoongArch::F3, LoongArch::F4, LoongArch::F5,
9212 LoongArch::F6, LoongArch::F7};
9213 // FPR32 and FPR64 alias each other.
// NOTE(review): embedded line 9214 (the declaration head of the 64-bit FPR
// table) is absent from this listing.
9215 LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, LoongArch::F3_64,
9216 LoongArch::F4_64, LoongArch::F5_64, LoongArch::F6_64, LoongArch::F7_64};
9217
// Vector argument registers VR0-VR7 (presumably the 128-bit LSX registers).
9218 const MCPhysReg ArgVRs[] = {LoongArch::VR0, LoongArch::VR1, LoongArch::VR2,
9219 LoongArch::VR3, LoongArch::VR4, LoongArch::VR5,
9220 LoongArch::VR6, LoongArch::VR7};
9221
// Vector argument registers XR0-XR7 (presumably the 256-bit LASX registers).
9222 const MCPhysReg ArgXRs[] = {LoongArch::XR0, LoongArch::XR1, LoongArch::XR2,
9223 LoongArch::XR3, LoongArch::XR4, LoongArch::XR5,
9224 LoongArch::XR6, LoongArch::XR7};
9225
// Allocates the next free argument GPR according to the calling convention
// recorded in State. One calling convention allocates from
// PreserveNoneArgGPRs for non-variadic calls; variadic calls under that
// convention, and every other convention, allocate from ArgGPRs.
// NOTE(review): embedded lines 9226 (the signature) and 9228 (the case label,
// presumably the PreserveNone calling convention) are absent from this
// listing -- confirm against upstream.
9227 switch (State.getCallingConv()) {
9229 if (!State.isVarArg())
9230 return State.AllocateReg(PreserveNoneArgGPRs);
9231 [[fallthrough]];
9232 default:
9233 return State.AllocateReg(ArgGPRs);
9234 }
9235 }
9236
// Assigns locations for the two GRLen-sized halves of a split argument.
// VA1/ArgFlags1 describe the first half; ValNo2/ValVT2/LocVT2/ArgFlags2
// describe the second. Three outcomes are possible: both halves in GPRs,
// first half in a GPR and second on the stack, or both on the stack. The
// function always returns false.
9237 // Pass a 2*GRLen argument that has been split into two GRLen values through
9238 // registers or the stack as necessary.
9239 static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State,
9240 CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1,
9241 unsigned ValNo2, MVT ValVT2, MVT LocVT2,
9242 ISD::ArgFlagsTy ArgFlags2) {
9243 unsigned GRLenInBytes = GRLen / 8;
9244 if (Register Reg = allocateArgGPR(State)) {
9245 // At least one half can be passed via register.
9246 State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
9247 VA1.getLocVT(), CCValAssign::Full));
9248 } else {
9249 // Both halves must be passed on the stack, with proper alignment.
// The first slot is aligned to the larger of GRLen bytes and the argument's
// original alignment; the second slot only needs GRLen-byte alignment.
9250 Align StackAlign =
9251 std::max(Align(GRLenInBytes), ArgFlags1.getNonZeroOrigAlign());
// NOTE(review): embedded line 9253 is absent from this listing; presumably it
// opens the memory-location construction for the first half.
9252 State.addLoc(
9254 State.AllocateStack(GRLenInBytes, StackAlign),
9255 VA1.getLocVT(), CCValAssign::Full));
9256 State.addLoc(CCValAssign::getMem(
9257 ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
9258 LocVT2, CCValAssign::Full));
9259 return false;
9260 }
9261 if (Register Reg = allocateArgGPR(State)) {
9262 // The second half can also be passed via register.
9263 State.addLoc(
9264 CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
9265 } else {
9266 // The second half is passed via the stack, without additional alignment.
9267 State.addLoc(CCValAssign::getMem(
9268 ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
9269 LocVT2, CCValAssign::Full));
9270 }
9271 return false;
9272 }
9273
9274 // Implements the LoongArch calling convention. Returns true upon failure.
//
// Picks a register or stack location for value number ValNo (of type ValVT)
// and records it in State, or parks it in State's pending list while a split
// argument is still being collected. IsRet selects return-value handling and
// OrigTy is only consulted for the variadic alignment rule below.
// NOTE(review): the line carrying the function name and its leading
// parameters is not visible in this listing; DL and ABI are used below and
// are presumably declared there -- confirm against the full source.
9276 unsigned ValNo, MVT ValVT,
9277 CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
9278 CCState &State, bool IsRet, Type *OrigTy) {
// GRLen is the bit width of the largest legal integer type; only 32 and 64
// are accepted, and GRLenVT is the integer MVT of that width.
// NOTE(review): the assert message below misspells "Unsupported".
9279 unsigned GRLen = DL.getLargestLegalIntTypeSizeInBits();
9280 assert((GRLen == 32 || GRLen == 64) && "Unspport GRLen");
9281 MVT GRLenVT = GRLen == 32 ? MVT::i32 : MVT::i64;
9282 MVT LocVT = ValVT;
9283
9284 // Any return value split into more than two values can't be returned
9285 // directly.
9286 if (IsRet && ValNo > 1)
9287 return true;
9288
9289 // If passing a variadic argument, or if no FPR is available.
9290 bool UseGPRForFloat = true;
9291
9292 switch (ABI) {
// NOTE(review): the case labels of this switch are not visible in this
// listing. One visible arm makes UseGPRForFloat depend solely on whether the
// argument is variadic; the other appears to leave the default (true) as is.
9293 default:
9294 llvm_unreachable("Unexpected ABI");
9295 break;
9300 UseGPRForFloat = ArgFlags.isVarArg();
9301 break;
9304 break;
9305 }
9306
9307 // If this is a variadic argument, the LoongArch calling convention requires
9308 // that it is assigned an 'even' or 'aligned' register if it has (2*GRLen)/8
9309 // byte alignment. An aligned register should be used regardless of whether
9310 // the original argument was split during legalisation or not. The argument
9311 // will not be passed by registers if the original type is larger than
9312 // 2*GRLen, so the register alignment rule does not apply.
9313 unsigned TwoGRLenInBytes = (2 * GRLen) / 8;
9314 if (ArgFlags.isVarArg() &&
9315 ArgFlags.getNonZeroOrigAlign() == TwoGRLenInBytes &&
9316 DL.getTypeAllocSize(OrigTy) == TwoGRLenInBytes) {
9317 unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
9318 // Skip 'odd' register if necessary.
9319 if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1)
9320 State.AllocateReg(ArgGPRs);
9321 }
9322
// Parts of a split argument seen so far, with their flags kept in a parallel
// list of the same length.
9323 SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
9324 SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
9325 State.getPendingArgFlags();
9326
9327 assert(PendingLocs.size() == PendingArgFlags.size() &&
9328 "PendingLocs and PendingArgFlags out of sync");
9329
9330 // FPR32 and FPR64 alias each other.
// Hence checking the FPR32 list alone is enough to detect that no FPR is
// left, in which case floats fall back to GPRs.
9331 if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s))
9332 UseGPRForFloat = true;
9333
// A float routed to a GPR gets an integer location type and is marked BCvt.
9334 if (UseGPRForFloat && ValVT == MVT::f32) {
9335 LocVT = GRLenVT;
9336 LocInfo = CCValAssign::BCvt;
9337 } else if (UseGPRForFloat && GRLen == 64 && ValVT == MVT::f64) {
9338 LocVT = MVT::i64;
9339 LocInfo = CCValAssign::BCvt;
9340 } else if (UseGPRForFloat && GRLen == 32 && ValVT == MVT::f64) {
9341 // Handle passing f64 on LA32D with a soft float ABI or when floating point
9342 // registers are exhausted.
9343 assert(PendingLocs.empty() && "Can't lower f64 if it is split");
9344 // Depending on available argument GPRs, f64 may be passed in a pair of
9345 // GPRs, split between a GPR and the stack, or passed completely on the
9346 // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
9347 // cases.
9348 MCRegister Reg = allocateArgGPR(State);
9349 if (!Reg) {
// No GPR left: the whole f64 takes one 8-byte, 8-aligned stack slot.
9350 int64_t StackOffset = State.AllocateStack(8, Align(8));
9351 State.addLoc(
9352 CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
9353 return false;
9354 }
// First half goes in Reg as a custom i32 location; the second half takes the
// next GPR if there is one, otherwise a 4-byte, 4-aligned stack slot.
9355 LocVT = MVT::i32;
9356 State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
9357 MCRegister HiReg = allocateArgGPR(State);
9358 if (HiReg) {
9359 State.addLoc(
9360 CCValAssign::getCustomReg(ValNo, ValVT, HiReg, LocVT, LocInfo));
9361 } else {
9362 int64_t StackOffset = State.AllocateStack(4, Align(4));
9363 State.addLoc(
9364 CCValAssign::getCustomMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
9365 }
9366 return false;
9367 }
9368
9369 // Split arguments might be passed indirectly, so keep track of the pending
9370 // values.
9371 if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
9372 LocVT = GRLenVT;
9373 LocInfo = CCValAssign::Indirect;
9374 PendingLocs.push_back(
9375 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
9376 PendingArgFlags.push_back(ArgFlags);
// Nothing is assigned until the last part of the split has been seen.
9377 if (!ArgFlags.isSplitEnd()) {
9378 return false;
9379 }
9380 }
9381
9382 // If the split argument only had two elements, it should be passed directly
9383 // in registers or on the stack.
9384 if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&
9385 PendingLocs.size() <= 2) {
9386 assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
9387 // Apply the normal calling convention rules to the first half of the
9388 // split argument.
// The first half is copied out before the pending lists are cleared; the
// second half is described by this call's own ValNo/ValVT/LocVT/ArgFlags.
9389 CCValAssign VA = PendingLocs[0];
9390 ISD::ArgFlagsTy AF = PendingArgFlags[0];
9391 PendingLocs.clear();
9392 PendingArgFlags.clear();
9393 return CC_LoongArchAssign2GRLen(GRLen, State, VA, AF, ValNo, ValVT, LocVT,
9394 ArgFlags);
9395 }
9396
9397 // Allocate to a register if possible, or else a stack slot.
// The stack slot defaults to one GRLen-sized, GRLen-aligned unit; 128-bit and
// 256-bit vectors override size and alignment with their own width.
9398 Register Reg;
9399 unsigned StoreSizeBytes = GRLen / 8;
9400 Align StackAlign = Align(GRLen / 8);
9401
9402 if (ValVT == MVT::f32 && !UseGPRForFloat) {
9403 Reg = State.AllocateReg(ArgFPR32s);
9404 } else if (ValVT == MVT::f64 && !UseGPRForFloat) {
9405 Reg = State.AllocateReg(ArgFPR64s);
9406 } else if (ValVT.is128BitVector()) {
9407 Reg = State.AllocateReg(ArgVRs);
9408 UseGPRForFloat = false;
9409 StoreSizeBytes = 16;
9410 StackAlign = Align(16);
9411 } else if (ValVT.is256BitVector()) {
9412 Reg = State.AllocateReg(ArgXRs);
9413 UseGPRForFloat = false;
9414 StoreSizeBytes = 32;
9415 StackAlign = Align(32);
9416 } else {
9417 Reg = allocateArgGPR(State);
9418 }
9419
// A stack slot is only allocated when no register was obtained.
9420 unsigned StackOffset =
9421 Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign);
9422
9423 // If we reach this point and PendingLocs is non-empty, we must be at the
9424 // end of a split argument that must be passed indirectly.
9425 if (!PendingLocs.empty()) {
9426 assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
9427 assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
// Every pending part is given the same register or the same stack offset.
9428 for (auto &It : PendingLocs) {
9429 if (Reg)
9430 It.convertToReg(Reg);
9431 else
9432 It.convertToMem(StackOffset);
9433 State.addLoc(It);
9434 }
9435 PendingLocs.clear();
9436 PendingArgFlags.clear();
9437 return false;
9438 }
9439 assert((!UseGPRForFloat || LocVT == GRLenVT) &&
9440 "Expected an GRLenVT at this stage");
9441
9442 if (Reg) {
9443 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
9444 return false;
9445 }
9446
9447 // When a floating-point value is passed on the stack, no bit-cast is needed.
9448 if (ValVT.isFloatingPoint()) {
9449 LocVT = ValVT;
9450 LocInfo = CCValAssign::Full;
9451 }
9452
9453 State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
9454 return false;
9455}
9456
// Runs the assignment function Fn over every entry of Ins, in order, with the
// results accumulated in CCInfo. The IR type handed to Fn is the function's
// return type when IsRet is set, the matching parameter type when the entry
// is an original argument, and null otherwise. Fn returning true (failure) is
// treated as unreachable after a debug message.
// NOTE(review): the line that declares ABI is not visible in this listing;
// only the tail of its initialiser (a getTargetABI() call) is shown.
9457void LoongArchTargetLowering::analyzeInputArgs(
9458 MachineFunction &MF, CCState &CCInfo,
9459 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
9460 LoongArchCCAssignFn Fn) const {
9461 FunctionType *FType = MF.getFunction().getFunctionType();
9462 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
9463 MVT ArgVT = Ins[i].VT;
9464 Type *ArgTy = nullptr;
9465 if (IsRet)
9466 ArgTy = FType->getReturnType();
9467 else if (Ins[i].isOrigArg())
9468 ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
9470 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
// Every value starts out as CCValAssign::Full; Fn may pick another LocInfo.
9471 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Ins[i].Flags,
9472 CCInfo, IsRet, ArgTy)) {
9473 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " << ArgVT
9474 << '\n');
9475 llvm_unreachable("");
9476 }
9477 }
9478}
9479
// Runs the assignment function Fn over every entry of Outs, in order, with
// the results accumulated in CCInfo. When CLI is non-null the original IR
// type of each value is looked up in CLI's argument list via OrigArgIndex;
// otherwise a null type is passed. Fn returning true (failure) is treated as
// unreachable after a debug message.
// NOTE(review): the line that declares ABI is not visible in this listing;
// only the tail of its initialiser (a getTargetABI() call) is shown.
9480void LoongArchTargetLowering::analyzeOutputArgs(
9481 MachineFunction &MF, CCState &CCInfo,
9482 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
9483 CallLoweringInfo *CLI, LoongArchCCAssignFn Fn) const {
9484 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
9485 MVT ArgVT = Outs[i].VT;
9486 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
9488 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
// Every value starts out as CCValAssign::Full; Fn may pick another LocInfo.
9489 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Outs[i].Flags,
9490 CCInfo, IsRet, OrigTy)) {
9491 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " << ArgVT
9492 << "\n");
9493 llvm_unreachable("");
9494 }
9495 }
9496}
9497
9498 // Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
9499 // values.
//
// Returns Val unchanged for CCValAssign::Full. For CCValAssign::BCvt an f32
// held in an i64 location is moved with MOVGR2FR_W_LA64, and every other
// combination is an ordinary bitcast to the value type.
// NOTE(review): the line carrying the function name and leading parameters,
// and one line inside the switch, are not visible in this listing.
9501 const CCValAssign &VA, const SDLoc &DL) {
9502 switch (VA.getLocInfo()) {
9503 default:
9504 llvm_unreachable("Unexpected CCValAssign::LocInfo");
9505 case CCValAssign::Full:
9507 break;
9508 case CCValAssign::BCvt:
9509 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
9510 Val = DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Val);
9511 else
9512 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
9513 break;
9514 }
9515 return Val;
9516}
9517
// Reads an incoming argument that was assigned to a register: creates a
// virtual register of the class TLI reports for the location type, marks the
// assigned physical register as live-in to it, copies the value out and
// converts it from the location type to the value type.
// NOTE(review): the line carrying the function name and leading parameters,
// and the declarations of MF, RegInfo and LAFI, are not visible in this
// listing.
9519 const CCValAssign &VA, const SDLoc &DL,
9520 const ISD::InputArg &In,
9521 const LoongArchTargetLowering &TLI) {
9524 EVT LocVT = VA.getLocVT();
9525 SDValue Val;
9526 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
9527 Register VReg = RegInfo.createVirtualRegister(RC);
9528 RegInfo.addLiveIn(VA.getLocReg(), VReg);
9529 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
9530
9531 // If input is sign extended from 32 bits, note it for the OptW pass.
// Only original IR arguments of integer type are considered.
9532 if (In.isOrigArg()) {
9533 Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex());
9534 if (OrigArg->getType()->isIntegerTy()) {
9535 unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
9536 // An input zero extended from i31 can also be considered sign extended.
9537 if ((BitWidth <= 32 && In.Flags.isSExt()) ||
9538 (BitWidth < 32 && In.Flags.isZExt())) {
9541 LAFI->addSExt32Register(VReg);
9542 }
9543 }
9544 }
9545
9546 return convertLocVTToValVT(DAG, Val, VA, DL);
9547}
9548
9549 // The caller is responsible for loading the full value if the argument is
9550 // passed with CCValAssign::Indirect.
//
// Reads an incoming argument that was assigned to the stack: creates an
// immutable fixed frame object of the value type's store size at the
// assigned offset and returns a non-extending load of the location type
// from it.
// NOTE(review): the line carrying the function name and leading parameters,
// the declaration of MF, and the trailing arguments of the getFrameIndex and
// getExtLoad calls are not visible in this listing.
9552 const CCValAssign &VA, const SDLoc &DL) {
9554 MachineFrameInfo &MFI = MF.getFrameInfo();
9555 EVT ValVT = VA.getValVT();
9556 int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
9557 /*IsImmutable=*/true);
9558 SDValue FIN = DAG.getFrameIndex(
9560
// All visible cases select NON_EXTLOAD; any other LocInfo is unreachable.
9561 ISD::LoadExtType ExtType;
9562 switch (VA.getLocInfo()) {
9563 default:
9564 llvm_unreachable("Unexpected CCValAssign::LocInfo");
9565 case CCValAssign::Full:
9567 case CCValAssign::BCvt:
9568 ExtType = ISD::NON_EXTLOAD;
9569 break;
9570 }
9571 return DAG.getExtLoad(
9572 ExtType, DL, VA.getLocVT(), Chain, FIN,
9574}
9575
// Rebuilds an incoming f64 that was passed as two i32 halves. VA describes
// the first half, which must be in a register; HiVA describes the second
// half, which is either in another GPR or in a 4-byte stack slot. The halves
// are combined with BUILD_PAIR_F64.
// NOTE(review): the line carrying the function name and leading parameters,
// and the declarations of MF and RegInfo, are not visible in this listing.
9577 const CCValAssign &VA,
9578 const CCValAssign &HiVA,
9579 const SDLoc &DL) {
9580 assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
9581 "Unexpected VA");
9583 MachineFrameInfo &MFI = MF.getFrameInfo();
9585
9586 assert(VA.isRegLoc() && "Expected register VA assignment");
9587
// Low half: copy out of the physical register through a fresh live-in vreg.
9588 Register LoVReg = RegInfo.createVirtualRegister(&LoongArch::GPRRegClass);
9589 RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
9590 SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
9591 SDValue Hi;
9592 if (HiVA.isMemLoc()) {
9593 // Second half of f64 is passed on the stack.
9594 int FI = MFI.CreateFixedObject(4, HiVA.getLocMemOffset(),
9595 /*IsImmutable=*/true);
9596 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
9597 Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
9599 } else {
9600 // Second half of f64 is passed in another GPR.
9601 Register HiVReg = RegInfo.createVirtualRegister(&LoongArch::GPRRegClass);
9602 RegInfo.addLiveIn(HiVA.getLocReg(), HiVReg);
9603 Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
9604 }
9605 return DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64, Lo, Hi);
9606}
9607
// Converts Val from its value type to the location type recorded in VA.
// Returns Val unchanged for CCValAssign::Full. For CCValAssign::BCvt an f32
// destined for an i64 location is moved with MOVFR2GR_S_LA64, and every
// other combination is an ordinary bitcast to the location type.
// NOTE(review): the line carrying the function name and leading parameters
// is not visible in this listing.
9609 const CCValAssign &VA, const SDLoc &DL) {
9610 EVT LocVT = VA.getLocVT();
9611
9612 switch (VA.getLocInfo()) {
9613 default:
9614 llvm_unreachable("Unexpected CCValAssign::LocInfo");
9615 case CCValAssign::Full:
9616 break;
9617 case CCValAssign::BCvt:
9618 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
9619 Val = DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Val);
9620 else
9621 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
9622 break;
9623 }
9624 return Val;
9625}
9626
9627static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
9628 CCValAssign::LocInfo LocInfo,
9629 ISD::ArgFlagsTy ArgFlags, Type *OrigTy,
9630 CCState &State) {
9631 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
9632 // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, SpLim
9633 // s0 s1 s2 s3 s4 s5 s6 s7 s8
9634 static const MCPhysReg GPRList[] = {
9635 LoongArch::R23, LoongArch::R24, LoongArch::R25,
9636 LoongArch::R26, LoongArch::R27, LoongArch::R28,
9637 LoongArch::R29, LoongArch::R30, LoongArch::R31};
9638 if (MCRegister Reg = State.AllocateReg(GPRList)) {
9639 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
9640 return false;
9641 }
9642 }
9643
9644 if (LocVT == MVT::f32) {
9645 // Pass in STG registers: F1, F2, F3, F4
9646 // fs0,fs1,fs2,fs3
9647 static const MCPhysReg FPR32List[] = {LoongArch::F24, LoongArch::F25,
9648 LoongArch::F26, LoongArch::F27};
9649 if (MCRegister Reg = State.AllocateReg(FPR32List)) {
9650 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
9651 return false;
9652 }
9653 }
9654
9655 if (LocVT == MVT::f64) {
9656 // Pass in STG registers: D1, D2, D3, D4
9657 // fs4,fs5,fs6,fs7
9658 static const MCPhysReg FPR64List[] = {LoongArch::F28_64, LoongArch::F29_64,
9659 LoongArch::F30_64, LoongArch::F31_64};
9660 if (MCRegister Reg = State.AllocateReg(FPR64List)) {
9661 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
9662 return false;
9663 }
9664 }
9665
9666 report_fatal_error("No registers left in GHC calling convention");
9667 return true;
9668}
9669
9670 // Transform physical registers into virtual registers.
//
// Lowers the incoming formal arguments described by Ins. Each assigned
// location is turned into an SDValue appended to InVals; for variadic
// functions the argument GPRs that were not allocated are stored to a save
// area. Returns the (possibly updated) chain.
// NOTE(review): several lines of this function are not visible in this
// listing, including the one carrying its name and the declarations of MF,
// ArgLocs, ArgRegs and LAFI.
9672 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
9673 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
9674 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
9675
9677
// Only the C, Fast and GHC conventions are accepted; GHC additionally
// requires both the BasicF and BasicD subtarget features.
9678 switch (CallConv) {
9679 default:
9680 llvm_unreachable("Unsupported calling convention");
9681 case CallingConv::C:
9682 case CallingConv::Fast:
9685 break;
9686 case CallingConv::GHC:
9687 if (!MF.getSubtarget().hasFeature(LoongArch::FeatureBasicF) ||
9688 !MF.getSubtarget().hasFeature(LoongArch::FeatureBasicD))
9690 "GHC calling convention requires the F and D extensions");
9691 }
9692
9693 const Function &Func = MF.getFunction();
9694 EVT PtrVT = getPointerTy(DAG.getDataLayout());
9695 MVT GRLenVT = Subtarget.getGRLenVT();
9696 unsigned GRLenInBytes = Subtarget.getGRLen() / 8;
9697
9698 // Check if this function has any musttail calls. If so, incoming indirect
9699 // arg pointers must be saved in virtual registers so they survive across
9700 // basic blocks (the SelectionDAG is cleared between BBs). Only do this
9701 // when needed to avoid adding register pressure to non-musttail functions.
// The scan visits every instruction of every basic block of the function.
9702 bool HasMusttail = llvm::any_of(Func, [](const BasicBlock &BB) {
9703 return llvm::any_of(BB, [](const Instruction &I) {
9704 if (const auto *CI = dyn_cast<CallInst>(&I))
9705 return CI->isMustTailCall();
9706 return false;
9707 });
9708 });
9709 // Used with varargs to accumulate store chains.
9710 std::vector<SDValue> OutChains;
9711
9712 // Assign locations to all of the incoming arguments.
9714 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
9715
9716 if (CallConv == CallingConv::GHC)
9718 else
9719 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false, CC_LoongArch);
9720
// i walks ArgLocs and InsIdx walks Ins. They diverge when an f64 occupies two
// ArgLocs entries (i is advanced an extra step below, InsIdx is not).
9721 for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) {
9722 CCValAssign &VA = ArgLocs[i];
9723 SDValue ArgValue;
9724 // Passing f64 on LA32D with a soft float ABI must be handled as a special
9725 // case.
9726 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
9727 assert(VA.needsCustom());
9728 ArgValue = unpackF64OnLA32DSoftABI(DAG, Chain, VA, ArgLocs[++i], DL);
9729 } else if (VA.isRegLoc())
9730 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[InsIdx], *this);
9731 else
9732 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
9733 if (VA.getLocInfo() == CCValAssign::Indirect) {
9734 // If the original argument was split and passed by reference, we need to
9735 // load all parts of it here (using the same address).
9736 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
9738 unsigned ArgIndex = Ins[InsIdx].OrigArgIndex;
// With a musttail call present, the incoming pointer is copied into a fresh
// virtual register and registered under the argument's original index.
9739 if (HasMusttail) {
9742 Register VReg =
9743 MF.getRegInfo().createVirtualRegister(&LoongArch::GPRRegClass);
9744 Chain = DAG.getCopyToReg(Chain, DL, VReg, ArgValue);
9745 LAFI->setIncomingIndirectArg(ArgIndex, VReg);
9746 }
9747 unsigned ArgPartOffset = Ins[InsIdx].PartOffset;
9748 assert(ArgPartOffset == 0);
// Consume the following Ins entries that belong to the same original
// argument, loading each part from the pointer plus its part offset.
9749 while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) {
9750 CCValAssign &PartVA = ArgLocs[i + 1];
9751 unsigned PartOffset = Ins[InsIdx + 1].PartOffset - ArgPartOffset;
9752 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
9753 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
9754 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
9756 ++i;
9757 ++InsIdx;
9758 }
9759 continue;
9760 }
9761 InVals.push_back(ArgValue);
9762 }
9763
9764 if (IsVarArg) {
// Idx is the index of the first entry of ArgRegs not allocated to a named
// argument.
9766 unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
9767 const TargetRegisterClass *RC = &LoongArch::GPRRegClass;
9768 MachineFrameInfo &MFI = MF.getFrameInfo();
9769 MachineRegisterInfo &RegInfo = MF.getRegInfo();
9770 auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();
9771
9772 // Offset of the first variable argument from stack pointer, and size of
9773 // the vararg save area. For now, the varargs save area is either zero or
9774 // large enough to hold a0-a7.
9775 int VaArgOffset, VarArgsSaveSize;
9776
9777 // If all registers are allocated, then all varargs must be passed on the
9778 // stack and we don't need to save any argregs.
9779 if (ArgRegs.size() == Idx) {
9780 VaArgOffset = CCInfo.getStackSize();
9781 VarArgsSaveSize = 0;
9782 } else {
// One GRLen-sized slot per unallocated register, placed at negative offsets.
9783 VarArgsSaveSize = GRLenInBytes * (ArgRegs.size() - Idx);
9784 VaArgOffset = -VarArgsSaveSize;
9785 }
9786
9787 // Record the frame index of the first variable argument
9788 // which is a value necessary to VASTART.
9789 int FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
9790 LoongArchFI->setVarArgsFrameIndex(FI);
9791
9792 // If saving an odd number of registers then create an extra stack slot to
9793 // ensure that the frame pointer is 2*GRLen-aligned, which in turn ensures
9794 // offsets to even-numbered registers remain 2*GRLen-aligned.
9795 if (Idx % 2) {
9796 MFI.CreateFixedObject(GRLenInBytes, VaArgOffset - (int)GRLenInBytes,
9797 true);
9798 VarArgsSaveSize += GRLenInBytes;
9799 }
9800
9801 // Copy the integer registers that may have been used for passing varargs
9802 // to the vararg save area.
9803 for (unsigned I = Idx; I < ArgRegs.size();
9804 ++I, VaArgOffset += GRLenInBytes) {
9805 const Register Reg = RegInfo.createVirtualRegister(RC);
9806 RegInfo.addLiveIn(ArgRegs[I], Reg);
9807 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, GRLenVT);
9808 FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
9809 SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
9810 SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
// The IR value attached to the store's memory operand is reset to null.
9812 cast<StoreSDNode>(Store.getNode())
9813 ->getMemOperand()
9814 ->setValue((Value *)nullptr);
9815 OutChains.push_back(Store);
9816 }
9817 LoongArchFI->setVarArgsSaveSize(VarArgsSaveSize);
9818 }
9819
9820 // All stores are grouped in one node to allow the matching between
9821 // the size of Ins and InVals. This only happens for vararg functions.
9822 if (!OutChains.empty()) {
9823 OutChains.push_back(Chain);
9824 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
9825 }
9826
9827 return Chain;
9828}
9829
// Reports whether the call CI is marked as a tail call.
// NOTE(review): the line carrying the function name and parameter list is
// not visible in this listing.
9831 return CI->isTailCall();
9832}
9833
9834 // Check if the return value is used as only a return value, as otherwise
9835 // we can't perform a tail-call.
//
// Returns true only when N produces exactly one value whose single user is
// an unglued CopyToReg that is in turn used by LoongArchISD::RET nodes and
// nothing else. On success Chain is set to the CopyToReg's first operand; on
// failure Chain is left untouched.
// NOTE(review): the line carrying the function name and its first parameter
// is not visible in this listing.
9837 SDValue &Chain) const {
9838 if (N->getNumValues() != 1)
9839 return false;
9840 if (!N->hasNUsesOfValue(1, 0))
9841 return false;
9842
9843 SDNode *Copy = *N->user_begin();
9844 if (Copy->getOpcode() != ISD::CopyToReg)
9845 return false;
9846
9847 // If the ISD::CopyToReg has a glue operand, we conservatively assume it
9848 // isn't safe to perform a tail call.
9849 if (Copy->getGluedNode())
9850 return false;
9851
9852 // The copy must be used by a LoongArchISD::RET, and nothing else.
9853 bool HasRet = false;
9854 for (SDNode *Node : Copy->users()) {
9855 if (Node->getOpcode() != LoongArchISD::RET)
9856 return false;
9857 HasRet = true;
9858 }
9859
// A copy with no users at all does not qualify either.
9860 if (!HasRet)
9861 return false;
9862
9863 Chain = Copy->getOperand(0);
9864 return true;
9865}
9866
9867// Check whether the call is eligible for tail call optimization.
9868bool LoongArchTargetLowering::isEligibleForTailCallOptimization(
9869 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
9870 const SmallVectorImpl<CCValAssign> &ArgLocs) const {
9871
9872 auto CalleeCC = CLI.CallConv;
9873 auto &Outs = CLI.Outs;
9874 auto &Caller = MF.getFunction();
9875 auto CallerCC = Caller.getCallingConv();
9876
9877 bool IsMustTail = CLI.CB && CLI.CB->isMustTailCall();
9878
9879 // Byval parameters hand the function a pointer directly into the stack area
9880 // we want to reuse during a tail call. Working around this *is* possible
9881 // but less efficient and uglier in LowerCall. For musttail, there is no
9882 // workaround today: a byval arg requires a local copy that becomes invalid
9883 // after the tail call deallocates the caller's frame, so rejecting here
9884 // (and triggering reportFatalInternalError in LowerCall) is safer than
9885 // miscompiling.
9886 for (auto &Arg : Outs)
9887 if (Arg.Flags.isByVal())
9888 return false;
9889
9890 // musttail bypasses the remaining checks: the checks either reject cases
9891 // we handle specially (indirect args are forwarded via incoming pointers,
9892 // stack-passed args reuse the matching incoming layout, sret is forwarded
9893 // like any other pointer arg) or are optimizations not applicable to
9894 // mandatory tail calls.
9895 if (IsMustTail)
9896 return true;
9897
9898 // Do not tail call opt if the stack is used to pass parameters.
9899 if (CCInfo.getStackSize() != 0)
9900 return false;
9901
9902 // Do not tail call opt if any parameters need to be passed indirectly.
9903 for (auto &VA : ArgLocs)
9904 if (VA.getLocInfo() == CCValAssign::Indirect)
9905 return false;
9906
9907 // Do not tail call opt if either caller or callee uses struct return
9908 // semantics.
9909 auto IsCallerStructRet = Caller.hasStructRetAttr();
9910 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
9911 if (IsCallerStructRet || IsCalleeStructRet)
9912 return false;
9913
9914 // The callee has to preserve all registers the caller needs to preserve.
9915 const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
9916 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
9917 if (CalleeCC != CallerCC) {
9918 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
9919 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
9920 return false;
9921 }
9922 return true;
9923}
9924
// Returns the preferred alignment that DAG's data layout reports for the IR
// type corresponding to VT.
// NOTE(review): the line carrying the function name and parameter list is
// not visible in this listing.
9926 return DAG.getDataLayout().getPrefTypeAlign(
9927 VT.getTypeForEVT(*DAG.getContext()));
9928}
9929
9930// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
9931// and output parameter nodes.
9932SDValue
9934 SmallVectorImpl<SDValue> &InVals) const {
9935 SelectionDAG &DAG = CLI.DAG;
9936 SDLoc &DL = CLI.DL;
9938 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
9940 SDValue Chain = CLI.Chain;
9941 SDValue Callee = CLI.Callee;
9942 CallingConv::ID CallConv = CLI.CallConv;
9943 bool IsVarArg = CLI.IsVarArg;
9944 EVT PtrVT = getPointerTy(DAG.getDataLayout());
9945 MVT GRLenVT = Subtarget.getGRLenVT();
9946 bool &IsTailCall = CLI.IsTailCall;
9947
9949
9950 // Analyze the operands of the call, assigning locations to each operand.
9952 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
9953
9954 if (CallConv == CallingConv::GHC)
9955 ArgCCInfo.AnalyzeCallOperands(Outs, CC_LoongArch_GHC);
9956 else
9957 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI, CC_LoongArch);
9958
9959 // Check if it's really possible to do a tail call.
9960 if (IsTailCall)
9961 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
9962
9963 if (IsTailCall)
9964 ++NumTailCalls;
9965 else if (CLI.CB && CLI.CB->isMustTailCall())
9966 report_fatal_error("failed to perform tail call elimination on a call "
9967 "site marked musttail");
9968
9969 // Get a count of how many bytes are to be pushed on the stack.
9970 unsigned NumBytes = ArgCCInfo.getStackSize();
9971
9972 // Create local copies for byval args.
9973 SmallVector<SDValue> ByValArgs;
9974 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
9975 ISD::ArgFlagsTy Flags = Outs[i].Flags;
9976 if (!Flags.isByVal())
9977 continue;
9978
9979 SDValue Arg = OutVals[i];
9980 unsigned Size = Flags.getByValSize();
9981 Align Alignment = Flags.getNonZeroByValAlign();
9982
9983 int FI =
9984 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
9985 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
9986 SDValue SizeNode = DAG.getConstant(Size, DL, GRLenVT);
9987
9988 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment, Alignment,
9989 /*IsVolatile=*/false,
9990 /*AlwaysInline=*/false, /*CI=*/nullptr, std::nullopt,
9992 ByValArgs.push_back(FIPtr);
9993 }
9994
9995 if (!IsTailCall)
9996 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
9997
9998 // Copy argument values to their designated locations.
10000 SmallVector<SDValue> MemOpChains;
10001 SDValue StackPtr;
10002 for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e;
10003 ++i, ++OutIdx) {
10004 CCValAssign &VA = ArgLocs[i];
10005 SDValue ArgValue = OutVals[OutIdx];
10006 ISD::ArgFlagsTy Flags = Outs[OutIdx].Flags;
10007
10008 // Handle passing f64 on LA32D with a soft float ABI as a special case.
10009 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
10010 assert(VA.isRegLoc() && "Expected register VA assignment");
10011 assert(VA.needsCustom());
10012 SDValue SplitF64 =
10013 DAG.getNode(LoongArchISD::SPLIT_PAIR_F64, DL,
10014 DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
10015 SDValue Lo = SplitF64.getValue(0);
10016 SDValue Hi = SplitF64.getValue(1);
10017
10018 Register RegLo = VA.getLocReg();
10019 RegsToPass.push_back(std::make_pair(RegLo, Lo));
10020
10021 // Get the CCValAssign for the Hi part.
10022 CCValAssign &HiVA = ArgLocs[++i];
10023
10024 if (HiVA.isMemLoc()) {
10025 // Second half of f64 is passed on the stack.
10026 if (!StackPtr.getNode())
10027 StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT);
10029 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
10030 DAG.getIntPtrConstant(HiVA.getLocMemOffset(), DL));
10031 // Emit the store.
10032 MemOpChains.push_back(DAG.getStore(
10033 Chain, DL, Hi, Address,
10035 } else {
10036 // Second half of f64 is passed in another GPR.
10037 Register RegHigh = HiVA.getLocReg();
10038 RegsToPass.push_back(std::make_pair(RegHigh, Hi));
10039 }
10040 continue;
10041 }
10042
10043 // Promote the value if needed.
10044 // For now, only handle fully promoted and indirect arguments.
10045 if (VA.getLocInfo() == CCValAssign::Indirect) {
10046 // For musttail calls, reuse incoming indirect pointers instead of
10047 // creating new stack temporaries. The incoming pointers point to the
10048 // caller's caller's frame, which remains valid after a tail call.
10049 if (IsTailCall && CLI.CB && CLI.CB->isMustTailCall()) {
10052 unsigned CallArgIdx = Outs[OutIdx].OrigArgIndex;
10053
10054 // Resolve which formal parameter is being passed at this call
10055 // position.
10056 //
10057 // FIXME: Ins[].OrigArgIndex is Argument::getArgNo() (unfiltered),
10058 // but Outs[].OrigArgIndex is an index into a filtered arg list
10059 // (empty types removed, via CallLoweringInfo in the target-
10060 // independent layer). IncomingIndirectArgs is keyed by the
10061 // caller's unfiltered Argument::getArgNo(), so we have to walk
10062 // the caller's formals (same filter) to translate the index.
10063 // This target-independent asymmetry should be normalized so
10064 // backends do not need to re-derive the mapping.
10065 //
10066 // Steps:
10067 // 1. Find the call operand at filtered position CallArgIdx.
10068 // 2. If it is an Argument, use getArgNo() directly (same filter
10069 // for caller formals and call operands).
10070 // 3. Otherwise (computed value), walk the caller's formals and
10071 // skip empty types to map the filtered index to getArgNo().
10072 const Argument *FormalArg = nullptr;
10073 unsigned FilteredIdx = 0;
10074 for (const auto &CallArg : CLI.CB->args()) {
10075 if (CallArg->getType()->isEmptyTy())
10076 continue;
10077 if (FilteredIdx == CallArgIdx) {
10078 FormalArg = dyn_cast<Argument>(CallArg);
10079 break;
10080 }
10081 ++FilteredIdx;
10082 }
10083
10084 // For forwarded args, getArgNo() gives the unfiltered index directly.
10085 // For computed args, walk the caller's formals to resolve it.
10086 unsigned FormalArgIdx = CallArgIdx;
10087 if (FormalArg) {
10088 FormalArgIdx = FormalArg->getArgNo();
10089 } else {
10090 FilteredIdx = 0;
10091 for (const auto &Arg : MF.getFunction().args()) {
10092 if (Arg.getType()->isEmptyTy())
10093 continue;
10094 if (FilteredIdx == CallArgIdx) {
10095 FormalArgIdx = Arg.getArgNo();
10096 break;
10097 }
10098 ++FilteredIdx;
10099 }
10100 }
10101
10102 Register VReg = LAFI->getIncomingIndirectArg(FormalArgIdx);
10103 SDValue CopyOp = DAG.getCopyFromReg(Chain, DL, VReg, PtrVT);
10104 // Thread the CopyFromReg output chain through MemOpChains so the
10105 // TokenFactor below sequences the copy with any stores we emit
10106 // for this argument.
10107 MemOpChains.push_back(CopyOp.getValue(1));
10108 SDValue IncomingPtr = CopyOp;
10109
10110 if (!FormalArg) {
10111 // Computed value: store into the incoming indirect pointer for the
10112 // same-position formal parameter (musttail guarantees matching
10113 // prototypes, so types match). The pointer survives the tail call
10114 // since it points to the caller's caller's frame.
10115 //
10116 // The data-flow edge through IncomingPtr already prevents the
10117 // store from being scheduled before the CopyFromReg. Threading
10118 // CopyOp.getValue(1) (the copy's output chain) into the store
10119 // makes that ordering explicit on the chain edge as well, which
10120 // is the convention for memory ops chaining off their producers.
10121 MemOpChains.push_back(
10122 DAG.getStore(CopyOp.getValue(1), DL, ArgValue, IncomingPtr,
10124 // Store any split parts at their respective offsets.
10125 unsigned ArgPartOffset = Outs[OutIdx].PartOffset;
10126 while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == CallArgIdx) {
10127 SDValue PartValue = OutVals[OutIdx + 1];
10128 unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset;
10129 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
10130 SDValue Addr =
10131 DAG.getNode(ISD::ADD, DL, PtrVT, IncomingPtr, Offset);
10132 MemOpChains.push_back(
10133 DAG.getStore(CopyOp.getValue(1), DL, PartValue, Addr,
10135 ++i;
10136 ++OutIdx;
10137 }
10138 }
10139 ArgValue = IncomingPtr;
10140
10141 // Skip any remaining split parts (for forwarded args, they are
10142 // covered by the forwarded pointer).
10143 while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == CallArgIdx) {
10144 ++i;
10145 ++OutIdx;
10146 }
10147 } else {
10148 // Store the argument in a stack slot and pass its address.
10149 Align StackAlign =
10150 std::max(getPrefTypeAlign(Outs[OutIdx].ArgVT, DAG),
10151 getPrefTypeAlign(ArgValue.getValueType(), DAG));
10152 TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
10153 // If the original argument was split and passed by reference, we need
10154 // to store the required parts of it here (and pass just one address).
10155 unsigned ArgIndex = Outs[OutIdx].OrigArgIndex;
10156 unsigned ArgPartOffset = Outs[OutIdx].PartOffset;
10157 assert(ArgPartOffset == 0);
10158 // Calculate the total size to store. We don't have access to what we're
10159 // actually storing other than performing the loop and collecting the
10160 // info.
10162 while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) {
10163 SDValue PartValue = OutVals[OutIdx + 1];
10164 unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset;
10165 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
10166 EVT PartVT = PartValue.getValueType();
10167 StoredSize += PartVT.getStoreSize();
10168 StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
10169 Parts.push_back(std::make_pair(PartValue, Offset));
10170 ++i;
10171 ++OutIdx;
10172 }
10173 SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
10174 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
10175 MemOpChains.push_back(
10176 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
10178 for (const auto &Part : Parts) {
10179 SDValue PartValue = Part.first;
10180 SDValue PartOffset = Part.second;
10182 DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
10183 MemOpChains.push_back(
10184 DAG.getStore(Chain, DL, PartValue, Address,
10186 }
10187 ArgValue = SpillSlot;
10188 }
10189 } else {
10190 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL);
10191 }
10192
10193 // Use local copy if it is a byval arg.
10194 if (Flags.isByVal())
10195 ArgValue = ByValArgs[j++];
10196
10197 if (VA.isRegLoc()) {
10198 // Queue up the argument copies and emit them at the end.
10199 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
10200 } else {
10201 assert(VA.isMemLoc() && "Argument not register or memory");
10202 assert((!IsTailCall || (CLI.CB && CLI.CB->isMustTailCall())) &&
10203 "Tail call not allowed if stack is used for passing parameters");
10204
10205 // Work out the address of the stack slot.
10206 if (!StackPtr.getNode())
10207 StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT);
10209 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
10211
10212 // Emit the store.
10213 MemOpChains.push_back(
10214 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
10215 }
10216 }
10217
10218 // Join the stores, which are independent of one another.
10219 if (!MemOpChains.empty())
10220 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
10221
10222 SDValue Glue;
10223
10224 // Build a sequence of copy-to-reg nodes, chained and glued together.
10225 for (auto &Reg : RegsToPass) {
10226 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
10227 Glue = Chain.getValue(1);
10228 }
10229
10230 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
10231 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
10232 // split it and then direct call can be matched by PseudoCALL_SMALL.
10234 const GlobalValue *GV = S->getGlobal();
10235 unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(GV)
10238 Callee = DAG.getTargetGlobalAddress(S->getGlobal(), DL, PtrVT, 0, OpFlags);
10239 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
10240 unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(nullptr)
10243 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);
10244 }
10245
10246 // The first call operand is the chain and the second is the target address.
10248 Ops.push_back(Chain);
10249 Ops.push_back(Callee);
10250
10251 // Add argument registers to the end of the list so that they are
10252 // known live into the call.
10253 for (auto &Reg : RegsToPass)
10254 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
10255
10256 if (!IsTailCall) {
10257 // Add a register mask operand representing the call-preserved registers.
10258 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
10259 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
10260 assert(Mask && "Missing call preserved mask for calling convention");
10261 Ops.push_back(DAG.getRegisterMask(Mask));
10262 }
10263
10264 // Glue the call to the argument copies, if any.
10265 if (Glue.getNode())
10266 Ops.push_back(Glue);
10267
10268 // Emit the call.
10269 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
10270 unsigned Op;
10271 switch (DAG.getTarget().getCodeModel()) {
10272 default:
10273 report_fatal_error("Unsupported code model");
10274 case CodeModel::Small:
10275 Op = IsTailCall ? LoongArchISD::TAIL : LoongArchISD::CALL;
10276 break;
10277 case CodeModel::Medium:
10278 Op = IsTailCall ? LoongArchISD::TAIL_MEDIUM : LoongArchISD::CALL_MEDIUM;
10279 break;
10280 case CodeModel::Large:
10281 assert(Subtarget.is64Bit() && "Large code model requires LA64");
10282 Op = IsTailCall ? LoongArchISD::TAIL_LARGE : LoongArchISD::CALL_LARGE;
10283 break;
10284 }
10285
10286 if (IsTailCall) {
10288 SDValue Ret = DAG.getNode(Op, DL, NodeTys, Ops);
10289 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
10290 return Ret;
10291 }
10292
10293 Chain = DAG.getNode(Op, DL, NodeTys, Ops);
10294 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
10295 Glue = Chain.getValue(1);
10296
10297 // Mark the end of the call, which is glued to the call itself.
10298 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
10299 Glue = Chain.getValue(1);
10300
10301 // Assign locations to each value returned by this call.
10303 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
10304 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_LoongArch);
10305
10306 // Copy all of the result registers out of their specified physreg.
10307 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
10308 auto &VA = RVLocs[i];
10309 // Copy the value out.
10310 SDValue RetValue =
10311 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
10312 // Glue the RetValue to the end of the call sequence.
10313 Chain = RetValue.getValue(1);
10314 Glue = RetValue.getValue(2);
10315
10316 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
10317 assert(VA.needsCustom());
10318 SDValue RetValue2 = DAG.getCopyFromReg(Chain, DL, RVLocs[++i].getLocReg(),
10319 MVT::i32, Glue);
10320 Chain = RetValue2.getValue(1);
10321 Glue = RetValue2.getValue(2);
10322 RetValue = DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64,
10323 RetValue, RetValue2);
10324 } else
10325 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL);
10326
10327 InVals.push_back(RetValue);
10328 }
10329
10330 return Chain;
10331}
10332
// Checks whether every value described by Outs can be given a return
// location. CC_LoongArch is run with IsRet=true on each entry in order; the
// first entry for which it returns true makes this function return false,
// otherwise the result is true.
// NOTE(review): the opening line of this definition (listing line 10333) and
// the declaration of RVLocs (listing line 10337) are absent from this listing.
10334 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
10335 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context,
10336 const Type *RetTy) const {
10338 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
10339
10340 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
      // The ABI is re-read from the subtarget on every iteration.
10341 LoongArchABI::ABI ABI =
10342 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
10343 if (CC_LoongArch(MF.getDataLayout(), ABI, i, Outs[i].VT, CCValAssign::Full,
10344 Outs[i].Flags, CCInfo, /*IsRet=*/true, nullptr))
10345 return false;
10346 }
10347 return true;
10348}
10349
// Lowers outgoing return values: the locations are computed with
// analyzeOutputArgs (CC_LoongArch, IsRet=true), each value from OutVals is
// copied into its assigned register with the copies chained and glued
// together, and a LoongArchISD::RET node is returned whose operands are the
// final chain, the registers used and (if any copy was emitted) the glue.
// NOTE(review): listing lines 10350, 10352 and 10356 (start of the signature,
// the Outs parameter and the RVLocs declaration) are absent from this listing.
10351 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
10353 const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
10354 SelectionDAG &DAG) const {
10355 // Stores the assignment of the return value to a location.
10357
10358 // Info about the registers and stack slot.
10359 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
10360 *DAG.getContext());
10361
10362 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
10363 nullptr, CC_LoongArch);
      // Any assigned return location under the GHC convention is a fatal error.
10364 if (CallConv == CallingConv::GHC && !RVLocs.empty())
10365 report_fatal_error("GHC functions return void only");
10366 SDValue Glue;
      // Operand 0 is a placeholder chain; it is overwritten after the loop.
10367 SmallVector<SDValue, 4> RetOps(1, Chain);
10368
10369 // Copy the result values into the output registers.
      // i indexes RVLocs and OutIdx indexes OutVals; they diverge when one f64
      // value occupies two consecutive RVLocs entries (see the ++i below).
10370 for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) {
10371 SDValue Val = OutVals[OutIdx];
10372 CCValAssign &VA = RVLocs[i];
10373 assert(VA.isRegLoc() && "Can only return in registers!");
10374
10375 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
10376 // Handle returning f64 on LA32D with a soft float ABI.
10377 assert(VA.isRegLoc() && "Expected return via registers");
10378 assert(VA.needsCustom());
      // Split the f64 into two i32 halves; the low half goes to the register of
      // this location and the high half to the register of the next one.
10379 SDValue SplitF64 = DAG.getNode(LoongArchISD::SPLIT_PAIR_F64, DL,
10380 DAG.getVTList(MVT::i32, MVT::i32), Val);
10381 SDValue Lo = SplitF64.getValue(0);
10382 SDValue Hi = SplitF64.getValue(1);
10383 Register RegLo = VA.getLocReg();
10384 Register RegHi = RVLocs[++i].getLocReg();
10385
10386 Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
10387 Glue = Chain.getValue(1);
10388 RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
10389 Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
10390 Glue = Chain.getValue(1);
10391 RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
10392 } else {
10393 // Handle a 'normal' return.
10394 Val = convertValVTToLocVT(DAG, Val, VA, DL);
10395 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
10396
10397 // Guarantee that all emitted copies are stuck together.
10398 Glue = Chain.getValue(1);
10399 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
10400 }
10401 }
10402
10403 RetOps[0] = Chain; // Update chain.
10404
10405 // Add the glue node if we have it.
10406 if (Glue.getNode())
10407 RetOps.push_back(Glue);
10408
10409 return DAG.getNode(LoongArchISD::RET, DL, MVT::Other, RetOps);
10410}
10411
10412// Check if a constant splat can be generated using [x]vldi, where imm[12] == 1.
10413// Note: The following prefixes are excluded:
10414// imm[11:8] == 4'b0000, 4'b0100, 4'b1000
10415// as they can be represented using [x]vrepli.[whb]
//
// Returns {true, Imm} when SplatValue, interpreted as a SplatBitSize-bit
// element, matches one of the patterns tested below. Imm is built as
// (0b1xxxx << 8) | payload, so the leading 1 lands in bit 12, xxxx in bits
// 11:8 and the payload in the low 8 bits. Returns {false, 0} when no pattern
// matches (RequiredImm is still 0 on that path).
// NOTE(review): the first line of the signature (listing line 10416) is
// absent from this listing.
10417 const APInt &SplatValue, const unsigned SplatBitSize) const {
10418 uint64_t RequiredImm = 0;
10419 uint64_t V = SplatValue.getZExtValue();
10420 if (SplatBitSize == 16 && !(V & 0x00FF)) {
10421 // 4'b0101: the low byte is zero; the payload is V >> 8.
10422 RequiredImm = (0b10101 << 8) | (V >> 8);
10423 return {true, RequiredImm};
10424 } else if (SplatBitSize == 32) {
10425 // 4'b0001: bits 31:16 and 7:0 are zero; the payload is bits 15:8.
10426 if (!(V & 0xFFFF00FF)) {
10427 RequiredImm = (0b10001 << 8) | (V >> 8);
10428 return {true, RequiredImm};
10429 }
10430 // 4'b0010: bits 31:24 and 15:0 are zero; the payload is bits 23:16.
10431 if (!(V & 0xFF00FFFF)) {
10432 RequiredImm = (0b10010 << 8) | (V >> 16);
10433 return {true, RequiredImm};
10434 }
10435 // 4'b0011: bits 23:0 are zero; the payload is bits 31:24.
10436 if (!(V & 0x00FFFFFF)) {
10437 RequiredImm = (0b10011 << 8) | (V >> 24);
10438 return {true, RequiredImm};
10439 }
10440 // 4'b0110: bits 31:16 are zero and the low byte is 0xFF; payload is bits 15:8.
10441 if ((V & 0xFFFF00FF) == 0xFF) {
10442 RequiredImm = (0b10110 << 8) | (V >> 8);
10443 return {true, RequiredImm};
10444 }
10445 // 4'b0111: bits 31:24 are zero and the low half is 0xFFFF; payload is bits 23:16.
10446 if ((V & 0xFF00FFFF) == 0xFFFF) {
10447 RequiredImm = (0b10111 << 8) | (V >> 16);
10448 return {true, RequiredImm};
10449 }
10450 // 4'b1010: bits 18:0 are zero and bits 30:25 are 0b011111 or 0b100000.
      // Payload: bit 31 -> imm[7], inverted bit 30 -> imm[6], bits 24:19 -> imm[5:0].
10451 if ((V & 0x7E07FFFF) == 0x3E000000 || (V & 0x7E07FFFF) == 0x40000000) {
10452 RequiredImm =
10453 (0b11010 << 8) | (((V >> 24) & 0xC0) ^ 0x40) | ((V >> 19) & 0x3F);
10454 return {true, RequiredImm};
10455 }
10456 } else if (SplatBitSize == 64) {
10457 // 4'b1011: same test and payload as 4'b1010, with bits 63:32 also zero.
10458 if ((V & 0xFFFFFFFF7E07FFFFULL) == 0x3E000000ULL ||
10459 (V & 0xFFFFFFFF7E07FFFFULL) == 0x40000000ULL) {
10460 RequiredImm =
10461 (0b11011 << 8) | (((V >> 24) & 0xC0) ^ 0x40) | ((V >> 19) & 0x3F);
10462 return {true, RequiredImm};
10463 }
10464 // 4'b1100: bits 47:0 are zero and bits 62:54 are 0b100000000 or 0b011111111.
      // Payload: bit 63 -> imm[7], inverted bit 62 -> imm[6], bits 53:48 -> imm[5:0].
10465 if ((V & 0x7FC0FFFFFFFFFFFFULL) == 0x4000000000000000ULL ||
10466 (V & 0x7FC0FFFFFFFFFFFFULL) == 0x3FC0000000000000ULL) {
10467 RequiredImm =
10468 (0b11100 << 8) | (((V >> 56) & 0xC0) ^ 0x40) | ((V >> 48) & 0x3F);
10469 return {true, RequiredImm};
10470 }
10471 // 4'b1001: every byte of the value is either 0x00 or 0xFF.
      // The helper returns {true, mask} with bit i of mask set when byte i
      // (counting from the least significant byte) is 0xFF, or {false, 0} as
      // soon as a byte with any other value is seen.
10472 auto sameBitsPreByte = [](uint64_t x) -> std::pair<bool, uint8_t> {
10473 uint8_t res = 0;
10474 for (int i = 0; i < 8; ++i) {
10475 uint8_t byte = x & 0xFF;
10476 if (byte == 0 || byte == 0xFF)
10477 res |= ((byte & 1) << i);
10478 else
10479 return {false, 0};
10480 x >>= 8;
10481 }
10482 return {true, res};
10483 };
10484 auto [IsSame, Suffix] = sameBitsPreByte(V);
10485 if (IsSame) {
10486 RequiredImm = (0b11001 << 8) | Suffix;
10487 return {true, RequiredImm};
10488 }
10489 }
10490 return {false, RequiredImm};
10491}
10492
// Returns true when the subtarget has LSX and the bit pattern of Imm has one
// of the shapes tested below for f32 or f64; returns false for any other VT.
// NOTE(review): the first line of the signature (listing line 10493) is
// absent from this listing.
10494 EVT VT) const {
10495 if (!Subtarget.hasExtLSX())
10496 return false;
10497
10498 if (VT == MVT::f32) {
      // Keep bits 30:25 and 18:0; they must read 0b011111 or 0b100000 followed
      // by zeros. Bit 31 and bits 24:19 are unconstrained.
10499 uint64_t masked = Imm.bitcastToAPInt().getZExtValue() & 0x7e07ffff;
10500 return (masked == 0x3e000000 || masked == 0x40000000);
10501 }
10502
10503 if (VT == MVT::f64) {
      // Keep bits 62:54 and 47:0; they must read 0b011111111 or 0b100000000
      // followed by zeros. Bit 63 and bits 53:48 are unconstrained.
10504 uint64_t masked = Imm.bitcastToAPInt().getZExtValue() & 0x7fc0ffffffffffff;
10505 return (masked == 0x3fc0000000000000 || masked == 0x4000000000000000);
10506 }
10507
10508 return false;
10509}
10510
10511bool LoongArchTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
10512 bool ForCodeSize) const {
10513 // TODO: Maybe need more checks here after vector extension is supported.
10514 if (VT == MVT::f32 && !Subtarget.hasBasicF())
10515 return false;
10516 if (VT == MVT::f64 && !Subtarget.hasBasicD())
10517 return false;
10518 return (Imm.isZero() || Imm.isExactlyValue(1.0) || isFPImmVLDILegal(Imm, VT));
10519}
10520
10522 return true;
10523}
10524
10526 return true;
10527}
10528
10529bool LoongArchTargetLowering::shouldInsertFencesForAtomic(
10530 const Instruction *I) const {
10531 if (!Subtarget.is64Bit())
10532 return isa<LoadInst>(I) || isa<StoreInst>(I);
10533
10534 if (isa<LoadInst>(I))
10535 return true;
10536
10537 // On LA64, atomic store operations with IntegerBitWidth of 32 and 64 do not
10538 // require fences beacuse we can use amswap_db.[w/d].
10539 Type *Ty = I->getOperand(0)->getType();
10540 if (isa<StoreInst>(I) && Ty->isIntegerTy()) {
10541 unsigned Size = Ty->getIntegerBitWidth();
10542 return (Size == 8 || Size == 16);
10543 }
10544
10545 return false;
10546}
10547
// For a non-vector VT the result is the pointer type for DL.
// NOTE(review): the first line of the signature (listing line 10548) and the
// statement handling vector VTs (listing line 10553) are absent from this
// listing, so the vector result cannot be described from here.
10549 LLVMContext &Context,
10550 EVT VT) const {
10551 if (!VT.isVector())
10552 return getPointerTy(DL);
10554}
10555
// Returns true when MemVT is no wider than the largest width allowed here:
// 64 bits on a 64-bit subtarget or 32 bits otherwise when the function has the
// NoImplicitFloat attribute; without that attribute the limit is raised to 256
// bits with LASX or 128 bits with LSX. AddressSpace is not consulted.
// NOTE(review): the first line of the signature (listing line 10556) is
// absent from this listing.
10557 unsigned AddressSpace, EVT MemVT, const MachineFunction &MF) const {
10558 // Do not merge to float value size (128 or 256 bits) if no implicit
10559 // float attribute is set.
10560 bool NoFloat = MF.getFunction().hasFnAttribute(Attribute::NoImplicitFloat);
10561 unsigned MaxIntSize = Subtarget.is64Bit() ? 64 : 32;
10562 if (NoFloat)
10563 return MemVT.getSizeInBits() <= MaxIntSize;
10564
10565 // Make sure we don't merge greater than our maximum supported vector width.
10566 if (Subtarget.hasExtLASX())
10567 MaxIntSize = 256;
10568 else if (Subtarget.hasExtLSX())
10569 MaxIntSize = 128;
10570
10571 return MemVT.getSizeInBits() <= MaxIntSize;
10572}
10573
// Decision based on the value type of Y: for vectors, true when the subtarget
// has LSX and the type is an integer vector; for scalars, true when the type
// is a scalar integer and Y is not a ConstantSDNode.
// NOTE(review): the signature (listing line 10574) is absent from this
// listing.
10575 EVT VT = Y.getValueType();
10576
10577 if (VT.isVector())
10578 return Subtarget.hasExtLSX() && VT.isInteger();
10579
10580 return VT.isScalarInteger() && !isa<ConstantSDNode>(Y);
10581}
10582
// For the four masked atomicrmw i32 intrinsics listed below, appends one
// IntrinsicInfo entry to Infos describing an i32 access through argument 0 at
// offset 0 with 4-byte alignment. Any other intrinsic returns without adding
// an entry.
// NOTE(review): listing lines 10583-10584 (start of the signature) and
// 10594, 10599 and 10600 (further Info fields) are absent from this listing.
10585 MachineFunction &MF, unsigned Intrinsic) const {
10586 switch (Intrinsic) {
10587 default:
10588 return;
10589 case Intrinsic::loongarch_masked_atomicrmw_xchg_i32:
10590 case Intrinsic::loongarch_masked_atomicrmw_add_i32:
10591 case Intrinsic::loongarch_masked_atomicrmw_sub_i32:
10592 case Intrinsic::loongarch_masked_atomicrmw_nand_i32: {
10593 IntrinsicInfo Info;
10595 Info.memVT = MVT::i32;
10596 Info.ptrVal = I.getArgOperand(0);
10597 Info.offset = 0;
10598 Info.align = Align(4);
10601 Infos.push_back(Info);
10602 return;
10603 // TODO: Add more Intrinsics later.
10604 }
10605 }
10606}
10607
10608// When -mlamcas is enabled, MinCmpXchgSizeInBits will be set to 8,
10609// atomicrmw and/or/xor operations with operands less than 32 bits cannot be
10610// expanded to am{and/or/xor}[_db].w through AtomicExpandPass. To prevent
10611// regression, we need to implement it manually.
//
// The code below rewrites the atomicrmw AI as a 32-bit atomicrmw on the
// enclosing 4-byte-aligned word: the operand is zero-extended and shifted to
// the position of the addressed bytes, the wide operation is emitted with the
// original ordering and sync scope, and the old narrow value is recovered by
// shifting right and truncating. AI is then replaced and erased.
// NOTE(review): listing lines 10612-10613 (the signature) and 10615 (start of
// the assert) are absent from this listing.
10614
10616 Op == AtomicRMWInst::And) &&
10617 "Unable to expand");
10618 unsigned MinWordSize = 4;
10619
10620 IRBuilder<> Builder(AI);
10621 LLVMContext &Ctx = Builder.getContext();
10622 const DataLayout &DL = AI->getDataLayout();
10623 Type *ValueType = AI->getType();
10624 Type *WordType = Type::getIntNTy(Ctx, MinWordSize * 8);
10625
10626 Value *Addr = AI->getPointerOperand();
10627 PointerType *PtrTy = cast<PointerType>(Addr->getType());
10628 IntegerType *IntTy = DL.getIndexType(Ctx, PtrTy->getAddressSpace());
10629
      // Clear the low two address bits to get the containing aligned word.
10630 Value *AlignedAddr = Builder.CreateIntrinsic(
10631 Intrinsic::ptrmask, {PtrTy, IntTy},
10632 {Addr, ConstantInt::get(IntTy, ~(uint64_t)(MinWordSize - 1))}, nullptr,
10633 "AlignedAddr");
10634
      // ShiftAmt = (byte offset within the word) * 8, as a 32-bit value.
10635 Value *AddrInt = Builder.CreatePtrToInt(Addr, IntTy);
10636 Value *PtrLSB = Builder.CreateAnd(AddrInt, MinWordSize - 1, "PtrLSB");
10637 Value *ShiftAmt = Builder.CreateShl(PtrLSB, 3);
10638 ShiftAmt = Builder.CreateTrunc(ShiftAmt, WordType, "ShiftAmt");
      // Mask has ones over the bits of the narrow value inside the word.
10639 Value *Mask = Builder.CreateShl(
10640 ConstantInt::get(WordType,
10641 (1 << (DL.getTypeStoreSize(ValueType) * 8)) - 1),
10642 ShiftAmt, "Mask");
10643 Value *Inv_Mask = Builder.CreateNot(Mask, "Inv_Mask");
10644 Value *ValOperand_Shifted =
10645 Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), WordType),
10646 ShiftAmt, "ValOperand_Shifted");
10647 Value *NewOperand;
      // For And, set every bit outside the mask so those bits of the word are
      // preserved; the other accepted operations use the shifted operand as is
      // (its bits outside the mask are zero).
10648 if (Op == AtomicRMWInst::And)
10649 NewOperand = Builder.CreateOr(ValOperand_Shifted, Inv_Mask, "AndOperand");
10650 else
10651 NewOperand = ValOperand_Shifted;
10652
10653 AtomicRMWInst *NewAI =
10654 Builder.CreateAtomicRMW(Op, AlignedAddr, NewOperand, Align(MinWordSize),
10655 AI->getOrdering(), AI->getSyncScopeID());
10656
      // Move the old narrow value down to bit 0 and narrow it to ValueType.
10657 Value *Shift = Builder.CreateLShr(NewAI, ShiftAmt, "shifted");
10658 Value *Trunc = Builder.CreateTrunc(Shift, ValueType, "extracted");
10659 Value *FinalOldResult = Builder.CreateBitCast(Trunc, ValueType);
10660 AI->replaceAllUsesWith(FinalOldResult);
10661 AI->eraseFromParent();
10662}
10663
10666 const AtomicRMWInst *AI) const {
10667 // TODO: Add more AtomicRMWInst that needs to be extended.
10668
10669 // Since floating-point operation requires a non-trivial set of data
10670 // operations, use CmpXChg to expand.
10671 if (AI->isFloatingPointOperation() ||
10677
10678 if (Subtarget.hasLAM_BH() && Subtarget.is64Bit() &&
10681 AI->getOperation() == AtomicRMWInst::Sub)) {
10683 }
10684
10685 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
10686 if (Subtarget.hasLAMCAS()) {
10687 if (Size < 32 && (AI->getOperation() == AtomicRMWInst::And ||
10691 if (AI->getOperation() == AtomicRMWInst::Nand || Size < 32)
10693 }
10694
10695 if (Size == 8 || Size == 16)
10698}
10699
// Maps an atomicrmw binary operation to the matching masked-atomicrmw
// intrinsic ID, choosing the _i64 variants when GRLen is 64 and the _i32
// variants when GRLen is 32. Any other GRLen, or a BinOp without a case
// below, reaches llvm_unreachable.
// NOTE(review): the line with the function name (listing line 10701) and the
// case labels at listing lines 10707, 10713, 10715, 10717, 10731, 10737,
// 10739 and 10741 are absent from this listing; the returns that follow the
// gaps show which intrinsic each missing label selects.
10700static Intrinsic::ID
10702 AtomicRMWInst::BinOp BinOp) {
10703 if (GRLen == 64) {
10704 switch (BinOp) {
10705 default:
10706 llvm_unreachable("Unexpected AtomicRMW BinOp");
10708 return Intrinsic::loongarch_masked_atomicrmw_xchg_i64;
10709 case AtomicRMWInst::Add:
10710 return Intrinsic::loongarch_masked_atomicrmw_add_i64;
10711 case AtomicRMWInst::Sub:
10712 return Intrinsic::loongarch_masked_atomicrmw_sub_i64;
10714 return Intrinsic::loongarch_masked_atomicrmw_nand_i64;
10716 return Intrinsic::loongarch_masked_atomicrmw_umax_i64;
10718 return Intrinsic::loongarch_masked_atomicrmw_umin_i64;
10719 case AtomicRMWInst::Max:
10720 return Intrinsic::loongarch_masked_atomicrmw_max_i64;
10721 case AtomicRMWInst::Min:
10722 return Intrinsic::loongarch_masked_atomicrmw_min_i64;
10723 // TODO: support other AtomicRMWInst.
10724 }
10725 }
10726
10727 if (GRLen == 32) {
10728 switch (BinOp) {
10729 default:
10730 llvm_unreachable("Unexpected AtomicRMW BinOp");
10732 return Intrinsic::loongarch_masked_atomicrmw_xchg_i32;
10733 case AtomicRMWInst::Add:
10734 return Intrinsic::loongarch_masked_atomicrmw_add_i32;
10735 case AtomicRMWInst::Sub:
10736 return Intrinsic::loongarch_masked_atomicrmw_sub_i32;
10738 return Intrinsic::loongarch_masked_atomicrmw_nand_i32;
10740 return Intrinsic::loongarch_masked_atomicrmw_umax_i32;
10742 return Intrinsic::loongarch_masked_atomicrmw_umin_i32;
10743 case AtomicRMWInst::Max:
10744 return Intrinsic::loongarch_masked_atomicrmw_max_i32;
10745 case AtomicRMWInst::Min:
10746 return Intrinsic::loongarch_masked_atomicrmw_min_i32;
10747 // TODO: support other AtomicRMWInst.
10748 }
10749 }
10750
10751 llvm_unreachable("Unexpected GRLen\n");
10752}
10753
10756 const AtomicCmpXchgInst *CI) const {
10757
10758 if (Subtarget.hasLAMCAS())
10760
10762 if (Size == 8 || Size == 16)
10765}
10766
// Emits a call to the masked cmpxchg intrinsic (the _i64 variant when GRLen
// is 64, otherwise the _i32 variant) with operands AlignedAddr, CmpVal,
// NewVal, Mask and the failure ordering of CI encoded as a GRLen-bit integer.
// With GRLen 64 the three value operands are sign-extended to i64 before the
// call and the result is truncated to i32 afterwards. The Ord parameter is
// not read in the visible body.
// NOTE(review): the first line of the signature (listing line 10767) is
// absent from this listing.
10768 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
10769 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
10770 unsigned GRLen = Subtarget.getGRLen();
10771 AtomicOrdering FailOrd = CI->getFailureOrdering();
10772 Value *FailureOrdering =
10773 Builder.getIntN(Subtarget.getGRLen(), static_cast<uint64_t>(FailOrd));
10774 Intrinsic::ID CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i32;
10775 if (GRLen == 64) {
10776 CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i64;
10777 CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
10778 NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
10779 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
10780 }
      // The intrinsic is overloaded on the pointer type only.
10781 Type *Tys[] = {AlignedAddr->getType()};
10782 Value *Result = Builder.CreateIntrinsic(
10783 CmpXchgIntrID, Tys, {AlignedAddr, CmpVal, NewVal, Mask, FailureOrdering});
10784 if (GRLen == 64)
10785 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
10786 return Result;
10787}
10788
// Emits the masked form of the atomicrmw AI operating on AlignedAddr. An xchg
// of constant 0 or -1 is turned into a plain atomicrmw and/or on the word;
// every other case becomes a call to LlwOpScwLoop with operands (AlignedAddr,
// Incr, Mask, [sign-extension shift amount for Min and the other operation
// tested with it,] Ordering). With GRLen 64 the integer operands are
// sign-extended to i64 before the call and the result is truncated to i32.
// NOTE(review): listing lines 10789, 10797-10798, 10812, 10814 and 10830 are
// absent from this listing; they carry the start of the signature, the
// definition of CVal, the definition of LlwOpScwLoop and the second half of
// the Min condition.
10790 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
10791 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
10792 // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
10793 // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
10794 // mask, as this produces better code than the LL/SC loop emitted by
10795 // int_loongarch_masked_atomicrmw_xchg.
10796 if (AI->getOperation() == AtomicRMWInst::Xchg &&
      // xchg 0: AND with the inverted mask clears only the addressed bits.
10799 if (CVal->isZero())
10800 return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,
10801 Builder.CreateNot(Mask, "Inv_Mask"),
10802 AI->getAlign(), Ord);
      // xchg -1: OR with the mask sets only the addressed bits.
10803 if (CVal->isMinusOne())
10804 return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,
10805 AI->getAlign(), Ord);
10806 }
10807
10808 unsigned GRLen = Subtarget.getGRLen();
      // The ordering is passed to the intrinsic as a GRLen-bit integer.
10809 Value *Ordering =
10810 Builder.getIntN(GRLen, static_cast<uint64_t>(AI->getOrdering()));
10811 Type *Tys[] = {AlignedAddr->getType()};
10813 AI->getModule(),
10815
10816 if (GRLen == 64) {
10817 Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
10818 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
10819 ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
10820 }
10821
10822 Value *Result;
10823
10824 // Must pass the shift amount needed to sign extend the loaded value prior
10825 // to performing a signed comparison for min/max. ShiftAmt is the number of
10826 // bits to shift the value into position. Pass GRLen-ShiftAmt-ValWidth, which
10827 // is the number of bits to left+right shift the value in order to
10828 // sign-extend.
10829 if (AI->getOperation() == AtomicRMWInst::Min ||
10831 const DataLayout &DL = AI->getDataLayout();
10832 unsigned ValWidth =
10833 DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
10834 Value *SextShamt =
10835 Builder.CreateSub(Builder.getIntN(GRLen, GRLen - ValWidth), ShiftAmt);
10836 Result = Builder.CreateCall(LlwOpScwLoop,
10837 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
10838 } else {
10839 Result =
10840 Builder.CreateCall(LlwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
10841 }
10842
10843 if (GRLen == 64)
10844 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
10845 return Result;
10846}
10847
// Returns true when the scalar element type of VT is the simple type f32 or
// f64, and false for every other type (including non-simple ones). MF is not
// consulted.
// NOTE(review): the first line of the signature (listing line 10848) is
// absent from this listing.
10849 const MachineFunction &MF, EVT VT) const {
      // Vectors are judged by their element type.
10850 VT = VT.getScalarType();
10851
10852 if (!VT.isSimple())
10853 return false;
10854
10855 switch (VT.getSimpleVT().SimpleTy) {
10856 case MVT::f32:
10857 case MVT::f64:
10858 return true;
10859 default:
10860 break;
10861 }
10862
10863 return false;
10864}
10865
// Always returns LoongArch::R4; PersonalityFn is not consulted.
// NOTE(review): the first line of the signature (listing line 10866) is
// absent from this listing.
10867 const Constant *PersonalityFn) const {
10868 return LoongArch::R4;
10869}
10870
// Always returns LoongArch::R5; PersonalityFn is not consulted.
// NOTE(review): the first line of the signature (listing line 10871) is
// absent from this listing.
10872 const Constant *PersonalityFn) const {
10873 return LoongArch::R5;
10874}
10875
10876//===----------------------------------------------------------------------===//
10877// Target Optimization Hooks
10878//===----------------------------------------------------------------------===//
10879
// Default number of refinement steps for a reciprocal estimate on VT: 2 when
// the scalar element type is f64, otherwise 1. Subtarget is not consulted.
// NOTE(review): the first line of the signature (listing line 10880) is
// absent from this listing.
10881 const LoongArchSubtarget &Subtarget) {
10882 // Feature FRECIPE instructions relative accuracy is 2^-14.
10883 // IEEE float has 23 digits and double has 52 digits.
10884 int RefinementSteps = VT.getScalarType() == MVT::f64 ? 2 : 1;
10885 return RefinementSteps;
10886}
10887
// Reports whether a reciprocal estimate may be produced for VT on this
// subtarget. The caller must already have established hasFrecipe() (asserted
// below). Non-simple types and types not listed in the switch are rejected.
// NOTE(review): the line with the function name and parameters (listing line
// 10889) is absent from this listing.
10888static bool
10890 assert(Subtarget.hasFrecipe() &&
10891 "Reciprocal estimate queried on unsupported target");
10892
10893 if (!VT.isSimple())
10894 return false;
10895
10896 switch (VT.getSimpleVT().SimpleTy) {
10897 case MVT::f32:
10898 // f32 is the base type for reciprocal estimate instructions.
10899 return true;
10900
10901 case MVT::f64:
10902 return Subtarget.hasBasicD();
10903
      // 128-bit vector types require LSX.
10904 case MVT::v4f32:
10905 case MVT::v2f64:
10906 return Subtarget.hasExtLSX();
10907
      // 256-bit vector types require LASX.
10908 case MVT::v8f32:
10909 case MVT::v4f64:
10910 return Subtarget.hasExtLASX();
10911
10912 default:
10913 return false;
10914 }
10915}
10916
// Builds a square-root estimate for Operand from LoongArchISD::FRSQRTE.
// Returns an empty SDValue when the subtarget lacks FRECIPE or the type is not
// accepted by isSupportedReciprocalEstimateType. An unspecified
// RefinementSteps is replaced by the value from getEstimateRefinementSteps,
// and UseOneConstNR is set to false. The raw 1/sqrt(x) estimate is returned
// when Reciprocal is set or refinement steps remain; otherwise the estimate is
// multiplied by Operand to give sqrt(x).
// NOTE(review): listing lines 10917 (start of the signature) and 10922 (start
// of the assert) are absent from this listing.
10918 SelectionDAG &DAG, int Enabled,
10919 int &RefinementSteps,
10920 bool &UseOneConstNR,
10921 bool Reciprocal) const {
10923 "Enabled should never be Disabled here");
10924
10925 if (!Subtarget.hasFrecipe())
10926 return SDValue();
10927
10928 SDLoc DL(Operand);
10929 EVT VT = Operand.getValueType();
10930
10931 // Check supported types.
10932 if (!isSupportedReciprocalEstimateType(VT, Subtarget))
10933 return SDValue();
10934
10935 // Handle refinement steps.
10936 if (RefinementSteps == ReciprocalEstimate::Unspecified)
10937 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
10938
10939 // LoongArch only has FRSQRTE which is 1.0 / sqrt(x).
10940 UseOneConstNR = false;
10941 SDValue Rsqrt = DAG.getNode(LoongArchISD::FRSQRTE, DL, VT, Operand);
10942
10943 // If the caller wants 1.0 / sqrt(x), or if further refinement steps
10944 // are needed (which rely on the reciprocal form), return the raw reciprocal
10945 // estimate.
10946 if (Reciprocal || RefinementSteps > 0)
10947 return Rsqrt;
10948
10949 // Otherwise, return sqrt(x) by multiplying with the operand.
10950 return DAG.getNode(ISD::FMUL, DL, VT, Operand, Rsqrt);
10951}
10952
// Builds a reciprocal estimate for Operand as a LoongArchISD::FRECIPE node.
// Returns an empty SDValue when the subtarget lacks FRECIPE or the type is not
// accepted by isSupportedReciprocalEstimateType. An unspecified
// RefinementSteps is replaced by the value from getEstimateRefinementSteps.
// NOTE(review): listing lines 10953 (start of the signature) and 10957 (start
// of the assert) are absent from this listing.
10954 SelectionDAG &DAG,
10955 int Enabled,
10956 int &RefinementSteps) const {
10958 "Enabled should never be Disabled here");
10959
10960 if (!Subtarget.hasFrecipe())
10961 return SDValue();
10962
10963 SDLoc DL(Operand);
10964 EVT VT = Operand.getValueType();
10965
10966 // Check supported types.
10967 if (!isSupportedReciprocalEstimateType(VT, Subtarget))
10968 return SDValue();
10969
10970 if (RefinementSteps == ReciprocalEstimate::Unspecified)
10971 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
10972
10973 // FRECIPE computes 1.0 / x.
10974 return DAG.getNode(LoongArchISD::FRECIPE, DL, VT, Operand);
10975}
10976
10977//===----------------------------------------------------------------------===//
10978// LoongArch Inline Assembly Support
10979//===----------------------------------------------------------------------===//
10980
// Classifies an inline-asm constraint string. Single letters f and q are
// register classes; l, I, J and K are immediates; k and the two-letter
// constraints ZB and ZC are memory operands. Everything else is delegated to
// TargetLowering::getConstraintType.
// NOTE(review): the line carrying the return type (listing line 10981) is
// absent from this listing.
10982LoongArchTargetLowering::getConstraintType(StringRef Constraint) const {
10983 // LoongArch specific constraints in GCC: config/loongarch/constraints.md
10984 //
10985 // 'f': A floating-point register (if available).
10986 // 'k': A memory operand whose address is formed by a base register and
10987 // (optionally scaled) index register.
10988 // 'l': A signed 16-bit constant.
10989 // 'm': A memory operand whose address is formed by a base register and
10990 // offset that is suitable for use in instructions with the same
10991 // addressing mode as st.w and ld.w.
10992 // 'q': A general-purpose register except for $r0 and $r1 (for the csrxchg
10993 // instruction)
10994 // 'I': A signed 12-bit constant (for arithmetic instructions).
10995 // 'J': Integer zero.
10996 // 'K': An unsigned 12-bit constant (for logic instructions).
10997 // "ZB": An address that is held in a general-purpose register. The offset is
10998 // zero.
10999 // "ZC": A memory operand whose address is formed by a base register and
11000 // offset that is suitable for use in instructions with the same
11001 // addressing mode as ll.w and sc.w.
11002 if (Constraint.size() == 1) {
11003 switch (Constraint[0]) {
11004 default:
11005 break;
11006 case 'f':
11007 case 'q':
11008 return C_RegisterClass;
11009 case 'l':
11010 case 'I':
11011 case 'J':
11012 case 'K':
11013 return C_Immediate;
11014 case 'k':
11015 return C_Memory;
11016 }
11017 }
11018
11019 if (Constraint == "ZC" || Constraint == "ZB")
11020 return C_Memory;
11021
11022 // 'm' is handled here.
11023 return TargetLowering::getConstraintType(Constraint);
11024}
11025
11026InlineAsm::ConstraintCode LoongArchTargetLowering::getInlineAsmMemConstraint(
11027 StringRef ConstraintCode) const {
11028 return StringSwitch<InlineAsm::ConstraintCode>(ConstraintCode)
11032 .Default(TargetLowering::getInlineAsmMemConstraint(ConstraintCode));
11033}
11034
// Resolves an inline-asm register constraint to a (register, register class)
// pair. Single-letter class constraints r, q and f are answered directly with
// register number 0 and a class chosen from VT and the subtarget features.
// Explicit names written with a leading dollar sign ({$r..}, {$f..}, {$vr..},
// {$xr..}) have the dollar sign removed before being resolved. Anything else
// goes to TargetLowering::getRegForInlineAsmConstraint unchanged.
11035std::pair<unsigned, const TargetRegisterClass *>
11036LoongArchTargetLowering::getRegForInlineAsmConstraint(
11037 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
11038 // First, see if this is a constraint that directly corresponds to a LoongArch
11039 // register class.
11040 if (Constraint.size() == 1) {
11041 switch (Constraint[0]) {
11042 case 'r':
11043 // TODO: Support fixed vectors up to GRLen?
11044 if (VT.isVector())
11045 break;
11046 return std::make_pair(0U, &LoongArch::GPRRegClass);
11047 case 'q':
11048 return std::make_pair(0U, &LoongArch::GPRNoR0R1RegClass);
11049 case 'f':
      // Checked in order: FPR32 for f32, FPR64 for f64, then the LSX and LASX
      // classes for types legal in them. With no match, fall out of the switch.
11050 if (Subtarget.hasBasicF() && VT == MVT::f32)
11051 return std::make_pair(0U, &LoongArch::FPR32RegClass);
11052 if (Subtarget.hasBasicD() && VT == MVT::f64)
11053 return std::make_pair(0U, &LoongArch::FPR64RegClass);
11054 if (Subtarget.hasExtLSX() &&
11055 TRI->isTypeLegalForClass(LoongArch::LSX128RegClass, VT))
11056 return std::make_pair(0U, &LoongArch::LSX128RegClass);
11057 if (Subtarget.hasExtLASX() &&
11058 TRI->isTypeLegalForClass(LoongArch::LASX256RegClass, VT))
11059 return std::make_pair(0U, &LoongArch::LASX256RegClass);
11060 break;
11061 default:
11062 break;
11063 }
11064 }
11065
11066 // TargetLowering::getRegForInlineAsmConstraint uses the name of the TableGen
11067 // record (e.g. the "R0" in `def R0`) to choose registers for InlineAsm
11068 // constraints while the official register name is prefixed with a '$'. So we
11069 // clip the '$' from the original constraint string (e.g. {$r0} to {r0}.)
11070 // before it being parsed. And TargetLowering::getRegForInlineAsmConstraint is
11071 // case insensitive, so no need to convert the constraint to upper case here.
11072 //
11073 // For now, no need to support ABI names (e.g. `$a0`) as clang will correctly
11074 // decode the usage of register name aliases into their official names. And
11075 // AFAIK, the not yet upstreamed `rustc` for LoongArch will always use
11076 // official register names.
11077 if (Constraint.starts_with("{$r") || Constraint.starts_with("{$f") ||
11078 Constraint.starts_with("{$vr") || Constraint.starts_with("{$xr")) {
      // Index 2 is the first character after the opening brace and dollar sign.
11079 bool IsFP = Constraint[2] == 'f';
11080 std::pair<StringRef, StringRef> Temp = Constraint.split('$');
11081 std::pair<unsigned, const TargetRegisterClass *> R;
      // NOTE(review): listing line 11082, which begins the statement assigning
      // R, is absent from this listing; only its argument list remains.
11083 TRI, join_items("", Temp.first, Temp.second), VT);
11084 // Match those names to the widest floating point register type available.
11085 if (IsFP) {
11086 unsigned RegNo = R.first;
11087 if (LoongArch::F0 <= RegNo && RegNo <= LoongArch::F31) {
11088 if (Subtarget.hasBasicD() && (VT == MVT::f64 || VT == MVT::Other)) {
      // Translate Fn to the register at the same offset from F0_64.
11089 unsigned DReg = RegNo - LoongArch::F0 + LoongArch::F0_64;
11090 return std::make_pair(DReg, &LoongArch::FPR64RegClass);
11091 }
11092 }
11093 }
11094 return R;
11095 }
11096
11097 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
11098}
11099
// Lowers the inline-asm operand Op for the immediate constraints l, I, J and
// K. When Op is a ConstantSDNode whose value satisfies the constraint, a
// target constant of GRLen type is appended to Ops; when it is not, the case
// returns with Ops unchanged.
// NOTE(review): listing line 11144, the statement reached by all other
// constraints after the switch, is absent from this listing.
11100void LoongArchTargetLowering::LowerAsmOperandForConstraint(
11101 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
11102 SelectionDAG &DAG) const {
11103 // Currently only support length 1 constraints.
11104 if (Constraint.size() == 1) {
11105 switch (Constraint[0]) {
11106 case 'l':
11107 // Validate & create a 16-bit signed immediate operand.
11108 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
11109 uint64_t CVal = C->getSExtValue();
11110 if (isInt<16>(CVal))
11111 Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op),
11112 Subtarget.getGRLenVT()));
11113 }
11114 return;
11115 case 'I':
11116 // Validate & create a 12-bit signed immediate operand.
11117 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
11118 uint64_t CVal = C->getSExtValue();
11119 if (isInt<12>(CVal))
11120 Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op),
11121 Subtarget.getGRLenVT()));
11122 }
11123 return;
11124 case 'J':
11125 // Validate & create an integer zero operand.
11126 if (auto *C = dyn_cast<ConstantSDNode>(Op))
11127 if (C->getZExtValue() == 0)
11128 Ops.push_back(
11129 DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getGRLenVT()));
11130 return;
11131 case 'K':
11132 // Validate & create a 12-bit unsigned immediate operand.
11133 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
11134 uint64_t CVal = C->getZExtValue();
11135 if (isUInt<12>(CVal))
11136 Ops.push_back(
11137 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT()));
11138 }
11139 return;
11140 default:
11141 break;
11142 }
11143 }
11145}
11146
11147#define GET_REGISTER_MATCHER
11148#include "LoongArchGenAsmMatcher.inc"
11149
// Looks up a register from its textual name. The text after the first dollar
// sign in RegName is matched first with MatchRegisterAltName and then with
// MatchRegisterName. If neither matches, the invalid Register is returned. A
// matched register that is not in the reserved set for MF triggers a fatal
// error; otherwise the register is returned.
// NOTE(review): the first lines of the signature (listing lines 11150-11151)
// are absent from this listing.
11152 const MachineFunction &MF) const {
      // split returns an empty second half when RegName has no dollar sign, so
      // such a name cannot match below.
11153 std::pair<StringRef, StringRef> Name = StringRef(RegName).split('$');
11154 std::string NewRegName = Name.second.str();
11155 Register Reg = MatchRegisterAltName(NewRegName);
11156 if (!Reg)
11157 Reg = MatchRegisterName(NewRegName);
11158 if (!Reg)
11159 return Reg;
11160 BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
11161 if (!ReservedRegs.test(Reg))
11162 report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
11163 StringRef(RegName) + "\"."));
11164 return Reg;
11165}
11166
11168 EVT VT, SDValue C) const {
11169 // TODO: Support vectors.
11170 if (!VT.isScalarInteger())
11171 return false;
11172
11173 // Omit the optimization if the data size exceeds GRLen.
11174 if (VT.getSizeInBits() > Subtarget.getGRLen())
11175 return false;
11176
11177 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
11178 const APInt &Imm = ConstNode->getAPIntValue();
11179 // Break MUL into (SLLI + ADD/SUB) or ALSL.
11180 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
11181 (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
11182 return true;
11183 // Break MUL into (ALSL x, (SLLI x, imm0), imm1).
11184 if (ConstNode->hasOneUse() &&
11185 ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
11186 (Imm - 8).isPowerOf2() || (Imm - 16).isPowerOf2()))
11187 return true;
11188 // Break (MUL x, imm) into (ADD (SLLI x, s0), (SLLI x, s1)),
11189 // in which the immediate has two set bits. Or Break (MUL x, imm)
11190 // into (SUB (SLLI x, s0), (SLLI x, s1)), in which the immediate
11191 // equals to (1 << s0) - (1 << s1).
11192 if (ConstNode->hasOneUse() && !(Imm.sge(-2048) && Imm.sle(4095))) {
11193 unsigned Shifts = Imm.countr_zero();
11194 // Reject immediates which can be composed via a single LUI.
11195 if (Shifts >= 12)
11196 return false;
11197 // Reject multiplications can be optimized to
11198 // (SLLI (ALSL x, x, 1/2/3/4), s).
11199 APInt ImmPop = Imm.ashr(Shifts);
11200 if (ImmPop == 3 || ImmPop == 5 || ImmPop == 9 || ImmPop == 17)
11201 return false;
11202 // We do not consider the case `(-Imm - ImmSmall).isPowerOf2()`,
11203 // since it needs one more instruction than other 3 cases.
11204 APInt ImmSmall = APInt(Imm.getBitWidth(), 1ULL << Shifts, true);
11205 if ((Imm - ImmSmall).isPowerOf2() || (Imm + ImmSmall).isPowerOf2() ||
11206 (ImmSmall - Imm).isPowerOf2())
11207 return true;
11208 }
11209 }
11210
11211 return false;
11212}
11213
11215 const AddrMode &AM,
11216 Type *Ty, unsigned AS,
11217 Instruction *I) const {
11218 // LoongArch has four basic addressing modes:
11219 // 1. reg
11220 // 2. reg + 12-bit signed offset
11221 // 3. reg + 14-bit signed offset left-shifted by 2
11222 // 4. reg1 + reg2
11223 // TODO: Add more checks after support vector extension.
11224
11225 // No global is ever allowed as a base.
11226 if (AM.BaseGV)
11227 return false;
11228
11229 // Require a 12-bit signed offset or 14-bit signed offset left-shifted by 2
11230 // with `UAL` feature.
11231 if (!isInt<12>(AM.BaseOffs) &&
11232 !(isShiftedInt<14, 2>(AM.BaseOffs) && Subtarget.hasUAL()))
11233 return false;
11234
11235 switch (AM.Scale) {
11236 case 0:
11237 // "r+i" or just "i", depending on HasBaseReg.
11238 break;
11239 case 1:
11240 // "r+r+i" is not allowed.
11241 if (AM.HasBaseReg && AM.BaseOffs)
11242 return false;
11243 // Otherwise we have "r+r" or "r+i".
11244 break;
11245 case 2:
11246 // "2*r+r" or "2*r+i" is not allowed.
11247 if (AM.HasBaseReg || AM.BaseOffs)
11248 return false;
11249 // Allow "2*r" as "r+r".
11250 break;
11251 default:
11252 return false;
11253 }
11254
11255 return true;
11256}
11257
11259 return isInt<12>(Imm);
11260}
11261
11263 return isInt<12>(Imm);
11264}
11265
11267 // Zexts are free if they can be combined with a load.
11268 // Don't advertise i32->i64 zextload as being free for LA64. It interacts
11269 // poorly with type legalization of compares preferring sext.
11270 if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
11271 EVT MemVT = LD->getMemoryVT();
11272 if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
11273 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
11274 LD->getExtensionType() == ISD::ZEXTLOAD))
11275 return true;
11276 }
11277
11278 return TargetLowering::isZExtFree(Val, VT2);
11279}
11280
11282 EVT DstVT) const {
11283 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
11284}
11285
11287 return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
11288}
11289
11291 // TODO: Support vectors.
11292 if (Y.getValueType().isVector())
11293 return false;
11294
11295 return !isa<ConstantSDNode>(Y);
11296}
11297
11299 // LAMCAS will use amcas[_DB].{b/h/w/d} which does not require extension.
11300 return Subtarget.hasLAMCAS() ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND;
11301}
11302
11304 Type *Ty, bool IsSigned) const {
11305 if (Subtarget.is64Bit() && Ty->isIntegerTy(32))
11306 return true;
11307
11308 return IsSigned;
11309}
11310
11312 // Return false to suppress the unnecessary extensions if the LibCall
11313 // arguments or return value is a float narrower than GRLEN on a soft FP ABI.
11314 if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
11315 Type.getSizeInBits() < Subtarget.getGRLen()))
11316 return false;
11317 return true;
11318}
11319
11320// memcpy and other memory intrinsics typically try to use wider loads/stores
11321// if the source/dest is aligned and the copy size is large enough. We therefore
11322// want to align such objects passed to memory intrinsics.
11324 unsigned &MinSize,
11325 Align &PrefAlign) const {
11326 if (!isa<MemIntrinsic>(CI))
11327 return false;
11328
11329 if (Subtarget.is64Bit()) {
11330 MinSize = 8;
11331 PrefAlign = Align(8);
11332 } else {
11333 MinSize = 4;
11334 PrefAlign = Align(4);
11335 }
11336
11337 return true;
11338}
11339
11342 if (!VT.isScalableVector() && VT.getVectorNumElements() != 1 &&
11343 VT.getVectorElementType() != MVT::i1)
11344 return TypeWidenVector;
11345
11347}
11348
11349bool LoongArchTargetLowering::splitValueIntoRegisterParts(
11350 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
11351 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
11352 bool IsABIRegCopy = CC.has_value();
11353 EVT ValueVT = Val.getValueType();
11354
11355 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
11356 PartVT == MVT::f32) {
11357 // Cast the [b]f16 to i16, extend to i32, pad with ones to make a float
11358 // nan, and cast to f32.
11359 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);
11360 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);
11361 Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val,
11362 DAG.getConstant(0xFFFF0000, DL, MVT::i32));
11363 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
11364 Parts[0] = Val;
11365 return true;
11366 }
11367
11368 return false;
11369}
11370
11371SDValue LoongArchTargetLowering::joinRegisterPartsIntoValue(
11372 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
11373 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
11374 bool IsABIRegCopy = CC.has_value();
11375
11376 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
11377 PartVT == MVT::f32) {
11378 SDValue Val = Parts[0];
11379
11380 // Cast the f32 to i32, truncate to i16, and cast back to [b]f16.
11381 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
11382 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);
11383 Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
11384 return Val;
11385 }
11386
11387 return SDValue();
11388}
11389
/// Return the register type used to pass a value of type \p VT under calling
/// convention \p CC.
///
/// An f16 value is reported as MVT::f32 when the subtarget has the basic F
/// feature (Subtarget.hasBasicF()).
///
/// NOTE(review): listing line 11397, which held the return statement for all
/// other types, is absent from this extract -- confirm the default path
/// against the upstream source.
11390MVT LoongArchTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
11391 CallingConv::ID CC,
11392 EVT VT) const {
11393 // Use f32 to pass f16.
11394 if (VT == MVT::f16 && Subtarget.hasBasicF())
11395 return MVT::f32;
11396
11398}
11399
/// Return how many registers are needed to pass a value of type \p VT under
/// calling convention \p CC.
///
/// An f16 value takes exactly one register when the subtarget has the basic
/// F feature (Subtarget.hasBasicF()), matching the f32 register type chosen
/// for it.
///
/// NOTE(review): listing line 11406, which held the return statement for all
/// other types, is absent from this extract -- confirm the default path
/// against the upstream source.
11400unsigned LoongArchTargetLowering::getNumRegistersForCallingConv(
11401 LLVMContext &Context, CallingConv::ID CC, EVT VT) const {
11402 // Use f32 to pass f16.
11403 if (VT == MVT::f16 && Subtarget.hasBasicF())
11404 return 1;
11405
11407}
11408
11410 const SDValue Op, KnownBits &Known, const APInt &DemandedElts,
11411 const SelectionDAG &DAG, unsigned Depth) const {
11412 unsigned Opc = Op.getOpcode();
11413 Known.resetAll();
11414 switch (Opc) {
11415 default:
11416 break;
11417 case LoongArchISD::VPICK_ZEXT_ELT: {
11418 assert(isa<VTSDNode>(Op->getOperand(2)) && "Unexpected operand!");
11419 EVT VT = cast<VTSDNode>(Op->getOperand(2))->getVT();
11420 unsigned VTBits = VT.getScalarSizeInBits();
11421 assert(Known.getBitWidth() >= VTBits && "Unexpected width!");
11422 Known.Zero.setBitsFrom(VTBits);
11423 break;
11424 }
11425 }
11426}
11427
11429 SDValue Op, const APInt &OriginalDemandedBits,
11430 const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
11431 unsigned Depth) const {
11432 EVT VT = Op.getValueType();
11433 unsigned BitWidth = OriginalDemandedBits.getBitWidth();
11434 unsigned Opc = Op.getOpcode();
11435 switch (Opc) {
11436 default:
11437 break;
11438 case LoongArchISD::CRC_W_B_W:
11439 case LoongArchISD::CRC_W_H_W:
11440 case LoongArchISD::CRCC_W_B_W:
11441 case LoongArchISD::CRCC_W_H_W: {
11442 KnownBits KnownSrc;
11443 APInt DemandedSrcBits =
11444 APInt::getLowBitsSet(BitWidth, (Opc == LoongArchISD::CRC_W_B_W ||
11445 Opc == LoongArchISD::CRCC_W_B_W)
11446 ? 8
11447 : 16);
11448 return SimplifyDemandedBits(Op.getOperand(1), DemandedSrcBits,
11449 OriginalDemandedElts, KnownSrc, TLO, Depth + 1);
11450 }
11451 case LoongArchISD::VMSKLTZ:
11452 case LoongArchISD::XVMSKLTZ: {
11453 SDValue Src = Op.getOperand(0);
11454 MVT SrcVT = Src.getSimpleValueType();
11455 unsigned SrcBits = SrcVT.getScalarSizeInBits();
11456 unsigned NumElts = SrcVT.getVectorNumElements();
11457
11458 // If we don't need the sign bits at all just return zero.
11459 if (OriginalDemandedBits.countr_zero() >= NumElts)
11460 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
11461
11462 // Only demand the vector elements of the sign bits we need.
11463 APInt KnownUndef, KnownZero;
11464 APInt DemandedElts = OriginalDemandedBits.zextOrTrunc(NumElts);
11465 if (SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef, KnownZero,
11466 TLO, Depth + 1))
11467 return true;
11468
11469 Known.Zero = KnownZero.zext(BitWidth);
11470 Known.Zero.setHighBits(BitWidth - NumElts);
11471
11472 // [X]VMSKLTZ only uses the MSB from each vector element.
11473 KnownBits KnownSrc;
11474 APInt DemandedSrcBits = APInt::getSignMask(SrcBits);
11475 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, KnownSrc, TLO,
11476 Depth + 1))
11477 return true;
11478
11479 if (KnownSrc.One[SrcBits - 1])
11480 Known.One.setLowBits(NumElts);
11481 else if (KnownSrc.Zero[SrcBits - 1])
11482 Known.Zero.setLowBits(NumElts);
11483
11484 // Attempt to avoid multi-use ops if we don't need anything from it.
11486 Src, DemandedSrcBits, DemandedElts, TLO.DAG, Depth + 1))
11487 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewSrc));
11488 return false;
11489 }
11490 }
11491
11493 Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth);
11494}
11495
11497 unsigned Opc = VecOp.getOpcode();
11498
11499 // Assume target opcodes can't be scalarized.
11500 // TODO - do we have any exceptions?
11501 if (Opc >= ISD::BUILTIN_OP_END || !isBinOp(Opc))
11502 return false;
11503
11504 // If the vector op is not supported, try to convert to scalar.
11505 EVT VecVT = VecOp.getValueType();
11507 return true;
11508
11509 // If the vector op is supported, but the scalar op is not, the transform may
11510 // not be worthwhile.
11511 EVT ScalarVT = VecVT.getScalarType();
11512 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT);
11513}
11514
11516 unsigned Index) const {
11518 return false;
11519
11520 // Extract a 128-bit subvector from index 0 of a 256-bit vector is free.
11521 return Index == 0;
11522}
11523
11525 unsigned Index) const {
11526 EVT EltVT = VT.getScalarType();
11527
11528 // Extract a scalar FP value from index 0 of a vector is free.
11529 return (EltVT == MVT::f32 || EltVT == MVT::f64) && Index == 0;
11530}
11531
11533 const MachineFunction &MF) const {
11534
11535 // If the function specifically requests inline stack probes, emit them.
11536 if (MF.getFunction().hasFnAttribute("probe-stack"))
11537 return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
11538 "inline-asm";
11539
11540 return false;
11541}
11542
11544 Align StackAlign) const {
11545 // The default stack probe size is 4096 if the function has no
11546 // stack-probe-size attribute.
11547 const Function &Fn = MF.getFunction();
11548 unsigned StackProbeSize =
11549 Fn.getFnAttributeAsParsedInteger("stack-probe-size", 4096);
11550 // Round down to the stack alignment.
11551 StackProbeSize = alignDown(StackProbeSize, StackAlign.value());
11552 return StackProbeSize ? StackProbeSize : StackAlign.value();
11553}
11554
/// Custom lowering of a dynamic stack allocation when inline stack probing
/// is in effect.
///
/// If hasInlineStackProbe(MF) is false, an empty SDValue is returned and
/// nothing is built. Otherwise the new stack pointer is computed as
/// (SP - Size), optionally masked down to the requested alignment, and a
/// LoongArchISD::PROBED_ALLOCA node is emitted to move the real SP to that
/// value.
///
/// Operands read from \p Op: 0 = chain, 1 = allocation size, 2 = alignment
/// (a ConstantSDNode; may encode "no alignment").
///
/// \returns the merged pair {new SP value, output chain}.
///
/// NOTE(review): listing line 11558, which declares `MF`, is absent from this
/// extract -- confirm against the upstream source.
11555SDValue
11556LoongArchTargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op,
11557 SelectionDAG &DAG) const {
11559 if (!hasInlineStackProbe(MF))
11560 return SDValue();
11561
11562 const MVT GRLenVT = Subtarget.getGRLenVT();
11563 // Get the inputs.
11564 SDValue Chain = Op.getOperand(0);
11565 SDValue Size = Op.getOperand(1);
11566
11567 const MaybeAlign Align =
11568 cast<ConstantSDNode>(Op.getOperand(2))->getMaybeAlignValue();
11569 const SDLoc dl(Op);
11570 const EVT VT = Op.getValueType();
11571
11572 // Construct the new SP value in a GPR.
11573 SDValue SP = DAG.getCopyFromReg(Chain, dl, LoongArch::R3, GRLenVT);
11574 Chain = SP.getValue(1);
11575 SP = DAG.getNode(ISD::SUB, dl, GRLenVT, SP, Size);
         // AND with the negated alignment clears the low bits, rounding the
         // new SP down to a multiple of the alignment.
11576 if (Align)
11577 SP = DAG.getNode(ISD::AND, dl, VT, SP.getValue(0),
11578 DAG.getSignedConstant(-Align->value(), dl, VT));
11579
11580 // Set the real SP to the new value with a probing loop.
11581 Chain = DAG.getNode(LoongArchISD::PROBED_ALLOCA, dl, MVT::Other, Chain, SP);
11582 return DAG.getMergeValues({SP, Chain}, dl);
11583}
11584
11587 MachineBasicBlock *MBB) const {
11588 MachineFunction &MF = *MBB->getParent();
11589 MachineBasicBlock::iterator MBBI = MI.getIterator();
11590 DebugLoc DL = MBB->findDebugLoc(MBBI);
11591 const Register TargetReg = MI.getOperand(0).getReg();
11592
11593 const LoongArchInstrInfo *TII = Subtarget.getInstrInfo();
11594 const bool IsLA64 = Subtarget.is64Bit();
11595 const Align StackAlign = Subtarget.getFrameLowering()->getStackAlign();
11596 const LoongArchTargetLowering *TLI = Subtarget.getTargetLowering();
11597 const uint64_t ProbeSize = TLI->getStackProbeSize(MF, StackAlign);
11598
11599 MachineFunction::iterator MBBInsertPoint = std::next(MBB->getIterator());
11600 MachineBasicBlock *const LoopTestMBB =
11601 MF.CreateMachineBasicBlock(MBB->getBasicBlock());
11602 MF.insert(MBBInsertPoint, LoopTestMBB);
11603 MachineBasicBlock *const ExitMBB =
11604 MF.CreateMachineBasicBlock(MBB->getBasicBlock());
11605 MF.insert(MBBInsertPoint, ExitMBB);
11606 const Register SPReg = LoongArch::R3;
11607 const Register ScratchReg =
11608 MF.getRegInfo().createVirtualRegister(&LoongArch::GPRRegClass);
11609
11610 // ScratchReg = ProbeSize
11611 TII->movImm(*MBB, MBBI, DL, ScratchReg, ProbeSize, MachineInstr::NoFlags);
11612
11613 // LoopTest:
11614 // sub.{w/d} $sp, $sp, ScratchReg
11615 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL,
11616 TII->get(IsLA64 ? LoongArch::SUB_D : LoongArch::SUB_W), SPReg)
11617 .addReg(SPReg)
11618 .addReg(ScratchReg);
11619
11620 // st.{w/d} $zero, $sp, 0
11621 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL,
11622 TII->get(IsLA64 ? LoongArch::ST_D : LoongArch::ST_W))
11623 .addReg(LoongArch::R0)
11624 .addReg(SPReg)
11625 .addImm(0);
11626
11627 // bltu TargetReg, $sp, LoopTest
11628 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(LoongArch::BLTU))
11629 .addReg(TargetReg)
11630 .addReg(SPReg)
11631 .addMBB(LoopTestMBB);
11632
11633 // move $sp, TargetReg
11634 BuildMI(*ExitMBB, ExitMBB->end(), DL, TII->get(LoongArch::OR), SPReg)
11635 .addReg(TargetReg)
11636 .addReg(LoongArch::R0);
11637
11638 ExitMBB->splice(ExitMBB->end(), MBB, std::next(MBBI), MBB->end());
11640
11641 LoopTestMBB->addSuccessor(ExitMBB);
11642 LoopTestMBB->addSuccessor(LoopTestMBB);
11643 MBB->addSuccessor(LoopTestMBB);
11644
11645 MI.eraseFromParent();
11646 MF.getInfo<LoongArchMachineFunctionInfo>()->setDynamicAllocation();
11647 return ExitMBB->begin()->getParent();
11648}
static MCRegister MatchRegisterName(StringRef Name)
static SDValue performSHLCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
If the operand is a bitwise AND with a constant RHS, and the shift has a constant RHS and is the only...
static bool checkValueWidth(SDValue V, unsigned width, ISD::LoadExtType &ExtType)
static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
return SDValue()
static SDValue performANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue performSELECT_CCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue performSETCCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static MCRegister MatchRegisterAltName(StringRef Name)
Maps from the set of all alternative registernames to a register number.
Function Alias Analysis Results
static uint64_t getConstant(const Value *IndexValue)
static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG, unsigned Flags)
#define X(NUM, ENUM, NAME)
Definition ELF.h:853
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static MachineBasicBlock * emitSelectPseudo(MachineInstr &MI, MachineBasicBlock *BB, unsigned Opcode)
static SDValue unpackFromRegLoc(const CSKYSubtarget &Subtarget, SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
static bool isSigned(unsigned Opcode)
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define RegName(no)
static SDValue performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
const MCPhysReg ArgFPR32s[]
static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Dispatching routine to lower various 128-bit LoongArch vector shuffles.
static SDValue lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVSHUF4I (if possible).
const MCPhysReg ArgVRs[]
static SDValue lowerVECTOR_SHUFFLE_VPICKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPICKEV (if possible).
static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_XVPICKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPICKOD (if possible).
static SDValue unpackF64OnLA32DSoftABI(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const CCValAssign &HiVA, const SDLoc &DL)
static bool fitsRegularPattern(typename SmallVectorImpl< ValType >::const_iterator Begin, unsigned CheckStride, typename SmallVectorImpl< ValType >::const_iterator End, ValType ExpectedIndex, unsigned ExpectedIndexStride)
Determine whether a range fits a regular pattern of values.
static SDValue lowerVECTOR_SHUFFLE_IsReverse(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE whose result is the reversed source vector.
static SDValue PromoteMaskArithmetic(SDValue N, const SDLoc &DL, EVT VT, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, unsigned Depth)
static SDValue performHorizWideningCombine(SDNode *N, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg, SelectionDAG &DAG)
static cl::opt< bool > ZeroDivCheck("loongarch-check-zero-division", cl::Hidden, cl::desc("Trap on integer division by zero."), cl::init(false))
static SDValue lowerVECTOR_SHUFFLE_XVPERMI(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVPERMI (if possible).
static SDValue lowerVECTOR_SHUFFLE_VSHUF(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into VSHUF.
static int getEstimateRefinementSteps(EVT VT, const LoongArchSubtarget &Subtarget)
static bool isSupportedReciprocalEstimateType(EVT VT, const LoongArchSubtarget &Subtarget)
static void emitErrorAndReplaceIntrinsicResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, StringRef ErrorMsg, bool WithChain=true)
static SDValue lowerVECTOR_SHUFFLEAsByteRotate(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE as byte rotate (if possible).
static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp, SelectionDAG &DAG, bool IsSigned=false)
static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_XVINSVE0(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVINSVE0 (if possible).
static SDValue performMOVFR2GR_SCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_VILVH(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VILVH (if possible).
static SDValue performDemandedBitsCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI)
static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI, unsigned ValNo, MVT ValVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsRet, Type *OrigTy)
static SDValue performVSELECTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG)
static SDValue performSPLIT_PAIR_F64Combine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue performBITCASTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static MachineBasicBlock * emitSplitPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue lowerVectorBitSetImm(SDNode *Node, SelectionDAG &DAG)
static SDValue performSETCC_BITCASTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
@ NoMaterializeFPImm
@ MaterializeFPImm2Ins
@ MaterializeFPImm5Ins
@ MaterializeFPImm6Ins
@ MaterializeFPImm3Ins
@ MaterializeFPImm4Ins
static SDValue performEXTENDCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_XVPACKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPACKOD (if possible).
static bool buildVPERMIInfo(ArrayRef< int > Mask, SDValue V1, SDValue V2, SmallVectorImpl< SDValue > &SrcVec, unsigned &MaskImm)
static std::optional< bool > matchSetCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue Val)
static SDValue combineAndNotIntoVANDN(SDNode *N, const SDLoc &DL, SelectionDAG &DAG)
Try to fold: (and (xor X, -1), Y) -> (vandn X, Y).
static SDValue matchLowHalfOf128BitLanes(SDValue N)
static SDValue lowerBUILD_VECTORAsBroadCastLoad(BuildVectorSDNode *BVOp, const SDLoc &DL, SelectionDAG &DAG)
#define CRC_CASE_EXT_BINARYOP(NAME, NODE)
static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG)
static bool checkBitcastSrcVectorSize(SDValue Src, unsigned Size, unsigned Depth)
static bool isConstantSplatVector(SDValue N, APInt &SplatValue, unsigned MinSizeInBits)
static SDValue lowerVECTOR_SHUFFLEAsShift(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, const APInt &Zeroable)
Lower VECTOR_SHUFFLE as shift (if possible).
static SDValue lowerVECTOR_SHUFFLE_VSHUF4I(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into VSHUF4I (if possible).
static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG)
static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
static MachineBasicBlock * insertDivByZeroTrap(MachineInstr &MI, MachineBasicBlock *MBB)
static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG)
static SDValue lowerVECTOR_SHUFFLE_VEXTRINS(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into VEXTRINS (if possible).
static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG)
static SDValue lowerVECTOR_SHUFFLE_VPACKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPACKEV (if possible).
static MachineBasicBlock * emitPseudoVMSKCOND(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue performSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue performVANDNCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
Do target-specific dag combines on LoongArchISD::VANDN nodes.
static void replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, unsigned ResOp)
static SDValue lowerVECTOR_SHUFFLEAsZeroOrAnyExtend(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const APInt &Zeroable)
Lower VECTOR_SHUFFLE as ZERO_EXTEND Or ANY_EXTEND (if possible).
static SDValue legalizeIntrinsicImmArg(SDNode *Node, unsigned ImmOp, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, bool IsSigned=false)
static cl::opt< MaterializeFPImm > MaterializeFPImmInsNum("loongarch-materialize-float-imm", cl::Hidden, cl::desc("Maximum number of instructions used (including code sequence " "to generate the value and moving the value to FPR) when " "materializing floating-point immediates (default = 3)"), cl::init(MaterializeFPImm3Ins), cl::values(clEnumValN(NoMaterializeFPImm, "0", "Use constant pool"), clEnumValN(MaterializeFPImm2Ins, "2", "Materialize FP immediate within 2 instructions"), clEnumValN(MaterializeFPImm3Ins, "3", "Materialize FP immediate within 3 instructions"), clEnumValN(MaterializeFPImm4Ins, "4", "Materialize FP immediate within 4 instructions"), clEnumValN(MaterializeFPImm5Ins, "5", "Materialize FP immediate within 5 instructions"), clEnumValN(MaterializeFPImm6Ins, "6", "Materialize FP immediate within 6 instructions " "(behaves same as 5 on loongarch64)")))
static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op, StringRef ErrorMsg, SelectionDAG &DAG)
const MCPhysReg ArgXRs[]
static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State, CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2, MVT ValVT2, MVT LocVT2, ISD::ArgFlagsTy ArgFlags2)
static unsigned getLoongArchWOpcode(unsigned Opcode)
const MCPhysReg ArgFPR64s[]
static MachineBasicBlock * emitPseudoCTPOP(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue performMOVGR2FR_WCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
#define IOCSRWR_CASE(NAME, NODE)
#define CRC_CASE_EXT_UNARYOP(NAME, NODE)
static SDValue lowerVECTOR_SHUFFLE_VPACKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPACKOD (if possible).
static SDValue signExtendBitcastSrcVector(SelectionDAG &DAG, EVT SExtVT, SDValue Src, const SDLoc &DL)
static SDValue isNOT(SDValue V, SelectionDAG &DAG)
static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Dispatching routine to lower various 256-bit LoongArch vector shuffles.
static SDValue lowerVECTOR_SHUFFLE_VREPLVEI(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into VREPLVEI (if possible).
static MachineBasicBlock * emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
const MCPhysReg PreserveNoneArgGPRs[]
static void fillVector(ArrayRef< SDValue > Ops, SelectionDAG &DAG, SDLoc DL, const LoongArchSubtarget &Subtarget, SDValue &Vector, EVT ResTy)
static SDValue fillSubVectorFromBuildVector(BuildVectorSDNode *Node, SelectionDAG &DAG, SDLoc DL, const LoongArchSubtarget &Subtarget, EVT ResTy, unsigned first)
static bool isSelectPseudo(MachineInstr &MI)
static SDValue foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp, SelectionDAG &DAG, bool IsSigned=false)
const MCPhysReg ArgGPRs[]
static SDValue lowerVECTOR_SHUFFLE_XVPERM(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVPERM (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVILVL(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVILVL (if possible).
static SDValue performFP_TO_INTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_VPERMI(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into VPERMI (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVEXTRINS(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVEXTRINS (if possible).
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp, unsigned ExtOpc=ISD::ANY_EXTEND)
static void replaceVecCondBranchResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, unsigned ResOp)
#define ASRT_LE_GT_CASE(NAME)
static SDValue lowerVECTOR_SHUFFLE_XVPACKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPACKEV (if possible).
static SDValue matchDeinterleaveBuildVector(SDValue N, unsigned &StartIndex)
static SDValue performBR_CCCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static void computeZeroableShuffleElements(ArrayRef< int > Mask, SDValue V1, SDValue V2, APInt &KnownUndef, APInt &KnownZero)
Compute whether each element of a shuffle is zeroable.
static SDValue combineFP_ROUND(SDValue N, const SDLoc &DL, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue widenShuffleMask(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
static MachineBasicBlock * emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static bool canonicalizeShuffleVectorByLane(const SDLoc &DL, MutableArrayRef< int > Mask, MVT VT, SDValue &V1, SDValue &V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Shuffle vectors by lane to generate more optimized instructions.
static SDValue lowerVECTOR_SHUFFLE_XVILVH(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVILVH (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVSHUF (if possible).
static void replaceCMP_XCHG_128Results(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG)
static SDValue lowerVectorPickVE2GR(SDNode *N, SelectionDAG &DAG, unsigned ResOp)
static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
#define IOCSRRD_CASE(NAME, NODE)
static int matchShuffleAsByteRotate(MVT VT, SDValue &V1, SDValue &V2, ArrayRef< int > Mask)
Attempts to match vector shuffle as byte rotation.
static SDValue lowerVECTOR_SHUFFLE_XVPICKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPICKEV (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVREPLVEI (if possible).
static int matchShuffleAsShift(MVT &ShiftVT, unsigned &Opcode, unsigned ScalarSizeInBits, ArrayRef< int > Mask, int MaskOffset, const APInt &Zeroable)
Attempts to match a shuffle mask against the VBSLL, VBSRL, VSLLI and VSRLI instruction.
static SDValue lowerVECTOR_SHUFFLE_VILVL(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VILVL (if possible).
static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG)
static MachineBasicBlock * emitBuildPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE as lane permute and then shuffle (if possible).
static void replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
#define CSR_CASE(ID)
static SDValue lowerVECTOR_SHUFFLE_VPICKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPICKOD (if possible).
static Intrinsic::ID getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen, AtomicRMWInst::BinOp BinOp)
static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS, ISD::CondCode &CC, SelectionDAG &DAG)
static Register allocateArgGPR(CCState &State)
static bool isRepeatedShuffleMask(unsigned LaneSizeInBits, MVT VT, ArrayRef< int > Mask, SmallVectorImpl< int > &RepeatedMask)
Test whether a shuffle mask is equivalent within each sub-lane.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define T
static CodeModel::Model getCodeModel(const PPCSubtarget &S, const TargetMachine &TM, const MachineOperand &MO)
static constexpr MCPhysReg SPReg
const SmallVectorImpl< MachineOperand > & Cond
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
This file defines the SmallSet class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
This file contains some functions that are useful when dealing with strings.
#define LLVM_DEBUG(...)
Definition Debug.h:119
static bool inRange(const MCExpr *Expr, int64_t MinValue, int64_t MaxValue, bool AllowSymbol=false)
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static bool isSequentialOrUndefInRange(ArrayRef< int > Mask, unsigned Pos, unsigned Size, int Low, int Step=1)
Return true if every element in Mask, beginning from position Pos and ending in Pos + Size,...
Value * RHS
Value * LHS
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
Definition APFloat.h:1527
bool isZero() const
Definition APFloat.h:1540
APInt bitcastToAPInt() const
Definition APFloat.h:1436
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1055
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:230
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1563
void setHighBits(unsigned hiBits)
Set the top hiBits bits.
Definition APInt.h:1414
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition APInt.h:1408
LLVM_ABI APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition APInt.cpp:1076
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:968
void setBit(unsigned BitPosition)
Set to 1 the bit whose position is given by "BitPosition".
Definition APInt.h:1353
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:372
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition APInt.cpp:1692
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1511
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1662
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition APInt.h:436
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1264
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:307
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:201
void setLowBits(unsigned loBits)
Set the bottom loBits bits.
Definition APInt.h:1411
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition APInt.h:287
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1585
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:858
This class represents an incoming formal argument to a Function.
Definition Argument.h:32
unsigned getArgNo() const
Return the index of this formal argument in its containing function.
Definition Argument.h:50
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
Get the array size.
Definition ArrayRef.h:141
An instruction that atomically checks whether a specified value is in a memory location,...
AtomicOrdering getFailureOrdering() const
Returns the failure ordering constraint of this cmpxchg instruction.
An instruction that atomically reads a memory location, combines it with another value,...
Align getAlign() const
Return the alignment of the memory that is being accessed by the instruction.
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ Add
*p = old + v
@ USubCond
Subtract only if no unsigned overflow.
@ Min
*p = old <signed v ? old : v
@ Sub
*p = old - v
@ And
*p = old & v
@ Xor
*p = old ^ v
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ UIncWrap
Increment one up to a maximum value.
@ Max
*p = old >signed v ? old : v
@ UMin
*p = old <unsigned v ? old : v
@ UMax
*p = old >unsigned v ? old : v
@ UDecWrap
Decrement one until a minimum value or zero.
@ Nand
*p = ~(old & v)
Value * getPointerOperand()
bool isFloatingPointOperation() const
BinOp getOperation() const
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this rmw instruction.
AtomicOrdering getOrdering() const
Returns the ordering constraint of this rmw instruction.
LLVM_ABI StringRef getValueAsString() const
Return the attribute's value as a string.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
bool test(unsigned Idx) const
Returns true if bit Idx is set.
Definition BitVector.h:482
size_type count() const
Returns the number of bits which are set.
Definition BitVector.h:181
A "pseudo-class" with methods for operating on BUILD_VECTORs.
CCState - This class holds information needed while lowering arguments and return values.
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
LLVM_ABI void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeCallOperands - Analyze the outgoing arguments to a call, incorporating info about the passed v...
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
LLVM_ABI void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
CCValAssign - Represent assignment of one arg/retval to a location.
static CCValAssign getPending(unsigned ValNo, MVT ValVT, MVT LocVT, LocInfo HTP, unsigned ExtraInfo=0)
Register getLocReg() const
LocInfo getLocInfo() const
static CCValAssign getReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP, bool IsCustom=false)
static CCValAssign getCustomReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP)
static CCValAssign getMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP, bool IsCustom=false)
bool needsCustom() const
int64_t getLocMemOffset() const
unsigned getValNo() const
static CCValAssign getCustomMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP)
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
LLVM_ABI bool isMustTailCall() const
Tests if this call site must be tail call optimized.
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
const APFloat & getValueAPF() const
This is the shared class of boolean and integer constants.
Definition Constants.h:87
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition Constants.h:231
bool isZero() const
This is just a convenience method to make client code smaller for a common case.
Definition Constants.h:219
uint64_t getZExtValue() const
int64_t getSExtValue() const
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string and methods for querying it.
Definition DataLayout.h:64
unsigned getPointerSizeInBits(unsigned AS=0) const
The size in bits of the pointer representation in a given address space.
Definition DataLayout.h:501
LLVM_ABI Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
A debug info location.
Definition DebugLoc.h:124
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition Function.h:211
iterator_range< arg_iterator > args()
Definition Function.h:892
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition Function.cpp:759
uint64_t getFnAttributeAsParsedInteger(StringRef Kind, uint64_t Default=0) const
For a string attribute Kind, parse attribute as an integer.
Definition Function.cpp:771
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this functio...
Definition Function.h:272
Argument * getArg(unsigned i) const
Definition Function.h:886
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:724
bool isDSOLocal() const
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2900
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Class to represent integer types.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI void emitError(const Instruction *I, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location inf...
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
LoongArchMachineFunctionInfo - This class is derived from MachineFunctionInfo and contains private Lo...
void setIncomingIndirectArg(unsigned ArgIndex, Register Reg)
Register getIncomingIndirectArg(unsigned ArgIndex) const
const LoongArchRegisterInfo * getRegisterInfo() const override
const LoongArchInstrInfo * getInstrInfo() const override
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override
Return true if result of the specified node is used by a return node only.
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps, bool &UseOneConstNR, bool Reciprocal) const override
Hooks for building estimates in place of slower divisions and square roots.
bool isLegalICmpImmediate(int64_t Imm) const override
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(const AtomicCmpXchgInst *CI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
Return the ValueType of the result of SETCC operations.
std::pair< bool, uint64_t > isImmVLDILegalForMode1(const APInt &SplatValue, const unsigned SplatBitSize) const
Check if a constant splat can be generated using [x]vldi, where imm[12] is 1.
void getTgtMemIntrinsic(SmallVectorImpl< IntrinsicInfo > &Infos, const CallBase &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
bool hasInlineStackProbe(const MachineFunction &MF) const override
True if stack clash protection is enabled for this function.
SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
bool isExtractVecEltCheap(EVT VT, unsigned Index) const override
Return true if extraction of a scalar element from the given vector type at the given index is cheap.
LegalizeTypeAction getPreferredVectorAction(MVT VT) const override
Return the preferred vector type legalization action.
bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Determine if the target supports unaligned memory accesses.
bool isCheapToSpeculateCtlz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic ctlz.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
bool shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize, Align &PrefAlign) const override
Return true if the pointer arguments to CI should be aligned by aligning the object whose address is ...
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const override
Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type from this source type with ...
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
bool signExtendConstant(const ConstantInt *CI) const override
Return true if this constant should be sign extended when promoting to a larger type.
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(const AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool isLegalAddImmediate(int64_t Imm) const override
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
bool isCheapToSpeculateCttz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic cttz.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
MachineBasicBlock * emitDynamicProbedAlloc(MachineInstr &MI, MachineBasicBlock *MBB) const
bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const override
Returns true if arguments should be sign-extended in lib calls.
bool shouldScalarizeBinop(SDValue VecOp) const override
Try to convert an extract element of a vector binary operation into an extract element followed by a ...
bool isFPImmVLDILegal(const APFloat &Imm, EVT VT) const
bool shouldExtendTypeInLibCall(EVT Type) const override
Returns true if arguments should be extended in lib calls.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
bool hasAndNot(SDValue Y) const override
Return true if the target has a bitwise and-not operation: X = ~A & B This can be used to simplify se...
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
unsigned getStackProbeSize(const MachineFunction &MF, Align StackAlign) const
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const override
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
void emitExpandAtomicRMW(AtomicRMWInst *AI) const override
Perform an atomicrmw expansion in a target-specific way.
ISD::NodeType getExtendForAtomicCmpSwapArg() const override
Returns how the platform's atomic compare and swap expects its comparison value to be extended (ZERO_...
LoongArchTargetLowering(const TargetMachine &TM, const LoongArchSubtarget &STI)
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
bool hasAndNotCompare(SDValue Y) const override
Return true if the target should transform: (X & Y) == Y ---> (~X & Y) == 0 (X & Y) !...
SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps) const override
Return a reciprocal estimate value for the input operand.
bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT, const MachineFunction &MF) const override
Returns true if it's reasonable to merge stores to MemVT size.
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context, const Type *RetTy) const override
This hook should be implemented to check whether the return values described by the Outs array can fi...
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array,...
bool mayBeEmittedAsTailCall(const CallInst *CI) const override
Return true if the target may be able to emit the call instruction as a tail call.
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:41
bool hasFeature(unsigned Feature) const
Machine Value Type.
static MVT getFloatingPointVT(unsigned BitWidth)
bool is128BitVector() const
Return true if this is a 128-bit vector type.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isScalableVector() const
Return true if this is a vector value type where the runtime length is machine dependent.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
bool is256BitVector() const
Return true if this is a 256-bit vector type.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
MVT getDoubleNumVectorElementsVT() const
MVT getHalfNumVectorElementsVT() const
Return a VT for a vector type with the same element type but half the number of elements.
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
MVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
void push_back(MachineInstr *MI)
void setCallFrameSize(unsigned N)
Set the call frame size on entry to this basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
Representation of each machine instruction.
bool isImplicitDef() const
LLVM_ABI void collectDebugValues(SmallVectorImpl< MachineInstr * > &DbgValues)
Scan instructions immediately following MI and collect any matching DBG_VALUEs.
const MachineOperand & getOperand(unsigned i) const
LLVM_ABI MachineInstrBundleIterator< MachineInstr > eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
A description of a memory reference used in the backend.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
Flags getFlags() const
Return the raw flags of the source value.
MachineOperand class - Representation of each machine instruction operand.
void setIsKill(bool Val=true)
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
Align getAlign() const
MachineMemOperand * getMemOperand() const
Return the unique MachineMemOperand object describing the memory reference performed by operation.
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
Represent a mutable reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:294
Class to represent pointers.
unsigned getAddressSpace() const
Return the address space of the Pointer type.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:79
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
LLVM_ABI bool isOnlyUserOf(const SDNode *N) const
Return true if this node is the only use of N.
size_t use_size() const
Return the number of uses of this node.
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumOperands() const
Return the number of values used by this operation.
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
bool isUndef() const
Returns true if the node type is UNDEF or POISON.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
uint64_t getScalarValueSizeInBits() const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
LLVM_ABI SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
bool isSafeToSpeculativelyExecute(unsigned Opcode) const
Some opcodes may create immediate undefined behavior when used with some values (integer division-by-...
SDValue getExtractSubvector(const SDLoc &DL, EVT VT, SDValue Vec, unsigned Idx)
Return the VT typed sub-vector of Vec at Idx.
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getInsertSubvector(const SDLoc &DL, SDValue Vec, SDValue SubVec, unsigned Idx)
Insert SubVec at the Idx element of Vec.
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false, SDNodeFlags Flags={})
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
LLVM_ABI SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align DstAlign, Align SrcAlign, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), BatchAAResults *BatchAA=nullptr)
const TargetLowering & getTargetLoweringInfo() const
static constexpr unsigned MaxRecursionDepth
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
LLVM_ABI SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
LLVM_ABI void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
LLVM_ABI SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
LLVM_ABI SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
LLVM_ABI std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
LLVM_ABI SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
LLVM_ABI SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
LLVM_ABI SDValue WidenVector(const SDValue &N, const SDLoc &DL)
Widen the vector up to the next power of two using INSERT_SUBVECTOR.
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
LLVM_ABI SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
LLVM_ABI SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getRegisterMask(const uint32_t *RegMask)
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVM_ABI SDValue getCondCode(ISD::CondCode Cond)
LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
LLVMContext * getContext() const
LLVM_ABI SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
LLVM_ABI SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
LLVM_ABI std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
static LLVM_ABI bool isReverseMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask swaps the order of elements from exactly one source vector.
ArrayRef< int > getMask() const
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:134
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition SmallSet.h:176
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:184
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void assign(size_type NumElts, ValueParamT Elt)
void reserve(size_type N)
typename SuperClass::const_iterator const_iterator
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StackOffset holds a fixed and a scalable offset in bytes.
Definition TypeSize.h:30
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition StringRef.h:730
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:258
constexpr size_t size() const
Get the string size.
Definition StringRef.h:144
TargetInstrInfo - Interface to description of machine instruction set.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
const TargetMachine & getTargetMachine() const
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
LegalizeTypeAction
This enum indicates whether a type is legal for a target, and if not, what action should be used to...
void setMaxBytesForAlignment(unsigned MaxBytes)
bool isOperationLegalOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal using promotion.
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
virtual TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT) const
Return the preferred vector type legalization action.
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
void setMinCmpXchgSizeInBits(unsigned SizeInBits)
Sets the minimum cmpxchg or ll/sc size supported by the backend.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
std::vector< ArgListEntry > ArgListTy
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Vector Op.
virtual InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const
SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
More limited version of SimplifyDemandedBits that can be used to "lookthrough" ops that don't contrib...
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
TargetLowering(const TargetLowering &)=delete
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::LibcallImpl LibcallImpl, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
Primary interface to the complete machine description for the target machine.
bool useTLSDESC() const
Returns true if this target uses TLS Descriptors.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
bool shouldAssumeDSOLocal(const GlobalValue *GV) const
CodeModel::Model getCodeModel() const
Returns the code model.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetInstrInfo * getInstrInfo() const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
LLVM_ABI unsigned getIntegerBitWidth() const
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:197
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:257
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:313
This class is used to represent EVT's, which are used to parameterize some operations.
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:553
self_iterator getIterator()
Definition ilist_node.h:123
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ PreserveMost
Used for runtime calls that preserve most registers.
Definition CallingConv.h:63
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition CallingConv.h:50
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ PreserveNone
Used for runtime calls that preserves none general registers.
Definition CallingConv.h:90
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
LLVM_ABI bool isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are ~0 ...
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition ISDOpcodes.h:41
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:827
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to; it returns an output chain.
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition ISDOpcodes.h:511
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition ISDOpcodes.h:45
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:275
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:600
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:787
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:264
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:861
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:518
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:220
@ GlobalAddress
Definition ISDOpcodes.h:88
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:888
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:584
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:417
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:747
@ MEMBARRIER
MEMBARRIER - Compiler barrier only; generate a no-op.
@ ATOMIC_FENCE
OUTCHAIN = ATOMIC_FENCE(INCHAIN, ordering, scope) This corresponds to the fence instruction.
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition ISDOpcodes.h:918
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:254
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
@ GlobalTLSAddress
Definition ISDOpcodes.h:89
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:852
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition ISDOpcodes.h:715
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:665
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ BR_CC
BR_CC - Conditional branch.
@ BR_JT
BR_JT - Jumptable branch.
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition ISDOpcodes.h:541
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition ISDOpcodes.h:548
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:374
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:804
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:233
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:704
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:769
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:649
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:614
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimumNumber or maximumNumber on two values,...
@ READ_REGISTER
READ_REGISTER, WRITE_REGISTER - This node represents llvm.register on the DAG, which implements the n...
Definition ISDOpcodes.h:139
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:576
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition ISDOpcodes.h:224
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:858
@ DEBUGTRAP
DEBUGTRAP - Trap intended to get the attention of a debugger.
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:819
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values, following IEEE-754 definition...
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:896
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:727
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:986
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:813
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition ISDOpcodes.h:150
@ BF16_TO_FP
BF16_TO_FP, FP_TO_BF16 - These operators are used to perform promotions and truncation for bfloat16.
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition ISDOpcodes.h:110
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:934
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:739
@ TRAP
TRAP - Trapping instruction.
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:205
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
Definition ISDOpcodes.h:710
@ FREEZE
FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or is evaluated to UNDEF),...
Definition ISDOpcodes.h:241
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:565
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:967
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition ISDOpcodes.h:929
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:864
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
@ BRCOND
BRCOND - Conditional branch.
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition ISDOpcodes.h:841
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition ISDOpcodes.h:62
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:365
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition ISDOpcodes.h:722
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:213
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:556
bool isExtVecInRegOpcode(unsigned Opcode)
LLVM_ABI bool isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are 0 o...
LLVM_ABI CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
bool isBitwiseLogicOp(unsigned Opcode)
Whether this is bitwise logic opcode.
LLVM_ABI bool isFreezeUndef(const SDNode *N)
Return true if the specified node is FREEZE(UNDEF).
LLVM_ABI CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LLVM_ABI bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
LLVM_ABI NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode)
Get underlying scalar opcode for VECREDUCE opcode.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > OverloadTys={})
Look up the Function declaration of the intrinsic id in the Module M.
ABI getTargetABI(StringRef ABIName)
InstSeq generateInstSeq(int64_t Val)
LLVM_ABI Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition LLVMContext.h:55
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
Sequence
A sequence of states that a pointer may go through in which an objc_retain and objc_release are actua...
Definition PtrState.h:41
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
This is an optimization pass for GlobalISel generic memory operations.
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
Definition Threading.h:280
@ Offset
Definition DWP.cpp:558
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1738
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
constexpr RegState getKillRegState(bool B)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
bool isIntOrFPConstant(SDValue V)
Return true if V is either an integer or FP constant.
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
Definition bit.h:325
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value that is congruent to Skew modulo Align.
Definition MathExtras.h:546
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64-bit edition).
Definition MathExtras.h:284
LLVM_ABI bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:337
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
Definition MathExtras.h:273
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1745
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition MathExtras.h:261
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Other
Any other memory.
Definition ModRef.h:68
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr bool isShiftedInt(int64_t x)
Checks if a signed integer is an N bit number shifted left by S.
Definition MathExtras.h:182
constexpr unsigned BitWidth
std::string join_items(Sep Separator, Args &&... Items)
Joins the strings in the parameter pack Items, adding Separator between the elements....
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
PointerUnion< const Value *, const PseudoSourceValue * > ValueType
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:177
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:862
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:77
Extended Value Type.
Definition ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition ValueTypes.h:90
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:418
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:145
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition ValueTypes.h:307
bool bitsLT(EVT VT) const
Return true if this has fewer bits than VT.
Definition ValueTypes.h:323
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:155
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:396
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:408
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:339
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition ValueTypes.h:230
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:61
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:404
static EVT getFloatingPointVT(unsigned BitWidth)
Returns the EVT that represents a floating-point type with the given number of bits.
Definition ValueTypes.h:55
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:176
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:346
bool is256BitVector() const
Return true if this is a 256-bit vector type.
Definition ValueTypes.h:235
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:351
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:165
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:359
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:484
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:160
Align getNonZeroOrigAlign() const
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
void resetAll()
Resets the known state of all bits.
Definition KnownBits.h:72
Matching combinators.
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
static LLVM_ABI MachinePointerInfo getUnknownStack(MachineFunction &MF)
Stack memory without other information.
static LLVM_ABI MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition Alignment.h:106
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
SmallVector< ISD::OutputArg, 32 > Outs
LLVM_ABI SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
This structure is used to pass arguments to makeLibCall function.
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...