LLVM 23.0.0git
LoongArchISelLowering.cpp
Go to the documentation of this file.
1//=- LoongArchISelLowering.cpp - LoongArch DAG Lowering Implementation ---===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that LoongArch uses to lower LLVM code into
10// a selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
15#include "LoongArch.h"
19#include "LoongArchSubtarget.h"
23#include "llvm/ADT/SmallSet.h"
24#include "llvm/ADT/Statistic.h"
30#include "llvm/IR/IRBuilder.h"
32#include "llvm/IR/IntrinsicsLoongArch.h"
34#include "llvm/Support/Debug.h"
39
40using namespace llvm;
41
42#define DEBUG_TYPE "loongarch-isel-lowering"
43
44STATISTIC(NumTailCalls, "Number of tail calls");
45
54
// Hidden option "loongarch-materialize-float-imm": caps the number of
// instructions spent materializing a floating-point immediate (value
// generation plus the move to an FPR). The value "0" selects the constant
// pool instead.
// NOTE(review): the opening line of this declaration and the clEnumValN(...)
// heads of most enumerators are not visible in this listing.
56 "loongarch-materialize-float-imm", cl::Hidden,
57 cl::desc("Maximum number of instructions used (including code sequence "
58 "to generate the value and moving the value to FPR) when "
59 "materializing floating-point immediates (default = 3)"),
61 cl::values(clEnumValN(NoMaterializeFPImm, "0", "Use constant pool"),
63 "Materialize FP immediate within 2 instructions"),
65 "Materialize FP immediate within 3 instructions"),
67 "Materialize FP immediate within 4 instructions"),
69 "Materialize FP immediate within 5 instructions"),
71 "Materialize FP immediate within 6 instructions "
72 "(behaves same as 5 on loongarch64)")));
73
// Hidden boolean option "loongarch-check-zero-division"; off unless given on
// the command line. Its description says it traps on integer division by
// zero.
74static cl::opt<bool> ZeroDivCheck("loongarch-check-zero-division", cl::Hidden,
75 cl::desc("Trap on integer division by zero."),
76 cl::init(false));
77
// Constructor body of the LoongArch target-lowering object. It registers a
// register class for every value type the subtarget supports, then records
// per-feature (base, 'F', 'D', LSX, LASX) operation, extending-load,
// truncating-store and condition-code actions, and finally computes the
// derived register properties and alignment preferences.
// NOTE(review): the first line of the signature and a large number of
// statements (mostly setOperationAction calls) are not visible in this
// listing; the notes below only describe what can be seen here.
79 const LoongArchSubtarget &STI)
80 : TargetLowering(TM, STI), Subtarget(STI) {
81
    // Value type returned by the subtarget for its general-purpose registers.
82 MVT GRLenVT = Subtarget.getGRLenVT();
83
84 // Set up the register classes.
85
86 addRegisterClass(GRLenVT, &LoongArch::GPRRegClass);
87 if (Subtarget.hasBasicF())
88 addRegisterClass(MVT::f32, &LoongArch::FPR32RegClass);
89 if (Subtarget.hasBasicD())
90 addRegisterClass(MVT::f64, &LoongArch::FPR64RegClass);
91
    // Vector types placed in the LSX128 register class when LSX is available.
92 static const MVT::SimpleValueType LSXVTs[] = {
93 MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32, MVT::v2f64};
    // Vector types placed in the LASX256 register class when LASX is
    // available.
94 static const MVT::SimpleValueType LASXVTs[] = {
95 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, MVT::v8f32, MVT::v4f64};
96
97 if (Subtarget.hasExtLSX())
98 for (MVT VT : LSXVTs)
99 addRegisterClass(VT, &LoongArch::LSX128RegClass);
100
101 if (Subtarget.hasExtLASX())
102 for (MVT VT : LASXVTs)
103 addRegisterClass(VT, &LoongArch::LASX256RegClass);
104
105 // Set operations for LA32 and LA64.
106
108 MVT::i1, Promote);
109
116
119 GRLenVT, Custom);
120
122
127
129 setOperationAction(ISD::TRAP, MVT::Other, Legal);
130
134
136
137 // BITREV/REVB requires the 32S feature.
138 if (STI.has32S()) {
139 // Expand bitreverse.i16 with native-width bitrev and shift for now, before
140 // we get to know which of sll and revb.2h is faster.
143
144 // LA32 does not have REVB.2W and REVB.D due to the 64-bit operands, and
145 // the narrower REVB.W does not exist. But LA32 does have REVB.2H, so i16
146 // and i32 could still be byte-swapped relatively cheaply.
148 } else {
156 }
157
164
167
168 // Set operations for LA64 only.
169
170 if (Subtarget.is64Bit()) {
188
192 Custom);
194 }
195
196 // Set operations for LA32 only.
197
198 if (!Subtarget.is64Bit()) {
204 if (Subtarget.hasBasicD())
206 }
207
209
    // Condition codes marked Expand for f32 and f64 in the 'F' and 'D'
    // sections below.
    // NOTE(review): the initializer list is not visible in this listing.
210 static const ISD::CondCode FPCCToExpand[] = {
213
214 // Set operations for 'F' feature.
215
216 if (Subtarget.hasBasicF()) {
    // Extending loads from, and truncating stores to, f16/bf16 are marked
    // Expand for f32.
217 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
218 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
219 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
220 setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
221 setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
222
241 Subtarget.isSoftFPABI() ? LibCall : Custom);
243 Subtarget.isSoftFPABI() ? LibCall : Custom);
246 Subtarget.isSoftFPABI() ? LibCall : Custom);
247
248 if (Subtarget.is64Bit())
250
251 if (!Subtarget.hasBasicD()) {
253 if (Subtarget.is64Bit()) {
256 }
257 }
258 }
259
260 // Set operations for 'D' feature.
261
262 if (Subtarget.hasBasicD()) {
    // Same treatment for f64: extending loads from, and truncating stores
    // to, f16/bf16/f32 are marked Expand.
263 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
264 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
265 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
266 setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
267 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
268 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
269 setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
270
290 Subtarget.isSoftFPABI() ? LibCall : Custom);
293 Subtarget.isSoftFPABI() ? LibCall : Custom);
294
295 if (Subtarget.is64Bit())
297 }
298
299 // Set operations for 'LSX' feature.
300
301 if (Subtarget.hasExtLSX()) {
303 // Expand all truncating stores and extending loads.
304 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
305 setTruncStoreAction(VT, InnerVT, Expand);
308 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
309 }
310 // By default everything must be expanded. Then we will selectively turn
311 // on ones that can be effectively codegen'd.
312 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
314 }
315
316 for (MVT VT : LSXVTs) {
320
324
329 }
330 for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
333 Legal);
335 VT, Legal);
342 Expand);
357 }
358 for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
360 for (MVT VT : {MVT::v8i16, MVT::v4i32, MVT::v2i64})
362 for (MVT VT : {MVT::v4i32, MVT::v2i64}) {
365 }
367 for (MVT VT : {MVT::v4f32, MVT::v2f64}) {
375 VT, Expand);
383 }
385 setOperationAction(ISD::FCEIL, {MVT::f32, MVT::f64}, Legal);
386 setOperationAction(ISD::FFLOOR, {MVT::f32, MVT::f64}, Legal);
387 setOperationAction(ISD::FTRUNC, {MVT::f32, MVT::f64}, Legal);
388 setOperationAction(ISD::FROUNDEVEN, {MVT::f32, MVT::f64}, Legal);
389
390 for (MVT VT :
391 {MVT::v16i8, MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v8i16, MVT::v4i16,
392 MVT::v2i16, MVT::v4i32, MVT::v2i32, MVT::v2i64}) {
402 }
405 // We want to legalize this to an f64 load rather than an i64 load.
406 setOperationAction(ISD::LOAD, MVT::v2f32, Custom);
407 for (MVT VT : {MVT::v2i64, MVT::v4i32, MVT::v8i16})
409 for (MVT VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64, MVT::v16i32, MVT::v8i64,
410 MVT::v16i64})
412 }
413
414 // Set operations for 'LASX' feature.
415
416 if (Subtarget.hasExtLASX()) {
417 for (MVT VT : LASXVTs) {
421
427
431 }
432 for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) {
435 Legal);
437 VT, Legal);
444 Expand);
460 }
461 for (MVT VT : {MVT::v32i8, MVT::v16i16, MVT::v8i32})
463 for (MVT VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64})
465 for (MVT VT : {MVT::v8i32, MVT::v4i32, MVT::v4i64}) {
468 }
469 for (MVT VT : {MVT::v8f32, MVT::v4f64}) {
477 VT, Expand);
485 }
488 for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16}) {
491 }
492 for (MVT VT :
493 {MVT::v2i64, MVT::v4i32, MVT::v4i64, MVT::v8i16, MVT::v8i32}) {
496 }
497 }
498
499 // Set DAG combine for LA32 and LA64.
500 if (Subtarget.hasBasicF()) {
502 }
503
508
509 // Set DAG combine for 'LSX' feature.
510
511 if (Subtarget.hasExtLSX()) {
520 }
521
522 // Set DAG combine for 'LASX' feature.
523 if (Subtarget.hasExtLASX()) {
528 }
529
530 // Compute derived properties from the register classes.
531 computeRegisterProperties(Subtarget.getRegisterInfo());
532
534
537
    // The largest supported atomic width is the subtarget's GRLen.
538 setMaxAtomicSizeInBitsSupported(Subtarget.getGRLen());
539
541
542 // Function alignments.
544 // Set preferred alignments.
545 setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment());
546 setPrefLoopAlignment(Subtarget.getPrefLoopAlignment());
547 setMaxBytesForAlignment(Subtarget.getMaxBytesForAlignment());
548
549 // cmpxchg sizes down to 8 bits become legal if LAMCAS is available.
550 if (Subtarget.hasLAMCAS())
552
553 if (Subtarget.hasSCQ()) {
556 }
557
558 // Disable strict node mutation.
559 IsStrictFPEnabled = true;
560}
561
// Hook taking a GlobalAddressSDNode; it unconditionally answers `false`.
// NOTE(review): the first line of the signature (return type and function
// name) is not visible in this listing; going by the comment below, this is
// the query that decides whether an offset may be folded into a global
// address node — confirm against the full file.
563 const GlobalAddressSDNode *GA) const {
564 // In order to maximise the opportunity for common subexpression elimination,
565 // keep a separate ADD node for the global address offset instead of folding
566 // it in the global address node. Later peephole optimisations may choose to
567 // fold it back in when profitable.
568 return false;
569}
570
// Central dispatch for custom lowering: switches on Op's opcode and forwards
// to the matching lower* helper. If no case matches, an empty SDValue is
// returned.
// NOTE(review): the first line of the signature and many `case` labels are
// not visible in this listing, so several `return` statements below appear
// without their label.
572 SelectionDAG &DAG) const {
573 switch (Op.getOpcode()) {
575 return lowerATOMIC_FENCE(Op, DAG);
577 return lowerEH_DWARF_CFA(Op, DAG);
579 return lowerGlobalAddress(Op, DAG);
581 return lowerGlobalTLSAddress(Op, DAG);
583 return lowerINTRINSIC_WO_CHAIN(Op, DAG);
585 return lowerINTRINSIC_W_CHAIN(Op, DAG);
587 return lowerINTRINSIC_VOID(Op, DAG);
589 return lowerBlockAddress(Op, DAG);
590 case ISD::JumpTable:
591 return lowerJumpTable(Op, DAG);
592 case ISD::SHL_PARTS:
593 return lowerShiftLeftParts(Op, DAG);
    // SRA_PARTS and SRL_PARTS share one helper; the boolean argument is true
    // for the SRA (arithmetic) form and false for the SRL (logical) form.
594 case ISD::SRA_PARTS:
595 return lowerShiftRightParts(Op, DAG, true);
596 case ISD::SRL_PARTS:
597 return lowerShiftRightParts(Op, DAG, false);
599 return lowerConstantPool(Op, DAG);
600 case ISD::FP_TO_SINT:
601 return lowerFP_TO_SINT(Op, DAG);
602 case ISD::FP_TO_UINT:
603 return lowerFP_TO_UINT(Op, DAG);
604 case ISD::BITCAST:
605 return lowerBITCAST(Op, DAG);
606 case ISD::UINT_TO_FP:
607 return lowerUINT_TO_FP(Op, DAG);
608 case ISD::SINT_TO_FP:
609 return lowerSINT_TO_FP(Op, DAG);
610 case ISD::VASTART:
611 return lowerVASTART(Op, DAG);
612 case ISD::FRAMEADDR:
613 return lowerFRAMEADDR(Op, DAG);
614 case ISD::RETURNADDR:
615 return lowerRETURNADDR(Op, DAG);
617 return lowerWRITE_REGISTER(Op, DAG);
619 return lowerINSERT_VECTOR_ELT(Op, DAG);
621 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
623 return lowerBUILD_VECTOR(Op, DAG);
625 return lowerCONCAT_VECTORS(Op, DAG);
627 return lowerVECTOR_SHUFFLE(Op, DAG);
628 case ISD::BITREVERSE:
629 return lowerBITREVERSE(Op, DAG);
631 return lowerSCALAR_TO_VECTOR(Op, DAG);
632 case ISD::PREFETCH:
633 return lowerPREFETCH(Op, DAG);
634 case ISD::SELECT:
635 return lowerSELECT(Op, DAG);
636 case ISD::BRCOND:
637 return lowerBRCOND(Op, DAG);
638 case ISD::FP_TO_FP16:
639 return lowerFP_TO_FP16(Op, DAG);
640 case ISD::FP16_TO_FP:
641 return lowerFP16_TO_FP(Op, DAG);
642 case ISD::FP_TO_BF16:
643 return lowerFP_TO_BF16(Op, DAG);
644 case ISD::BF16_TO_FP:
645 return lowerBF16_TO_FP(Op, DAG);
647 return lowerVECREDUCE_ADD(Op, DAG);
    // Both rotate directions are handled by the same helper.
648 case ISD::ROTL:
649 case ISD::ROTR:
650 return lowerRotate(Op, DAG);
658 return lowerVECREDUCE(Op, DAG);
659 case ISD::ConstantFP:
660 return lowerConstantFP(Op, DAG);
661 case ISD::SETCC:
662 return lowerSETCC(Op, DAG);
663 case ISD::FP_ROUND:
664 return lowerFP_ROUND(Op, DAG);
665 case ISD::FP_EXTEND:
666 return lowerFP_EXTEND(Op, DAG);
668 return lowerSIGN_EXTEND_VECTOR_INREG(Op, DAG);
670 return lowerDYNAMIC_STACKALLOC(Op, DAG);
671 }
    // No case matched: hand back an empty SDValue.
672 return SDValue();
673}
674
675// Helper to attempt to return a cheaper, bit-inverted version of \p V.
// Bitcasts on \p V are looked through first. If one of the patterns below
// matches, the value whose bitwise NOT equals \p V is returned (new nodes may
// be created in \p DAG); otherwise an empty SDValue is returned.
// NOTE(review): the signature line is not visible in this listing; the body
// uses the names `V` and `DAG` and calls itself recursively as
// `isNOT(SDValue, DAG)`.
677 // TODO: don't always ignore oneuse constraints.
678 V = peekThroughBitcasts(V);
679 EVT VT = V.getValueType();
680
681 // Match not(xor X, -1) -> X.
682 if (V.getOpcode() == ISD::XOR &&
683 (ISD::isBuildVectorAllOnes(V.getOperand(1).getNode()) ||
684 isAllOnesConstant(V.getOperand(1))))
685 return V.getOperand(0);
686
687 // Match not(extract_subvector(not(X))) -> extract_subvector(X).
    // Only attempted when the extract starts at index 0 or its source vector
    // has a single use.
688 if (V.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
689 (isNullConstant(V.getOperand(1)) || V.getOperand(0).hasOneUse())) {
690 if (SDValue Not = isNOT(V.getOperand(0), DAG)) {
    // The recursive result may have a different type (bitcasts were peeked
    // through), so cast it back to the extract's source type.
691 Not = DAG.getBitcast(V.getOperand(0).getValueType(), Not);
692 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(Not), VT, Not,
693 V.getOperand(1));
694 }
695 }
696
697 // Match not(SplatVector(not(X))) -> SplatVector(X).
698 if (V.getOpcode() == ISD::BUILD_VECTOR) {
699 if (SDValue SplatValue =
700 cast<BuildVectorSDNode>(V.getNode())->getSplatValue()) {
    // Give up entirely unless this build_vector is the only user of the
    // splatted value.
701 if (!V->isOnlyUserOf(SplatValue.getNode()))
702 return SDValue();
703
704 if (SDValue Not = isNOT(SplatValue, DAG)) {
705 Not = DAG.getBitcast(V.getOperand(0).getValueType(), Not);
706 return DAG.getSplat(VT, SDLoc(Not), Not);
707 }
708 }
709 }
710
711 // Match not(or(not(X),not(Y))) -> and(X, Y).
    // Requires a legal type and that both OR operands have a single use.
712 if (V.getOpcode() == ISD::OR && DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
713 V.getOperand(0).hasOneUse() && V.getOperand(1).hasOneUse()) {
714 // TODO: Handle cases with single NOT operand -> VANDN
715 if (SDValue Op1 = isNOT(V.getOperand(1), DAG))
716 if (SDValue Op0 = isNOT(V.getOperand(0), DAG))
717 return DAG.getNode(ISD::AND, SDLoc(V), VT, DAG.getBitcast(VT, Op0),
718 DAG.getBitcast(VT, Op1));
719 }
720
721 // TODO: Add more matching patterns. Such as,
722 // not(concat_vectors(not(X), not(Y))) -> concat_vectors(X, Y).
723 // not(slt(C, X)) -> slt(X - 1, C)
724 return SDValue();
725}
726
727// Combine two ISD::FP_ROUND / LoongArchISD::VFCVT nodes with same type to
728// LoongArchISD::VFCVT. For example:
729// x1 = fp_round x, 0
730// y1 = fp_round y, 0
731// z = concat_vectors x1, y1
732// Or
733// x1 = LoongArch::VFCVT undef, x
734// y1 = LoongArch::VFCVT undef, y
735// z = LoongArchISD::VPACKEV y1, x1; or LoongArchISD::VPERMI y1, x1, 68
736// can be combined to:
737// z = LoongArch::VFCVT y, x
// Returns the combined node, or an empty SDValue when the pattern does not
// apply.
// NOTE(review): the first line of the signature is not visible in this
// listing; the body uses `N`, `DL`, `DAG` and `Subtarget`.
739 const LoongArchSubtarget &Subtarget) {
740 assert(((N->getOpcode() == ISD::CONCAT_VECTORS && N->getNumOperands() == 2) ||
741 (N->getOpcode() == LoongArchISD::VPACKEV) ||
742 (N->getOpcode() == LoongArchISD::VPERMI)) &&
743 "Invalid Node");
744
    // Both inputs must be produced by the same opcode, and that opcode must
    // be FP_ROUND or VFCVT (bitcasts in between are ignored).
745 SDValue Op0 = peekThroughBitcasts(N->getOperand(0));
746 SDValue Op1 = peekThroughBitcasts(N->getOperand(1));
747 unsigned Opcode0 = Op0.getOpcode();
748 unsigned Opcode1 = Op1.getOpcode();
749 if (Opcode0 != Opcode1)
750 return SDValue();
751
752 if (Opcode0 != ISD::FP_ROUND && Opcode0 != LoongArchISD::VFCVT)
753 return SDValue();
754
755 // Check if two nodes have only one use.
756 if (!Op0.hasOneUse() || !Op1.hasOneUse())
757 return SDValue();
758
759 EVT VT = N.getValueType();
760 EVT SVT0 = Op0.getValueType();
761 EVT SVT1 = Op1.getValueType();
762 // Check if two nodes have the same result type.
763 if (SVT0 != SVT1)
764 return SDValue();
765
766 // Check if two nodes have the same operand type.
767 EVT SSVT0 = Op0.getOperand(0).getValueType();
768 EVT SSVT1 = Op1.getOperand(0).getValueType();
769 if (SSVT0 != SSVT1)
770 return SDValue();
771
    // Case 1: concat_vectors of two fp_round nodes. Only the LASX form
    // (two v4f64 -> v4f32 roundings feeding a 256-bit result) is handled.
772 if (N->getOpcode() == ISD::CONCAT_VECTORS && Opcode0 == ISD::FP_ROUND) {
773 if (Subtarget.hasExtLASX() && VT.is256BitVector() && SVT0 == MVT::v4f32 &&
774 SSVT0 == MVT::v4f64) {
775 // A vector_shuffle is required in the final step, as xvfcvt instruction
776 // operates on each 128-bit segment as a lane.
777 SDValue Res = DAG.getNode(LoongArchISD::VFCVT, DL, MVT::v8f32,
778 Op1.getOperand(0), Op0.getOperand(0));
779 SDValue Undef = DAG.getUNDEF(Res.getValueType());
780 // After VFCVT, the high part of Res comes from the high parts of Op0 and
781 // Op1, and the low part comes from the low parts of Op0 and Op1. However,
782 // the desired order requires Op0 to fully occupy the lower half and Op1
783 // the upper half of Res. The Mask reorders the elements of Res to achieve
784 // this:
785 // - The first four elements (0, 1, 4, 5) come from Op0.
786 // - The next four elements (2, 3, 6, 7) come from Op1.
787 SmallVector<int, 8> Mask = {0, 1, 4, 5, 2, 3, 6, 7};
788 Res = DAG.getVectorShuffle(Res.getValueType(), DL, Res, Undef, Mask);
789 return DAG.getBitcast(VT, Res);
790 }
791 }
792
    // Case 2: VPACKEV / VPERMI of two VFCVT nodes.
793 if ((N->getOpcode() == LoongArchISD::VPACKEV ||
794 N->getOpcode() == LoongArchISD::VPERMI) &&
795 Opcode0 == LoongArchISD::VFCVT) {
796 // For VPACKEV or VPERMI, check if the first operation of VFCVT is undef.
797 if (!Op0.getOperand(0).isUndef() || !Op1.getOperand(0).isUndef())
798 return SDValue();
799
    // Restricted to LSX with VFCVT nodes typed v4f32 whose first operand is
    // typed v2f64.
800 if (!Subtarget.hasExtLSX() || SVT0 != MVT::v4f32 || SSVT0 != MVT::v2f64)
801 return SDValue();
802
803 if (N->getOpcode() == LoongArchISD::VPACKEV &&
804 (VT == MVT::v2i64 || VT == MVT::v2f64)) {
805 SDValue Res = DAG.getNode(LoongArchISD::VFCVT, DL, MVT::v4f32,
806 Op0.getOperand(1), Op1.getOperand(1));
807 return DAG.getBitcast(VT, Res);
808 }
809
810 if (N->getOpcode() == LoongArchISD::VPERMI && VT == MVT::v4f32) {
    // Only the immediate 68 from the pattern in the header comment is
    // recognized.
811 int64_t Imm = cast<ConstantSDNode>(N->getOperand(2))->getSExtValue();
812 if (Imm != 68)
813 return SDValue();
814 return DAG.getNode(LoongArchISD::VFCVT, DL, MVT::v4f32, Op0.getOperand(1),
815 Op1.getOperand(1));
816 }
817 }
818
819 return SDValue();
820}
821
822SDValue LoongArchTargetLowering::lowerFP_ROUND(SDValue Op,
823 SelectionDAG &DAG) const {
824 SDLoc DL(Op);
825 SDValue In = Op.getOperand(0);
826 MVT VT = Op.getSimpleValueType();
827 MVT SVT = In.getSimpleValueType();
828
829 if (VT == MVT::v4f32 && SVT == MVT::v4f64) {
830 SDValue Lo, Hi;
831 std::tie(Lo, Hi) = DAG.SplitVector(In, DL);
832 return DAG.getNode(LoongArchISD::VFCVT, DL, VT, Hi, Lo);
833 }
834
835 return SDValue();
836}
837
// Custom lowering for vector ISD::FP_EXTEND. Two conversions are handled:
// v2f32 -> v2f64 when LSX is available and v4f32 -> v4f64 when LASX is
// available; any other combination yields an empty SDValue. The result is a
// LoongArchISD::VFCVTH or LoongArchISD::VFCVTL node applied to a vector with
// twice as many elements as the source.
838SDValue LoongArchTargetLowering::lowerFP_EXTEND(SDValue Op,
839 SelectionDAG &DAG) const {
840
841 SDLoc DL(Op);
842 EVT VT = Op.getValueType();
843 SDValue Src = Op->getOperand(0);
844 EVT SVT = Src.getValueType();
845
846 bool V2F32ToV2F64 =
847 VT == MVT::v2f64 && SVT == MVT::v2f32 && Subtarget.hasExtLSX();
848 bool V4F32ToV4F64 =
849 VT == MVT::v4f64 && SVT == MVT::v4f32 && Subtarget.hasExtLASX();
850 if (!V2F32ToV2F64 && !V4F32ToV4F64)
851 return SDValue();
852
853 // Check if Op is the high part of vector.
    // Returns the vector being extracted from when Op is an EXTRACT_SUBVECTOR
    // whose start index equals half of that vector's element count; returns
    // an empty SDValue otherwise.
    // NOTE(review): one statement at the top of this lambda is not visible
    // in this listing.
854 auto CheckVecHighPart = [](SDValue Op) {
856 if (Op.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
857 SDValue SOp = Op.getOperand(0);
858 EVT SVT = SOp.getValueType();
859 if (!SVT.isVector() || (SVT.getVectorNumElements() % 2 != 0))
860 return SDValue();
861
862 const uint64_t Imm = Op.getConstantOperandVal(1);
863 if (Imm == SVT.getVectorNumElements() / 2)
864 return SOp;
865 return SDValue();
866 }
867 return SDValue();
868 };
869
870 unsigned Opcode;
871 SDValue VFCVTOp;
    // Source element type with double the number of elements.
872 EVT WideOpVT = SVT.getSimpleVT().getDoubleNumVectorElementsVT();
873 SDValue ZeroIdx = DAG.getVectorIdxConstant(0, DL);
874
875 // If the operand of ISD::FP_EXTEND comes from the high part of vector,
876 // generate LoongArchISD::VFCVTH, otherwise LoongArchISD::VFCVTL.
877 if (SDValue V = CheckVecHighPart(Src)) {
878 assert(V.getValueSizeInBits() == WideOpVT.getSizeInBits() &&
879 "Unexpected wide vector");
880 Opcode = LoongArchISD::VFCVTH;
881 VFCVTOp = DAG.getBitcast(WideOpVT, V);
882 } else {
    // Put Src into the low elements of an otherwise undefined wide vector.
883 Opcode = LoongArchISD::VFCVTL;
884 VFCVTOp = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideOpVT,
885 DAG.getUNDEF(WideOpVT), Src, ZeroIdx);
886 }
887
888 // v2f64 = fp_extend v2f32
889 if (V2F32ToV2F64)
890 return DAG.getNode(Opcode, DL, VT, VFCVTOp);
891
892 // v4f64 = fp_extend v4f32
893 if (V4F32ToV4F64) {
894 // XVFCVT instruction operates on each 128-bit segment as a lane, so a
895 // vector_shuffle is required first.
896 SmallVector<int, 8> Mask = {0, 1, 4, 5, 2, 3, 6, 7};
897 SDValue Res = DAG.getVectorShuffle(WideOpVT, DL, VFCVTOp,
898 DAG.getUNDEF(WideOpVT), Mask);
899 Res = DAG.getNode(Opcode, DL, VT, Res);
900 return Res;
901 }
902
903 return SDValue();
904}
905
// Custom lowering for ISD::ConstantFP of type f32 (with 'F') or f64 (with
// 'D'). The constant's bit pattern is built as an integer constant and moved
// into a floating-point register. An empty SDValue is returned when:
//   - the value is +0.0 or -0.0,
//   - isFPImmVLDILegal accepts the value, or
//   - the estimated instruction count exceeds MaterializeFPImmInsNum and the
//     value is not exactly +1.0.
906SDValue LoongArchTargetLowering::lowerConstantFP(SDValue Op,
907 SelectionDAG &DAG) const {
908 EVT VT = Op.getValueType();
909 ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op);
910 const APFloat &FPVal = CFP->getValueAPF();
911 SDLoc DL(CFP);
912
913 assert((VT == MVT::f32 && Subtarget.hasBasicF()) ||
914 (VT == MVT::f64 && Subtarget.hasBasicD()));
915
916 // If value is 0.0 or -0.0, just ignore it.
917 if (FPVal.isZero())
918 return SDValue();
919
920 // If lsx enabled, use cheaper 'vldi' instruction if possible.
921 if (isFPImmVLDILegal(FPVal, VT))
922 return SDValue();
923
924 // Construct as integer, and move to float register.
925 APInt INTVal = FPVal.bitcastToAPInt();
926
927 // If more than MaterializeFPImmInsNum instructions will be used to
928 // generate the INTVal and move it to float register, fallback to
929 // use floating point load from the constant pool.
    // NOTE(review): `Seq` is defined on a line that is not visible in this
    // listing; presumably it is the instruction sequence that materializes
    // INTVal — confirm.
    // The move to the FP register is counted as 2 for an f64 on a 32-bit
    // target and as 1 otherwise.
931 int InsNum = Seq.size() + ((VT == MVT::f64 && !Subtarget.is64Bit()) ? 2 : 1);
932 if (InsNum > MaterializeFPImmInsNum && !FPVal.isExactlyValue(+1.0))
933 return SDValue();
934
935 switch (VT.getSimpleVT().SimpleTy) {
936 default:
937 llvm_unreachable("Unexpected floating point type!");
938 break;
939 case MVT::f32: {
    // On a 64-bit target the i32 constant is any-extended to i64 first and
    // the _LA64 form of the move node is used.
940 SDValue NewVal = DAG.getConstant(INTVal, DL, MVT::i32);
941 if (Subtarget.is64Bit())
942 NewVal = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, NewVal);
943 return DAG.getNode(Subtarget.is64Bit() ? LoongArchISD::MOVGR2FR_W_LA64
944 : LoongArchISD::MOVGR2FR_W,
945 DL, VT, NewVal);
946 }
947 case MVT::f64: {
948 if (Subtarget.is64Bit()) {
949 SDValue NewVal = DAG.getConstant(INTVal, DL, MVT::i64);
950 return DAG.getNode(LoongArchISD::MOVGR2FR_D, DL, VT, NewVal);
951 }
    // 32-bit target: split the 64-bit pattern into its low and high 32-bit
    // halves and combine them with a single node.
952 SDValue Lo = DAG.getConstant(INTVal.trunc(32), DL, MVT::i32);
953 SDValue Hi = DAG.getConstant(INTVal.lshr(32).trunc(32), DL, MVT::i32);
954 return DAG.getNode(LoongArchISD::MOVGR2FR_D_LO_HI, DL, VT, Lo, Hi);
955 }
956 }
957
958 return SDValue();
959}
960
961// Ensure SETCC result and operand have the same bit width; isel does not
962// support mismatched widths.
963SDValue LoongArchTargetLowering::lowerSETCC(SDValue Op,
964 SelectionDAG &DAG) const {
965 SDLoc DL(Op);
966 EVT ResultVT = Op.getValueType();
967 EVT OperandVT = Op.getOperand(0).getValueType();
968
969 EVT SetCCResultVT =
970 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), OperandVT);
971
972 if (ResultVT == SetCCResultVT)
973 return Op;
974
975 assert(Op.getOperand(0).getValueType() == Op.getOperand(1).getValueType() &&
976 "SETCC operands must have the same type!");
977
978 SDValue SetCCNode =
979 DAG.getNode(ISD::SETCC, DL, SetCCResultVT, Op.getOperand(0),
980 Op.getOperand(1), Op.getOperand(2));
981
982 if (ResultVT.bitsGT(SetCCResultVT))
983 SetCCNode = DAG.getNode(ISD::SIGN_EXTEND, DL, ResultVT, SetCCNode);
984 else if (ResultVT.bitsLT(SetCCResultVT))
985 SetCCNode = DAG.getNode(ISD::TRUNCATE, DL, ResultVT, SetCCNode);
986
987 return SetCCNode;
988}
989
990// Lower sext_invec using vslti instructions.
991// For example:
992// %b = sext <4 x i16> %a to <4 x i32>
993// can be lowered to:
994// VSLTI_H vr2, vr1, 0
995// VILVL.H vr1, vr2, vr1
996SDValue LoongArchTargetLowering::lowerSIGN_EXTEND_VECTOR_INREG(
997 SDValue Op, SelectionDAG &DAG) const {
998 SDLoc DL(Op);
999 SDValue Src = Op.getOperand(0);
1000 MVT SrcVT = Src.getSimpleValueType();
1001 MVT DstVT = Op.getSimpleValueType();
1002
1003 if (!SrcVT.is128BitVector())
1004 return SDValue();
1005
1006 // lower to VSLTI + VILVL if extend could be done in single step.
1007 if (DstVT.getScalarSizeInBits() / SrcVT.getScalarSizeInBits() == 2) {
1008 SDValue Zero = DAG.getConstant(0, DL, SrcVT);
1009 SDValue Mask = DAG.getNode(ISD::SETCC, DL, SrcVT, Src, Zero,
1010 DAG.getCondCode(ISD::SETLT));
1011 SDValue LoInterleaved =
1012 DAG.getNode(LoongArchISD::VILVL, DL, SrcVT, Mask, Src);
1013
1014 return DAG.getBitcast(DstVT, LoInterleaved);
1015 }
1016
1017 return SDValue();
1018}
1019
1020// Lower vecreduce_add using vhaddw instructions.
1021// For Example:
1022// call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a)
1023// can be lowered to:
1024// VHADDW_D_W vr0, vr0, vr0
1025// VHADDW_Q_D vr0, vr0, vr0
1026// VPICKVE2GR_D a0, vr0, 0
1027// ADDI_W a0, a0, 0
1028SDValue LoongArchTargetLowering::lowerVECREDUCE_ADD(SDValue Op,
1029 SelectionDAG &DAG) const {
1030
1031 SDLoc DL(Op);
1032 MVT OpVT = Op.getSimpleValueType();
1033 SDValue Val = Op.getOperand(0);
1034
1035 unsigned NumEles = Val.getSimpleValueType().getVectorNumElements();
1036 unsigned EleBits = Val.getSimpleValueType().getScalarSizeInBits();
1037 unsigned ResBits = OpVT.getScalarSizeInBits();
1038
1039 unsigned LegalVecSize = 128;
1040 bool isLASX256Vector =
1041 Subtarget.hasExtLASX() && Val.getValueSizeInBits() == 256;
1042
1043 // Ensure operand type legal or enable it legal.
1044 while (!isTypeLegal(Val.getSimpleValueType())) {
1045 Val = DAG.WidenVector(Val, DL);
1046 }
1047
1048 // NumEles is designed for iterations count, v4i32 for LSX
1049 // and v8i32 for LASX should have the same count.
1050 if (isLASX256Vector) {
1051 NumEles /= 2;
1052 LegalVecSize = 256;
1053 }
1054
1055 EleBits *= 2;
1056 for (unsigned i = 1; i < NumEles; i *= 2, EleBits *= 2) {
1057 EleBits = std::min(EleBits, 64u);
1058 MVT IntTy = MVT::getIntegerVT(EleBits);
1059 MVT VecTy = MVT::getVectorVT(IntTy, LegalVecSize / EleBits);
1060 Val = DAG.getNode(LoongArchISD::VHADDW, DL, VecTy, Val, Val);
1061 }
1062
1063 if (isLASX256Vector) {
1064 SDValue Tmp = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, Val,
1065 DAG.getConstant(2, DL, Subtarget.getGRLenVT()));
1066 Val = DAG.getNode(ISD::ADD, DL, MVT::v4i64, Tmp, Val);
1067 }
1068
1069 Val = DAG.getBitcast(MVT::getVectorVT(OpVT, LegalVecSize / ResBits), Val);
1070 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Val,
1071 DAG.getConstant(0, DL, Subtarget.getGRLenVT()));
1072}
1073
1074// Lower vecreduce_and/or/xor/[s/u]max/[s/u]min.
1075// For Example:
1076// call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %a)
1077// can be lowered to:
1078// VBSRL_V vr1, vr0, 8
1079// VMAX_W vr0, vr1, vr0
1080// VBSRL_V vr1, vr0, 4
1081// VMAX_W vr0, vr1, vr0
1082// VPICKVE2GR_W a0, vr0, 0
1083// For 256 bit vector, it is illegal and will be spilt into
1084// two 128 bit vector by default then processed by this.
1085SDValue LoongArchTargetLowering::lowerVECREDUCE(SDValue Op,
1086 SelectionDAG &DAG) const {
1087 SDLoc DL(Op);
1088
1089 MVT OpVT = Op.getSimpleValueType();
1090 SDValue Val = Op.getOperand(0);
1091
1092 unsigned NumEles = Val.getSimpleValueType().getVectorNumElements();
1093 unsigned EleBits = Val.getSimpleValueType().getScalarSizeInBits();
1094
1095 // Ensure operand type legal or enable it legal.
1096 while (!isTypeLegal(Val.getSimpleValueType())) {
1097 Val = DAG.WidenVector(Val, DL);
1098 }
1099
1100 unsigned Opcode = ISD::getVecReduceBaseOpcode(Op.getOpcode());
1101 MVT VecTy = Val.getSimpleValueType();
1102 MVT GRLenVT = Subtarget.getGRLenVT();
1103
1104 for (int i = NumEles; i > 1; i /= 2) {
1105 SDValue ShiftAmt = DAG.getConstant(i * EleBits / 16, DL, GRLenVT);
1106 SDValue Tmp = DAG.getNode(LoongArchISD::VBSRL, DL, VecTy, Val, ShiftAmt);
1107 Val = DAG.getNode(Opcode, DL, VecTy, Tmp, Val);
1108 }
1109
1110 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Val,
1111 DAG.getConstant(0, DL, GRLenVT));
1112}
1113
1114SDValue LoongArchTargetLowering::lowerPREFETCH(SDValue Op,
1115 SelectionDAG &DAG) const {
1116 unsigned IsData = Op.getConstantOperandVal(4);
1117
1118 // We don't support non-data prefetch.
1119 // Just preserve the chain.
1120 if (!IsData)
1121 return Op.getOperand(0);
1122
1123 return Op;
1124}
1125
1126SDValue LoongArchTargetLowering::lowerRotate(SDValue Op,
1127 SelectionDAG &DAG) const {
1128 MVT VT = Op.getSimpleValueType();
1129 assert(VT.isVector() && "Unexpected type");
1130
1131 SDLoc DL(Op);
1132 SDValue R = Op.getOperand(0);
1133 SDValue Amt = Op.getOperand(1);
1134 unsigned Opcode = Op.getOpcode();
1135 unsigned EltSizeInBits = VT.getScalarSizeInBits();
1136
1137 auto checkCstSplat = [](SDValue V, APInt &CstSplatValue) {
1138 if (V.getOpcode() != ISD::BUILD_VECTOR)
1139 return false;
1140 if (SDValue SplatValue =
1141 cast<BuildVectorSDNode>(V.getNode())->getSplatValue()) {
1142 if (auto *C = dyn_cast<ConstantSDNode>(SplatValue)) {
1143 CstSplatValue = C->getAPIntValue();
1144 return true;
1145 }
1146 }
1147 return false;
1148 };
1149
1150 // Check for constant splat rotation amount.
1151 APInt CstSplatValue;
1152 bool IsCstSplat = checkCstSplat(Amt, CstSplatValue);
1153 bool isROTL = Opcode == ISD::ROTL;
1154
1155 // Check for splat rotate by zero.
1156 if (IsCstSplat && CstSplatValue.urem(EltSizeInBits) == 0)
1157 return R;
1158
1159 // LoongArch targets always prefer ISD::ROTR.
1160 if (isROTL) {
1161 SDValue Zero = DAG.getConstant(0, DL, VT);
1162 return DAG.getNode(ISD::ROTR, DL, VT, R,
1163 DAG.getNode(ISD::SUB, DL, VT, Zero, Amt));
1164 }
1165
1166 // Rotate by a immediate.
1167 if (IsCstSplat) {
1168 // ISD::ROTR: Attemp to rotate by a positive immediate.
1169 SDValue Bits = DAG.getConstant(EltSizeInBits, DL, VT);
1170 if (SDValue Urem =
1171 DAG.FoldConstantArithmetic(ISD::UREM, DL, VT, {Amt, Bits}))
1172 return DAG.getNode(Opcode, DL, VT, R, Urem);
1173 }
1174
1175 return Op;
1176}
1177
1178// Return true if Val is equal to (setcc LHS, RHS, CC).
1179// Return false if Val is the inverse of (setcc LHS, RHS, CC).
1180// Otherwise, return std::nullopt.
// Val must itself be an ISD::SETCC node (asserted). Its operands are compared
// against LHS/RHS both in the given order and swapped.
1181static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS,
1182 ISD::CondCode CC, SDValue Val) {
1183 assert(Val->getOpcode() == ISD::SETCC);
1184 SDValue LHS2 = Val.getOperand(0);
1185 SDValue RHS2 = Val.getOperand(1);
1186 ISD::CondCode CC2 = cast<CondCodeSDNode>(Val.getOperand(2))->get();
1187
1188 if (LHS == LHS2 && RHS == RHS2) {
    // Same operands in the same order: compare the condition codes directly.
1189 if (CC == CC2)
1190 return true;
1191 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
1192 return false;
1193 } else if (LHS == RHS2 && RHS == LHS2) {
    // NOTE(review): a statement at the start of this branch is not visible
    // in this listing; presumably it rewrites CC2 for the swapped operand
    // order before the comparisons below — confirm.
1195 if (CC == CC2)
1196 return true;
1197 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
1198 return false;
1199 }
1200
1201 return std::nullopt;
1202}
1203
// Tries to replace a select node N (operand 0: condition, operand 1: true
// value, operand 2: false value) with plain bitwise/arithmetic operations on
// the condition. Returns the replacement, or an empty SDValue when none of
// the patterns below applies.
// NOTE(review): the first line of the signature is not visible in this
// listing; the body uses `N`, `DAG` and `Subtarget`. The rewrites that
// negate or decrement CondV presumably rely on the condition being 0 or 1 in
// VT — confirm against the caller.
1205 const LoongArchSubtarget &Subtarget) {
1206 SDValue CondV = N->getOperand(0);
1207 SDValue TrueV = N->getOperand(1);
1208 SDValue FalseV = N->getOperand(2);
1209 MVT VT = N->getSimpleValueType(0);
1210 SDLoc DL(N);
1211
1212 // (select c, -1, y) -> -c | y
1213 if (isAllOnesConstant(TrueV)) {
1214 SDValue Neg = DAG.getNegative(CondV, DL, VT);
1215 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(FalseV));
1216 }
1217 // (select c, y, -1) -> (c-1) | y
1218 if (isAllOnesConstant(FalseV)) {
1219 SDValue Neg =
1220 DAG.getNode(ISD::ADD, DL, VT, CondV, DAG.getAllOnesConstant(DL, VT));
1221 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(TrueV));
1222 }
1223
1224 // (select c, 0, y) -> (c-1) & y
1225 if (isNullConstant(TrueV)) {
1226 SDValue Neg =
1227 DAG.getNode(ISD::ADD, DL, VT, CondV, DAG.getAllOnesConstant(DL, VT));
1228 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(FalseV));
1229 }
1230 // (select c, y, 0) -> -c & y
1231 if (isNullConstant(FalseV)) {
1232 SDValue Neg = DAG.getNegative(CondV, DL, VT);
1233 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(TrueV));
1234 }
1235
1236 // select c, ~x, x --> xor -c, x
    // Applies only when both arms are constants that are bitwise complements
    // of each other.
1237 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
1238 const APInt &TrueVal = TrueV->getAsAPIntVal();
1239 const APInt &FalseVal = FalseV->getAsAPIntVal();
1240 if (~TrueVal == FalseVal) {
1241 SDValue Neg = DAG.getNegative(CondV, DL, VT);
1242 return DAG.getNode(ISD::XOR, DL, VT, Neg, FalseV);
1243 }
1244 }
1245
1246 // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops
1247 // when both truev and falsev are also setcc.
1248 if (CondV.getOpcode() == ISD::SETCC && TrueV.getOpcode() == ISD::SETCC &&
1249 FalseV.getOpcode() == ISD::SETCC) {
1250 SDValue LHS = CondV.getOperand(0);
1251 SDValue RHS = CondV.getOperand(1);
1252 ISD::CondCode CC = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
1253
1254 // (select x, x, y) -> x | y
1255 // (select !x, x, y) -> x & y
1256 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, TrueV)) {
1257 return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, TrueV,
1258 DAG.getFreeze(FalseV));
1259 }
1260 // (select x, y, x) -> x & y
1261 // (select !x, y, x) -> x | y
1262 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, FalseV)) {
1263 return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT,
1264 DAG.getFreeze(TrueV), FalseV);
1265 }
1266 }
1267
1268 return SDValue();
1269}
1270
1271// Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants
1272// into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable.
1273// For now we only consider transformation profitable if `binOp(c0, c1)` ends up
1274// being `0` or `-1`. In such cases we can replace `select` with `and`.
1275// TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize
1276// than `c0`?
1277static SDValue
1279 const LoongArchSubtarget &Subtarget) {
1280 unsigned SelOpNo = 0;
1281 SDValue Sel = BO->getOperand(0);
1282 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
1283 SelOpNo = 1;
1284 Sel = BO->getOperand(1);
1285 }
1286
1287 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
1288 return SDValue();
1289
1290 unsigned ConstSelOpNo = 1;
1291 unsigned OtherSelOpNo = 2;
1292 if (!isa<ConstantSDNode>(Sel->getOperand(ConstSelOpNo))) {
1293 ConstSelOpNo = 2;
1294 OtherSelOpNo = 1;
1295 }
1296 SDValue ConstSelOp = Sel->getOperand(ConstSelOpNo);
1297 ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(ConstSelOp);
1298 if (!ConstSelOpNode || ConstSelOpNode->isOpaque())
1299 return SDValue();
1300
1301 SDValue ConstBinOp = BO->getOperand(SelOpNo ^ 1);
1302 ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(ConstBinOp);
1303 if (!ConstBinOpNode || ConstBinOpNode->isOpaque())
1304 return SDValue();
1305
1306 SDLoc DL(Sel);
1307 EVT VT = BO->getValueType(0);
1308
1309 SDValue NewConstOps[2] = {ConstSelOp, ConstBinOp};
1310 if (SelOpNo == 1)
1311 std::swap(NewConstOps[0], NewConstOps[1]);
1312
1313 SDValue NewConstOp =
1314 DAG.FoldConstantArithmetic(BO->getOpcode(), DL, VT, NewConstOps);
1315 if (!NewConstOp)
1316 return SDValue();
1317
1318 const APInt &NewConstAPInt = NewConstOp->getAsAPIntVal();
1319 if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes())
1320 return SDValue();
1321
1322 SDValue OtherSelOp = Sel->getOperand(OtherSelOpNo);
1323 SDValue NewNonConstOps[2] = {OtherSelOp, ConstBinOp};
1324 if (SelOpNo == 1)
1325 std::swap(NewNonConstOps[0], NewNonConstOps[1]);
1326 SDValue NewNonConstOp = DAG.getNode(BO->getOpcode(), DL, VT, NewNonConstOps);
1327
1328 SDValue NewT = (ConstSelOpNo == 1) ? NewConstOp : NewNonConstOp;
1329 SDValue NewF = (ConstSelOpNo == 1) ? NewNonConstOp : NewConstOp;
1330 return DAG.getSelect(DL, VT, Sel.getOperand(0), NewT, NewF);
1331}
1332
1333// Changes the condition code and swaps operands if necessary, so the SetCC
1334// operation matches one of the comparisons supported directly by branches
1335// in the LoongArch ISA. May adjust compares to favor compare with 0 over
1336// compare with 1/-1.
1338 ISD::CondCode &CC, SelectionDAG &DAG) {
1339 // If this is a single bit test that can't be handled by ANDI, shift the
1340 // bit to be tested to the MSB and perform a signed compare with 0.
1341 if (isIntEqualitySetCC(CC) && isNullConstant(RHS) &&
1342 LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
1343 isa<ConstantSDNode>(LHS.getOperand(1))) {
1344 uint64_t Mask = LHS.getConstantOperandVal(1);
1345 if ((isPowerOf2_64(Mask) || isMask_64(Mask)) && !isInt<12>(Mask)) {
1346 unsigned ShAmt = 0;
1347 if (isPowerOf2_64(Mask)) {
1348 CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
1349 ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);
1350 } else {
1351 ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Mask);
1352 }
1353
1354 LHS = LHS.getOperand(0);
1355 if (ShAmt != 0)
1356 LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS,
1357 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
1358 return;
1359 }
1360 }
1361
1362 if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
1363 int64_t C = RHSC->getSExtValue();
1364 switch (CC) {
1365 default:
1366 break;
1367 case ISD::SETGT:
1368 // Convert X > -1 to X >= 0.
1369 if (C == -1) {
1370 RHS = DAG.getConstant(0, DL, RHS.getValueType());
1371 CC = ISD::SETGE;
1372 return;
1373 }
1374 break;
1375 case ISD::SETLT:
1376 // Convert X < 1 to 0 >= X.
1377 if (C == 1) {
1378 RHS = LHS;
1379 LHS = DAG.getConstant(0, DL, RHS.getValueType());
1380 CC = ISD::SETGE;
1381 return;
1382 }
1383 break;
1384 }
1385 }
1386
1387 switch (CC) {
1388 default:
1389 break;
1390 case ISD::SETGT:
1391 case ISD::SETLE:
1392 case ISD::SETUGT:
1393 case ISD::SETULE:
1395 std::swap(LHS, RHS);
1396 break;
1397 }
1398}
1399
1400SDValue LoongArchTargetLowering::lowerSELECT(SDValue Op,
1401 SelectionDAG &DAG) const {
1402 SDValue CondV = Op.getOperand(0);
1403 SDValue TrueV = Op.getOperand(1);
1404 SDValue FalseV = Op.getOperand(2);
1405 SDLoc DL(Op);
1406 MVT VT = Op.getSimpleValueType();
1407 MVT GRLenVT = Subtarget.getGRLenVT();
1408
1409 if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
1410 return V;
1411
1412 if (Op.hasOneUse()) {
1413 unsigned UseOpc = Op->user_begin()->getOpcode();
1414 if (isBinOp(UseOpc) && DAG.isSafeToSpeculativelyExecute(UseOpc)) {
1415 SDNode *BinOp = *Op->user_begin();
1416 if (SDValue NewSel = foldBinOpIntoSelectIfProfitable(*Op->user_begin(),
1417 DAG, Subtarget)) {
1418 DAG.ReplaceAllUsesWith(BinOp, &NewSel);
1419 // Opcode check is necessary because foldBinOpIntoSelectIfProfitable
1420 // may return a constant node and cause crash in lowerSELECT.
1421 if (NewSel.getOpcode() == ISD::SELECT)
1422 return lowerSELECT(NewSel, DAG);
1423 return NewSel;
1424 }
1425 }
1426 }
1427
1428 // If the condition is not an integer SETCC which operates on GRLenVT, we need
1429 // to emit a LoongArchISD::SELECT_CC comparing the condition to zero. i.e.:
1430 // (select condv, truev, falsev)
1431 // -> (loongarchisd::select_cc condv, zero, setne, truev, falsev)
1432 if (CondV.getOpcode() != ISD::SETCC ||
1433 CondV.getOperand(0).getSimpleValueType() != GRLenVT) {
1434 SDValue Zero = DAG.getConstant(0, DL, GRLenVT);
1435 SDValue SetNE = DAG.getCondCode(ISD::SETNE);
1436
1437 SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
1438
1439 return DAG.getNode(LoongArchISD::SELECT_CC, DL, VT, Ops);
1440 }
1441
1442 // If the CondV is the output of a SETCC node which operates on GRLenVT
1443 // inputs, then merge the SETCC node into the lowered LoongArchISD::SELECT_CC
1444 // to take advantage of the integer compare+branch instructions. i.e.: (select
1445 // (setcc lhs, rhs, cc), truev, falsev)
1446 // -> (loongarchisd::select_cc lhs, rhs, cc, truev, falsev)
1447 SDValue LHS = CondV.getOperand(0);
1448 SDValue RHS = CondV.getOperand(1);
1449 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
1450
1451 // Special case for a select of 2 constants that have a difference of 1.
1452 // Normally this is done by DAGCombine, but if the select is introduced by
1453 // type legalization or op legalization, we miss it. Restricting to SETLT
1454 // case for now because that is what signed saturating add/sub need.
1455 // FIXME: We don't need the condition to be SETLT or even a SETCC,
1456 // but we would probably want to swap the true/false values if the condition
1457 // is SETGE/SETLE to avoid an XORI.
1458 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
1459 CCVal == ISD::SETLT) {
1460 const APInt &TrueVal = TrueV->getAsAPIntVal();
1461 const APInt &FalseVal = FalseV->getAsAPIntVal();
1462 if (TrueVal - 1 == FalseVal)
1463 return DAG.getNode(ISD::ADD, DL, VT, CondV, FalseV);
1464 if (TrueVal + 1 == FalseVal)
1465 return DAG.getNode(ISD::SUB, DL, VT, FalseV, CondV);
1466 }
1467
1468 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
1469 // 1 < x ? x : 1 -> 0 < x ? x : 1
1470 if (isOneConstant(LHS) && (CCVal == ISD::SETLT || CCVal == ISD::SETULT) &&
1471 RHS == TrueV && LHS == FalseV) {
1472 LHS = DAG.getConstant(0, DL, VT);
1473 // 0 <u x is the same as x != 0.
1474 if (CCVal == ISD::SETULT) {
1475 std::swap(LHS, RHS);
1476 CCVal = ISD::SETNE;
1477 }
1478 }
1479
1480 // x <s -1 ? x : -1 -> x <s 0 ? x : -1
1481 if (isAllOnesConstant(RHS) && CCVal == ISD::SETLT && LHS == TrueV &&
1482 RHS == FalseV) {
1483 RHS = DAG.getConstant(0, DL, VT);
1484 }
1485
1486 SDValue TargetCC = DAG.getCondCode(CCVal);
1487
1488 if (isa<ConstantSDNode>(TrueV) && !isa<ConstantSDNode>(FalseV)) {
1489 // (select (setcc lhs, rhs, CC), constant, falsev)
1490 // -> (select (setcc lhs, rhs, InverseCC), falsev, constant)
1491 std::swap(TrueV, FalseV);
1492 TargetCC = DAG.getCondCode(ISD::getSetCCInverse(CCVal, LHS.getValueType()));
1493 }
1494
1495 SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
1496 return DAG.getNode(LoongArchISD::SELECT_CC, DL, VT, Ops);
1497}
1498
1499SDValue LoongArchTargetLowering::lowerBRCOND(SDValue Op,
1500 SelectionDAG &DAG) const {
1501 SDValue CondV = Op.getOperand(1);
1502 SDLoc DL(Op);
1503 MVT GRLenVT = Subtarget.getGRLenVT();
1504
1505 if (CondV.getOpcode() == ISD::SETCC) {
1506 if (CondV.getOperand(0).getValueType() == GRLenVT) {
1507 SDValue LHS = CondV.getOperand(0);
1508 SDValue RHS = CondV.getOperand(1);
1509 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
1510
1511 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
1512
1513 SDValue TargetCC = DAG.getCondCode(CCVal);
1514 return DAG.getNode(LoongArchISD::BR_CC, DL, Op.getValueType(),
1515 Op.getOperand(0), LHS, RHS, TargetCC,
1516 Op.getOperand(2));
1517 } else if (CondV.getOperand(0).getValueType().isFloatingPoint()) {
1518 return DAG.getNode(LoongArchISD::BRCOND, DL, Op.getValueType(),
1519 Op.getOperand(0), CondV, Op.getOperand(2));
1520 }
1521 }
1522
1523 return DAG.getNode(LoongArchISD::BR_CC, DL, Op.getValueType(),
1524 Op.getOperand(0), CondV, DAG.getConstant(0, DL, GRLenVT),
1525 DAG.getCondCode(ISD::SETNE), Op.getOperand(2));
1526}
1527
1528SDValue
1529LoongArchTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
1530 SelectionDAG &DAG) const {
1531 SDLoc DL(Op);
1532 MVT OpVT = Op.getSimpleValueType();
1533
1534 SDValue Vector = DAG.getUNDEF(OpVT);
1535 SDValue Val = Op.getOperand(0);
1536 SDValue Idx = DAG.getConstant(0, DL, Subtarget.getGRLenVT());
1537
1538 return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, OpVT, Vector, Val, Idx);
1539}
1540
1541SDValue LoongArchTargetLowering::lowerBITREVERSE(SDValue Op,
1542 SelectionDAG &DAG) const {
1543 EVT ResTy = Op->getValueType(0);
1544 SDValue Src = Op->getOperand(0);
1545 SDLoc DL(Op);
1546
1547 // LoongArchISD::BITREV_8B is not supported on LA32.
1548 if (!Subtarget.is64Bit() && (ResTy == MVT::v16i8 || ResTy == MVT::v32i8))
1549 return SDValue();
1550
1551 EVT NewVT = ResTy.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
1552 unsigned int OrigEltNum = ResTy.getVectorNumElements();
1553 unsigned int NewEltNum = NewVT.getVectorNumElements();
1554
1555 SDValue NewSrc = DAG.getNode(ISD::BITCAST, DL, NewVT, Src);
1556
1558 for (unsigned int i = 0; i < NewEltNum; i++) {
1559 SDValue Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, NewSrc,
1560 DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
1561 unsigned RevOp = (ResTy == MVT::v16i8 || ResTy == MVT::v32i8)
1562 ? (unsigned)LoongArchISD::BITREV_8B
1563 : (unsigned)ISD::BITREVERSE;
1564 Ops.push_back(DAG.getNode(RevOp, DL, MVT::i64, Op));
1565 }
1566 SDValue Res =
1567 DAG.getNode(ISD::BITCAST, DL, ResTy, DAG.getBuildVector(NewVT, DL, Ops));
1568
1569 switch (ResTy.getSimpleVT().SimpleTy) {
1570 default:
1571 return SDValue();
1572 case MVT::v16i8:
1573 case MVT::v32i8:
1574 return Res;
1575 case MVT::v8i16:
1576 case MVT::v16i16:
1577 case MVT::v4i32:
1578 case MVT::v8i32: {
1580 for (unsigned int i = 0; i < NewEltNum; i++)
1581 for (int j = OrigEltNum / NewEltNum - 1; j >= 0; j--)
1582 Mask.push_back(j + (OrigEltNum / NewEltNum) * i);
1583 return DAG.getVectorShuffle(ResTy, DL, Res, DAG.getUNDEF(ResTy), Mask);
1584 }
1585 }
1586}
1587
1588// Widen element type to get a new mask value (if possible).
1589// For example:
1590// shufflevector <4 x i32> %a, <4 x i32> %b,
1591// <4 x i32> <i32 6, i32 7, i32 2, i32 3>
1592// is equivalent to:
1593// shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
1594// can be lowered to:
1595// VPACKOD_D vr0, vr0, vr1
//
// Returns the shuffle re-expressed on a vector with half as many elements of
// twice the width (bitcast back to VT), or an empty SDValue if that is not
// possible.
//
// NOTE(review): the listing numbers jump from 1595 to 1597, so the line with
// the return type, function name and leading parameters (the body uses DL,
// Mask and VT) appears to be missing from this copy -- restore it from the
// upstream file.
1597 SDValue V1, SDValue V2, SelectionDAG &DAG) {
1598 unsigned EltBits = VT.getScalarSizeInBits();
1599
// Elements wider than 32 bits and 1-bit elements are never widened.
1600 if (EltBits > 32 || EltBits == 1)
1601 return SDValue();
1602
1603 SmallVector<int, 8> NewMask;
// widenShuffleMaskElts fills NewMask on success; presumably it succeeds only
// when adjacent mask entries pair up into one wider element -- confirm against
// its definition.
1604 if (widenShuffleMaskElts(Mask, NewMask)) {
// Double the element width, keeping the integer/floating-point kind.
1605 MVT NewEltVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(EltBits * 2)
1606 : MVT::getIntegerVT(EltBits * 2);
1607 MVT NewVT = MVT::getVectorVT(NewEltVT, VT.getVectorNumElements() / 2);
// Only use the wider type if the target considers it legal.
1608 if (DAG.getTargetLoweringInfo().isTypeLegal(NewVT)) {
1609 SDValue NewV1 = DAG.getBitcast(NewVT, V1);
1610 SDValue NewV2 = DAG.getBitcast(NewVT, V2);
1611 return DAG.getBitcast(
1612 VT, DAG.getVectorShuffle(NewVT, DL, NewV1, NewV2, NewMask));
1613 }
1614 }
1615
1616 return SDValue();
1617}
1618
1619/// Attempts to match a shuffle mask against the VBSLL, VBSRL, VSLLI and VSRLI
1620/// instruction.
1621// The funciton matches elements from one of the input vector shuffled to the
1622// left or right with zeroable elements 'shifted in'. It handles both the
1623// strictly bit-wise element shifts and the byte shfit across an entire 128-bit
1624// lane.
1625// Mostly copied from X86.
1626static int matchShuffleAsShift(MVT &ShiftVT, unsigned &Opcode,
1627 unsigned ScalarSizeInBits, ArrayRef<int> Mask,
1628 int MaskOffset, const APInt &Zeroable) {
1629 int Size = Mask.size();
1630 unsigned SizeInBits = Size * ScalarSizeInBits;
1631
1632 auto CheckZeros = [&](int Shift, int Scale, bool Left) {
1633 for (int i = 0; i < Size; i += Scale)
1634 for (int j = 0; j < Shift; ++j)
1635 if (!Zeroable[i + j + (Left ? 0 : (Scale - Shift))])
1636 return false;
1637
1638 return true;
1639 };
1640
1641 auto isSequentialOrUndefInRange = [&](unsigned Pos, unsigned Size, int Low,
1642 int Step = 1) {
1643 for (unsigned i = Pos, e = Pos + Size; i != e; ++i, Low += Step)
1644 if (!(Mask[i] == -1 || Mask[i] == Low))
1645 return false;
1646 return true;
1647 };
1648
1649 auto MatchShift = [&](int Shift, int Scale, bool Left) {
1650 for (int i = 0; i != Size; i += Scale) {
1651 unsigned Pos = Left ? i + Shift : i;
1652 unsigned Low = Left ? i : i + Shift;
1653 unsigned Len = Scale - Shift;
1654 if (!isSequentialOrUndefInRange(Pos, Len, Low + MaskOffset))
1655 return -1;
1656 }
1657
1658 int ShiftEltBits = ScalarSizeInBits * Scale;
1659 bool ByteShift = ShiftEltBits > 64;
1660 Opcode = Left ? (ByteShift ? LoongArchISD::VBSLL : LoongArchISD::VSLLI)
1661 : (ByteShift ? LoongArchISD::VBSRL : LoongArchISD::VSRLI);
1662 int ShiftAmt = Shift * ScalarSizeInBits / (ByteShift ? 8 : 1);
1663
1664 // Normalize the scale for byte shifts to still produce an i64 element
1665 // type.
1666 Scale = ByteShift ? Scale / 2 : Scale;
1667
1668 // We need to round trip through the appropriate type for the shift.
1669 MVT ShiftSVT = MVT::getIntegerVT(ScalarSizeInBits * Scale);
1670 ShiftVT = ByteShift ? MVT::getVectorVT(MVT::i8, SizeInBits / 8)
1671 : MVT::getVectorVT(ShiftSVT, Size / Scale);
1672 return (int)ShiftAmt;
1673 };
1674
1675 unsigned MaxWidth = 128;
1676 for (int Scale = 2; Scale * ScalarSizeInBits <= MaxWidth; Scale *= 2)
1677 for (int Shift = 1; Shift != Scale; ++Shift)
1678 for (bool Left : {true, false})
1679 if (CheckZeros(Shift, Scale, Left)) {
1680 int ShiftAmt = MatchShift(Shift, Scale, Left);
1681 if (0 < ShiftAmt)
1682 return ShiftAmt;
1683 }
1684
1685 // no match
1686 return -1;
1687}
1688
1689/// Lower VECTOR_SHUFFLE as shift (if possible).
1690///
1691/// For example:
1692/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
1693/// <4 x i32> <i32 4, i32 0, i32 1, i32 2>
1694/// is lowered to:
1695/// (VBSLL_V $v0, $v0, 4)
1696///
1697/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
1698/// <4 x i32> <i32 4, i32 0, i32 4, i32 2>
1699/// is lowered to:
1700/// (VSLLI_D $v0, $v0, 32)
///
/// Returns the shifted value bitcast back to VT, or an empty SDValue when
/// neither input matches a shift pattern.
///
/// NOTE(review): the listing numbers jump from 1700 to 1702, so the line with
/// the return type, function name and leading parameters (the body uses DL and
/// Mask) appears to be missing from this copy -- restore it from the upstream
/// file.
1702 MVT VT, SDValue V1, SDValue V2,
1703 SelectionDAG &DAG,
1704 const LoongArchSubtarget &Subtarget,
1705 const APInt &Zeroable) {
1706 int Size = Mask.size();
1707 assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");
1708
// ShiftVT and Opcode are filled in by matchShuffleAsShift on a successful
// match.
1709 MVT ShiftVT;
1710 SDValue V = V1;
1711 unsigned Opcode;
1712
1713 // Try to match shuffle against V1 shift.
1714 int ShiftAmt = matchShuffleAsShift(ShiftVT, Opcode, VT.getScalarSizeInBits(),
1715 Mask, 0, Zeroable);
1716
1717 // If V1 failed, try to match shuffle against V2 shift.
// V2's elements are numbered from Size in the mask, hence the offset.
1718 if (ShiftAmt < 0) {
1719 ShiftAmt = matchShuffleAsShift(ShiftVT, Opcode, VT.getScalarSizeInBits(),
1720 Mask, Size, Zeroable);
1721 V = V2;
1722 }
1723
1724 if (ShiftAmt < 0)
1725 return SDValue();
1726
1727 assert(DAG.getTargetLoweringInfo().isTypeLegal(ShiftVT) &&
1728 "Illegal integer vector type");
// Perform the shift in the matched type, then cast back to the shuffle type.
1729 V = DAG.getBitcast(ShiftVT, V);
1730 V = DAG.getNode(Opcode, DL, ShiftVT, V,
1731 DAG.getConstant(ShiftAmt, DL, Subtarget.getGRLenVT()));
1732 return DAG.getBitcast(VT, V);
1733}
1734
/// Determine whether a range fits a regular pattern of values.
/// This function accounts for the possibility of jumping over the End iterator.
///
/// Starting at \p Begin and visiting every \p CheckStride-th element before
/// \p End, each visited element must be either -1 (undef) or the expected
/// index. The expected index starts at \p ExpectedIndex and grows by
/// \p ExpectedIndexStride after every visited element. An empty range
/// trivially fits.
///
/// \p CheckStride must be non-zero, otherwise the walk could never reach
/// \p End.
///
/// NOTE(review): the iterator parameters are spelled `const ValType *`, which
/// is what the callers in this file are expected to pass (`Mask.begin()`,
/// `Begin + 1`, `Mask.end()` on an ArrayRef) -- confirm against the upstream
/// declaration.
template <typename ValType>
static bool fitsRegularPattern(const ValType *Begin, unsigned CheckStride,
                               const ValType *End, ValType ExpectedIndex,
                               unsigned ExpectedIndexStride) {
  assert(CheckStride != 0 && "A zero stride never reaches End");

  const ValType *I = Begin;
  while (I != End) {
    if (*I != -1 && *I != ExpectedIndex)
      return false;
    ExpectedIndex += ExpectedIndexStride;

    // Incrementing past End is undefined behaviour so we must increment one
    // step at a time and check for End at each step.
    for (unsigned n = 0; n < CheckStride && I != End; ++n)
      ++I;
  }
  return true;
}
1757
1758/// Compute whether each element of a shuffle is zeroable.
1759///
1760/// A "zeroable" vector shuffle element is one which can be lowered to zero.
///
/// On return, bit i of KnownUndef is set when Mask[i] is negative, and bit i of
/// KnownZero is set when Mask[i] selects from an input that is an all-zeros
/// build vector.
///
/// NOTE(review): the listing numbers jump from 1760 to 1762 and from 1766 to
/// 1768. The line with the return type, function name and leading parameters
/// (the body uses Mask and V1) and one statement just before
/// `V2 = peekThroughBitcasts(V2);` appear to be missing from this copy --
/// restore them from the upstream file.
1762 SDValue V2, APInt &KnownUndef,
1763 APInt &KnownZero) {
1764 int Size = Mask.size();
// Start with no element known undef or known zero.
1765 KnownUndef = KnownZero = APInt::getZero(Size);
1766
1768 V2 = peekThroughBitcasts(V2);
1769
1770 bool V1IsZero = ISD::isBuildVectorAllZeros(V1.getNode());
1771 bool V2IsZero = ISD::isBuildVectorAllZeros(V2.getNode());
1772
// These two values are only used by the assertion below; the (void) cast
// keeps builds without assertions free of unused-variable warnings.
1773 int VectorSizeInBits = V1.getValueSizeInBits();
1774 int ScalarSizeInBits = VectorSizeInBits / Size;
1775 assert(!(VectorSizeInBits % ScalarSizeInBits) && "Illegal shuffle mask size");
1776 (void)ScalarSizeInBits;
1777
1778 for (int i = 0; i < Size; ++i) {
1779 int M = Mask[i];
// Negative mask entries denote undef elements.
1780 if (M < 0) {
1781 KnownUndef.setBit(i);
1782 continue;
1783 }
// Indices [0, Size) read from V1, indices >= Size read from V2.
1784 if ((M >= 0 && M < Size && V1IsZero) || (M >= Size && V2IsZero)) {
1785 KnownZero.setBit(i);
1786 continue;
1787 }
1788 }
1789}
1790
1791/// Test whether a shuffle mask is equivalent within each sub-lane.
1792///
1793/// The specific repeated shuffle mask is populated in \p RepeatedMask, as it is
1794/// non-trivial to compute in the face of undef lanes. The representation is
1795/// suitable for use with existing 128-bit shuffles as entries from the second
1796/// vector have been remapped to [LaneSize, 2*LaneSize).
1797static bool isRepeatedShuffleMask(unsigned LaneSizeInBits, MVT VT,
1798 ArrayRef<int> Mask,
1799 SmallVectorImpl<int> &RepeatedMask) {
1800 auto LaneSize = LaneSizeInBits / VT.getScalarSizeInBits();
1801 RepeatedMask.assign(LaneSize, -1);
1802 int Size = Mask.size();
1803 for (int i = 0; i < Size; ++i) {
1804 assert(Mask[i] == -1 || Mask[i] >= 0);
1805 if (Mask[i] < 0)
1806 continue;
1807 if ((Mask[i] % Size) / LaneSize != i / LaneSize)
1808 // This entry crosses lanes, so there is no way to model this shuffle.
1809 return false;
1810
1811 // Ok, handle the in-lane shuffles by detecting if and when they repeat.
1812 // Adjust second vector indices to start at LaneSize instead of Size.
1813 int LocalM =
1814 Mask[i] < Size ? Mask[i] % LaneSize : Mask[i] % LaneSize + LaneSize;
1815 if (RepeatedMask[i % LaneSize] < 0)
1816 // This is the first non-undef entry in this slot of a 128-bit lane.
1817 RepeatedMask[i % LaneSize] = LocalM;
1818 else if (RepeatedMask[i % LaneSize] != LocalM)
1819 // Found a mismatch with the repeated mask.
1820 return false;
1821 }
1822 return true;
1823}
1824
1825/// Attempts to match vector shuffle as byte rotation.
1827 ArrayRef<int> Mask) {
1828
1829 SDValue Lo, Hi;
1830 SmallVector<int, 16> RepeatedMask;
1831
1832 if (!isRepeatedShuffleMask(128, VT, Mask, RepeatedMask))
1833 return -1;
1834
1835 int NumElts = RepeatedMask.size();
1836 int Rotation = 0;
1837 int Scale = 16 / NumElts;
1838
1839 for (int i = 0; i < NumElts; ++i) {
1840 int M = RepeatedMask[i];
1841 assert((M == -1 || (0 <= M && M < (2 * NumElts))) &&
1842 "Unexpected mask index.");
1843 if (M < 0)
1844 continue;
1845
1846 // Determine where a rotated vector would have started.
1847 int StartIdx = i - (M % NumElts);
1848 if (StartIdx == 0)
1849 return -1;
1850
1851 // If we found the tail of a vector the rotation must be the missing
1852 // front. If we found the head of a vector, it must be how much of the
1853 // head.
1854 int CandidateRotation = StartIdx < 0 ? -StartIdx : NumElts - StartIdx;
1855
1856 if (Rotation == 0)
1857 Rotation = CandidateRotation;
1858 else if (Rotation != CandidateRotation)
1859 return -1;
1860
1861 // Compute which value this mask is pointing at.
1862 SDValue MaskV = M < NumElts ? V1 : V2;
1863
1864 // Compute which of the two target values this index should be assigned
1865 // to. This reflects whether the high elements are remaining or the low
1866 // elements are remaining.
1867 SDValue &TargetV = StartIdx < 0 ? Hi : Lo;
1868
1869 // Either set up this value if we've not encountered it before, or check
1870 // that it remains consistent.
1871 if (!TargetV)
1872 TargetV = MaskV;
1873 else if (TargetV != MaskV)
1874 return -1;
1875 }
1876
1877 // Check that we successfully analyzed the mask, and normalize the results.
1878 assert(Rotation != 0 && "Failed to locate a viable rotation!");
1879 assert((Lo || Hi) && "Failed to find a rotated input vector!");
1880 if (!Lo)
1881 Lo = Hi;
1882 else if (!Hi)
1883 Hi = Lo;
1884
1885 V1 = Lo;
1886 V2 = Hi;
1887
1888 return Rotation * Scale;
1889}
1890
1891/// Lower VECTOR_SHUFFLE as byte rotate (if possible).
1892///
1893/// For example:
1894/// %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b,
1895/// <2 x i32> <i32 3, i32 0>
1896/// is lowered to:
1897/// (VBSRL_V $v1, $v1, 8)
1898/// (VBSLL_V $v0, $v0, 8)
1899/// (VOR_V $v0, $V0, $v1)
///
/// Returns the OR of the two byte-shifted inputs bitcast back to VT, or an
/// empty SDValue when the mask is not a byte rotation.
///
/// NOTE(review): the listing numbers jump from 1900 to 1902, so the line with
/// the function name and leading parameters (the body uses DL, Mask and VT)
/// appears to be missing from this copy -- restore it from the upstream file.
1900static SDValue
1902 SDValue V1, SDValue V2, SelectionDAG &DAG,
1903 const LoongArchSubtarget &Subtarget) {
1904
// matchShuffleAsByteRotate may rewrite Lo and Hi; a non-positive result means
// no rotation was found.
1905 SDValue Lo = V1, Hi = V2;
1906 int ByteRotation = matchShuffleAsByteRotate(VT, Lo, Hi, Mask);
1907 if (ByteRotation <= 0)
1908 return SDValue();
1909
// Do the byte shifts on a vector of i8 with the same total size as VT.
1910 MVT ByteVT = MVT::getVectorVT(MVT::i8, VT.getSizeInBits() / 8);
1911 Lo = DAG.getBitcast(ByteVT, Lo);
1912 Hi = DAG.getBitcast(ByteVT, Hi);
1913
// The two shift amounts add up to 16 bytes.
1914 int LoByteShift = 16 - ByteRotation;
1915 int HiByteShift = ByteRotation;
1916 MVT GRLenVT = Subtarget.getGRLenVT();
1917
1918 SDValue LoShift = DAG.getNode(LoongArchISD::VBSLL, DL, ByteVT, Lo,
1919 DAG.getConstant(LoByteShift, DL, GRLenVT));
1920 SDValue HiShift = DAG.getNode(LoongArchISD::VBSRL, DL, ByteVT, Hi,
1921 DAG.getConstant(HiByteShift, DL, GRLenVT));
1922 return DAG.getBitcast(VT, DAG.getNode(ISD::OR, DL, ByteVT, LoShift, HiShift));
1923}
1924
1925/// Lower VECTOR_SHUFFLE as ZERO_EXTEND Or ANY_EXTEND (if possible).
1926///
1927/// For example:
1928/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
1929/// <4 x i32> <i32 0, i32 4, i32 1, i32 4>
1930/// %3 = bitcast <4 x i32> %2 to <2 x i64>
1931/// is lowered to:
1932/// (VREPLI $v1, 0)
1933/// (VILVL $v0, $v1, $v0)
///
/// Returns the lowered value, a zero constant when every element is zeroable,
/// or an empty SDValue when no extension scale matches the mask.
///
/// NOTE(review): the listing numbers jump from 1933 to 1935, so the line with
/// the return type, function name and leading parameter(s) (the body uses DL)
/// appears to be missing from this copy -- restore it from the upstream file.
1935 ArrayRef<int> Mask, MVT VT,
1936 SDValue V1, SDValue V2,
1937 SelectionDAG &DAG,
1938 const APInt &Zeroable) {
1939 int Bits = VT.getSizeInBits();
1940 int EltBits = VT.getScalarSizeInBits();
1941 int NumElements = VT.getVectorNumElements();
1942
// If every element can be zero, the whole result is simply zero.
1943 if (Zeroable.isAllOnes())
1944 return DAG.getConstant(0, DL, VT);
1945
1946 // Define a helper function to check a particular ext-scale and lower to it if
1947 // valid.
// Scale is the number of mask elements per extended element. The lambda
// captures by reference and updates EltBits and NumElements while lowering.
1948 auto Lower = [&](int Scale) -> SDValue {
1949 SDValue InputV;
1950 bool AnyExt = true;
1951 int Offset = 0;
1952 for (int i = 0; i < NumElements; i++) {
1953 int M = Mask[i];
1954 if (M < 0)
1955 continue;
1956 if (i % Scale != 0) {
1957 // Each of the extended elements need to be zeroable.
1958 if (!Zeroable[i])
1959 return SDValue();
1960
// A defined, zeroable upper element forces a zero extension.
1961 AnyExt = false;
1962 continue;
1963 }
1964
1965 // Each of the base elements needs to be consecutive indices into the
1966 // same input vector.
1967 SDValue V = M < NumElements ? V1 : V2;
1968 M = M % NumElements;
1969 if (!InputV) {
1970 InputV = V;
1971 Offset = M - (i / Scale);
1972
1973 // These offsets can't be handled.
1974 if (Offset % (NumElements / Scale))
1975 return SDValue();
1976 } else if (InputV != V)
1977 return SDValue();
1978
1979 if (M != (Offset + (i / Scale)))
1980 return SDValue(); // Non-consecutive strided elements.
1981 }
1982
1983 // If we fail to find an input, we have a zero-shuffle which should always
1984 // have already been handled.
1985 if (!InputV)
1986 return SDValue();
1987
// Each iteration interleaves the input with either itself (frozen, for an
// any-extend) or zero, doubling the element width, until Scale reaches 1.
1988 do {
// Use VILVH instead of VILVL when the source elements start in the upper half.
1989 unsigned VilVLoHi = LoongArchISD::VILVL;
1990 if (Offset >= (NumElements / 2)) {
1991 VilVLoHi = LoongArchISD::VILVH;
1992 Offset -= (NumElements / 2);
1993 }
1994
1995 MVT InputVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits), NumElements);
1996 SDValue Ext =
1997 AnyExt ? DAG.getFreeze(InputV) : DAG.getConstant(0, DL, InputVT);
1998 InputV = DAG.getBitcast(InputVT, InputV);
1999 InputV = DAG.getNode(VilVLoHi, DL, InputVT, Ext, InputV);
2000 Scale /= 2;
2001 EltBits *= 2;
2002 NumElements /= 2;
2003 } while (Scale > 1);
2004 return DAG.getBitcast(VT, InputV);
2005 };
2006
2007 // Each iteration, try extending the elements half as much, but into twice as
2008 // many elements.
2009 for (int NumExtElements = Bits / 64; NumExtElements < NumElements;
2010 NumExtElements *= 2) {
2011 if (SDValue V = Lower(NumElements / NumExtElements))
2012 return V;
2013 }
2014 return SDValue();
2015}
2016
2017/// Lower VECTOR_SHUFFLE into VREPLVEI (if possible).
2018///
2019/// VREPLVEI performs vector broadcast based on an element specified by an
2020/// integer immediate, with its mask being similar to:
2021/// <x, x, x, ...>
2022/// where x is any valid index.
2023///
2024/// When undef's appear in the mask they are treated as if they were whatever
2025/// value is necessary in order to fit the above form.
///
/// Returns an UNDEF of VT when the whole mask is undef, the VREPLVEI node when
/// the mask is a splat, and an empty SDValue otherwise.
///
/// NOTE(review): the listing numbers jump from 2026 to 2028, so the line with
/// the function name and leading parameters (the body uses DL, Mask and VT)
/// appears to be missing from this copy -- restore it from the upstream file.
2026static SDValue
2028 SDValue V1, SelectionDAG &DAG,
2029 const LoongArchSubtarget &Subtarget) {
// The first defined mask entry is the candidate element to broadcast.
2030 int SplatIndex = -1;
2031 for (const auto &M : Mask) {
2032 if (M != -1) {
2033 SplatIndex = M;
2034 break;
2035 }
2036 }
2037
2038 if (SplatIndex == -1)
2039 return DAG.getUNDEF(VT);
2040
2041 assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
// Check stride 1 and index stride 0: every entry must be undef or SplatIndex.
2042 if (fitsRegularPattern<int>(Mask.begin(), 1, Mask.end(), SplatIndex, 0)) {
2043 return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
2044 DAG.getConstant(SplatIndex, DL, Subtarget.getGRLenVT()));
2045 }
2046
2047 return SDValue();
2048}
2049
2050/// Lower VECTOR_SHUFFLE into VSHUF4I (if possible).
2051///
2052/// VSHUF4I splits the vector into blocks of four elements, then shuffles these
2053/// elements according to a <4 x i2> constant (encoded as an integer immediate).
2054///
2055/// It is therefore possible to lower into VSHUF4I when the mask takes the form:
2056/// <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...>
2057/// When undef's appear they are treated as if they were whatever value is
2058/// necessary in order to fit the above forms.
2059///
2060/// For example:
2061/// %2 = shufflevector <8 x i16> %0, <8 x i16> undef,
2062/// <8 x i32> <i32 3, i32 2, i32 1, i32 0,
2063/// i32 7, i32 6, i32 5, i32 4>
2064/// is lowered to:
2065/// (VSHUF4I_H $v0, $v1, 27)
2066/// where the 27 comes from:
2067/// 3 + (2 << 2) + (1 << 4) + (0 << 6)
///
/// For v2f64/v2i64 the two-operand LoongArchISD::VSHUF4I_D node is produced
/// instead. Returns an empty SDValue when the mask does not fit.
///
/// NOTE(review): the listing numbers jump from 2068 to 2070, so the line with
/// the function name and leading parameters (the body uses DL, Mask and VT)
/// appears to be missing from this copy -- restore it from the upstream file.
2068static SDValue
2070 SDValue V1, SDValue V2, SelectionDAG &DAG,
2071 const LoongArchSubtarget &Subtarget) {
2072
// Two-element vectors are checked with a sub-vector size of 2 rather than 4.
2073 unsigned SubVecSize = 4;
2074 if (VT == MVT::v2f64 || VT == MVT::v2i64)
2075 SubVecSize = 2;
2076
// SubMask[i] collects the common in-block index for position i of every block.
2077 int SubMask[4] = {-1, -1, -1, -1};
2078 for (unsigned i = 0; i < SubVecSize; ++i) {
2079 for (unsigned j = i; j < Mask.size(); j += SubVecSize) {
2080 int M = Mask[j];
2081
2082 // Convert from vector index to 4-element subvector index
2083 // If an index refers to an element outside of the subvector then give up
2084 if (M != -1) {
2085 M -= 4 * (j / SubVecSize);
2086 if (M < 0 || M >= 4)
2087 return SDValue();
2088 }
2089
2090 // If the mask has an undef, replace it with the current index.
2091 // Note that it might still be undef if the current index is also undef
2092 if (SubMask[i] == -1)
2093 SubMask[i] = M;
2094 // Check that non-undef values are the same as in the mask. If they
2095 // aren't then give up
2096 else if (M != -1 && M != SubMask[i])
2097 return SDValue();
2098 }
2099 }
2100
2101 // Calculate the immediate. Replace any remaining undefs with zero
// Position 0 ends up in the lowest two bits because the loop runs from the
// highest position down.
2102 int Imm = 0;
2103 for (int i = SubVecSize - 1; i >= 0; --i) {
2104 int M = SubMask[i];
2105
2106 if (M == -1)
2107 M = 0;
2108
2109 Imm <<= 2;
2110 Imm |= M & 0x3;
2111 }
2112
2113 MVT GRLenVT = Subtarget.getGRLenVT();
2114
2115 // Return vshuf4i.d
2116 if (VT == MVT::v2f64 || VT == MVT::v2i64)
2117 return DAG.getNode(LoongArchISD::VSHUF4I_D, DL, VT, V1, V2,
2118 DAG.getConstant(Imm, DL, GRLenVT));
2119
2120 return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT, V1,
2121 DAG.getConstant(Imm, DL, GRLenVT));
2122}
2123
2124/// Lower VECTOR_SHUFFLE whose result is the reversed source vector.
2125///
2126/// It is possible to do optimization for VECTOR_SHUFFLE performing vector
2127/// reverse whose mask looks like:
2128/// <7, 6, 5, 4, 3, 2, 1, 0>
2129///
2130/// When undef's appear in the mask they are treated as if they were whatever
2131/// value is necessary in order to fit the above forms.
///
/// The reversal is done in two steps: a shuffle that reverses the order of
/// groups of four elements (on a vector with 4x wider elements), followed by
/// a VSHUF4I with immediate 27 on the original type. Returns an empty SDValue
/// for other types or non-reverse masks.
///
/// NOTE(review): the listing numbers jump from 2132 to 2134, so the line with
/// the function name and leading parameters (the body uses DL, Mask and VT)
/// appears to be missing from this copy -- restore it from the upstream file.
2132static SDValue
2134 SDValue V1, SelectionDAG &DAG,
2135 const LoongArchSubtarget &Subtarget) {
2136 // Only vectors with i8/i16 elements which cannot match other patterns
2137 // directly needs to do this.
2138 if (VT != MVT::v16i8 && VT != MVT::v8i16 && VT != MVT::v32i8 &&
2139 VT != MVT::v16i16)
2140 return SDValue();
2141
2142 if (!ShuffleVectorInst::isReverseMask(Mask, Mask.size()))
2143 return SDValue();
2144
// Build a reversing mask over the wide elements: <N-1, ..., 1, 0>.
2145 int WidenNumElts = VT.getVectorNumElements() / 4;
2146 SmallVector<int, 16> WidenMask(WidenNumElts, -1);
2147 for (int i = 0; i < WidenNumElts; ++i)
2148 WidenMask[i] = WidenNumElts - 1 - i;
2149
// Four i8 elements form an i32; otherwise (i16 elements) four form an i64.
2150 MVT WidenVT = MVT::getVectorVT(
2151 VT.getVectorElementType() == MVT::i8 ? MVT::i32 : MVT::i64, WidenNumElts);
2152 SDValue NewV1 = DAG.getBitcast(WidenVT, V1);
2153 SDValue WidenRev = DAG.getVectorShuffle(WidenVT, DL, NewV1,
2154 DAG.getUNDEF(WidenVT), WidenMask);
2155
// 27 == 3 + (2 << 2) + (1 << 4) + (0 << 6), i.e. selector <3, 2, 1, 0>.
2156 return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT,
2157 DAG.getBitcast(VT, WidenRev),
2158 DAG.getConstant(27, DL, Subtarget.getGRLenVT()));
2159}
2160
2161/// Lower VECTOR_SHUFFLE into VPACKEV (if possible).
2162///
2163/// VPACKEV interleaves the even elements from each vector.
2164///
2165/// It is possible to lower into VPACKEV when the mask consists of two of the
2166/// following forms interleaved:
2167/// <0, 2, 4, ...>
2168/// <n, n+2, n+4, ...>
2169/// where n is the number of elements in the vector.
2170/// For example:
2171/// <0, 0, 2, 2, 4, 4, ...>
2172/// <0, n, 2, n+2, 4, n+4, ...>
2173///
2174/// When undef's appear in the mask they are treated as if they were whatever
2175/// value is necessary in order to fit the above forms.
///
/// Returns the VPACKEV node, or an empty SDValue when the mask does not fit.
///
/// NOTE(review): the listing numbers jump from 2175 to 2177, so the line with
/// the return type, function name and leading parameters (the body uses DL and
/// Mask) appears to be missing from this copy -- restore it from the upstream
/// file.
2177 MVT VT, SDValue V1, SDValue V2,
2178 SelectionDAG &DAG) {
2179
2180 const auto &Begin = Mask.begin();
2181 const auto &End = Mask.end();
2182 SDValue OriV1 = V1, OriV2 = V2;
2183
// Even mask positions: even elements of the first input (from 0) or of the
// second input (from n).
2184 if (fitsRegularPattern<int>(Begin, 2, End, 0, 2))
2185 V1 = OriV1;
2186 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 2))
2187 V1 = OriV2;
2188 else
2189 return SDValue();
2190
// Odd mask positions: the same test, starting one entry later.
2191 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 2))
2192 V2 = OriV1;
2193 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 2))
2194 V2 = OriV2;
2195 else
2196 return SDValue();
2197
// Note the operand order: the source of the odd positions is passed first.
2198 return DAG.getNode(LoongArchISD::VPACKEV, DL, VT, V2, V1);
2199}
2200
2201/// Lower VECTOR_SHUFFLE into VPACKOD (if possible).
2202///
2203/// VPACKOD interleaves the odd elements from each vector.
2204///
2205/// It is possible to lower into VPACKOD when the mask consists of two of the
2206/// following forms interleaved:
2207/// <1, 3, 5, ...>
2208/// <n+1, n+3, n+5, ...>
2209/// where n is the number of elements in the vector.
2210/// For example:
2211/// <1, 1, 3, 3, 5, 5, ...>
2212/// <1, n+1, 3, n+3, 5, n+5, ...>
2213///
2214/// When undef's appear in the mask they are treated as if they were whatever
2215/// value is necessary in order to fit the above forms.
///
/// Returns the VPACKOD node, or an empty SDValue when the mask does not fit.
///
/// NOTE(review): the listing numbers jump from 2215 to 2217, so the line with
/// the return type, function name and leading parameters (the body uses DL and
/// Mask) appears to be missing from this copy -- restore it from the upstream
/// file.
2217 MVT VT, SDValue V1, SDValue V2,
2218 SelectionDAG &DAG) {
2219
2220 const auto &Begin = Mask.begin();
2221 const auto &End = Mask.end();
2222 SDValue OriV1 = V1, OriV2 = V2;
2223
// Even mask positions: odd elements of the first input (from 1) or of the
// second input (from n + 1).
2224 if (fitsRegularPattern<int>(Begin, 2, End, 1, 2))
2225 V1 = OriV1;
2226 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + 1, 2))
2227 V1 = OriV2;
2228 else
2229 return SDValue();
2230
// Odd mask positions: the same test, starting one entry later.
2231 if (fitsRegularPattern<int>(Begin + 1, 2, End, 1, 2))
2232 V2 = OriV1;
2233 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + 1, 2))
2234 V2 = OriV2;
2235 else
2236 return SDValue();
2237
// Note the operand order: the source of the odd positions is passed first.
2238 return DAG.getNode(LoongArchISD::VPACKOD, DL, VT, V2, V1);
2239}
2240
2241 /// Lower VECTOR_SHUFFLE into VILVH (if possible).
2242 ///
2243 /// VILVH interleaves consecutive elements from the left (highest-indexed) half
2244 /// of each vector.
2245 ///
2246 /// It is possible to lower into VILVH when the mask consists of two of the
2247 /// following forms interleaved:
2248 /// <x, x+1, x+2, ...>
2249 /// <n+x, n+x+1, n+x+2, ...>
2250 /// where n is the number of elements in the vector and x is half n.
2251 /// For example:
2252 /// <x, x, x+1, x+1, x+2, x+2, ...>
2253 /// <x, n+x, x+1, n+x+1, x+2, n+x+2, ...>
2254 ///
2255 /// When undef's appear in the mask they are treated as if they were whatever
2256 /// value is necessary in order to fit the above forms.
///
/// Returns the VILVH node on a match and an empty SDValue otherwise.
2258 MVT VT, SDValue V1, SDValue V2,
2259 SelectionDAG &DAG) {
2260
2261 const auto &Begin = Mask.begin();
2262 const auto &End = Mask.end();
// HalfSize is the "x" of the doc comment: the first index of the upper half.
2263 unsigned HalfSize = Mask.size() / 2;
2264 SDValue OriV1 = V1, OriV2 = V2;
2265
// Mask entries at even positions must count up by 1 from x (first input) or
// from n+x (second input). fitsRegularPattern is defined outside this
// excerpt; its argument meaning is read off the doc comment above -- TODO
// confirm against its definition.
2266 if (fitsRegularPattern<int>(Begin, 2, End, HalfSize, 1))
2267 V1 = OriV1;
2268 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + HalfSize, 1))
2269 V1 = OriV2;
2270 else
2271 return SDValue();
2272
// Same test for the entries at odd positions (from Begin + 1).
2273 if (fitsRegularPattern<int>(Begin + 1, 2, End, HalfSize, 1))
2274 V2 = OriV1;
2275 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + HalfSize,
2276 1))
2277 V2 = OriV2;
2278 else
2279 return SDValue();
2280
// The input chosen for the odd positions is passed as the first operand.
2281 return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
2282 }
2283
2284 /// Lower VECTOR_SHUFFLE into VILVL (if possible).
2285 ///
2286 /// VILVL interleaves consecutive elements from the right (lowest-indexed) half
2287 /// of each vector.
2288 ///
2289 /// It is possible to lower into VILVL when the mask consists of two of the
2290 /// following forms interleaved:
2291 /// <0, 1, 2, ...>
2292 /// <n, n+1, n+2, ...>
2293 /// where n is the number of elements in the vector.
2294 /// For example:
2295 /// <0, 0, 1, 1, 2, 2, ...>
2296 /// <0, n, 1, n+1, 2, n+2, ...>
2297 ///
2298 /// When undef's appear in the mask they are treated as if they were whatever
2299 /// value is necessary in order to fit the above forms.
///
/// Returns the VILVL node on a match and an empty SDValue otherwise.
2301 MVT VT, SDValue V1, SDValue V2,
2302 SelectionDAG &DAG) {
2303
2304 const auto &Begin = Mask.begin();
2305 const auto &End = Mask.end();
// Remember the incoming operands; V1 and V2 are reassigned below to the
// inputs selected for the even and odd mask positions respectively.
2306 SDValue OriV1 = V1, OriV2 = V2;
2307
// Mask entries at even positions must count up by 1 from 0 (first input) or
// from n (second input). fitsRegularPattern is defined outside this excerpt;
// its argument meaning is read off the doc comment above -- TODO confirm
// against its definition.
2308 if (fitsRegularPattern<int>(Begin, 2, End, 0, 1))
2309 V1 = OriV1;
2310 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 1))
2311 V1 = OriV2;
2312 else
2313 return SDValue();
2314
// Same test for the entries at odd positions (from Begin + 1).
2315 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 1))
2316 V2 = OriV1;
2317 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 1))
2318 V2 = OriV2;
2319 else
2320 return SDValue();
2321
// The input chosen for the odd positions is passed as the first operand.
2322 return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
2323 }
2324
2325 /// Lower VECTOR_SHUFFLE into VPICKEV (if possible).
2326 ///
2327 /// VPICKEV copies the even elements of each vector into the result vector.
2328 ///
2329 /// It is possible to lower into VPICKEV when the mask consists of two of the
2330 /// following forms concatenated:
2331 /// <0, 2, 4, ...>
2332 /// <n, n+2, n+4, ...>
2333 /// where n is the number of elements in the vector.
2334 /// For example:
2335 /// <0, 2, 4, ..., 0, 2, 4, ...>
2336 /// <0, 2, 4, ..., n, n+2, n+4, ...>
2337 ///
2338 /// When undef's appear in the mask they are treated as if they were whatever
2339 /// value is necessary in order to fit the above forms.
///
/// Returns the VPICKEV node on a match and an empty SDValue otherwise.
2341 MVT VT, SDValue V1, SDValue V2,
2342 SelectionDAG &DAG) {
2343
2344 const auto &Begin = Mask.begin();
// Mid splits the mask into the two concatenated halves checked below.
2345 const auto &Mid = Mask.begin() + Mask.size() / 2;
2346 const auto &End = Mask.end();
2347 SDValue OriV1 = V1, OriV2 = V2;
2348
// First half of the mask: every entry must follow <0, 2, 4, ...> (first
// input) or <n, n+2, n+4, ...> (second input). fitsRegularPattern is defined
// outside this excerpt; its argument meaning is read off the doc comment
// above -- TODO confirm against its definition.
2349 if (fitsRegularPattern<int>(Begin, 1, Mid, 0, 2))
2350 V1 = OriV1;
2351 else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size(), 2))
2352 V1 = OriV2;
2353 else
2354 return SDValue();
2355
// Second half of the mask: same test.
2356 if (fitsRegularPattern<int>(Mid, 1, End, 0, 2))
2357 V2 = OriV1;
2358 else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size(), 2))
2359 V2 = OriV2;
2360
2361 else
2362 return SDValue();
2363
// The input chosen for the second half is passed as the first operand.
2364 return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
2365 }
2366
2367 /// Lower VECTOR_SHUFFLE into VPICKOD (if possible).
2368 ///
2369 /// VPICKOD copies the odd elements of each vector into the result vector.
2370 ///
2371 /// It is possible to lower into VPICKOD when the mask consists of two of the
2372 /// following forms concatenated:
2373 /// <1, 3, 5, ...>
2374 /// <n+1, n+3, n+5, ...>
2375 /// where n is the number of elements in the vector.
2376 /// For example:
2377 /// <1, 3, 5, ..., 1, 3, 5, ...>
2378 /// <1, 3, 5, ..., n+1, n+3, n+5, ...>
2379 ///
2380 /// When undef's appear in the mask they are treated as if they were whatever
2381 /// value is necessary in order to fit the above forms.
///
/// Returns the VPICKOD node on a match and an empty SDValue otherwise.
2383 MVT VT, SDValue V1, SDValue V2,
2384 SelectionDAG &DAG) {
2385
2386 const auto &Begin = Mask.begin();
// Mid splits the mask into the two concatenated halves checked below.
2387 const auto &Mid = Mask.begin() + Mask.size() / 2;
2388 const auto &End = Mask.end();
2389 SDValue OriV1 = V1, OriV2 = V2;
2390
// First half of the mask: every entry must follow <1, 3, 5, ...> (first
// input) or <n+1, n+3, n+5, ...> (second input). fitsRegularPattern is
// defined outside this excerpt; its argument meaning is read off the doc
// comment above -- TODO confirm against its definition.
2391 if (fitsRegularPattern<int>(Begin, 1, Mid, 1, 2))
2392 V1 = OriV1;
2393 else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size() + 1, 2))
2394 V1 = OriV2;
2395 else
2396 return SDValue();
2397
// Second half of the mask: same test.
2398 if (fitsRegularPattern<int>(Mid, 1, End, 1, 2))
2399 V2 = OriV1;
2400 else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size() + 1, 2))
2401 V2 = OriV2;
2402 else
2403 return SDValue();
2404
// The input chosen for the second half is passed as the first operand.
2405 return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
2406 }
2407
2408 /// Lower VECTOR_SHUFFLE into VEXTRINS (if possible).
2409 ///
2410 /// VEXTRINS copies one element of a vector into any place of the result
2411 /// vector and leaves the remaining elements of the result vector unchanged.
2412 ///
2413 /// It is possible to lower into VEXTRINS when the mask takes the form:
2414 /// <0, 1, 2, ..., n+i, ..., n-1> or <n, n+1, n+2, ..., i, ..., 2n-1> or
2415 /// <0, 1, 2, ..., i, ..., n-1> or <n, n+1, n+2, ..., n+i, ..., 2n-1>
2416 /// where n is the number of elements in the vector and i is in [0, n).
2417 /// For example:
2418 /// <0, 1, 2, 3, 4, 5, 6, 8> , <2, 9, 10, 11, 12, 13, 14, 15> ,
2419 /// <0, 1, 2, 6, 4, 5, 6, 7> , <8, 9, 10, 11, 12, 9, 14, 15>
2420 ///
2421 /// When undef's appear in the mask they are treated as if they were whatever
2422 /// value is necessary in order to fit the above forms.
///
/// No target node is built here: the result is an EXTRACT_VECTOR_ELT feeding
/// an INSERT_VECTOR_ELT, or an empty SDValue when the mask does not fit.
2423 static SDValue
2425 SDValue V1, SDValue V2, SelectionDAG &DAG,
2426 const LoongArchSubtarget &Subtarget) {
2427 unsigned NumElts = VT.getVectorNumElements();
2428 MVT EltVT = VT.getVectorElementType();
2429 MVT GRLenVT = Subtarget.getGRLenVT();
2430
2431 if (Mask.size() != NumElts)
2432 return SDValue();
2433
// Base selects the vector the mask must otherwise be an identity of:
// Base == 0 means V1 (indices 0..n-1), Base == NumElts means V2 (n..2n-1).
2434 auto tryLowerToExtrAndIns = [&](unsigned Base) -> SDValue {
2435 int DiffCount = 0;
2436 int DiffPos = -1;
// Count the defined mask entries that deviate from the identity Base + i,
// remembering the position of the (single allowed) deviation.
2437 for (unsigned i = 0; i < NumElts; ++i) {
2438 if (Mask[i] == -1)
2439 continue;
2440 if (Mask[i] != int(Base + i)) {
2441 ++DiffCount;
2442 DiffPos = int(i);
2443 if (DiffCount > 1)
2444 return SDValue();
2445 }
2446 }
2447
2448 // Need exactly one differing element to lower into VEXTRINS.
2449 if (DiffCount != 1)
2450 return SDValue();
2451
2452 // DiffMask must be in [0, 2N).
2453 int DiffMask = Mask[DiffPos];
2454 if (DiffMask < 0 || DiffMask >= int(2 * NumElts))
2455 return SDValue();
2456
2457 // Determine source vector and source index.
2458 SDValue SrcVec;
2459 unsigned SrcIdx;
2460 if (unsigned(DiffMask) < NumElts) {
2461 SrcVec = V1;
2462 SrcIdx = unsigned(DiffMask);
2463 } else {
2464 SrcVec = V2;
2465 SrcIdx = unsigned(DiffMask) - NumElts;
2466 }
2467
2468 // Replace with EXTRACT_VECTOR_ELT + INSERT_VECTOR_ELT, it will match the
2469 // patterns of VEXTRINS in tablegen.
// Floating-point elements are extracted in their own type; integer elements
// are extracted as GRLenVT.
2470 SDValue Extracted = DAG.getNode(
2471 ISD::EXTRACT_VECTOR_ELT, DL, EltVT.isFloatingPoint() ? EltVT : GRLenVT,
2472 SrcVec, DAG.getConstant(SrcIdx, DL, GRLenVT));
2473 SDValue Result =
2474 DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, (Base == 0) ? V1 : V2,
2475 Extracted, DAG.getConstant(DiffPos, DL, GRLenVT));
2476
2477 return Result;
2478 };
2479
2480 // Try V1 as the base vector (Base = 0) first, then V2 (Base = n).
2481 if (SDValue Result = tryLowerToExtrAndIns(0))
2482 return Result;
2483 return tryLowerToExtrAndIns(NumElts);
2484 }
2485
2486 // Check the Mask and then build SrcVec and MaskImm infos which will
2487 // be used to build LoongArchISD nodes for VPERMI_W or XVPERMI_W.
2488 // On success, return true. Otherwise, return false.
//
// Only mask entries 0..3 (and, for an 8-entry mask, their counterparts 4..7)
// are inspected. One source vector is appended to SrcVec per pair of entries,
// so SrcVec holds two values on success: [0] for entries 0-1, [1] for entries
// 2-3. MaskImm is only OR-ed into, so the caller must pass it in as 0. On
// failure SrcVec and MaskImm may already have been partially updated.
2491 unsigned &MaskImm) {
2492 unsigned MaskSize = Mask.size();
2493
// True when M is undef or lies in the 4-element window starting at Off.
2494 auto isValid = [&](int M, int Off) {
2495 return (M == -1) || (M >= Off && M < Off + 4);
2496 };
2497
// Encode the pair (MLo, MHi) as two 2-bit fields of MaskImm, at bit offsets
// I * 2 and (I + 1) * 2. An undef entry is encoded as 0.
2498 auto buildImm = [&](int MLo, int MHi, unsigned Off, unsigned I) {
2499 auto immPart = [&](int M, unsigned Off) {
2500 return (M == -1 ? 0 : (M - Off)) & 0x3;
2501 };
2502 MaskImm |= immPart(MLo, Off) << (I * 2);
2503 MaskImm |= immPart(MHi, Off) << ((I + 1) * 2);
2504 };
2505
2506 for (unsigned i = 0; i < 4; i += 2) {
2507 int MLo = Mask[i];
2508 int MHi = Mask[i + 1];
2509
2510 if (MaskSize == 8) { // Only v8i32/v8f32 need this check.
// The upper four mask entries must equal the lower four plus 4.
// NOTE(review): undef is not special-cased here. If MLo (or MHi) is -1 the
// comparison only passes when the upper entry is the literal index 3
// (-1 + 4), and an undef upper entry is rejected whenever the lower entry is
// defined. Confirm whether accepting index 3 in that case is intended.
2511 int M2Lo = Mask[i + 4];
2512 int M2Hi = Mask[i + 5];
2513 if (M2Lo != MLo + 4 || M2Hi != MHi + 4)
2514 return false;
2515 }
2516
// Both entries of a pair must come from the same input: indices [0, 4) select
// V1, indices [MaskSize, MaskSize + 4) select V2.
2517 if (isValid(MLo, 0) && isValid(MHi, 0)) {
2518 SrcVec.push_back(V1);
2519 buildImm(MLo, MHi, 0, i);
2520 } else if (isValid(MLo, MaskSize) && isValid(MHi, MaskSize)) {
2521 SrcVec.push_back(V2);
2522 buildImm(MLo, MHi, MaskSize, i);
2523 } else {
2524 return false;
2525 }
2526 }
2527
2528 return true;
2529 }
2530
2531 /// Lower VECTOR_SHUFFLE into VPERMI (if possible).
2532 ///
2533 /// VPERMI selects two elements from each of the two vectors based on the
2534 /// mask and places them in the corresponding positions of the result vector
2535 /// in order. Only v4i32 and v4f32 types are allowed.
2536 ///
2537 /// It is possible to lower into VPERMI when the mask consists of two of the
2538 /// following forms concatenated:
2539 /// <i, j, u, v>
2540 /// <u, v, i, j>
2541 /// where i,j are in [0,4) and u,v are in [4, 8).
2542 /// For example:
2543 /// <2, 3, 4, 5>
2544 /// <5, 7, 0, 2>
2545 ///
2546 /// When undef's appear in the mask they are treated as if they were whatever
2547 /// value is necessary in order to fit the above forms.
///
/// Returns the VPERMI node on a match and an empty SDValue otherwise.
2549 MVT VT, SDValue V1, SDValue V2,
2550 SelectionDAG &DAG,
2551 const LoongArchSubtarget &Subtarget) {
// Reject every type other than v4i32/v4f32 and any mask whose length does not
// match the vector.
2552 if ((VT != MVT::v4i32 && VT != MVT::v4f32) ||
2553 Mask.size() != VT.getVectorNumElements())
2554 return SDValue();
2555
// buildVPERMIInfo ORs into MaskImm, so it has to start at 0.
2557 unsigned MaskImm = 0;
2558 if (!buildVPERMIInfo(Mask, V1, V2, SrcVec, MaskImm))
2559 return SDValue();
2560
// SrcVec[0] is the source of result elements 0-1 and SrcVec[1] the source of
// elements 2-3; the latter is passed as the first operand.
2561 return DAG.getNode(LoongArchISD::VPERMI, DL, VT, SrcVec[1], SrcVec[0],
2562 DAG.getConstant(MaskImm, DL, Subtarget.getGRLenVT()));
2563 }
2564
2565 /// Lower VECTOR_SHUFFLE into VSHUF.
2566 ///
2567 /// This mostly consists of converting the shuffle mask into a BUILD_VECTOR and
2568 /// adding it as an operand to the resulting VSHUF.
///
/// Unlike the pattern-specific lowerings there is no mask check here: a VSHUF
/// node is always returned.
2570 MVT VT, SDValue V1, SDValue V2,
2571 SelectionDAG &DAG,
2572 const LoongArchSubtarget &Subtarget) {
2573
// Every mask entry, including undef (-1), becomes a signed GRLenVT constant.
2575 for (auto M : Mask)
2576 Ops.push_back(DAG.getSignedConstant(M, DL, Subtarget.getGRLenVT()));
2577
// The mask vector has the same shape as VT but with integer elements.
2578 EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
2579 SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, Ops);
2580
2581 // VECTOR_SHUFFLE concatenates the vectors in a vectorwise fashion.
2582 // <0b00, 0b01> + <0b10, 0b11> -> <0b00, 0b01, 0b10, 0b11>
2583 // VSHUF concatenates the vectors in a bitwise fashion:
2584 // <0b00, 0b01> + <0b10, 0b11> ->
2585 // 0b0100 + 0b1110 -> 0b01001110
2586 // <0b10, 0b11, 0b00, 0b01>
2587 // We must therefore swap the operands to get the correct result.
2588 return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
2589 }
2590
2591 /// Dispatching routine to lower various 128-bit LoongArch vector shuffles.
2592 ///
2593 /// This routine breaks down the specific type of 128-bit shuffle and
2594 /// dispatches to the lowering routines accordingly.
///
/// The candidate lowerings are tried in a fixed order and the first one that
/// yields a non-empty SDValue is returned.
2596 SDValue V1, SDValue V2, SelectionDAG &DAG,
2597 const LoongArchSubtarget &Subtarget) {
2598 assert((VT.SimpleTy == MVT::v16i8 || VT.SimpleTy == MVT::v8i16 ||
2599 VT.SimpleTy == MVT::v4i32 || VT.SimpleTy == MVT::v2i64 ||
2600 VT.SimpleTy == MVT::v4f32 || VT.SimpleTy == MVT::v2f64) &&
2601 "Vector type is unsupported for lsx!");
2602 assert(V1.getSimpleValueType() == V2.getSimpleValueType() &&
2603 "Two operands have different types!");
2604 assert(VT.getVectorNumElements() == Mask.size() &&
2605 "Unexpected mask size for shuffle!");
2606 assert(Mask.size() % 2 == 0 && "Expected even mask size.");
2607
// Zeroable is the union of the undef and zero element sets; it is consumed by
// the shift and zero/any-extend lowerings below.
2608 APInt KnownUndef, KnownZero;
2609 computeZeroableShuffleElements(Mask, V1, V2, KnownUndef, KnownZero);
2610 APInt Zeroable = KnownUndef | KnownZero;
2611
2612 SDValue Result;
2613 // TODO: Add more comparison patterns.
// Lowerings tried only when the second operand is undef (single-input
// shuffles).
2614 if (V2.isUndef()) {
2615 if ((Result =
2616 lowerVECTOR_SHUFFLE_VREPLVEI(DL, Mask, VT, V1, DAG, Subtarget)))
2617 return Result;
2618 if ((Result =
2619 lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2620 return Result;
2621 if ((Result =
2622 lowerVECTOR_SHUFFLE_IsReverse(DL, Mask, VT, V1, DAG, Subtarget)))
2623 return Result;
2624
2625 // TODO: This comment may be enabled in the future to better match the
2626 // pattern for instruction selection.
2627 /* V2 = V1; */
2628 }
2629
2630 // It is recommended not to change the pattern comparison order for better
2631 // performance.
2632 if ((Result = lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG)))
2633 return Result;
2634 if ((Result = lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG)))
2635 return Result;
2636 if ((Result = lowerVECTOR_SHUFFLE_VILVH(DL, Mask, VT, V1, V2, DAG)))
2637 return Result;
2638 if ((Result = lowerVECTOR_SHUFFLE_VILVL(DL, Mask, VT, V1, V2, DAG)))
2639 return Result;
2640 if ((Result = lowerVECTOR_SHUFFLE_VPICKEV(DL, Mask, VT, V1, V2, DAG)))
2641 return Result;
2642 if ((Result = lowerVECTOR_SHUFFLE_VPICKOD(DL, Mask, VT, V1, V2, DAG)))
2643 return Result;
// With two inputs, VSHUF4I is attempted only for the 2-element 64-bit types.
2644 if ((VT.SimpleTy == MVT::v2i64 || VT.SimpleTy == MVT::v2f64) &&
2645 (Result =
2646 lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2647 return Result;
2648 if ((Result =
2649 lowerVECTOR_SHUFFLE_VEXTRINS(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2650 return Result;
2651 if ((Result = lowerVECTOR_SHUFFLEAsShift(DL, Mask, VT, V1, V2, DAG, Subtarget,
2652 Zeroable)))
2653 return Result;
2654 if ((Result =
2655 lowerVECTOR_SHUFFLE_VPERMI(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2656 return Result;
2657 if ((Result = lowerVECTOR_SHUFFLEAsZeroOrAnyExtend(DL, Mask, VT, V1, V2, DAG,
2658 Zeroable)))
2659 return Result;
2660 if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, Mask, VT, V1, V2, DAG,
2661 Subtarget)))
2662 return Result;
// widenShuffleMask is defined outside this excerpt; whatever it returns is
// handed back to the caller unchanged.
2663 if (SDValue NewShuffle = widenShuffleMask(DL, Mask, VT, V1, V2, DAG))
2664 return NewShuffle;
// Generic fallback, tried last.
2665 if ((Result =
2666 lowerVECTOR_SHUFFLE_VSHUF(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2667 return Result;
2668 return SDValue();
2669 }
2670
2671 /// Lower VECTOR_SHUFFLE into XVREPLVEI (if possible).
2672 ///
2673 /// It is a XVREPLVEI when the mask is:
2674 /// <x, x, x, ..., x+n, x+n, x+n, ...>
2675 /// where the number of x is equal to n and n is half the length of vector.
2676 ///
2677 /// When undef's appear in the mask they are treated as if they were whatever
2678 /// value is necessary in order to fit the above form.
///
/// Returns UNDEF when every mask entry is undef, a LoongArchISD::VREPLVEI node
/// on a match, and an empty SDValue otherwise.
2679 static SDValue
2681 SDValue V1, SelectionDAG &DAG,
2682 const LoongArchSubtarget &Subtarget) {
// The first defined mask entry is taken as the candidate splat index x.
2683 int SplatIndex = -1;
2684 for (const auto &M : Mask) {
2685 if (M != -1) {
2686 SplatIndex = M;
2687 break;
2688 }
2689 }
2690
2691 if (SplatIndex == -1)
2692 return DAG.getUNDEF(VT);
2693
2694 const auto &Begin = Mask.begin();
2695 const auto &End = Mask.end();
2696 int HalfSize = Mask.size() / 2;
2697
// x must lie in the lower half of the vector.
2698 if (SplatIndex >= HalfSize)
2699 return SDValue();
2700
// This assert cannot fire: SplatIndex < HalfSize was established just above.
2701 assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
// The first half of the mask must be all x and the second half all x + n
// (index step 0, i.e. a constant). fitsRegularPattern is defined outside this
// excerpt; its argument meaning is read off the doc comment above -- TODO
// confirm against its definition.
2702 if (fitsRegularPattern<int>(Begin, 1, End - HalfSize, SplatIndex, 0) &&
2703 fitsRegularPattern<int>(Begin + HalfSize, 1, End, SplatIndex + HalfSize,
2704 0)) {
2705 return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
2706 DAG.getConstant(SplatIndex, DL, Subtarget.getGRLenVT()));
2707 }
2708
2709 return SDValue();
2710 }
2711
2712 /// Lower VECTOR_SHUFFLE into XVSHUF4I (if possible).
///
/// A 4-entry mask is handled here and produces a LoongArchISD::VSHUF4I_D node
/// (or an empty SDValue if the mask does not fit). Every other mask size is
/// forwarded to lowerVECTOR_SHUFFLE_VSHUF4I.
2713 static SDValue
2715 SDValue V1, SDValue V2, SelectionDAG &DAG,
2716 const LoongArchSubtarget &Subtarget) {
2717 // XVSHUF4I_D must be handled separately because it is different from other
2718 // types of [X]VSHUF4I instructions.
2719 if (Mask.size() == 4) {
2720 unsigned MaskImm = 0;
// Mask[i] is paired with Mask[i + 2]. i runs 1 then 0, so after the shifts the
// 2-bit field for i == 0 ends up in the lowest bits of MaskImm.
2721 for (int i = 1; i >= 0; --i) {
2722 int MLo = Mask[i];
2723 int MHi = Mask[i + 2];
// MLo must be undef or one of {0, 1, 4, 5}; MHi must be undef or one of
// {2, 3, 6, 7}.
2724 if (!(MLo == -1 || (MLo >= 0 && MLo <= 1) || (MLo >= 4 && MLo <= 5)) ||
2725 !(MHi == -1 || (MHi >= 2 && MHi <= 3) || (MHi >= 6 && MHi <= 7)))
2726 return SDValue();
// When both are defined they must be exactly 2 apart.
2727 if (MHi != -1 && MLo != -1 && MHi != MLo + 2)
2728 return SDValue();
2729
// Field encoding: 0/1 for indices 0/1 (or 2/3 via MHi), 2/3 for indices 4/5
// (or 6/7 via MHi). If both entries are undef the field stays 0.
2730 MaskImm <<= 2;
2731 if (MLo != -1)
2732 MaskImm |= ((MLo <= 1) ? MLo : (MLo - 2)) & 0x3;
2733 else if (MHi != -1)
2734 MaskImm |= ((MHi <= 3) ? (MHi - 2) : (MHi - 4)) & 0x3;
2735 }
2736
2737 return DAG.getNode(LoongArchISD::VSHUF4I_D, DL, VT, V1, V2,
2738 DAG.getConstant(MaskImm, DL, Subtarget.getGRLenVT()));
2739 }
2740
2741 return lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget);
2742 }
2743
2744 /// Lower VECTOR_SHUFFLE into XVPERMI (if possible).
///
/// v8i32/v8f32 are handled through buildVPERMIInfo and produce a
/// LoongArchISD::VPERMI node. v4i64/v4f64 produce a LoongArchISD::XVPERMI node
/// that reads only V1. Any other type, or a mask whose size differs from the
/// element count, yields an empty SDValue.
2745 static SDValue
2747 SDValue V1, SDValue V2, SelectionDAG &DAG,
2748 const LoongArchSubtarget &Subtarget) {
2749 MVT GRLenVT = Subtarget.getGRLenVT();
2750 unsigned MaskSize = Mask.size();
2751 if (MaskSize != VT.getVectorNumElements())
2752 return SDValue();
2753
2754 // Consider XVPERMI_W.
2755 if (VT == MVT::v8i32 || VT == MVT::v8f32) {
// buildVPERMIInfo ORs into MaskImm, so it has to start at 0.
2757 unsigned MaskImm = 0;
2758 if (!buildVPERMIInfo(Mask, V1, V2, SrcVec, MaskImm))
2759 return SDValue();
2760
// SrcVec[0] is the source of entries 0-1 and SrcVec[1] the source of entries
// 2-3; the latter is passed as the first operand.
2761 return DAG.getNode(LoongArchISD::VPERMI, DL, VT, SrcVec[1], SrcVec[0],
2762 DAG.getConstant(MaskImm, DL, GRLenVT));
2763 }
2764
2765 // Consider XVPERMI_D.
2766 if (VT == MVT::v4i64 || VT == MVT::v4f64) {
2767 unsigned MaskImm = 0;
// One 2-bit field per result element. Undef entries leave their field at 0;
// an entry that refers to V2 (index >= MaskSize) cannot be expressed.
2768 for (unsigned i = 0; i < MaskSize; ++i) {
2769 if (Mask[i] == -1)
2770 continue;
2771 if (Mask[i] >= (int)MaskSize)
2772 return SDValue();
2773 MaskImm |= Mask[i] << (i * 2);
2774 }
2775
2776 return DAG.getNode(LoongArchISD::XVPERMI, DL, VT, V1,
2777 DAG.getConstant(MaskImm, DL, GRLenVT));
2778 }
2779
2780 return SDValue();
2781 }
2782
2783 /// Lower VECTOR_SHUFFLE into XVPERM (if possible).
///
/// Only v8i32/v8f32 with an 8-entry mask are considered. Returns a
/// LoongArchISD::XVPERM node taking V1 and a v8i32 index vector, or an empty
/// SDValue when the type is unsupported or the lowering is judged
/// unprofitable.
2785 MVT VT, SDValue V1, SelectionDAG &DAG,
2786 const LoongArchSubtarget &Subtarget) {
2787 // LoongArch LASX only has XVPERM_W.
2788 if (Mask.size() != 8 || (VT != MVT::v8i32 && VT != MVT::v8f32))
2789 return SDValue();
2790
2791 unsigned NumElts = VT.getVectorNumElements();
2792 unsigned HalfSize = NumElts / 2;
// Front* describe the first half of the mask, Back* the second half. *Lo is
// true while every entry seen is undef or in [0, HalfSize); *Hi likewise for
// [HalfSize, NumElts).
2793 bool FrontLo = true, FrontHi = true;
2794 bool BackLo = true, BackHi = true;
2795
// Undef (-1) is treated as being inside any range.
2796 auto inRange = [](int val, int low, int high) {
2797 return (val == -1) || (val >= low && val < high);
2798 };
2799
2800 for (unsigned i = 0; i < HalfSize; ++i) {
2801 int Fronti = Mask[i];
2802 int Backi = Mask[i + HalfSize];
2803
2804 FrontLo &= inRange(Fronti, 0, HalfSize);
2805 FrontHi &= inRange(Fronti, HalfSize, NumElts);
2806 BackLo &= inRange(Backi, 0, HalfSize);
2807 BackHi &= inRange(Backi, HalfSize, NumElts);
2808 }
2809
2810 // If both the lower and upper 128-bit parts access only one half of the
2811 // vector (either lower or upper), avoid using xvperm.w. The latency of
2812 // xvperm.w(3) is higher than using xvshuf(1) and xvori(1).
2813 if ((FrontLo || FrontHi) && (BackLo || BackHi))
2814 return SDValue();
2815
// Build the index vector: undef mask entries become UNDEF, all others
// GRLenVT constants.
2817 MVT GRLenVT = Subtarget.getGRLenVT();
2818 for (unsigned i = 0; i < NumElts; ++i)
2819 Masks.push_back(Mask[i] == -1 ? DAG.getUNDEF(GRLenVT)
2820 : DAG.getConstant(Mask[i], DL, GRLenVT));
2821 SDValue MaskVec = DAG.getBuildVector(MVT::v8i32, DL, Masks);
2822
2823 return DAG.getNode(LoongArchISD::XVPERM, DL, VT, V1, MaskVec);
2824 }
2825
2826 /// Lower VECTOR_SHUFFLE into XVPACKEV (if possible).
///
/// Thin wrapper: forwards all arguments unchanged to
/// lowerVECTOR_SHUFFLE_VPACKEV and returns its result.
2828 MVT VT, SDValue V1, SDValue V2,
2829 SelectionDAG &DAG) {
2830 return lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG);
2831 }
2832
2833 /// Lower VECTOR_SHUFFLE into XVPACKOD (if possible).
///
/// Thin wrapper: forwards all arguments unchanged to
/// lowerVECTOR_SHUFFLE_VPACKOD and returns its result.
2835 MVT VT, SDValue V1, SDValue V2,
2836 SelectionDAG &DAG) {
2837 return lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG);
2838 }
2839
2840 /// Lower VECTOR_SHUFFLE into XVILVH (if possible).
///
/// The mask is checked in two halves. In each half, the entries at even and at
/// odd positions must each count up by 1 from a fixed start in one input:
/// HalfSize - LeftSize for the first half and HalfSize + LeftSize for the
/// second, plus Mask.size() when the second input is meant. Returns a
/// LoongArchISD::VILVH node on a match and an empty SDValue otherwise.
2842 MVT VT, SDValue V1, SDValue V2,
2843 SelectionDAG &DAG) {
2844
2845 const auto &Begin = Mask.begin();
2846 const auto &End = Mask.end();
2847 unsigned HalfSize = Mask.size() / 2;
2848 unsigned LeftSize = HalfSize / 2;
2849 SDValue OriV1 = V1, OriV2 = V2;
2850
// Even positions of both mask halves. fitsRegularPattern is defined outside
// this excerpt; its arguments are read as (first, position stride, last,
// expected start, expected step) -- TODO confirm against its definition.
2851 if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, HalfSize - LeftSize,
2852 1) &&
2853 fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize + LeftSize, 1))
2854 V1 = OriV1;
2855 else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize,
2856 Mask.size() + HalfSize - LeftSize, 1) &&
2857 fitsRegularPattern<int>(Begin + HalfSize, 2, End,
2858 Mask.size() + HalfSize + LeftSize, 1))
2859 V1 = OriV2;
2860 else
2861 return SDValue();
2862
// Odd positions of both mask halves.
2863 if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, HalfSize - LeftSize,
2864 1) &&
2865 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize + LeftSize,
2866 1))
2867 V2 = OriV1;
2868 else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize,
2869 Mask.size() + HalfSize - LeftSize, 1) &&
2870 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
2871 Mask.size() + HalfSize + LeftSize, 1))
2872 V2 = OriV2;
2873 else
2874 return SDValue();
2875
// The input chosen for the odd positions is passed as the first operand.
2876 return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
2877 }
2878
2879 /// Lower VECTOR_SHUFFLE into XVILVL (if possible).
///
/// The mask is checked in two halves. In each half, the entries at even and at
/// odd positions must each count up by 1 from a fixed start in one input:
/// 0 for the first half and HalfSize for the second, plus Mask.size() when the
/// second input is meant. Returns a LoongArchISD::VILVL node on a match and an
/// empty SDValue otherwise.
2881 MVT VT, SDValue V1, SDValue V2,
2882 SelectionDAG &DAG) {
2883
2884 const auto &Begin = Mask.begin();
2885 const auto &End = Mask.end();
2886 unsigned HalfSize = Mask.size() / 2;
2887 SDValue OriV1 = V1, OriV2 = V2;
2888
// Even positions of both mask halves. fitsRegularPattern is defined outside
// this excerpt; its arguments are read as (first, position stride, last,
// expected start, expected step) -- TODO confirm against its definition.
2889 if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, 0, 1) &&
2890 fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize, 1))
2891 V1 = OriV1;
2892 else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, Mask.size(), 1) &&
2893 fitsRegularPattern<int>(Begin + HalfSize, 2, End,
2894 Mask.size() + HalfSize, 1))
2895 V1 = OriV2;
2896 else
2897 return SDValue();
2898
// Odd positions of both mask halves.
2899 if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, 0, 1) &&
2900 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize, 1))
2901 V2 = OriV1;
2902 else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, Mask.size(),
2903 1) &&
2904 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
2905 Mask.size() + HalfSize, 1))
2906 V2 = OriV2;
2907 else
2908 return SDValue();
2909
// The input chosen for the odd positions is passed as the first operand.
2910 return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
2911 }
2912
2913 /// Lower VECTOR_SHUFFLE into XVPICKEV (if possible).
///
/// The mask is checked in four quarters. The first and third quarters must
/// step by 2 through one input, starting at 0 and HalfSize respectively; the
/// second and fourth quarters must do the same through one (possibly
/// different) input. Starts are offset by Mask.size() for the second input.
/// Returns a LoongArchISD::VPICKEV node on a match and an empty SDValue
/// otherwise.
2915 MVT VT, SDValue V1, SDValue V2,
2916 SelectionDAG &DAG) {
2917
2918 const auto &Begin = Mask.begin();
2919 const auto &LeftMid = Mask.begin() + Mask.size() / 4;
2920 const auto &Mid = Mask.begin() + Mask.size() / 2;
2921 const auto &RightMid = Mask.end() - Mask.size() / 4;
2922 const auto &End = Mask.end();
2923 unsigned HalfSize = Mask.size() / 2;
2924 SDValue OriV1 = V1, OriV2 = V2;
2925
// First and third quarters. fitsRegularPattern is defined outside this
// excerpt; its arguments are read as (first, position stride, last, expected
// start, expected step) -- TODO confirm against its definition.
2926 if (fitsRegularPattern<int>(Begin, 1, LeftMid, 0, 2) &&
2927 fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize, 2))
2928 V1 = OriV1;
2929 else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size(), 2) &&
2930 fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize, 2))
2931 V1 = OriV2;
2932 else
2933 return SDValue();
2934
// Second and fourth quarters.
2935 if (fitsRegularPattern<int>(LeftMid, 1, Mid, 0, 2) &&
2936 fitsRegularPattern<int>(RightMid, 1, End, HalfSize, 2))
2937 V2 = OriV1;
2938 else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size(), 2) &&
2939 fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize, 2))
2940 V2 = OriV2;
2941
2942 else
2943 return SDValue();
2944
// The input chosen for the second and fourth quarters is passed first.
2945 return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
2946 }
2947
2948 /// Lower VECTOR_SHUFFLE into XVPICKOD (if possible).
///
/// The mask is checked in four quarters. The first and third quarters must
/// step by 2 through one input, starting at 1 and HalfSize + 1 respectively;
/// the second and fourth quarters must do the same through one (possibly
/// different) input. Starts are offset by Mask.size() for the second input.
/// Returns a LoongArchISD::VPICKOD node on a match and an empty SDValue
/// otherwise.
2950 MVT VT, SDValue V1, SDValue V2,
2951 SelectionDAG &DAG) {
2952
2953 const auto &Begin = Mask.begin();
2954 const auto &LeftMid = Mask.begin() + Mask.size() / 4;
2955 const auto &Mid = Mask.begin() + Mask.size() / 2;
2956 const auto &RightMid = Mask.end() - Mask.size() / 4;
2957 const auto &End = Mask.end();
2958 unsigned HalfSize = Mask.size() / 2;
2959 SDValue OriV1 = V1, OriV2 = V2;
2960
// First and third quarters. fitsRegularPattern is defined outside this
// excerpt; its arguments are read as (first, position stride, last, expected
// start, expected step) -- TODO confirm against its definition.
2961 if (fitsRegularPattern<int>(Begin, 1, LeftMid, 1, 2) &&
2962 fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize + 1, 2))
2963 V1 = OriV1;
2964 else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size() + 1, 2) &&
2965 fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize + 1,
2966 2))
2967 V1 = OriV2;
2968 else
2969 return SDValue();
2970
// Second and fourth quarters.
2971 if (fitsRegularPattern<int>(LeftMid, 1, Mid, 1, 2) &&
2972 fitsRegularPattern<int>(RightMid, 1, End, HalfSize + 1, 2))
2973 V2 = OriV1;
2974 else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size() + 1, 2) &&
2975 fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize + 1,
2976 2))
2977 V2 = OriV2;
2978 else
2979 return SDValue();
2980
// The input chosen for the second and fourth quarters is passed first.
2981 return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
2982 }
2983
2984 /// Lower VECTOR_SHUFFLE into XVEXTRINS (if possible).
///
/// The mask must be an identity of V1 or of V2 except at two positions that
/// are HalfSize apart (one of them may be undef). Both positions must read
/// elements that are HalfSize apart in a single source vector. No target node
/// is built here: the result is two EXTRACT_VECTOR_ELT / INSERT_VECTOR_ELT
/// pairs, or an empty SDValue when the mask does not fit.
2985 static SDValue
2987 SDValue V1, SDValue V2, SelectionDAG &DAG,
2988 const LoongArchSubtarget &Subtarget) {
2989 int NumElts = VT.getVectorNumElements();
2990 int HalfSize = NumElts / 2;
2991 MVT EltVT = VT.getVectorElementType();
2992 MVT GRLenVT = Subtarget.getGRLenVT();
2993
2994 if ((int)Mask.size() != NumElts)
2995 return SDValue();
2996
// Base selects the vector the mask must otherwise be an identity of:
// Base == 0 means V1, Base == NumElts means V2.
2997 auto tryLowerToExtrAndIns = [&](int Base) -> SDValue {
// Positions of defined mask entries that deviate from Base + i, in
// increasing order. More than two deviations rules the pattern out.
2998 SmallVector<int> DiffPos;
2999 for (int i = 0; i < NumElts; ++i) {
3000 if (Mask[i] == -1)
3001 continue;
3002 if (Mask[i] != Base + i) {
3003 DiffPos.push_back(i);
3004 if (DiffPos.size() > 2)
3005 return SDValue();
3006 }
3007 }
3008
3009 // Need exactly two differing elements to lower into XVEXTRINS.
3010 // If only one differing element, the element at a distance of
3011 // HalfSize from it must be undef.
3012 if (DiffPos.size() == 1) {
3013 if (DiffPos[0] < HalfSize && Mask[DiffPos[0] + HalfSize] == -1)
3014 DiffPos.push_back(DiffPos[0] + HalfSize);
3015 else if (DiffPos[0] >= HalfSize && Mask[DiffPos[0] - HalfSize] == -1)
3016 DiffPos.insert(DiffPos.begin(), DiffPos[0] - HalfSize);
3017 else
3018 return SDValue();
3019 }
// This also rejects the case of zero differing elements.
3020 if (DiffPos.size() != 2 || DiffPos[1] != DiffPos[0] + HalfSize)
3021 return SDValue();
3022
3023 // DiffMask must be in its low or high part.
3024 int DiffMaskLo = Mask[DiffPos[0]];
3025 int DiffMaskHi = Mask[DiffPos[1]];
// If one of the two entries is the undef partner added above, derive it from
// the other one.
3026 DiffMaskLo = DiffMaskLo == -1 ? DiffMaskHi - HalfSize : DiffMaskLo;
3027 DiffMaskHi = DiffMaskHi == -1 ? DiffMaskLo + HalfSize : DiffMaskHi;
3028 if (!(DiffMaskLo >= 0 && DiffMaskLo < HalfSize) &&
3029 !(DiffMaskLo >= NumElts && DiffMaskLo < NumElts + HalfSize))
3030 return SDValue();
3031 if (!(DiffMaskHi >= HalfSize && DiffMaskHi < NumElts) &&
3032 !(DiffMaskHi >= NumElts + HalfSize && DiffMaskHi < 2 * NumElts))
3033 return SDValue();
3034 if (DiffMaskHi != DiffMaskLo + HalfSize)
3035 return SDValue();
3036
3037 // Determine source vector and source index.
3038 SDValue SrcVec = (DiffMaskLo < HalfSize) ? V1 : V2;
3039 int SrcIdxLo =
3040 (DiffMaskLo < HalfSize) ? DiffMaskLo : (DiffMaskLo - NumElts);
3041 bool IsEltFP = EltVT.isFloatingPoint();
3042
3043 // Replace with 2*EXTRACT_VECTOR_ELT + 2*INSERT_VECTOR_ELT, it will match
3044 // the patterns of XVEXTRINS in tablegen.
3045 SDValue BaseVec = (Base == 0) ? V1 : V2;
3046 SDValue EltLo =
3047 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IsEltFP ? EltVT : GRLenVT,
3048 SrcVec, DAG.getConstant(SrcIdxLo, DL, GRLenVT));
3049 SDValue InsLo = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, BaseVec, EltLo,
3050 DAG.getConstant(DiffPos[0], DL, GRLenVT));
3051 SDValue EltHi =
3052 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IsEltFP ? EltVT : GRLenVT,
3053 SrcVec, DAG.getConstant(SrcIdxLo + HalfSize, DL, GRLenVT));
// The second insertion is chained onto the result of the first.
3054 SDValue Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, InsLo, EltHi,
3055 DAG.getConstant(DiffPos[1], DL, GRLenVT));
3056
3057 return Result;
3058 };
3059
3060 // Try V1 as the base vector (Base = 0) first, then V2 (Base = n).
3061 if (SDValue Result = tryLowerToExtrAndIns(0))
3062 return Result;
3063 return tryLowerToExtrAndIns(NumElts);
3064 }
3065
3066 /// Lower VECTOR_SHUFFLE into XVINSVE0 (if possible).
///
/// Matches masks that are an identity of one input except for exactly one
/// position, which takes element 0 of the other input. Returns a
/// LoongArchISD::XVINSVE0 node on a match and an empty SDValue otherwise.
3067 static SDValue
3069 SDValue V1, SDValue V2, SelectionDAG &DAG,
3070 const LoongArchSubtarget &Subtarget) {
3071 // LoongArch LASX only supports xvinsve0.{w/d}.
3072 if (VT != MVT::v8i32 && VT != MVT::v8f32 && VT != MVT::v4i64 &&
3073 VT != MVT::v4f64)
3074 return SDValue();
3075
3076 MVT GRLenVT = Subtarget.getGRLenVT();
3077 int MaskSize = Mask.size();
3078 assert(MaskSize == (int)VT.getVectorNumElements() && "Unexpected mask size");
3079
3080 // Check if exactly one element of the Mask is replaced by 'Replaced', while
3081 // all other elements are either 'Base + i' or undef (-1). On success, return
3082 // the index of the replaced element. Otherwise, just return -1.
3083 auto checkReplaceOne = [&](int Base, int Replaced) -> int {
3084 int Idx = -1;
3085 for (int i = 0; i < MaskSize; ++i) {
3086 if (Mask[i] == Base + i || Mask[i] == -1)
3087 continue;
// Any deviation other than 'Replaced', or a second occurrence of it, fails.
3088 if (Mask[i] != Replaced)
3089 return -1;
3090 if (Idx == -1)
3091 Idx = i;
3092 else
3093 return -1;
3094 }
3095 return Idx;
3096 };
3097
3098 // Case 1: the lowest element of V2 replaces one element in V1.
3099 int Idx = checkReplaceOne(0, MaskSize);
3100 if (Idx != -1)
3101 return DAG.getNode(LoongArchISD::XVINSVE0, DL, VT, V1, V2,
3102 DAG.getConstant(Idx, DL, GRLenVT));
3103
3104 // Case 2: the lowest element of V1 replaces one element in V2.
3105 Idx = checkReplaceOne(MaskSize, 0);
3106 if (Idx != -1)
3107 return DAG.getNode(LoongArchISD::XVINSVE0, DL, VT, V2, V1,
3108 DAG.getConstant(Idx, DL, GRLenVT));
3109
3110 return SDValue();
3111 }
3112
3113 /// Lower VECTOR_SHUFFLE into XVSHUF (if possible).
///
/// Entries in the first half of the mask may only refer to the first half of
/// V1 or V2, and entries in the second half only to the second half of V1 or
/// V2; otherwise an empty SDValue is returned. On success the remapped
/// indices are emitted as a BUILD_VECTOR operand of a LoongArchISD::VSHUF
/// node.
3115 MVT VT, SDValue V1, SDValue V2,
3116 SelectionDAG &DAG) {
3117
3118 int MaskSize = Mask.size();
3119 int HalfSize = Mask.size() / 2;
3120 const auto &Begin = Mask.begin();
3121 const auto &Mid = Mask.begin() + HalfSize;
3122 const auto &End = Mask.end();
3123
3124 // VECTOR_SHUFFLE concatenates the vectors:
3125 // <0, 1, 2, 3, 4, 5, 6, 7> + <8, 9, 10, 11, 12, 13, 14, 15>
3126 // shuffling ->
3127 // <0, 1, 2, 3, 8, 9, 10, 11> <4, 5, 6, 7, 12, 13, 14, 15>
3128 //
3129 // XVSHUF concatenates the vectors:
3130 // <a0, a1, a2, a3, b0, b1, b2, b3> + <a4, a5, a6, a7, b4, b5, b6, b7>
3131 // shuffling ->
3132 // <a0, a1, a2, a3, a4, a5, a6, a7> + <b0, b1, b2, b3, b4, b5, b6, b7>
3133 SmallVector<SDValue, 8> MaskAlloc;
// First half of the mask: indices [0, HalfSize) are kept as they are, indices
// [MaskSize, MaskSize + HalfSize) are reduced by HalfSize. Undef becomes 0.
3134 for (auto it = Begin; it < Mid; it++) {
3135 if (*it < 0) // UNDEF
3136 MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
3137 else if ((*it >= 0 && *it < HalfSize) ||
3138 (*it >= MaskSize && *it < MaskSize + HalfSize)) {
3139 int M = *it < HalfSize ? *it : *it - HalfSize;
3140 MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
3141 } else
3142 return SDValue();
3143 }
3144 assert((int)MaskAlloc.size() == HalfSize && "xvshuf convert failed!");
3145
// Second half of the mask: indices [HalfSize, MaskSize) are reduced by
// HalfSize, indices [MaskSize + HalfSize, 2 * MaskSize) by MaskSize. Undef
// becomes 0.
3146 for (auto it = Mid; it < End; it++) {
3147 if (*it < 0) // UNDEF
3148 MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
3149 else if ((*it >= HalfSize && *it < MaskSize) ||
3150 (*it >= MaskSize + HalfSize && *it < MaskSize * 2)) {
3151 int M = *it < MaskSize ? *it - HalfSize : *it - MaskSize;
3152 MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
3153 } else
3154 return SDValue();
3155 }
3156 assert((int)MaskAlloc.size() == MaskSize && "xvshuf convert failed!");
3157
// The mask vector has the same shape as VT but with integer elements. V2 is
// passed before V1.
3158 EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
3159 SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, MaskAlloc);
3160 return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
3161 }
3162
3163/// Shuffle vectors by lane to generate more optimized instructions.
3164/// 256-bit shuffles are always considered as 2-lane 128-bit shuffles.
3165///
3166/// Therefore, except for the following four cases, other cases are regarded
3167/// as cross-lane shuffles, where optimization is relatively limited.
3168///
3169/// - Shuffle high, low lanes of two inputs vector
3170/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 3, 6>
3171/// - Shuffle low, high lanes of two inputs vector
3172/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 0, 5>
3173/// - Shuffle low, low lanes of two inputs vector
3174/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 3, 6>
3175/// - Shuffle high, high lanes of two inputs vector
3176/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 0, 5>
3177///
3178/// The first case is the closest to LoongArch instructions and the other
3179/// cases need to be converted to it for processing.
3180///
3181/// This function will return true for the last three cases above and will
3182/// modify V1, V2 and Mask. Otherwise, return false for the first case and
3183/// cross-lane shuffle cases.
///
/// V1, V2 and Mask are left untouched on every path that returns false. V2 is
/// only rewritten when it is not undef.
3185 const SDLoc &DL, MutableArrayRef<int> Mask, MVT VT, SDValue &V1,
3186 SDValue &V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget) {
3187
3188 enum HalfMaskType { HighLaneTy, LowLaneTy, None };
3189
3190 int MaskSize = Mask.size();
3191 int HalfSize = Mask.size() / 2;
3192 MVT GRLenVT = Subtarget.getGRLenVT();
3193
3194 HalfMaskType preMask = None, postMask = None;
3195
 // Classify the first half of the mask. HighLaneTy: every defined index
 // selects from the first half of V1 or V2. LowLaneTy: every defined index
 // selects from the second half of V1 or V2. A half made only of undef
 // entries satisfies the first test that is tried.
3196 if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
3197 return M < 0 || (M >= 0 && M < HalfSize) ||
3198 (M >= MaskSize && M < MaskSize + HalfSize);
3199 }))
3200 preMask = HighLaneTy;
3201 else if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
3202 return M < 0 || (M >= HalfSize && M < MaskSize) ||
3203 (M >= MaskSize + HalfSize && M < MaskSize * 2);
3204 }))
3205 preMask = LowLaneTy;
3206
 // Classify the second half the same way, but test LowLaneTy first.
3207 if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
3208 return M < 0 || (M >= HalfSize && M < MaskSize) ||
3209 (M >= MaskSize + HalfSize && M < MaskSize * 2);
3210 }))
3211 postMask = LowLaneTy;
3212 else if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
3213 return M < 0 || (M >= 0 && M < HalfSize) ||
3214 (M >= MaskSize && M < MaskSize + HalfSize);
3215 }))
3216 postMask = HighLaneTy;
3217
3218 // The pre-half of mask is high lane type, and the post-half of mask
3219 // is low lane type, which is closest to the LoongArch instructions.
3220 //
3221 // Note: In the LoongArch architecture, the high lane of mask corresponds
3222 // to the lower 128-bit of vector register, and the low lane of mask
3223 // corresponds the higher 128-bit of vector register.
3224 if (preMask == HighLaneTy && postMask == LowLaneTy) {
3225 return false;
3226 }
3227 if (preMask == LowLaneTy && postMask == HighLaneTy) {
 // NOTE(review): immediate 0b01001110 presumably swaps the two 128-bit
 // halves of the v4i64 view - confirm against the XVPERMI definition.
3228 V1 = DAG.getBitcast(MVT::v4i64, V1);
3229 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
3230 DAG.getConstant(0b01001110, DL, GRLenVT));
3231 V1 = DAG.getBitcast(VT, V1);
3232
3233 if (!V2.isUndef()) {
3234 V2 = DAG.getBitcast(MVT::v4i64, V2);
3235 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
3236 DAG.getConstant(0b01001110, DL, GRLenVT));
3237 V2 = DAG.getBitcast(VT, V2);
3238 }
3239
 // Re-point the mask at the permuted operands: defined entries of the
 // first half move down by HalfSize, those of the second half move up.
3240 for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
3241 *it = *it < 0 ? *it : *it - HalfSize;
3242 }
3243 for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
3244 *it = *it < 0 ? *it : *it + HalfSize;
3245 }
3246 } else if (preMask == LowLaneTy && postMask == LowLaneTy) {
 // NOTE(review): immediate 0b11101110 presumably replicates the upper
 // 128-bit half into both halves - confirm against the XVPERMI definition.
3247 V1 = DAG.getBitcast(MVT::v4i64, V1);
3248 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
3249 DAG.getConstant(0b11101110, DL, GRLenVT));
3250 V1 = DAG.getBitcast(VT, V1);
3251
3252 if (!V2.isUndef()) {
3253 V2 = DAG.getBitcast(MVT::v4i64, V2);
3254 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
3255 DAG.getConstant(0b11101110, DL, GRLenVT));
3256 V2 = DAG.getBitcast(VT, V2);
3257 }
3258
 // Only the first half of the mask needs to be moved down by HalfSize.
3259 for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
3260 *it = *it < 0 ? *it : *it - HalfSize;
3261 }
3262 } else if (preMask == HighLaneTy && postMask == HighLaneTy) {
 // NOTE(review): immediate 0b01000100 presumably replicates the lower
 // 128-bit half into both halves - confirm against the XVPERMI definition.
3263 V1 = DAG.getBitcast(MVT::v4i64, V1);
3264 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
3265 DAG.getConstant(0b01000100, DL, GRLenVT));
3266 V1 = DAG.getBitcast(VT, V1);
3267
3268 if (!V2.isUndef()) {
3269 V2 = DAG.getBitcast(MVT::v4i64, V2);
3270 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
3271 DAG.getConstant(0b01000100, DL, GRLenVT));
3272 V2 = DAG.getBitcast(VT, V2);
3273 }
3274
 // Only the second half of the mask needs to be moved up by HalfSize.
3275 for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
3276 *it = *it < 0 ? *it : *it + HalfSize;
3277 }
3278 } else { // cross-lane
3279 return false;
3280 }
3281
3282 return true;
3283}
3284
3285/// Lower VECTOR_SHUFFLE as lane permute and then shuffle (if possible).
3286/// Only for 256-bit vector.
3287///
3288/// For example:
3289/// %2 = shufflevector <4 x i64> %0, <4 x i64> poison,
3290/// <4 x i64> <i32 0, i32 3, i32 2, i32 0>
3291/// is lowered to:
3292/// (XVPERMI $xr2, $xr0, 78)
3293/// (XVSHUF $xr1, $xr2, $xr0)
3294/// (XVORI $xr0, $xr1, 0)
///
/// Returns an empty SDValue when no mask element crosses a 128-bit lane.
/// Only V1 is read by the body; V2 is not referenced.
3296 ArrayRef<int> Mask,
3297 MVT VT, SDValue V1,
3298 SDValue V2,
3299 SelectionDAG &DAG) {
3300 assert(VT.is256BitVector() && "Only for 256-bit vector shuffles!");
3301 int Size = Mask.size();
3302 int LaneSize = Size / 2;
3303
 // LaneCrossing[L] records that some element is taken from lane L while
 // being placed in the other lane.
3304 bool LaneCrossing[2] = {false, false};
3305 for (int i = 0; i < Size; ++i)
3306 if (Mask[i] >= 0 && ((Mask[i] % Size) / LaneSize) != (i / LaneSize))
3307 LaneCrossing[(Mask[i] % Size) / LaneSize] = true;
3308
3309 // Bail out if no element crosses lanes; there is nothing to permute.
3310 if (!LaneCrossing[0] && !LaneCrossing[1])
3311 return SDValue();
3312
 // Redirect every lane-crossing element to the second shuffle operand
 // (offset Size), keeping its in-lane offset but using the lane of the
 // destination position.
3313 SmallVector<int> InLaneMask;
3314 InLaneMask.assign(Mask.begin(), Mask.end());
3315 for (int i = 0; i < Size; ++i) {
3316 int &M = InLaneMask[i];
3317 if (M < 0)
3318 continue;
3319 if (((M % Size) / LaneSize) != (i / LaneSize))
3320 M = (M % LaneSize) + ((i / LaneSize) * LaneSize) + Size;
3321 }
3322
 // Flipped is V1 with its two 128-bit halves exchanged, built as a v4i64
 // shuffle with mask {2, 3, 0, 1}.
3323 SDValue Flipped = DAG.getBitcast(MVT::v4i64, V1);
3324 Flipped = DAG.getVectorShuffle(MVT::v4i64, DL, Flipped,
3325 DAG.getUNDEF(MVT::v4i64), {2, 3, 0, 1});
3326 Flipped = DAG.getBitcast(VT, Flipped);
3327 return DAG.getVectorShuffle(VT, DL, V1, Flipped, InLaneMask);
3328}
3329
3330/// Dispatching routine to lower various 256-bit LoongArch vector shuffles.
3331///
3332/// This routine breaks down the specific type of 256-bit shuffle and
3333/// dispatches to the lowering routines accordingly.
///
/// The helpers are tried in a fixed order and the first non-empty result is
/// returned. An empty SDValue is returned when none of them matches.
3335 SDValue V1, SDValue V2, SelectionDAG &DAG,
3336 const LoongArchSubtarget &Subtarget) {
3337 assert((VT.SimpleTy == MVT::v32i8 || VT.SimpleTy == MVT::v16i16 ||
3338 VT.SimpleTy == MVT::v8i32 || VT.SimpleTy == MVT::v4i64 ||
3339 VT.SimpleTy == MVT::v8f32 || VT.SimpleTy == MVT::v4f64) &&
3340 "Vector type is unsupported for lasx!");
3341 assert(V1.getSimpleValueType() == V2.getSimpleValueType() &&
3342 "Two operands have different types!");
3343 assert(VT.getVectorNumElements() == Mask.size() &&
3344 "Unexpected mask size for shuffle!");
3345 assert(Mask.size() % 2 == 0 && "Expected even mask size.");
3346 assert(Mask.size() >= 4 && "Mask size is less than 4.");
3347
 // Zeroable is only consumed by the shift-based lowering further down.
3348 APInt KnownUndef, KnownZero;
3349 computeZeroableShuffleElements(Mask, V1, V2, KnownUndef, KnownZero);
3350 APInt Zeroable = KnownUndef | KnownZero;
3351
3352 SDValue Result;
3353 // TODO: Add more comparison patterns.
 // Single-input shuffles get a first round of dedicated patterns.
3354 if (V2.isUndef()) {
3355 if ((Result =
3356 lowerVECTOR_SHUFFLE_XVREPLVEI(DL, Mask, VT, V1, DAG, Subtarget)))
3357 return Result;
3358 if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, Mask, VT, V1, V2, DAG,
3359 Subtarget)))
3360 return Result;
3361 // Try to widen vectors to gain more optimization opportunities.
3362 if (SDValue NewShuffle = widenShuffleMask(DL, Mask, VT, V1, V2, DAG))
3363 return NewShuffle;
3364 if ((Result =
3365 lowerVECTOR_SHUFFLE_XVPERMI(DL, Mask, VT, V1, V2, DAG, Subtarget)))
3366 return Result;
3367 if ((Result = lowerVECTOR_SHUFFLE_XVPERM(DL, Mask, VT, V1, DAG, Subtarget)))
3368 return Result;
3369 if ((Result =
3370 lowerVECTOR_SHUFFLE_IsReverse(DL, Mask, VT, V1, DAG, Subtarget)))
3371 return Result;
3372
3373 // TODO: This comment may be enabled in the future to better match the
3374 // pattern for instruction selection.
3375 /* V2 = V1; */
3376 }
3377
3378 // It is recommended not to change the pattern comparison order for better
3379 // performance.
3380 if ((Result = lowerVECTOR_SHUFFLE_XVPACKEV(DL, Mask, VT, V1, V2, DAG)))
3381 return Result;
3382 if ((Result = lowerVECTOR_SHUFFLE_XVPACKOD(DL, Mask, VT, V1, V2, DAG)))
3383 return Result;
3384 if ((Result = lowerVECTOR_SHUFFLE_XVILVH(DL, Mask, VT, V1, V2, DAG)))
3385 return Result;
3386 if ((Result = lowerVECTOR_SHUFFLE_XVILVL(DL, Mask, VT, V1, V2, DAG)))
3387 return Result;
3388 if ((Result = lowerVECTOR_SHUFFLE_XVPICKEV(DL, Mask, VT, V1, V2, DAG)))
3389 return Result;
3390 if ((Result = lowerVECTOR_SHUFFLE_XVPICKOD(DL, Mask, VT, V1, V2, DAG)))
3391 return Result;
 // For two-input shuffles XVSHUF4I is only attempted on 4 x 64-bit types.
3392 if ((VT.SimpleTy == MVT::v4i64 || VT.SimpleTy == MVT::v4f64) &&
3393 (Result =
3394 lowerVECTOR_SHUFFLE_XVSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget)))
3395 return Result;
3396 if ((Result =
3397 lowerVECTOR_SHUFFLE_XVEXTRINS(DL, Mask, VT, V1, V2, DAG, Subtarget)))
3398 return Result;
3399 if ((Result = lowerVECTOR_SHUFFLEAsShift(DL, Mask, VT, V1, V2, DAG, Subtarget,
3400 Zeroable)))
3401 return Result;
3402 if ((Result =
3403 lowerVECTOR_SHUFFLE_XVPERMI(DL, Mask, VT, V1, V2, DAG, Subtarget)))
3404 return Result;
3405 if ((Result =
3406 lowerVECTOR_SHUFFLE_XVINSVE0(DL, Mask, VT, V1, V2, DAG, Subtarget)))
3407 return Result;
3408 if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, Mask, VT, V1, V2, DAG,
3409 Subtarget)))
3410 return Result;
3411
3412 // canonicalize non cross-lane shuffle vector
 // When canonicalization succeeds, V1, V2 and NewMask have been rewritten
 // and the whole dispatch is retried on them.
3413 SmallVector<int> NewMask(Mask);
3414 if (canonicalizeShuffleVectorByLane(DL, NewMask, VT, V1, V2, DAG, Subtarget))
3415 return lower256BitShuffle(DL, NewMask, VT, V1, V2, DAG, Subtarget);
3416
3417 // FIXME: Handling the remaining cases earlier can degrade performance
3418 // in some situations. Further analysis is required to enable more
3419 // effective optimizations.
3420 if (V2.isUndef()) {
3421 if ((Result = lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(DL, NewMask, VT,
3422 V1, V2, DAG)))
3423 return Result;
3424 }
3425
 // Last resorts: widen the mask, then fall back to the generic XVSHUF.
3426 if (SDValue NewShuffle = widenShuffleMask(DL, NewMask, VT, V1, V2, DAG))
3427 return NewShuffle;
3428 if ((Result = lowerVECTOR_SHUFFLE_XVSHUF(DL, NewMask, VT, V1, V2, DAG)))
3429 return Result;
3430
3431 return SDValue();
3432}
3433
3434SDValue LoongArchTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
3435 SelectionDAG &DAG) const {
3436 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
3437 ArrayRef<int> OrigMask = SVOp->getMask();
3438 SDValue V1 = Op.getOperand(0);
3439 SDValue V2 = Op.getOperand(1);
3440 MVT VT = Op.getSimpleValueType();
3441 int NumElements = VT.getVectorNumElements();
3442 SDLoc DL(Op);
3443
3444 bool V1IsUndef = V1.isUndef();
3445 bool V2IsUndef = V2.isUndef();
3446 if (V1IsUndef && V2IsUndef)
3447 return DAG.getUNDEF(VT);
3448
3449 // When we create a shuffle node we put the UNDEF node to second operand,
3450 // but in some cases the first operand may be transformed to UNDEF.
3451 // In this case we should just commute the node.
3452 if (V1IsUndef)
3453 return DAG.getCommutedVectorShuffle(*SVOp);
3454
3455 // Check for non-undef masks pointing at an undef vector and make the masks
3456 // undef as well. This makes it easier to match the shuffle based solely on
3457 // the mask.
3458 if (V2IsUndef &&
3459 any_of(OrigMask, [NumElements](int M) { return M >= NumElements; })) {
3460 SmallVector<int, 8> NewMask(OrigMask);
3461 for (int &M : NewMask)
3462 if (M >= NumElements)
3463 M = -1;
3464 return DAG.getVectorShuffle(VT, DL, V1, V2, NewMask);
3465 }
3466
3467 // Check for illegal shuffle mask element index values.
3468 int MaskUpperLimit = OrigMask.size() * (V2IsUndef ? 1 : 2);
3469 (void)MaskUpperLimit;
3470 assert(llvm::all_of(OrigMask,
3471 [&](int M) { return -1 <= M && M < MaskUpperLimit; }) &&
3472 "Out of bounds shuffle index");
3473
3474 // For each vector width, delegate to a specialized lowering routine.
3475 if (VT.is128BitVector())
3476 return lower128BitShuffle(DL, OrigMask, VT, V1, V2, DAG, Subtarget);
3477
3478 if (VT.is256BitVector())
3479 return lower256BitShuffle(DL, OrigMask, VT, V1, V2, DAG, Subtarget);
3480
3481 return SDValue();
3482}
3483
3484SDValue LoongArchTargetLowering::lowerFP_TO_FP16(SDValue Op,
3485 SelectionDAG &DAG) const {
3486 // Custom lower to ensure the libcall return is passed in an FPR on hard
3487 // float ABIs.
3488 SDLoc DL(Op);
3489 MakeLibCallOptions CallOptions;
3490 SDValue Op0 = Op.getOperand(0);
3491 SDValue Chain = SDValue();
3492 RTLIB::Libcall LC = RTLIB::getFPROUND(Op0.getValueType(), MVT::f16);
3493 SDValue Res;
3494 std::tie(Res, Chain) =
3495 makeLibCall(DAG, LC, MVT::f32, Op0, CallOptions, DL, Chain);
3496 if (Subtarget.is64Bit())
3497 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Res);
3498 return DAG.getBitcast(MVT::i32, Res);
3499}
3500
3501SDValue LoongArchTargetLowering::lowerFP16_TO_FP(SDValue Op,
3502 SelectionDAG &DAG) const {
3503 // Custom lower to ensure the libcall argument is passed in an FPR on hard
3504 // float ABIs.
3505 SDLoc DL(Op);
3506 MakeLibCallOptions CallOptions;
3507 SDValue Op0 = Op.getOperand(0);
3508 SDValue Chain = SDValue();
3509 SDValue Arg = Subtarget.is64Bit() ? DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64,
3510 DL, MVT::f32, Op0)
3511 : DAG.getBitcast(MVT::f32, Op0);
3512 SDValue Res;
3513 std::tie(Res, Chain) = makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Arg,
3514 CallOptions, DL, Chain);
3515 return Res;
3516}
3517
3518SDValue LoongArchTargetLowering::lowerFP_TO_BF16(SDValue Op,
3519 SelectionDAG &DAG) const {
3520 assert(Subtarget.hasBasicF() && "Unexpected custom legalization");
3521 SDLoc DL(Op);
3522 MakeLibCallOptions CallOptions;
3523 RTLIB::Libcall LC =
3524 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16);
3525 SDValue Res =
3526 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
3527 if (Subtarget.is64Bit())
3528 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Res);
3529 return DAG.getBitcast(MVT::i32, Res);
3530}
3531
3532SDValue LoongArchTargetLowering::lowerBF16_TO_FP(SDValue Op,
3533 SelectionDAG &DAG) const {
3534 assert(Subtarget.hasBasicF() && "Unexpected custom legalization");
3535 MVT VT = Op.getSimpleValueType();
3536 SDLoc DL(Op);
3537 Op = DAG.getNode(
3538 ISD::SHL, DL, Op.getOperand(0).getValueType(), Op.getOperand(0),
3539 DAG.getShiftAmountConstant(16, Op.getOperand(0).getValueType(), DL));
3540 SDValue Res = Subtarget.is64Bit() ? DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64,
3541 DL, MVT::f32, Op)
3542 : DAG.getBitcast(MVT::f32, Op);
3543 if (VT != MVT::f32)
3544 return DAG.getNode(ISD::FP_EXTEND, DL, VT, Res);
3545 return Res;
3546}
3547
3548// Lower BUILD_VECTOR as broadcast load (if possible).
3549// For example:
3550// %a = load i8, ptr %ptr
3551// %b = build_vector %a, %a, %a, %a
3552// is lowered to :
3553// (VLDREPL_B $a0, 0)
//
// Returns an empty SDValue when the operands are not all the same load, when
// that load has users other than BVOp, or when the load's extension type or
// memory width does not fit.
3555 const SDLoc &DL,
3556 SelectionDAG &DAG) {
3557 MVT VT = BVOp->getSimpleValueType(0);
3558 int NumOps = BVOp->getNumOperands();
3559
3560 assert((VT.is128BitVector() || VT.is256BitVector()) &&
3561 "Unsupported vector type for broadcast.");
3562
3563 SDValue IdentitySrc;
3564 bool IsIdeneity = true;
3565
 // Every operand must be a LOAD and must be the same value as operand 0.
3566 for (int i = 0; i != NumOps; i++) {
3567 SDValue Op = BVOp->getOperand(i);
3568 if (Op.getOpcode() != ISD::LOAD || (IdentitySrc && Op != IdentitySrc)) {
3569 IsIdeneity = false;
3570 break;
3571 }
3572 IdentitySrc = BVOp->getOperand(0);
3573 }
3574
3575 // make sure that this load is valid and only has one user.
3576 if (!IsIdeneity || !IdentitySrc || !BVOp->isOnlyUserOf(IdentitySrc.getNode()))
3577 return SDValue();
3578
3579 auto *LN = cast<LoadSDNode>(IdentitySrc);
3580 auto ExtType = LN->getExtensionType();
3581
 // Only any-extending or non-extending loads whose memory element width
 // equals the vector element width are converted.
3582 if ((ExtType == ISD::EXTLOAD || ExtType == ISD::NON_EXTLOAD) &&
3583 VT.getScalarSizeInBits() == LN->getMemoryVT().getScalarSizeInBits()) {
3584 // Indexed loads and stores are not supported on LoongArch.
3585 assert(LN->isUnindexed() && "Unexpected indexed load.");
3586
3587 SDVTList Tys = DAG.getVTList(VT, MVT::Other);
3588 // The offset operand of unindexed load is always undefined, so there is
3589 // no need to pass it to VLDREPL.
3590 SDValue Ops[] = {LN->getChain(), LN->getBasePtr()};
3591 SDValue BCast = DAG.getNode(LoongArchISD::VLDREPL, DL, Tys, Ops);
 // Users of the original load's chain result are moved to the chain result
 // of the broadcast node.
3592 DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BCast.getValue(1));
3593 return BCast;
3594 }
3595 return SDValue();
3596}
3597
3598// Sequentially insert elements from Ops into Vector, from low to high indices.
3599// Note: Ops can have fewer elements than Vector.
//
// Vector is updated in place. Undef elements of Ops are skipped, so the
// corresponding lanes keep whatever Vector held on entry. Ops[0] is read
// unconditionally, so Ops must not be empty.
3601 const LoongArchSubtarget &Subtarget, SDValue &Vector,
3602 EVT ResTy) {
3603 assert(Ops.size() <= ResTy.getVectorNumElements());
3604
 // A defined element 0 replaces Vector with a fresh SCALAR_TO_VECTOR node;
 // the previous contents of Vector are discarded in that case.
3605 SDValue Op0 = Ops[0];
3606 if (!Op0.isUndef())
3607 Vector = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, ResTy, Op0);
 // The remaining defined elements are inserted one by one at their index.
3608 for (unsigned i = 1; i < Ops.size(); ++i) {
3609 SDValue Opi = Ops[i];
3610 if (Opi.isUndef())
3611 continue;
3612 Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector, Opi,
3613 DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
3614 }
3615}
3616
3617// Build a ResTy subvector from Node, taking NumElts elements starting at index
3618// 'first'.
//
// NumElts is the element count of ResTy. The result starts out as UNDEF and
// is populated by fillVector, so undef operands of Node leave undef lanes.
3620 SelectionDAG &DAG, SDLoc DL,
3621 const LoongArchSubtarget &Subtarget,
3622 EVT ResTy, unsigned first) {
3623 unsigned NumElts = ResTy.getVectorNumElements();
3624
3625 assert(first + NumElts <= Node->getSimpleValueType(0).getVectorNumElements());
3626
 // Copy the selected operand window of Node.
3627 SmallVector<SDValue, 16> Ops(Node->op_begin() + first,
3628 Node->op_begin() + first + NumElts);
3629 SDValue Vector = DAG.getUNDEF(ResTy);
3630 fillVector(Ops, DAG, DL, Subtarget, Vector, ResTy);
3631 return Vector;
3632}
3633
// Custom-lower a BUILD_VECTOR node for 128-bit (LSX) and 256-bit (LASX)
// vectors. The strategies are tried in this order:
//   1. broadcast load,
//   2. constant splat of 8/16/32/64 bits,
//   3. already a splat without undefs: keep the node,
//   4. one repeated constant plus non-constant elements: splat + inserts,
//   5. no constants at all: repeated sub-sequence broadcast, else element
//      inserts (split in halves for 256-bit vectors).
// An empty SDValue is returned when none of them applies.
3634SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
3635 SelectionDAG &DAG) const {
3636 BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op);
3637 MVT VT = Node->getSimpleValueType(0);
3638 EVT ResTy = Op->getValueType(0);
3639 unsigned NumElts = ResTy.getVectorNumElements();
3640 SDLoc DL(Op);
3641 APInt SplatValue, SplatUndef;
3642 unsigned SplatBitSize;
3643 bool HasAnyUndefs;
3644 bool IsConstant = false;
3645 bool UseSameConstant = true;
3646 SDValue ConstantValue;
3647 bool Is128Vec = ResTy.is128BitVector();
3648 bool Is256Vec = ResTy.is256BitVector();
3649
 // Proceed only for a 128-bit vector with LSX or a 256-bit vector with LASX.
3650 if ((!Subtarget.hasExtLSX() || !Is128Vec) &&
3651 (!Subtarget.hasExtLASX() || !Is256Vec))
3652 return SDValue();
3653
3654 if (SDValue Result = lowerBUILD_VECTORAsBroadCastLoad(Node, DL, DAG))
3655 return Result;
3656
3657 if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
3658 /*MinSplatBits=*/8) &&
3659 SplatBitSize <= 64) {
3660 // We can only cope with 8, 16, 32, or 64-bit elements.
3661 if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 &&
3662 SplatBitSize != 64)
3663 return SDValue();
3664
3665 if (SplatBitSize == 64 && !Subtarget.is64Bit()) {
3666 // We can only handle 64-bit elements that are within
3667 // the signed 10-bit range or match vldi patterns on 32-bit targets.
3668 // See the BUILD_VECTOR case in LoongArchDAGToDAGISel::Select().
3669 if (!SplatValue.isSignedIntN(10) &&
3670 !isImmVLDILegalForMode1(SplatValue, SplatBitSize).first)
3671 return SDValue();
3672 if ((Is128Vec && ResTy == MVT::v4i32) ||
3673 (Is256Vec && ResTy == MVT::v8i32))
3674 return Op;
3675 }
3676
 // Pick the integer vector type whose element width equals the splat width.
3677 EVT ViaVecTy;
3678
3679 switch (SplatBitSize) {
3680 default:
3681 return SDValue();
3682 case 8:
3683 ViaVecTy = Is128Vec ? MVT::v16i8 : MVT::v32i8;
3684 break;
3685 case 16:
3686 ViaVecTy = Is128Vec ? MVT::v8i16 : MVT::v16i16;
3687 break;
3688 case 32:
3689 ViaVecTy = Is128Vec ? MVT::v4i32 : MVT::v8i32;
3690 break;
3691 case 64:
3692 ViaVecTy = Is128Vec ? MVT::v2i64 : MVT::v4i64;
3693 break;
3694 }
3695
3696 // SelectionDAG::getConstant will promote SplatValue appropriately.
3697 SDValue Result = DAG.getConstant(SplatValue, DL, ViaVecTy);
3698
3699 // Bitcast to the type we originally wanted.
3700 if (ViaVecTy != ResTy)
3701 Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result);
3702
3703 return Result;
3704 }
3705
 // A splat without undef elements is kept as it is.
3706 if (DAG.isSplatValue(Op, /*AllowUndefs=*/false))
3707 return Op;
3708
 // Record whether any operand is a constant and whether all constant
 // operands are the same value.
3709 for (unsigned i = 0; i < NumElts; ++i) {
3710 SDValue Opi = Node->getOperand(i);
3711 if (isIntOrFPConstant(Opi)) {
3712 IsConstant = true;
3713 if (!ConstantValue.getNode())
3714 ConstantValue = Opi;
3715 else if (ConstantValue != Opi)
3716 UseSameConstant = false;
3717 }
3718 }
3719
3720 // If the type of BUILD_VECTOR is v2f64, custom legalizing it has no benefits.
3721 if (IsConstant && UseSameConstant && ResTy != MVT::v2f64) {
 // Splat the shared constant, then overwrite every non-constant lane.
3722 SDValue Result = DAG.getSplatBuildVector(ResTy, DL, ConstantValue);
3723 for (unsigned i = 0; i < NumElts; ++i) {
3724 SDValue Opi = Node->getOperand(i);
3725 if (!isIntOrFPConstant(Opi))
3726 Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Result, Opi,
3727 DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
3728 }
3729 return Result;
3730 }
3731
3732 if (!IsConstant) {
3733 // If the BUILD_VECTOR has a repeated pattern, use INSERT_VECTOR_ELT to fill
3734 // the sub-sequence of the vector and then broadcast the sub-sequence.
3735 //
3736 // TODO: If the BUILD_VECTOR contains undef elements, consider falling
3737 // back to use INSERT_VECTOR_ELT to materialize the vector, because it
3738 // generates worse code in some cases. This could be further optimized
3739 // with more consideration.
3741 BitVector UndefElements;
3742 if (Node->getRepeatedSequence(Sequence, &UndefElements) &&
3743 UndefElements.count() == 0) {
3744 // Using LSX instructions to fill the sub-sequence of 256-bits vector,
3745 // because the high part can be simply treated as undef.
3746 SDValue Vector = DAG.getUNDEF(ResTy);
3747 EVT FillTy = Is256Vec
3749 : ResTy;
3750 SDValue FillVec =
3751 Is256Vec ? DAG.getExtractSubvector(DL, FillTy, Vector, 0) : Vector;
3752
3753 fillVector(Sequence, DAG, DL, Subtarget, FillVec, FillTy);
3754
 // Treat the whole sub-sequence as one wide integer element so that
 // broadcasting element 0 replicates the sequence.
3755 unsigned SeqLen = Sequence.size();
3756 unsigned SplatLen = NumElts / SeqLen;
3757 MVT SplatEltTy = MVT::getIntegerVT(VT.getScalarSizeInBits() * SeqLen);
3758 MVT SplatTy = MVT::getVectorVT(SplatEltTy, SplatLen);
3759
3760 // If size of the sub-sequence is half of a 256-bits vector, bitcast the
3761 // vector to v4i64 type in order to match the pattern of XVREPLVE0Q.
3762 if (SplatEltTy == MVT::i128)
3763 SplatTy = MVT::v4i64;
3764
3765 SDValue SplatVec;
3766 SDValue SrcVec = DAG.getBitcast(
3767 SplatTy,
3768 Is256Vec ? DAG.getInsertSubvector(DL, Vector, FillVec, 0) : FillVec);
3769 if (Is256Vec) {
3770 SplatVec =
3771 DAG.getNode((SplatEltTy == MVT::i128) ? LoongArchISD::XVREPLVE0Q
3772 : LoongArchISD::XVREPLVE0,
3773 DL, SplatTy, SrcVec);
3774 } else {
3775 SplatVec = DAG.getNode(LoongArchISD::VREPLVEI, DL, SplatTy, SrcVec,
3776 DAG.getConstant(0, DL, Subtarget.getGRLenVT()));
3777 }
3778
3779 return DAG.getBitcast(ResTy, SplatVec);
3780 }
3781
3782 // Use INSERT_VECTOR_ELT operations rather than expand to stores, because
3783 // using memory operations is much slower.
3784 //
3785 // For 256-bit vectors, normally split into two halves and concatenate.
3786 // Special case: for v8i32/v8f32/v4i64/v4f64, if the upper half has only
3787 // one non-undef element, skip splitting to avoid a worse result.
3788 if (ResTy == MVT::v8i32 || ResTy == MVT::v8f32 || ResTy == MVT::v4i64 ||
3789 ResTy == MVT::v4f64) {
 // Count defined elements of the upper half, stopping once a second one
 // is found.
3790 unsigned NonUndefCount = 0;
3791 for (unsigned i = NumElts / 2; i < NumElts; ++i) {
3792 if (!Node->getOperand(i).isUndef()) {
3793 ++NonUndefCount;
3794 if (NonUndefCount > 1)
3795 break;
3796 }
3797 }
3798 if (NonUndefCount == 1)
3799 return fillSubVectorFromBuildVector(Node, DAG, DL, Subtarget, ResTy, 0);
3800 }
3801
 // Build the whole 128-bit vector, or the lower half of a 256-bit vector.
3802 EVT VecTy =
3803 Is256Vec ? ResTy.getHalfNumVectorElementsVT(*DAG.getContext()) : ResTy;
3804 SDValue Vector =
3805 fillSubVectorFromBuildVector(Node, DAG, DL, Subtarget, VecTy, 0);
3806
3807 if (Is128Vec)
3808 return Vector;
3809
 // Build the upper half and concatenate both halves.
3810 SDValue VectorHi = fillSubVectorFromBuildVector(Node, DAG, DL, Subtarget,
3811 VecTy, NumElts / 2);
3812
3813 return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResTy, Vector, VectorHi);
3814 }
3815
3816 return SDValue();
3817}
3818
3819SDValue LoongArchTargetLowering::lowerCONCAT_VECTORS(SDValue Op,
3820 SelectionDAG &DAG) const {
3821 SDLoc DL(Op);
3822 MVT ResVT = Op.getSimpleValueType();
3823 assert(ResVT.is256BitVector() && Op.getNumOperands() == 2);
3824
3825 unsigned NumOperands = Op.getNumOperands();
3826 unsigned NumFreezeUndef = 0;
3827 unsigned NumZero = 0;
3828 unsigned NumNonZero = 0;
3829 unsigned NonZeros = 0;
3830 SmallSet<SDValue, 4> Undefs;
3831 for (unsigned i = 0; i != NumOperands; ++i) {
3832 SDValue SubVec = Op.getOperand(i);
3833 if (SubVec.isUndef())
3834 continue;
3835 if (ISD::isFreezeUndef(SubVec.getNode())) {
3836 // If the freeze(undef) has multiple uses then we must fold to zero.
3837 if (SubVec.hasOneUse()) {
3838 ++NumFreezeUndef;
3839 } else {
3840 ++NumZero;
3841 Undefs.insert(SubVec);
3842 }
3843 } else if (ISD::isBuildVectorAllZeros(SubVec.getNode()))
3844 ++NumZero;
3845 else {
3846 assert(i < sizeof(NonZeros) * CHAR_BIT); // Ensure the shift is in range.
3847 NonZeros |= 1 << i;
3848 ++NumNonZero;
3849 }
3850 }
3851
3852 // If we have more than 2 non-zeros, build each half separately.
3853 if (NumNonZero > 2) {
3854 MVT HalfVT = ResVT.getHalfNumVectorElementsVT();
3855 ArrayRef<SDUse> Ops = Op->ops();
3856 SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
3857 Ops.slice(0, NumOperands / 2));
3858 SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
3859 Ops.slice(NumOperands / 2));
3860 return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi);
3861 }
3862
3863 // Otherwise, build it up through insert_subvectors.
3864 SDValue Vec = NumZero ? DAG.getConstant(0, DL, ResVT)
3865 : (NumFreezeUndef ? DAG.getFreeze(DAG.getUNDEF(ResVT))
3866 : DAG.getUNDEF(ResVT));
3867
3868 // Replace Undef operands with ZeroVector.
3869 for (SDValue U : Undefs)
3870 DAG.ReplaceAllUsesWith(U, DAG.getConstant(0, DL, U.getSimpleValueType()));
3871
3872 MVT SubVT = Op.getOperand(0).getSimpleValueType();
3873 unsigned NumSubElems = SubVT.getVectorNumElements();
3874 for (unsigned i = 0; i != NumOperands; ++i) {
3875 if ((NonZeros & (1 << i)) == 0)
3876 continue;
3877
3878 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ResVT, Vec, Op.getOperand(i),
3879 DAG.getVectorIdxConstant(i * NumSubElems, DL));
3880 }
3881
3882 return Vec;
3883}
3884
3885SDValue
3886LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
3887 SelectionDAG &DAG) const {
3888 MVT EltVT = Op.getSimpleValueType();
3889 SDValue Vec = Op->getOperand(0);
3890 EVT VecTy = Vec->getValueType(0);
3891 SDValue Idx = Op->getOperand(1);
3892 SDLoc DL(Op);
3893 MVT GRLenVT = Subtarget.getGRLenVT();
3894
3895 assert(VecTy.is256BitVector() && "Unexpected EXTRACT_VECTOR_ELT vector type");
3896
3897 if (isa<ConstantSDNode>(Idx))
3898 return Op;
3899
3900 switch (VecTy.getSimpleVT().SimpleTy) {
3901 default:
3902 llvm_unreachable("Unexpected type");
3903 case MVT::v32i8:
3904 case MVT::v16i16:
3905 case MVT::v4i64:
3906 case MVT::v4f64: {
3907 // Extract the high half subvector and place it to the low half of a new
3908 // vector. It doesn't matter what the high half of the new vector is.
3909 EVT HalfTy = VecTy.getHalfNumVectorElementsVT(*DAG.getContext());
3910 SDValue VecHi =
3911 DAG.getExtractSubvector(DL, HalfTy, Vec, HalfTy.getVectorNumElements());
3912 SDValue TmpVec =
3913 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecTy, DAG.getUNDEF(VecTy),
3914 VecHi, DAG.getConstant(0, DL, GRLenVT));
3915
3916 // Shuffle the origin Vec and the TmpVec using MaskVec, the lowest element
3917 // of MaskVec is Idx, the rest do not matter. ResVec[0] will hold the
3918 // desired element.
3919 SDValue IdxCp =
3920 Subtarget.is64Bit()
3921 ? DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Idx)
3922 : DAG.getBitcast(MVT::f32, Idx);
3923 SDValue IdxVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v8f32, IdxCp);
3924 SDValue MaskVec =
3925 DAG.getBitcast((VecTy == MVT::v4f64) ? MVT::v4i64 : VecTy, IdxVec);
3926 SDValue ResVec =
3927 DAG.getNode(LoongArchISD::VSHUF, DL, VecTy, MaskVec, TmpVec, Vec);
3928
3929 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, ResVec,
3930 DAG.getConstant(0, DL, GRLenVT));
3931 }
3932 case MVT::v8i32:
3933 case MVT::v8f32: {
3934 SDValue SplatIdx = DAG.getSplatBuildVector(MVT::v8i32, DL, Idx);
3935 SDValue SplatValue =
3936 DAG.getNode(LoongArchISD::XVPERM, DL, VecTy, Vec, SplatIdx);
3937
3938 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, SplatValue,
3939 DAG.getConstant(0, DL, GRLenVT));
3940 }
3941 }
3942}
3943
3944SDValue
3945LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
3946 SelectionDAG &DAG) const {
3947 MVT VT = Op.getSimpleValueType();
3948 MVT EltVT = VT.getVectorElementType();
3949 unsigned NumElts = VT.getVectorNumElements();
3950 unsigned EltSizeInBits = EltVT.getScalarSizeInBits();
3951 SDLoc DL(Op);
3952 SDValue Op0 = Op.getOperand(0);
3953 SDValue Op1 = Op.getOperand(1);
3954 SDValue Op2 = Op.getOperand(2);
3955
3956 if (isa<ConstantSDNode>(Op2))
3957 return Op;
3958
3959 MVT IdxTy = MVT::getIntegerVT(EltSizeInBits);
3960 MVT IdxVTy = MVT::getVectorVT(IdxTy, NumElts);
3961
3962 if (!isTypeLegal(VT) || !isTypeLegal(IdxVTy))
3963 return SDValue();
3964
3965 SDValue SplatElt = DAG.getSplatBuildVector(VT, DL, Op1);
3966 SmallVector<SDValue, 32> RawIndices;
3967 SDValue SplatIdx;
3968 SDValue Indices;
3969
3970 if (!Subtarget.is64Bit() && IdxTy == MVT::i64) {
3971 MVT PairVTy = MVT::getVectorVT(MVT::i32, NumElts * 2);
3972 for (unsigned i = 0; i < NumElts; ++i) {
3973 RawIndices.push_back(Op2);
3974 RawIndices.push_back(DAG.getConstant(0, DL, MVT::i32));
3975 }
3976 SplatIdx = DAG.getBuildVector(PairVTy, DL, RawIndices);
3977 SplatIdx = DAG.getBitcast(IdxVTy, SplatIdx);
3978
3979 RawIndices.clear();
3980 for (unsigned i = 0; i < NumElts; ++i) {
3981 RawIndices.push_back(DAG.getConstant(i, DL, MVT::i32));
3982 RawIndices.push_back(DAG.getConstant(0, DL, MVT::i32));
3983 }
3984 Indices = DAG.getBuildVector(PairVTy, DL, RawIndices);
3985 Indices = DAG.getBitcast(IdxVTy, Indices);
3986 } else {
3987 SplatIdx = DAG.getSplatBuildVector(IdxVTy, DL, Op2);
3988
3989 for (unsigned i = 0; i < NumElts; ++i)
3990 RawIndices.push_back(DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
3991 Indices = DAG.getBuildVector(IdxVTy, DL, RawIndices);
3992 }
3993
3994 // insert vec, elt, idx
3995 // =>
3996 // select (splatidx == {0,1,2...}) ? splatelt : vec
3997 SDValue SelectCC =
3998 DAG.getSetCC(DL, IdxVTy, SplatIdx, Indices, ISD::CondCode::SETEQ);
3999 return DAG.getNode(ISD::VSELECT, DL, VT, SelectCC, SplatElt, Op0);
4000}
4001
4002SDValue LoongArchTargetLowering::lowerATOMIC_FENCE(SDValue Op,
4003 SelectionDAG &DAG) const {
4004 SDLoc DL(Op);
4005 SyncScope::ID FenceSSID =
4006 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
4007
4008 // singlethread fences only synchronize with signal handlers on the same
4009 // thread and thus only need to preserve instruction order, not actually
4010 // enforce memory ordering.
4011 if (FenceSSID == SyncScope::SingleThread)
4012 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
4013 return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
4014
4015 return Op;
4016}
4017
4018SDValue LoongArchTargetLowering::lowerWRITE_REGISTER(SDValue Op,
4019 SelectionDAG &DAG) const {
4020
4021 if (Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i32) {
4022 DAG.getContext()->emitError(
4023 "On LA64, only 64-bit registers can be written.");
4024 return Op.getOperand(0);
4025 }
4026
4027 if (!Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i64) {
4028 DAG.getContext()->emitError(
4029 "On LA32, only 32-bit registers can be written.");
4030 return Op.getOperand(0);
4031 }
4032
4033 return Op;
4034}
4035
// Lowers a frame-address request. Operand 0 is the frame depth and has to be
// a constant; otherwise an error is emitted and an empty SDValue is returned.
// Depth 0 yields a copy of the frame register. Each additional level loads
// the next frame address from (current frame address - 2 * GRLen bytes).
// NOTE(review): this listing jumps from source line 4044 to 4046, so one
// statement of the original function is not visible here.
4036SDValue LoongArchTargetLowering::lowerFRAMEADDR(SDValue Op,
4037 SelectionDAG &DAG) const {
4038 if (!isa<ConstantSDNode>(Op.getOperand(0))) {
4039 DAG.getContext()->emitError("argument to '__builtin_frame_address' must "
4040 "be a constant integer");
4041 return SDValue();
4042 }
4043
4044 MachineFunction &MF = DAG.getMachineFunction();
4046 Register FrameReg = Subtarget.getRegisterInfo()->getFrameRegister(MF);
4047 EVT VT = Op.getValueType();
4048 SDLoc DL(Op);
// Start from the current function's frame register, read off the entry node.
4049 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
4050 unsigned Depth = Op.getConstantOperandVal(0);
4051 int GRLenInBytes = Subtarget.getGRLen() / 8;
4052
// Walk up Depth frames. Each load is chained to the entry node and reads the
// slot two GRLen-sized words below the current frame address.
// NOTE(review): presumably that slot holds the caller's saved frame pointer;
// confirm against the frame layout.
4053 while (Depth--) {
4054 int Offset = -(GRLenInBytes * 2);
4055 SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
4056 DAG.getSignedConstant(Offset, DL, VT));
4057 FrameAddr =
4058 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
4059 }
4060 return FrameAddr;
4061}
4062
// Lowers a return-address request. Only depth 0 (operand 0) is supported: any
// other depth emits an error and returns an empty SDValue. For depth 0 the
// return-address register is added as a function live-in and copied from the
// entry node as a GRLen-wide value.
// NOTE(review): operand 0 is read with getConstantOperandVal() without a
// prior isa<ConstantSDNode> check; presumably it is guaranteed to be a
// constant here, confirm.
// NOTE(review): this listing jumps from source line 4072 to 4074, so one
// statement of the original function is not visible here.
4063SDValue LoongArchTargetLowering::lowerRETURNADDR(SDValue Op,
4064 SelectionDAG &DAG) const {
4065 // Currently only support lowering return address for current frame.
4066 if (Op.getConstantOperandVal(0) != 0) {
4067 DAG.getContext()->emitError(
4068 "return address can only be determined for the current frame");
4069 return SDValue();
4070 }
4071
4072 MachineFunction &MF = DAG.getMachineFunction();
4074 MVT GRLenVT = Subtarget.getGRLenVT();
4075
4076 // Return the value of the return address register, marking it an implicit
4077 // live-in.
4078 Register Reg = MF.addLiveIn(Subtarget.getRegisterInfo()->getRARegister(),
4079 getRegClassFor(GRLenVT));
4080 return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), Reg, GRLenVT);
4081}
4082
4083SDValue LoongArchTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
4084 SelectionDAG &DAG) const {
4085 MachineFunction &MF = DAG.getMachineFunction();
4086 auto Size = Subtarget.getGRLen() / 8;
4087 auto FI = MF.getFrameInfo().CreateFixedObject(Size, 0, false);
4088 return DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
4089}
4090
// Lowers va_start: stores the frame index recorded as the function's varargs
// frame index into the memory location given by operand 1, chained on
// operand 0. Operand 2 supplies the IR source value used for the store's
// pointer info.
// NOTE(review): this listing jumps from source line 4097 to 4099; the tail of
// the getFrameIndex() call (its remaining argument and closing parenthesis)
// is not visible here.
4091SDValue LoongArchTargetLowering::lowerVASTART(SDValue Op,
4092 SelectionDAG &DAG) const {
4093 MachineFunction &MF = DAG.getMachineFunction();
4094 auto *FuncInfo = MF.getInfo<LoongArchMachineFunctionInfo>();
4095
4096 SDLoc DL(Op);
4097 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
4099
4100 // vastart just stores the address of the VarArgsFrameIndex slot into the
4101 // memory location argument.
4102 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4103 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
4104 MachinePointerInfo(SV));
4105}
4106
// Custom lowering for unsigned-integer-to-floating-point conversion.
//
// Visible strategy, in order:
//  1. When the source's sign bit is known zero or the node carries the
//     non-negative flag (plus further conditions that are not visible, see
//     the note below), convert with SINT_TO_FP instead.
//  2. With LSX, an i64 source and an f32/f64 result: convert through a
//     v2i64 -> v2f64 vector UINT_TO_FP, extract element 0, and round to f32
//     when needed.
//  3. Unless the target is LA64 with basic F but without basic D, return an
//     empty SDValue.
//  4. On that configuration keep the node when the source is recognisably
//     narrow (see the individual checks); otherwise emit a libcall.
//
// NOTE(review): this listing skips source lines 4115-4116 (the rest of the
// first condition) and 4157 (presumably the declaration of Result); consult
// the original file for them.
4107SDValue LoongArchTargetLowering::lowerUINT_TO_FP(SDValue Op,
4108 SelectionDAG &DAG) const {
4109 SDLoc DL(Op);
4110 SDValue Op0 = Op.getOperand(0);
4111 EVT VT = Op.getValueType();
4112 EVT Op0VT = Op0.getValueType();
4113
4114 if ((DAG.SignBitIsZero(Op0) || Op->getFlags().hasNonNeg()) &&
4117 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, Op0);
4118
4119 if (Subtarget.hasExtLSX() && Op0VT == MVT::i64 &&
4120 (VT == MVT::f32 || VT == MVT::f64)) {
// Place the scalar in a vector, convert as v2i64 -> v2f64, then read back
// element 0.
4121 Op0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v2i64, Op0);
4122 SDValue Conv = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::v2f64, Op0);
4123 Conv = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f64, Conv,
4124 DAG.getIntPtrConstant(0, DL));
4125 if (VT == MVT::f32)
4126 Conv = DAG.getFPExtendOrRound(Conv, DL, VT);
4127 return Conv;
4128 }
4129
4130 if (!Subtarget.is64Bit() || !Subtarget.hasBasicF() || Subtarget.hasBasicD())
4131 return SDValue();
4132
// The guard above already guarantees this; the assert documents the only
// configuration handled below.
4133 assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
4134 !Subtarget.hasBasicD() && "unexpected target features");
4135
// Source is an AND with a constant mask strictly below 0xFFFFFFFF: keep the
// node as is.
4136 if (Op0->getOpcode() == ISD::AND) {
4137 auto *C = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
4138 if (C && C->getZExtValue() < UINT64_C(0xFFFFFFFF))
4139 return Op;
4140 }
4141
// Source is a BSTRPICK whose operand 1 is below 0x1F and whose operand 2 is
// 0: keep the node as is.
4142 if (Op0->getOpcode() == LoongArchISD::BSTRPICK &&
4143 Op0.getConstantOperandVal(1) < UINT64_C(0X1F) &&
4144 Op0.getConstantOperandVal(2) == UINT64_C(0))
4145 return Op;
4146
// Source is asserted zero-extended from a type narrower than i32: keep the
// node as is.
// NOTE(review): the dyn_cast result is dereferenced without a null check;
// cast<VTSDNode> would express the intent and assert on a mismatch.
4147 if (Op0.getOpcode() == ISD::AssertZext &&
4148 dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLT(MVT::i32))
4149 return Op;
4150
// Fall back to the runtime-library conversion routine for these types.
4151 EVT OpVT = Op0.getValueType();
4152 EVT RetVT = Op.getValueType();
4153 RTLIB::Libcall LC = RTLIB::getUINTTOFP(OpVT, RetVT);
4154 MakeLibCallOptions CallOptions;
4155 CallOptions.setTypeListBeforeSoften(OpVT, RetVT);
4156 SDValue Chain = SDValue();
4158 std::tie(Result, Chain) =
4159 makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
4160 return Result;
4161}
4162
// Custom lowering for signed-integer-to-floating-point conversion. Only
// expected on LA64 with basic F but without basic D (asserted). The node is
// kept when the source is an AssertSext (or the opcode tested on the hidden
// line, see below) from a type no wider than i32; otherwise the conversion is
// emitted as a libcall.
// NOTE(review): this listing skips source lines 4172 (second half of the
// opcode test) and 4182 (presumably the declaration of Result).
// NOTE(review): the dyn_cast result is dereferenced without a null check;
// cast<VTSDNode> would express the intent and assert on a mismatch.
4163SDValue LoongArchTargetLowering::lowerSINT_TO_FP(SDValue Op,
4164 SelectionDAG &DAG) const {
4165 assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
4166 !Subtarget.hasBasicD() && "unexpected target features");
4167
4168 SDLoc DL(Op);
4169 SDValue Op0 = Op.getOperand(0);
4170
4171 if ((Op0.getOpcode() == ISD::AssertSext ||
4173 dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLE(MVT::i32))
4174 return Op;
4175
// Fall back to the runtime-library conversion routine for these types.
4176 EVT OpVT = Op0.getValueType();
4177 EVT RetVT = Op.getValueType();
4178 RTLIB::Libcall LC = RTLIB::getSINTTOFP(OpVT, RetVT);
4179 MakeLibCallOptions CallOptions;
4180 CallOptions.setTypeListBeforeSoften(OpVT, RetVT);
4181 SDValue Chain = SDValue();
4183 std::tie(Result, Chain) =
4184 makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
4185 return Result;
4186}
4187
4188SDValue LoongArchTargetLowering::lowerBITCAST(SDValue Op,
4189 SelectionDAG &DAG) const {
4190
4191 SDLoc DL(Op);
4192 EVT VT = Op.getValueType();
4193 SDValue Op0 = Op.getOperand(0);
4194 EVT Op0VT = Op0.getValueType();
4195
4196 if (Op.getValueType() == MVT::f32 && Op0VT == MVT::i32 &&
4197 Subtarget.is64Bit() && Subtarget.hasBasicF()) {
4198 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
4199 return DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, NewOp0);
4200 }
4201 if (VT == MVT::f64 && Op0VT == MVT::i64 && !Subtarget.is64Bit()) {
4202 SDValue Lo, Hi;
4203 std::tie(Lo, Hi) = DAG.SplitScalar(Op0, DL, MVT::i32, MVT::i32);
4204 return DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64, Lo, Hi);
4205 }
4206 return Op;
4207}
4208
4209SDValue LoongArchTargetLowering::lowerFP_TO_SINT(SDValue Op,
4210 SelectionDAG &DAG) const {
4211
4212 SDLoc DL(Op);
4213 SDValue Op0 = Op.getOperand(0);
4214
4215 if (Op0.getValueType() == MVT::f16)
4216 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
4217
4218 if (Op.getValueSizeInBits() > 32 && Subtarget.hasBasicF() &&
4219 !Subtarget.hasBasicD()) {
4220 SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, MVT::f32, Op0);
4221 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Dst);
4222 }
4223
4224 EVT FPTy = EVT::getFloatingPointVT(Op.getValueSizeInBits());
4225 SDValue Trunc = DAG.getNode(LoongArchISD::FTINT, DL, FPTy, Op0);
4226 return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Trunc);
4227}
4228
4229SDValue LoongArchTargetLowering::lowerFP_TO_UINT(SDValue Op,
4230 SelectionDAG &DAG) const {
4231 if (!Subtarget.hasExtLSX())
4232 return SDValue();
4233
4234 SDLoc DL(Op);
4235 SDValue Src = Op.getOperand(0);
4236 EVT VT = Op.getValueType();
4237 EVT SrcVT = Src.getValueType();
4238
4239 if (VT != MVT::i64)
4240 return SDValue();
4241
4242 if (SrcVT != MVT::f32 && SrcVT != MVT::f64)
4243 return SDValue();
4244
4245 if (SrcVT == MVT::f32)
4246 Src = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f64, Src);
4247 Src = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v2f64, Src);
4248 SDValue Conv = DAG.getNode(ISD::FP_TO_UINT, DL, MVT::v2i64, Src);
4249 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Conv,
4250 DAG.getIntPtrConstant(0, DL));
4251}
4252
// NOTE(review): the first line of this signature (source line 4253) is
// missing from this listing; N, DL and Ty used below are presumably declared
// there.
4254 SelectionDAG &DAG, unsigned Flags) {
// Builds a target global-address node for N's global with offset 0 and the
// given target flags.
4255 return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
4256}
4257
// NOTE(review): the first line of this signature (source line 4258) is
// missing from this listing; N and Ty used below are presumably declared
// there.
4259 SelectionDAG &DAG, unsigned Flags) {
// Builds a target block-address node that keeps N's block address and offset
// and carries the given target flags.
4260 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
4261 Flags);
4262}
4263
// NOTE(review): the first line of this signature (source line 4264) is
// missing from this listing; N and Ty used below are presumably declared
// there.
4265 SelectionDAG &DAG, unsigned Flags) {
// Builds a target constant-pool node that keeps N's constant, alignment and
// offset and carries the given target flags.
4266 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
4267 N->getOffset(), Flags);
4268}
4269
// NOTE(review): the first line of this signature (source line 4270) is
// missing from this listing; N and Ty used below are presumably declared
// there.
4271 SelectionDAG &DAG, unsigned Flags) {
// Builds a target jump-table node for N's table index with the given target
// flags.
4272 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
4273}
4274
// Materializes the address of the symbol-like node N by emitting one of the
// PseudoLA_* machine nodes, selected by the code model M and by whether the
// symbol is local (PC-relative) or not (loaded through the GOT). For the
// non-local case a memory operand is attached to the resulting node.
// NOTE(review): this listing skips source line 4277 (the parameter that
// declares M) and lines 4339-4341 (the leading arguments of
// getMachineMemOperand); consult the original file for them.
4275template <class NodeTy>
4276SDValue LoongArchTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
4278 bool IsLocal) const {
4279 SDLoc DL(N);
4280 EVT Ty = getPointerTy(DAG.getDataLayout());
// Target-specific form of N, created with target flags 0.
4281 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
4282 SDValue Load;
4283
4284 switch (M) {
4285 default:
4286 report_fatal_error("Unsupported code model");
4287
4288 case CodeModel::Large: {
4289 assert(Subtarget.is64Bit() && "Large code model requires LA64");
4290
4291 // This is not actually used, but is necessary for successfully matching
4292 // the PseudoLA_*_LARGE nodes.
4293 SDValue Tmp = DAG.getConstant(0, DL, Ty);
4294 if (IsLocal) {
4295 // This generates the pattern (PseudoLA_PCREL_LARGE tmp sym), that
4296 // eventually becomes the desired 5-insn code sequence.
4297 Load = SDValue(DAG.getMachineNode(LoongArch::PseudoLA_PCREL_LARGE, DL, Ty,
4298 Tmp, Addr),
4299 0);
4300 } else {
4301 // This generates the pattern (PseudoLA_GOT_LARGE tmp sym), that
4302 // eventually becomes the desired 5-insn code sequence.
4303 Load = SDValue(
4304 DAG.getMachineNode(LoongArch::PseudoLA_GOT_LARGE, DL, Ty, Tmp, Addr),
4305 0);
4306 }
4307 break;
4308 }
4309
4310 case CodeModel::Small:
4311 case CodeModel::Medium:
4312 if (IsLocal) {
4313 // This generates the pattern (PseudoLA_PCREL sym), which
4314 //
4315 // for la32r expands to:
4316 // (addi.w (pcaddu12i %pcadd_hi20(sym)) %pcadd_lo12(.Lpcadd_hi)).
4317 //
4318 // for la32s and la64 expands to:
4319 // (addi.w/d (pcalau12i %pc_hi20(sym)) %pc_lo12(sym)).
4320 Load = SDValue(
4321 DAG.getMachineNode(LoongArch::PseudoLA_PCREL, DL, Ty, Addr), 0);
4322 } else {
4323 // This generates the pattern (PseudoLA_GOT sym), which
4324 //
4325 // for la32r expands to:
4326 // (ld.w (pcaddu12i %got_pcadd_hi20(sym)) %pcadd_lo12(.Lpcadd_hi)).
4327 //
4328 // for la32s and la64 expands to:
4329 // (ld.w/d (pcalau12i %got_pc_hi20(sym)) %got_pc_lo12(sym)).
4330 Load =
4331 SDValue(DAG.getMachineNode(LoongArch::PseudoLA_GOT, DL, Ty, Addr), 0);
4332 }
4333 }
4334
4335 if (!IsLocal) {
4336 // Mark the load instruction as invariant to enable hoisting in MachineLICM.
4337 MachineFunction &MF = DAG.getMachineFunction();
4338 MachineMemOperand *MemOp = MF.getMachineMemOperand(
4342 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
4343 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
4344 }
4345
4346 return Load;
4347}
4348
4349SDValue LoongArchTargetLowering::lowerBlockAddress(SDValue Op,
4350 SelectionDAG &DAG) const {
4351 return getAddr(cast<BlockAddressSDNode>(Op), DAG,
4352 DAG.getTarget().getCodeModel());
4353}
4354
4355SDValue LoongArchTargetLowering::lowerJumpTable(SDValue Op,
4356 SelectionDAG &DAG) const {
4357 return getAddr(cast<JumpTableSDNode>(Op), DAG,
4358 DAG.getTarget().getCodeModel());
4359}
4360
4361SDValue LoongArchTargetLowering::lowerConstantPool(SDValue Op,
4362 SelectionDAG &DAG) const {
4363 return getAddr(cast<ConstantPoolSDNode>(Op), DAG,
4364 DAG.getTarget().getCodeModel());
4365}
4366
4367SDValue LoongArchTargetLowering::lowerGlobalAddress(SDValue Op,
4368 SelectionDAG &DAG) const {
4369 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
4370 assert(N->getOffset() == 0 && "unexpected offset in global node");
4371 auto CM = DAG.getTarget().getCodeModel();
4372 const GlobalValue *GV = N->getGlobal();
4373
4374 if (GV->isDSOLocal() && isa<GlobalVariable>(GV)) {
4375 if (auto GCM = dyn_cast<GlobalVariable>(GV)->getCodeModel())
4376 CM = *GCM;
4377 }
4378
4379 return getAddr(N, DAG, CM, GV->isDSOLocal());
4380}
4381
// Builds the address of a thread-local global for the static TLS models.
// Opc selects the pseudo instruction that produces the TLS offset. The result
// is that offset plus the thread pointer (register R2), except for
// PseudoLA_TLS_LE outside the large code model, where the offset node alone
// is returned. When UseGOT is set a memory operand is attached to the offset
// node.
// NOTE(review): this listing skips source lines 4409-4411 (the leading
// arguments of getMachineMemOperand); consult the original file for them.
4382SDValue LoongArchTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
4383 SelectionDAG &DAG,
4384 unsigned Opc, bool UseGOT,
4385 bool Large) const {
4386 SDLoc DL(N);
4387 EVT Ty = getPointerTy(DAG.getDataLayout());
4388 MVT GRLenVT = Subtarget.getGRLenVT();
4389
4390 // This is not actually used, but is necessary for successfully matching the
4391 // PseudoLA_*_LARGE nodes.
4392 SDValue Tmp = DAG.getConstant(0, DL, Ty);
4393 SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
4394
4395 // Only IE needs an extra argument for large code model.
4396 SDValue Offset = Opc == LoongArch::PseudoLA_TLS_IE_LARGE
4397 ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
4398 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
4399
4400 // If it is LE for normal/medium code model, the add tp operation will occur
4401 // during the pseudo-instruction expansion.
4402 if (Opc == LoongArch::PseudoLA_TLS_LE && !Large)
4403 return Offset;
4404
4405 if (UseGOT) {
4406 // Mark the load instruction as invariant to enable hoisting in MachineLICM.
4407 MachineFunction &MF = DAG.getMachineFunction();
4408 MachineMemOperand *MemOp = MF.getMachineMemOperand(
4412 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
4413 DAG.setNodeMemRefs(cast<MachineSDNode>(Offset.getNode()), {MemOp});
4414 }
4415
4416 // Add the thread pointer.
4417 return DAG.getNode(ISD::ADD, DL, Ty, Offset,
4418 DAG.getRegister(LoongArch::R2, GRLenVT));
4419}
4420
// Builds the address of a thread-local global for the dynamic TLS models.
// The pseudo instruction Opc produces the argument for a call to
// __tls_get_addr, and the call's result is returned. In the large code model
// the pseudo takes an extra placeholder operand.
// NOTE(review): this listing skips source line 4439, presumably the
// declaration of Args used below.
4421SDValue LoongArchTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
4422 SelectionDAG &DAG,
4423 unsigned Opc,
4424 bool Large) const {
4425 SDLoc DL(N);
4426 EVT Ty = getPointerTy(DAG.getDataLayout());
// IR integer type of pointer width, used as both the argument type and the
// return type of the call.
4427 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
4428
4429 // This is not actually used, but is necessary for successfully matching the
4430 // PseudoLA_*_LARGE nodes.
4431 SDValue Tmp = DAG.getConstant(0, DL, Ty);
4432
4433 // Use a PC-relative addressing mode to access the dynamic GOT address.
4434 SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
4435 SDValue Load = Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
4436 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
4437
4438 // Prepare argument list to generate call.
4440 Args.emplace_back(Load, CallTy);
4441
4442 // Setup call to __tls_get_addr.
4443 TargetLowering::CallLoweringInfo CLI(DAG);
4444 CLI.setDebugLoc(DL)
4445 .setChain(DAG.getEntryNode())
4446 .setLibCallee(CallingConv::C, CallTy,
4447 DAG.getExternalSymbol("__tls_get_addr", Ty),
4448 std::move(Args));
4449
// LowerCallTo returns a pair; its first member is used as the call's result.
4450 return LowerCallTo(CLI).first;
4451}
4452
4453SDValue LoongArchTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
4454 SelectionDAG &DAG, unsigned Opc,
4455 bool Large) const {
4456 SDLoc DL(N);
4457 EVT Ty = getPointerTy(DAG.getDataLayout());
4458 const GlobalValue *GV = N->getGlobal();
4459
4460 // This is not actually used, but is necessary for successfully matching the
4461 // PseudoLA_*_LARGE nodes.
4462 SDValue Tmp = DAG.getConstant(0, DL, Ty);
4463
4464 // Use a PC-relative addressing mode to access the global dynamic GOT address.
4465 // This generates the pattern (PseudoLA_TLS_DESC_PC{,LARGE} sym).
4466 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
4467 return Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
4468 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
4469}
4470
// Lowers the address of a thread-local global. The TLS model of the global
// selects the strategy: the dynamic models call __tls_get_addr through
// getDynamicTLSAddr unless TLS descriptors are enabled, and the static models
// go through getStaticTLSAddr. When TLS descriptors are enabled, the dynamic
// models break out of the switch and use getTLSDescAddr instead.
// NOTE(review): this listing skips source lines 4474-4475 (the condition
// guarding the first report_fatal_error) and the case labels on lines 4490,
// 4500, 4509 and 4515; consult the original file for them.
4471SDValue
4472LoongArchTargetLowering::lowerGlobalTLSAddress(SDValue Op,
4473 SelectionDAG &DAG) const {
4476 report_fatal_error("In GHC calling convention TLS is not supported");
4477
4478 bool Large = DAG.getTarget().getCodeModel() == CodeModel::Large;
4479 assert((!Large || Subtarget.is64Bit()) && "Large code model requires LA64");
4480
4481 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
4482 assert(N->getOffset() == 0 && "unexpected offset in global node");
4483
4484 if (DAG.getTarget().useEmulatedTLS())
4485 reportFatalUsageError("the emulated TLS is prohibited");
4486
4487 bool IsDesc = DAG.getTarget().useTLSDESC();
4488
4489 switch (getTargetMachine().getTLSModel(N->getGlobal())) {
4491 // In this model, application code calls the dynamic linker function
4492 // __tls_get_addr to locate TLS offsets into the dynamic thread vector at
4493 // runtime.
4494 if (!IsDesc)
4495 return getDynamicTLSAddr(N, DAG,
4496 Large ? LoongArch::PseudoLA_TLS_GD_LARGE
4497 : LoongArch::PseudoLA_TLS_GD,
4498 Large);
4499 break;
4501 // Same as GeneralDynamic, except for assembly modifiers and relocation
4502 // records.
4503 if (!IsDesc)
4504 return getDynamicTLSAddr(N, DAG,
4505 Large ? LoongArch::PseudoLA_TLS_LD_LARGE
4506 : LoongArch::PseudoLA_TLS_LD,
4507 Large);
4508 break;
4510 // This model uses the GOT to resolve TLS offsets.
4511 return getStaticTLSAddr(N, DAG,
4512 Large ? LoongArch::PseudoLA_TLS_IE_LARGE
4513 : LoongArch::PseudoLA_TLS_IE,
4514 /*UseGOT=*/true, Large);
4516 // This model is used when static linking as the TLS offsets are resolved
4517 // during program linking.
4518 //
4519 // This node doesn't need an extra argument for the large code model.
4520 return getStaticTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_LE,
4521 /*UseGOT=*/false, Large);
4522 }
4523
// Only reached through the break statements above, i.e. for the dynamic
// models when TLS descriptors are enabled.
4524 return getTLSDescAddr(N, DAG,
4525 Large ? LoongArch::PseudoLA_TLS_DESC_LARGE
4526 : LoongArch::PseudoLA_TLS_DESC,
4527 Large);
4528}
4529
// Validates the immediate operand of an intrinsic node. N is the bit width
// the immediate has to fit in: signed when IsSigned is set, unsigned
// otherwise. On a violation an "argument out of range" error is emitted and
// an UNDEF of the node's type is returned; otherwise an empty SDValue is
// returned.
// NOTE(review): the first line of the signature (source line 4531) is missing
// from this listing; Op and ImmOp used below are presumably declared there.
4530template <unsigned N>
4532 SelectionDAG &DAG, bool IsSigned = false) {
// cast<> asserts that the operand at index ImmOp is a constant node.
4533 auto *CImm = cast<ConstantSDNode>(Op->getOperand(ImmOp));
4534 // Check the ImmArg.
4535 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
4536 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
4537 DAG.getContext()->emitError(Op->getOperationName(0) +
4538 ": argument out of range.");
4539 return DAG.getNode(ISD::UNDEF, SDLoc(Op), Op.getValueType());
4540 }
4541 return SDValue();
4542}
4543
// Custom lowering for chainless intrinsics, dispatched on the intrinsic ID in
// operand 0. Apart from thread_pointer, every listed case only range-checks
// the intrinsic's immediate operand through checkIntrinsicImmArg<Bits>(Op,
// OperandIndex, DAG[, IsSigned]) and returns that helper's result. Intrinsics
// not listed return an empty SDValue.
4544SDValue
4545LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
4546 SelectionDAG &DAG) const {
4547 switch (Op.getConstantOperandVal(0)) {
4548 default:
4549 return SDValue(); // Don't custom lower most intrinsics.
// The thread pointer is register R2, returned as a pointer-typed register
// node.
4550 case Intrinsic::thread_pointer: {
4551 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4552 return DAG.getRegister(LoongArch::R2, PtrVT);
4553 }
// 1-bit unsigned immediate in operand 2.
4554 case Intrinsic::loongarch_lsx_vpickve2gr_d:
4555 case Intrinsic::loongarch_lsx_vpickve2gr_du:
4556 case Intrinsic::loongarch_lsx_vreplvei_d:
4557 case Intrinsic::loongarch_lasx_xvrepl128vei_d:
4558 return checkIntrinsicImmArg<1>(Op, 2, DAG);
// 2-bit unsigned immediate in operand 2.
4559 case Intrinsic::loongarch_lsx_vreplvei_w:
4560 case Intrinsic::loongarch_lasx_xvrepl128vei_w:
4561 case Intrinsic::loongarch_lasx_xvpickve2gr_d:
4562 case Intrinsic::loongarch_lasx_xvpickve2gr_du:
4563 case Intrinsic::loongarch_lasx_xvpickve_d:
4564 case Intrinsic::loongarch_lasx_xvpickve_d_f:
4565 return checkIntrinsicImmArg<2>(Op, 2, DAG);
// 2-bit unsigned immediate in operand 3.
4566 case Intrinsic::loongarch_lasx_xvinsve0_d:
4567 return checkIntrinsicImmArg<2>(Op, 3, DAG);
// 3-bit unsigned immediate in operand 2.
4568 case Intrinsic::loongarch_lsx_vsat_b:
4569 case Intrinsic::loongarch_lsx_vsat_bu:
4570 case Intrinsic::loongarch_lsx_vrotri_b:
4571 case Intrinsic::loongarch_lsx_vsllwil_h_b:
4572 case Intrinsic::loongarch_lsx_vsllwil_hu_bu:
4573 case Intrinsic::loongarch_lsx_vsrlri_b:
4574 case Intrinsic::loongarch_lsx_vsrari_b:
4575 case Intrinsic::loongarch_lsx_vreplvei_h:
4576 case Intrinsic::loongarch_lasx_xvsat_b:
4577 case Intrinsic::loongarch_lasx_xvsat_bu:
4578 case Intrinsic::loongarch_lasx_xvrotri_b:
4579 case Intrinsic::loongarch_lasx_xvsllwil_h_b:
4580 case Intrinsic::loongarch_lasx_xvsllwil_hu_bu:
4581 case Intrinsic::loongarch_lasx_xvsrlri_b:
4582 case Intrinsic::loongarch_lasx_xvsrari_b:
4583 case Intrinsic::loongarch_lasx_xvrepl128vei_h:
4584 case Intrinsic::loongarch_lasx_xvpickve_w:
4585 case Intrinsic::loongarch_lasx_xvpickve_w_f:
4586 return checkIntrinsicImmArg<3>(Op, 2, DAG);
// 3-bit unsigned immediate in operand 3.
4587 case Intrinsic::loongarch_lasx_xvinsve0_w:
4588 return checkIntrinsicImmArg<3>(Op, 3, DAG);
// 4-bit unsigned immediate in operand 2.
4589 case Intrinsic::loongarch_lsx_vsat_h:
4590 case Intrinsic::loongarch_lsx_vsat_hu:
4591 case Intrinsic::loongarch_lsx_vrotri_h:
4592 case Intrinsic::loongarch_lsx_vsllwil_w_h:
4593 case Intrinsic::loongarch_lsx_vsllwil_wu_hu:
4594 case Intrinsic::loongarch_lsx_vsrlri_h:
4595 case Intrinsic::loongarch_lsx_vsrari_h:
4596 case Intrinsic::loongarch_lsx_vreplvei_b:
4597 case Intrinsic::loongarch_lasx_xvsat_h:
4598 case Intrinsic::loongarch_lasx_xvsat_hu:
4599 case Intrinsic::loongarch_lasx_xvrotri_h:
4600 case Intrinsic::loongarch_lasx_xvsllwil_w_h:
4601 case Intrinsic::loongarch_lasx_xvsllwil_wu_hu:
4602 case Intrinsic::loongarch_lasx_xvsrlri_h:
4603 case Intrinsic::loongarch_lasx_xvsrari_h:
4604 case Intrinsic::loongarch_lasx_xvrepl128vei_b:
4605 return checkIntrinsicImmArg<4>(Op, 2, DAG);
// 4-bit unsigned immediate in operand 3.
4606 case Intrinsic::loongarch_lsx_vsrlni_b_h:
4607 case Intrinsic::loongarch_lsx_vsrani_b_h:
4608 case Intrinsic::loongarch_lsx_vsrlrni_b_h:
4609 case Intrinsic::loongarch_lsx_vsrarni_b_h:
4610 case Intrinsic::loongarch_lsx_vssrlni_b_h:
4611 case Intrinsic::loongarch_lsx_vssrani_b_h:
4612 case Intrinsic::loongarch_lsx_vssrlni_bu_h:
4613 case Intrinsic::loongarch_lsx_vssrani_bu_h:
4614 case Intrinsic::loongarch_lsx_vssrlrni_b_h:
4615 case Intrinsic::loongarch_lsx_vssrarni_b_h:
4616 case Intrinsic::loongarch_lsx_vssrlrni_bu_h:
4617 case Intrinsic::loongarch_lsx_vssrarni_bu_h:
4618 case Intrinsic::loongarch_lasx_xvsrlni_b_h:
4619 case Intrinsic::loongarch_lasx_xvsrani_b_h:
4620 case Intrinsic::loongarch_lasx_xvsrlrni_b_h:
4621 case Intrinsic::loongarch_lasx_xvsrarni_b_h:
4622 case Intrinsic::loongarch_lasx_xvssrlni_b_h:
4623 case Intrinsic::loongarch_lasx_xvssrani_b_h:
4624 case Intrinsic::loongarch_lasx_xvssrlni_bu_h:
4625 case Intrinsic::loongarch_lasx_xvssrani_bu_h:
4626 case Intrinsic::loongarch_lasx_xvssrlrni_b_h:
4627 case Intrinsic::loongarch_lasx_xvssrarni_b_h:
4628 case Intrinsic::loongarch_lasx_xvssrlrni_bu_h:
4629 case Intrinsic::loongarch_lasx_xvssrarni_bu_h:
4630 return checkIntrinsicImmArg<4>(Op, 3, DAG);
// 5-bit unsigned immediate in operand 2.
4631 case Intrinsic::loongarch_lsx_vsat_w:
4632 case Intrinsic::loongarch_lsx_vsat_wu:
4633 case Intrinsic::loongarch_lsx_vrotri_w:
4634 case Intrinsic::loongarch_lsx_vsllwil_d_w:
4635 case Intrinsic::loongarch_lsx_vsllwil_du_wu:
4636 case Intrinsic::loongarch_lsx_vsrlri_w:
4637 case Intrinsic::loongarch_lsx_vsrari_w:
4638 case Intrinsic::loongarch_lsx_vslei_bu:
4639 case Intrinsic::loongarch_lsx_vslei_hu:
4640 case Intrinsic::loongarch_lsx_vslei_wu:
4641 case Intrinsic::loongarch_lsx_vslei_du:
4642 case Intrinsic::loongarch_lsx_vslti_bu:
4643 case Intrinsic::loongarch_lsx_vslti_hu:
4644 case Intrinsic::loongarch_lsx_vslti_wu:
4645 case Intrinsic::loongarch_lsx_vslti_du:
4646 case Intrinsic::loongarch_lsx_vbsll_v:
4647 case Intrinsic::loongarch_lsx_vbsrl_v:
4648 case Intrinsic::loongarch_lasx_xvsat_w:
4649 case Intrinsic::loongarch_lasx_xvsat_wu:
4650 case Intrinsic::loongarch_lasx_xvrotri_w:
4651 case Intrinsic::loongarch_lasx_xvsllwil_d_w:
4652 case Intrinsic::loongarch_lasx_xvsllwil_du_wu:
4653 case Intrinsic::loongarch_lasx_xvsrlri_w:
4654 case Intrinsic::loongarch_lasx_xvsrari_w:
4655 case Intrinsic::loongarch_lasx_xvslei_bu:
4656 case Intrinsic::loongarch_lasx_xvslei_hu:
4657 case Intrinsic::loongarch_lasx_xvslei_wu:
4658 case Intrinsic::loongarch_lasx_xvslei_du:
4659 case Intrinsic::loongarch_lasx_xvslti_bu:
4660 case Intrinsic::loongarch_lasx_xvslti_hu:
4661 case Intrinsic::loongarch_lasx_xvslti_wu:
4662 case Intrinsic::loongarch_lasx_xvslti_du:
4663 case Intrinsic::loongarch_lasx_xvbsll_v:
4664 case Intrinsic::loongarch_lasx_xvbsrl_v:
4665 return checkIntrinsicImmArg<5>(Op, 2, DAG);
// 5-bit signed immediate in operand 2.
4666 case Intrinsic::loongarch_lsx_vseqi_b:
4667 case Intrinsic::loongarch_lsx_vseqi_h:
4668 case Intrinsic::loongarch_lsx_vseqi_w:
4669 case Intrinsic::loongarch_lsx_vseqi_d:
4670 case Intrinsic::loongarch_lsx_vslei_b:
4671 case Intrinsic::loongarch_lsx_vslei_h:
4672 case Intrinsic::loongarch_lsx_vslei_w:
4673 case Intrinsic::loongarch_lsx_vslei_d:
4674 case Intrinsic::loongarch_lsx_vslti_b:
4675 case Intrinsic::loongarch_lsx_vslti_h:
4676 case Intrinsic::loongarch_lsx_vslti_w:
4677 case Intrinsic::loongarch_lsx_vslti_d:
4678 case Intrinsic::loongarch_lasx_xvseqi_b:
4679 case Intrinsic::loongarch_lasx_xvseqi_h:
4680 case Intrinsic::loongarch_lasx_xvseqi_w:
4681 case Intrinsic::loongarch_lasx_xvseqi_d:
4682 case Intrinsic::loongarch_lasx_xvslei_b:
4683 case Intrinsic::loongarch_lasx_xvslei_h:
4684 case Intrinsic::loongarch_lasx_xvslei_w:
4685 case Intrinsic::loongarch_lasx_xvslei_d:
4686 case Intrinsic::loongarch_lasx_xvslti_b:
4687 case Intrinsic::loongarch_lasx_xvslti_h:
4688 case Intrinsic::loongarch_lasx_xvslti_w:
4689 case Intrinsic::loongarch_lasx_xvslti_d:
4690 return checkIntrinsicImmArg<5>(Op, 2, DAG, /*IsSigned=*/true);
// 5-bit unsigned immediate in operand 3.
4691 case Intrinsic::loongarch_lsx_vsrlni_h_w:
4692 case Intrinsic::loongarch_lsx_vsrani_h_w:
4693 case Intrinsic::loongarch_lsx_vsrlrni_h_w:
4694 case Intrinsic::loongarch_lsx_vsrarni_h_w:
4695 case Intrinsic::loongarch_lsx_vssrlni_h_w:
4696 case Intrinsic::loongarch_lsx_vssrani_h_w:
4697 case Intrinsic::loongarch_lsx_vssrlni_hu_w:
4698 case Intrinsic::loongarch_lsx_vssrani_hu_w:
4699 case Intrinsic::loongarch_lsx_vssrlrni_h_w:
4700 case Intrinsic::loongarch_lsx_vssrarni_h_w:
4701 case Intrinsic::loongarch_lsx_vssrlrni_hu_w:
4702 case Intrinsic::loongarch_lsx_vssrarni_hu_w:
4703 case Intrinsic::loongarch_lsx_vfrstpi_b:
4704 case Intrinsic::loongarch_lsx_vfrstpi_h:
4705 case Intrinsic::loongarch_lasx_xvsrlni_h_w:
4706 case Intrinsic::loongarch_lasx_xvsrani_h_w:
4707 case Intrinsic::loongarch_lasx_xvsrlrni_h_w:
4708 case Intrinsic::loongarch_lasx_xvsrarni_h_w:
4709 case Intrinsic::loongarch_lasx_xvssrlni_h_w:
4710 case Intrinsic::loongarch_lasx_xvssrani_h_w:
4711 case Intrinsic::loongarch_lasx_xvssrlni_hu_w:
4712 case Intrinsic::loongarch_lasx_xvssrani_hu_w:
4713 case Intrinsic::loongarch_lasx_xvssrlrni_h_w:
4714 case Intrinsic::loongarch_lasx_xvssrarni_h_w:
4715 case Intrinsic::loongarch_lasx_xvssrlrni_hu_w:
4716 case Intrinsic::loongarch_lasx_xvssrarni_hu_w:
4717 case Intrinsic::loongarch_lasx_xvfrstpi_b:
4718 case Intrinsic::loongarch_lasx_xvfrstpi_h:
4719 return checkIntrinsicImmArg<5>(Op, 3, DAG);
// 6-bit unsigned immediate in operand 2.
4720 case Intrinsic::loongarch_lsx_vsat_d:
4721 case Intrinsic::loongarch_lsx_vsat_du:
4722 case Intrinsic::loongarch_lsx_vrotri_d:
4723 case Intrinsic::loongarch_lsx_vsrlri_d:
4724 case Intrinsic::loongarch_lsx_vsrari_d:
4725 case Intrinsic::loongarch_lasx_xvsat_d:
4726 case Intrinsic::loongarch_lasx_xvsat_du:
4727 case Intrinsic::loongarch_lasx_xvrotri_d:
4728 case Intrinsic::loongarch_lasx_xvsrlri_d:
4729 case Intrinsic::loongarch_lasx_xvsrari_d:
4730 return checkIntrinsicImmArg<6>(Op, 2, DAG);
// 6-bit unsigned immediate in operand 3.
4731 case Intrinsic::loongarch_lsx_vsrlni_w_d:
4732 case Intrinsic::loongarch_lsx_vsrani_w_d:
4733 case Intrinsic::loongarch_lsx_vsrlrni_w_d:
4734 case Intrinsic::loongarch_lsx_vsrarni_w_d:
4735 case Intrinsic::loongarch_lsx_vssrlni_w_d:
4736 case Intrinsic::loongarch_lsx_vssrani_w_d:
4737 case Intrinsic::loongarch_lsx_vssrlni_wu_d:
4738 case Intrinsic::loongarch_lsx_vssrani_wu_d:
4739 case Intrinsic::loongarch_lsx_vssrlrni_w_d:
4740 case Intrinsic::loongarch_lsx_vssrarni_w_d:
4741 case Intrinsic::loongarch_lsx_vssrlrni_wu_d:
4742 case Intrinsic::loongarch_lsx_vssrarni_wu_d:
4743 case Intrinsic::loongarch_lasx_xvsrlni_w_d:
4744 case Intrinsic::loongarch_lasx_xvsrani_w_d:
4745 case Intrinsic::loongarch_lasx_xvsrlrni_w_d:
4746 case Intrinsic::loongarch_lasx_xvsrarni_w_d:
4747 case Intrinsic::loongarch_lasx_xvssrlni_w_d:
4748 case Intrinsic::loongarch_lasx_xvssrani_w_d:
4749 case Intrinsic::loongarch_lasx_xvssrlni_wu_d:
4750 case Intrinsic::loongarch_lasx_xvssrani_wu_d:
4751 case Intrinsic::loongarch_lasx_xvssrlrni_w_d:
4752 case Intrinsic::loongarch_lasx_xvssrarni_w_d:
4753 case Intrinsic::loongarch_lasx_xvssrlrni_wu_d:
4754 case Intrinsic::loongarch_lasx_xvssrarni_wu_d:
4755 return checkIntrinsicImmArg<6>(Op, 3, DAG);
// 7-bit unsigned immediate in operand 3.
4756 case Intrinsic::loongarch_lsx_vsrlni_d_q:
4757 case Intrinsic::loongarch_lsx_vsrani_d_q:
4758 case Intrinsic::loongarch_lsx_vsrlrni_d_q:
4759 case Intrinsic::loongarch_lsx_vsrarni_d_q:
4760 case Intrinsic::loongarch_lsx_vssrlni_d_q:
4761 case Intrinsic::loongarch_lsx_vssrani_d_q:
4762 case Intrinsic::loongarch_lsx_vssrlni_du_q:
4763 case Intrinsic::loongarch_lsx_vssrani_du_q:
4764 case Intrinsic::loongarch_lsx_vssrlrni_d_q:
4765 case Intrinsic::loongarch_lsx_vssrarni_d_q:
4766 case Intrinsic::loongarch_lsx_vssrlrni_du_q:
4767 case Intrinsic::loongarch_lsx_vssrarni_du_q:
4768 case Intrinsic::loongarch_lasx_xvsrlni_d_q:
4769 case Intrinsic::loongarch_lasx_xvsrani_d_q:
4770 case Intrinsic::loongarch_lasx_xvsrlrni_d_q:
4771 case Intrinsic::loongarch_lasx_xvsrarni_d_q:
4772 case Intrinsic::loongarch_lasx_xvssrlni_d_q:
4773 case Intrinsic::loongarch_lasx_xvssrani_d_q:
4774 case Intrinsic::loongarch_lasx_xvssrlni_du_q:
4775 case Intrinsic::loongarch_lasx_xvssrani_du_q:
4776 case Intrinsic::loongarch_lasx_xvssrlrni_d_q:
4777 case Intrinsic::loongarch_lasx_xvssrarni_d_q:
4778 case Intrinsic::loongarch_lasx_xvssrlrni_du_q:
4779 case Intrinsic::loongarch_lasx_xvssrarni_du_q:
4780 return checkIntrinsicImmArg<7>(Op, 3, DAG);
// 8-bit unsigned immediate in operand 2.
4781 case Intrinsic::loongarch_lsx_vnori_b:
4782 case Intrinsic::loongarch_lsx_vshuf4i_b:
4783 case Intrinsic::loongarch_lsx_vshuf4i_h:
4784 case Intrinsic::loongarch_lsx_vshuf4i_w:
4785 case Intrinsic::loongarch_lasx_xvnori_b:
4786 case Intrinsic::loongarch_lasx_xvshuf4i_b:
4787 case Intrinsic::loongarch_lasx_xvshuf4i_h:
4788 case Intrinsic::loongarch_lasx_xvshuf4i_w:
4789 case Intrinsic::loongarch_lasx_xvpermi_d:
4790 return checkIntrinsicImmArg<8>(Op, 2, DAG);
// 8-bit unsigned immediate in operand 3.
4791 case Intrinsic::loongarch_lsx_vshuf4i_d:
4792 case Intrinsic::loongarch_lsx_vpermi_w:
4793 case Intrinsic::loongarch_lsx_vbitseli_b:
4794 case Intrinsic::loongarch_lsx_vextrins_b:
4795 case Intrinsic::loongarch_lsx_vextrins_h:
4796 case Intrinsic::loongarch_lsx_vextrins_w:
4797 case Intrinsic::loongarch_lsx_vextrins_d:
4798 case Intrinsic::loongarch_lasx_xvshuf4i_d:
4799 case Intrinsic::loongarch_lasx_xvpermi_w:
4800 case Intrinsic::loongarch_lasx_xvpermi_q:
4801 case Intrinsic::loongarch_lasx_xvbitseli_b:
4802 case Intrinsic::loongarch_lasx_xvextrins_b:
4803 case Intrinsic::loongarch_lasx_xvextrins_h:
4804 case Intrinsic::loongarch_lasx_xvextrins_w:
4805 case Intrinsic::loongarch_lasx_xvextrins_d:
4806 return checkIntrinsicImmArg<8>(Op, 3, DAG);
// 10-bit signed immediate in operand 1.
4807 case Intrinsic::loongarch_lsx_vrepli_b:
4808 case Intrinsic::loongarch_lsx_vrepli_h:
4809 case Intrinsic::loongarch_lsx_vrepli_w:
4810 case Intrinsic::loongarch_lsx_vrepli_d:
4811 case Intrinsic::loongarch_lasx_xvrepli_b:
4812 case Intrinsic::loongarch_lasx_xvrepli_h:
4813 case Intrinsic::loongarch_lasx_xvrepli_w:
4814 case Intrinsic::loongarch_lasx_xvrepli_d:
4815 return checkIntrinsicImmArg<10>(Op, 1, DAG, /*IsSigned=*/true);
// 13-bit signed immediate in operand 1.
4816 case Intrinsic::loongarch_lsx_vldi:
4817 case Intrinsic::loongarch_lasx_xvldi:
4818 return checkIntrinsicImmArg<13>(Op, 1, DAG, /*IsSigned=*/true);
4819 }
4820}
4821
4822// Helper function that emits an error message for intrinsics with a chain and
4823// returns the merged values of an UNDEF and the chain.
// The message is prefixed with the node's operation name and terminated with
// a period. The UNDEF has the node's value type, and operand 0 is passed
// through as the chain.
// NOTE(review): the first line of the signature (source line 4824) is missing
// from this listing; Op used below is presumably declared there.
4825 StringRef ErrorMsg,
4826 SelectionDAG &DAG) {
4827 DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
4828 return DAG.getMergeValues({DAG.getUNDEF(Op.getValueType()), Op.getOperand(0)},
4829 SDLoc(Op));
4830}
4831
// Custom lowering for intrinsics that carry a chain and produce a value.
// Operand 0 of Op is the chain, operand 1 the intrinsic ID, and the intrinsic's
// own arguments start at operand 2. Each case either
//   * reports an error (out-of-range immediate / missing target feature) via
//     emitIntrinsicWithChainErrorMessage,
//   * builds the matching LoongArchISD node, or
//   * returns Op or an empty SDValue once the immediates have been validated.
// NOTE(review): how the caller treats a returned Op versus an empty SDValue is
// not visible in this function -- confirm against the lowering framework.
4832SDValue
4833LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
4834 SelectionDAG &DAG) const {
4835 SDLoc DL(Op);
4836 MVT GRLenVT = Subtarget.getGRLenVT();
4837 EVT VT = Op.getValueType();
4838 SDValue Chain = Op.getOperand(0);
4839 const StringRef ErrorMsgOOR = "argument out of range";
4840 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
4841 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
4842
// Dispatch on the intrinsic ID.
4843 switch (Op.getConstantOperandVal(1)) {
4844 default:
// Intrinsics not listed below are returned unchanged.
4845 return Op;
// The CRC intrinsics are rejected unconditionally here.
// NOTE(review): presumably this point is only reached when they were not
// already expanded for LA64 elsewhere -- confirm.
4846 case Intrinsic::loongarch_crc_w_b_w:
4847 case Intrinsic::loongarch_crc_w_h_w:
4848 case Intrinsic::loongarch_crc_w_w_w:
4849 case Intrinsic::loongarch_crc_w_d_w:
4850 case Intrinsic::loongarch_crcc_w_b_w:
4851 case Intrinsic::loongarch_crcc_w_h_w:
4852 case Intrinsic::loongarch_crcc_w_w_w:
4853 case Intrinsic::loongarch_crcc_w_d_w:
4854 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqLA64, DAG);
// csrrd: operand 2 is the CSR number and must fit in 14 unsigned bits.
4855 case Intrinsic::loongarch_csrrd_w:
4856 case Intrinsic::loongarch_csrrd_d: {
4857 unsigned Imm = Op.getConstantOperandVal(2);
4858 return !isUInt<14>(Imm)
4859 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4860 : DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
4861 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
4862 }
// csrwr: operand 2 is forwarded as-is, operand 3 is the 14-bit CSR number.
4863 case Intrinsic::loongarch_csrwr_w:
4864 case Intrinsic::loongarch_csrwr_d: {
4865 unsigned Imm = Op.getConstantOperandVal(3);
4866 return !isUInt<14>(Imm)
4867 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4868 : DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
4869 {Chain, Op.getOperand(2),
4870 DAG.getConstant(Imm, DL, GRLenVT)});
4871 }
// csrxchg: operands 2 and 3 are forwarded, operand 4 is the 14-bit CSR number.
4872 case Intrinsic::loongarch_csrxchg_w:
4873 case Intrinsic::loongarch_csrxchg_d: {
4874 unsigned Imm = Op.getConstantOperandVal(4);
4875 return !isUInt<14>(Imm)
4876 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4877 : DAG.getNode(LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
4878 {Chain, Op.getOperand(2), Op.getOperand(3),
4879 DAG.getConstant(Imm, DL, GRLenVT)});
4880 }
// iocsrrd.d: the operand is any-extended to i64 before building the node.
4881 case Intrinsic::loongarch_iocsrrd_d: {
4882 return DAG.getNode(
4883 LoongArchISD::IOCSRRD_D, DL, {GRLenVT, MVT::Other},
4884 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2))});
4885 }
// The b/h/w IOCSR reads forward operand 2 without extension.
4886#define IOCSRRD_CASE(NAME, NODE) \
4887 case Intrinsic::loongarch_##NAME: { \
4888 return DAG.getNode(LoongArchISD::NODE, DL, {GRLenVT, MVT::Other}, \
4889 {Chain, Op.getOperand(2)}); \
4890 }
4891 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
4892 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
4893 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
4894#undef IOCSRRD_CASE
4895 case Intrinsic::loongarch_cpucfg: {
4896 return DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
4897 {Chain, Op.getOperand(2)});
4898 }
// lddir.d: only the immediate (operand 3, 8 unsigned bits) is validated; the
// node itself is returned unchanged.
4899 case Intrinsic::loongarch_lddir_d: {
4900 unsigned Imm = Op.getConstantOperandVal(3);
4901 return !isUInt<8>(Imm)
4902 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4903 : Op;
4904 }
// movfcsr2gr: needs the basic 'f' feature and a 2-bit FCSR index; the result
// keeps Op's own value type VT rather than GRLenVT.
4905 case Intrinsic::loongarch_movfcsr2gr: {
4906 if (!Subtarget.hasBasicF())
4907 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqF, DAG);
4908 unsigned Imm = Op.getConstantOperandVal(2);
4909 return !isUInt<2>(Imm)
4910 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4911 : DAG.getNode(LoongArchISD::MOVFCSR2GR, DL, {VT, MVT::Other},
4912 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
4913 }
// Vector loads: operand 3 is a signed 12-bit offset. On success an empty
// SDValue is returned.
4914 case Intrinsic::loongarch_lsx_vld:
4915 case Intrinsic::loongarch_lsx_vldrepl_b:
4916 case Intrinsic::loongarch_lasx_xvld:
4917 case Intrinsic::loongarch_lasx_xvldrepl_b:
4918 return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
4919 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4920 : SDValue();
// The wider vldrepl forms use isShiftedInt, so the offset must also be a
// multiple of the element size (2, 4 or 8), as the error text states.
4921 case Intrinsic::loongarch_lsx_vldrepl_h:
4922 case Intrinsic::loongarch_lasx_xvldrepl_h:
4923 return !isShiftedInt<11, 1>(
4924 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
// NOTE(review): listing line 4925 is absent; presumably it is
// `? emitIntrinsicWithChainErrorMessage(` -- confirm against upstream.
4926 Op, "argument out of range or not a multiple of 2", DAG)
4927 : SDValue();
4928 case Intrinsic::loongarch_lsx_vldrepl_w:
4929 case Intrinsic::loongarch_lasx_xvldrepl_w:
4930 return !isShiftedInt<10, 2>(
4931 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
// NOTE(review): listing line 4932 is absent (same presumed '?' arm opener).
4933 Op, "argument out of range or not a multiple of 4", DAG)
4934 : SDValue();
4935 case Intrinsic::loongarch_lsx_vldrepl_d:
4936 case Intrinsic::loongarch_lasx_xvldrepl_d:
4937 return !isShiftedInt<9, 3>(
4938 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
// NOTE(review): listing line 4939 is absent (same presumed '?' arm opener).
4940 Op, "argument out of range or not a multiple of 8", DAG)
4941 : SDValue();
4942 }
4943}
4944
4945// Helper for intrinsics with a void result: reports the error
4946// "<operation name>: <ErrorMsg>." through the LLVMContext and returns Op's
// incoming chain (operand 0) as the replacement value.
// NOTE(review): the embedded listing numbers jump from 4946 to 4948, so the
// line that opens this definition is absent here. Judging by the call sites it
// is presumably
// `static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg,`
// -- confirm against the upstream file.
4948 SelectionDAG &DAG) {
4949
4950 DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
4951 return Op.getOperand(0);
4952}
4953
// Custom lowering for intrinsics that carry a chain and return nothing.
// Operand 0 of Op is the chain, operand 1 the intrinsic ID, operand 2 (Op2)
// the first intrinsic argument. Each case validates immediates and subtarget
// requirements; failures are reported through emitIntrinsicErrorMessage, which
// returns the chain. Successful cases either build a LoongArchISD node, return
// Op unchanged, or return an empty SDValue.
// NOTE(review): how the caller treats a returned Op versus an empty SDValue is
// not visible in this function -- confirm against the lowering framework.
4954SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op,
4955 SelectionDAG &DAG) const {
4956 SDLoc DL(Op);
4957 MVT GRLenVT = Subtarget.getGRLenVT();
4958 SDValue Chain = Op.getOperand(0);
4959 uint64_t IntrinsicEnum = Op.getConstantOperandVal(1);
4960 SDValue Op2 = Op.getOperand(2);
4961 const StringRef ErrorMsgOOR = "argument out of range";
4962 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
4963 const StringRef ErrorMsgReqLA32 = "requires loongarch32";
4964 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
4965
4966 switch (IntrinsicEnum) {
4967 default:
4968 // TODO: Add more Intrinsics.
4969 return SDValue();
// cacop: the .d form is only accepted on 64-bit subtargets and the .w form
// only on 32-bit ones; afterwards both immediates are range-checked.
4970 case Intrinsic::loongarch_cacop_d:
4971 case Intrinsic::loongarch_cacop_w: {
4972 if (IntrinsicEnum == Intrinsic::loongarch_cacop_d && !Subtarget.is64Bit())
4973 return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG);
4974 if (IntrinsicEnum == Intrinsic::loongarch_cacop_w && Subtarget.is64Bit())
4975 return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA32, DAG);
4976 // call void @llvm.loongarch.cacop.[d/w](uimm5, rj, simm12)
4977 unsigned Imm1 = Op2->getAsZExtVal();
4978 int Imm2 = cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue();
4979 if (!isUInt<5>(Imm1) || !isInt<12>(Imm2))
4980 return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
4981 return Op;
4982 }
// dbar / ibar / break / syscall all take a single 15-bit unsigned immediate
// and become the LoongArchISD node of the same name.
4983 case Intrinsic::loongarch_dbar: {
4984 unsigned Imm = Op2->getAsZExtVal();
4985 return !isUInt<15>(Imm)
4986 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4987 : DAG.getNode(LoongArchISD::DBAR, DL, MVT::Other, Chain,
4988 DAG.getConstant(Imm, DL, GRLenVT));
4989 }
4990 case Intrinsic::loongarch_ibar: {
4991 unsigned Imm = Op2->getAsZExtVal();
4992 return !isUInt<15>(Imm)
4993 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4994 : DAG.getNode(LoongArchISD::IBAR, DL, MVT::Other, Chain,
4995 DAG.getConstant(Imm, DL, GRLenVT));
4996 }
4997 case Intrinsic::loongarch_break: {
4998 unsigned Imm = Op2->getAsZExtVal();
4999 return !isUInt<15>(Imm)
5000 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
5001 : DAG.getNode(LoongArchISD::BREAK, DL, MVT::Other, Chain,
5002 DAG.getConstant(Imm, DL, GRLenVT));
5003 }
// movgr2fcsr: needs the basic 'f' feature and a 2-bit FCSR index; the value
// to write (operand 3) is any-extended to GRLenVT.
5004 case Intrinsic::loongarch_movgr2fcsr: {
5005 if (!Subtarget.hasBasicF())
5006 return emitIntrinsicErrorMessage(Op, ErrorMsgReqF, DAG);
5007 unsigned Imm = Op2->getAsZExtVal();
5008 return !isUInt<2>(Imm)
5009 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
5010 : DAG.getNode(LoongArchISD::MOVGR2FCSR, DL, MVT::Other, Chain,
5011 DAG.getConstant(Imm, DL, GRLenVT),
5012 DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT,
5013 Op.getOperand(3)));
5014 }
5015 case Intrinsic::loongarch_syscall: {
5016 unsigned Imm = Op2->getAsZExtVal();
5017 return !isUInt<15>(Imm)
5018 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
5019 : DAG.getNode(LoongArchISD::SYSCALL, DL, MVT::Other, Chain,
5020 DAG.getConstant(Imm, DL, GRLenVT));
5021 }
// IOCSR b/h/w writes: on 64-bit subtargets both operands are any-extended to
// i64 first; otherwise they are passed through unchanged.
5022#define IOCSRWR_CASE(NAME, NODE) \
5023 case Intrinsic::loongarch_##NAME: { \
5024 SDValue Op3 = Op.getOperand(3); \
5025 return Subtarget.is64Bit() \
5026 ? DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, \
5027 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
5028 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op3)) \
5029 : DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, Op2, \
5030 Op3); \
5031 }
5032 IOCSRWR_CASE(iocsrwr_b, IOCSRWR_B);
5033 IOCSRWR_CASE(iocsrwr_h, IOCSRWR_H);
5034 IOCSRWR_CASE(iocsrwr_w, IOCSRWR_W);
5035#undef IOCSRWR_CASE
// iocsrwr.d is rejected on non-64-bit subtargets; only operand 3 is extended.
5036 case Intrinsic::loongarch_iocsrwr_d: {
5037 return !Subtarget.is64Bit()
5038 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
5039 : DAG.getNode(LoongArchISD::IOCSRWR_D, DL, MVT::Other, Chain,
5040 Op2,
5041 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
5042 Op.getOperand(3)));
5043 }
// asrtle.d / asrtgt.d: only the 64-bit requirement is checked; the node is
// otherwise returned unchanged.
5044#define ASRT_LE_GT_CASE(NAME) \
5045 case Intrinsic::loongarch_##NAME: { \
5046 return !Subtarget.is64Bit() \
5047 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG) \
5048 : Op; \
5049 }
5050 ASRT_LE_GT_CASE(asrtle_d)
5051 ASRT_LE_GT_CASE(asrtgt_d)
5052#undef ASRT_LE_GT_CASE
// ldpte.d: the 64-bit requirement is checked first, then the 8-bit unsigned
// immediate in operand 3.
5053 case Intrinsic::loongarch_ldpte_d: {
5054 unsigned Imm = Op.getConstantOperandVal(3);
5055 return !Subtarget.is64Bit()
5056 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
5057 : !isUInt<8>(Imm) ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
5058 : Op;
5059 }
// Vector stores: operand 4 is a signed 12-bit offset. On success an empty
// SDValue is returned.
5060 case Intrinsic::loongarch_lsx_vst:
5061 case Intrinsic::loongarch_lasx_xvst:
5062 return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue())
5063 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
5064 : SDValue();
// Element stores: operand 4 is the signed offset (8 bits, scaled by the
// element size for h/w/d via isShiftedInt) and operand 5 an unsigned
// immediate whose width shrinks as the element grows (LASX: 5/4/3/2 bits,
// LSX: 4/3/2/1 bits).
5065 case Intrinsic::loongarch_lasx_xvstelm_b:
5066 return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
5067 !isUInt<5>(Op.getConstantOperandVal(5)))
5068 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
5069 : SDValue();
5070 case Intrinsic::loongarch_lsx_vstelm_b:
5071 return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
5072 !isUInt<4>(Op.getConstantOperandVal(5)))
5073 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
5074 : SDValue();
5075 case Intrinsic::loongarch_lasx_xvstelm_h:
5076 return (!isShiftedInt<8, 1>(
5077 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
5078 !isUInt<4>(Op.getConstantOperandVal(5)))
// NOTE(review): listing line 5079 is absent; presumably it is
// `? emitIntrinsicErrorMessage(` -- confirm against upstream.
5080 Op, "argument out of range or not a multiple of 2", DAG)
5081 : SDValue();
5082 case Intrinsic::loongarch_lsx_vstelm_h:
5083 return (!isShiftedInt<8, 1>(
5084 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
5085 !isUInt<3>(Op.getConstantOperandVal(5)))
// NOTE(review): listing line 5086 is absent (same presumed '?' arm opener).
5087 Op, "argument out of range or not a multiple of 2", DAG)
5088 : SDValue();
5089 case Intrinsic::loongarch_lasx_xvstelm_w:
5090 return (!isShiftedInt<8, 2>(
5091 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
5092 !isUInt<3>(Op.getConstantOperandVal(5)))
// NOTE(review): listing line 5093 is absent (same presumed '?' arm opener).
5094 Op, "argument out of range or not a multiple of 4", DAG)
5095 : SDValue();
5096 case Intrinsic::loongarch_lsx_vstelm_w:
5097 return (!isShiftedInt<8, 2>(
5098 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
5099 !isUInt<2>(Op.getConstantOperandVal(5)))
// NOTE(review): listing line 5100 is absent (same presumed '?' arm opener).
5101 Op, "argument out of range or not a multiple of 4", DAG)
5102 : SDValue();
5103 case Intrinsic::loongarch_lasx_xvstelm_d:
5104 return (!isShiftedInt<8, 3>(
5105 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
5106 !isUInt<2>(Op.getConstantOperandVal(5)))
// NOTE(review): listing line 5107 is absent (same presumed '?' arm opener).
5108 Op, "argument out of range or not a multiple of 8", DAG)
5109 : SDValue();
5110 case Intrinsic::loongarch_lsx_vstelm_d:
5111 return (!isShiftedInt<8, 3>(
5112 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
5113 !isUInt<1>(Op.getConstantOperandVal(5)))
// NOTE(review): listing line 5114 is absent (same presumed '?' arm opener).
5115 Op, "argument out of range or not a multiple of 8", DAG)
5116 : SDValue();
5117 }
5118}
5119
5120SDValue LoongArchTargetLowering::lowerShiftLeftParts(SDValue Op,
5121 SelectionDAG &DAG) const {
5122 SDLoc DL(Op);
5123 SDValue Lo = Op.getOperand(0);
5124 SDValue Hi = Op.getOperand(1);
5125 SDValue Shamt = Op.getOperand(2);
5126 EVT VT = Lo.getValueType();
5127
5128 // if Shamt-GRLen < 0: // Shamt < GRLen
5129 // Lo = Lo << Shamt
5130 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (GRLen-1 ^ Shamt))
5131 // else:
5132 // Lo = 0
5133 // Hi = Lo << (Shamt-GRLen)
5134
5135 SDValue Zero = DAG.getConstant(0, DL, VT);
5136 SDValue One = DAG.getConstant(1, DL, VT);
5137 SDValue MinusGRLen =
5138 DAG.getSignedConstant(-(int)Subtarget.getGRLen(), DL, VT);
5139 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
5140 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
5141 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
5142
5143 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
5144 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
5145 SDValue ShiftRightLo =
5146 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, GRLenMinus1Shamt);
5147 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
5148 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
5149 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusGRLen);
5150
5151 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
5152
5153 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
5154 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
5155
5156 SDValue Parts[2] = {Lo, Hi};
5157 return DAG.getMergeValues(Parts, DL);
5158}
5159
5160SDValue LoongArchTargetLowering::lowerShiftRightParts(SDValue Op,
5161 SelectionDAG &DAG,
5162 bool IsSRA) const {
5163 SDLoc DL(Op);
5164 SDValue Lo = Op.getOperand(0);
5165 SDValue Hi = Op.getOperand(1);
5166 SDValue Shamt = Op.getOperand(2);
5167 EVT VT = Lo.getValueType();
5168
5169 // SRA expansion:
5170 // if Shamt-GRLen < 0: // Shamt < GRLen
5171 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
5172 // Hi = Hi >>s Shamt
5173 // else:
5174 // Lo = Hi >>s (Shamt-GRLen);
5175 // Hi = Hi >>s (GRLen-1)
5176 //
5177 // SRL expansion:
5178 // if Shamt-GRLen < 0: // Shamt < GRLen
5179 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
5180 // Hi = Hi >>u Shamt
5181 // else:
5182 // Lo = Hi >>u (Shamt-GRLen);
5183 // Hi = 0;
5184
5185 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
5186
5187 SDValue Zero = DAG.getConstant(0, DL, VT);
5188 SDValue One = DAG.getConstant(1, DL, VT);
5189 SDValue MinusGRLen =
5190 DAG.getSignedConstant(-(int)Subtarget.getGRLen(), DL, VT);
5191 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
5192 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
5193 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
5194
5195 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
5196 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
5197 SDValue ShiftLeftHi =
5198 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, GRLenMinus1Shamt);
5199 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
5200 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
5201 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusGRLen);
5202 SDValue HiFalse =
5203 IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, GRLenMinus1) : Zero;
5204
5205 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
5206
5207 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
5208 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
5209
5210 SDValue Parts[2] = {Lo, Hi};
5211 return DAG.getMergeValues(Parts, DL);
5212}
5213
5214// Returns the opcode of the target-specific SDNode that implements the 32-bit
5215// form of the given Opcode.
5216static unsigned getLoongArchWOpcode(unsigned Opcode) {
5217 switch (Opcode) {
5218 default:
5219 llvm_unreachable("Unexpected opcode");
5220 case ISD::SDIV:
5221 return LoongArchISD::DIV_W;
5222 case ISD::UDIV:
5223 return LoongArchISD::DIV_WU;
5224 case ISD::SREM:
5225 return LoongArchISD::MOD_W;
5226 case ISD::UREM:
5227 return LoongArchISD::MOD_WU;
5228 case ISD::SHL:
5229 return LoongArchISD::SLL_W;
5230 case ISD::SRA:
5231 return LoongArchISD::SRA_W;
5232 case ISD::SRL:
5233 return LoongArchISD::SRL_W;
5234 case ISD::ROTL:
5235 case ISD::ROTR:
5236 return LoongArchISD::ROTR_W;
5237 case ISD::CTTZ:
5238 return LoongArchISD::CTZ_W;
5239 case ISD::CTLZ:
5240 return LoongArchISD::CLZ_W;
5241 }
5242}
5243
5244// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
5245// node. Because i8/i16/i32 isn't a legal type for LA64, these operations would
5246// otherwise be promoted to i64, making it difficult to select the
5247// SLL_W/.../*W nodes later on, because the fact that the operation was
5248// originally of type i8/i16/i32 is lost.
// The operands are widened to i64 with ExtOpc, the W-form node chosen by
// getLoongArchWOpcode is built on i64, and the result is truncated back to
// N's original result type. NumOp selects the unary (1) or binary (2) form.
// NOTE(review): the embedded listing numbers jump from 5248 to 5250, so the
// line that opens this definition is absent here. Judging by the call sites it
// is presumably
// `static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp,`
// -- confirm the exact parameter types against the upstream file.
5250 unsigned ExtOpc = ISD::ANY_EXTEND) {
5251 SDLoc DL(N);
5252 unsigned WOpcode = getLoongArchWOpcode(N->getOpcode());
5253 SDValue NewOp0, NewRes;
5254
5255 switch (NumOp) {
5256 default:
5257 llvm_unreachable("Unexpected NumOp");
5258 case 1: {
5259 NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
5260 NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0);
5261 break;
5262 }
5263 case 2: {
5264 NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
5265 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
// ROTL is mapped to ROTR_W by getLoongArchWOpcode, so the rotate amount is
// rewritten as (32 - amount) to keep the same result.
5266 if (N->getOpcode() == ISD::ROTL) {
5267 SDValue TmpOp = DAG.getConstant(32, DL, MVT::i64);
5268 NewOp1 = DAG.getNode(ISD::SUB, DL, MVT::i64, TmpOp, NewOp1);
5269 }
5270 NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
5271 break;
5272 }
5273 // TODO:Handle more NumOp.
5274 }
5275
5276 // ReplaceNodeResults requires we maintain the same type for the return
5277 // value.
5278 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
5279}
5280
5281// Converts the given 32-bit binary operation to an i64 operation with
5282// sign-extension semantics, to reduce the number of sign-extension
// instructions: both operands are any-extended to i64, the same opcode is
// applied on i64, the result is sign-extended in-register from i32 and then
// truncated back to i32.
// NOTE(review): the embedded listing numbers jump from 5282 to 5284, so the
// line that opens this definition is absent here. Judging by the call site it
// is presumably
// `static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {`
// -- confirm against the upstream file.
5284 SDLoc DL(N);
5285 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
5286 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
5287 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
5288 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
5289 DAG.getValueType(MVT::i32));
5290 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
5291}
5292
5293// Helper that emits an error message for an intrinsic with or without a chain
5294// and pushes the replacement results: an UNDEF of N's first result type,
// followed by N's incoming chain (operand 0) when WithChain is true.
// NOTE(review): the embedded listing numbers jump from 5294 to 5297, so the
// lines that open this definition (name and leading parameters) are absent
// here. The call sites pass (N, Results, DAG, ErrorMsg[, WithChain]); confirm
// the exact declaration against the upstream file.
5297 StringRef ErrorMsg, bool WithChain = true) {
// The diagnostic text is "<operation name>: <ErrorMsg>.".
5298 DAG.getContext()->emitError(N->getOperationName(0) + ": " + ErrorMsg + ".");
5299 Results.push_back(DAG.getUNDEF(N->getValueType(0)));
5300 if (!WithChain)
5301 return;
5302 Results.push_back(N->getOperand(0));
5303}
5304
// Replaces the result of a vpickve2gr-style intrinsic. The element index is
// read from operand 2 and must fit in N unsigned bits; otherwise an
// "argument out of range" error is reported without a chain result. On success
// a ResOp node is built on GRLenVT from the vector (operand 1) and the index,
// and its value is truncated to Node's original result type.
// NOTE(review): the embedded listing numbers skip 5307, 5313 and 5323, so the
// line carrying the function name and leading parameters, the opener of the
// error-reporting call, and the final operand of the getNode call are absent
// from this listing -- restore them from the upstream file.
5305template <unsigned N>
5306static void
5308 SelectionDAG &DAG, const LoongArchSubtarget &Subtarget,
5309 unsigned ResOp) {
5310 const StringRef ErrorMsgOOR = "argument out of range";
5311 unsigned Imm = Node->getConstantOperandVal(2);
5312 if (!isUInt<N>(Imm)) {
// (listing line 5313 absent: presumably the start of the call to
// emitErrorAndReplaceIntrinsicResults -- confirm)
5314 /*WithChain=*/false);
5315 return;
5316 }
5317 SDLoc DL(Node);
5318 SDValue Vec = Node->getOperand(1);
5319
5320 SDValue PickElt =
5321 DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec,
5322 DAG.getConstant(Imm, DL, Subtarget.getGRLenVT()),
// (listing line 5323 absent: the last getNode operand and closing ");")
5324 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, Node->getValueType(0),
5325 PickElt.getValue(0)));
5326}
5327
// Replaces the result of a vector condition intrinsic: builds a ResOp node on
// GRLenVT from the vector in operand 1 and pushes its value, truncated to N's
// original result type, onto Results.
// NOTE(review): the embedded listing numbers start this definition at 5330, so
// the lines carrying the return type, function name and leading parameters
// are absent here. The call sites pass (N, Results, DAG, Subtarget, ResOp);
// confirm the exact declaration against the upstream file.
5330 SelectionDAG &DAG,
5331 const LoongArchSubtarget &Subtarget,
5332 unsigned ResOp) {
5333 SDLoc DL(N);
5334 SDValue Vec = N->getOperand(1);
5335
5336 SDValue CB = DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec);
5337 Results.push_back(
5338 DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), CB.getValue(0)));
5339}
5340
// Replaces the results of chain-less intrinsics. The intrinsic ID is read from
// operand 0 and dispatched to one of two helpers:
//   * vpickve2gr variants -> replaceVPICKVE2GRResults<Bits>, where Bits is the
//     width of the element-index immediate and the node is the sign- or
//     zero-extending element pick depending on the 'u' suffix;
//   * bz / bnz variants   -> replaceVecCondBranchResults with the matching
//     all/any zero/nonzero node.
// Any other intrinsic ID hits llvm_unreachable.
// NOTE(review): the embedded listing numbers jump from 5341 to 5343, so the
// line carrying the function name and leading parameters is absent here --
// restore it from the upstream file.
5341static void
5343 SelectionDAG &DAG,
5344 const LoongArchSubtarget &Subtarget) {
5345 switch (N->getConstantOperandVal(0)) {
5346 default:
5347 llvm_unreachable("Unexpected Intrinsic.");
// Sign-extending element picks (index width: b=4, h / LASX w=3, w=2 bits).
5348 case Intrinsic::loongarch_lsx_vpickve2gr_b:
5349 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
5350 LoongArchISD::VPICK_SEXT_ELT);
5351 break;
5352 case Intrinsic::loongarch_lsx_vpickve2gr_h:
5353 case Intrinsic::loongarch_lasx_xvpickve2gr_w:
5354 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
5355 LoongArchISD::VPICK_SEXT_ELT);
5356 break;
5357 case Intrinsic::loongarch_lsx_vpickve2gr_w:
5358 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
5359 LoongArchISD::VPICK_SEXT_ELT);
5360 break;
// Zero-extending element picks, same index widths as above.
5361 case Intrinsic::loongarch_lsx_vpickve2gr_bu:
5362 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
5363 LoongArchISD::VPICK_ZEXT_ELT);
5364 break;
5365 case Intrinsic::loongarch_lsx_vpickve2gr_hu:
5366 case Intrinsic::loongarch_lasx_xvpickve2gr_wu:
5367 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
5368 LoongArchISD::VPICK_ZEXT_ELT);
5369 break;
5370 case Intrinsic::loongarch_lsx_vpickve2gr_wu:
5371 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
5372 LoongArchISD::VPICK_ZEXT_ELT);
5373 break;
// Element-wise bz.{b,h,w,d} map to VALL_ZERO; whole-vector bz.v maps to
// VANY_ZERO.
5374 case Intrinsic::loongarch_lsx_bz_b:
5375 case Intrinsic::loongarch_lsx_bz_h:
5376 case Intrinsic::loongarch_lsx_bz_w:
5377 case Intrinsic::loongarch_lsx_bz_d:
5378 case Intrinsic::loongarch_lasx_xbz_b:
5379 case Intrinsic::loongarch_lasx_xbz_h:
5380 case Intrinsic::loongarch_lasx_xbz_w:
5381 case Intrinsic::loongarch_lasx_xbz_d:
5382 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
5383 LoongArchISD::VALL_ZERO);
5384 break;
5385 case Intrinsic::loongarch_lsx_bz_v:
5386 case Intrinsic::loongarch_lasx_xbz_v:
5387 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
5388 LoongArchISD::VANY_ZERO);
5389 break;
// Element-wise bnz.{b,h,w,d} map to VALL_NONZERO; whole-vector bnz.v maps to
// VANY_NONZERO.
5390 case Intrinsic::loongarch_lsx_bnz_b:
5391 case Intrinsic::loongarch_lsx_bnz_h:
5392 case Intrinsic::loongarch_lsx_bnz_w:
5393 case Intrinsic::loongarch_lsx_bnz_d:
5394 case Intrinsic::loongarch_lasx_xbnz_b:
5395 case Intrinsic::loongarch_lasx_xbnz_h:
5396 case Intrinsic::loongarch_lasx_xbnz_w:
5397 case Intrinsic::loongarch_lasx_xbnz_d:
5398 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
5399 LoongArchISD::VALL_NONZERO);
5400 break;
5401 case Intrinsic::loongarch_lsx_bnz_v:
5402 case Intrinsic::loongarch_lasx_xbnz_v:
5403 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
5404 LoongArchISD::VANY_NONZERO);
5405 break;
5406 }
5407}
5408
// Replaces a 128-bit atomic compare-and-swap node with a machine pseudo
// (PseudoCmpXchg128 or PseudoCmpXchg128Acquire, chosen from the merged memory
// ordering). The compare value (operand 2) and new value (operand 3) are each
// split into two i64 halves; the pseudo's first two results are recombined
// into the i128 result and its last result is pushed as the chain.
// NOTE(review): the embedded listing numbers start this definition at 5411 and
// skip 5418-5420 and 5423-5424, so the opening signature lines and the `case`
// labels that select each pseudo are absent from this listing -- restore them
// from the upstream file before relying on this block.
5411 SelectionDAG &DAG) {
5412 assert(N->getValueType(0) == MVT::i128 &&
5413 "AtomicCmpSwap on types less than 128 should be legal");
5414 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
5415
5416 unsigned Opcode;
5417 switch (MemOp->getMergedOrdering()) {
// (listing lines 5418-5420 absent: case labels for the acquire pseudo)
5421 Opcode = LoongArch::PseudoCmpXchg128Acquire;
5422 break;
// (listing lines 5423-5424 absent: case labels for the plain pseudo)
5425 Opcode = LoongArch::PseudoCmpXchg128;
5426 break;
5427 default:
5428 llvm_unreachable("Unexpected ordering!");
5429 }
5430
5431 SDLoc DL(N);
5432 auto CmpVal = DAG.SplitScalar(N->getOperand(2), DL, MVT::i64, MVT::i64);
5433 auto NewVal = DAG.SplitScalar(N->getOperand(3), DL, MVT::i64, MVT::i64);
// Operand order handed to the pseudo: N's operand 1, both compare halves,
// both new-value halves, and finally the incoming chain (operand 0).
5434 SDValue Ops[] = {N->getOperand(1), CmpVal.first, CmpVal.second,
5435 NewVal.first, NewVal.second, N->getOperand(0)};
5436
// The pseudo yields three i64 values plus a chain; only values 0 and 1 and
// the chain (index 3) are consumed below.
5437 SDNode *CmpSwap = DAG.getMachineNode(
5438 Opcode, SDLoc(N), DAG.getVTList(MVT::i64, MVT::i64, MVT::i64, MVT::Other),
5439 Ops);
// Carry the original memory operand over to the new machine node.
5440 DAG.setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
5441 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128,
5442 SDValue(CmpSwap, 0), SDValue(CmpSwap, 1)));
5443 Results.push_back(SDValue(CmpSwap, 3));
5444}
5445
5448 SDLoc DL(N);
5449 EVT VT = N->getValueType(0);
5450 switch (N->getOpcode()) {
5451 default:
5452 llvm_unreachable("Don't know how to legalize this operation");
5453 case ISD::ADD:
5454 case ISD::SUB:
5455 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
5456 "Unexpected custom legalisation");
5457 Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
5458 break;
5459 case ISD::SDIV:
5460 case ISD::UDIV:
5461 case ISD::SREM:
5462 case ISD::UREM:
5463 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
5464 "Unexpected custom legalisation");
5465 Results.push_back(customLegalizeToWOp(N, DAG, 2,
5466 Subtarget.hasDiv32() && VT == MVT::i32
5468 : ISD::SIGN_EXTEND));
5469 break;
5470 case ISD::SHL:
5471 case ISD::SRA:
5472 case ISD::SRL:
5473 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
5474 "Unexpected custom legalisation");
5475 if (N->getOperand(1).getOpcode() != ISD::Constant) {
5476 Results.push_back(customLegalizeToWOp(N, DAG, 2));
5477 break;
5478 }
5479 break;
5480 case ISD::ROTL:
5481 case ISD::ROTR:
5482 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
5483 "Unexpected custom legalisation");
5484 Results.push_back(customLegalizeToWOp(N, DAG, 2));
5485 break;
5486 case ISD::LOAD: {
5487 // Use an f64 load and a scalar_to_vector for v2f32 loads. This avoids
5488 // scalarizing in 32-bit mode. In 64-bit mode this avoids a int->fp
5489 // cast since type legalization will try to use an i64 load.
5490 MVT VT = N->getSimpleValueType(0);
5491 assert(VT == MVT::v2f32 && Subtarget.hasExtLSX() &&
5492 "Unexpected custom legalisation");
5494 "Unexpected type action!");
5495 if (!ISD::isNON_EXTLoad(N))
5496 return;
5497 auto *Ld = cast<LoadSDNode>(N);
5498 SDValue Res = DAG.getLoad(MVT::f64, DL, Ld->getChain(), Ld->getBasePtr(),
5499 Ld->getPointerInfo(), Ld->getBaseAlign(),
5500 Ld->getMemOperand()->getFlags());
5501 SDValue Chain = Res.getValue(1);
5502 MVT VecVT = MVT::getVectorVT(MVT::f64, 2);
5503 Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, Res);
5504 EVT WideVT = getTypeToTransformTo(*DAG.getContext(), VT);
5505 Res = DAG.getBitcast(WideVT, Res);
5506 Results.push_back(Res);
5507 Results.push_back(Chain);
5508 break;
5509 }
5510 case ISD::FP_TO_SINT: {
5511 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
5512 "Unexpected custom legalisation");
5513 SDValue Src = N->getOperand(0);
5514 EVT FVT = EVT::getFloatingPointVT(N->getValueSizeInBits(0));
5515 if (getTypeAction(*DAG.getContext(), Src.getValueType()) !=
5517 if (!isTypeLegal(Src.getValueType()))
5518 return;
5519 if (Src.getValueType() == MVT::f16)
5520 Src = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Src);
5521 SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, FVT, Src);
5522 Results.push_back(DAG.getNode(ISD::BITCAST, DL, VT, Dst));
5523 return;
5524 }
5525 // If the FP type needs to be softened, emit a library call using the 'si'
5526 // version. If we left it to default legalization we'd end up with 'di'.
5527 RTLIB::Libcall LC;
5528 LC = RTLIB::getFPTOSINT(Src.getValueType(), VT);
5529 MakeLibCallOptions CallOptions;
5530 EVT OpVT = Src.getValueType();
5531 CallOptions.setTypeListBeforeSoften(OpVT, VT);
5532 SDValue Chain = SDValue();
5533 SDValue Result;
5534 std::tie(Result, Chain) =
5535 makeLibCall(DAG, LC, VT, Src, CallOptions, DL, Chain);
5536 Results.push_back(Result);
5537 break;
5538 }
5539 case ISD::BITCAST: {
5540 SDValue Src = N->getOperand(0);
5541 EVT SrcVT = Src.getValueType();
5542 if (VT == MVT::i32 && SrcVT == MVT::f32 && Subtarget.is64Bit() &&
5543 Subtarget.hasBasicF()) {
5544 SDValue Dst =
5545 DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Src);
5546 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Dst));
5547 } else if (VT == MVT::i64 && SrcVT == MVT::f64 && !Subtarget.is64Bit()) {
5548 SDValue NewReg = DAG.getNode(LoongArchISD::SPLIT_PAIR_F64, DL,
5549 DAG.getVTList(MVT::i32, MVT::i32), Src);
5550 SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,
5551 NewReg.getValue(0), NewReg.getValue(1));
5552 Results.push_back(RetReg);
5553 }
5554 break;
5555 }
5556 case ISD::FP_TO_UINT: {
5557 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
5558 "Unexpected custom legalisation");
5559 auto &TLI = DAG.getTargetLoweringInfo();
5560 SDValue Tmp1, Tmp2;
5561 TLI.expandFP_TO_UINT(N, Tmp1, Tmp2, DAG);
5562 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Tmp1));
5563 break;
5564 }
5565 case ISD::FP_ROUND: {
5566 assert(VT == MVT::v2f32 && Subtarget.hasExtLSX() &&
5567 "Unexpected custom legalisation");
5568 // On LSX platforms, rounding from v2f64 to v4f32 (after legalization from
5569 // v2f32) is scalarized. Add a customized v2f32 widening to convert it into
5570 // a target-specific LoongArchISD::VFCVT to optimize it.
5571 SDValue Op0 = N->getOperand(0);
5572 EVT OpVT = Op0.getValueType();
5573 if (OpVT == MVT::v2f64) {
5574 SDValue Undef = DAG.getUNDEF(OpVT);
5575 SDValue Dst =
5576 DAG.getNode(LoongArchISD::VFCVT, DL, MVT::v4f32, Undef, Op0);
5577 Results.push_back(Dst);
5578 }
5579 break;
5580 }
5581 case ISD::BSWAP: {
5582 SDValue Src = N->getOperand(0);
5583 assert((VT == MVT::i16 || VT == MVT::i32) &&
5584 "Unexpected custom legalization");
5585 MVT GRLenVT = Subtarget.getGRLenVT();
5586 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
5587 SDValue Tmp;
5588 switch (VT.getSizeInBits()) {
5589 default:
5590 llvm_unreachable("Unexpected operand width");
5591 case 16:
5592 Tmp = DAG.getNode(LoongArchISD::REVB_2H, DL, GRLenVT, NewSrc);
5593 break;
5594 case 32:
5595 // Only LA64 will get to here due to the size mismatch between VT and
5596 // GRLenVT, LA32 lowering is directly defined in LoongArchInstrInfo.
5597 Tmp = DAG.getNode(LoongArchISD::REVB_2W, DL, GRLenVT, NewSrc);
5598 break;
5599 }
5600 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
5601 break;
5602 }
5603 case ISD::BITREVERSE: {
5604 SDValue Src = N->getOperand(0);
5605 assert((VT == MVT::i8 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
5606 "Unexpected custom legalization");
5607 MVT GRLenVT = Subtarget.getGRLenVT();
5608 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
5609 SDValue Tmp;
5610 switch (VT.getSizeInBits()) {
5611 default:
5612 llvm_unreachable("Unexpected operand width");
5613 case 8:
5614 Tmp = DAG.getNode(LoongArchISD::BITREV_4B, DL, GRLenVT, NewSrc);
5615 break;
5616 case 32:
5617 Tmp = DAG.getNode(LoongArchISD::BITREV_W, DL, GRLenVT, NewSrc);
5618 break;
5619 }
5620 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
5621 break;
5622 }
5623 case ISD::CTLZ:
5624 case ISD::CTTZ: {
5625 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
5626 "Unexpected custom legalisation");
5627 Results.push_back(customLegalizeToWOp(N, DAG, 1));
5628 break;
5629 }
5631 SDValue Chain = N->getOperand(0);
5632 SDValue Op2 = N->getOperand(2);
5633 MVT GRLenVT = Subtarget.getGRLenVT();
5634 const StringRef ErrorMsgOOR = "argument out of range";
5635 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
5636 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
5637
5638 switch (N->getConstantOperandVal(1)) {
5639 default:
5640 llvm_unreachable("Unexpected Intrinsic.");
5641 case Intrinsic::loongarch_movfcsr2gr: {
5642 if (!Subtarget.hasBasicF()) {
5643 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqF);
5644 return;
5645 }
5646 unsigned Imm = Op2->getAsZExtVal();
5647 if (!isUInt<2>(Imm)) {
5648 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
5649 return;
5650 }
5651 SDValue MOVFCSR2GRResults = DAG.getNode(
5652 LoongArchISD::MOVFCSR2GR, SDLoc(N), {MVT::i64, MVT::Other},
5653 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
5654 Results.push_back(
5655 DAG.getNode(ISD::TRUNCATE, DL, VT, MOVFCSR2GRResults.getValue(0)));
5656 Results.push_back(MOVFCSR2GRResults.getValue(1));
5657 break;
5658 }
5659#define CRC_CASE_EXT_BINARYOP(NAME, NODE) \
5660 case Intrinsic::loongarch_##NAME: { \
5661 SDValue NODE = DAG.getNode( \
5662 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
5663 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
5664 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
5665 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
5666 Results.push_back(NODE.getValue(1)); \
5667 break; \
5668 }
5669 CRC_CASE_EXT_BINARYOP(crc_w_b_w, CRC_W_B_W)
5670 CRC_CASE_EXT_BINARYOP(crc_w_h_w, CRC_W_H_W)
5671 CRC_CASE_EXT_BINARYOP(crc_w_w_w, CRC_W_W_W)
5672 CRC_CASE_EXT_BINARYOP(crcc_w_b_w, CRCC_W_B_W)
5673 CRC_CASE_EXT_BINARYOP(crcc_w_h_w, CRCC_W_H_W)
5674 CRC_CASE_EXT_BINARYOP(crcc_w_w_w, CRCC_W_W_W)
5675#undef CRC_CASE_EXT_BINARYOP
5676
5677#define CRC_CASE_EXT_UNARYOP(NAME, NODE) \
5678 case Intrinsic::loongarch_##NAME: { \
5679 SDValue NODE = DAG.getNode( \
5680 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
5681 {Chain, Op2, \
5682 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
5683 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
5684 Results.push_back(NODE.getValue(1)); \
5685 break; \
5686 }
5687 CRC_CASE_EXT_UNARYOP(crc_w_d_w, CRC_W_D_W)
5688 CRC_CASE_EXT_UNARYOP(crcc_w_d_w, CRCC_W_D_W)
5689#undef CRC_CASE_EXT_UNARYOP
5690#define CSR_CASE(ID) \
5691 case Intrinsic::loongarch_##ID: { \
5692 if (!Subtarget.is64Bit()) \
5693 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64); \
5694 break; \
5695 }
5696 CSR_CASE(csrrd_d);
5697 CSR_CASE(csrwr_d);
5698 CSR_CASE(csrxchg_d);
5699 CSR_CASE(iocsrrd_d);
5700#undef CSR_CASE
5701 case Intrinsic::loongarch_csrrd_w: {
5702 unsigned Imm = Op2->getAsZExtVal();
5703 if (!isUInt<14>(Imm)) {
5704 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
5705 return;
5706 }
5707 SDValue CSRRDResults =
5708 DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
5709 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
5710 Results.push_back(
5711 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRRDResults.getValue(0)));
5712 Results.push_back(CSRRDResults.getValue(1));
5713 break;
5714 }
5715 case Intrinsic::loongarch_csrwr_w: {
5716 unsigned Imm = N->getConstantOperandVal(3);
5717 if (!isUInt<14>(Imm)) {
5718 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
5719 return;
5720 }
5721 SDValue CSRWRResults =
5722 DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
5723 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
5724 DAG.getConstant(Imm, DL, GRLenVT)});
5725 Results.push_back(
5726 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRWRResults.getValue(0)));
5727 Results.push_back(CSRWRResults.getValue(1));
5728 break;
5729 }
5730 case Intrinsic::loongarch_csrxchg_w: {
5731 unsigned Imm = N->getConstantOperandVal(4);
5732 if (!isUInt<14>(Imm)) {
5733 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
5734 return;
5735 }
5736 SDValue CSRXCHGResults = DAG.getNode(
5737 LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
5738 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
5739 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3)),
5740 DAG.getConstant(Imm, DL, GRLenVT)});
5741 Results.push_back(
5742 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRXCHGResults.getValue(0)));
5743 Results.push_back(CSRXCHGResults.getValue(1));
5744 break;
5745 }
5746#define IOCSRRD_CASE(NAME, NODE) \
5747 case Intrinsic::loongarch_##NAME: { \
5748 SDValue IOCSRRDResults = \
5749 DAG.getNode(LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
5750 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)}); \
5751 Results.push_back( \
5752 DAG.getNode(ISD::TRUNCATE, DL, VT, IOCSRRDResults.getValue(0))); \
5753 Results.push_back(IOCSRRDResults.getValue(1)); \
5754 break; \
5755 }
5756 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
5757 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
5758 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
5759#undef IOCSRRD_CASE
5760 case Intrinsic::loongarch_cpucfg: {
5761 SDValue CPUCFGResults =
5762 DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
5763 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)});
5764 Results.push_back(
5765 DAG.getNode(ISD::TRUNCATE, DL, VT, CPUCFGResults.getValue(0)));
5766 Results.push_back(CPUCFGResults.getValue(1));
5767 break;
5768 }
5769 case Intrinsic::loongarch_lddir_d: {
5770 if (!Subtarget.is64Bit()) {
5771 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64);
5772 return;
5773 }
5774 break;
5775 }
5776 }
5777 break;
5778 }
5779 case ISD::READ_REGISTER: {
5780 if (Subtarget.is64Bit())
5781 DAG.getContext()->emitError(
5782 "On LA64, only 64-bit registers can be read.");
5783 else
5784 DAG.getContext()->emitError(
5785 "On LA32, only 32-bit registers can be read.");
5786 Results.push_back(DAG.getUNDEF(VT));
5787 Results.push_back(N->getOperand(0));
5788 break;
5789 }
5791 replaceINTRINSIC_WO_CHAINResults(N, Results, DAG, Subtarget);
5792 break;
5793 }
5794 case ISD::LROUND: {
5795 SDValue Op0 = N->getOperand(0);
5796 EVT OpVT = Op0.getValueType();
5797 RTLIB::Libcall LC =
5798 OpVT == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
5799 MakeLibCallOptions CallOptions;
5800 CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64);
5801 SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first;
5802 Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
5803 Results.push_back(Result);
5804 break;
5805 }
5806 case ISD::ATOMIC_CMP_SWAP: {
5808 break;
5809 }
5810 case ISD::TRUNCATE: {
5811 MVT VT = N->getSimpleValueType(0);
5812 if (getTypeAction(*DAG.getContext(), VT) != TypeWidenVector)
5813 return;
5814
5815 MVT WidenVT = getTypeToTransformTo(*DAG.getContext(), VT).getSimpleVT();
5816 SDValue In = N->getOperand(0);
5817 EVT InVT = In.getValueType();
5818 EVT InEltVT = InVT.getVectorElementType();
5819 EVT EltVT = VT.getVectorElementType();
5820 unsigned MinElts = VT.getVectorNumElements();
5821 unsigned WidenNumElts = WidenVT.getVectorNumElements();
5822 unsigned InBits = InVT.getSizeInBits();
5823
5824 if ((128 % InBits) == 0 && WidenVT.is128BitVector()) {
5825 if ((InEltVT.getSizeInBits() % EltVT.getSizeInBits()) == 0) {
5826 int Scale = InEltVT.getSizeInBits() / EltVT.getSizeInBits();
5827 SmallVector<int, 16> TruncMask(WidenNumElts, -1);
5828 for (unsigned I = 0; I < MinElts; ++I)
5829 TruncMask[I] = Scale * I;
5830
5831 unsigned WidenNumElts = 128 / In.getScalarValueSizeInBits();
5832 MVT SVT = In.getSimpleValueType().getScalarType();
5833 MVT VT = MVT::getVectorVT(SVT, WidenNumElts);
5834 SDValue WidenIn =
5835 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), In,
5836 DAG.getVectorIdxConstant(0, DL));
5837 assert(isTypeLegal(WidenVT) && isTypeLegal(WidenIn.getValueType()) &&
5838 "Illegal vector type in truncation");
5839 WidenIn = DAG.getBitcast(WidenVT, WidenIn);
5840 Results.push_back(
5841 DAG.getVectorShuffle(WidenVT, DL, WidenIn, WidenIn, TruncMask));
5842 return;
5843 }
5844 }
5845
5846 break;
5847 }
5848 case ISD::SIGN_EXTEND: {
5849 // LASX has native VEXT2XV_* for sign extension.
5850 if (!Subtarget.hasExtLSX() || Subtarget.hasExtLASX())
5851 return;
5852
5853 EVT DstVT = N->getValueType(0);
5854 SDValue Src = N->getOperand(0);
5855 MVT SrcVT = Src.getSimpleValueType();
5856
5857 unsigned SrcEltBits = SrcVT.getScalarSizeInBits();
5858 unsigned DstEltBits = DstVT.getScalarSizeInBits();
5859 unsigned NumElts = DstVT.getVectorNumElements();
5860
5861 if (SrcVT.getSizeInBits() > 128)
5862 return;
5863
5864 if (!DstVT.isVector() || DstVT.getSizeInBits() <= 128)
5865 return;
5866
5867 // Legalize and extend the src to 128-bit first.
5868 if (SrcVT.getSizeInBits() < 128) {
5869 unsigned WidenSrcElts = 128 / SrcEltBits;
5870 MVT WidenSrcVT = MVT::getVectorVT(SrcVT.getScalarType(), WidenSrcElts);
5871 Src = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WidenSrcVT,
5872 DAG.getUNDEF(WidenSrcVT), Src,
5873 DAG.getVectorIdxConstant(0, DL));
5874 SrcVT = WidenSrcVT;
5875
5876 unsigned FirstStageEltBits = 128 / NumElts;
5877 MVT FirstStageEltVT = MVT::getIntegerVT(FirstStageEltBits);
5878 MVT FirstStageVT = MVT::getVectorVT(FirstStageEltVT, NumElts);
5879 Src = DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, DL, FirstStageVT, Src);
5880 SrcVT = FirstStageVT;
5881 SrcEltBits = FirstStageEltBits;
5882 }
5883
5885 Blocks.push_back(Src);
5886
5887 // Sign-extend the src by using SLTI + VILVL + VILVH recursively.
5888 while (SrcEltBits < DstEltBits) {
5889 unsigned NextEltBits = SrcEltBits * 2;
5890 MVT NextEltVT = MVT::getIntegerVT(NextEltBits);
5891 unsigned CurEltsPerBlock = SrcVT.getVectorNumElements();
5892 unsigned NextEltsPerBlock = CurEltsPerBlock / 2;
5893 MVT NextBlockVT = MVT::getVectorVT(NextEltVT, NextEltsPerBlock);
5894
5895 SmallVector<SDValue, 8> NextBlocks;
5896 NextBlocks.reserve(Blocks.size() * 2);
5897 for (SDValue Block : Blocks) {
5898 SDValue Zero = DAG.getConstant(0, DL, SrcVT);
5899 SDValue Mask = DAG.getNode(ISD::SETCC, DL, SrcVT, Block, Zero,
5900 DAG.getCondCode(ISD::SETLT));
5901 SDValue LoInterleaved =
5902 DAG.getNode(LoongArchISD::VILVL, DL, SrcVT, Mask, Block);
5903 SDValue HiInterleaved =
5904 DAG.getNode(LoongArchISD::VILVH, DL, SrcVT, Mask, Block);
5905
5906 NextBlocks.push_back(DAG.getBitcast(NextBlockVT, LoInterleaved));
5907 NextBlocks.push_back(DAG.getBitcast(NextBlockVT, HiInterleaved));
5908 }
5909
5910 Blocks = std::move(NextBlocks);
5911 SrcVT = NextBlockVT;
5912 SrcEltBits = NextEltBits;
5913 }
5914
5915 Results.push_back(DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Blocks));
5916 break;
5917 }
5918 case ISD::FP_EXTEND:
5919 // FP_EXTEND may reach here due to the Custom action for v2f32 results, but
5920 // no target-specific lowering is required. Leave it unchanged and rely on
5921 // the default type legalization.
5922 break;
5923 }
5924}
5925
5926 /// Try to fold: (and (xor X, -1), Y) -> (vandn X, Y).
/// Either AND operand may be the inverted one. Only 128-bit and 256-bit vector
/// results are handled; an empty SDValue is returned when nothing matches.
// NOTE(review): the embedded numbering jumps from 5926 to 5928, so the line
// holding the function name and leading parameters is absent from this
// listing (the body uses N and DL in addition to DAG). Restore it from the
// upstream file.
5928 SelectionDAG &DAG) {
5929 assert(N->getOpcode() == ISD::AND && "Unexpected opcode combine into ANDN");
5930
5931 MVT VT = N->getSimpleValueType(0);
// Bail out unless the result is a 128-bit or 256-bit vector.
5932 if (!VT.is128BitVector() && !VT.is256BitVector())
5933 return SDValue();
5934
5935 SDValue X, Y;
5936 SDValue N0 = N->getOperand(0);
5937 SDValue N1 = N->getOperand(1);
5938
// Operand 0 is tried as the inverted value first, then operand 1. X receives
// whatever isNOT returned (presumably the value under the NOT -- verify
// against isNOT), Y the remaining AND operand.
5939 if (SDValue Not = isNOT(N0, DAG)) {
5940 X = Not;
5941 Y = N1;
5942 } else if (SDValue Not = isNOT(N1, DAG)) {
5943 X = Not;
5944 Y = N0;
5945 } else
5946 return SDValue();
5947
// Bitcast both operands to the result type before building the VANDN node.
5948 X = DAG.getBitcast(VT, X);
5949 Y = DAG.getBitcast(VT, Y);
5950 return DAG.getNode(LoongArchISD::VANDN, DL, VT, X, Y);
5951 }
5952
// Query whether N is a constant splat. On success the splat value is written
// to SplatValue by Node->isConstantSplat; MinSizeInBits is forwarded to that
// call unchanged. Returns false when Node is null.
5953 static bool isConstantSplatVector(SDValue N, APInt &SplatValue,
5954 unsigned MinSizeInBits) {
// NOTE(review): the embedded numbering skips 5955-5956, so the statement that
// derives `Node` from N is missing from this listing. Restore it from the
// upstream file.
5957
5958 if (!Node)
5959 return false;
5960
// Outputs of isConstantSplat that this helper does not report to its caller.
5961 APInt SplatUndef;
5962 unsigned SplatBitSize;
5963 bool HasAnyUndefs;
5964
5965 return Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
5966 HasAnyUndefs, MinSizeInBits,
5967 /*IsBigEndian=*/false);
5968 }
5969
5970static SDValue matchDeinterleaveBuildVector(SDValue N, unsigned &StartIndex) {
5971 auto *BV = dyn_cast<BuildVectorSDNode>(N);
5972 if (!BV)
5973 return SDValue();
5974
5975 SDValue Src;
5976 int Start = -1;
5977
5978 for (unsigned i = 0, NumElts = BV->getNumOperands(); i < NumElts; ++i) {
5979 SDValue Op = BV->getOperand(i);
5980 if (Op.isUndef())
5981 continue;
5982 if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
5983 return SDValue();
5984
5985 auto *IdxC = dyn_cast<ConstantSDNode>(Op.getOperand(1));
5986 if (!IdxC)
5987 return SDValue();
5988
5989 unsigned EltIdx = IdxC->getZExtValue();
5990 if (Start < 0)
5991 Start = (int)EltIdx - (int)(i * 2);
5992 if (Start < 0 || Start > 1 || EltIdx != (unsigned)(Start + (int)(i * 2)))
5993 return SDValue();
5994
5995 SDValue CurSrc = Op.getOperand(0);
5996 if (!Src)
5997 Src = CurSrc;
5998 else if (Src != CurSrc)
5999 return SDValue();
6000 }
6001
6002 if (!Src || Start < 0)
6003 return SDValue();
6004
6005 StartIndex = (unsigned)Start;
6006 return Src;
6007}
6008
// Fold an ADD/SUB of two extends into a horizontal widening node:
//   op (ext (odd elements of A)), (ext (even elements of B))
//     -> VHADDW / VHADDW_U / VHSUBW / VHSUBW_U A, B
// The left operand must be the odd-element selection and the right operand the
// even-element selection; both extends must be of the same kind and have a
// single use. Requires LSX. Returns an empty SDValue when nothing matches.
// NOTE(review): the embedded numbering skips 6010, so the line with the
// function name and leading parameters is missing from this listing (the body
// uses N and DAG). Restore it from the upstream file.
6009 static SDValue
6011 const LoongArchSubtarget &Subtarget) {
6012 if (!Subtarget.hasExtLSX())
6013 return SDValue();
6014
6015 unsigned Opc = N->getOpcode();
6016 assert((Opc == ISD::ADD || Opc == ISD::SUB) && "Unexpected opcode");
6017
6018 EVT VT = N->getValueType(0);
6019 SDLoc DL(N);
6020
6021 SDValue LHS = N->getOperand(0);
6022 SDValue RHS = N->getOperand(1);
6023
// The kind of extend on the left operand selects the signed or unsigned node.
6024 bool isSigned;
6025 unsigned ExtOpc = LHS.getOpcode();
6026 if (ExtOpc == ISD::SIGN_EXTEND)
6027 isSigned = true;
6028 else if (ExtOpc == ISD::ZERO_EXTEND)
6029 isSigned = false;
6030 else
6031 return SDValue();
6032
// Both operands must use the same extend opcode.
6033 if (ExtOpc != RHS.getOpcode())
6034 return SDValue();
6035
6036 if (!LHS.hasOneUse() || !RHS.hasOneUse())
6037 return SDValue();
6038
// OddIdx / EvenIdx are only written when the corresponding match succeeds, so
// they are read only after both results have been checked below.
6039 unsigned OddIdx, EvenIdx;
6040 SDValue LHSVec = matchDeinterleaveBuildVector(LHS.getOperand(0), OddIdx);
6041 SDValue RHSVec = matchDeinterleaveBuildVector(RHS.getOperand(0), EvenIdx);
6042
6043 if (!LHSVec || !RHSVec)
6044 return SDValue();
6045 if (OddIdx != 1 || EvenIdx != 0)
6046 return SDValue();
// Only the types are compared here; LHSVec and RHSVec may be different values.
6047 if (LHSVec.getValueType() != RHSVec.getValueType())
6048 return SDValue();
6049
6050 EVT SrcVT = LHSVec.getValueType();
6051 EVT SrcEltVT = SrcVT.getVectorElementType();
6052 EVT DstEltVT = VT.getVectorElementType();
6053 auto &TLI = DAG.getTargetLoweringInfo();
6054
// Source and result must be legal vectors of equal total size, with result
// elements exactly twice as wide as integer source elements of at most 32 bits.
6055 if (!TLI.isTypeLegal(VT) || !TLI.isTypeLegal(SrcVT))
6056 return SDValue();
6057 if (!SrcVT.isVector() || !VT.isVector())
6058 return SDValue();
6059 if (SrcVT.getSizeInBits() != VT.getSizeInBits())
6060 return SDValue();
6061 if (DstEltVT.getSizeInBits() != SrcEltVT.getSizeInBits() * 2)
6062 return SDValue();
6063 if (!SrcEltVT.isInteger() || SrcEltVT.getSizeInBits() > 32)
6064 return SDValue();
6065
6066 unsigned TargetOpc;
6067 if (Opc == ISD::ADD)
6068 TargetOpc = isSigned ? LoongArchISD::VHADDW : LoongArchISD::VHADDW_U;
6069 else
6070 TargetOpc = isSigned ? LoongArchISD::VHSUBW : LoongArchISD::VHSUBW_U;
6071
6072 return DAG.getNode(TargetOpc, DL, VT, LHSVec, RHSVec);
6073 }
6074
// NOTE(review): the embedded numbering skips 6075-6076, so the return type,
// name and leading parameters of this function are missing from this listing
// (the body uses N, DAG and DCI). Restore them from the upstream file;
// presumably this is the ISD::ADD combine -- confirm.
//
// First tries the horizontal widening fold. Otherwise, once operations have
// been legalized and the result is a legal integer vector, matches a rounding
// right shift written as an add and emits VSRLR (for srl) or VSRAR (for sra).
// Returns an empty SDValue when nothing matches.
6077 const LoongArchSubtarget &Subtarget) {
6078 if (SDValue V = performHorizWideningCombine(N, DAG, Subtarget))
6079 return V;
6080
6081 if (DCI.isBeforeLegalizeOps())
6082 return SDValue();
6083
6084 EVT VT = N->getValueType(0);
6085 if (!VT.isVector())
6086 return SDValue();
6087
6088 if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
6089 return SDValue();
6090
6091 EVT EltVT = VT.getVectorElementType();
6092 if (!EltVT.isInteger())
6093 return SDValue();
6094
6095 // match:
6096 //
6097 // add
6098 // (and
6099 // (srl X, shift-1) / X
6100 // 1)
6101 // (srl/sra X, shift)
6102
6103 SDValue Add0 = N->getOperand(0);
6104 SDValue Add1 = N->getOperand(1);
6105 SDValue And;
6106 SDValue Shr;
6107
// The AND may be either operand; the other operand is taken as the shift.
6108 if (Add0.getOpcode() == ISD::AND) {
6109 And = Add0;
6110 Shr = Add1;
6111 } else if (Add1.getOpcode() == ISD::AND) {
6112 And = Add1;
6113 Shr = Add0;
6114 } else {
6115 return SDValue();
6116 }
6117
6118 // match:
6119 //
6120 // srl/sra X, shift
6121
6122 if (Shr.getOpcode() != ISD::SRL && Shr.getOpcode() != ISD::SRA)
6123 return SDValue();
6124
6125 SDValue X = Shr.getOperand(0);
6126 SDValue Shift = Shr.getOperand(1);
6127 APInt ShiftVal;
6128
// The shift amount must be a constant splat, and a non-zero one.
6129 if (!isConstantSplatVector(Shift, ShiftVal, EltVT.getSizeInBits()))
6130 return SDValue();
6131
6132 if (ShiftVal == 0)
6133 return SDValue();
6134
6135 // match:
6136 //
6137 // and
6138 // (srl X, shift-1) / X
6139 // 1
6140
6141 SDValue One = And.getOperand(1);
6142 APInt SplatVal;
6143
6144 if (!isConstantSplatVector(One, SplatVal, EltVT.getSizeInBits()))
6145 return SDValue();
6146
6147 if (SplatVal != 1)
6148 return SDValue();
6149
// When the AND reads X directly, the extracted bit is bit 0, which is only the
// rounding bit for a shift by one.
6150 if (And.getOperand(0) == X) {
6151 // match:
6152 //
6153 // shift == 1
6154
6155 if (ShiftVal != 1)
6156 return SDValue();
6157 } else {
6158 // match:
6159 //
6160 // srl X, shift-1
6161
6162 SDValue Srl = And.getOperand(0);
6163
6164 if (Srl.getOpcode() != ISD::SRL)
6165 return SDValue();
6166
6167 if (Srl.getOperand(0) != X)
6168 return SDValue();
6169
6170 // match:
6171 //
6172 // shift-1
6173
6174 SDValue ShiftMinus1 = Srl.getOperand(1);
6175
// SplatVal is reused here to hold the inner shift amount.
6176 if (!isConstantSplatVector(ShiftMinus1, SplatVal, EltVT.getSizeInBits()))
6177 return SDValue();
6178
6179 if (ShiftVal != (SplatVal + 1))
6180 return SDValue();
6181 }
6182
6183 // We matched a rounded right shift pattern and can lower it
6184 // to a single vector rounded shift instruction.
6185
6186 SDLoc DL(N);
6187 return DAG.getNode(Shr.getOpcode() == ISD::SRL ? LoongArchISD::VSRLR
6188 : LoongArchISD::VSRAR,
6189 DL, VT, X, Shift);
6190 }
6191
// NOTE(review): the embedded numbering skips 6192-6193, so the return type,
// name and leading parameters of this function are missing from this listing
// (the body uses N, DAG and DCI). Restore them from the upstream file;
// presumably this is the ISD::AND combine -- confirm.
//
// After operation legalization: first tries the VANDN fold, then (with the 32S
// feature) rewrites an AND with a constant shifted mask into BSTRPICK,
// followed by a SHL when the mask does not start at bit 0. Returns an empty
// SDValue when nothing matches.
6194 const LoongArchSubtarget &Subtarget) {
6195 if (DCI.isBeforeLegalizeOps())
6196 return SDValue();
6197
6198 SDValue FirstOperand = N->getOperand(0);
6199 SDValue SecondOperand = N->getOperand(1);
6200 unsigned FirstOperandOpc = FirstOperand.getOpcode();
6201 EVT ValTy = N->getValueType(0);
6202 SDLoc DL(N);
6203 uint64_t lsb, msb;
6204 unsigned SMIdx, SMLen;
6205 ConstantSDNode *CN;
6206 SDValue NewOperand;
6207 MVT GRLenVT = Subtarget.getGRLenVT();
6208
6209 if (SDValue R = combineAndNotIntoVANDN(N, DL, DAG))
6210 return R;
6211
6212 // BSTRPICK requires the 32S feature.
6213 if (!Subtarget.has32S())
6214 return SDValue();
6215
6216 // Op's second operand must be a shifted mask.
// On success SMIdx / SMLen hold the index and length reported by
// isShiftedMask_64 for the mask constant.
6217 if (!(CN = dyn_cast<ConstantSDNode>(SecondOperand)) ||
6218 !isShiftedMask_64(CN->getZExtValue(), SMIdx, SMLen))
6219 return SDValue();
6220
6221 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL) {
6222 // Pattern match BSTRPICK.
6223 // $dst = and ((sra or srl) $src , lsb), (2**len - 1)
6224 // => BSTRPICK $dst, $src, msb, lsb
6225 // where msb = lsb + len - 1
6226
6227 // The second operand of the shift must be an immediate.
// Note: CN is reassigned here and now refers to the shift amount, not the mask.
6228 if (!(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))))
6229 return SDValue();
6230
6231 lsb = CN->getZExtValue();
6232
6233 // Return if the shifted mask does not start at bit 0 or the sum of its
6234 // length and lsb exceeds the word's size.
6235 if (SMIdx != 0 || lsb + SMLen > ValTy.getSizeInBits())
6236 return SDValue();
6237
6238 NewOperand = FirstOperand.getOperand(0);
6239 } else {
6240 // Pattern match BSTRPICK.
6241 // $dst = and $src, (2**len- 1) , if len > 12
6242 // => BSTRPICK $dst, $src, msb, lsb
6243 // where lsb = 0 and msb = len - 1
6244
6245 // If the mask is <= 0xfff, andi can be used instead.
6246 if (CN->getZExtValue() <= 0xfff)
6247 return SDValue();
6248
6249 // Return if the MSB exceeds the value's bit width.
6250 if (SMIdx + SMLen > ValTy.getSizeInBits())
6251 return SDValue();
6252
6253 if (SMIdx > 0) {
6254 // Omit if the constant has more than 2 uses. This is a conservative
6255 // decision. Whether it is a win depends on the HW microarchitecture.
6256 // However it should always be better for 1 and 2 uses.
6257 if (CN->use_size() > 2)
6258 return SDValue();
6259 // Return if the constant can be composed by a single LU12I.W.
6260 if ((CN->getZExtValue() & 0xfff) == 0)
6261 return SDValue();
6262 // Return if the constant can be composed by a single ADDI with
6263 // the zero register.
6264 if (CN->getSExtValue() >= -2048 && CN->getSExtValue() < 0)
6265 return SDValue();
6266 }
6267
6268 lsb = SMIdx;
6269 NewOperand = FirstOperand;
6270 }
6271
6272 msb = lsb + SMLen - 1;
6273 SDValue NR0 = DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy, NewOperand,
6274 DAG.getConstant(msb, DL, GRLenVT),
6275 DAG.getConstant(lsb, DL, GRLenVT));
// In the shift form, and whenever the field starts at bit 0, the BSTRPICK
// result is already in place.
6276 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL || lsb == 0)
6277 return NR0;
6278 // Try to optimize to
6279 // bstrpick $Rd, $Rs, msb, lsb
6280 // slli $Rd, $Rd, lsb
6281 return DAG.getNode(ISD::SHL, DL, ValTy, NR0,
6282 DAG.getConstant(lsb, DL, GRLenVT));
6283 }
6284
6285 // Return the original source vector if N consists of the low half
6286 // of each 128-bit lane.
// Two shapes are accepted: an EXTRACT_SUBVECTOR at index 0 of a 128-bit source,
// or a BUILD_VECTOR of constant-index element reads from one 256-bit source.
// An empty SDValue is returned otherwise.
// NOTE(review): the embedded numbering skips 6287-6288, so the function
// signature is missing from this listing (the body uses a value named N).
// Restore it from the upstream file.
6289
6290 EVT DstVT = N.getValueType();
6291 if (!DstVT.isVector())
6292 return SDValue();
6293
6294 // LSX canonical form:
6295 if (N.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
6296 SDValue Src = N.getOperand(0);
6297 EVT SrcVT = Src.getValueType();
6298
6299 if (!SrcVT.isVector() || !SrcVT.is128BitVector())
6300 return SDValue();
// The subvector must start at element 0 and cover exactly half of the source,
// both in bits and in element count.
6301 if (N.getConstantOperandVal(1) != 0)
6302 return SDValue();
6303 if (SrcVT.getSizeInBits() != DstVT.getSizeInBits() * 2)
6304 return SDValue();
6305 if (SrcVT.getVectorNumElements() != DstVT.getVectorNumElements() * 2)
6306 return SDValue();
6307
6308 return Src;
6309 }
6310
6311 // LASX canonical form:
6312 auto *BV = dyn_cast<BuildVectorSDNode>(N);
6313 if (!BV)
6314 return SDValue();
6315
6316 unsigned NumElts = DstVT.getVectorNumElements();
6317 if (NumElts % 2 != 0)
6318 return SDValue();
6319
6320 SDValue Src;
6321 EVT SrcVT;
6322
6323 for (unsigned I = 0; I != NumElts; ++I) {
6324 SDValue Elt = BV->getOperand(I);
// NOTE(review): the embedded numbering skips 6325, so the condition guarding
// the return below is missing from this listing. Restore it from the upstream
// file.
6326 return SDValue();
6327
6328 SDValue ThisSrc = Elt.getOperand(0);
6329 SDValue Idx = Elt.getOperand(1);
6330 auto *CI = dyn_cast<ConstantSDNode>(Idx);
6331 if (!CI)
6332 return SDValue();
6333
// The first element fixes the source; it must be a 256-bit vector with twice
// the bits and twice the elements of the result. Later elements must read the
// same source.
6334 if (!Src) {
6335 Src = ThisSrc;
6336 SrcVT = Src.getValueType();
6337 if (!SrcVT.isVector())
6338 return SDValue();
6339
6340 if (SrcVT.getSizeInBits() != DstVT.getSizeInBits() * 2)
6341 return SDValue();
6342 if (SrcVT.getVectorNumElements() != NumElts * 2)
6343 return SDValue();
6344 if (!SrcVT.is256BitVector())
6345 return SDValue();
6346 } else if (ThisSrc != Src) {
6347 return SDValue();
6348 }
6349
// The first half of the result reads source indices [0, Half); the second half
// reads [2 * Half, 3 * Half).
6350 unsigned Half = NumElts / 2;
6351 unsigned ExpectedIdx = (I < Half) ? I : (I + Half);
6352 if (CI->getZExtValue() != ExpectedIdx)
6353 return SDValue();
6354 }
6355
6356 return Src;
6357 }
6358
// NOTE(review): the embedded numbering skips 6359-6360, so the return type,
// name and leading parameters of this function are missing from this listing
// (the body uses N and DAG). Restore them from the upstream file.
//
// Fold a SHL of an extended low-half-of-lanes vector by a constant splat:
//   shl ((s|z)ext (low half of each 128-bit lane of Vec)), splat(Imm)
//     -> VSLLWIL / VSLLWIL_U Vec, Imm
// Requires LSX, a single-use extend, and Imm smaller than the source element
// width. Returns an empty SDValue when nothing matches.
6361 const LoongArchSubtarget &Subtarget) {
6362 if (!Subtarget.hasExtLSX())
6363 return SDValue();
6364
6365 assert(N->getOpcode() == ISD::SHL && "Unexpected opcode");
6366
6367 EVT VT = N->getValueType(0);
6368 SDLoc DL(N);
6369
6370 SDValue LHS = N->getOperand(0);
6371 SDValue RHS = N->getOperand(1);
6372
// The kind of extend selects the signed or unsigned target node.
6373 bool isSigned;
6374 unsigned ExtOpc = LHS.getOpcode();
6375 if (ExtOpc == ISD::SIGN_EXTEND)
6376 isSigned = true;
6377 else if (ExtOpc == ISD::ZERO_EXTEND)
6378 isSigned = false;
6379 else
6380 return SDValue();
6381
6382 if (!LHS.hasOneUse())
6383 return SDValue();
6384
6385 SDValue Vec = matchLowHalfOf128BitLanes(LHS.getOperand(0));
6386 if (!Vec)
6387 return SDValue();
6388
6389 EVT SrcVT = Vec.getValueType();
6390 EVT SrcEltVT = SrcVT.getVectorElementType();
6391 EVT DstEltVT = VT.getVectorElementType();
6392
// Source and result must be vectors of equal total size, with result elements
// exactly twice as wide as integer source elements of at most 32 bits.
6393 if (!SrcVT.isVector() || !VT.isVector())
6394 return SDValue();
6395 if (SrcVT.getSizeInBits() != VT.getSizeInBits())
6396 return SDValue();
6397 if (DstEltVT.getSizeInBits() != SrcEltVT.getSizeInBits() * 2)
6398 return SDValue();
6399 if (!SrcEltVT.isInteger() || SrcEltVT.getSizeInBits() > 32)
6400 return SDValue();
6401
// The shift amount must be a constant splat strictly below the source element
// width.
6402 APInt Imm;
6403 if (!isConstantSplatVector(RHS, Imm, DstEltVT.getSizeInBits()))
6404 return SDValue();
6405 if (!Imm.ult(SrcEltVT.getSizeInBits()))
6406 return SDValue();
6407
6408 unsigned Opc = isSigned ? LoongArchISD::VSLLWIL : LoongArchISD::VSLLWIL_U;
6409 SDValue Sht = DAG.getConstant(Imm.getZExtValue(), DL, Subtarget.getGRLenVT());
6410 return DAG.getNode(Opc, DL, VT, Vec, Sht);
6411 }
6412
// NOTE(review): the embedded numbering skips 6413-6414, so the return type,
// name and leading parameters of this function are missing from this listing
// (the body uses N, DAG and DCI). Restore them from the upstream file;
// presumably this is the ISD::SRL combine -- confirm.
//
// With the 32S feature and after operation legalization, rewrites a right
// shift of a masked value into BSTRPICK (see the pattern below). Returns an
// empty SDValue when nothing matches.
6415 const LoongArchSubtarget &Subtarget) {
6416 // BSTRPICK requires the 32S feature.
6417 if (!Subtarget.has32S())
6418 return SDValue();
6419
6420 if (DCI.isBeforeLegalizeOps())
6421 return SDValue();
6422
6423 // $dst = srl (and $src, Mask), Shamt
6424 // =>
6425 // BSTRPICK $dst, $src, MaskIdx+MaskLen-1, Shamt
6426 // when Mask is a shifted mask, and MaskIdx <= Shamt <= MaskIdx+MaskLen-1
6427 //
6428
6429 SDValue FirstOperand = N->getOperand(0);
6430 ConstantSDNode *CN;
6431 EVT ValTy = N->getValueType(0);
6432 SDLoc DL(N);
6433 MVT GRLenVT = Subtarget.getGRLenVT();
6434 unsigned MaskIdx, MaskLen;
6435 uint64_t Shamt;
6436
6437 // The first operand must be an AND and the second operand of the AND must be
6438 // a shifted mask.
6439 if (FirstOperand.getOpcode() != ISD::AND ||
6440 !(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))) ||
6441 !isShiftedMask_64(CN->getZExtValue(), MaskIdx, MaskLen))
6442 return SDValue();
6443
6444 // The second operand (shift amount) must be an immediate.
// Note: CN is reassigned here and now refers to the shift amount, not the mask.
6445 if (!(CN = dyn_cast<ConstantSDNode>(N->getOperand(1))))
6446 return SDValue();
6447
6448 Shamt = CN->getZExtValue();
// Only shift amounts that land inside the masked field are folded.
6449 if (MaskIdx <= Shamt && Shamt <= MaskIdx + MaskLen - 1)
6450 return DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy,
6451 FirstOperand->getOperand(0),
6452 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
6453 DAG.getConstant(Shamt, DL, GRLenVT));
6454
6455 return SDValue();
6456 }
6457
// NOTE(review): the embedded numbering skips 6458-6459, so the return type,
// name and leading parameters of this function are missing from this listing
// (the body uses N and DAG). Restore them from the upstream file; presumably
// this is the ISD::SUB combine -- confirm.
//
// Only the horizontal widening fold is attempted; an empty SDValue is returned
// when it does not apply.
6460 const LoongArchSubtarget &Subtarget) {
6461 if (SDValue V = performHorizWideningCombine(N, DAG, Subtarget))
6462 return V;
6463
6464 return SDValue();
6465 }
6466
6467 // Helper to peek through bitops/trunc/setcc to determine size of source vector.
6468 // Allows BITCASTCombine to determine what size vector generated a <X x i1>.
// Returns true when every SETCC/TRUNCATE reached through the walk has an input
// of exactly Size bits; all-zeros and all-ones BUILD_VECTORs are accepted for
// any Size. Any other opcode makes the result false.
6469 static bool checkBitcastSrcVectorSize(SDValue Src, unsigned Size,
6470 unsigned Depth) {
6471 // Limit recursion.
// NOTE(review): the embedded numbering skips 6472, so the condition on Depth
// guarding the return below is missing from this listing. Restore it from the
// upstream file.
6473 return false;
6474 switch (Src.getOpcode()) {
6475 case ISD::SETCC:
6476 case ISD::TRUNCATE:
// Leaves of the walk: compare the input's total size against Size.
6477 return Src.getOperand(0).getValueSizeInBits() == Size;
6478 case ISD::FREEZE:
6479 return checkBitcastSrcVectorSize(Src.getOperand(0), Size, Depth + 1);
6480 case ISD::AND:
6481 case ISD::XOR:
6482 case ISD::OR:
// Both inputs of a bitwise op must satisfy the check.
6483 return checkBitcastSrcVectorSize(Src.getOperand(0), Size, Depth + 1) &&
6484 checkBitcastSrcVectorSize(Src.getOperand(1), Size, Depth + 1);
6485 case ISD::SELECT:
6486 case ISD::VSELECT:
// The condition must have i1 scalars, and both selected values must satisfy
// the check.
6487 return Src.getOperand(0).getScalarValueSizeInBits() == 1 &&
6488 checkBitcastSrcVectorSize(Src.getOperand(1), Size, Depth + 1) &&
6489 checkBitcastSrcVectorSize(Src.getOperand(2), Size, Depth + 1);
6490 case ISD::BUILD_VECTOR:
6491 return ISD::isBuildVectorAllZeros(Src.getNode()) ||
6492 ISD::isBuildVectorAllOnes(Src.getNode());
6493 }
6494 return false;
6495 }
6496
6497 // Helper to push sign extension of vXi1 SETCC result through bitops.
// SETCC, FREEZE, TRUNCATE and BUILD_VECTOR nodes are sign-extended to SExtVT
// directly; AND/XOR/OR and SELECT/VSELECT are rebuilt in SExtVT over
// recursively extended operands. Any other opcode hits llvm_unreachable.
// NOTE(review): the embedded numbering skips 6498, so the line with the return
// type, name and leading parameters is missing from this listing (the body
// uses DAG and SExtVT). Restore it from the upstream file.
6499 SDValue Src, const SDLoc &DL) {
6500 switch (Src.getOpcode()) {
6501 case ISD::SETCC:
6502 case ISD::FREEZE:
6503 case ISD::TRUNCATE:
6504 case ISD::BUILD_VECTOR:
6505 return DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, Src);
6506 case ISD::AND:
6507 case ISD::XOR:
6508 case ISD::OR:
// Same bitwise opcode, applied to the extended operands.
6509 return DAG.getNode(
6510 Src.getOpcode(), DL, SExtVT,
6511 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(0), DL),
6512 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(1), DL));
6513 case ISD::SELECT:
6514 case ISD::VSELECT:
// The condition operand is reused as is; only the selected values are extended.
6515 return DAG.getSelect(
6516 DL, SExtVT, Src.getOperand(0),
6517 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(1), DL),
6518 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(2), DL));
6519 }
6520 llvm_unreachable("Unexpected node type for vXi1 sign extension");
6521 }
6522
// Fold a bitcast of a single-use SETCC against an all-zeros or all-ones vector
// into one [X]VMSK* node over the compared vector, then resize the result and
// bitcast it to the requested type. 128-bit compares need LSX; 256-bit
// compares need both 32S and LASX. Returns an empty SDValue when nothing
// matches.
// NOTE(review): the embedded numbering skips 6524-6525, so the function name
// and leading parameters are missing from this listing (the body uses N and
// DAG). Restore them from the upstream file.
6523 static SDValue
6526 const LoongArchSubtarget &Subtarget) {
6527 SDLoc DL(N);
6528 EVT VT = N->getValueType(0);
6529 SDValue Src = N->getOperand(0);
6530 EVT SrcVT = Src.getValueType();
6531
6532 if (Src.getOpcode() != ISD::SETCC || !Src.hasOneUse())
6533 return SDValue();
6534
6535 bool UseLASX;
// ISD::DELETED_NODE serves as the "no opcode selected" sentinel.
6536 unsigned Opc = ISD::DELETED_NODE;
6537 EVT CmpVT = Src.getOperand(0).getValueType();
6538 EVT EltVT = CmpVT.getVectorElementType();
6539
6540 if (Subtarget.hasExtLSX() && CmpVT.getSizeInBits() == 128)
6541 UseLASX = false;
6542 else if (Subtarget.has32S() && Subtarget.hasExtLASX() &&
6543 CmpVT.getSizeInBits() == 256)
6544 UseLASX = true;
6545 else
6546 return SDValue();
6547
// Only comparisons whose right-hand side is an all-zeros or all-ones
// BUILD_VECTOR select an opcode; everything else leaves Opc at the sentinel.
6548 SDValue SrcN1 = Src.getOperand(1);
6549 switch (cast<CondCodeSDNode>(Src.getOperand(2))->get()) {
6550 default:
6551 break;
6552 case ISD::SETEQ:
6553 // x == 0 => not (vmsknez.b x)
6554 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
6555 Opc = UseLASX ? LoongArchISD::XVMSKEQZ : LoongArchISD::VMSKEQZ;
6556 break;
6557 case ISD::SETGT:
6558 // x > -1 => vmskgez.b x
6559 if (ISD::isBuildVectorAllOnes(SrcN1.getNode()) && EltVT == MVT::i8)
6560 Opc = UseLASX ? LoongArchISD::XVMSKGEZ : LoongArchISD::VMSKGEZ;
6561 break;
6562 case ISD::SETGE:
6563 // x >= 0 => vmskgez.b x
6564 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
6565 Opc = UseLASX ? LoongArchISD::XVMSKGEZ : LoongArchISD::VMSKGEZ;
6566 break;
6567 case ISD::SETLT:
6568 // x < 0 => vmskltz.{b,h,w,d} x
6569 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) &&
6570 (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
6571 EltVT == MVT::i64))
6572 Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
6573 break;
6574 case ISD::SETLE:
6575 // x <= -1 => vmskltz.{b,h,w,d} x
6576 if (ISD::isBuildVectorAllOnes(SrcN1.getNode()) &&
6577 (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
6578 EltVT == MVT::i64))
6579 Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
6580 break;
6581 case ISD::SETNE:
6582 // x != 0 => vmsknez.b x
6583 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
6584 Opc = UseLASX ? LoongArchISD::XVMSKNEZ : LoongArchISD::VMSKNEZ;
6585 break;
6586 }
6587
6588 if (Opc == ISD::DELETED_NODE)
6589 return SDValue();
6590
// The mask node produces a GRLen-wide integer from the compared vector.
6591 SDValue V = DAG.getNode(Opc, DL, Subtarget.getGRLenVT(), Src.getOperand(0));
// NOTE(review): the embedded numbering skips 6592, so the definition of the
// type `T` used below is missing from this listing. Restore it from the
// upstream file.
6593 V = DAG.getZExtOrTrunc(V, DL, T);
6594 return DAG.getBitcast(VT, V);
6595 }
6596
// NOTE(review): the embedded numbering skips 6597-6598, so the return type,
// name and leading parameters of this function are missing from this listing
// (the body uses N, DAG and DCI). Restore them from the upstream file;
// presumably this is the ISD::BITCAST combine -- confirm.
//
// Before operation legalization, turns a bitcast from a vXi1 vector into a
// mask-extraction node: first via the SETCC-specific fold, otherwise by
// sign-extending the i1 vector to a full-width vector and applying [X]VMSKLTZ.
// Returns an empty SDValue when the source type or subtarget is unsupported.
6599 const LoongArchSubtarget &Subtarget) {
6600 SDLoc DL(N);
6601 EVT VT = N->getValueType(0);
6602 SDValue Src = N->getOperand(0);
6603 EVT SrcVT = Src.getValueType();
6604 MVT GRLenVT = Subtarget.getGRLenVT();
6605
// This combine only runs before operation legalization.
6606 if (!DCI.isBeforeLegalizeOps())
6607 return SDValue();
6608
6609 if (!SrcVT.isSimple() || SrcVT.getScalarType() != MVT::i1)
6610 return SDValue();
6611
6612 // Combine SETCC and BITCAST into [X]VMSK{LT,GE,NE} when possible
6613 SDValue Res = performSETCC_BITCASTCombine(N, DAG, DCI, Subtarget);
6614 if (Res)
6615 return Res;
6616
6617 // Generate vXi1 using [X]VMSKLTZ
6618 MVT SExtVT;
6619 unsigned Opc;
6620 bool UseLASX = false;
6621 bool PropagateSExt = false;
6622
// Give up on a single-use SETCC whose compared vectors exceed 256 bits.
6623 if (Src.getOpcode() == ISD::SETCC && Src.hasOneUse()) {
6624 EVT CmpVT = Src.getOperand(0).getValueType();
6625 if (CmpVT.getSizeInBits() > 256)
6626 return SDValue();
6627 }
6628
// Choose the vector type the i1 elements are sign-extended to. For v4i1, v8i1
// and v16i1 the 256-bit type is used when LASX is available and the mask was
// produced from 256-bit vectors; the extension is then pushed through the
// producing ops.
6629 switch (SrcVT.getSimpleVT().SimpleTy) {
6630 default:
6631 return SDValue();
6632 case MVT::v2i1:
6633 SExtVT = MVT::v2i64;
6634 break;
6635 case MVT::v4i1:
6636 SExtVT = MVT::v4i32;
6637 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
6638 SExtVT = MVT::v4i64;
6639 UseLASX = true;
6640 PropagateSExt = true;
6641 }
6642 break;
6643 case MVT::v8i1:
6644 SExtVT = MVT::v8i16;
6645 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
6646 SExtVT = MVT::v8i32;
6647 UseLASX = true;
6648 PropagateSExt = true;
6649 }
6650 break;
6651 case MVT::v16i1:
6652 SExtVT = MVT::v16i8;
6653 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
6654 SExtVT = MVT::v16i16;
6655 UseLASX = true;
6656 PropagateSExt = true;
6657 }
6658 break;
6659 case MVT::v32i1:
6660 SExtVT = MVT::v32i8;
6661 UseLASX = true;
6662 break;
6663 };
6664 Src = PropagateSExt ? signExtendBitcastSrcVector(DAG, SExtVT, Src, DL)
6665 : DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, Src);
6666
6667 SDValue V;
// Without 32S or LASX, a v32i8 source is split into two halves, each reduced
// with VMSKLTZ, and the high half's mask is shifted left by 16 and ORed in.
// Any other source that would need the LASX node is rejected.
6668 if (!Subtarget.has32S() || !Subtarget.hasExtLASX()) {
6669 if (Src.getSimpleValueType() == MVT::v32i8) {
6670 SDValue Lo, Hi;
6671 std::tie(Lo, Hi) = DAG.SplitVector(Src, DL);
6672 Lo = DAG.getNode(LoongArchISD::VMSKLTZ, DL, GRLenVT, Lo);
6673 Hi = DAG.getNode(LoongArchISD::VMSKLTZ, DL, GRLenVT, Hi);
6674 Hi = DAG.getNode(ISD::SHL, DL, GRLenVT, Hi,
6675 DAG.getShiftAmountConstant(16, GRLenVT, DL));
6676 V = DAG.getNode(ISD::OR, DL, GRLenVT, Lo, Hi);
6677 } else if (UseLASX) {
6678 return SDValue();
6679 }
6680 }
6681
// V is still empty unless the split path above produced it.
6682 if (!V) {
6683 Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
6684 V = DAG.getNode(Opc, DL, GRLenVT, Src);
6685 }
6686
// NOTE(review): the embedded numbering skips 6687, so the definition of the
// type `T` used below is missing from this listing. Restore it from the
// upstream file.
6688 V = DAG.getZExtOrTrunc(V, DL, T);
6689 return DAG.getBitcast(VT, V);
6690 }
6691
6694 const LoongArchSubtarget &Subtarget) {
     // DAG combine for an OR node: tries each of the BSTRINS patterns below
     // against N's two operands and returns the replacement
     // LoongArchISD::BSTRINS node for the first pattern that matches, or an
     // empty SDValue when nothing matches. The msb/lsb operands of the new node
     // are emitted as GRLenVT constants.
     // NOTE(review): the opening lines of this signature (listing lines
     // 6692-6693) are not present in this listing.
6695 MVT GRLenVT = Subtarget.getGRLenVT();
6696 EVT ValTy = N->getValueType(0);
6697 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
6698 ConstantSDNode *CN0, *CN1;
6699 SDLoc DL(N);
6700 unsigned ValBits = ValTy.getSizeInBits();
6701 unsigned MaskIdx0, MaskLen0, MaskIdx1, MaskLen1;
6702 unsigned Shamt;
     // Set once N0/N1 have been swapped, so that each group of patterns is
     // tried in both operand orders exactly once.
6703 bool SwapAndRetried = false;
6704
6705 // BSTRPICK requires the 32S feature.
6706 if (!Subtarget.has32S())
6707 return SDValue();
6708
     // Skip the combine while DCI reports that operation legalization has not
     // happened yet.
6709 if (DCI.isBeforeLegalizeOps())
6710 return SDValue();
6711
     // Only 32-bit and 64-bit results are handled.
6712 if (ValBits != 32 && ValBits != 64)
6713 return SDValue();
6714
6715Retry:
6716 // 1st pattern to match BSTRINS:
6717 // R = or (and X, mask0), (and (shl Y, lsb), mask1)
6718 // where mask1 = (2**size - 1) << lsb, mask0 = ~mask1
6719 // =>
6720 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
6721 if (N0.getOpcode() == ISD::AND &&
6722 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
6723 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
6724 N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL &&
6725 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
6726 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
6727 MaskIdx0 == MaskIdx1 && MaskLen0 == MaskLen1 &&
6728 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
6729 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
6730 (MaskIdx0 + MaskLen0 <= ValBits)) {
6731 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 1\n");
6732 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
6733 N1.getOperand(0).getOperand(0),
6734 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
6735 DAG.getConstant(MaskIdx0, DL, GRLenVT));
6736 }
6737
6738 // 2nd pattern to match BSTRINS:
6739 // R = or (and X, mask0), (shl (and Y, mask1), lsb)
6740 // where mask1 = (2**size - 1), mask0 = ~(mask1 << lsb)
6741 // =>
6742 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
6743 if (N0.getOpcode() == ISD::AND &&
6744 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
6745 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
6746 N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
6747 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
6748 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
6749 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
6750 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
6751 MaskLen0 == MaskLen1 && MaskIdx1 == 0 &&
6752 (MaskIdx0 + MaskLen0 <= ValBits)) {
6753 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 2\n");
6754 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
6755 N1.getOperand(0).getOperand(0),
6756 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
6757 DAG.getConstant(MaskIdx0, DL, GRLenVT));
6758 }
6759
6760 // 3rd pattern to match BSTRINS:
6761 // R = or (and X, mask0), (and Y, mask1)
6762 // where ~mask0 = (2**size - 1) << lsb, mask0 & mask1 = 0
6763 // =>
6764 // R = BSTRINS X, (shr (and Y, mask1), lsb), msb, lsb
6765 // where msb = lsb + size - 1
     // NOTE(review): unlike the other patterns this one bounds the field with
     // the literal 64 rather than ValBits, and for 32-bit values it reduces
     // MaskLen0 with `& 31` when computing msb. Presumably this compensates for
     // mask0 being sign-extended to 64 bits before it is inverted, which can
     // make MaskLen0 count bits above bit 31 - confirm.
6766 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
6767 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
6768 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
6769 (MaskIdx0 + MaskLen0 <= 64) &&
6770 (CN1 = dyn_cast<ConstantSDNode>(N1->getOperand(1))) &&
6771 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
6772 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 3\n");
6773 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
6774 DAG.getNode(ISD::SRL, DL, N1->getValueType(0), N1,
6775 DAG.getConstant(MaskIdx0, DL, GRLenVT)),
6776 DAG.getConstant(ValBits == 32
6777 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
6778 : (MaskIdx0 + MaskLen0 - 1),
6779 DL, GRLenVT),
6780 DAG.getConstant(MaskIdx0, DL, GRLenVT));
6781 }
6782
6783 // 4th pattern to match BSTRINS:
6784 // R = or (and X, mask), (shl Y, shamt)
6785 // where mask = (2**shamt - 1)
6786 // =>
6787 // R = BSTRINS X, Y, ValBits - 1, shamt
6788 // where ValBits = 32 or 64
6789 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::SHL &&
6790 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
6791 isShiftedMask_64(CN0->getZExtValue(), MaskIdx0, MaskLen0) &&
6792 MaskIdx0 == 0 && (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
6793 (Shamt = CN1->getZExtValue()) == MaskLen0 &&
6794 (MaskIdx0 + MaskLen0 <= ValBits)) {
6795 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 4\n");
6796 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
6797 N1.getOperand(0),
6798 DAG.getConstant((ValBits - 1), DL, GRLenVT),
6799 DAG.getConstant(Shamt, DL, GRLenVT));
6800 }
6801
6802 // 5th pattern to match BSTRINS:
6803 // R = or (and X, mask), const
6804 // where ~mask = (2**size - 1) << lsb, mask & const = 0
6805 // =>
6806 // R = BSTRINS X, (const >> lsb), msb, lsb
6807 // where msb = lsb + size - 1
     // The inserted value is the constant shifted right (arithmetically, on
     // its sign-extended value) by lsb; msb uses the same `& 31` adjustment for
     // 32-bit values as the 3rd pattern.
6808 if (N0.getOpcode() == ISD::AND &&
6809 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
6810 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
6811 (CN1 = dyn_cast<ConstantSDNode>(N1)) &&
6812 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
6813 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 5\n");
6814 return DAG.getNode(
6815 LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
6816 DAG.getSignedConstant(CN1->getSExtValue() >> MaskIdx0, DL, ValTy),
6817 DAG.getConstant(ValBits == 32 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
6818 : (MaskIdx0 + MaskLen0 - 1),
6819 DL, GRLenVT),
6820 DAG.getConstant(MaskIdx0, DL, GRLenVT));
6821 }
6822
6823 // 6th pattern.
6824 // a = b | ((c & mask) << shamt), where all positions in b to be overwritten
6825 // by the incoming bits are known to be zero.
6826 // =>
6827 // a = BSTRINS b, c, shamt + MaskLen - 1, shamt
6828 //
6829 // Note that the 1st pattern is a special situation of the 6th, i.e. the 6th
6830 // pattern is more common than the 1st. So we put the 1st before the 6th in
6831 // order to match as many nodes as possible.
6832 ConstantSDNode *CNMask, *CNShamt;
6833 unsigned MaskIdx, MaskLen;
6834 if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
6835 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
6836 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
6837 MaskIdx == 0 && (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
6838 CNShamt->getZExtValue() + MaskLen <= ValBits) {
6839 Shamt = CNShamt->getZExtValue();
     // Bit positions of the result that the shifted field will occupy.
6840 APInt ShMask(ValBits, CNMask->getZExtValue() << Shamt);
     // Fold only when N0 is known to be zero in every one of those positions.
6841 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
6842 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 6\n");
6843 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
6844 N1.getOperand(0).getOperand(0),
6845 DAG.getConstant(Shamt + MaskLen - 1, DL, GRLenVT),
6846 DAG.getConstant(Shamt, DL, GRLenVT));
6847 }
6848 }
6849
6850 // 7th pattern.
6851 // a = b | ((c << shamt) & shifted_mask), where all positions in b to be
6852 // overwritten by the incoming bits are known to be zero.
6853 // =>
6854 // a = BSTRINS b, c, MaskIdx + MaskLen - 1, MaskIdx
6855 //
6856 // Similarly, the 7th pattern is more common than the 2nd. So we put the 2nd
6857 // before the 7th in order to match as many nodes as possible.
6858 if (N1.getOpcode() == ISD::AND &&
6859 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
6860 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
6861 N1.getOperand(0).getOpcode() == ISD::SHL &&
6862 (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
6863 CNShamt->getZExtValue() == MaskIdx) {
     // Here the AND mask is already positioned, so it is used unshifted.
6864 APInt ShMask(ValBits, CNMask->getZExtValue());
6865 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
6866 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 7\n");
6867 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
6868 N1.getOperand(0).getOperand(0),
6869 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
6870 DAG.getConstant(MaskIdx, DL, GRLenVT));
6871 }
6872 }
6873
6874 // (or a, b) and (or b, a) are equivalent, so swap the operands and retry.
6875 if (!SwapAndRetried) {
6876 std::swap(N0, N1);
6877 SwapAndRetried = true;
6878 goto Retry;
6879 }
6880
     // Re-arm the flag so the 8th pattern also gets one attempt per operand
     // order. N0/N1 are still in the swapped order left by the retry above.
6881 SwapAndRetried = false;
6882Retry2:
6883 // 8th pattern.
6884 // a = b | (c & shifted_mask), where all positions in b to be overwritten by
6885 // the incoming bits are known to be zero.
6886 // =>
6887 // a = BSTRINS b, c >> MaskIdx, MaskIdx + MaskLen - 1, MaskIdx
6888 //
6889 // Similarly, the 8th pattern is more common than the 4th and 5th patterns. So
6890 // we put it here in order to match as many nodes as possible or generate less
6891 // instructions.
6892 if (N1.getOpcode() == ISD::AND &&
6893 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
6894 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen)) {
6895 APInt ShMask(ValBits, CNMask->getZExtValue());
6896 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
6897 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 8\n");
     // The shift is applied to the AND's first operand (c), not to the AND
     // itself; BSTRINS then takes the field from the low bits of c >> MaskIdx.
6898 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
6899 DAG.getNode(ISD::SRL, DL, N1->getValueType(0),
6900 N1->getOperand(0),
6901 DAG.getConstant(MaskIdx, DL, GRLenVT)),
6902 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
6903 DAG.getConstant(MaskIdx, DL, GRLenVT));
6904 }
6905 }
6906 // Swap N0/N1 and retry.
6907 if (!SwapAndRetried) {
6908 std::swap(N0, N1);
6909 SwapAndRetried = true;
6910 goto Retry2;
6911 }
6912
     // No pattern matched in either operand order.
6913 return SDValue();
6914}
6915
6916static bool checkValueWidth(SDValue V, ISD::LoadExtType &ExtType) {
6917 ExtType = ISD::NON_EXTLOAD;
6918
6919 switch (V.getNode()->getOpcode()) {
6920 case ISD::LOAD: {
6921 LoadSDNode *LoadNode = cast<LoadSDNode>(V.getNode());
6922 if ((LoadNode->getMemoryVT() == MVT::i8) ||
6923 (LoadNode->getMemoryVT() == MVT::i16)) {
6924 ExtType = LoadNode->getExtensionType();
6925 return true;
6926 }
6927 return false;
6928 }
6929 case ISD::AssertSext: {
6930 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
6931 if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
6932 ExtType = ISD::SEXTLOAD;
6933 return true;
6934 }
6935 return false;
6936 }
6937 case ISD::AssertZext: {
6938 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
6939 if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
6940 ExtType = ISD::ZEXTLOAD;
6941 return true;
6942 }
6943 return false;
6944 }
6945 default:
6946 return false;
6947 }
6948
6949 return false;
6950}
6951
6952// Eliminate redundant truncation and zero-extension nodes.
6953// * Case 1:
6954// +------------+ +------------+ +------------+
6955// | Input1 | | Input2 | | CC |
6956// +------------+ +------------+ +------------+
6957// | | |
6958// V V +----+
6959// +------------+ +------------+ |
6960// | TRUNCATE | | TRUNCATE | |
6961// +------------+ +------------+ |
6962// | | |
6963// V V |
6964// +------------+ +------------+ |
6965// | ZERO_EXT | | ZERO_EXT | |
6966// +------------+ +------------+ |
6967// | | |
6968// | +-------------+ |
6969// V V | |
6970// +----------------+ | |
6971// | AND | | |
6972// +----------------+ | |
6973// | | |
6974// +---------------+ | |
6975// | | |
6976// V V V
6977// +-------------+
6978// | CMP |
6979// +-------------+
6980// * Case 2:
6981// +------------+ +------------+ +-------------+ +------------+ +------------+
6982// | Input1 | | Input2 | | Constant -1 | | Constant 0 | | CC |
6983// +------------+ +------------+ +-------------+ +------------+ +------------+
6984// | | | | |
6985// V | | | |
6986// +------------+ | | | |
6987// | XOR |<---------------------+ | |
6988// +------------+ | | |
6989// | | | |
6990// V V +---------------+ |
6991// +------------+ +------------+ | |
6992// | TRUNCATE | | TRUNCATE | | +-------------------------+
6993// +------------+ +------------+ | |
6994// | | | |
6995// V V | |
6996// +------------+ +------------+ | |
6997// | ZERO_EXT | | ZERO_EXT | | |
6998// +------------+ +------------+ | |
6999// | | | |
7000// V V | |
7001// +----------------+ | |
7002// | AND | | |
7003// +----------------+ | |
7004// | | |
7005// +---------------+ | |
7006// | | |
7007// V V V
7008// +-------------+
7009// | CMP |
7010// +-------------+
7013 const LoongArchSubtarget &Subtarget) {
     // Matches a compare N whose first operand is an AND of zero-extended
     // truncations (optionally with the first AND input inverted by XOR with
     // all-ones) and, when both truncation inputs are known to be 8/16-bit
     // values, rebuilds the compare directly on those inputs. On success all
     // uses of N are replaced and SDValue(N, 0) is returned; otherwise an empty
     // SDValue is returned.
     // NOTE(review): the opening lines of this signature (listing lines
     // 7011-7012) are not present in this listing.
7014 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
7015
     // Operand 0 of the compare must be an AND.
7016 SDNode *AndNode = N->getOperand(0).getNode();
7017 if (AndNode->getOpcode() != ISD::AND)
7018 return SDValue();
7019
     // The AND's second input must be a zero-extension.
7020 SDValue AndInputValue2 = AndNode->getOperand(1);
7021 if (AndInputValue2.getOpcode() != ISD::ZERO_EXTEND)
7022 return SDValue();
7023
7024 SDValue CmpInputValue = N->getOperand(1);
7025 SDValue AndInputValue1 = AndNode->getOperand(0);
7026 if (AndInputValue1.getOpcode() == ISD::XOR) {
     // Inverted form: only eq/ne compares against constant zero qualify, the
     // XOR's second operand must be all-ones, and the value being inverted
     // must itself be a zero-extension. From here on AndInputValue1 refers to
     // that un-inverted zero-extension.
7027 if (CC != ISD::SETEQ && CC != ISD::SETNE)
7028 return SDValue();
7029 ConstantSDNode *CN = dyn_cast<ConstantSDNode>(AndInputValue1.getOperand(1));
7030 if (!CN || !CN->isAllOnes())
7031 return SDValue();
7032 CN = dyn_cast<ConstantSDNode>(CmpInputValue);
7033 if (!CN || !CN->isZero())
7034 return SDValue();
7035 AndInputValue1 = AndInputValue1.getOperand(0);
7036 if (AndInputValue1.getOpcode() != ISD::ZERO_EXTEND)
7037 return SDValue();
7038 } else if (AndInputValue1.getOpcode() == ISD::ZERO_EXTEND) {
     // Plain form: the compare's other operand must be the very same value as
     // the AND's second input.
7039 if (AndInputValue2 != CmpInputValue)
7040 return SDValue();
7041 } else {
7042 return SDValue();
7043 }
7044
     // Both zero-extensions must wrap a TRUNCATE.
7045 SDValue TruncValue1 = AndInputValue1.getNode()->getOperand(0);
7046 if (TruncValue1.getOpcode() != ISD::TRUNCATE)
7047 return SDValue();
7048
7049 SDValue TruncValue2 = AndInputValue2.getNode()->getOperand(0);
7050 if (TruncValue2.getOpcode() != ISD::TRUNCATE)
7051 return SDValue();
7052
7053 SDValue TruncInputValue1 = TruncValue1.getNode()->getOperand(0);
7054 SDValue TruncInputValue2 = TruncValue2.getNode()->getOperand(0);
7055 ISD::LoadExtType ExtType1;
7056 ISD::LoadExtType ExtType2;
7057
     // Both pre-truncation values must be recognised by checkValueWidth, which
     // also reports how each was extended.
7058 if (!checkValueWidth(TruncInputValue1, ExtType1) ||
7059 !checkValueWidth(TruncInputValue2, ExtType2))
7060 return SDValue();
7061
     // The two pre-truncation values and the AND must share one value type so
     // the AND can be rebuilt on them directly.
7062 if (TruncInputValue1->getValueType(0) != TruncInputValue2->getValueType(0) ||
7063 AndNode->getValueType(0) != TruncInputValue1->getValueType(0))
7064 return SDValue();
7065
     // Give up unless the second input is zero- or sign-extended, or the first
     // input is sign-extended.
7066 if ((ExtType2 != ISD::ZEXTLOAD) &&
7067 ((ExtType2 != ISD::SEXTLOAD) && (ExtType1 != ISD::SEXTLOAD)))
7068 return SDValue();
7069
7070 // These truncation and zero-extension nodes are not necessary, remove them.
7071 SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N), AndNode->getValueType(0),
7072 TruncInputValue1, TruncInputValue2);
     // The rebuilt compare always tests the new AND against the second
     // pre-truncation value with the original condition code (this is also
     // what the inverted form is rewritten to).
7073 SDValue NewSetCC =
7074 DAG.getSetCC(SDLoc(N), N->getValueType(0), NewAnd, TruncInputValue2, CC);
     // N's uses are redirected here and N itself is returned.
7075 DAG.ReplaceAllUsesWith(N, NewSetCC.getNode());
7076 return SDValue(N, 0);
7077}
7078
7079// Combine (loongarch_bitrev_w (loongarch_revb_2w X)) to loongarch_bitrev_4b.
7082 const LoongArchSubtarget &Subtarget) {
     // Returns a LoongArchISD::BITREV_4B node applied to the REVB_2W's input
     // when N's first operand is a LoongArchISD::REVB_2W; returns an empty
     // SDValue otherwise or while still before operation legalization.
     // NOTE(review): the opening lines of this signature (listing lines
     // 7080-7081) are not present in this listing.
7083 if (DCI.isBeforeLegalizeOps())
7084 return SDValue();
7085
7086 SDValue Src = N->getOperand(0);
7087 if (Src.getOpcode() != LoongArchISD::REVB_2W)
7088 return SDValue();
7089
     // The new node keeps N's result type and takes the REVB_2W's own operand.
7090 return DAG.getNode(LoongArchISD::BITREV_4B, SDLoc(N), N->getValueType(0),
7091 Src.getOperand(0));
7092}
7093
7094// Perform common combines for BR_CC and SELECT_CC conditions.
7095static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL,
7096 SelectionDAG &DAG, const LoongArchSubtarget &Subtarget) {
7097 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
7098
7099 // As far as arithmetic right shift always saves the sign,
7100 // shift can be omitted.
7101 // Fold setlt (sra X, N), 0 -> setlt X, 0 and
7102 // setge (sra X, N), 0 -> setge X, 0
7103 if (isNullConstant(RHS) && (CCVal == ISD::SETGE || CCVal == ISD::SETLT) &&
7104 LHS.getOpcode() == ISD::SRA) {
7105 LHS = LHS.getOperand(0);
7106 return true;
7107 }
7108
7109 if (!ISD::isIntEqualitySetCC(CCVal))
7110 return false;
7111
7112 // Fold ((setlt X, Y), 0, ne) -> (X, Y, lt)
7113 // Sometimes the setcc is introduced after br_cc/select_cc has been formed.
7114 if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
7115 LHS.getOperand(0).getValueType() == Subtarget.getGRLenVT()) {
7116 // If we're looking for eq 0 instead of ne 0, we need to invert the
7117 // condition.
7118 bool Invert = CCVal == ISD::SETEQ;
7119 CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
7120 if (Invert)
7121 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
7122
7123 RHS = LHS.getOperand(1);
7124 LHS = LHS.getOperand(0);
7125 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
7126
7127 CC = DAG.getCondCode(CCVal);
7128 return true;
7129 }
7130
7131 // Fold ((srl (and X, 1<<C), C), 0, eq/ne) -> ((shl X, GRLen-1-C), 0, ge/lt)
7132 if (isNullConstant(RHS) && LHS.getOpcode() == ISD::SRL && LHS.hasOneUse() &&
7133 LHS.getOperand(1).getOpcode() == ISD::Constant) {
7134 SDValue LHS0 = LHS.getOperand(0);
7135 if (LHS0.getOpcode() == ISD::AND &&
7136 LHS0.getOperand(1).getOpcode() == ISD::Constant) {
7137 uint64_t Mask = LHS0.getConstantOperandVal(1);
7138 uint64_t ShAmt = LHS.getConstantOperandVal(1);
7139 if (isPowerOf2_64(Mask) && Log2_64(Mask) == ShAmt) {
7140 CCVal = CCVal == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
7141 CC = DAG.getCondCode(CCVal);
7142
7143 ShAmt = LHS.getValueSizeInBits() - 1 - ShAmt;
7144 LHS = LHS0.getOperand(0);
7145 if (ShAmt != 0)
7146 LHS =
7147 DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS0.getOperand(0),
7148 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
7149 return true;
7150 }
7151 }
7152 }
7153
7154 // (X, 1, setne) -> (X, 0, seteq) if we can prove X is 0/1.
7155 // This can occur when legalizing some floating point comparisons.
7156 APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
7157 if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
7158 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
7159 CC = DAG.getCondCode(CCVal);
7160 RHS = DAG.getConstant(0, DL, LHS.getValueType());
7161 return true;
7162 }
7163
7164 return false;
7165}
7166
7169 const LoongArchSubtarget &Subtarget) {
     // Runs combine_CC on the condition operands of N (operands 1, 2 and 3)
     // and, if it changed them, rebuilds the node as LoongArchISD::BR_CC with
     // operands 0 and 4 carried over unchanged. Returns an empty SDValue when
     // combine_CC made no change.
     // NOTE(review): the opening lines of this signature (listing lines
     // 7167-7168) are not present in this listing.
7170 SDValue LHS = N->getOperand(1);
7171 SDValue RHS = N->getOperand(2);
7172 SDValue CC = N->getOperand(3);
7173 SDLoc DL(N);
7174
     // combine_CC updates LHS/RHS/CC in place when it returns true.
7175 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
7176 return DAG.getNode(LoongArchISD::BR_CC, DL, N->getValueType(0),
7177 N->getOperand(0), LHS, RHS, CC, N->getOperand(4));
7178
7179 return SDValue();
7180}
7181
7184 const LoongArchSubtarget &Subtarget) {
     // Simplifies a select-on-condition node N whose operands are
     // (LHS, RHS, CC, TrueV, FalseV): returns TrueV when both arms are the same
     // value, turns a sign-test select between two small constants into
     // SRA/AND/ADD arithmetic, or rebuilds the node as LoongArchISD::SELECT_CC
     // after combine_CC simplified the condition. Returns an empty SDValue if
     // none of these applies.
     // NOTE(review): the opening lines of this signature (listing lines
     // 7182-7183) are not present in this listing.
7185 // Transform
7186 SDValue LHS = N->getOperand(0);
7187 SDValue RHS = N->getOperand(1);
7188 SDValue CC = N->getOperand(2);
7189 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
7190 SDValue TrueV = N->getOperand(3);
7191 SDValue FalseV = N->getOperand(4);
7192 SDLoc DL(N);
7193 EVT VT = N->getValueType(0);
7194
7195 // If the True and False values are the same, we don't need a select_cc.
7196 if (TrueV == FalseV)
7197 return TrueV;
7198
7199 // (select (x < 0), y, z) -> x >> (GRLEN - 1) & (y - z) + z
7200 // (select (x >= 0), y, z) -> x >> (GRLEN - 1) & (z - y) + y
     // NOTE(review): listing line 7202 is absent between the two lines of the
     // condition below, so the condition shown here may be incomplete.
7201 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
7203 (CCVal == ISD::CondCode::SETLT || CCVal == ISD::CondCode::SETGE)) {
     // For SETGE swap the arms so the code below can treat both conditions as
     // the "x < 0" form.
7204 if (CCVal == ISD::CondCode::SETGE)
7205 std::swap(TrueV, FalseV);
7206
7207 int64_t TrueSImm = cast<ConstantSDNode>(TrueV)->getSExtValue();
7208 int64_t FalseSImm = cast<ConstantSDNode>(FalseV)->getSExtValue();
7209 // Only handle simm12, if it is not in this range, it can be considered as
7210 // register.
7211 if (isInt<12>(TrueSImm) && isInt<12>(FalseSImm) &&
7212 isInt<12>(TrueSImm - FalseSImm)) {
     // SRA by GRLen-1 smears the sign bit of LHS across the whole value, so
     // the AND yields (TrueSImm - FalseSImm) when LHS is negative and 0
     // otherwise; adding FalseV then selects between the two constants.
7213 SDValue SRA =
7214 DAG.getNode(ISD::SRA, DL, VT, LHS,
7215 DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT));
7216 SDValue AND =
7217 DAG.getNode(ISD::AND, DL, VT, SRA,
7218 DAG.getSignedConstant(TrueSImm - FalseSImm, DL, VT));
7219 return DAG.getNode(ISD::ADD, DL, VT, AND, FalseV);
7220 }
7221
     // The arithmetic form was not used: undo the swap so the code below sees
     // the arms in their original order.
7222 if (CCVal == ISD::CondCode::SETGE)
7223 std::swap(TrueV, FalseV);
7224 }
7225
     // combine_CC updates LHS/RHS/CC in place when it returns true.
7226 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
7227 return DAG.getNode(LoongArchISD::SELECT_CC, DL, N->getValueType(0),
7228 {LHS, RHS, CC, TrueV, FalseV});
7229
7230 return SDValue();
7231}
7232
// Validates the constant operand ImmOp of Node as an N-bit immediate (signed
// when IsSigned is true, unsigned otherwise). If it is out of range an error
// is emitted on the context and an UNDEF of GRLenVT is returned; otherwise the
// immediate is returned as a GRLenVT constant.
// NOTE(review): the line carrying the function name and first parameters
// (listing line 7234) is not present in this listing.
7233template <unsigned N>
7235 SelectionDAG &DAG,
7236 const LoongArchSubtarget &Subtarget,
7237 bool IsSigned = false) {
7238 SDLoc DL(Node);
     // cast<> requires the operand to be a ConstantSDNode.
7239 auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
7240 // Check the ImmArg.
7241 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
7242 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
7243 DAG.getContext()->emitError(Node->getOperationName(0) +
7244 ": argument out of range.");
7245 return DAG.getNode(ISD::UNDEF, DL, Subtarget.getGRLenVT());
7246 }
     // NOTE(review): the zero-extended value is used here even when IsSigned
     // is true - confirm this is intended for negative immediates.
7247 return DAG.getConstant(CImm->getZExtValue(), DL, Subtarget.getGRLenVT());
7248}
7249
// Validates the constant operand ImmOp of Node as an N-bit immediate (signed
// when IsSigned is true, unsigned otherwise) and returns it as a constant of
// Node's result type. If the immediate is out of range an error is emitted on
// the context and an UNDEF of the result type is returned instead.
7250template <unsigned N>
7251static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp,
7252 SelectionDAG &DAG, bool IsSigned = false) {
7253 SDLoc DL(Node);
7254 EVT ResTy = Node->getValueType(0);
     // cast<> requires the operand to be a ConstantSDNode.
7255 auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
7256
7257 // Check the ImmArg.
7258 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
7259 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
7260 DAG.getContext()->emitError(Node->getOperationName(0) +
7261 ": argument out of range.");
7262 return DAG.getNode(ISD::UNDEF, DL, ResTy);
7263 }
     // NOTE(review): listing line 7265 (the first line of the getConstant
     // argument) is not present in this listing, so the expression below is
     // shown incomplete. The visible part passes the sign- or zero-extended
     // immediate together with the IsSigned flag.
7264 return DAG.getConstant(
7266 IsSigned ? CImm->getSExtValue() : CImm->getZExtValue(), IsSigned),
7267 DL, ResTy);
7268}
7269
7271 SDLoc DL(Node);
     // Returns operand 2 of Node ANDed with a constant of the result type
     // whose value is (scalar element bit width - 1).
     // NOTE(review): this function's signature line (listing line 7270) is not
     // present in this listing.
7272 EVT ResTy = Node->getValueType(0);
7273 SDValue Vec = Node->getOperand(2);
7274 SDValue Mask = DAG.getConstant(Vec.getScalarValueSizeInBits() - 1, DL, ResTy);
7275 return DAG.getNode(ISD::AND, DL, ResTy, Vec, Mask);
7276}
7277
7279 SDLoc DL(Node);
     // Returns operand 1 of Node with one bit cleared: the bit is built as
     // 1 << truncateVecElts(Node, DAG), inverted, and ANDed with operand 1.
     // NOTE(review): this function's signature line (listing line 7278) is not
     // present in this listing.
7280 EVT ResTy = Node->getValueType(0);
7281 SDValue One = DAG.getConstant(1, DL, ResTy);
7282 SDValue Bit =
7283 DAG.getNode(ISD::SHL, DL, ResTy, One, truncateVecElts(Node, DAG));
7284
7285 return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1),
7286 DAG.getNOT(DL, Bit, ResTy));
7287}
7288
// Clears a single bit, selected by the constant operand 2 of Node, in operand
// 1: the immediate must fit in N unsigned bits, otherwise an error is emitted
// on the context and an UNDEF of the result type is returned.
// NOTE(review): the line carrying the function name and parameters (listing
// line 7290) is not present in this listing.
7289template <unsigned N>
7291 SDLoc DL(Node);
7292 EVT ResTy = Node->getValueType(0);
7293 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
7294 // Check the unsigned ImmArg.
7295 if (!isUInt<N>(CImm->getZExtValue())) {
7296 DAG.getContext()->emitError(Node->getOperationName(0) +
7297 ": argument out of range.");
7298 return DAG.getNode(ISD::UNDEF, DL, ResTy);
7299 }
7300
     // 1 << imm at the scalar element width; its complement is the AND mask.
7301 APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
7302 SDValue Mask = DAG.getConstant(~BitImm, DL, ResTy);
7303
7304 return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1), Mask);
7305}
7306
// Sets a single bit, selected by the constant operand 2 of Node, in operand 1:
// the immediate must fit in N unsigned bits, otherwise an error is emitted on
// the context and an UNDEF of the result type is returned.
// NOTE(review): the line carrying the function name and parameters (listing
// line 7308) is not present in this listing.
7307template <unsigned N>
7309 SDLoc DL(Node);
7310 EVT ResTy = Node->getValueType(0);
7311 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
7312 // Check the unsigned ImmArg.
7313 if (!isUInt<N>(CImm->getZExtValue())) {
7314 DAG.getContext()->emitError(Node->getOperationName(0) +
7315 ": argument out of range.");
7316 return DAG.getNode(ISD::UNDEF, DL, ResTy);
7317 }
7318
     // 1 << imm at the scalar element width, ORed into operand 1.
7319 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
7320 SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
7321 return DAG.getNode(ISD::OR, DL, ResTy, Node->getOperand(1), BitImm);
7322}
7323
// Flips a single bit, selected by the constant operand 2 of Node, in operand
// 1: the immediate must fit in N unsigned bits, otherwise an error is emitted
// on the context and an UNDEF of the result type is returned.
// NOTE(review): the line carrying the function name and parameters (listing
// line 7325) is not present in this listing.
7324template <unsigned N>
7326 SDLoc DL(Node);
7327 EVT ResTy = Node->getValueType(0);
7328 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
7329 // Check the unsigned ImmArg.
7330 if (!isUInt<N>(CImm->getZExtValue())) {
7331 DAG.getContext()->emitError(Node->getOperationName(0) +
7332 ": argument out of range.");
7333 return DAG.getNode(ISD::UNDEF, DL, ResTy);
7334 }
7335
     // 1 << imm at the scalar element width, XORed into operand 1.
7336 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
7337 SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
7338 return DAG.getNode(ISD::XOR, DL, ResTy, Node->getOperand(1), BitImm);
7339}
7340
// Builds a ResOp node of N's result type from N's operand 1, the constant
// operand 2 (as an i32 index constant) and EltVT. The index must fit in W
// unsigned bits; otherwise an error is emitted on the context and an UNDEF of
// N's result type is returned.
// NOTE(review): the line carrying the function name and first parameters
// (listing line 7342) and the line defining EltVT (listing line 7353) are not
// present in this listing.
7341template <unsigned W>
7343 unsigned ResOp) {
     // Operand 2 is read as a constant and narrowed to unsigned before the
     // range check.
7344 unsigned Imm = N->getConstantOperandVal(2);
7345 if (!isUInt<W>(Imm)) {
7346 const StringRef ErrorMsg = "argument out of range";
7347 DAG.getContext()->emitError(N->getOperationName(0) + ": " + ErrorMsg + ".");
7348 return DAG.getUNDEF(N->getValueType(0));
7349 }
7350 SDLoc DL(N);
7351 SDValue Vec = N->getOperand(1);
7352 SDValue Idx = DAG.getConstant(Imm, DL, MVT::i32);
7354 return DAG.getNode(ResOp, DL, N->getValueType(0), Vec, Idx, EltVT);
7355}
7356
7357static SDValue
7360 const LoongArchSubtarget &Subtarget) {
7361 SDLoc DL(N);
7362 switch (N->getConstantOperandVal(0)) {
7363 default:
7364 break;
7365 case Intrinsic::loongarch_lsx_vadd_b:
7366 case Intrinsic::loongarch_lsx_vadd_h:
7367 case Intrinsic::loongarch_lsx_vadd_w:
7368 case Intrinsic::loongarch_lsx_vadd_d:
7369 case Intrinsic::loongarch_lasx_xvadd_b:
7370 case Intrinsic::loongarch_lasx_xvadd_h:
7371 case Intrinsic::loongarch_lasx_xvadd_w:
7372 case Intrinsic::loongarch_lasx_xvadd_d:
7373 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
7374 N->getOperand(2));
7375 case Intrinsic::loongarch_lsx_vaddi_bu:
7376 case Intrinsic::loongarch_lsx_vaddi_hu:
7377 case Intrinsic::loongarch_lsx_vaddi_wu:
7378 case Intrinsic::loongarch_lsx_vaddi_du:
7379 case Intrinsic::loongarch_lasx_xvaddi_bu:
7380 case Intrinsic::loongarch_lasx_xvaddi_hu:
7381 case Intrinsic::loongarch_lasx_xvaddi_wu:
7382 case Intrinsic::loongarch_lasx_xvaddi_du:
7383 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
7384 lowerVectorSplatImm<5>(N, 2, DAG));
7385 case Intrinsic::loongarch_lsx_vsub_b:
7386 case Intrinsic::loongarch_lsx_vsub_h:
7387 case Intrinsic::loongarch_lsx_vsub_w:
7388 case Intrinsic::loongarch_lsx_vsub_d:
7389 case Intrinsic::loongarch_lasx_xvsub_b:
7390 case Intrinsic::loongarch_lasx_xvsub_h:
7391 case Intrinsic::loongarch_lasx_xvsub_w:
7392 case Intrinsic::loongarch_lasx_xvsub_d:
7393 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
7394 N->getOperand(2));
7395 case Intrinsic::loongarch_lsx_vsubi_bu:
7396 case Intrinsic::loongarch_lsx_vsubi_hu:
7397 case Intrinsic::loongarch_lsx_vsubi_wu:
7398 case Intrinsic::loongarch_lsx_vsubi_du:
7399 case Intrinsic::loongarch_lasx_xvsubi_bu:
7400 case Intrinsic::loongarch_lasx_xvsubi_hu:
7401 case Intrinsic::loongarch_lasx_xvsubi_wu:
7402 case Intrinsic::loongarch_lasx_xvsubi_du:
7403 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
7404 lowerVectorSplatImm<5>(N, 2, DAG));
7405 case Intrinsic::loongarch_lsx_vneg_b:
7406 case Intrinsic::loongarch_lsx_vneg_h:
7407 case Intrinsic::loongarch_lsx_vneg_w:
7408 case Intrinsic::loongarch_lsx_vneg_d:
7409 case Intrinsic::loongarch_lasx_xvneg_b:
7410 case Intrinsic::loongarch_lasx_xvneg_h:
7411 case Intrinsic::loongarch_lasx_xvneg_w:
7412 case Intrinsic::loongarch_lasx_xvneg_d:
7413 return DAG.getNode(
7414 ISD::SUB, DL, N->getValueType(0),
7415 DAG.getConstant(
7416 APInt(N->getValueType(0).getScalarType().getSizeInBits(), 0,
7417 /*isSigned=*/true),
7418 SDLoc(N), N->getValueType(0)),
7419 N->getOperand(1));
7420 case Intrinsic::loongarch_lsx_vmax_b:
7421 case Intrinsic::loongarch_lsx_vmax_h:
7422 case Intrinsic::loongarch_lsx_vmax_w:
7423 case Intrinsic::loongarch_lsx_vmax_d:
7424 case Intrinsic::loongarch_lasx_xvmax_b:
7425 case Intrinsic::loongarch_lasx_xvmax_h:
7426 case Intrinsic::loongarch_lasx_xvmax_w:
7427 case Intrinsic::loongarch_lasx_xvmax_d:
7428 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
7429 N->getOperand(2));
7430 case Intrinsic::loongarch_lsx_vmax_bu:
7431 case Intrinsic::loongarch_lsx_vmax_hu:
7432 case Intrinsic::loongarch_lsx_vmax_wu:
7433 case Intrinsic::loongarch_lsx_vmax_du:
7434 case Intrinsic::loongarch_lasx_xvmax_bu:
7435 case Intrinsic::loongarch_lasx_xvmax_hu:
7436 case Intrinsic::loongarch_lasx_xvmax_wu:
7437 case Intrinsic::loongarch_lasx_xvmax_du:
7438 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
7439 N->getOperand(2));
7440 case Intrinsic::loongarch_lsx_vmaxi_b:
7441 case Intrinsic::loongarch_lsx_vmaxi_h:
7442 case Intrinsic::loongarch_lsx_vmaxi_w:
7443 case Intrinsic::loongarch_lsx_vmaxi_d:
7444 case Intrinsic::loongarch_lasx_xvmaxi_b:
7445 case Intrinsic::loongarch_lasx_xvmaxi_h:
7446 case Intrinsic::loongarch_lasx_xvmaxi_w:
7447 case Intrinsic::loongarch_lasx_xvmaxi_d:
7448 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
7449 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
7450 case Intrinsic::loongarch_lsx_vmaxi_bu:
7451 case Intrinsic::loongarch_lsx_vmaxi_hu:
7452 case Intrinsic::loongarch_lsx_vmaxi_wu:
7453 case Intrinsic::loongarch_lsx_vmaxi_du:
7454 case Intrinsic::loongarch_lasx_xvmaxi_bu:
7455 case Intrinsic::loongarch_lasx_xvmaxi_hu:
7456 case Intrinsic::loongarch_lasx_xvmaxi_wu:
7457 case Intrinsic::loongarch_lasx_xvmaxi_du:
7458 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
7459 lowerVectorSplatImm<5>(N, 2, DAG));
7460 case Intrinsic::loongarch_lsx_vmin_b:
7461 case Intrinsic::loongarch_lsx_vmin_h:
7462 case Intrinsic::loongarch_lsx_vmin_w:
7463 case Intrinsic::loongarch_lsx_vmin_d:
7464 case Intrinsic::loongarch_lasx_xvmin_b:
7465 case Intrinsic::loongarch_lasx_xvmin_h:
7466 case Intrinsic::loongarch_lasx_xvmin_w:
7467 case Intrinsic::loongarch_lasx_xvmin_d:
7468 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
7469 N->getOperand(2));
7470 case Intrinsic::loongarch_lsx_vmin_bu:
7471 case Intrinsic::loongarch_lsx_vmin_hu:
7472 case Intrinsic::loongarch_lsx_vmin_wu:
7473 case Intrinsic::loongarch_lsx_vmin_du:
7474 case Intrinsic::loongarch_lasx_xvmin_bu:
7475 case Intrinsic::loongarch_lasx_xvmin_hu:
7476 case Intrinsic::loongarch_lasx_xvmin_wu:
7477 case Intrinsic::loongarch_lasx_xvmin_du:
7478 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
7479 N->getOperand(2));
7480 case Intrinsic::loongarch_lsx_vmini_b:
7481 case Intrinsic::loongarch_lsx_vmini_h:
7482 case Intrinsic::loongarch_lsx_vmini_w:
7483 case Intrinsic::loongarch_lsx_vmini_d:
7484 case Intrinsic::loongarch_lasx_xvmini_b:
7485 case Intrinsic::loongarch_lasx_xvmini_h:
7486 case Intrinsic::loongarch_lasx_xvmini_w:
7487 case Intrinsic::loongarch_lasx_xvmini_d:
7488 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
7489 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
7490 case Intrinsic::loongarch_lsx_vmini_bu:
7491 case Intrinsic::loongarch_lsx_vmini_hu:
7492 case Intrinsic::loongarch_lsx_vmini_wu:
7493 case Intrinsic::loongarch_lsx_vmini_du:
7494 case Intrinsic::loongarch_lasx_xvmini_bu:
7495 case Intrinsic::loongarch_lasx_xvmini_hu:
7496 case Intrinsic::loongarch_lasx_xvmini_wu:
7497 case Intrinsic::loongarch_lasx_xvmini_du:
7498 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
7499 lowerVectorSplatImm<5>(N, 2, DAG));
7500 case Intrinsic::loongarch_lsx_vmul_b:
7501 case Intrinsic::loongarch_lsx_vmul_h:
7502 case Intrinsic::loongarch_lsx_vmul_w:
7503 case Intrinsic::loongarch_lsx_vmul_d:
7504 case Intrinsic::loongarch_lasx_xvmul_b:
7505 case Intrinsic::loongarch_lasx_xvmul_h:
7506 case Intrinsic::loongarch_lasx_xvmul_w:
7507 case Intrinsic::loongarch_lasx_xvmul_d:
7508 return DAG.getNode(ISD::MUL, DL, N->getValueType(0), N->getOperand(1),
7509 N->getOperand(2));
7510 case Intrinsic::loongarch_lsx_vmadd_b:
7511 case Intrinsic::loongarch_lsx_vmadd_h:
7512 case Intrinsic::loongarch_lsx_vmadd_w:
7513 case Intrinsic::loongarch_lsx_vmadd_d:
7514 case Intrinsic::loongarch_lasx_xvmadd_b:
7515 case Intrinsic::loongarch_lasx_xvmadd_h:
7516 case Intrinsic::loongarch_lasx_xvmadd_w:
7517 case Intrinsic::loongarch_lasx_xvmadd_d: {
7518 EVT ResTy = N->getValueType(0);
7519 return DAG.getNode(ISD::ADD, SDLoc(N), ResTy, N->getOperand(1),
7520 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
7521 N->getOperand(3)));
7522 }
7523 case Intrinsic::loongarch_lsx_vmsub_b:
7524 case Intrinsic::loongarch_lsx_vmsub_h:
7525 case Intrinsic::loongarch_lsx_vmsub_w:
7526 case Intrinsic::loongarch_lsx_vmsub_d:
7527 case Intrinsic::loongarch_lasx_xvmsub_b:
7528 case Intrinsic::loongarch_lasx_xvmsub_h:
7529 case Intrinsic::loongarch_lasx_xvmsub_w:
7530 case Intrinsic::loongarch_lasx_xvmsub_d: {
7531 EVT ResTy = N->getValueType(0);
7532 return DAG.getNode(ISD::SUB, SDLoc(N), ResTy, N->getOperand(1),
7533 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
7534 N->getOperand(3)));
7535 }
7536 case Intrinsic::loongarch_lsx_vdiv_b:
7537 case Intrinsic::loongarch_lsx_vdiv_h:
7538 case Intrinsic::loongarch_lsx_vdiv_w:
7539 case Intrinsic::loongarch_lsx_vdiv_d:
7540 case Intrinsic::loongarch_lasx_xvdiv_b:
7541 case Intrinsic::loongarch_lasx_xvdiv_h:
7542 case Intrinsic::loongarch_lasx_xvdiv_w:
7543 case Intrinsic::loongarch_lasx_xvdiv_d:
7544 return DAG.getNode(ISD::SDIV, DL, N->getValueType(0), N->getOperand(1),
7545 N->getOperand(2));
7546 case Intrinsic::loongarch_lsx_vdiv_bu:
7547 case Intrinsic::loongarch_lsx_vdiv_hu:
7548 case Intrinsic::loongarch_lsx_vdiv_wu:
7549 case Intrinsic::loongarch_lsx_vdiv_du:
7550 case Intrinsic::loongarch_lasx_xvdiv_bu:
7551 case Intrinsic::loongarch_lasx_xvdiv_hu:
7552 case Intrinsic::loongarch_lasx_xvdiv_wu:
7553 case Intrinsic::loongarch_lasx_xvdiv_du:
7554 return DAG.getNode(ISD::UDIV, DL, N->getValueType(0), N->getOperand(1),
7555 N->getOperand(2));
7556 case Intrinsic::loongarch_lsx_vmod_b:
7557 case Intrinsic::loongarch_lsx_vmod_h:
7558 case Intrinsic::loongarch_lsx_vmod_w:
7559 case Intrinsic::loongarch_lsx_vmod_d:
7560 case Intrinsic::loongarch_lasx_xvmod_b:
7561 case Intrinsic::loongarch_lasx_xvmod_h:
7562 case Intrinsic::loongarch_lasx_xvmod_w:
7563 case Intrinsic::loongarch_lasx_xvmod_d:
7564 return DAG.getNode(ISD::SREM, DL, N->getValueType(0), N->getOperand(1),
7565 N->getOperand(2));
7566 case Intrinsic::loongarch_lsx_vmod_bu:
7567 case Intrinsic::loongarch_lsx_vmod_hu:
7568 case Intrinsic::loongarch_lsx_vmod_wu:
7569 case Intrinsic::loongarch_lsx_vmod_du:
7570 case Intrinsic::loongarch_lasx_xvmod_bu:
7571 case Intrinsic::loongarch_lasx_xvmod_hu:
7572 case Intrinsic::loongarch_lasx_xvmod_wu:
7573 case Intrinsic::loongarch_lasx_xvmod_du:
7574 return DAG.getNode(ISD::UREM, DL, N->getValueType(0), N->getOperand(1),
7575 N->getOperand(2));
7576 case Intrinsic::loongarch_lsx_vand_v:
7577 case Intrinsic::loongarch_lasx_xvand_v:
7578 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
7579 N->getOperand(2));
7580 case Intrinsic::loongarch_lsx_vor_v:
7581 case Intrinsic::loongarch_lasx_xvor_v:
7582 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
7583 N->getOperand(2));
7584 case Intrinsic::loongarch_lsx_vxor_v:
7585 case Intrinsic::loongarch_lasx_xvxor_v:
7586 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
7587 N->getOperand(2));
7588 case Intrinsic::loongarch_lsx_vnor_v:
7589 case Intrinsic::loongarch_lasx_xvnor_v: {
7590 SDValue Res = DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
7591 N->getOperand(2));
7592 return DAG.getNOT(DL, Res, Res->getValueType(0));
7593 }
7594 case Intrinsic::loongarch_lsx_vandi_b:
7595 case Intrinsic::loongarch_lasx_xvandi_b:
7596 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
7597 lowerVectorSplatImm<8>(N, 2, DAG));
7598 case Intrinsic::loongarch_lsx_vori_b:
7599 case Intrinsic::loongarch_lasx_xvori_b:
7600 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
7601 lowerVectorSplatImm<8>(N, 2, DAG));
7602 case Intrinsic::loongarch_lsx_vxori_b:
7603 case Intrinsic::loongarch_lasx_xvxori_b:
7604 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
7605 lowerVectorSplatImm<8>(N, 2, DAG));
7606 case Intrinsic::loongarch_lsx_vsll_b:
7607 case Intrinsic::loongarch_lsx_vsll_h:
7608 case Intrinsic::loongarch_lsx_vsll_w:
7609 case Intrinsic::loongarch_lsx_vsll_d:
7610 case Intrinsic::loongarch_lasx_xvsll_b:
7611 case Intrinsic::loongarch_lasx_xvsll_h:
7612 case Intrinsic::loongarch_lasx_xvsll_w:
7613 case Intrinsic::loongarch_lasx_xvsll_d:
7614 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
7615 truncateVecElts(N, DAG));
7616 case Intrinsic::loongarch_lsx_vslli_b:
7617 case Intrinsic::loongarch_lasx_xvslli_b:
7618 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
7619 lowerVectorSplatImm<3>(N, 2, DAG));
7620 case Intrinsic::loongarch_lsx_vslli_h:
7621 case Intrinsic::loongarch_lasx_xvslli_h:
7622 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
7623 lowerVectorSplatImm<4>(N, 2, DAG));
7624 case Intrinsic::loongarch_lsx_vslli_w:
7625 case Intrinsic::loongarch_lasx_xvslli_w:
7626 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
7627 lowerVectorSplatImm<5>(N, 2, DAG));
7628 case Intrinsic::loongarch_lsx_vslli_d:
7629 case Intrinsic::loongarch_lasx_xvslli_d:
7630 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
7631 lowerVectorSplatImm<6>(N, 2, DAG));
7632 case Intrinsic::loongarch_lsx_vsrl_b:
7633 case Intrinsic::loongarch_lsx_vsrl_h:
7634 case Intrinsic::loongarch_lsx_vsrl_w:
7635 case Intrinsic::loongarch_lsx_vsrl_d:
7636 case Intrinsic::loongarch_lasx_xvsrl_b:
7637 case Intrinsic::loongarch_lasx_xvsrl_h:
7638 case Intrinsic::loongarch_lasx_xvsrl_w:
7639 case Intrinsic::loongarch_lasx_xvsrl_d:
7640 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
7641 truncateVecElts(N, DAG));
7642 case Intrinsic::loongarch_lsx_vsrli_b:
7643 case Intrinsic::loongarch_lasx_xvsrli_b:
7644 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
7645 lowerVectorSplatImm<3>(N, 2, DAG));
7646 case Intrinsic::loongarch_lsx_vsrli_h:
7647 case Intrinsic::loongarch_lasx_xvsrli_h:
7648 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
7649 lowerVectorSplatImm<4>(N, 2, DAG));
7650 case Intrinsic::loongarch_lsx_vsrli_w:
7651 case Intrinsic::loongarch_lasx_xvsrli_w:
7652 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
7653 lowerVectorSplatImm<5>(N, 2, DAG));
7654 case Intrinsic::loongarch_lsx_vsrli_d:
7655 case Intrinsic::loongarch_lasx_xvsrli_d:
7656 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
7657 lowerVectorSplatImm<6>(N, 2, DAG));
7658 case Intrinsic::loongarch_lsx_vsra_b:
7659 case Intrinsic::loongarch_lsx_vsra_h:
7660 case Intrinsic::loongarch_lsx_vsra_w:
7661 case Intrinsic::loongarch_lsx_vsra_d:
7662 case Intrinsic::loongarch_lasx_xvsra_b:
7663 case Intrinsic::loongarch_lasx_xvsra_h:
7664 case Intrinsic::loongarch_lasx_xvsra_w:
7665 case Intrinsic::loongarch_lasx_xvsra_d:
7666 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
7667 truncateVecElts(N, DAG));
7668 case Intrinsic::loongarch_lsx_vsrai_b:
7669 case Intrinsic::loongarch_lasx_xvsrai_b:
7670 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
7671 lowerVectorSplatImm<3>(N, 2, DAG));
7672 case Intrinsic::loongarch_lsx_vsrai_h:
7673 case Intrinsic::loongarch_lasx_xvsrai_h:
7674 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
7675 lowerVectorSplatImm<4>(N, 2, DAG));
7676 case Intrinsic::loongarch_lsx_vsrai_w:
7677 case Intrinsic::loongarch_lasx_xvsrai_w:
7678 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
7679 lowerVectorSplatImm<5>(N, 2, DAG));
7680 case Intrinsic::loongarch_lsx_vsrai_d:
7681 case Intrinsic::loongarch_lasx_xvsrai_d:
7682 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
7683 lowerVectorSplatImm<6>(N, 2, DAG));
7684 case Intrinsic::loongarch_lsx_vclz_b:
7685 case Intrinsic::loongarch_lsx_vclz_h:
7686 case Intrinsic::loongarch_lsx_vclz_w:
7687 case Intrinsic::loongarch_lsx_vclz_d:
7688 case Intrinsic::loongarch_lasx_xvclz_b:
7689 case Intrinsic::loongarch_lasx_xvclz_h:
7690 case Intrinsic::loongarch_lasx_xvclz_w:
7691 case Intrinsic::loongarch_lasx_xvclz_d:
7692 return DAG.getNode(ISD::CTLZ, DL, N->getValueType(0), N->getOperand(1));
7693 case Intrinsic::loongarch_lsx_vpcnt_b:
7694 case Intrinsic::loongarch_lsx_vpcnt_h:
7695 case Intrinsic::loongarch_lsx_vpcnt_w:
7696 case Intrinsic::loongarch_lsx_vpcnt_d:
7697 case Intrinsic::loongarch_lasx_xvpcnt_b:
7698 case Intrinsic::loongarch_lasx_xvpcnt_h:
7699 case Intrinsic::loongarch_lasx_xvpcnt_w:
7700 case Intrinsic::loongarch_lasx_xvpcnt_d:
7701 return DAG.getNode(ISD::CTPOP, DL, N->getValueType(0), N->getOperand(1));
7702 case Intrinsic::loongarch_lsx_vbitclr_b:
7703 case Intrinsic::loongarch_lsx_vbitclr_h:
7704 case Intrinsic::loongarch_lsx_vbitclr_w:
7705 case Intrinsic::loongarch_lsx_vbitclr_d:
7706 case Intrinsic::loongarch_lasx_xvbitclr_b:
7707 case Intrinsic::loongarch_lasx_xvbitclr_h:
7708 case Intrinsic::loongarch_lasx_xvbitclr_w:
7709 case Intrinsic::loongarch_lasx_xvbitclr_d:
7710 return lowerVectorBitClear(N, DAG);
7711 case Intrinsic::loongarch_lsx_vbitclri_b:
7712 case Intrinsic::loongarch_lasx_xvbitclri_b:
7713 return lowerVectorBitClearImm<3>(N, DAG);
7714 case Intrinsic::loongarch_lsx_vbitclri_h:
7715 case Intrinsic::loongarch_lasx_xvbitclri_h:
7716 return lowerVectorBitClearImm<4>(N, DAG);
7717 case Intrinsic::loongarch_lsx_vbitclri_w:
7718 case Intrinsic::loongarch_lasx_xvbitclri_w:
7719 return lowerVectorBitClearImm<5>(N, DAG);
7720 case Intrinsic::loongarch_lsx_vbitclri_d:
7721 case Intrinsic::loongarch_lasx_xvbitclri_d:
7722 return lowerVectorBitClearImm<6>(N, DAG);
7723 case Intrinsic::loongarch_lsx_vbitset_b:
7724 case Intrinsic::loongarch_lsx_vbitset_h:
7725 case Intrinsic::loongarch_lsx_vbitset_w:
7726 case Intrinsic::loongarch_lsx_vbitset_d:
7727 case Intrinsic::loongarch_lasx_xvbitset_b:
7728 case Intrinsic::loongarch_lasx_xvbitset_h:
7729 case Intrinsic::loongarch_lasx_xvbitset_w:
7730 case Intrinsic::loongarch_lasx_xvbitset_d: {
7731 EVT VecTy = N->getValueType(0);
7732 SDValue One = DAG.getConstant(1, DL, VecTy);
7733 return DAG.getNode(
7734 ISD::OR, DL, VecTy, N->getOperand(1),
7735 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
7736 }
7737 case Intrinsic::loongarch_lsx_vbitseti_b:
7738 case Intrinsic::loongarch_lasx_xvbitseti_b:
7739 return lowerVectorBitSetImm<3>(N, DAG);
7740 case Intrinsic::loongarch_lsx_vbitseti_h:
7741 case Intrinsic::loongarch_lasx_xvbitseti_h:
7742 return lowerVectorBitSetImm<4>(N, DAG);
7743 case Intrinsic::loongarch_lsx_vbitseti_w:
7744 case Intrinsic::loongarch_lasx_xvbitseti_w:
7745 return lowerVectorBitSetImm<5>(N, DAG);
7746 case Intrinsic::loongarch_lsx_vbitseti_d:
7747 case Intrinsic::loongarch_lasx_xvbitseti_d:
7748 return lowerVectorBitSetImm<6>(N, DAG);
7749 case Intrinsic::loongarch_lsx_vbitrev_b:
7750 case Intrinsic::loongarch_lsx_vbitrev_h:
7751 case Intrinsic::loongarch_lsx_vbitrev_w:
7752 case Intrinsic::loongarch_lsx_vbitrev_d:
7753 case Intrinsic::loongarch_lasx_xvbitrev_b:
7754 case Intrinsic::loongarch_lasx_xvbitrev_h:
7755 case Intrinsic::loongarch_lasx_xvbitrev_w:
7756 case Intrinsic::loongarch_lasx_xvbitrev_d: {
7757 EVT VecTy = N->getValueType(0);
7758 SDValue One = DAG.getConstant(1, DL, VecTy);
7759 return DAG.getNode(
7760 ISD::XOR, DL, VecTy, N->getOperand(1),
7761 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
7762 }
7763 case Intrinsic::loongarch_lsx_vbitrevi_b:
7764 case Intrinsic::loongarch_lasx_xvbitrevi_b:
7765 return lowerVectorBitRevImm<3>(N, DAG);
7766 case Intrinsic::loongarch_lsx_vbitrevi_h:
7767 case Intrinsic::loongarch_lasx_xvbitrevi_h:
7768 return lowerVectorBitRevImm<4>(N, DAG);
7769 case Intrinsic::loongarch_lsx_vbitrevi_w:
7770 case Intrinsic::loongarch_lasx_xvbitrevi_w:
7771 return lowerVectorBitRevImm<5>(N, DAG);
7772 case Intrinsic::loongarch_lsx_vbitrevi_d:
7773 case Intrinsic::loongarch_lasx_xvbitrevi_d:
7774 return lowerVectorBitRevImm<6>(N, DAG);
7775 case Intrinsic::loongarch_lsx_vfadd_s:
7776 case Intrinsic::loongarch_lsx_vfadd_d:
7777 case Intrinsic::loongarch_lasx_xvfadd_s:
7778 case Intrinsic::loongarch_lasx_xvfadd_d:
7779 return DAG.getNode(ISD::FADD, DL, N->getValueType(0), N->getOperand(1),
7780 N->getOperand(2));
7781 case Intrinsic::loongarch_lsx_vfsub_s:
7782 case Intrinsic::loongarch_lsx_vfsub_d:
7783 case Intrinsic::loongarch_lasx_xvfsub_s:
7784 case Intrinsic::loongarch_lasx_xvfsub_d:
7785 return DAG.getNode(ISD::FSUB, DL, N->getValueType(0), N->getOperand(1),
7786 N->getOperand(2));
7787 case Intrinsic::loongarch_lsx_vfmul_s:
7788 case Intrinsic::loongarch_lsx_vfmul_d:
7789 case Intrinsic::loongarch_lasx_xvfmul_s:
7790 case Intrinsic::loongarch_lasx_xvfmul_d:
7791 return DAG.getNode(ISD::FMUL, DL, N->getValueType(0), N->getOperand(1),
7792 N->getOperand(2));
7793 case Intrinsic::loongarch_lsx_vfdiv_s:
7794 case Intrinsic::loongarch_lsx_vfdiv_d:
7795 case Intrinsic::loongarch_lasx_xvfdiv_s:
7796 case Intrinsic::loongarch_lasx_xvfdiv_d:
7797 return DAG.getNode(ISD::FDIV, DL, N->getValueType(0), N->getOperand(1),
7798 N->getOperand(2));
7799 case Intrinsic::loongarch_lsx_vfmadd_s:
7800 case Intrinsic::loongarch_lsx_vfmadd_d:
7801 case Intrinsic::loongarch_lasx_xvfmadd_s:
7802 case Intrinsic::loongarch_lasx_xvfmadd_d:
7803 return DAG.getNode(ISD::FMA, DL, N->getValueType(0), N->getOperand(1),
7804 N->getOperand(2), N->getOperand(3));
7805 case Intrinsic::loongarch_lsx_vinsgr2vr_b:
7806 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
7807 N->getOperand(1), N->getOperand(2),
7808 legalizeIntrinsicImmArg<4>(N, 3, DAG, Subtarget));
7809 case Intrinsic::loongarch_lsx_vinsgr2vr_h:
7810 case Intrinsic::loongarch_lasx_xvinsgr2vr_w:
7811 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
7812 N->getOperand(1), N->getOperand(2),
7813 legalizeIntrinsicImmArg<3>(N, 3, DAG, Subtarget));
7814 case Intrinsic::loongarch_lsx_vinsgr2vr_w:
7815 case Intrinsic::loongarch_lasx_xvinsgr2vr_d:
7816 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
7817 N->getOperand(1), N->getOperand(2),
7818 legalizeIntrinsicImmArg<2>(N, 3, DAG, Subtarget));
7819 case Intrinsic::loongarch_lsx_vinsgr2vr_d:
7820 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
7821 N->getOperand(1), N->getOperand(2),
7822 legalizeIntrinsicImmArg<1>(N, 3, DAG, Subtarget));
7823 case Intrinsic::loongarch_lsx_vreplgr2vr_b:
7824 case Intrinsic::loongarch_lsx_vreplgr2vr_h:
7825 case Intrinsic::loongarch_lsx_vreplgr2vr_w:
7826 case Intrinsic::loongarch_lsx_vreplgr2vr_d:
7827 case Intrinsic::loongarch_lasx_xvreplgr2vr_b:
7828 case Intrinsic::loongarch_lasx_xvreplgr2vr_h:
7829 case Intrinsic::loongarch_lasx_xvreplgr2vr_w:
7830 case Intrinsic::loongarch_lasx_xvreplgr2vr_d:
7831 return DAG.getNode(LoongArchISD::VREPLGR2VR, DL, N->getValueType(0),
7832 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
7833 N->getOperand(1)));
7834 case Intrinsic::loongarch_lsx_vreplve_b:
7835 case Intrinsic::loongarch_lsx_vreplve_h:
7836 case Intrinsic::loongarch_lsx_vreplve_w:
7837 case Intrinsic::loongarch_lsx_vreplve_d:
7838 case Intrinsic::loongarch_lasx_xvreplve_b:
7839 case Intrinsic::loongarch_lasx_xvreplve_h:
7840 case Intrinsic::loongarch_lasx_xvreplve_w:
7841 case Intrinsic::loongarch_lasx_xvreplve_d:
7842 return DAG.getNode(LoongArchISD::VREPLVE, DL, N->getValueType(0),
7843 N->getOperand(1),
7844 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
7845 N->getOperand(2)));
7846 case Intrinsic::loongarch_lsx_vpickve2gr_b:
7847 if (!Subtarget.is64Bit())
7848 return lowerVectorPickVE2GR<4>(N, DAG, LoongArchISD::VPICK_SEXT_ELT);
7849 break;
7850 case Intrinsic::loongarch_lsx_vpickve2gr_h:
7851 case Intrinsic::loongarch_lasx_xvpickve2gr_w:
7852 if (!Subtarget.is64Bit())
7853 return lowerVectorPickVE2GR<3>(N, DAG, LoongArchISD::VPICK_SEXT_ELT);
7854 break;
7855 case Intrinsic::loongarch_lsx_vpickve2gr_w:
7856 if (!Subtarget.is64Bit())
7857 return lowerVectorPickVE2GR<2>(N, DAG, LoongArchISD::VPICK_SEXT_ELT);
7858 break;
7859 case Intrinsic::loongarch_lsx_vpickve2gr_bu:
7860 if (!Subtarget.is64Bit())
7861 return lowerVectorPickVE2GR<4>(N, DAG, LoongArchISD::VPICK_ZEXT_ELT);
7862 break;
7863 case Intrinsic::loongarch_lsx_vpickve2gr_hu:
7864 case Intrinsic::loongarch_lasx_xvpickve2gr_wu:
7865 if (!Subtarget.is64Bit())
7866 return lowerVectorPickVE2GR<3>(N, DAG, LoongArchISD::VPICK_ZEXT_ELT);
7867 break;
7868 case Intrinsic::loongarch_lsx_vpickve2gr_wu:
7869 if (!Subtarget.is64Bit())
7870 return lowerVectorPickVE2GR<2>(N, DAG, LoongArchISD::VPICK_ZEXT_ELT);
7871 break;
7872 case Intrinsic::loongarch_lsx_bz_b:
7873 case Intrinsic::loongarch_lsx_bz_h:
7874 case Intrinsic::loongarch_lsx_bz_w:
7875 case Intrinsic::loongarch_lsx_bz_d:
7876 case Intrinsic::loongarch_lasx_xbz_b:
7877 case Intrinsic::loongarch_lasx_xbz_h:
7878 case Intrinsic::loongarch_lasx_xbz_w:
7879 case Intrinsic::loongarch_lasx_xbz_d:
7880 if (!Subtarget.is64Bit())
7881 return DAG.getNode(LoongArchISD::VALL_ZERO, DL, N->getValueType(0),
7882 N->getOperand(1));
7883 break;
7884 case Intrinsic::loongarch_lsx_bz_v:
7885 case Intrinsic::loongarch_lasx_xbz_v:
7886 if (!Subtarget.is64Bit())
7887 return DAG.getNode(LoongArchISD::VANY_ZERO, DL, N->getValueType(0),
7888 N->getOperand(1));
7889 break;
7890 case Intrinsic::loongarch_lsx_bnz_b:
7891 case Intrinsic::loongarch_lsx_bnz_h:
7892 case Intrinsic::loongarch_lsx_bnz_w:
7893 case Intrinsic::loongarch_lsx_bnz_d:
7894 case Intrinsic::loongarch_lasx_xbnz_b:
7895 case Intrinsic::loongarch_lasx_xbnz_h:
7896 case Intrinsic::loongarch_lasx_xbnz_w:
7897 case Intrinsic::loongarch_lasx_xbnz_d:
7898 if (!Subtarget.is64Bit())
7899 return DAG.getNode(LoongArchISD::VALL_NONZERO, DL, N->getValueType(0),
7900 N->getOperand(1));
7901 break;
7902 case Intrinsic::loongarch_lsx_bnz_v:
7903 case Intrinsic::loongarch_lasx_xbnz_v:
7904 if (!Subtarget.is64Bit())
7905 return DAG.getNode(LoongArchISD::VANY_NONZERO, DL, N->getValueType(0),
7906 N->getOperand(1));
7907 break;
7908 case Intrinsic::loongarch_lasx_concat_128_s:
7909 case Intrinsic::loongarch_lasx_concat_128_d:
7910 case Intrinsic::loongarch_lasx_concat_128:
7911 return DAG.getNode(ISD::CONCAT_VECTORS, DL, N->getValueType(0),
7912 N->getOperand(1), N->getOperand(2));
7913 }
7914 return SDValue();
7915}
7916
7919 const LoongArchSubtarget &Subtarget) {
7920 // If the input to MOVGR2FR_W_LA64 is just MOVFR2GR_S_LA64 then the
7921 // conversion is unnecessary and can be replaced with the
7922 // MOVFR2GR_S_LA64 operand.
7923 SDValue Op0 = N->getOperand(0);
7924 if (Op0.getOpcode() == LoongArchISD::MOVFR2GR_S_LA64)
7925 return Op0.getOperand(0);
7926 return SDValue();
7927}
7928
7931 const LoongArchSubtarget &Subtarget) {
7932 // If the input to MOVFR2GR_S_LA64 is just MOVGR2FR_W_LA64 then the
7933 // conversion is unnecessary and can be replaced with the MOVGR2FR_W_LA64
7934 // operand.
7935 SDValue Op0 = N->getOperand(0);
7936 if (Op0->getOpcode() == LoongArchISD::MOVGR2FR_W_LA64) {
7937 assert(Op0.getOperand(0).getValueType() == N->getSimpleValueType(0) &&
7938 "Unexpected value type!");
7939 return Op0.getOperand(0);
7940 }
7941 return SDValue();
7942}
7943
7944static SDValue
7947 MVT VT = N->getSimpleValueType(0);
7948 unsigned NumBits = VT.getScalarSizeInBits();
7949
7950 // Simplify the inputs.
7951 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
7952 APInt DemandedMask(APInt::getAllOnes(NumBits));
7953 if (TLI.SimplifyDemandedBits(SDValue(N, 0), DemandedMask, DCI))
7954 return SDValue(N, 0);
7955
7956 return SDValue();
7957}
7958
7959static SDValue
7962 const LoongArchSubtarget &Subtarget) {
7963 SDValue Op0 = N->getOperand(0);
7964 SDLoc DL(N);
7965
7966 // If the input to SplitPairF64 is just BuildPairF64 then the operation is
7967 // redundant. Instead, use BuildPairF64's operands directly.
7968 if (Op0->getOpcode() == LoongArchISD::BUILD_PAIR_F64)
7969 return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
7970
7971 if (Op0->isUndef()) {
7972 SDValue Lo = DAG.getUNDEF(MVT::i32);
7973 SDValue Hi = DAG.getUNDEF(MVT::i32);
7974 return DCI.CombineTo(N, Lo, Hi);
7975 }
7976
7977 // It's cheaper to materialise two 32-bit integers than to load a double
7978 // from the constant pool and transfer it to integer registers through the
7979 // stack.
7981 APInt V = C->getValueAPF().bitcastToAPInt();
7982 SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
7983 SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
7984 return DCI.CombineTo(N, Lo, Hi);
7985 }
7986
7987 return SDValue();
7988}
7989
7990/// Do target-specific dag combines on LoongArchISD::VANDN nodes.
7993 const LoongArchSubtarget &Subtarget) {
7994 SDValue N0 = N->getOperand(0);
7995 SDValue N1 = N->getOperand(1);
7996 MVT VT = N->getSimpleValueType(0);
7997 SDLoc DL(N);
7998
7999 // VANDN(undef, x) -> 0
8000 // VANDN(x, undef) -> 0
8001 if (N0.isUndef() || N1.isUndef())
8002 return DAG.getConstant(0, DL, VT);
8003
8004 // VANDN(0, x) -> x
8006 return N1;
8007
8008 // VANDN(x, 0) -> 0
8010 return DAG.getConstant(0, DL, VT);
8011
8012 // VANDN(x, -1) -> NOT(x) -> XOR(x, -1)
8014 return DAG.getNOT(DL, N0, VT);
8015
8016 // Turn VANDN back to AND if input is inverted.
8017 if (SDValue Not = isNOT(N0, DAG))
8018 return DAG.getNode(ISD::AND, DL, VT, DAG.getBitcast(VT, Not), N1);
8019
8020 // Folds for better commutativity:
8021 if (N1->hasOneUse()) {
8022 // VANDN(x,NOT(y)) -> AND(NOT(x),NOT(y)) -> NOT(OR(X,Y)).
8023 if (SDValue Not = isNOT(N1, DAG))
8024 return DAG.getNOT(
8025 DL, DAG.getNode(ISD::OR, DL, VT, N0, DAG.getBitcast(VT, Not)), VT);
8026
8027 // VANDN(x, SplatVector(Imm)) -> AND(NOT(x), NOT(SplatVector(~Imm)))
8028 // -> NOT(OR(x, SplatVector(~Imm)))
8029 // Combination is performed only when VT is v16i8/v32i8, using `vnori.b` to
8030 // gain benefits.
8031 if (!DCI.isBeforeLegalizeOps() && (VT == MVT::v16i8 || VT == MVT::v32i8) &&
8032 N1.getOpcode() == ISD::BUILD_VECTOR) {
8033 if (SDValue SplatValue =
8034 cast<BuildVectorSDNode>(N1.getNode())->getSplatValue()) {
8035 if (!N1->isOnlyUserOf(SplatValue.getNode()))
8036 return SDValue();
8037
8038 if (auto *C = dyn_cast<ConstantSDNode>(SplatValue)) {
8039 uint8_t NCVal = static_cast<uint8_t>(~(C->getSExtValue()));
8040 SDValue Not =
8041 DAG.getSplat(VT, DL, DAG.getTargetConstant(NCVal, DL, MVT::i8));
8042 return DAG.getNOT(
8043 DL, DAG.getNode(ISD::OR, DL, VT, N0, DAG.getBitcast(VT, Not)),
8044 VT);
8045 }
8046 }
8047 }
8048 }
8049
8050 return SDValue();
8051}
8052
8055 const LoongArchSubtarget &Subtarget) {
8056 SDLoc DL(N);
8057 EVT VT = N->getValueType(0);
8058
8059 if (VT != MVT::f32 && VT != MVT::f64)
8060 return SDValue();
8061 if (VT == MVT::f32 && !Subtarget.hasBasicF())
8062 return SDValue();
8063 if (VT == MVT::f64 && !Subtarget.hasBasicD())
8064 return SDValue();
8065
8066 // Only optimize when the source and destination types have the same width.
8067 if (VT.getSizeInBits() != N->getOperand(0).getValueSizeInBits())
8068 return SDValue();
8069
8070 SDValue Src = N->getOperand(0);
8071 // If the result of an integer load is only used by an integer-to-float
8072 // conversion, use a fp load instead. This eliminates an integer-to-float-move
8073 // (movgr2fr) instruction.
8074 if (ISD::isNormalLoad(Src.getNode()) && Src.hasOneUse() &&
8075 // Do not change the width of a volatile load. This condition check is
8076 // inspired by AArch64.
8077 !cast<LoadSDNode>(Src)->isVolatile()) {
8078 LoadSDNode *LN0 = cast<LoadSDNode>(Src);
8079 SDValue Load = DAG.getLoad(VT, DL, LN0->getChain(), LN0->getBasePtr(),
8080 LN0->getPointerInfo(), LN0->getAlign(),
8081 LN0->getMemOperand()->getFlags());
8082
8083 // Make sure successors of the original load stay after it by updating them
8084 // to use the new Chain.
8085 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), Load.getValue(1));
8086 return DAG.getNode(LoongArchISD::SITOF, SDLoc(N), VT, Load);
8087 }
8088
8089 return SDValue();
8090}
8091
8092// Using [X]VFTINTRZ_W_D for double to signed 32-bit integer conversion.
8093// For example:
8094// v4i32 = fp_to_sint (concat_vectors v2f64, v2f64)
8095// Can be combined into:
8096 // v4i32 = VFTINTRZ_W_D v2f64, v2f64
8099 const LoongArchSubtarget &Subtarget) {
8100 if (!Subtarget.hasExtLSX())
8101 return SDValue();
8102
8103 SDLoc DL(N);
8104 EVT DstVT = N->getValueType(0);
8105 SDValue Src = N->getOperand(0);
8106 EVT SrcVT = Src.getValueType();
8107 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
8108
8109 if (!DstVT.isVector() || !DstVT.isSimple() || !SrcVT.isSimple())
8110 return SDValue();
8111
8112 unsigned SrcEltBits = SrcVT.getScalarSizeInBits();
8113 unsigned SrcBits = SrcVT.getSizeInBits();
8114 unsigned DstEltBits = DstVT.getScalarSizeInBits();
8115 unsigned NumElts = DstVT.getVectorNumElements();
8116 unsigned BlockBits = Subtarget.hasExtLASX() ? 256 : 128;
8117
8118 if (!isPowerOf2_32(NumElts) || !isPowerOf2_32(DstEltBits))
8119 return SDValue();
8120
8121 if (SrcBits % BlockBits != 0 && SrcBits != 128)
8122 return SDValue();
8123
8124 if (DstEltBits < 32) {
8125 MVT PromoteVT = MVT::getVectorVT(MVT::getIntegerVT(32), NumElts);
8126 SDValue Conv = DAG.getNode(N->getOpcode(), DL, PromoteVT, Src);
8127 return DAG.getNode(ISD::TRUNCATE, DL, DstVT, Conv);
8128 }
8129
8130 if (SrcEltBits != 64 || DstEltBits != 32)
8131 return SDValue();
8132
8133 if (!IsSigned) {
8134 // LASX already has pattern for double convert to uint32.
8135 if (Subtarget.hasExtLASX())
8136 return SDValue();
8137 MVT TmpVT = MVT::getVectorVT(MVT::i64, NumElts);
8138 SDValue Tmp = DAG.getNode(ISD::FP_TO_SINT, DL, TmpVT, Src);
8139 return DAG.getNode(ISD::TRUNCATE, DL, DstVT, Tmp);
8140 }
8141
8143 unsigned BlockNumElts = BlockBits / 64;
8144 MVT BlockVT = MVT::getVectorVT(MVT::f64, BlockNumElts);
8145 if (Src.getOpcode() == ISD::CONCAT_VECTORS &&
8146 Src.getOperand(0).getValueType() == BlockVT) {
8147 for (unsigned i = 0; i < Src.getNumOperands(); i++)
8148 Blocks.push_back(Src.getOperand(i));
8149 } else if (SrcBits > BlockBits) {
8150 // Wider than one register: extract each BlockBits-wide sub-vector.
8151 for (unsigned i = 0; i < SrcBits / BlockBits; i++)
8152 Blocks.push_back(
8153 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, BlockVT, Src,
8154 DAG.getVectorIdxConstant(i * BlockNumElts, DL)));
8155 } else {
8156 BlockBits = SrcBits;
8157 Blocks.push_back(Src);
8158 }
8159
8160 MVT NativeVT = BlockBits == 256 ? MVT::v8i32 : MVT::v4i32;
8162 for (unsigned i = 0; i < Blocks.size(); i += 2) {
8163 SDValue Lo = Blocks[i];
8164 SDValue Hi = Blocks.size() > 1 ? Blocks[i + 1] : Lo;
8165 SDValue Res = DAG.getNode(LoongArchISD::VFTINTRZ, DL, NativeVT, Hi, Lo);
8166
8167 if (BlockBits == 256) {
8168 SDValue Undef = DAG.getUNDEF(Res.getValueType());
8169 SmallVector<int, 8> Mask = {0, 1, 4, 5, 2, 3, 6, 7};
8170 Res = DAG.getVectorShuffle(Res.getValueType(), DL, Res, Undef, Mask);
8171 Res = DAG.getBitcast(NativeVT, Res);
8172 }
8173
8174 Parts.push_back(Res);
8175 }
8176
8177 if (Blocks.size() == 1)
8178 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, DstVT, Parts[0],
8179 DAG.getVectorIdxConstant(0, DL));
8180 return DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Parts);
8181}
8182
8183// Try to widen AND, OR and XOR nodes to VT in order to remove casts around
8184// logical operations, like in the example below.
8185 // or (and (truncate x), (truncate y)),
8186 // (xor (truncate z), (build_vector (constants)))
8187// Given a target type \p VT, we generate
8188// or (and x, y), (xor z, zext(build_vector (constants)))
8189 // given x, y and z are of type \p VT. We can do so if each operand is a
8190 // truncate from \p VT or can be recursively promoted; the second operand may
8191 // also be a vector of constants or an existing extension we can extend further.
8193 SelectionDAG &DAG,
8194 const LoongArchSubtarget &Subtarget,
8195 unsigned Depth) {
8196 // Limit recursion to avoid excessive compile times.
8198 return SDValue();
8199
8200 if (!ISD::isBitwiseLogicOp(N.getOpcode()))
8201 return SDValue();
8202
8203 SDValue N0 = N.getOperand(0);
8204 SDValue N1 = N.getOperand(1);
8205
8206 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
8207 if (!TLI.isOperationLegalOrPromote(N.getOpcode(), VT))
8208 return SDValue();
8209
8210 if (SDValue NN0 =
8211 PromoteMaskArithmetic(N0, DL, VT, DAG, Subtarget, Depth + 1))
8212 N0 = NN0;
8213 else {
8214 // The left side has to be a 'trunc'.
8215 bool LHSTrunc = N0.getOpcode() == ISD::TRUNCATE &&
8216 N0.getOperand(0).getValueType() == VT;
8217 if (LHSTrunc)
8218 N0 = N0.getOperand(0);
8219 else
8220 return SDValue();
8221 }
8222
8223 if (SDValue NN1 =
8224 PromoteMaskArithmetic(N1, DL, VT, DAG, Subtarget, Depth + 1))
8225 N1 = NN1;
8226 else {
8227 // The right side has to be a 'trunc', a (foldable) constant or an
8228 // existing extension we can extend further.
8229 bool RHSTrunc = N1.getOpcode() == ISD::TRUNCATE &&
8230 N1.getOperand(0).getValueType() == VT;
8231 if (RHSTrunc)
8232 N1 = N1.getOperand(0);
8233 else if (ISD::isExtVecInRegOpcode(N1.getOpcode()) && VT.is256BitVector() &&
8234 Subtarget.hasExtLASX() && N1.hasOneUse())
8235 N1 = DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0));
8236 // On 32-bit platform, i64 is an illegal integer scalar type, and
8237 // FoldConstantArithmetic will fail for v4i64. This may be optimized in the
8238 // future.
8239 else if (SDValue Cst =
8241 N1 = Cst;
8242 else
8243 return SDValue();
8244 }
8245
8246 return DAG.getNode(N.getOpcode(), DL, VT, N0, N1);
8247}
8248
8249// On LASX the type v4i1/v8i1/v16i1 may be legalized to v4i32/v8i16/v16i8, which
8250 // are LSX-sized register types. In most cases we actually compare or select LASX-sized
8251// registers and mixing the two types creates horrible code. This method
8252// optimizes some of the transition sequences.
8254 SelectionDAG &DAG,
8255 const LoongArchSubtarget &Subtarget) {
8256 EVT VT = N.getValueType();
8257 assert(VT.isVector() && "Expected vector type");
8258 assert((N.getOpcode() == ISD::ANY_EXTEND ||
8259 N.getOpcode() == ISD::ZERO_EXTEND ||
8260 N.getOpcode() == ISD::SIGN_EXTEND) &&
8261 "Invalid Node");
8262
8263 if (!Subtarget.hasExtLASX() || !VT.is256BitVector())
8264 return SDValue();
8265
8266 SDValue Narrow = N.getOperand(0);
8267 EVT NarrowVT = Narrow.getValueType();
8268
8269 // Generate the wide operation.
8270 SDValue Op = PromoteMaskArithmetic(Narrow, DL, VT, DAG, Subtarget, 0);
8271 if (!Op)
8272 return SDValue();
8273 switch (N.getOpcode()) {
8274 default:
8275 llvm_unreachable("Unexpected opcode");
8276 case ISD::ANY_EXTEND:
8277 return Op;
8278 case ISD::ZERO_EXTEND:
8279 return DAG.getZeroExtendInReg(Op, DL, NarrowVT);
8280 case ISD::SIGN_EXTEND:
8281 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
8282 DAG.getValueType(NarrowVT));
8283 }
8284}
8285
8288 const LoongArchSubtarget &Subtarget) {
8289 EVT VT = N->getValueType(0);
8290 SDLoc DL(N);
8291
8292 if (VT.isVector())
8293 if (SDValue R = PromoteMaskArithmetic(SDValue(N, 0), DL, DAG, Subtarget))
8294 return R;
8295
8296 return SDValue();
8297}
8298
8299static SDValue
8302 const LoongArchSubtarget &Subtarget) {
8303 SDLoc DL(N);
8304 EVT VT = N->getValueType(0);
8305
8306 if (VT.isVector() && N->getNumOperands() == 2)
8307 if (SDValue R = combineFP_ROUND(SDValue(N, 0), DL, DAG, Subtarget))
8308 return R;
8309
8310 return SDValue();
8311}
8312
8315 const LoongArchSubtarget &Subtarget) {
8316 if (DCI.isBeforeLegalizeOps())
8317 return SDValue();
8318
8319 EVT VT = N->getValueType(0);
8320 if (!VT.isVector())
8321 return SDValue();
8322
8323 if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
8324 return SDValue();
8325
8326 EVT EltVT = VT.getVectorElementType();
8327 if (!EltVT.isInteger())
8328 return SDValue();
8329
8330 SDValue Cond = N->getOperand(0);
8331 SDValue TrueVal = N->getOperand(1);
8332 SDValue FalseVal = N->getOperand(2);
8333
8334 // match:
8335 //
8336 // vselect (setcc shift, 0, seteq),
8337 // x,
8338 // rounded_shift
8339
8340 if (Cond.getOpcode() != ISD::SETCC)
8341 return SDValue();
8342
8343 if (!ISD::isConstantSplatVectorAllZeros(Cond.getOperand(1).getNode()))
8344 return SDValue();
8345
8346 auto *CC = cast<CondCodeSDNode>(Cond.getOperand(2));
8347 if (CC->get() != ISD::SETEQ)
8348 return SDValue();
8349
8350 SDValue Shift = Cond.getOperand(0);
8351
8352 // True branch must be original value:
8353 //
8354 // vselect cond, x, ...
8355
8356 SDValue X = TrueVal;
8357
8358 // Now match rounded shift pattern:
8359 //
8360 // add
8361 // (and
8362 // (srl X, shift-1)
8363 // 1)
8364 // (srl/sra X, shift)
8365
8366 if (FalseVal.getOpcode() != ISD::ADD)
8367 return SDValue();
8368
8369 SDValue Add0 = FalseVal.getOperand(0);
8370 SDValue Add1 = FalseVal.getOperand(1);
8371 SDValue And;
8372 SDValue Shr;
8373
8374 if (Add0.getOpcode() == ISD::AND) {
8375 And = Add0;
8376 Shr = Add1;
8377 } else if (Add1.getOpcode() == ISD::AND) {
8378 And = Add1;
8379 Shr = Add0;
8380 } else {
8381 return SDValue();
8382 }
8383
8384 // match:
8385 //
8386 // srl/sra X, shift
8387
8388 if (Shr.getOpcode() != ISD::SRL && Shr.getOpcode() != ISD::SRA)
8389 return SDValue();
8390
8391 if (Shr.getOperand(0) != X)
8392 return SDValue();
8393
8394 if (Shr.getOperand(1) != Shift)
8395 return SDValue();
8396
8397 // match:
8398 //
8399 // and
8400 // (srl X, shift-1)
8401 // 1
8402
8403 SDValue Srl = And.getOperand(0);
8404 SDValue One = And.getOperand(1);
8405 APInt SplatVal;
8406
8407 if (Srl.getOpcode() != ISD::SRL)
8408 return SDValue();
8409
8410 One = peekThroughBitcasts(One);
8411 if (!isConstantSplatVector(One, SplatVal, EltVT.getSizeInBits()))
8412 return SDValue();
8413
8414 if (SplatVal != 1)
8415 return SDValue();
8416
8417 if (Srl.getOperand(0) != X)
8418 return SDValue();
8419
8420 // match:
8421 //
8422 // shift-1
8423
8424 SDValue ShiftMinus1 = Srl.getOperand(1);
8425
8426 if (ShiftMinus1.getOpcode() != ISD::ADD)
8427 return SDValue();
8428
8429 if (ShiftMinus1.getOperand(0) != Shift)
8430 return SDValue();
8431
8433 return SDValue();
8434
8435 // We matched a rounded right shift pattern and can lower it
8436 // to a single vector rounded shift instruction.
8437
8438 SDLoc DL(N);
8439 return DAG.getNode(Shr.getOpcode() == ISD::SRL ? LoongArchISD::VSRLR
8440 : LoongArchISD::VSRAR,
8441 DL, VT, X, Shift);
8442}
8443
// Opcode dispatcher for target DAG combines. Each case forwards N to the
// matching perform*Combine helper and returns that helper's result directly;
// an opcode with no case leaves the switch and yields an empty SDValue.
// NOTE(review): this listing has lost embedded line 8444 (the start of the
// signature, including the function name) and the case labels at embedded
// lines 8481 and 8498. The two `return` lines that follow those gaps therefore
// look unreachable here; restore the labels from the real source.
8445 DAGCombinerInfo &DCI) const {
8446 SelectionDAG &DAG = DCI.DAG;
8447 switch (N->getOpcode()) {
8448 default:
8449 break;
8450 case ISD::ADD:
8451 return performADDCombine(N, DAG, DCI, Subtarget);
8452 case ISD::AND:
8453 return performANDCombine(N, DAG, DCI, Subtarget);
8454 case ISD::OR:
8455 return performORCombine(N, DAG, DCI, Subtarget);
8456 case ISD::SETCC:
8457 return performSETCCCombine(N, DAG, DCI, Subtarget);
8458 case ISD::SHL:
8459 return performSHLCombine(N, DAG, DCI, Subtarget);
8460 case ISD::SRL:
8461 return performSRLCombine(N, DAG, DCI, Subtarget);
8462 case ISD::SUB:
8463 return performSUBCombine(N, DAG, DCI, Subtarget);
8464 case ISD::BITCAST:
8465 return performBITCASTCombine(N, DAG, DCI, Subtarget);
// All three integer extensions share one combine.
8466 case ISD::ANY_EXTEND:
8467 case ISD::ZERO_EXTEND:
8468 case ISD::SIGN_EXTEND:
8469 return performEXTENDCombine(N, DAG, DCI, Subtarget);
8470 case ISD::SINT_TO_FP:
8471 return performSINT_TO_FPCombine(N, DAG, DCI, Subtarget);
8472 case ISD::FP_TO_SINT:
8473 case ISD::FP_TO_UINT:
8474 return performFP_TO_INTCombine(N, DAG, DCI, Subtarget);
8475 case LoongArchISD::BITREV_W:
8476 return performBITREV_WCombine(N, DAG, DCI, Subtarget);
8477 case LoongArchISD::BR_CC:
8478 return performBR_CCCombine(N, DAG, DCI, Subtarget);
8479 case LoongArchISD::SELECT_CC:
8480 return performSELECT_CCCombine(N, DAG, DCI, Subtarget);
// NOTE(review): case label missing here (embedded line 8481); presumably the
// INTRINSIC_WO_CHAIN opcode, judging by the helper name -- confirm.
8482 return performINTRINSIC_WO_CHAINCombine(N, DAG, DCI, Subtarget);
8483 case LoongArchISD::MOVGR2FR_W_LA64:
8484 return performMOVGR2FR_WCombine(N, DAG, DCI, Subtarget);
8485 case LoongArchISD::MOVFR2GR_S_LA64:
8486 return performMOVFR2GR_SCombine(N, DAG, DCI, Subtarget);
// These nodes all go through the demanded-bits combine, which takes no
// Subtarget argument.
8487 case LoongArchISD::CRC_W_B_W:
8488 case LoongArchISD::CRC_W_H_W:
8489 case LoongArchISD::CRCC_W_B_W:
8490 case LoongArchISD::CRCC_W_H_W:
8491 case LoongArchISD::VMSKLTZ:
8492 case LoongArchISD::XVMSKLTZ:
8493 return performDemandedBitsCombine(N, DAG, DCI);
8494 case LoongArchISD::SPLIT_PAIR_F64:
8495 return performSPLIT_PAIR_F64Combine(N, DAG, DCI, Subtarget);
8496 case LoongArchISD::VANDN:
8497 return performVANDNCombine(N, DAG, DCI, Subtarget);
// NOTE(review): case label missing here (embedded line 8498); presumably the
// CONCAT_VECTORS opcode, judging by the helper name -- confirm.
8499 return performCONCAT_VECTORSCombine(N, DAG, DCI, Subtarget);
8500 case ISD::VSELECT:
8501 return performVSELECTCombine(N, DAG, DCI, Subtarget);
// Unlike the cases above, this one only returns when combineFP_ROUND produced
// a value; otherwise control leaves the switch and the empty SDValue below is
// returned.
8502 case LoongArchISD::VPACKEV:
8503 case LoongArchISD::VPERMI:
8504 if (SDValue Result =
8505 combineFP_ROUND(SDValue(N, 0), SDLoc(N), DAG, Subtarget))
8506 return Result;
8507 }
8508 return SDValue();
8509}
8510
// Guards an integer div/mod instruction (MI, located in MBB) with a
// divide-by-zero trap. When the ZeroDivCheck option is off the block is
// returned untouched. Otherwise everything after MI is moved into a new
// SinkMBB, a BNE on the divisor is added to MBB to skip a new BreakMBB holding
// `break 7`, and SinkMBB is returned.
// NOTE(review): the signature lines (embedded lines 8511-8512) are missing from
// this listing. The name is taken from the call `insertDivByZeroTrap(MI, BB)`
// in EmitInstrWithCustomInserter; the body uses parameters MI and MBB.
8513 if (!ZeroDivCheck)
8514 return MBB;
8515
8516 // Build instructions:
8517 // MBB:
8518 // div(or mod) $dst, $dividend, $divisor
8519 // bne $divisor, $zero, SinkMBB
8520 // BreakMBB:
8521 // break 7 // BRK_DIVZERO
8522 // SinkMBB:
8523 // fallthrough
8524 const BasicBlock *LLVM_BB = MBB->getBasicBlock();
8525 MachineFunction::iterator It = ++MBB->getIterator();
8526 MachineFunction *MF = MBB->getParent();
8527 auto BreakMBB = MF->CreateMachineBasicBlock(LLVM_BB);
8528 auto SinkMBB = MF->CreateMachineBasicBlock(LLVM_BB);
// Both blocks are inserted at the same position after MBB, giving the layout
// MBB, BreakMBB, SinkMBB shown in the sketch above.
8529 MF->insert(It, BreakMBB);
8530 MF->insert(It, SinkMBB);
8531
8532 // Transfer the remainder of MBB and its successor edges to SinkMBB.
8533 SinkMBB->splice(SinkMBB->end(), MBB, std::next(MI.getIterator()), MBB->end());
8534 SinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
8535
8536 const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
8537 DebugLoc DL = MI.getDebugLoc();
// Operand 2 of the div/mod instruction is treated as the divisor.
8538 MachineOperand &Divisor = MI.getOperand(2);
8539 Register DivisorReg = Divisor.getReg();
8540
8541 // MBB:
// The branch inherits the divisor's kill state from MI, since it is now the
// last reader of the register in this block.
8542 BuildMI(MBB, DL, TII.get(LoongArch::BNE))
8543 .addReg(DivisorReg, getKillRegState(Divisor.isKill()))
8544 .addReg(LoongArch::R0)
8545 .addMBB(SinkMBB);
8546 MBB->addSuccessor(BreakMBB);
8547 MBB->addSuccessor(SinkMBB);
8548
8549 // BreakMBB:
8550 // See linux header file arch/loongarch/include/uapi/asm/break.h for the
8551 // definition of BRK_DIVZERO.
8552 BuildMI(BreakMBB, DL, TII.get(LoongArch::BREAK)).addImm(7 /*BRK_DIVZERO*/);
8553 BreakMBB->addSuccessor(SinkMBB);
8554
8555 // Clear Divisor's kill flag.
// The BNE emitted above reads the divisor after MI, so MI must not kill it.
8556 Divisor.setIsKill(false);
8557
8558 return SinkMBB;
8559}
8560
// Expands a vector "branch if (not) zero" pseudo into real control flow that
// materialises a 0/1 result. The pseudo opcode selects a VSET*/XVSET*
// instruction that writes a condition flag register from operand 1; a BCNEZ on
// that flag targets TrueBB (which produces 1) and otherwise control reaches
// FalseBB (which produces 0). A PHI in SinkBB merges the two values into
// operand 0. The pseudo is erased and SinkBB is returned.
// NOTE(review): this listing has lost the line carrying the function name and
// first parameters (embedded line 8562), the declarations of `MRI` and `It`
// (embedded lines 8633-8634) and one statement after the splice (embedded line
// 8647, presumably the successor transfer its comment mentions). The name is
// inferred from the matching case list in EmitInstrWithCustomInserter.
8561static MachineBasicBlock *
8563 const LoongArchSubtarget &Subtarget) {
8564 unsigned CondOpc;
// Pick the flag-setting instruction. The plain (unsuffixed) pseudos map to the
// whole-vector *_V forms; the element-sized Z pseudos map to SETANYEQZ and the
// element-sized NZ pseudos map to SETALLNEZ.
8565 switch (MI.getOpcode()) {
8566 default:
8567 llvm_unreachable("Unexpected opcode");
8568 case LoongArch::PseudoVBZ:
8569 CondOpc = LoongArch::VSETEQZ_V;
8570 break;
8571 case LoongArch::PseudoVBZ_B:
8572 CondOpc = LoongArch::VSETANYEQZ_B;
8573 break;
8574 case LoongArch::PseudoVBZ_H:
8575 CondOpc = LoongArch::VSETANYEQZ_H;
8576 break;
8577 case LoongArch::PseudoVBZ_W:
8578 CondOpc = LoongArch::VSETANYEQZ_W;
8579 break;
8580 case LoongArch::PseudoVBZ_D:
8581 CondOpc = LoongArch::VSETANYEQZ_D;
8582 break;
8583 case LoongArch::PseudoVBNZ:
8584 CondOpc = LoongArch::VSETNEZ_V;
8585 break;
8586 case LoongArch::PseudoVBNZ_B:
8587 CondOpc = LoongArch::VSETALLNEZ_B;
8588 break;
8589 case LoongArch::PseudoVBNZ_H:
8590 CondOpc = LoongArch::VSETALLNEZ_H;
8591 break;
8592 case LoongArch::PseudoVBNZ_W:
8593 CondOpc = LoongArch::VSETALLNEZ_W;
8594 break;
8595 case LoongArch::PseudoVBNZ_D:
8596 CondOpc = LoongArch::VSETALLNEZ_D;
8597 break;
8598 case LoongArch::PseudoXVBZ:
8599 CondOpc = LoongArch::XVSETEQZ_V;
8600 break;
8601 case LoongArch::PseudoXVBZ_B:
8602 CondOpc = LoongArch::XVSETANYEQZ_B;
8603 break;
8604 case LoongArch::PseudoXVBZ_H:
8605 CondOpc = LoongArch::XVSETANYEQZ_H;
8606 break;
8607 case LoongArch::PseudoXVBZ_W:
8608 CondOpc = LoongArch::XVSETANYEQZ_W;
8609 break;
8610 case LoongArch::PseudoXVBZ_D:
8611 CondOpc = LoongArch::XVSETANYEQZ_D;
8612 break;
8613 case LoongArch::PseudoXVBNZ:
8614 CondOpc = LoongArch::XVSETNEZ_V;
8615 break;
8616 case LoongArch::PseudoXVBNZ_B:
8617 CondOpc = LoongArch::XVSETALLNEZ_B;
8618 break;
8619 case LoongArch::PseudoXVBNZ_H:
8620 CondOpc = LoongArch::XVSETALLNEZ_H;
8621 break;
8622 case LoongArch::PseudoXVBNZ_W:
8623 CondOpc = LoongArch::XVSETALLNEZ_W;
8624 break;
8625 case LoongArch::PseudoXVBNZ_D:
8626 CondOpc = LoongArch::XVSETALLNEZ_D;
8627 break;
8628 }
8629
8630 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
8631 const BasicBlock *LLVM_BB = BB->getBasicBlock();
8632 DebugLoc DL = MI.getDebugLoc();
// NOTE(review): declarations of `MRI` and `It` are missing at this point
// (embedded lines 8633-8634).
8635
8636 MachineFunction *F = BB->getParent();
8637 MachineBasicBlock *FalseBB = F->CreateMachineBasicBlock(LLVM_BB);
8638 MachineBasicBlock *TrueBB = F->CreateMachineBasicBlock(LLVM_BB);
8639 MachineBasicBlock *SinkBB = F->CreateMachineBasicBlock(LLVM_BB);
8640
// Insertion order fixes the layout FalseBB, TrueBB, SinkBB.
8641 F->insert(It, FalseBB);
8642 F->insert(It, TrueBB);
8643 F->insert(It, SinkBB);
8644
8645 // Transfer the remainder of MBB and its successor edges to Sink.
8646 SinkBB->splice(SinkBB->end(), BB, std::next(MI.getIterator()), BB->end());
// NOTE(review): one statement is missing here (embedded line 8647).
8648
8649 // Insert the real instruction to BB.
8650 Register FCC = MRI.createVirtualRegister(&LoongArch::CFRRegClass);
8651 BuildMI(BB, DL, TII->get(CondOpc), FCC).addReg(MI.getOperand(1).getReg());
8652
8653 // Insert branch.
8654 BuildMI(BB, DL, TII->get(LoongArch::BCNEZ)).addReg(FCC).addMBB(TrueBB);
8655 BB->addSuccessor(FalseBB);
8656 BB->addSuccessor(TrueBB);
8657
8658 // FalseBB.
// Produces the constant 0 (R0 + 0) and jumps over TrueBB.
8659 Register RD1 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
8660 BuildMI(FalseBB, DL, TII->get(LoongArch::ADDI_W), RD1)
8661 .addReg(LoongArch::R0)
8662 .addImm(0);
8663 BuildMI(FalseBB, DL, TII->get(LoongArch::PseudoBR)).addMBB(SinkBB);
8664 FalseBB->addSuccessor(SinkBB);
8665
8666 // TrueBB.
// Produces the constant 1 (R0 + 1). No branch is emitted; TrueBB was inserted
// directly before SinkBB.
8667 Register RD2 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
8668 BuildMI(TrueBB, DL, TII->get(LoongArch::ADDI_W), RD2)
8669 .addReg(LoongArch::R0)
8670 .addImm(1);
8671 TrueBB->addSuccessor(SinkBB);
8672
8673 // SinkBB: merge the results.
8674 BuildMI(*SinkBB, SinkBB->begin(), DL, TII->get(LoongArch::PHI),
8675 MI.getOperand(0).getReg())
8676 .addReg(RD1)
8677 .addMBB(FalseBB)
8678 .addReg(RD2)
8679 .addMBB(TrueBB);
8680
8681 // The pseudo instruction is gone now.
8682 MI.eraseFromParent();
8683 return SinkBB;
8684}
8685
// Expands the byte/halfword "insert GPR into 256-bit vector" pseudos.
// Operands: 0 = result vector, 1 = source vector, 2 = scalar element,
// 3 = immediate element index. Two strategies are used:
//  * if the source vector is a virtual register defined by IMPLICIT_DEF and
//    the index lies below HalfSize, the element is inserted into the sub_128
//    subregister with the 128-bit VINSGR2VR_{B,H} and the result is widened
//    with SUBREG_TO_REG;
//  * otherwise the scalar is broadcast, combined with the source through
//    XVPERMI_Q, and placed with XVEXTRINS_{B,H}.
// The pseudo is erased and BB is returned.
// NOTE(review): the line carrying the function name and first parameters
// (embedded line 8687) and the declaration of `MRI` (embedded line 8710) are
// missing from this listing. The name is inferred from the matching cases in
// EmitInstrWithCustomInserter.
8686static MachineBasicBlock *
8688 const LoongArchSubtarget &Subtarget) {
8689 unsigned InsOp;
8690 unsigned BroadcastOp;
8691 unsigned HalfSize;
8692 switch (MI.getOpcode()) {
8693 default:
8694 llvm_unreachable("Unexpected opcode");
8695 case LoongArch::PseudoXVINSGR2VR_B:
8696 HalfSize = 16;
8697 BroadcastOp = LoongArch::XVREPLGR2VR_B;
8698 InsOp = LoongArch::XVEXTRINS_B;
8699 break;
8700 case LoongArch::PseudoXVINSGR2VR_H:
8701 HalfSize = 8;
8702 BroadcastOp = LoongArch::XVREPLGR2VR_H;
8703 InsOp = LoongArch::XVEXTRINS_H;
8704 break;
8705 }
8706 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
8707 const TargetRegisterClass *RC = &LoongArch::LASX256RegClass;
8708 const TargetRegisterClass *SubRC = &LoongArch::LSX128RegClass;
8709 DebugLoc DL = MI.getDebugLoc();
8711 // XDst = vector_insert XSrc, Elt, Idx
8712 Register XDst = MI.getOperand(0).getReg();
8713 Register XSrc = MI.getOperand(1).getReg();
8714 Register Elt = MI.getOperand(2).getReg();
8715 unsigned Idx = MI.getOperand(3).getImm();
8716
8717 if (XSrc.isVirtual() && MRI.getVRegDef(XSrc)->isImplicitDef() &&
8718 Idx < HalfSize) {
8719 Register ScratchSubReg1 = MRI.createVirtualRegister(SubRC);
8720 Register ScratchSubReg2 = MRI.createVirtualRegister(SubRC);
8721
8722 BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), ScratchSubReg1)
8723 .addReg(XSrc, {}, LoongArch::sub_128);
// HalfSize doubles as the element-width discriminator here: 8 means the
// halfword pseudo, anything else (16) the byte pseudo.
8724 BuildMI(*BB, MI, DL,
8725 TII->get(HalfSize == 8 ? LoongArch::VINSGR2VR_H
8726 : LoongArch::VINSGR2VR_B),
8727 ScratchSubReg2)
8728 .addReg(ScratchSubReg1)
8729 .addReg(Elt)
8730 .addImm(Idx);
8731
8732 BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), XDst)
8733 .addReg(ScratchSubReg2)
8734 .addImm(LoongArch::sub_128);
8735 } else {
8736 Register ScratchReg1 = MRI.createVirtualRegister(RC);
8737 Register ScratchReg2 = MRI.createVirtualRegister(RC);
8738
8739 BuildMI(*BB, MI, DL, TII->get(BroadcastOp), ScratchReg1).addReg(Elt);
8740
// Immediate is 48 (0x30) when the index is in the upper half and 18 (0x12)
// otherwise. NOTE(review): the lane-selection meaning of these values is not
// visible in this file; confirm against the XVPERMI.Q definition.
8741 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), ScratchReg2)
8742 .addReg(ScratchReg1)
8743 .addReg(XSrc)
8744 .addImm(Idx >= HalfSize ? 48 : 18);
8745
// The index is first reduced to its position within a half. Multiplying by 17
// (0x11) then copies that value into both nibbles of the immediate (exact for
// values below 16).
8746 BuildMI(*BB, MI, DL, TII->get(InsOp), XDst)
8747 .addReg(XSrc)
8748 .addReg(ScratchReg2)
8749 .addImm((Idx >= HalfSize ? Idx - HalfSize : Idx) * 17);
8750 }
8751
8752 MI.eraseFromParent();
8753 return BB;
8754}
8755
// Expands a scalar population-count pseudo using LSX vector instructions:
// broadcast the source GPR (operand 1) into a 128-bit vector, run the
// element-wise VPCNT of the matching width, then read element 0 back into the
// destination GPR (operand 0). The pseudo is erased and BB is returned.
// Requires LSX (asserted).
// NOTE(review): the lines carrying the function name and first parameters
// (embedded lines 8756-8757) and the declaration of `MRI` (embedded line 8763)
// are missing from this listing. The name is inferred from the matching cases
// in EmitInstrWithCustomInserter.
8758 const LoongArchSubtarget &Subtarget) {
8759 assert(Subtarget.hasExtLSX());
8760 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
8761 const TargetRegisterClass *RC = &LoongArch::LSX128RegClass;
8762 DebugLoc DL = MI.getDebugLoc();
8764 Register Dst = MI.getOperand(0).getReg();
8765 Register Src = MI.getOperand(1).getReg();
8766
8767 unsigned BroadcastOp, CTOp, PickOp;
// The _LA32 pseudo variants share the instruction triple of their plain
// counterparts.
8768 switch (MI.getOpcode()) {
8769 default:
8770 llvm_unreachable("Unexpected opcode");
8771 case LoongArch::PseudoCTPOP_B:
8772 BroadcastOp = LoongArch::VREPLGR2VR_B;
8773 CTOp = LoongArch::VPCNT_B;
8774 PickOp = LoongArch::VPICKVE2GR_B;
8775 break;
8776 case LoongArch::PseudoCTPOP_H:
8777 case LoongArch::PseudoCTPOP_H_LA32:
8778 BroadcastOp = LoongArch::VREPLGR2VR_H;
8779 CTOp = LoongArch::VPCNT_H;
8780 PickOp = LoongArch::VPICKVE2GR_H;
8781 break;
8782 case LoongArch::PseudoCTPOP_W:
8783 case LoongArch::PseudoCTPOP_W_LA32:
8784 BroadcastOp = LoongArch::VREPLGR2VR_W;
8785 CTOp = LoongArch::VPCNT_W;
8786 PickOp = LoongArch::VPICKVE2GR_W;
8787 break;
8788 case LoongArch::PseudoCTPOP_D:
8789 BroadcastOp = LoongArch::VREPLGR2VR_D;
8790 CTOp = LoongArch::VPCNT_D;
8791 PickOp = LoongArch::VPICKVE2GR_D;
8792 break;
8793 }
8794
// broadcast -> per-element popcount -> extract element 0.
8795 Register ScratchReg1 = MRI.createVirtualRegister(RC);
8796 Register ScratchReg2 = MRI.createVirtualRegister(RC);
8797 BuildMI(*BB, MI, DL, TII->get(BroadcastOp), ScratchReg1).addReg(Src);
8798 BuildMI(*BB, MI, DL, TII->get(CTOp), ScratchReg2).addReg(ScratchReg1);
8799 BuildMI(*BB, MI, DL, TII->get(PickOp), Dst).addReg(ScratchReg2).addImm(0);
8800
8801 MI.eraseFromParent();
8802 return BB;
8803}
8804
// Expands the vector-mask pseudos (VMSKLTZ/GEZ/EQZ/NEZ and their XV forms)
// into a GPR result. A [X]VMSK* instruction is applied to the source vector
// (operand 1) and the mask is then moved into the destination GPR (operand 0):
//  * EQZ has no direct instruction here: it is built as [X]VMSKNZ followed by
//    [X]VNOR of the result with itself;
//  * for the 256-bit register class, word elements 0 and 4 of the mask are
//    extracted and merged with BSTRINS_D / BSTRINS_W;
//  * for the 128-bit class, halfword element 0 of the mask is extracted.
// The pseudo is erased and BB is returned.
// NOTE(review): this listing has lost the line carrying the function name and
// first parameters (embedded line 8806), the declaration of `MRI` (embedded
// line 8811) and the first operands of the BSTRINS builder (embedded lines
// 8905-8906). The name is inferred from the matching cases in
// EmitInstrWithCustomInserter.
8805static MachineBasicBlock *
8807 const LoongArchSubtarget &Subtarget) {
8808 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
8809 const TargetRegisterClass *RC = &LoongArch::LSX128RegClass;
8810 const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
8812 Register Dst = MI.getOperand(0).getReg();
8813 Register Src = MI.getOperand(1).getReg();
8814 DebugLoc DL = MI.getDebugLoc();
// Defaults describe the 128-bit byte-element case; the switch overrides the
// element width, register class and optional NOT opcode as needed.
8815 unsigned EleBits = 8;
8816 unsigned NotOpc = 0;
8817 unsigned MskOpc;
8818
8819 switch (MI.getOpcode()) {
8820 default:
8821 llvm_unreachable("Unexpected opcode");
8822 case LoongArch::PseudoVMSKLTZ_B:
8823 MskOpc = LoongArch::VMSKLTZ_B;
8824 break;
8825 case LoongArch::PseudoVMSKLTZ_H:
8826 MskOpc = LoongArch::VMSKLTZ_H;
8827 EleBits = 16;
8828 break;
8829 case LoongArch::PseudoVMSKLTZ_W:
8830 MskOpc = LoongArch::VMSKLTZ_W;
8831 EleBits = 32;
8832 break;
8833 case LoongArch::PseudoVMSKLTZ_D:
8834 MskOpc = LoongArch::VMSKLTZ_D;
8835 EleBits = 64;
8836 break;
8837 case LoongArch::PseudoVMSKGEZ_B:
8838 MskOpc = LoongArch::VMSKGEZ_B;
8839 break;
8840 case LoongArch::PseudoVMSKEQZ_B:
8841 MskOpc = LoongArch::VMSKNZ_B;
8842 NotOpc = LoongArch::VNOR_V;
8843 break;
8844 case LoongArch::PseudoVMSKNEZ_B:
8845 MskOpc = LoongArch::VMSKNZ_B;
8846 break;
8847 case LoongArch::PseudoXVMSKLTZ_B:
8848 MskOpc = LoongArch::XVMSKLTZ_B;
8849 RC = &LoongArch::LASX256RegClass;
8850 break;
8851 case LoongArch::PseudoXVMSKLTZ_H:
8852 MskOpc = LoongArch::XVMSKLTZ_H;
8853 RC = &LoongArch::LASX256RegClass;
8854 EleBits = 16;
8855 break;
8856 case LoongArch::PseudoXVMSKLTZ_W:
8857 MskOpc = LoongArch::XVMSKLTZ_W;
8858 RC = &LoongArch::LASX256RegClass;
8859 EleBits = 32;
8860 break;
8861 case LoongArch::PseudoXVMSKLTZ_D:
8862 MskOpc = LoongArch::XVMSKLTZ_D;
8863 RC = &LoongArch::LASX256RegClass;
8864 EleBits = 64;
8865 break;
8866 case LoongArch::PseudoXVMSKGEZ_B:
8867 MskOpc = LoongArch::XVMSKGEZ_B;
8868 RC = &LoongArch::LASX256RegClass;
8869 break;
8870 case LoongArch::PseudoXVMSKEQZ_B:
8871 MskOpc = LoongArch::XVMSKNZ_B;
8872 NotOpc = LoongArch::XVNOR_V;
8873 RC = &LoongArch::LASX256RegClass;
8874 break;
8875 case LoongArch::PseudoXVMSKNEZ_B:
8876 MskOpc = LoongArch::XVMSKNZ_B;
8877 RC = &LoongArch::LASX256RegClass;
8878 break;
8879 }
8880
8881 Register Msk = MRI.createVirtualRegister(RC);
8882 if (NotOpc) {
// NOR of a value with itself is its bitwise complement, turning the
// "non-zero" mask into an "equal to zero" mask.
8883 Register Tmp = MRI.createVirtualRegister(RC);
8884 BuildMI(*BB, MI, DL, TII->get(MskOpc), Tmp).addReg(Src);
8885 BuildMI(*BB, MI, DL, TII->get(NotOpc), Msk)
8886 .addReg(Tmp, RegState::Kill)
8887 .addReg(Tmp, RegState::Kill);
8888 } else {
8889 BuildMI(*BB, MI, DL, TII->get(MskOpc), Msk).addReg(Src);
8890 }
8891
8892 if (TRI->getRegSizeInBits(*RC) > 128) {
// 256-bit case: read word 0 and word 4 of the mask and merge them.
8893 Register Lo = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
8894 Register Hi = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
8895 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPICKVE2GR_WU), Lo)
8896 .addReg(Msk)
8897 .addImm(0);
8898 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPICKVE2GR_WU), Hi)
8899 .addReg(Msk, RegState::Kill)
8900 .addImm(4);
8901 BuildMI(*BB, MI, DL,
8902 TII->get(Subtarget.is64Bit() ? LoongArch::BSTRINS_D
8903 : LoongArch::BSTRINS_W),
8904 Dst)
// NOTE(review): the register operands of this instruction are missing
// (embedded lines 8905-8906); presumably they supply Lo and Hi -- confirm.
// The immediates give the bit range [128/EleBits, 256/EleBits - 1], e.g.
// bits 16..31 for byte elements.
8907 .addImm(256 / EleBits - 1)
8908 .addImm(128 / EleBits);
8909 } else {
8910 BuildMI(*BB, MI, DL, TII->get(LoongArch::VPICKVE2GR_HU), Dst)
8911 .addReg(Msk, RegState::Kill)
8912 .addImm(0);
8913 }
8914
8915 MI.eraseFromParent();
8916 return BB;
8917}
8918
// Expands SplitPairF64Pseudo: operand 2 is the source register and operands 0
// and 1 receive its two halves, using MOVFR2GR_S_64 for operand 0 (LoReg) and
// MOVFRH2GR_S for operand 1 (HiReg). The source's kill flag is carried only by
// the second (last) read. The pseudo is erased and BB is returned.
// NOTE(review): the line carrying the function name and first parameters
// (embedded line 8920) and the declaration of `TII` (embedded line 8927) are
// missing from this listing. The name is inferred from the dispatch in
// EmitInstrWithCustomInserter.
8919static MachineBasicBlock *
8921 const LoongArchSubtarget &Subtarget) {
8922 assert(MI.getOpcode() == LoongArch::SplitPairF64Pseudo &&
8923 "Unexpected instruction");
8924
8925 MachineFunction &MF = *BB->getParent();
8926 DebugLoc DL = MI.getDebugLoc();
8928 Register LoReg = MI.getOperand(0).getReg();
8929 Register HiReg = MI.getOperand(1).getReg();
8930 Register SrcReg = MI.getOperand(2).getReg();
8931
8932 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVFR2GR_S_64), LoReg).addReg(SrcReg);
8933 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVFRH2GR_S), HiReg)
8934 .addReg(SrcReg, getKillRegState(MI.getOperand(2).isKill()));
8935 MI.eraseFromParent(); // The pseudo instruction is gone now.
8936 return BB;
8937}
8938
// Expands BuildPairF64Pseudo: operands 1 and 2 are the low and high source
// registers and operand 0 is the FPR64 destination. MOVGR2FR_W_64 first writes
// the low half into a temporary FPR64, then MOVGR2FRH_W combines that
// temporary with the high half into the destination. Kill flags of both
// sources are propagated. The pseudo is erased and BB is returned.
// NOTE(review): the line carrying the function name and first parameters
// (embedded line 8940) and the declarations of `TII` and `MRI` (embedded lines
// 8947-8948) are missing from this listing. The name is inferred from the
// dispatch in EmitInstrWithCustomInserter.
8939static MachineBasicBlock *
8941 const LoongArchSubtarget &Subtarget) {
8942 assert(MI.getOpcode() == LoongArch::BuildPairF64Pseudo &&
8943 "Unexpected instruction");
8944
8945 MachineFunction &MF = *BB->getParent();
8946 DebugLoc DL = MI.getDebugLoc();
8949 Register TmpReg = MRI.createVirtualRegister(&LoongArch::FPR64RegClass);
8950 Register DstReg = MI.getOperand(0).getReg();
8951 Register LoReg = MI.getOperand(1).getReg();
8952 Register HiReg = MI.getOperand(2).getReg();
8953
8954 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVGR2FR_W_64), TmpReg)
8955 .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()));
8956 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVGR2FRH_W), DstReg)
8957 .addReg(TmpReg, RegState::Kill)
8958 .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()));
8959 MI.eraseFromParent(); // The pseudo instruction is gone now.
8960 return BB;
8961}
8962
// Returns true only when MI is Select_GPR_Using_CC_GPR, the one select pseudo
// recognised here; every other opcode yields false.
// NOTE(review): the signature line (embedded line 8963) is missing from this
// listing. The name is taken from the calls `isSelectPseudo(*...)` in the
// select-expansion code that follows.
8964 switch (MI.getOpcode()) {
8965 default:
8966 return false;
8967 case LoongArch::Select_GPR_Using_CC_GPR:
8968 return true;
8969 }
8970}
8971
// Expands a run of select pseudos that share one condition into a single
// branch triangle followed by PHIs.
// Operand layout of a select pseudo as used below: 0 = result, 1 = LHS,
// 2 = RHS (register or immediate), 3 = immediate passed to TII.get() as the
// branch opcode, 4 = value taken when coming from HeadMBB, 5 = value taken
// when coming from IfFalseMBB.
// Returns TailMBB, which holds the PHIs and everything that followed the last
// select of the run.
// NOTE(review): the line carrying the function name and first parameters
// (embedded line 8973) and the declaration of the insertion iterator `I`
// (embedded line 9042) are missing from this listing. The name is inferred
// from the dispatch in EmitInstrWithCustomInserter.
8972static MachineBasicBlock *
8974 const LoongArchSubtarget &Subtarget) {
8975 // To "insert" Select_* instructions, we actually have to insert the triangle
8976 // control-flow pattern. The incoming instructions know the destination vreg
8977 // to set, the condition code register to branch on, the true/false values to
8978 // select between, and the condcode to use to select the appropriate branch.
8979 //
8980 // We produce the following control flow:
8981 // HeadMBB
8982 // | \
8983 // | IfFalseMBB
8984 // | /
8985 // TailMBB
8986 //
8987 // When we find a sequence of selects we attempt to optimize their emission
8988 // by sharing the control flow. Currently we only handle cases where we have
8989 // multiple selects with the exact same condition (same LHS, RHS and CC).
8990 // The selects may be interleaved with other instructions if the other
8991 // instructions meet some requirements we deem safe:
8992 // - They are not pseudo instructions.
8993 // - They are debug instructions. Otherwise,
8994 // - They do not have side-effects, do not access memory and their inputs do
8995 // not depend on the results of the select pseudo-instructions.
8996 // The TrueV/FalseV operands of the selects cannot depend on the result of
8997 // previous selects in the sequence.
8998 // These conditions could be further relaxed. See the X86 target for a
8999 // related approach and more information.
9000
9001 Register LHS = MI.getOperand(1).getReg();
// RHS stays a default-constructed Register when operand 2 is an immediate.
9002 Register RHS;
9003 if (MI.getOperand(2).isReg())
9004 RHS = MI.getOperand(2).getReg();
9005 auto CC = static_cast<unsigned>(MI.getOperand(3).getImm());
9006
9007 SmallVector<MachineInstr *, 4> SelectDebugValues;
9008 SmallSet<Register, 4> SelectDests;
9009 SelectDests.insert(MI.getOperand(0).getReg());
9010
// Scan forward from MI to find the last select that can share this branch.
// The scan starts at MI itself, which trivially matches its own condition.
9011 MachineInstr *LastSelectPseudo = &MI;
9012 for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
9013 SequenceMBBI != E; ++SequenceMBBI) {
9014 if (SequenceMBBI->isDebugInstr())
9015 continue;
9016 if (isSelectPseudo(*SequenceMBBI)) {
// Stop at a select with a different condition, a non-register RHS, or an
// input that is the result of an earlier select in the run.
9017 if (SequenceMBBI->getOperand(1).getReg() != LHS ||
9018 !SequenceMBBI->getOperand(2).isReg() ||
9019 SequenceMBBI->getOperand(2).getReg() != RHS ||
9020 SequenceMBBI->getOperand(3).getImm() != CC ||
9021 SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
9022 SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
9023 break;
9024 LastSelectPseudo = &*SequenceMBBI;
9025 SequenceMBBI->collectDebugValues(SelectDebugValues);
9026 SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
9027 continue;
9028 }
// Any interleaved non-select instruction must be free of side effects and
// memory accesses, must not need custom insertion itself, and must not read a
// select result.
9029 if (SequenceMBBI->hasUnmodeledSideEffects() ||
9030 SequenceMBBI->mayLoadOrStore() ||
9031 SequenceMBBI->usesCustomInsertionHook())
9032 break;
9033 if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
9034 return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
9035 }))
9036 break;
9037 }
9038
9039 const LoongArchInstrInfo &TII = *Subtarget.getInstrInfo();
9040 const BasicBlock *LLVM_BB = BB->getBasicBlock();
9041 DebugLoc DL = MI.getDebugLoc();
// NOTE(review): the declaration of `I` is missing here (embedded line 9042).
9043
9044 MachineBasicBlock *HeadMBB = BB;
9045 MachineFunction *F = BB->getParent();
9046 MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
9047 MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
9048
9049 F->insert(I, IfFalseMBB);
9050 F->insert(I, TailMBB);
9051
9052 // Set the call frame size on entry to the new basic blocks.
9053 unsigned CallFrameSize = TII.getCallFrameSizeAt(*LastSelectPseudo);
9054 IfFalseMBB->setCallFrameSize(CallFrameSize);
9055 TailMBB->setCallFrameSize(CallFrameSize);
9056
9057 // Transfer debug instructions associated with the selects to TailMBB.
9058 for (MachineInstr *DebugInstr : SelectDebugValues) {
9059 TailMBB->push_back(DebugInstr->removeFromParent());
9060 }
9061
9062 // Move all instructions after the sequence to TailMBB.
9063 TailMBB->splice(TailMBB->end(), HeadMBB,
9064 std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
9065 // Update machine-CFG edges by transferring all successors of the current
9066 // block to the new block which will contain the Phi nodes for the selects.
9067 TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
9068 // Set the successors for HeadMBB.
9069 HeadMBB->addSuccessor(IfFalseMBB);
9070 HeadMBB->addSuccessor(TailMBB);
9071
9072 // Insert appropriate branch.
// CC is used directly as the opcode of the branch instruction; a taken branch
// goes to TailMBB, skipping IfFalseMBB.
9073 if (MI.getOperand(2).isImm())
9074 BuildMI(HeadMBB, DL, TII.get(CC))
9075 .addReg(LHS)
9076 .addImm(MI.getOperand(2).getImm())
9077 .addMBB(TailMBB);
9078 else
9079 BuildMI(HeadMBB, DL, TII.get(CC)).addReg(LHS).addReg(RHS).addMBB(TailMBB);
9080
9081 // IfFalseMBB just falls through to TailMBB.
9082 IfFalseMBB->addSuccessor(TailMBB);
9083
9084 // Create PHIs for all of the select pseudo-instructions.
// `Next` is captured before the current select is erased so the walk stays
// valid.
9085 auto SelectMBBI = MI.getIterator();
9086 auto SelectEnd = std::next(LastSelectPseudo->getIterator());
9087 auto InsertionPoint = TailMBB->begin();
9088 while (SelectMBBI != SelectEnd) {
9089 auto Next = std::next(SelectMBBI);
9090 if (isSelectPseudo(*SelectMBBI)) {
9091 // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
9092 BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
9093 TII.get(LoongArch::PHI), SelectMBBI->getOperand(0).getReg())
9094 .addReg(SelectMBBI->getOperand(4).getReg())
9095 .addMBB(HeadMBB)
9096 .addReg(SelectMBBI->getOperand(5).getReg())
9097 .addMBB(IfFalseMBB);
9098 SelectMBBI->eraseFromParent();
9099 }
9100 SelectMBBI = Next;
9101 }
9102
// PHIs were just created, so the function's NoPHIs property is reset.
9103 F->getProperties().resetNoPHIs();
9104 return TailMBB;
9105}
9106
// Expands instructions that need custom insertion. Dispatches on MI's opcode
// to the matching helper (or handles WRFCSR / RDFCSR / STATEPOINT inline) and
// returns the basic block in which subsequent instructions should be placed.
// An opcode with no case here is a programming error (llvm_unreachable).
// NOTE(review): embedded line 9200, inside the STATEPOINT case, is missing
// from this listing.
9107MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter(
9108 MachineInstr &MI, MachineBasicBlock *BB) const {
9109 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
9110 DebugLoc DL = MI.getDebugLoc();
9111
9112 switch (MI.getOpcode()) {
9113 default:
9114 llvm_unreachable("Unexpected instr type to insert");
// Integer division and remainder: optionally guarded by a divide-by-zero trap.
// The `break` after the return is never reached.
9115 case LoongArch::DIV_W:
9116 case LoongArch::DIV_WU:
9117 case LoongArch::MOD_W:
9118 case LoongArch::MOD_WU:
9119 case LoongArch::DIV_D:
9120 case LoongArch::DIV_DU:
9121 case LoongArch::MOD_D:
9122 case LoongArch::MOD_DU:
9123 return insertDivByZeroTrap(MI, BB);
9124 break;
// WRFCSR: operand 0 is an immediate FCSR index (added to FCSR0 to form the
// destination register), operand 1 the source register.
9125 case LoongArch::WRFCSR: {
9126 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVGR2FCSR),
9127 LoongArch::FCSR0 + MI.getOperand(0).getImm())
9128 .addReg(MI.getOperand(1).getReg());
9129 MI.eraseFromParent();
9130 return BB;
9131 }
// RDFCSR: operand 0 is the destination register, operand 1 an immediate FCSR
// index. The FCSR source operand is marked undef.
9132 case LoongArch::RDFCSR: {
9133 MachineInstr *ReadFCSR =
9134 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVFCSR2GR),
9135 MI.getOperand(0).getReg())
9136 .addReg(LoongArch::FCSR0 + MI.getOperand(1).getImm());
9137 ReadFCSR->getOperand(1).setIsUndef();
9138 MI.eraseFromParent();
9139 return BB;
9140 }
9141 case LoongArch::Select_GPR_Using_CC_GPR:
9142 return emitSelectPseudo(MI, BB, Subtarget);
9143 case LoongArch::BuildPairF64Pseudo:
9144 return emitBuildPairF64Pseudo(MI, BB, Subtarget);
9145 case LoongArch::SplitPairF64Pseudo:
9146 return emitSplitPairF64Pseudo(MI, BB, Subtarget);
9147 case LoongArch::PseudoVBZ:
9148 case LoongArch::PseudoVBZ_B:
9149 case LoongArch::PseudoVBZ_H:
9150 case LoongArch::PseudoVBZ_W:
9151 case LoongArch::PseudoVBZ_D:
9152 case LoongArch::PseudoVBNZ:
9153 case LoongArch::PseudoVBNZ_B:
9154 case LoongArch::PseudoVBNZ_H:
9155 case LoongArch::PseudoVBNZ_W:
9156 case LoongArch::PseudoVBNZ_D:
9157 case LoongArch::PseudoXVBZ:
9158 case LoongArch::PseudoXVBZ_B:
9159 case LoongArch::PseudoXVBZ_H:
9160 case LoongArch::PseudoXVBZ_W:
9161 case LoongArch::PseudoXVBZ_D:
9162 case LoongArch::PseudoXVBNZ:
9163 case LoongArch::PseudoXVBNZ_B:
9164 case LoongArch::PseudoXVBNZ_H:
9165 case LoongArch::PseudoXVBNZ_W:
9166 case LoongArch::PseudoXVBNZ_D:
9167 return emitVecCondBranchPseudo(MI, BB, Subtarget);
9168 case LoongArch::PseudoXVINSGR2VR_B:
9169 case LoongArch::PseudoXVINSGR2VR_H:
9170 return emitPseudoXVINSGR2VR(MI, BB, Subtarget);
9171 case LoongArch::PseudoCTPOP_B:
9172 case LoongArch::PseudoCTPOP_H:
9173 case LoongArch::PseudoCTPOP_W:
9174 case LoongArch::PseudoCTPOP_D:
9175 case LoongArch::PseudoCTPOP_H_LA32:
9176 case LoongArch::PseudoCTPOP_W_LA32:
9177 return emitPseudoCTPOP(MI, BB, Subtarget);
9178 case LoongArch::PseudoVMSKLTZ_B:
9179 case LoongArch::PseudoVMSKLTZ_H:
9180 case LoongArch::PseudoVMSKLTZ_W:
9181 case LoongArch::PseudoVMSKLTZ_D:
9182 case LoongArch::PseudoVMSKGEZ_B:
9183 case LoongArch::PseudoVMSKEQZ_B:
9184 case LoongArch::PseudoVMSKNEZ_B:
9185 case LoongArch::PseudoXVMSKLTZ_B:
9186 case LoongArch::PseudoXVMSKLTZ_H:
9187 case LoongArch::PseudoXVMSKLTZ_W:
9188 case LoongArch::PseudoXVMSKLTZ_D:
9189 case LoongArch::PseudoXVMSKGEZ_B:
9190 case LoongArch::PseudoXVMSKEQZ_B:
9191 case LoongArch::PseudoXVMSKNEZ_B:
9192 return emitPseudoVMSKCOND(MI, BB, Subtarget);
9193 case TargetOpcode::STATEPOINT:
9194 // STATEPOINT is a pseudo instruction which has no implicit defs/uses
9195 // while bl call instruction (where statepoint will be lowered at the
9196 // end) has implicit def. This def is early-clobber as it will be set at
9197 // the moment of the call and earlier than any use is read.
9198 // Add this implicit dead def here as a workaround.
// NOTE(review): the call that builds the operand is missing (embedded line
// 9200); the remaining arguments describe R1 as an implicit, dead,
// early-clobber def. Note that the operand is added before the 64-bit check.
9199 MI.addOperand(*MI.getMF(),
9201 LoongArch::R1, /*isDef*/ true,
9202 /*isImp*/ true, /*isKill*/ false, /*isDead*/ true,
9203 /*isUndef*/ false, /*isEarlyClobber*/ true));
9204 if (!Subtarget.is64Bit())
9205 report_fatal_error("STATEPOINT is only supported on 64-bit targets");
9206 return emitPatchPoint(MI, BB);
9207 case LoongArch::PROBED_STACKALLOC_DYN:
9208 return emitDynamicProbedAlloc(MI, BB);
9209 }
9210}
9211
// Reports whether misaligned memory accesses are permitted: false when the
// subtarget lacks UAL, true otherwise. When permitted and Fast is non-null,
// *Fast is set to 1. The visible body does not consult VT, AddrSpace,
// Alignment or Flags.
// NOTE(review): the first signature line, including the function name
// (embedded line 9212), is missing from this listing; the name used for this
// block is inferred from the parameter list.
9213 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
9214 unsigned *Fast) const {
9215 if (!Subtarget.hasUAL())
9216 return false;
9217
9218 // TODO: set reasonable speed number.
9219 if (Fast)
9220 *Fast = 1;
9221 return true;
9222}
9223
9224//===----------------------------------------------------------------------===//
9225// Calling Convention Implementation
9226//===----------------------------------------------------------------------===//
9227
9228// Eight general-purpose registers a0-a7 used for passing integer arguments,
9229// with a0-a1 reused to return values. Generally, the GPRs are used to pass
9230// fixed-point arguments, and floating-point arguments when no FPR is available
9231// or with soft float ABI.
// The table lists R4 through R11 in ascending order; registers are handed out
// in this order by State.AllocateReg(ArgGPRs).
9232const MCPhysReg ArgGPRs[] = {LoongArch::R4, LoongArch::R5, LoongArch::R6,
9233 LoongArch::R7, LoongArch::R8, LoongArch::R9,
9234 LoongArch::R10, LoongArch::R11};
9235
9236// PreserveNone calling convention:
9237// Arguments may be passed in any general-purpose registers except:
9238// - R1 : return address register
9239// - R22 : frame pointer
9240// - R31 : base pointer
9241//
9242// All general-purpose registers are treated as caller-saved,
9243// except R1 (RA) and R22 (FP).
9244//
9245// Non-volatile registers are allocated first so that a function
9246// can call normal functions without having to spill and reload
9247// argument registers.
// The visible entries are R23-R30 followed by R4-R20 (25 registers). R0, R2,
// R3 and R21 are also absent from the visible list even though the comment
// above does not mention them.
// NOTE(review): the line that opens this array declaration (embedded line
// 9248) is missing from this listing; the name is taken from its use in
// State.AllocateReg(PreserveNoneArgGPRs) further down.
9249 LoongArch::R23, LoongArch::R24, LoongArch::R25, LoongArch::R26,
9250 LoongArch::R27, LoongArch::R28, LoongArch::R29, LoongArch::R30,
9251 LoongArch::R4, LoongArch::R5, LoongArch::R6, LoongArch::R7,
9252 LoongArch::R8, LoongArch::R9, LoongArch::R10, LoongArch::R11,
9253 LoongArch::R12, LoongArch::R13, LoongArch::R14, LoongArch::R15,
9254 LoongArch::R16, LoongArch::R17, LoongArch::R18, LoongArch::R19,
9255 LoongArch::R20};
9256
9257// Eight floating-point registers fa0-fa7 used for passing floating-point
9258// arguments, and fa0-fa1 are also used to return values.
// The table lists the 32-bit views F0 through F7 in ascending order.
9259const MCPhysReg ArgFPR32s[] = {LoongArch::F0, LoongArch::F1, LoongArch::F2,
9260 LoongArch::F3, LoongArch::F4, LoongArch::F5,
9261 LoongArch::F6, LoongArch::F7};
9262// FPR32 and FPR64 alias each other.
// The 64-bit views F0_64 through F7_64, in the same order as the 32-bit table.
// NOTE(review): the line that opens this array declaration (embedded line
// 9263) is missing from this listing; the array name is presumably ArgFPR64s
// by analogy with ArgFPR32s -- confirm.
9264 LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, LoongArch::F3_64,
9265 LoongArch::F4_64, LoongArch::F5_64, LoongArch::F6_64, LoongArch::F7_64};
9266
// Eight vector registers VR0-VR7 in ascending order.
// NOTE(review): by analogy with ArgGPRs/ArgFPR32s these are presumably the
// registers used to pass 128-bit vector arguments; the use site is outside
// this excerpt -- confirm.
9267const MCPhysReg ArgVRs[] = {LoongArch::VR0, LoongArch::VR1, LoongArch::VR2,
9268 LoongArch::VR3, LoongArch::VR4, LoongArch::VR5,
9269 LoongArch::VR6, LoongArch::VR7};
9270
// Eight vector registers XR0-XR7 in ascending order.
// NOTE(review): by analogy with ArgGPRs/ArgFPR32s these are presumably the
// registers used to pass 256-bit vector arguments; the use site is outside
// this excerpt -- confirm.
9271const MCPhysReg ArgXRs[] = {LoongArch::XR0, LoongArch::XR1, LoongArch::XR2,
9272 LoongArch::XR3, LoongArch::XR4, LoongArch::XR5,
9273 LoongArch::XR6, LoongArch::XR7};
9274
// Allocates the next free argument GPR for the current calling convention and
// returns it (the callers treat a false-y result as "no register left").
// One convention draws from PreserveNoneArgGPRs for non-variadic calls;
// variadic calls under that convention, and every other convention, draw from
// ArgGPRs.
// NOTE(review): the signature line (embedded line 9275) and the case label
// before the first `if` (embedded line 9277, presumably the PreserveNone
// calling convention) are missing from this listing. The name is taken from
// the calls `allocateArgGPR(State)` in CC_LoongArchAssign2GRLen.
9276 switch (State.getCallingConv()) {
9278 if (!State.isVarArg())
9279 return State.AllocateReg(PreserveNoneArgGPRs);
9280 [[fallthrough]];
9281 default:
9282 return State.AllocateReg(ArgGPRs);
9283 }
9284}
9285
9286// Pass a 2*GRLen argument that has been split into two GRLen values through
9287// registers or the stack as necessary.
// VA1/ArgFlags1 describe the first half; ValNo2/ValVT2/LocVT2 describe the
// second (ArgFlags2 is not read in the visible body). Three outcomes:
//  - both halves in registers;
//  - first half in a register, second half on the stack with GRLen alignment;
//  - both halves on the stack, the first aligned to the larger of GRLen bytes
//    and the argument's original alignment.
// Always returns false.
// NOTE(review): embedded line 9302 (the start of the first stack location's
// CCValAssign construction) is missing from this listing.
9288static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State,
9289 CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1,
9290 unsigned ValNo2, MVT ValVT2, MVT LocVT2,
9291 ISD::ArgFlagsTy ArgFlags2) {
9292 unsigned GRLenInBytes = GRLen / 8;
9293 if (Register Reg = allocateArgGPR(State)) {
9294 // At least one half can be passed via register.
9295 State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
9296 VA1.getLocVT(), CCValAssign::Full));
9297 } else {
9298 // Both halves must be passed on the stack, with proper alignment.
9299 Align StackAlign =
9300 std::max(Align(GRLenInBytes), ArgFlags1.getNonZeroOrigAlign());
9301 State.addLoc(
9303 State.AllocateStack(GRLenInBytes, StackAlign),
9304 VA1.getLocVT(), CCValAssign::Full));
9305 State.addLoc(CCValAssign::getMem(
9306 ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
9307 LocVT2, CCValAssign::Full));
9308 return false;
9309 }
9310 if (Register Reg = allocateArgGPR(State)) {
9311 // The second half can also be passed via register.
9312 State.addLoc(
9313 CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
9314 } else {
9315 // The second half is passed via the stack, without additional alignment.
9316 State.addLoc(CCValAssign::getMem(
9317 ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
9318 LocVT2, CCValAssign::Full));
9319 }
9320 return false;
9321}
9322
9323// Implements the LoongArch calling convention. Returns true upon failure.
9325 unsigned ValNo, MVT ValVT,
9326 CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
9327 CCState &State, bool IsRet, Type *OrigTy) {
9328 unsigned GRLen = DL.getLargestLegalIntTypeSizeInBits();
9329 assert((GRLen == 32 || GRLen == 64) && "Unspport GRLen");
9330 MVT GRLenVT = GRLen == 32 ? MVT::i32 : MVT::i64;
9331 MVT LocVT = ValVT;
9332
9333 // Any return value split into more than two values can't be returned
9334 // directly.
9335 if (IsRet && ValNo > 1)
9336 return true;
9337
9338 // If passing a variadic argument, or if no FPR is available.
9339 bool UseGPRForFloat = true;
9340
9341 switch (ABI) {
9342 default:
9343 llvm_unreachable("Unexpected ABI");
9344 break;
9349 UseGPRForFloat = ArgFlags.isVarArg();
9350 break;
9353 break;
9354 }
9355
9356 // If this is a variadic argument, the LoongArch calling convention requires
9357 // that it is assigned an 'even' or 'aligned' register if it has (2*GRLen)/8
9358 // byte alignment. An aligned register should be used regardless of whether
9359 // the original argument was split during legalisation or not. The argument
9360 // will not be passed by registers if the original type is larger than
9361 // 2*GRLen, so the register alignment rule does not apply.
9362 unsigned TwoGRLenInBytes = (2 * GRLen) / 8;
9363 if (ArgFlags.isVarArg() &&
9364 ArgFlags.getNonZeroOrigAlign() == TwoGRLenInBytes &&
9365 DL.getTypeAllocSize(OrigTy) == TwoGRLenInBytes) {
9366 unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
9367 // Skip 'odd' register if necessary.
9368 if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1)
9369 State.AllocateReg(ArgGPRs);
9370 }
9371
9372 SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
9373 SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
9374 State.getPendingArgFlags();
9375
9376 assert(PendingLocs.size() == PendingArgFlags.size() &&
9377 "PendingLocs and PendingArgFlags out of sync");
9378
9379 // FPR32 and FPR64 alias each other.
9380 if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s))
9381 UseGPRForFloat = true;
9382
9383 if (UseGPRForFloat && ValVT == MVT::f32) {
9384 LocVT = GRLenVT;
9385 LocInfo = CCValAssign::BCvt;
9386 } else if (UseGPRForFloat && GRLen == 64 && ValVT == MVT::f64) {
9387 LocVT = MVT::i64;
9388 LocInfo = CCValAssign::BCvt;
9389 } else if (UseGPRForFloat && GRLen == 32 && ValVT == MVT::f64) {
9390 // Handle passing f64 on LA32D with a soft float ABI or when floating point
9391 // registers are exhausted.
9392 assert(PendingLocs.empty() && "Can't lower f64 if it is split");
9393 // Depending on available argument GPRS, f64 may be passed in a pair of
9394 // GPRs, split between a GPR and the stack, or passed completely on the
9395 // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
9396 // cases.
9397 MCRegister Reg = allocateArgGPR(State);
9398 if (!Reg) {
9399 int64_t StackOffset = State.AllocateStack(8, Align(8));
9400 State.addLoc(
9401 CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
9402 return false;
9403 }
9404 LocVT = MVT::i32;
9405 State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
9406 MCRegister HiReg = allocateArgGPR(State);
9407 if (HiReg) {
9408 State.addLoc(
9409 CCValAssign::getCustomReg(ValNo, ValVT, HiReg, LocVT, LocInfo));
9410 } else {
9411 int64_t StackOffset = State.AllocateStack(4, Align(4));
9412 State.addLoc(
9413 CCValAssign::getCustomMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
9414 }
9415 return false;
9416 }
9417
9418 // Split arguments might be passed indirectly, so keep track of the pending
9419 // values.
9420 if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
9421 LocVT = GRLenVT;
9422 LocInfo = CCValAssign::Indirect;
9423 PendingLocs.push_back(
9424 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
9425 PendingArgFlags.push_back(ArgFlags);
9426 if (!ArgFlags.isSplitEnd()) {
9427 return false;
9428 }
9429 }
9430
9431 // If the split argument only had two elements, it should be passed directly
9432 // in registers or on the stack.
9433 if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&
9434 PendingLocs.size() <= 2) {
9435 assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
9436 // Apply the normal calling convention rules to the first half of the
9437 // split argument.
9438 CCValAssign VA = PendingLocs[0];
9439 ISD::ArgFlagsTy AF = PendingArgFlags[0];
9440 PendingLocs.clear();
9441 PendingArgFlags.clear();
9442 return CC_LoongArchAssign2GRLen(GRLen, State, VA, AF, ValNo, ValVT, LocVT,
9443 ArgFlags);
9444 }
9445
9446 // Allocate to a register if possible, or else a stack slot.
9447 Register Reg;
9448 unsigned StoreSizeBytes = GRLen / 8;
9449 Align StackAlign = Align(GRLen / 8);
9450
9451 if (ValVT == MVT::f32 && !UseGPRForFloat) {
9452 Reg = State.AllocateReg(ArgFPR32s);
9453 } else if (ValVT == MVT::f64 && !UseGPRForFloat) {
9454 Reg = State.AllocateReg(ArgFPR64s);
9455 } else if (ValVT.is128BitVector()) {
9456 Reg = State.AllocateReg(ArgVRs);
9457 UseGPRForFloat = false;
9458 StoreSizeBytes = 16;
9459 StackAlign = Align(16);
9460 } else if (ValVT.is256BitVector()) {
9461 Reg = State.AllocateReg(ArgXRs);
9462 UseGPRForFloat = false;
9463 StoreSizeBytes = 32;
9464 StackAlign = Align(32);
9465 } else {
9466 Reg = allocateArgGPR(State);
9467 }
9468
9469 unsigned StackOffset =
9470 Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign);
9471
9472 // If we reach this point and PendingLocs is non-empty, we must be at the
9473 // end of a split argument that must be passed indirectly.
9474 if (!PendingLocs.empty()) {
9475 assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
9476 assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
9477 for (auto &It : PendingLocs) {
9478 if (Reg)
9479 It.convertToReg(Reg);
9480 else
9481 It.convertToMem(StackOffset);
9482 State.addLoc(It);
9483 }
9484 PendingLocs.clear();
9485 PendingArgFlags.clear();
9486 return false;
9487 }
9488 assert((!UseGPRForFloat || LocVT == GRLenVT) &&
9489 "Expected an GRLenVT at this stage");
9490
9491 if (Reg) {
9492 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
9493 return false;
9494 }
9495
9496 // When a floating-point value is passed on the stack, no bit-cast is needed.
9497 if (ValVT.isFloatingPoint()) {
9498 LocVT = ValVT;
9499 LocInfo = CCValAssign::Full;
9500 }
9501
9502 State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
9503 return false;
9504}
9505
9506void LoongArchTargetLowering::analyzeInputArgs(
9507 MachineFunction &MF, CCState &CCInfo,
9508 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
9509 LoongArchCCAssignFn Fn) const {
9510 FunctionType *FType = MF.getFunction().getFunctionType();
9511 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
9512 MVT ArgVT = Ins[i].VT;
9513 Type *ArgTy = nullptr;
9514 if (IsRet)
9515 ArgTy = FType->getReturnType();
9516 else if (Ins[i].isOrigArg())
9517 ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
9519 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
9520 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Ins[i].Flags,
9521 CCInfo, IsRet, ArgTy)) {
9522 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " << ArgVT
9523 << '\n');
9524 llvm_unreachable("");
9525 }
9526 }
9527}
9528
9529void LoongArchTargetLowering::analyzeOutputArgs(
9530 MachineFunction &MF, CCState &CCInfo,
9531 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
9532 CallLoweringInfo *CLI, LoongArchCCAssignFn Fn) const {
9533 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
9534 MVT ArgVT = Outs[i].VT;
9535 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
9537 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
9538 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Outs[i].Flags,
9539 CCInfo, IsRet, OrigTy)) {
9540 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " << ArgVT
9541 << "\n");
9542 llvm_unreachable("");
9543 }
9544 }
9545}
9546
9547// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
9548// values.
9550 const CCValAssign &VA, const SDLoc &DL) {
9551 switch (VA.getLocInfo()) {
9552 default:
9553 llvm_unreachable("Unexpected CCValAssign::LocInfo");
9554 case CCValAssign::Full:
9556 break;
9557 case CCValAssign::BCvt:
9558 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
9559 Val = DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Val);
9560 else
9561 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
9562 break;
9563 }
9564 return Val;
9565}
9566
9568 const CCValAssign &VA, const SDLoc &DL,
9569 const ISD::InputArg &In,
9570 const LoongArchTargetLowering &TLI) {
9573 EVT LocVT = VA.getLocVT();
9574 SDValue Val;
9575 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
9576 Register VReg = RegInfo.createVirtualRegister(RC);
9577 RegInfo.addLiveIn(VA.getLocReg(), VReg);
9578 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
9579
9580 // If input is sign extended from 32 bits, note it for the OptW pass.
9581 if (In.isOrigArg()) {
9582 Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex());
9583 if (OrigArg->getType()->isIntegerTy()) {
9584 unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
9585 // An input zero extended from i31 can also be considered sign extended.
9586 if ((BitWidth <= 32 && In.Flags.isSExt()) ||
9587 (BitWidth < 32 && In.Flags.isZExt())) {
9590 LAFI->addSExt32Register(VReg);
9591 }
9592 }
9593 }
9594
9595 return convertLocVTToValVT(DAG, Val, VA, DL);
9596}
9597
9598// The caller is responsible for loading the full value if the argument is
9599// passed with CCValAssign::Indirect.
9601 const CCValAssign &VA, const SDLoc &DL) {
9603 MachineFrameInfo &MFI = MF.getFrameInfo();
9604 EVT ValVT = VA.getValVT();
9605 int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
9606 /*IsImmutable=*/true);
9607 SDValue FIN = DAG.getFrameIndex(
9609
9610 ISD::LoadExtType ExtType;
9611 switch (VA.getLocInfo()) {
9612 default:
9613 llvm_unreachable("Unexpected CCValAssign::LocInfo");
9614 case CCValAssign::Full:
9616 case CCValAssign::BCvt:
9617 ExtType = ISD::NON_EXTLOAD;
9618 break;
9619 }
9620 return DAG.getExtLoad(
9621 ExtType, DL, VA.getLocVT(), Chain, FIN,
9623}
9624
9626 const CCValAssign &VA,
9627 const CCValAssign &HiVA,
9628 const SDLoc &DL) {
9629 assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
9630 "Unexpected VA");
9632 MachineFrameInfo &MFI = MF.getFrameInfo();
9634
9635 assert(VA.isRegLoc() && "Expected register VA assignment");
9636
9637 Register LoVReg = RegInfo.createVirtualRegister(&LoongArch::GPRRegClass);
9638 RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
9639 SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
9640 SDValue Hi;
9641 if (HiVA.isMemLoc()) {
9642 // Second half of f64 is passed on the stack.
9643 int FI = MFI.CreateFixedObject(4, HiVA.getLocMemOffset(),
9644 /*IsImmutable=*/true);
9645 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
9646 Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
9648 } else {
9649 // Second half of f64 is passed in another GPR.
9650 Register HiVReg = RegInfo.createVirtualRegister(&LoongArch::GPRRegClass);
9651 RegInfo.addLiveIn(HiVA.getLocReg(), HiVReg);
9652 Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
9653 }
9654 return DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64, Lo, Hi);
9655}
9656
9658 const CCValAssign &VA, const SDLoc &DL) {
9659 EVT LocVT = VA.getLocVT();
9660
9661 switch (VA.getLocInfo()) {
9662 default:
9663 llvm_unreachable("Unexpected CCValAssign::LocInfo");
9664 case CCValAssign::Full:
9665 break;
9666 case CCValAssign::BCvt:
9667 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
9668 Val = DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Val);
9669 else
9670 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
9671 break;
9672 }
9673 return Val;
9674}
9675
9676static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
9677 CCValAssign::LocInfo LocInfo,
9678 ISD::ArgFlagsTy ArgFlags, Type *OrigTy,
9679 CCState &State) {
9680 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
9681 // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, SpLim
9682 // s0 s1 s2 s3 s4 s5 s6 s7 s8
9683 static const MCPhysReg GPRList[] = {
9684 LoongArch::R23, LoongArch::R24, LoongArch::R25,
9685 LoongArch::R26, LoongArch::R27, LoongArch::R28,
9686 LoongArch::R29, LoongArch::R30, LoongArch::R31};
9687 if (MCRegister Reg = State.AllocateReg(GPRList)) {
9688 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
9689 return false;
9690 }
9691 }
9692
9693 if (LocVT == MVT::f32) {
9694 // Pass in STG registers: F1, F2, F3, F4
9695 // fs0,fs1,fs2,fs3
9696 static const MCPhysReg FPR32List[] = {LoongArch::F24, LoongArch::F25,
9697 LoongArch::F26, LoongArch::F27};
9698 if (MCRegister Reg = State.AllocateReg(FPR32List)) {
9699 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
9700 return false;
9701 }
9702 }
9703
9704 if (LocVT == MVT::f64) {
9705 // Pass in STG registers: D1, D2, D3, D4
9706 // fs4,fs5,fs6,fs7
9707 static const MCPhysReg FPR64List[] = {LoongArch::F28_64, LoongArch::F29_64,
9708 LoongArch::F30_64, LoongArch::F31_64};
9709 if (MCRegister Reg = State.AllocateReg(FPR64List)) {
9710 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
9711 return false;
9712 }
9713 }
9714
9715 report_fatal_error("No registers left in GHC calling convention");
9716 return true;
9717}
9718
9719// Transform physical registers into virtual registers.
9721 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
9722 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
9723 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
9724
9726
9727 switch (CallConv) {
9728 default:
9729 llvm_unreachable("Unsupported calling convention");
9730 case CallingConv::C:
9731 case CallingConv::Fast:
9734 break;
9735 case CallingConv::GHC:
9736 if (!MF.getSubtarget().hasFeature(LoongArch::FeatureBasicF) ||
9737 !MF.getSubtarget().hasFeature(LoongArch::FeatureBasicD))
9739 "GHC calling convention requires the F and D extensions");
9740 }
9741
9742 const Function &Func = MF.getFunction();
9743 EVT PtrVT = getPointerTy(DAG.getDataLayout());
9744 MVT GRLenVT = Subtarget.getGRLenVT();
9745 unsigned GRLenInBytes = Subtarget.getGRLen() / 8;
9746
9747 // Check if this function has any musttail calls. If so, incoming indirect
9748 // arg pointers must be saved in virtual registers so they survive across
9749 // basic blocks (the SelectionDAG is cleared between BBs). Only do this
9750 // when needed to avoid adding register pressure to non-musttail functions.
9751 bool HasMusttail = llvm::any_of(Func, [](const BasicBlock &BB) {
9752 return llvm::any_of(BB, [](const Instruction &I) {
9753 if (const auto *CI = dyn_cast<CallInst>(&I))
9754 return CI->isMustTailCall();
9755 return false;
9756 });
9757 });
9758 // Used with varargs to accumulate store chains.
9759 std::vector<SDValue> OutChains;
9760
9761 // Assign locations to all of the incoming arguments.
9763 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
9764
9765 if (CallConv == CallingConv::GHC)
9767 else
9768 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false, CC_LoongArch);
9769
9770 for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) {
9771 CCValAssign &VA = ArgLocs[i];
9772 SDValue ArgValue;
9773 // Passing f64 on LA32D with a soft float ABI must be handled as a special
9774 // case.
9775 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
9776 assert(VA.needsCustom());
9777 ArgValue = unpackF64OnLA32DSoftABI(DAG, Chain, VA, ArgLocs[++i], DL);
9778 } else if (VA.isRegLoc())
9779 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[InsIdx], *this);
9780 else
9781 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
9782 if (VA.getLocInfo() == CCValAssign::Indirect) {
9783 // If the original argument was split and passed by reference, we need to
9784 // load all parts of it here (using the same address).
9785 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
9787 unsigned ArgIndex = Ins[InsIdx].OrigArgIndex;
9788 if (HasMusttail) {
9791 Register VReg =
9792 MF.getRegInfo().createVirtualRegister(&LoongArch::GPRRegClass);
9793 Chain = DAG.getCopyToReg(Chain, DL, VReg, ArgValue);
9794 LAFI->setIncomingIndirectArg(ArgIndex, VReg);
9795 }
9796 unsigned ArgPartOffset = Ins[InsIdx].PartOffset;
9797 assert(ArgPartOffset == 0);
9798 while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) {
9799 CCValAssign &PartVA = ArgLocs[i + 1];
9800 unsigned PartOffset = Ins[InsIdx + 1].PartOffset - ArgPartOffset;
9801 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
9802 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
9803 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
9805 ++i;
9806 ++InsIdx;
9807 }
9808 continue;
9809 }
9810 InVals.push_back(ArgValue);
9811 }
9812
9813 if (IsVarArg) {
9815 unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
9816 const TargetRegisterClass *RC = &LoongArch::GPRRegClass;
9817 MachineFrameInfo &MFI = MF.getFrameInfo();
9818 MachineRegisterInfo &RegInfo = MF.getRegInfo();
9819 auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();
9820
9821 // Offset of the first variable argument from stack pointer, and size of
9822 // the vararg save area. For now, the varargs save area is either zero or
9823 // large enough to hold a0-a7.
9824 int VaArgOffset, VarArgsSaveSize;
9825
9826 // If all registers are allocated, then all varargs must be passed on the
9827 // stack and we don't need to save any argregs.
9828 if (ArgRegs.size() == Idx) {
9829 VaArgOffset = CCInfo.getStackSize();
9830 VarArgsSaveSize = 0;
9831 } else {
9832 VarArgsSaveSize = GRLenInBytes * (ArgRegs.size() - Idx);
9833 VaArgOffset = -VarArgsSaveSize;
9834 }
9835
9836 // Record the frame index of the first variable argument
9837 // which is a value necessary to VASTART.
9838 int FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
9839 LoongArchFI->setVarArgsFrameIndex(FI);
9840
9841 // If saving an odd number of registers then create an extra stack slot to
9842 // ensure that the frame pointer is 2*GRLen-aligned, which in turn ensures
9843 // offsets to even-numbered registers remain 2*GRLen-aligned.
9844 if (Idx % 2) {
9845 MFI.CreateFixedObject(GRLenInBytes, VaArgOffset - (int)GRLenInBytes,
9846 true);
9847 VarArgsSaveSize += GRLenInBytes;
9848 }
9849
9850 // Copy the integer registers that may have been used for passing varargs
9851 // to the vararg save area.
9852 for (unsigned I = Idx; I < ArgRegs.size();
9853 ++I, VaArgOffset += GRLenInBytes) {
9854 const Register Reg = RegInfo.createVirtualRegister(RC);
9855 RegInfo.addLiveIn(ArgRegs[I], Reg);
9856 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, GRLenVT);
9857 FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
9858 SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
9859 SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
9861 cast<StoreSDNode>(Store.getNode())
9862 ->getMemOperand()
9863 ->setValue((Value *)nullptr);
9864 OutChains.push_back(Store);
9865 }
9866 LoongArchFI->setVarArgsSaveSize(VarArgsSaveSize);
9867 }
9868
9869 // All stores are grouped in one node to allow the matching between
9870 // the size of Ins and InVals. This only happens for vararg functions.
9871 if (!OutChains.empty()) {
9872 OutChains.push_back(Chain);
9873 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
9874 }
9875
9876 return Chain;
9877}
9878
9880 return CI->isTailCall();
9881}
9882
9883// Check if the return value is used as only a return value, as otherwise
9884// we can't perform a tail-call.
9886 SDValue &Chain) const {
9887 if (N->getNumValues() != 1)
9888 return false;
9889 if (!N->hasNUsesOfValue(1, 0))
9890 return false;
9891
9892 SDNode *Copy = *N->user_begin();
9893 if (Copy->getOpcode() != ISD::CopyToReg)
9894 return false;
9895
9896 // If the ISD::CopyToReg has a glue operand, we conservatively assume it
9897 // isn't safe to perform a tail call.
9898 if (Copy->getGluedNode())
9899 return false;
9900
9901 // The copy must be used by a LoongArchISD::RET, and nothing else.
9902 bool HasRet = false;
9903 for (SDNode *Node : Copy->users()) {
9904 if (Node->getOpcode() != LoongArchISD::RET)
9905 return false;
9906 HasRet = true;
9907 }
9908
9909 if (!HasRet)
9910 return false;
9911
9912 Chain = Copy->getOperand(0);
9913 return true;
9914}
9915
9916// Check whether the call is eligible for tail call optimization.
9917bool LoongArchTargetLowering::isEligibleForTailCallOptimization(
9918 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
9919 const SmallVectorImpl<CCValAssign> &ArgLocs) const {
9920
9921 auto CalleeCC = CLI.CallConv;
9922 auto &Outs = CLI.Outs;
9923 auto &Caller = MF.getFunction();
9924 auto CallerCC = Caller.getCallingConv();
9925
9926 bool IsMustTail = CLI.CB && CLI.CB->isMustTailCall();
9927
9928 // Byval parameters hand the function a pointer directly into the stack area
9929 // we want to reuse during a tail call. Working around this *is* possible
9930 // but less efficient and uglier in LowerCall. For musttail, there is no
9931 // workaround today: a byval arg requires a local copy that becomes invalid
9932 // after the tail call deallocates the caller's frame, so rejecting here
9933 // (and triggering reportFatalInternalError in LowerCall) is safer than
9934 // miscompiling.
9935 for (auto &Arg : Outs)
9936 if (Arg.Flags.isByVal())
9937 return false;
9938
9939 // musttail bypasses the remaining checks: the checks either reject cases
9940 // we handle specially (indirect args are forwarded via incoming pointers,
9941 // stack-passed args reuse the matching incoming layout, sret is forwarded
9942 // like any other pointer arg) or are optimizations not applicable to
9943 // mandatory tail calls.
9944 if (IsMustTail)
9945 return true;
9946
9947 // Do not tail call opt if the stack is used to pass parameters.
9948 if (CCInfo.getStackSize() != 0)
9949 return false;
9950
9951 // Do not tail call opt if any parameters need to be passed indirectly.
9952 for (auto &VA : ArgLocs)
9953 if (VA.getLocInfo() == CCValAssign::Indirect)
9954 return false;
9955
9956 // Do not tail call opt if either caller or callee uses struct return
9957 // semantics.
9958 auto IsCallerStructRet = Caller.hasStructRetAttr();
9959 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
9960 if (IsCallerStructRet || IsCalleeStructRet)
9961 return false;
9962
9963 // The callee has to preserve all registers the caller needs to preserve.
9964 const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
9965 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
9966 if (CalleeCC != CallerCC) {
9967 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
9968 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
9969 return false;
9970 }
9971 return true;
9972}
9973
9975 return DAG.getDataLayout().getPrefTypeAlign(
9976 VT.getTypeForEVT(*DAG.getContext()));
9977}
9978
9979// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
9980// and output parameter nodes.
9981SDValue
9983 SmallVectorImpl<SDValue> &InVals) const {
9984 SelectionDAG &DAG = CLI.DAG;
9985 SDLoc &DL = CLI.DL;
9987 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
9989 SDValue Chain = CLI.Chain;
9990 SDValue Callee = CLI.Callee;
9991 CallingConv::ID CallConv = CLI.CallConv;
9992 bool IsVarArg = CLI.IsVarArg;
9993 EVT PtrVT = getPointerTy(DAG.getDataLayout());
9994 MVT GRLenVT = Subtarget.getGRLenVT();
9995 bool &IsTailCall = CLI.IsTailCall;
9996
9998
9999 // Analyze the operands of the call, assigning locations to each operand.
10001 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
10002
10003 if (CallConv == CallingConv::GHC)
10004 ArgCCInfo.AnalyzeCallOperands(Outs, CC_LoongArch_GHC);
10005 else
10006 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI, CC_LoongArch);
10007
10008 // Check if it's really possible to do a tail call.
10009 if (IsTailCall)
10010 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
10011
10012 if (IsTailCall)
10013 ++NumTailCalls;
10014 else if (CLI.CB && CLI.CB->isMustTailCall())
10015 report_fatal_error("failed to perform tail call elimination on a call "
10016 "site marked musttail");
10017
10018 // Get a count of how many bytes are to be pushed on the stack.
10019 unsigned NumBytes = ArgCCInfo.getStackSize();
10020
10021 // Create local copies for byval args.
10022 SmallVector<SDValue> ByValArgs;
10023 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
10024 ISD::ArgFlagsTy Flags = Outs[i].Flags;
10025 if (!Flags.isByVal())
10026 continue;
10027
10028 SDValue Arg = OutVals[i];
10029 unsigned Size = Flags.getByValSize();
10030 Align Alignment = Flags.getNonZeroByValAlign();
10031
10032 int FI =
10033 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
10034 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
10035 SDValue SizeNode = DAG.getConstant(Size, DL, GRLenVT);
10036
10037 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment, Alignment,
10038 /*IsVolatile=*/false,
10039 /*AlwaysInline=*/false, /*CI=*/nullptr, std::nullopt,
10041 ByValArgs.push_back(FIPtr);
10042 }
10043
10044 if (!IsTailCall)
10045 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
10046
10047 // Copy argument values to their designated locations.
10049 SmallVector<SDValue> MemOpChains;
10050 SDValue StackPtr;
10051 for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e;
10052 ++i, ++OutIdx) {
10053 CCValAssign &VA = ArgLocs[i];
10054 SDValue ArgValue = OutVals[OutIdx];
10055 ISD::ArgFlagsTy Flags = Outs[OutIdx].Flags;
10056
10057 // Handle passing f64 on LA32D with a soft float ABI as a special case.
10058 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
10059 assert(VA.isRegLoc() && "Expected register VA assignment");
10060 assert(VA.needsCustom());
10061 SDValue SplitF64 =
10062 DAG.getNode(LoongArchISD::SPLIT_PAIR_F64, DL,
10063 DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
10064 SDValue Lo = SplitF64.getValue(0);
10065 SDValue Hi = SplitF64.getValue(1);
10066
10067 Register RegLo = VA.getLocReg();
10068 RegsToPass.push_back(std::make_pair(RegLo, Lo));
10069
10070 // Get the CCValAssign for the Hi part.
10071 CCValAssign &HiVA = ArgLocs[++i];
10072
10073 if (HiVA.isMemLoc()) {
10074 // Second half of f64 is passed on the stack.
10075 if (!StackPtr.getNode())
10076 StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT);
10078 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
10079 DAG.getIntPtrConstant(HiVA.getLocMemOffset(), DL));
10080 // Emit the store.
10081 MemOpChains.push_back(DAG.getStore(
10082 Chain, DL, Hi, Address,
10084 } else {
10085 // Second half of f64 is passed in another GPR.
10086 Register RegHigh = HiVA.getLocReg();
10087 RegsToPass.push_back(std::make_pair(RegHigh, Hi));
10088 }
10089 continue;
10090 }
10091
10092 // Promote the value if needed.
10093 // For now, only handle fully promoted and indirect arguments.
10094 if (VA.getLocInfo() == CCValAssign::Indirect) {
10095 // For musttail calls, reuse incoming indirect pointers instead of
10096 // creating new stack temporaries. The incoming pointers point to the
10097 // caller's caller's frame, which remains valid after a tail call.
10098 if (IsTailCall && CLI.CB && CLI.CB->isMustTailCall()) {
10101 unsigned CallArgIdx = Outs[OutIdx].OrigArgIndex;
10102
10103 // Resolve which formal parameter is being passed at this call
10104 // position.
10105 //
10106 // FIXME: Ins[].OrigArgIndex is Argument::getArgNo() (unfiltered),
10107 // but Outs[].OrigArgIndex is an index into a filtered arg list
10108 // (empty types removed, via CallLoweringInfo in the target-
10109 // independent layer). IncomingIndirectArgs is keyed by the
10110 // caller's unfiltered Argument::getArgNo(), so we have to walk
10111 // the caller's formals (same filter) to translate the index.
10112 // This target-independent asymmetry should be normalized so
10113 // backends do not need to re-derive the mapping.
10114 //
10115 // Steps:
10116 // 1. Find the call operand at filtered position CallArgIdx.
10117 // 2. If it is an Argument, use getArgNo() directly (same filter
10118 // for caller formals and call operands).
10119 // 3. Otherwise (computed value), walk the caller's formals and
10120 // skip empty types to map the filtered index to getArgNo().
10121 const Argument *FormalArg = nullptr;
10122 unsigned FilteredIdx = 0;
10123 for (const auto &CallArg : CLI.CB->args()) {
10124 if (CallArg->getType()->isEmptyTy())
10125 continue;
10126 if (FilteredIdx == CallArgIdx) {
10127 FormalArg = dyn_cast<Argument>(CallArg);
10128 break;
10129 }
10130 ++FilteredIdx;
10131 }
10132
10133 // For forwarded args, getArgNo() gives the unfiltered index directly.
10134 // For computed args, walk the caller's formals to resolve it.
10135 unsigned FormalArgIdx = CallArgIdx;
10136 if (FormalArg) {
10137 FormalArgIdx = FormalArg->getArgNo();
10138 } else {
10139 FilteredIdx = 0;
10140 for (const auto &Arg : MF.getFunction().args()) {
10141 if (Arg.getType()->isEmptyTy())
10142 continue;
10143 if (FilteredIdx == CallArgIdx) {
10144 FormalArgIdx = Arg.getArgNo();
10145 break;
10146 }
10147 ++FilteredIdx;
10148 }
10149 }
10150
10151 Register VReg = LAFI->getIncomingIndirectArg(FormalArgIdx);
10152 SDValue CopyOp = DAG.getCopyFromReg(Chain, DL, VReg, PtrVT);
10153 // Thread the CopyFromReg output chain through MemOpChains so the
10154 // TokenFactor below sequences the copy with any stores we emit
10155 // for this argument.
10156 MemOpChains.push_back(CopyOp.getValue(1));
10157 SDValue IncomingPtr = CopyOp;
10158
10159 if (!FormalArg) {
10160 // Computed value: store into the incoming indirect pointer for the
10161 // same-position formal parameter (musttail guarantees matching
10162 // prototypes, so types match). The pointer survives the tail call
10163 // since it points to the caller's caller's frame.
10164 //
10165 // The data-flow edge through IncomingPtr already prevents the
10166 // store from being scheduled before the CopyFromReg. Threading
10167 // CopyOp.getValue(1) (the copy's output chain) into the store
10168 // makes that ordering explicit on the chain edge as well, which
10169 // is the convention for memory ops chaining off their producers.
10170 MemOpChains.push_back(
10171 DAG.getStore(CopyOp.getValue(1), DL, ArgValue, IncomingPtr,
10173 // Store any split parts at their respective offsets.
10174 unsigned ArgPartOffset = Outs[OutIdx].PartOffset;
10175 while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == CallArgIdx) {
10176 SDValue PartValue = OutVals[OutIdx + 1];
10177 unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset;
10178 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
10179 SDValue Addr =
10180 DAG.getNode(ISD::ADD, DL, PtrVT, IncomingPtr, Offset);
10181 MemOpChains.push_back(
10182 DAG.getStore(CopyOp.getValue(1), DL, PartValue, Addr,
10184 ++i;
10185 ++OutIdx;
10186 }
10187 }
10188 ArgValue = IncomingPtr;
10189
10190 // Skip any remaining split parts (for forwarded args, they are
10191 // covered by the forwarded pointer).
10192 while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == CallArgIdx) {
10193 ++i;
10194 ++OutIdx;
10195 }
10196 } else {
10197 // Store the argument in a stack slot and pass its address.
10198 Align StackAlign =
10199 std::max(getPrefTypeAlign(Outs[OutIdx].ArgVT, DAG),
10200 getPrefTypeAlign(ArgValue.getValueType(), DAG));
10201 TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
10202 // If the original argument was split and passed by reference, we need
10203 // to store the required parts of it here (and pass just one address).
10204 unsigned ArgIndex = Outs[OutIdx].OrigArgIndex;
10205 unsigned ArgPartOffset = Outs[OutIdx].PartOffset;
10206 assert(ArgPartOffset == 0);
10207 // Calculate the total size to store. We don't have access to what we're
10208 // actually storing other than performing the loop and collecting the
10209 // info.
10211 while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) {
10212 SDValue PartValue = OutVals[OutIdx + 1];
10213 unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset;
10214 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
10215 EVT PartVT = PartValue.getValueType();
10216 StoredSize += PartVT.getStoreSize();
10217 StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
10218 Parts.push_back(std::make_pair(PartValue, Offset));
10219 ++i;
10220 ++OutIdx;
10221 }
10222 SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
10223 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
10224 MemOpChains.push_back(
10225 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
10227 for (const auto &Part : Parts) {
10228 SDValue PartValue = Part.first;
10229 SDValue PartOffset = Part.second;
10231 DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
10232 MemOpChains.push_back(
10233 DAG.getStore(Chain, DL, PartValue, Address,
10235 }
10236 ArgValue = SpillSlot;
10237 }
10238 } else {
10239 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL);
10240 }
10241
10242 // Use local copy if it is a byval arg.
10243 if (Flags.isByVal())
10244 ArgValue = ByValArgs[j++];
10245
10246 if (VA.isRegLoc()) {
10247 // Queue up the argument copies and emit them at the end.
10248 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
10249 } else {
10250 assert(VA.isMemLoc() && "Argument not register or memory");
10251 assert((!IsTailCall || (CLI.CB && CLI.CB->isMustTailCall())) &&
10252 "Tail call not allowed if stack is used for passing parameters");
10253
10254 // Work out the address of the stack slot.
10255 if (!StackPtr.getNode())
10256 StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT);
10258 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
10260
10261 // Emit the store.
10262 MemOpChains.push_back(
10263 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
10264 }
10265 }
10266
10267 // Join the stores, which are independent of one another.
10268 if (!MemOpChains.empty())
10269 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
10270
10271 SDValue Glue;
10272
10273 // Build a sequence of copy-to-reg nodes, chained and glued together.
10274 for (auto &Reg : RegsToPass) {
10275 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
10276 Glue = Chain.getValue(1);
10277 }
10278
10279 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
10280 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
10281 // split it and then direct call can be matched by PseudoCALL_SMALL.
10283 const GlobalValue *GV = S->getGlobal();
10284 unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(GV)
10287 Callee = DAG.getTargetGlobalAddress(S->getGlobal(), DL, PtrVT, 0, OpFlags);
10288 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
10289 unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(nullptr)
10292 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);
10293 }
10294
10295 // The first call operand is the chain and the second is the target address.
10297 Ops.push_back(Chain);
10298 Ops.push_back(Callee);
10299
10300 // Add argument registers to the end of the list so that they are
10301 // known live into the call.
10302 for (auto &Reg : RegsToPass)
10303 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
10304
10305 if (!IsTailCall) {
10306 // Add a register mask operand representing the call-preserved registers.
10307 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
10308 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
10309 assert(Mask && "Missing call preserved mask for calling convention");
10310 Ops.push_back(DAG.getRegisterMask(Mask));
10311 }
10312
10313 // Glue the call to the argument copies, if any.
10314 if (Glue.getNode())
10315 Ops.push_back(Glue);
10316
10317 // Emit the call.
10318 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
10319 unsigned Op;
10320 switch (DAG.getTarget().getCodeModel()) {
10321 default:
10322 report_fatal_error("Unsupported code model");
10323 case CodeModel::Small:
10324 Op = IsTailCall ? LoongArchISD::TAIL : LoongArchISD::CALL;
10325 break;
10326 case CodeModel::Medium:
10327 Op = IsTailCall ? LoongArchISD::TAIL_MEDIUM : LoongArchISD::CALL_MEDIUM;
10328 break;
10329 case CodeModel::Large:
10330 assert(Subtarget.is64Bit() && "Large code model requires LA64");
10331 Op = IsTailCall ? LoongArchISD::TAIL_LARGE : LoongArchISD::CALL_LARGE;
10332 break;
10333 }
10334
10335 if (IsTailCall) {
10337 SDValue Ret = DAG.getNode(Op, DL, NodeTys, Ops);
10338 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
10339 return Ret;
10340 }
10341
10342 Chain = DAG.getNode(Op, DL, NodeTys, Ops);
10343 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
10344 Glue = Chain.getValue(1);
10345
10346 // Mark the end of the call, which is glued to the call itself.
10347 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
10348 Glue = Chain.getValue(1);
10349
10350 // Assign locations to each value returned by this call.
10352 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
10353 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_LoongArch);
10354
10355 // Copy all of the result registers out of their specified physreg.
10356 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
10357 auto &VA = RVLocs[i];
10358 // Copy the value out.
10359 SDValue RetValue =
10360 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
10361 // Glue the RetValue to the end of the call sequence.
10362 Chain = RetValue.getValue(1);
10363 Glue = RetValue.getValue(2);
10364
10365 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
10366 assert(VA.needsCustom());
10367 SDValue RetValue2 = DAG.getCopyFromReg(Chain, DL, RVLocs[++i].getLocReg(),
10368 MVT::i32, Glue);
10369 Chain = RetValue2.getValue(1);
10370 Glue = RetValue2.getValue(2);
10371 RetValue = DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64,
10372 RetValue, RetValue2);
10373 } else
10374 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL);
10375
10376 InVals.push_back(RetValue);
10377 }
10378
10379 return Chain;
10380}
10381
10383 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
10384 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context,
10385 const Type *RetTy) const {
10387 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
10388
10389 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
10390 LoongArchABI::ABI ABI =
10391 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
10392 if (CC_LoongArch(MF.getDataLayout(), ABI, i, Outs[i].VT, CCValAssign::Full,
10393 Outs[i].Flags, CCInfo, /*IsRet=*/true, nullptr))
10394 return false;
10395 }
10396 return true;
10397}
10398
10400 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
10402 const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
10403 SelectionDAG &DAG) const {
10404 // Stores the assignment of the return value to a location.
10406
10407 // Info about the registers and stack slot.
10408 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
10409 *DAG.getContext());
10410
10411 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
10412 nullptr, CC_LoongArch);
10413 if (CallConv == CallingConv::GHC && !RVLocs.empty())
10414 report_fatal_error("GHC functions return void only");
10415 SDValue Glue;
10416 SmallVector<SDValue, 4> RetOps(1, Chain);
10417
10418 // Copy the result values into the output registers.
10419 for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) {
10420 SDValue Val = OutVals[OutIdx];
10421 CCValAssign &VA = RVLocs[i];
10422 assert(VA.isRegLoc() && "Can only return in registers!");
10423
10424 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
10425 // Handle returning f64 on LA32D with a soft float ABI.
10426 assert(VA.isRegLoc() && "Expected return via registers");
10427 assert(VA.needsCustom());
10428 SDValue SplitF64 = DAG.getNode(LoongArchISD::SPLIT_PAIR_F64, DL,
10429 DAG.getVTList(MVT::i32, MVT::i32), Val);
10430 SDValue Lo = SplitF64.getValue(0);
10431 SDValue Hi = SplitF64.getValue(1);
10432 Register RegLo = VA.getLocReg();
10433 Register RegHi = RVLocs[++i].getLocReg();
10434
10435 Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
10436 Glue = Chain.getValue(1);
10437 RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
10438 Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
10439 Glue = Chain.getValue(1);
10440 RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
10441 } else {
10442 // Handle a 'normal' return.
10443 Val = convertValVTToLocVT(DAG, Val, VA, DL);
10444 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
10445
10446 // Guarantee that all emitted copies are stuck together.
10447 Glue = Chain.getValue(1);
10448 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
10449 }
10450 }
10451
10452 RetOps[0] = Chain; // Update chain.
10453
10454 // Add the glue node if we have it.
10455 if (Glue.getNode())
10456 RetOps.push_back(Glue);
10457
10458 return DAG.getNode(LoongArchISD::RET, DL, MVT::Other, RetOps);
10459}
10460
10461// Check if a constant splat can be generated using [x]vldi, where imm[12] == 1.
10462// Note: The following prefixes are excluded:
10463// imm[11:8] == 4'b0000, 4'b0100, 4'b1000
10464// as they can be represented using [x]vrepli.[whb]
10466 const APInt &SplatValue, const unsigned SplatBitSize) const {
10467 uint64_t RequiredImm = 0;
10468 uint64_t V = SplatValue.getZExtValue();
10469 if (SplatBitSize == 16 && !(V & 0x00FF)) {
10470 // 4'b0101
10471 RequiredImm = (0b10101 << 8) | (V >> 8);
10472 return {true, RequiredImm};
10473 } else if (SplatBitSize == 32) {
10474 // 4'b0001
10475 if (!(V & 0xFFFF00FF)) {
10476 RequiredImm = (0b10001 << 8) | (V >> 8);
10477 return {true, RequiredImm};
10478 }
10479 // 4'b0010
10480 if (!(V & 0xFF00FFFF)) {
10481 RequiredImm = (0b10010 << 8) | (V >> 16);
10482 return {true, RequiredImm};
10483 }
10484 // 4'b0011
10485 if (!(V & 0x00FFFFFF)) {
10486 RequiredImm = (0b10011 << 8) | (V >> 24);
10487 return {true, RequiredImm};
10488 }
10489 // 4'b0110
10490 if ((V & 0xFFFF00FF) == 0xFF) {
10491 RequiredImm = (0b10110 << 8) | (V >> 8);
10492 return {true, RequiredImm};
10493 }
10494 // 4'b0111
10495 if ((V & 0xFF00FFFF) == 0xFFFF) {
10496 RequiredImm = (0b10111 << 8) | (V >> 16);
10497 return {true, RequiredImm};
10498 }
10499 // 4'b1010
10500 if ((V & 0x7E07FFFF) == 0x3E000000 || (V & 0x7E07FFFF) == 0x40000000) {
10501 RequiredImm =
10502 (0b11010 << 8) | (((V >> 24) & 0xC0) ^ 0x40) | ((V >> 19) & 0x3F);
10503 return {true, RequiredImm};
10504 }
10505 } else if (SplatBitSize == 64) {
10506 // 4'b1011
10507 if ((V & 0xFFFFFFFF7E07FFFFULL) == 0x3E000000ULL ||
10508 (V & 0xFFFFFFFF7E07FFFFULL) == 0x40000000ULL) {
10509 RequiredImm =
10510 (0b11011 << 8) | (((V >> 24) & 0xC0) ^ 0x40) | ((V >> 19) & 0x3F);
10511 return {true, RequiredImm};
10512 }
10513 // 4'b1100
10514 if ((V & 0x7FC0FFFFFFFFFFFFULL) == 0x4000000000000000ULL ||
10515 (V & 0x7FC0FFFFFFFFFFFFULL) == 0x3FC0000000000000ULL) {
10516 RequiredImm =
10517 (0b11100 << 8) | (((V >> 56) & 0xC0) ^ 0x40) | ((V >> 48) & 0x3F);
10518 return {true, RequiredImm};
10519 }
10520 // 4'b1001
10521 auto sameBitsPreByte = [](uint64_t x) -> std::pair<bool, uint8_t> {
10522 uint8_t res = 0;
10523 for (int i = 0; i < 8; ++i) {
10524 uint8_t byte = x & 0xFF;
10525 if (byte == 0 || byte == 0xFF)
10526 res |= ((byte & 1) << i);
10527 else
10528 return {false, 0};
10529 x >>= 8;
10530 }
10531 return {true, res};
10532 };
10533 auto [IsSame, Suffix] = sameBitsPreByte(V);
10534 if (IsSame) {
10535 RequiredImm = (0b11001 << 8) | Suffix;
10536 return {true, RequiredImm};
10537 }
10538 }
10539 return {false, RequiredImm};
10540}
10541
10543 EVT VT) const {
10544 if (!Subtarget.hasExtLSX())
10545 return false;
10546
10547 if (VT == MVT::f32) {
10548 uint64_t masked = Imm.bitcastToAPInt().getZExtValue() & 0x7e07ffff;
10549 return (masked == 0x3e000000 || masked == 0x40000000);
10550 }
10551
10552 if (VT == MVT::f64) {
10553 uint64_t masked = Imm.bitcastToAPInt().getZExtValue() & 0x7fc0ffffffffffff;
10554 return (masked == 0x3fc0000000000000 || masked == 0x4000000000000000);
10555 }
10556
10557 return false;
10558}
10559
10560bool LoongArchTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
10561 bool ForCodeSize) const {
10562 // TODO: Maybe need more checks here after vector extension is supported.
10563 if (VT == MVT::f32 && !Subtarget.hasBasicF())
10564 return false;
10565 if (VT == MVT::f64 && !Subtarget.hasBasicD())
10566 return false;
10567 return (Imm.isZero() || Imm.isExactlyValue(1.0) || isFPImmVLDILegal(Imm, VT));
10568}
10569
10571 return true;
10572}
10573
10575 return true;
10576}
10577
10578bool LoongArchTargetLowering::shouldInsertFencesForAtomic(
10579 const Instruction *I) const {
10580 if (!Subtarget.is64Bit())
10581 return isa<LoadInst>(I) || isa<StoreInst>(I);
10582
10583 if (isa<LoadInst>(I))
10584 return true;
10585
10586 // On LA64, atomic store operations with IntegerBitWidth of 32 and 64 do not
10587 // require fences beacuse we can use amswap_db.[w/d].
10588 Type *Ty = I->getOperand(0)->getType();
10589 if (isa<StoreInst>(I) && Ty->isIntegerTy()) {
10590 unsigned Size = Ty->getIntegerBitWidth();
10591 return (Size == 8 || Size == 16);
10592 }
10593
10594 return false;
10595}
10596
10598 LLVMContext &Context,
10599 EVT VT) const {
10600 if (!VT.isVector())
10601 return getPointerTy(DL);
10603}
10604
10606 unsigned AddressSpace, EVT MemVT, const MachineFunction &MF) const {
10607 // Do not merge to float value size (128 or 256 bits) if no implicit
10608 // float attribute is set.
10609 bool NoFloat = MF.getFunction().hasFnAttribute(Attribute::NoImplicitFloat);
10610 unsigned MaxIntSize = Subtarget.is64Bit() ? 64 : 32;
10611 if (NoFloat)
10612 return MemVT.getSizeInBits() <= MaxIntSize;
10613
10614 // Make sure we don't merge greater than our maximum supported vector width.
10615 if (Subtarget.hasExtLASX())
10616 MaxIntSize = 256;
10617 else if (Subtarget.hasExtLSX())
10618 MaxIntSize = 128;
10619
10620 return MemVT.getSizeInBits() <= MaxIntSize;
10621}
10622
10624 EVT VT = Y.getValueType();
10625
10626 if (VT.isVector())
10627 return Subtarget.hasExtLSX() && VT.isInteger();
10628
10629 return VT.isScalarInteger() && !isa<ConstantSDNode>(Y);
10630}
10631
10634 MachineFunction &MF, unsigned Intrinsic) const {
10635 switch (Intrinsic) {
10636 default:
10637 return;
10638 case Intrinsic::loongarch_masked_atomicrmw_xchg_i32:
10639 case Intrinsic::loongarch_masked_atomicrmw_add_i32:
10640 case Intrinsic::loongarch_masked_atomicrmw_sub_i32:
10641 case Intrinsic::loongarch_masked_atomicrmw_nand_i32: {
10642 IntrinsicInfo Info;
10644 Info.memVT = MVT::i32;
10645 Info.ptrVal = I.getArgOperand(0);
10646 Info.offset = 0;
10647 Info.align = Align(4);
10650 Infos.push_back(Info);
10651 return;
10652 // TODO: Add more Intrinsics later.
10653 }
10654 }
10655}
10656
10657// When -mlamcas is enabled, MinCmpXchgSizeInBits will be set to 8,
10658// atomicrmw and/or/xor operations with operands less than 32 bits cannot be
10659// expanded to am{and/or/xor}[_db].w through AtomicExpandPass. To prevent
10660// regression, we need to implement it manually.
10663
10665 Op == AtomicRMWInst::And) &&
10666 "Unable to expand");
10667 unsigned MinWordSize = 4;
10668
10669 IRBuilder<> Builder(AI);
10670 LLVMContext &Ctx = Builder.getContext();
10671 const DataLayout &DL = AI->getDataLayout();
10672 Type *ValueType = AI->getType();
10673 Type *WordType = Type::getIntNTy(Ctx, MinWordSize * 8);
10674
10675 Value *Addr = AI->getPointerOperand();
10676 PointerType *PtrTy = cast<PointerType>(Addr->getType());
10677 IntegerType *IntTy = DL.getIndexType(Ctx, PtrTy->getAddressSpace());
10678
10679 Value *AlignedAddr = Builder.CreateIntrinsic(
10680 Intrinsic::ptrmask, {PtrTy, IntTy},
10681 {Addr, ConstantInt::get(IntTy, ~(uint64_t)(MinWordSize - 1))}, nullptr,
10682 "AlignedAddr");
10683
10684 Value *AddrInt = Builder.CreatePtrToInt(Addr, IntTy);
10685 Value *PtrLSB = Builder.CreateAnd(AddrInt, MinWordSize - 1, "PtrLSB");
10686 Value *ShiftAmt = Builder.CreateShl(PtrLSB, 3);
10687 ShiftAmt = Builder.CreateTrunc(ShiftAmt, WordType, "ShiftAmt");
10688 Value *Mask = Builder.CreateShl(
10689 ConstantInt::get(WordType,
10690 (1 << (DL.getTypeStoreSize(ValueType) * 8)) - 1),
10691 ShiftAmt, "Mask");
10692 Value *Inv_Mask = Builder.CreateNot(Mask, "Inv_Mask");
10693 Value *ValOperand_Shifted =
10694 Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), WordType),
10695 ShiftAmt, "ValOperand_Shifted");
10696 Value *NewOperand;
10697 if (Op == AtomicRMWInst::And)
10698 NewOperand = Builder.CreateOr(ValOperand_Shifted, Inv_Mask, "AndOperand");
10699 else
10700 NewOperand = ValOperand_Shifted;
10701
10702 AtomicRMWInst *NewAI =
10703 Builder.CreateAtomicRMW(Op, AlignedAddr, NewOperand, Align(MinWordSize),
10704 AI->getOrdering(), AI->getSyncScopeID());
10705
10706 Value *Shift = Builder.CreateLShr(NewAI, ShiftAmt, "shifted");
10707 Value *Trunc = Builder.CreateTrunc(Shift, ValueType, "extracted");
10708 Value *FinalOldResult = Builder.CreateBitCast(Trunc, ValueType);
10709 AI->replaceAllUsesWith(FinalOldResult);
10710 AI->eraseFromParent();
10711}
10712
10715 const AtomicRMWInst *AI) const {
10716 // TODO: Add more AtomicRMWInst that needs to be extended.
10717
10718 // Since floating-point operation requires a non-trivial set of data
10719 // operations, use CmpXChg to expand.
10720 if (AI->isFloatingPointOperation() ||
10726
10727 if (Subtarget.hasLAM_BH() && Subtarget.is64Bit() &&
10730 AI->getOperation() == AtomicRMWInst::Sub)) {
10732 }
10733
10734 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
10735 if (Subtarget.hasLAMCAS()) {
10736 if (Size < 32 && (AI->getOperation() == AtomicRMWInst::And ||
10740 if (AI->getOperation() == AtomicRMWInst::Nand || Size < 32)
10742 }
10743
10744 if (Size == 8 || Size == 16)
10747}
10748
10749static Intrinsic::ID
10751 AtomicRMWInst::BinOp BinOp) {
10752 if (GRLen == 64) {
10753 switch (BinOp) {
10754 default:
10755 llvm_unreachable("Unexpected AtomicRMW BinOp");
10757 return Intrinsic::loongarch_masked_atomicrmw_xchg_i64;
10758 case AtomicRMWInst::Add:
10759 return Intrinsic::loongarch_masked_atomicrmw_add_i64;
10760 case AtomicRMWInst::Sub:
10761 return Intrinsic::loongarch_masked_atomicrmw_sub_i64;
10763 return Intrinsic::loongarch_masked_atomicrmw_nand_i64;
10765 return Intrinsic::loongarch_masked_atomicrmw_umax_i64;
10767 return Intrinsic::loongarch_masked_atomicrmw_umin_i64;
10768 case AtomicRMWInst::Max:
10769 return Intrinsic::loongarch_masked_atomicrmw_max_i64;
10770 case AtomicRMWInst::Min:
10771 return Intrinsic::loongarch_masked_atomicrmw_min_i64;
10772 // TODO: support other AtomicRMWInst.
10773 }
10774 }
10775
10776 if (GRLen == 32) {
10777 switch (BinOp) {
10778 default:
10779 llvm_unreachable("Unexpected AtomicRMW BinOp");
10781 return Intrinsic::loongarch_masked_atomicrmw_xchg_i32;
10782 case AtomicRMWInst::Add:
10783 return Intrinsic::loongarch_masked_atomicrmw_add_i32;
10784 case AtomicRMWInst::Sub:
10785 return Intrinsic::loongarch_masked_atomicrmw_sub_i32;
10787 return Intrinsic::loongarch_masked_atomicrmw_nand_i32;
10789 return Intrinsic::loongarch_masked_atomicrmw_umax_i32;
10791 return Intrinsic::loongarch_masked_atomicrmw_umin_i32;
10792 case AtomicRMWInst::Max:
10793 return Intrinsic::loongarch_masked_atomicrmw_max_i32;
10794 case AtomicRMWInst::Min:
10795 return Intrinsic::loongarch_masked_atomicrmw_min_i32;
10796 // TODO: support other AtomicRMWInst.
10797 }
10798 }
10799
10800 llvm_unreachable("Unexpected GRLen\n");
10801}
10802
10805 const AtomicCmpXchgInst *CI) const {
10806
10807 if (Subtarget.hasLAMCAS())
10809
10811 if (Size == 8 || Size == 16)
10814}
10815
10817 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
10818 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
10819 unsigned GRLen = Subtarget.getGRLen();
10820 AtomicOrdering FailOrd = CI->getFailureOrdering();
10821 Value *FailureOrdering =
10822 Builder.getIntN(Subtarget.getGRLen(), static_cast<uint64_t>(FailOrd));
10823 Intrinsic::ID CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i32;
10824 if (GRLen == 64) {
10825 CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i64;
10826 CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
10827 NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
10828 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
10829 }
10830 Type *Tys[] = {AlignedAddr->getType()};
10831 Value *Result = Builder.CreateIntrinsic(
10832 CmpXchgIntrID, Tys, {AlignedAddr, CmpVal, NewVal, Mask, FailureOrdering});
10833 if (GRLen == 64)
10834 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
10835 return Result;
10836}
10837
10839 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
10840 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
10841 // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
10842 // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
10843 // mask, as this produces better code than the LL/SC loop emitted by
10844 // int_loongarch_masked_atomicrmw_xchg.
10845 if (AI->getOperation() == AtomicRMWInst::Xchg &&
10848 if (CVal->isZero())
10849 return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,
10850 Builder.CreateNot(Mask, "Inv_Mask"),
10851 AI->getAlign(), Ord);
10852 if (CVal->isMinusOne())
10853 return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,
10854 AI->getAlign(), Ord);
10855 }
10856
10857 unsigned GRLen = Subtarget.getGRLen();
10858 Value *Ordering =
10859 Builder.getIntN(GRLen, static_cast<uint64_t>(AI->getOrdering()));
10860 Type *Tys[] = {AlignedAddr->getType()};
10862 AI->getModule(),
10864
10865 if (GRLen == 64) {
10866 Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
10867 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
10868 ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
10869 }
10870
10871 Value *Result;
10872
10873 // Must pass the shift amount needed to sign extend the loaded value prior
10874 // to performing a signed comparison for min/max. ShiftAmt is the number of
10875 // bits to shift the value into position. Pass GRLen-ShiftAmt-ValWidth, which
10876 // is the number of bits to left+right shift the value in order to
10877 // sign-extend.
10878 if (AI->getOperation() == AtomicRMWInst::Min ||
10880 const DataLayout &DL = AI->getDataLayout();
10881 unsigned ValWidth =
10882 DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
10883 Value *SextShamt =
10884 Builder.CreateSub(Builder.getIntN(GRLen, GRLen - ValWidth), ShiftAmt);
10885 Result = Builder.CreateCall(LlwOpScwLoop,
10886 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
10887 } else {
10888 Result =
10889 Builder.CreateCall(LlwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
10890 }
10891
10892 if (GRLen == 64)
10893 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
10894 return Result;
10895}
10896
10898 const MachineFunction &MF, EVT VT) const {
10899 VT = VT.getScalarType();
10900
10901 if (!VT.isSimple())
10902 return false;
10903
10904 switch (VT.getSimpleVT().SimpleTy) {
10905 case MVT::f32:
10906 case MVT::f64:
10907 return true;
10908 default:
10909 break;
10910 }
10911
10912 return false;
10913}
10914
10916 const Constant *PersonalityFn) const {
10917 return LoongArch::R4;
10918}
10919
10921 const Constant *PersonalityFn) const {
10922 return LoongArch::R5;
10923}
10924
10925//===----------------------------------------------------------------------===//
10926// Target Optimization Hooks
10927//===----------------------------------------------------------------------===//
10928
10930 const LoongArchSubtarget &Subtarget) {
10931 // The relative accuracy of the FRECIPE feature's instructions is 2^-14.
10932 // IEEE float has a 23-bit mantissa and double has a 52-bit mantissa.
10933 int RefinementSteps = VT.getScalarType() == MVT::f64 ? 2 : 1;
10934 return RefinementSteps;
10935}
10936
10937static bool
10939 assert(Subtarget.hasFrecipe() &&
10940 "Reciprocal estimate queried on unsupported target");
10941
10942 if (!VT.isSimple())
10943 return false;
10944
10945 switch (VT.getSimpleVT().SimpleTy) {
10946 case MVT::f32:
10947 // f32 is the base type for reciprocal estimate instructions.
10948 return true;
10949
10950 case MVT::f64:
10951 return Subtarget.hasBasicD();
10952
10953 case MVT::v4f32:
10954 case MVT::v2f64:
10955 return Subtarget.hasExtLSX();
10956
10957 case MVT::v8f32:
10958 case MVT::v4f64:
10959 return Subtarget.hasExtLASX();
10960
10961 default:
10962 return false;
10963 }
10964}
10965
10967 SelectionDAG &DAG, int Enabled,
10968 int &RefinementSteps,
10969 bool &UseOneConstNR,
10970 bool Reciprocal) const {
10972 "Enabled should never be Disabled here");
10973
10974 if (!Subtarget.hasFrecipe())
10975 return SDValue();
10976
10977 SDLoc DL(Operand);
10978 EVT VT = Operand.getValueType();
10979
10980 // Check supported types.
10981 if (!isSupportedReciprocalEstimateType(VT, Subtarget))
10982 return SDValue();
10983
10984 // Handle refinement steps.
10985 if (RefinementSteps == ReciprocalEstimate::Unspecified)
10986 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
10987
10988 // LoongArch only has FRSQRTE which is 1.0 / sqrt(x).
10989 UseOneConstNR = false;
10990 SDValue Rsqrt = DAG.getNode(LoongArchISD::FRSQRTE, DL, VT, Operand);
10991
10992 // If the caller wants 1.0 / sqrt(x), or if further refinement steps
10993 // are needed (which rely on the reciprocal form), return the raw reciprocal
10994 // estimate.
10995 if (Reciprocal || RefinementSteps > 0)
10996 return Rsqrt;
10997
10998 // Otherwise, return sqrt(x) by multiplying with the operand.
10999 return DAG.getNode(ISD::FMUL, DL, VT, Operand, Rsqrt);
11000}
11001
11003 SelectionDAG &DAG,
11004 int Enabled,
11005 int &RefinementSteps) const {
11007 "Enabled should never be Disabled here");
11008
11009 if (!Subtarget.hasFrecipe())
11010 return SDValue();
11011
11012 SDLoc DL(Operand);
11013 EVT VT = Operand.getValueType();
11014
11015 // Check supported types.
11016 if (!isSupportedReciprocalEstimateType(VT, Subtarget))
11017 return SDValue();
11018
11019 if (RefinementSteps == ReciprocalEstimate::Unspecified)
11020 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
11021
11022 // FRECIPE computes 1.0 / x.
11023 return DAG.getNode(LoongArchISD::FRECIPE, DL, VT, Operand);
11024}
11025
11026//===----------------------------------------------------------------------===//
11027// LoongArch Inline Assembly Support
11028//===----------------------------------------------------------------------===//
11029
11031LoongArchTargetLowering::getConstraintType(StringRef Constraint) const {
11032 // LoongArch specific constraints in GCC: config/loongarch/constraints.md
11033 //
11034 // 'f': A floating-point register (if available).
11035 // 'k': A memory operand whose address is formed by a base register and
11036 // (optionally scaled) index register.
11037 // 'l': A signed 16-bit constant.
11038 // 'm': A memory operand whose address is formed by a base register and
11039 // offset that is suitable for use in instructions with the same
11040 // addressing mode as st.w and ld.w.
11041 // 'q': A general-purpose register except for $r0 and $r1 (for the csrxchg
11042 // instruction)
11043 // 'I': A signed 12-bit constant (for arithmetic instructions).
11044 // 'J': Integer zero.
11045 // 'K': An unsigned 12-bit constant (for logic instructions).
11046 // "ZB": An address that is held in a general-purpose register. The offset is
11047 // zero.
11048 // "ZC": A memory operand whose address is formed by a base register and
11049 // offset that is suitable for use in instructions with the same
11050 // addressing mode as ll.w and sc.w.
11051 if (Constraint.size() == 1) {
11052 switch (Constraint[0]) {
11053 default:
11054 break;
11055 case 'f':
11056 case 'q':
11057 return C_RegisterClass;
11058 case 'l':
11059 case 'I':
11060 case 'J':
11061 case 'K':
11062 return C_Immediate;
11063 case 'k':
11064 return C_Memory;
11065 }
11066 }
11067
11068 if (Constraint == "ZC" || Constraint == "ZB")
11069 return C_Memory;
11070
11071 // 'm' is handled here.
11072 return TargetLowering::getConstraintType(Constraint);
11073}
11074
11075InlineAsm::ConstraintCode LoongArchTargetLowering::getInlineAsmMemConstraint(
11076 StringRef ConstraintCode) const {
11077 return StringSwitch<InlineAsm::ConstraintCode>(ConstraintCode)
11081 .Default(TargetLowering::getInlineAsmMemConstraint(ConstraintCode));
11082}
11083
11084std::pair<unsigned, const TargetRegisterClass *>
11085LoongArchTargetLowering::getRegForInlineAsmConstraint(
11086 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
11087 // First, see if this is a constraint that directly corresponds to a LoongArch
11088 // register class.
11089 if (Constraint.size() == 1) {
11090 switch (Constraint[0]) {
11091 case 'r':
11092 // TODO: Support fixed vectors up to GRLen?
11093 if (VT.isVector())
11094 break;
11095 return std::make_pair(0U, &LoongArch::GPRRegClass);
11096 case 'q':
11097 return std::make_pair(0U, &LoongArch::GPRNoR0R1RegClass);
11098 case 'f':
11099 if (Subtarget.hasBasicF() && VT == MVT::f32)
11100 return std::make_pair(0U, &LoongArch::FPR32RegClass);
11101 if (Subtarget.hasBasicD() && VT == MVT::f64)
11102 return std::make_pair(0U, &LoongArch::FPR64RegClass);
11103 if (Subtarget.hasExtLSX() &&
11104 TRI->isTypeLegalForClass(LoongArch::LSX128RegClass, VT))
11105 return std::make_pair(0U, &LoongArch::LSX128RegClass);
11106 if (Subtarget.hasExtLASX() &&
11107 TRI->isTypeLegalForClass(LoongArch::LASX256RegClass, VT))
11108 return std::make_pair(0U, &LoongArch::LASX256RegClass);
11109 break;
11110 default:
11111 break;
11112 }
11113 }
11114
11115 // TargetLowering::getRegForInlineAsmConstraint uses the name of the TableGen
11116 // record (e.g. the "R0" in `def R0`) to choose registers for InlineAsm
11117 // constraints while the official register name is prefixed with a '$'. So we
11118 // clip the '$' from the original constraint string (e.g. {$r0} to {r0}.)
11119 // before it being parsed. And TargetLowering::getRegForInlineAsmConstraint is
11120 // case insensitive, so no need to convert the constraint to upper case here.
11121 //
11122 // For now, no need to support ABI names (e.g. `$a0`) as clang will correctly
11123 // decode the usage of register name aliases into their official names. And
11124 // AFAIK, the not yet upstreamed `rustc` for LoongArch will always use
11125 // official register names.
11126 if (Constraint.starts_with("{$r") || Constraint.starts_with("{$f") ||
11127 Constraint.starts_with("{$vr") || Constraint.starts_with("{$xr")) {
11128 bool IsFP = Constraint[2] == 'f';
11129 std::pair<StringRef, StringRef> Temp = Constraint.split('$');
11130 std::pair<unsigned, const TargetRegisterClass *> R;
11132 TRI, join_items("", Temp.first, Temp.second), VT);
11133 // Match those names to the widest floating point register type available.
11134 if (IsFP) {
11135 unsigned RegNo = R.first;
11136 if (LoongArch::F0 <= RegNo && RegNo <= LoongArch::F31) {
11137 if (Subtarget.hasBasicD() && (VT == MVT::f64 || VT == MVT::Other)) {
11138 unsigned DReg = RegNo - LoongArch::F0 + LoongArch::F0_64;
11139 return std::make_pair(DReg, &LoongArch::FPR64RegClass);
11140 }
11141 }
11142 }
11143 return R;
11144 }
11145
11146 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
11147}
11148
11149void LoongArchTargetLowering::LowerAsmOperandForConstraint(
11150 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
11151 SelectionDAG &DAG) const {
11152 // Currently only support length 1 constraints.
11153 if (Constraint.size() == 1) {
11154 switch (Constraint[0]) {
11155 case 'l':
11156 // Validate & create a 16-bit signed immediate operand.
11157 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
11158 uint64_t CVal = C->getSExtValue();
11159 if (isInt<16>(CVal))
11160 Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op),
11161 Subtarget.getGRLenVT()));
11162 }
11163 return;
11164 case 'I':
11165 // Validate & create a 12-bit signed immediate operand.
11166 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
11167 uint64_t CVal = C->getSExtValue();
11168 if (isInt<12>(CVal))
11169 Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op),
11170 Subtarget.getGRLenVT()));
11171 }
11172 return;
11173 case 'J':
11174 // Validate & create an integer zero operand.
11175 if (auto *C = dyn_cast<ConstantSDNode>(Op))
11176 if (C->getZExtValue() == 0)
11177 Ops.push_back(
11178 DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getGRLenVT()));
11179 return;
11180 case 'K':
11181 // Validate & create a 12-bit unsigned immediate operand.
11182 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
11183 uint64_t CVal = C->getZExtValue();
11184 if (isUInt<12>(CVal))
11185 Ops.push_back(
11186 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT()));
11187 }
11188 return;
11189 default:
11190 break;
11191 }
11192 }
11194}
11195
11196#define GET_REGISTER_MATCHER
11197#include "LoongArchGenAsmMatcher.inc"
11198
11201 const MachineFunction &MF) const {
// Resolves RegName to a register and returns it only if that register is
// reserved in MF. An unrecognized name yields the null Register; a recognized
// but non-reserved register is a fatal error.
//
// Only the text after the first '$' is matched (presumably because the
// generated matchers expect names without that prefix -- confirm).
11202 std::pair<StringRef, StringRef> Name = StringRef(RegName).split('$');
11203 std::string NewRegName = Name.second.str();
// Try the alternative register names first, then the primary names.
11204 Register Reg = MatchRegisterAltName(NewRegName);
11205 if (!Reg)
11206 Reg = MatchRegisterName(NewRegName);
// Nothing matched: hand the null Register back to the caller.
11207 if (!Reg)
11208 return Reg;
// Refuse to hand out a register that is not reserved in this function.
11209 BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
11210 if (!ReservedRegs.test(Reg))
11211 report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
11212 StringRef(RegName) + "\"."));
11213 return Reg;
11214}
11215
11217 EVT VT, SDValue C) const {
// Returns true when a multiply of a scalar integer of type VT by the constant
// C matches one of the shift/add/sub shapes listed below; returns false for
// vectors, for types wider than GRLen, for non-constant C and for every
// constant that matches none of the shapes.
11218 // TODO: Support vectors.
11219 if (!VT.isScalarInteger())
11220 return false;
11221
11222 // Omit the optimization if the data size exceeds GRLen.
11223 if (VT.getSizeInBits() > Subtarget.getGRLen())
11224 return false;
11225
11226 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
11227 const APInt &Imm = ConstNode->getAPIntValue();
11228 // Break MUL into (SLLI + ADD/SUB) or ALSL.
11229 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
11230 (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
11231 return true;
11232 // Break MUL into (ALSL x, (SLLI x, imm0), imm1).
// Only done when the constant node has a single use.
11233 if (ConstNode->hasOneUse() &&
11234 ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
11235 (Imm - 8).isPowerOf2() || (Imm - 16).isPowerOf2()))
11236 return true;
11237 // Break (MUL x, imm) into (ADD (SLLI x, s0), (SLLI x, s1)),
11238 // in which the immediate has two set bits. Or break (MUL x, imm)
11239 // into (SUB (SLLI x, s0), (SLLI x, s1)), in which the immediate
11240 // equals (1 << s0) - (1 << s1).
// Considered only for single-use constants outside [-2048, 4095]
// (presumably because smaller immediates are cheap to materialize --
// confirm).
11241 if (ConstNode->hasOneUse() && !(Imm.sge(-2048) && Imm.sle(4095))) {
11242 unsigned Shifts = Imm.countr_zero();
11243 // Reject immediates which can be composed via a single LUI.
11244 if (Shifts >= 12)
11245 return false;
11246 // Reject multiplications that can be optimized to
11247 // (SLLI (ALSL x, x, 1/2/3/4), s).
11248 APInt ImmPop = Imm.ashr(Shifts);
11249 if (ImmPop == 3 || ImmPop == 5 || ImmPop == 9 || ImmPop == 17)
11250 return false;
11251 // We do not consider the case `(-Imm - ImmSmall).isPowerOf2()`,
11252 // since it needs one more instruction than the other 3 cases.
// ImmSmall is the lowest set bit of Imm; Shifts < 12 here, so the shift
// below cannot overflow.
11253 APInt ImmSmall = APInt(Imm.getBitWidth(), 1ULL << Shifts, true);
11254 if ((Imm - ImmSmall).isPowerOf2() || (Imm + ImmSmall).isPowerOf2() ||
11255 (ImmSmall - Imm).isPowerOf2())
11256 return true;
11257 }
11258 }
11259
11260 return false;
11261}
11262
11264 const AddrMode &AM,
11265 Type *Ty, unsigned AS,
11266 Instruction *I) const {
// Returns true when the addressing mode described by AM is one of the forms
// accepted below. Ty, AS and I are not consulted by this implementation.
11267 // LoongArch has four basic addressing modes:
11268 // 1. reg
11269 // 2. reg + 12-bit signed offset
11270 // 3. reg + 14-bit signed offset left-shifted by 2
11271 // 4. reg1 + reg2
11272 // TODO: Add more checks after supporting the vector extensions.
11273
11274 // No global is ever allowed as a base.
11275 if (AM.BaseGV)
11276 return false;
11277
11278 // Require a 12-bit signed offset or 14-bit signed offset left-shifted by 2
11279 // with `UAL` feature.
11280 if (!isInt<12>(AM.BaseOffs) &&
11281 !(isShiftedInt<14, 2>(AM.BaseOffs) && Subtarget.hasUAL()))
11282 return false;
11283
// The scale factor selects which register/offset combinations remain legal.
11284 switch (AM.Scale) {
11285 case 0:
11286 // "r+i" or just "i", depending on HasBaseReg.
11287 break;
11288 case 1:
11289 // "r+r+i" is not allowed.
11290 if (AM.HasBaseReg && AM.BaseOffs)
11291 return false;
11292 // Otherwise we have "r+r" or "r+i".
11293 break;
11294 case 2:
11295 // "2*r+r" or "2*r+i" is not allowed.
11296 if (AM.HasBaseReg || AM.BaseOffs)
11297 return false;
11298 // Allow "2*r" as "r+r".
11299 break;
11300 default:
// Any other scale factor is rejected.
11301 return false;
11302 }
11303
11304 return true;
11305}
11306
// Legal only when Imm is representable as a signed 12-bit integer.
11308 return isInt<12>(Imm);
11309}
11310
// Legal only when Imm is representable as a signed 12-bit integer.
11312 return isInt<12>(Imm);
11313}
11314
11316 // Zexts are free if they can be combined with a load.
11317 // Don't advertise i32->i64 zextload as being free for LA64. It interacts
11318 // poorly with type legalization of compares preferring sext.
11319 if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
11320 EVT MemVT = LD->getMemoryVT();
// Only i8/i16 loads that are not yet extended, or already zero-extended,
// count as free.
11321 if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
11322 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
11323 LD->getExtensionType() == ISD::ZEXTLOAD))
11324 return true;
11325 }
11326
// Everything else is decided by the generic implementation.
11327 return TargetLowering::isZExtFree(Val, VT2);
11328}
11329
11331 EVT DstVT) const {
// True only on 64-bit subtargets, and only for an i32 -> i64 extension.
11332 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
11333}
11334
// True only for 32-bit integer constants on 64-bit subtargets.
11336 return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
11337}
11338
11340 // TODO: Support vectors.
11341 if (Y.getValueType().isVector())
11342 return false;
11343
// For scalars, the answer is yes unless Y is a constant.
11344 return !isa<ConstantSDNode>(Y);
11345}
11346
11348 // LAMCAS will use amcas[_DB].{b/h/w/d} which does not require extension.
// Without LAMCAS the argument is sign-extended.
11349 return Subtarget.hasLAMCAS() ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND;
11350}
11351
11353 Type *Ty, bool IsSigned) const {
// On 64-bit subtargets a 32-bit integer is always sign-extended, regardless
// of IsSigned.
11354 if (Subtarget.is64Bit() && Ty->isIntegerTy(32))
11355 return true;
11356
// Otherwise the signedness requested by the caller decides.
11357 return IsSigned;
11358}
11359
11361 // Return false to suppress the unnecessary extensions if the LibCall
11362 // argument or return value is a float narrower than GRLEN on a soft FP ABI.
11363 if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
11364 Type.getSizeInBits() < Subtarget.getGRLen()))
11365 return false;
// Every other type is extended.
11366 return true;
11367}
11368
11369// memcpy and other memory intrinsics typically try to use wider loads/stores
11370// if the source/dest is aligned and the copy size is large enough. We therefore
11371// want to align such objects passed to memory intrinsics.
11373 unsigned &MinSize,
11374 Align &PrefAlign) const {
// Calls that are not memory intrinsics are left alone; MinSize and PrefAlign
// are not written in that case.
11375 if (!isa<MemIntrinsic>(CI))
11376 return false;
11377
// Report 8 bytes (size and alignment) on 64-bit subtargets, 4 bytes
// otherwise.
11378 if (Subtarget.is64Bit()) {
11379 MinSize = 8;
11380 PrefAlign = Align(8);
11381 } else {
11382 MinSize = 4;
11383 PrefAlign = Align(4);
11384 }
11385
11386 return true;
11387}
11388
// Prefer widening for non-scalable vectors whose element count is not 1 and
// whose element type is not i1.
11391 if (!VT.isScalableVector() && VT.getVectorNumElements() != 1 &&
11392 VT.getVectorElementType() != MVT::i1)
11393 return TypeWidenVector;
11394
// NOTE(review): no return statement is visible on this fall-through path in
// this listing -- confirm against the original file.
11396}
11397
11398bool LoongArchTargetLowering::splitValueIntoRegisterParts(
11399 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
11400 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
11401 bool IsABIRegCopy = CC.has_value();
11402 EVT ValueVT = Val.getValueType();
11403
11404 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
11405 PartVT == MVT::f32) {
11406 // Cast the [b]f16 to i16, extend to i32, pad with ones to make a float
11407 // nan, and cast to f32.
11408 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);
11409 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);
11410 Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val,
11411 DAG.getConstant(0xFFFF0000, DL, MVT::i32));
11412 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
11413 Parts[0] = Val;
11414 return true;
11415 }
11416
11417 return false;
11418}
11419
11420SDValue LoongArchTargetLowering::joinRegisterPartsIntoValue(
11421 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
11422 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
11423 bool IsABIRegCopy = CC.has_value();
11424
11425 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
11426 PartVT == MVT::f32) {
11427 SDValue Val = Parts[0];
11428
11429 // Cast the f32 to i32, truncate to i16, and cast back to [b]f16.
11430 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
11431 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);
11432 Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
11433 return Val;
11434 }
11435
11436 return SDValue();
11437}
11438
11439MVT LoongArchTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
11440 CallingConv::ID CC,
11441 EVT VT) const {
11442 // Use f32 to pass f16.
11443 if (VT == MVT::f16 && Subtarget.hasBasicF())
11444 return MVT::f32;
11445
11447}
11448
11449unsigned LoongArchTargetLowering::getNumRegistersForCallingConv(
11450 LLVMContext &Context, CallingConv::ID CC, EVT VT) const {
11451 // Use f32 to pass f16.
11452 if (VT == MVT::f16 && Subtarget.hasBasicF())
11453 return 1;
11454
11456}
11457
11459 const SDValue Op, KnownBits &Known, const APInt &DemandedElts,
11460 const SelectionDAG &DAG, unsigned Depth) const {
// Fills Known for target-specific nodes. Known is reset first, so opcodes not
// handled below report nothing known.
11461 unsigned Opc = Op.getOpcode();
11462 Known.resetAll();
11463 switch (Opc) {
11464 default:
11465 break;
11466 case LoongArchISD::VPICK_ZEXT_ELT: {
// Operand 2 carries the element type; every result bit at or above that
// type's scalar width is known zero.
11467 assert(isa<VTSDNode>(Op->getOperand(2)) && "Unexpected operand!");
11468 EVT VT = cast<VTSDNode>(Op->getOperand(2))->getVT();
11469 unsigned VTBits = VT.getScalarSizeInBits();
11470 assert(Known.getBitWidth() >= VTBits && "Unexpected width!");
11471 Known.Zero.setBitsFrom(VTBits);
11472 break;
11473 }
11474 }
11475}
11476
11478 SDValue Op, const APInt &OriginalDemandedBits,
11479 const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
11480 unsigned Depth) const {
// Target hook for demanded-bits simplification of LoongArch-specific nodes.
// Handles the CRC[C].W.{B,H}.W nodes and [X]VMSKLTZ.
11481 EVT VT = Op.getValueType();
11482 unsigned BitWidth = OriginalDemandedBits.getBitWidth();
11483 unsigned Opc = Op.getOpcode();
11484 switch (Opc) {
11485 default:
11486 break;
11487 case LoongArchISD::CRC_W_B_W:
11488 case LoongArchISD::CRC_W_H_W:
11489 case LoongArchISD::CRCC_W_B_W:
11490 case LoongArchISD::CRCC_W_H_W: {
// Only the low 8 bits (byte variants) or low 16 bits (half-word variants)
// of operand 1 are demanded.
11491 KnownBits KnownSrc;
11492 APInt DemandedSrcBits =
11493 APInt::getLowBitsSet(BitWidth, (Opc == LoongArchISD::CRC_W_B_W ||
11494 Opc == LoongArchISD::CRCC_W_B_W)
11495 ? 8
11496 : 16);
11497 return SimplifyDemandedBits(Op.getOperand(1), DemandedSrcBits,
11498 OriginalDemandedElts, KnownSrc, TLO, Depth + 1);
11499 }
11500 case LoongArchISD::VMSKLTZ:
11501 case LoongArchISD::XVMSKLTZ: {
11502 SDValue Src = Op.getOperand(0);
11503 MVT SrcVT = Src.getSimpleValueType();
11504 unsigned SrcBits = SrcVT.getScalarSizeInBits();
11505 unsigned NumElts = SrcVT.getVectorNumElements();
11506
11507 // If we don't need the sign bits at all just return zero.
11508 if (OriginalDemandedBits.countr_zero() >= NumElts)
11509 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
11510
11511 // Only demand the vector elements of the sign bits we need.
11512 APInt KnownUndef, KnownZero;
11513 APInt DemandedElts = OriginalDemandedBits.zextOrTrunc(NumElts);
11514 if (SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef, KnownZero,
11515 TLO, Depth + 1))
11516 return true;
11517
// Elements known to be zero give zero result bits; result bits above
// NumElts are always zero.
11518 Known.Zero = KnownZero.zext(BitWidth);
11519 Known.Zero.setHighBits(BitWidth - NumElts);
11520
11521 // [X]VMSKLTZ only uses the MSB from each vector element.
11522 KnownBits KnownSrc;
11523 APInt DemandedSrcBits = APInt::getSignMask(SrcBits);
11524 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, KnownSrc, TLO,
11525 Depth + 1))
11526 return true;
11527
// A sign bit known to be one (or zero) fixes the low NumElts result bits.
11528 if (KnownSrc.One[SrcBits - 1])
11529 Known.One.setLowBits(NumElts);
11530 else if (KnownSrc.Zero[SrcBits - 1])
11531 Known.Zero.setLowBits(NumElts);
11532
11533 // Attempt to avoid multi-use ops if we don't need anything from it.
// NOTE(review): the line that opens the condition ending on the next line
// is not visible in this listing -- confirm against the original file.
11535 Src, DemandedSrcBits, DemandedElts, TLO.DAG, Depth + 1))
11536 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewSrc));
11537 return false;
11538 }
11539 }
11540
// NOTE(review): the start of the statement ending on the next line is not
// visible in this listing -- confirm against the original file.
11542 Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth);
11543}
11544
// Decides whether a binary vector operation should be scalarized.
11546 unsigned Opc = VecOp.getOpcode();
11547
11548 // Assume target opcodes can't be scalarized.
11549 // TODO - do we have any exceptions?
11550 if (Opc >= ISD::BUILTIN_OP_END || !isBinOp(Opc))
11551 return false;
11552
11553 // If the vector op is not supported, try to convert to scalar.
11554 EVT VecVT = VecOp.getValueType();
// NOTE(review): the condition guarding the next return is not visible in
// this listing -- confirm against the original file.
11556 return true;
11557
11558 // If the vector op is supported, but the scalar op is not, the transform may
11559 // not be worthwhile.
11560 EVT ScalarVT = VecVT.getScalarType();
11561 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT);
11562}
11563
11565 unsigned Index) const {
// NOTE(review): the condition guarding the next return is not visible in
// this listing -- confirm against the original file.
11567 return false;
11568
11569 // Extracting a 128-bit subvector from index 0 of a 256-bit vector is free.
11570 return Index == 0;
11571}
11572
11574 unsigned Index) const {
11575 EVT EltVT = VT.getScalarType();
11576
11577 // Extracting a scalar FP value from index 0 of a vector is free.
// Only f32 and f64 element types qualify.
11578 return (EltVT == MVT::f32 || EltVT == MVT::f64) && Index == 0;
11579}
11580
11582 const MachineFunction &MF) const {
11583
11584 // If the function specifically requests inline stack probes, emit them.
// The request is a "probe-stack" function attribute whose string value is
// exactly "inline-asm"; any other value, or no attribute, means no.
11585 if (MF.getFunction().hasFnAttribute("probe-stack"))
11586 return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
11587 "inline-asm";
11588
11589 return false;
11590}
11591
11593 Align StackAlign) const {
// Returns the stack probe size for MF: the "stack-probe-size" attribute value
// rounded down to a multiple of StackAlign, or StackAlign itself when that
// rounding yields zero.
11594 // The default stack probe size is 4096 if the function has no
11595 // stack-probe-size attribute.
11596 const Function &Fn = MF.getFunction();
11597 unsigned StackProbeSize =
11598 Fn.getFnAttributeAsParsedInteger("stack-probe-size", 4096);
11599 // Round down to the stack alignment.
11600 StackProbeSize = alignDown(StackProbeSize, StackAlign.value());
// Never return zero: fall back to the stack alignment.
11601 return StackProbeSize ? StackProbeSize : StackAlign.value();
11602}
11603
11604SDValue
11605LoongArchTargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op,
11606 SelectionDAG &DAG) const {
11608 if (!hasInlineStackProbe(MF))
11609 return SDValue();
11610
11611 const MVT GRLenVT = Subtarget.getGRLenVT();
11612 // Get the inputs.
11613 SDValue Chain = Op.getOperand(0);
11614 SDValue Size = Op.getOperand(1);
11615
11616 const MaybeAlign Align =
11617 cast<ConstantSDNode>(Op.getOperand(2))->getMaybeAlignValue();
11618 const SDLoc dl(Op);
11619 const EVT VT = Op.getValueType();
11620
11621 // Construct the new SP value in a GPR.
11622 SDValue SP = DAG.getCopyFromReg(Chain, dl, LoongArch::R3, GRLenVT);
11623 Chain = SP.getValue(1);
11624 SP = DAG.getNode(ISD::SUB, dl, GRLenVT, SP, Size);
11625 if (Align)
11626 SP = DAG.getNode(ISD::AND, dl, VT, SP.getValue(0),
11627 DAG.getSignedConstant(-Align->value(), dl, VT));
11628
11629 // Set the real SP to the new value with a probing loop.
11630 Chain = DAG.getNode(LoongArchISD::PROBED_ALLOCA, dl, MVT::Other, Chain, SP);
11631 return DAG.getMergeValues({SP, Chain}, dl);
11632}
11633
11636 MachineBasicBlock *MBB) const {
// Expands MI into a stack-probing loop. Two new blocks are inserted after
// MBB: LoopTestMBB repeatedly lowers $sp by the probe size and stores zero at
// the new $sp while TargetReg is (unsigned) below $sp; ExitMBB then sets $sp
// to TargetReg and receives the instructions that followed MI. MI is erased
// and the block that holds the continuation is returned.
11637 MachineFunction &MF = *MBB->getParent();
11638 MachineBasicBlock::iterator MBBI = MI.getIterator();
11639 DebugLoc DL = MBB->findDebugLoc(MBBI);
// Operand 0 of MI holds the final stack pointer value.
11640 const Register TargetReg = MI.getOperand(0).getReg();
11641
11642 const LoongArchInstrInfo *TII = Subtarget.getInstrInfo();
11643 const bool IsLA64 = Subtarget.is64Bit();
11644 const Align StackAlign = Subtarget.getFrameLowering()->getStackAlign();
11645 const LoongArchTargetLowering *TLI = Subtarget.getTargetLowering();
11646 const uint64_t ProbeSize = TLI->getStackProbeSize(MF, StackAlign);
11647
// Create the loop and exit blocks directly after MBB.
11648 MachineFunction::iterator MBBInsertPoint = std::next(MBB->getIterator());
11649 MachineBasicBlock *const LoopTestMBB =
11650 MF.CreateMachineBasicBlock(MBB->getBasicBlock());
11651 MF.insert(MBBInsertPoint, LoopTestMBB);
11652 MachineBasicBlock *const ExitMBB =
11653 MF.CreateMachineBasicBlock(MBB->getBasicBlock());
11654 MF.insert(MBBInsertPoint, ExitMBB);
11655 const Register SPReg = LoongArch::R3;
11656 const Register ScratchReg =
11657 MF.getRegInfo().createVirtualRegister(&LoongArch::GPRRegClass);
11658
11659 // ScratchReg = ProbeSize
11660 TII->movImm(*MBB, MBBI, DL, ScratchReg, ProbeSize, MachineInstr::NoFlags);
11661
11662 // LoopTest:
11663 // sub.{w/d} $sp, $sp, ScratchReg
11664 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL,
11665 TII->get(IsLA64 ? LoongArch::SUB_D : LoongArch::SUB_W), SPReg)
11666 .addReg(SPReg)
11667 .addReg(ScratchReg);
11668
11669 // st.{w/d} $zero, $sp, 0
11670 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL,
11671 TII->get(IsLA64 ? LoongArch::ST_D : LoongArch::ST_W))
11672 .addReg(LoongArch::R0)
11673 .addReg(SPReg)
11674 .addImm(0);
11675
11676 // bltu TargetReg, $sp, LoopTest
11677 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(LoongArch::BLTU))
11678 .addReg(TargetReg)
11679 .addReg(SPReg)
11680 .addMBB(LoopTestMBB);
11681
11682 // move $sp, TargetReg
11683 BuildMI(*ExitMBB, ExitMBB->end(), DL, TII->get(LoongArch::OR), SPReg)
11684 .addReg(TargetReg)
11685 .addReg(LoongArch::R0);
11686
// Everything that followed MI in MBB now lives in ExitMBB.
11687 ExitMBB->splice(ExitMBB->end(), MBB, std::next(MBBI), MBB->end());
// NOTE(review): nothing visible in this listing moves MBB's original
// successors over to ExitMBB -- confirm against the original file.
11689
// CFG: MBB -> LoopTestMBB -> {LoopTestMBB, ExitMBB}.
11690 LoopTestMBB->addSuccessor(ExitMBB);
11691 LoopTestMBB->addSuccessor(LoopTestMBB);
11692 MBB->addSuccessor(LoopTestMBB);
11693
11694 MI.eraseFromParent();
11695 MF.getInfo<LoongArchMachineFunctionInfo>()->setDynamicAllocation();
11696 return ExitMBB->begin()->getParent();
11697}
static MCRegister MatchRegisterName(StringRef Name)
static SDValue performSHLCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
If the operand is a bitwise AND with a constant RHS, and the shift has a constant RHS and is the only...
static bool checkValueWidth(SDValue V, unsigned width, ISD::LoadExtType &ExtType)
static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
return SDValue()
static SDValue performANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue performSELECT_CCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue performSETCCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static MCRegister MatchRegisterAltName(StringRef Name)
Maps from the set of all alternative registernames to a register number.
Function Alias Analysis Results
static uint64_t getConstant(const Value *IndexValue)
static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG, unsigned Flags)
#define X(NUM, ENUM, NAME)
Definition ELF.h:853
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static MachineBasicBlock * emitSelectPseudo(MachineInstr &MI, MachineBasicBlock *BB, unsigned Opcode)
static SDValue unpackFromRegLoc(const CSKYSubtarget &Subtarget, SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
static bool isSigned(unsigned Opcode)
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define RegName(no)
static SDValue performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
const MCPhysReg ArgFPR32s[]
static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Dispatching routine to lower various 128-bit LoongArch vector shuffles.
static SDValue lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVSHUF4I (if possible).
const MCPhysReg ArgVRs[]
static SDValue lowerVECTOR_SHUFFLE_VPICKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPICKEV (if possible).
static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_XVPICKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPICKOD (if possible).
static SDValue unpackF64OnLA32DSoftABI(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const CCValAssign &HiVA, const SDLoc &DL)
static bool fitsRegularPattern(typename SmallVectorImpl< ValType >::const_iterator Begin, unsigned CheckStride, typename SmallVectorImpl< ValType >::const_iterator End, ValType ExpectedIndex, unsigned ExpectedIndexStride)
Determine whether a range fits a regular pattern of values.
static SDValue lowerVECTOR_SHUFFLE_IsReverse(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE whose result is the reversed source vector.
static SDValue PromoteMaskArithmetic(SDValue N, const SDLoc &DL, EVT VT, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, unsigned Depth)
static SDValue performHorizWideningCombine(SDNode *N, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg, SelectionDAG &DAG)
static cl::opt< bool > ZeroDivCheck("loongarch-check-zero-division", cl::Hidden, cl::desc("Trap on integer division by zero."), cl::init(false))
static SDValue lowerVECTOR_SHUFFLE_XVPERMI(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVPERMI (if possible).
static SDValue lowerVECTOR_SHUFFLE_VSHUF(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into VSHUF.
static int getEstimateRefinementSteps(EVT VT, const LoongArchSubtarget &Subtarget)
static bool isSupportedReciprocalEstimateType(EVT VT, const LoongArchSubtarget &Subtarget)
static void emitErrorAndReplaceIntrinsicResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, StringRef ErrorMsg, bool WithChain=true)
static SDValue lowerVECTOR_SHUFFLEAsByteRotate(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE as byte rotate (if possible).
static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp, SelectionDAG &DAG, bool IsSigned=false)
static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_XVINSVE0(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVINSVE0 (if possible).
static SDValue performMOVFR2GR_SCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_VILVH(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VILVH (if possible).
static SDValue performDemandedBitsCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI)
static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI, unsigned ValNo, MVT ValVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsRet, Type *OrigTy)
static SDValue performVSELECTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG)
static SDValue performSPLIT_PAIR_F64Combine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue performBITCASTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static MachineBasicBlock * emitSplitPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue lowerVectorBitSetImm(SDNode *Node, SelectionDAG &DAG)
static SDValue performSETCC_BITCASTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
@ NoMaterializeFPImm
@ MaterializeFPImm2Ins
@ MaterializeFPImm5Ins
@ MaterializeFPImm6Ins
@ MaterializeFPImm3Ins
@ MaterializeFPImm4Ins
static SDValue performEXTENDCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_XVPACKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPACKOD (if possible).
static bool buildVPERMIInfo(ArrayRef< int > Mask, SDValue V1, SDValue V2, SmallVectorImpl< SDValue > &SrcVec, unsigned &MaskImm)
static std::optional< bool > matchSetCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue Val)
static SDValue combineAndNotIntoVANDN(SDNode *N, const SDLoc &DL, SelectionDAG &DAG)
Try to fold: (and (xor X, -1), Y) -> (vandn X, Y).
static SDValue matchLowHalfOf128BitLanes(SDValue N)
static SDValue lowerBUILD_VECTORAsBroadCastLoad(BuildVectorSDNode *BVOp, const SDLoc &DL, SelectionDAG &DAG)
#define CRC_CASE_EXT_BINARYOP(NAME, NODE)
static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG)
static bool checkBitcastSrcVectorSize(SDValue Src, unsigned Size, unsigned Depth)
static bool isConstantSplatVector(SDValue N, APInt &SplatValue, unsigned MinSizeInBits)
static SDValue lowerVECTOR_SHUFFLEAsShift(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, const APInt &Zeroable)
Lower VECTOR_SHUFFLE as shift (if possible).
static SDValue lowerVECTOR_SHUFFLE_VSHUF4I(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into VSHUF4I (if possible).
static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG)
static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
static MachineBasicBlock * insertDivByZeroTrap(MachineInstr &MI, MachineBasicBlock *MBB)
static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG)
static SDValue lowerVECTOR_SHUFFLE_VEXTRINS(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into VEXTRINS (if possible).
static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG)
static SDValue lowerVECTOR_SHUFFLE_VPACKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPACKEV (if possible).
static MachineBasicBlock * emitPseudoVMSKCOND(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue performSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue performVANDNCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
Do target-specific dag combines on LoongArchISD::VANDN nodes.
static void replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, unsigned ResOp)
static SDValue lowerVECTOR_SHUFFLEAsZeroOrAnyExtend(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const APInt &Zeroable)
Lower VECTOR_SHUFFLE as ZERO_EXTEND Or ANY_EXTEND (if possible).
static SDValue legalizeIntrinsicImmArg(SDNode *Node, unsigned ImmOp, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, bool IsSigned=false)
static cl::opt< MaterializeFPImm > MaterializeFPImmInsNum("loongarch-materialize-float-imm", cl::Hidden, cl::desc("Maximum number of instructions used (including code sequence " "to generate the value and moving the value to FPR) when " "materializing floating-point immediates (default = 3)"), cl::init(MaterializeFPImm3Ins), cl::values(clEnumValN(NoMaterializeFPImm, "0", "Use constant pool"), clEnumValN(MaterializeFPImm2Ins, "2", "Materialize FP immediate within 2 instructions"), clEnumValN(MaterializeFPImm3Ins, "3", "Materialize FP immediate within 3 instructions"), clEnumValN(MaterializeFPImm4Ins, "4", "Materialize FP immediate within 4 instructions"), clEnumValN(MaterializeFPImm5Ins, "5", "Materialize FP immediate within 5 instructions"), clEnumValN(MaterializeFPImm6Ins, "6", "Materialize FP immediate within 6 instructions " "(behaves same as 5 on loongarch64)")))
static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op, StringRef ErrorMsg, SelectionDAG &DAG)
const MCPhysReg ArgXRs[]
static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State, CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2, MVT ValVT2, MVT LocVT2, ISD::ArgFlagsTy ArgFlags2)
static unsigned getLoongArchWOpcode(unsigned Opcode)
const MCPhysReg ArgFPR64s[]
static MachineBasicBlock * emitPseudoCTPOP(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue performMOVGR2FR_WCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
#define IOCSRWR_CASE(NAME, NODE)
#define CRC_CASE_EXT_UNARYOP(NAME, NODE)
static SDValue lowerVECTOR_SHUFFLE_VPACKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPACKOD (if possible).
static SDValue signExtendBitcastSrcVector(SelectionDAG &DAG, EVT SExtVT, SDValue Src, const SDLoc &DL)
static SDValue isNOT(SDValue V, SelectionDAG &DAG)
static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Dispatching routine to lower various 256-bit LoongArch vector shuffles.
static SDValue lowerVECTOR_SHUFFLE_VREPLVEI(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into VREPLVEI (if possible).
static MachineBasicBlock * emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
const MCPhysReg PreserveNoneArgGPRs[]
static void fillVector(ArrayRef< SDValue > Ops, SelectionDAG &DAG, SDLoc DL, const LoongArchSubtarget &Subtarget, SDValue &Vector, EVT ResTy)
static SDValue fillSubVectorFromBuildVector(BuildVectorSDNode *Node, SelectionDAG &DAG, SDLoc DL, const LoongArchSubtarget &Subtarget, EVT ResTy, unsigned first)
static bool isSelectPseudo(MachineInstr &MI)
static SDValue foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp, SelectionDAG &DAG, bool IsSigned=false)
const MCPhysReg ArgGPRs[]
static SDValue lowerVECTOR_SHUFFLE_XVPERM(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVPERM (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVILVL(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVILVL (if possible).
static SDValue performFP_TO_INTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_VPERMI(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into VPERMI (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVEXTRINS(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVEXTRINS (if possible).
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp, unsigned ExtOpc=ISD::ANY_EXTEND)
static void replaceVecCondBranchResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, unsigned ResOp)
#define ASRT_LE_GT_CASE(NAME)
static SDValue lowerVECTOR_SHUFFLE_XVPACKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPACKEV (if possible).
static SDValue matchDeinterleaveBuildVector(SDValue N, unsigned &StartIndex)
static SDValue performBR_CCCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static void computeZeroableShuffleElements(ArrayRef< int > Mask, SDValue V1, SDValue V2, APInt &KnownUndef, APInt &KnownZero)
Compute whether each element of a shuffle is zeroable.
static SDValue combineFP_ROUND(SDValue N, const SDLoc &DL, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue widenShuffleMask(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
static MachineBasicBlock * emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static bool canonicalizeShuffleVectorByLane(const SDLoc &DL, MutableArrayRef< int > Mask, MVT VT, SDValue &V1, SDValue &V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Shuffle vectors by lane to generate more optimized instructions.
static SDValue lowerVECTOR_SHUFFLE_XVILVH(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVILVH (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVSHUF (if possible).
static void replaceCMP_XCHG_128Results(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG)
static SDValue lowerVectorPickVE2GR(SDNode *N, SelectionDAG &DAG, unsigned ResOp)
static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
#define IOCSRRD_CASE(NAME, NODE)
static int matchShuffleAsByteRotate(MVT VT, SDValue &V1, SDValue &V2, ArrayRef< int > Mask)
Attempts to match vector shuffle as byte rotation.
static SDValue lowerVECTOR_SHUFFLE_XVPICKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPICKEV (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVREPLVEI (if possible).
static int matchShuffleAsShift(MVT &ShiftVT, unsigned &Opcode, unsigned ScalarSizeInBits, ArrayRef< int > Mask, int MaskOffset, const APInt &Zeroable)
Attempts to match a shuffle mask against the VBSLL, VBSRL, VSLLI and VSRLI instructions.
static SDValue lowerVECTOR_SHUFFLE_VILVL(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VILVL (if possible).
static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG)
static MachineBasicBlock * emitBuildPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE as lane permute and then shuffle (if possible).
static void replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
#define CSR_CASE(ID)
static SDValue lowerVECTOR_SHUFFLE_VPICKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPICKOD (if possible).
static Intrinsic::ID getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen, AtomicRMWInst::BinOp BinOp)
static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS, ISD::CondCode &CC, SelectionDAG &DAG)
static Register allocateArgGPR(CCState &State)
static bool isRepeatedShuffleMask(unsigned LaneSizeInBits, MVT VT, ArrayRef< int > Mask, SmallVectorImpl< int > &RepeatedMask)
Test whether a shuffle mask is equivalent within each sub-lane.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define T
static CodeModel::Model getCodeModel(const PPCSubtarget &S, const TargetMachine &TM, const MachineOperand &MO)
static constexpr MCPhysReg SPReg
const SmallVectorImpl< MachineOperand > & Cond
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
This file defines the SmallSet class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
This file contains some functions that are useful when dealing with strings.
#define LLVM_DEBUG(...)
Definition Debug.h:119
static bool inRange(const MCExpr *Expr, int64_t MinValue, int64_t MaxValue, bool AllowSymbol=false)
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static bool isSequentialOrUndefInRange(ArrayRef< int > Mask, unsigned Pos, unsigned Size, int Low, int Step=1)
Return true if every element in Mask, beginning from position Pos and ending in Pos + Size,...
Value * RHS
Value * LHS
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
Definition APFloat.h:1527
bool isZero() const
Definition APFloat.h:1540
APInt bitcastToAPInt() const
Definition APFloat.h:1436
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1055
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:230
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1563
void setHighBits(unsigned hiBits)
Set the top hiBits bits.
Definition APInt.h:1414
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition APInt.h:1408
LLVM_ABI APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition APInt.cpp:1076
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:968
void setBit(unsigned BitPosition)
Set the bit at position BitPosition to 1.
Definition APInt.h:1353
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:372
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition APInt.cpp:1692
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1511
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1662
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition APInt.h:436
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1264
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:307
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:201
void setLowBits(unsigned loBits)
Set the bottom loBits bits.
Definition APInt.h:1411
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition APInt.h:287
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1585
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:858
This class represents an incoming formal argument to a Function.
Definition Argument.h:32
unsigned getArgNo() const
Return the index of this formal argument in its containing function.
Definition Argument.h:50
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
Get the array size.
Definition ArrayRef.h:141
An instruction that atomically checks whether a specified value is in a memory location,...
AtomicOrdering getFailureOrdering() const
Returns the failure ordering constraint of this cmpxchg instruction.
an instruction that atomically reads a memory location, combines it with another value,...
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ Add
*p = old + v
@ USubCond
Subtract only if no unsigned overflow.
@ Min
*p = old <signed v ? old : v
@ Sub
*p = old - v
@ And
*p = old & v
@ Xor
*p = old ^ v
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ UIncWrap
Increment one up to a maximum value.
@ Max
*p = old >signed v ? old : v
@ UMin
*p = old <unsigned v ? old : v
@ UMax
*p = old >unsigned v ? old : v
@ UDecWrap
Decrement one until a minimum value or zero.
@ Nand
*p = ~(old & v)
Value * getPointerOperand()
bool isFloatingPointOperation() const
BinOp getOperation() const
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this rmw instruction.
AtomicOrdering getOrdering() const
Returns the ordering constraint of this rmw instruction.
LLVM_ABI StringRef getValueAsString() const
Return the attribute's value as a string.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
bool test(unsigned Idx) const
Returns true if bit Idx is set.
Definition BitVector.h:482
size_type count() const
Returns the number of bits which are set.
Definition BitVector.h:181
A "pseudo-class" with methods for operating on BUILD_VECTORs.
CCState - This class holds information needed while lowering arguments and return values.
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
LLVM_ABI void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeCallOperands - Analyze the outgoing arguments to a call, incorporating info about the passed v...
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
LLVM_ABI void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
CCValAssign - Represent assignment of one arg/retval to a location.
static CCValAssign getPending(unsigned ValNo, MVT ValVT, MVT LocVT, LocInfo HTP, unsigned ExtraInfo=0)
Register getLocReg() const
LocInfo getLocInfo() const
static CCValAssign getReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP, bool IsCustom=false)
static CCValAssign getCustomReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP)
static CCValAssign getMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP, bool IsCustom=false)
bool needsCustom() const
int64_t getLocMemOffset() const
unsigned getValNo() const
static CCValAssign getCustomMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP)
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
LLVM_ABI bool isMustTailCall() const
Tests if this call site must be tail call optimized.
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
const APFloat & getValueAPF() const
This is the shared class of boolean and integer constants.
Definition Constants.h:87
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition Constants.h:231
bool isZero() const
This is just a convenience method to make client code smaller for a common case.
Definition Constants.h:219
uint64_t getZExtValue() const
int64_t getSExtValue() const
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string, and methods for querying it.
Definition DataLayout.h:64
unsigned getPointerSizeInBits(unsigned AS=0) const
The size in bits of the pointer representation in a given address space.
Definition DataLayout.h:501
LLVM_ABI Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
A debug info location.
Definition DebugLoc.h:124
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition Function.h:211
iterator_range< arg_iterator > args()
Definition Function.h:892
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition Function.cpp:759
uint64_t getFnAttributeAsParsedInteger(StringRef Kind, uint64_t Default=0) const
For a string attribute Kind, parse attribute as an integer.
Definition Function.cpp:771
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this functio...
Definition Function.h:272
Argument * getArg(unsigned i) const
Definition Function.h:886
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:724
bool isDSOLocal() const
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2900
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Class to represent integer types.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI void emitError(const Instruction *I, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location inf...
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
LoongArchMachineFunctionInfo - This class is derived from MachineFunctionInfo and contains private Lo...
void setIncomingIndirectArg(unsigned ArgIndex, Register Reg)
Register getIncomingIndirectArg(unsigned ArgIndex) const
const LoongArchRegisterInfo * getRegisterInfo() const override
const LoongArchInstrInfo * getInstrInfo() const override
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override
Return true if result of the specified node is used by a return node only.
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps, bool &UseOneConstNR, bool Reciprocal) const override
Hooks for building estimates in place of slower divisions and square roots.
bool isLegalICmpImmediate(int64_t Imm) const override
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(const AtomicCmpXchgInst *CI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
Return the ValueType of the result of SETCC operations.
std::pair< bool, uint64_t > isImmVLDILegalForMode1(const APInt &SplatValue, const unsigned SplatBitSize) const
Check if a constant splat can be generated using [x]vldi, where imm[12] is 1.
void getTgtMemIntrinsic(SmallVectorImpl< IntrinsicInfo > &Infos, const CallBase &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
bool hasInlineStackProbe(const MachineFunction &MF) const override
True if stack clash protection is enabled for this function.
SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
bool isExtractVecEltCheap(EVT VT, unsigned Index) const override
Return true if extraction of a scalar element from the given vector type at the given index is cheap.
LegalizeTypeAction getPreferredVectorAction(MVT VT) const override
Return the preferred vector type legalization action.
bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override
Return true if sign-extension from SrcVT to DstVT is cheaper than zero-extension.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Determine if the target supports unaligned memory accesses.
bool isCheapToSpeculateCtlz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic ctlz.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
bool shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize, Align &PrefAlign) const override
Return true if the pointer arguments to CI should be aligned by aligning the object whose address is ...
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const override
Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type from this source type with ...
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
bool signExtendConstant(const ConstantInt *CI) const override
Return true if this constant should be sign extended when promoting to a larger type.
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(const AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool isLegalAddImmediate(int64_t Imm) const override
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
bool isCheapToSpeculateCttz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic cttz.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
MachineBasicBlock * emitDynamicProbedAlloc(MachineInstr &MI, MachineBasicBlock *MBB) const
bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const override
Returns true if arguments should be sign-extended in lib calls.
bool shouldScalarizeBinop(SDValue VecOp) const override
Try to convert an extract element of a vector binary operation into an extract element followed by a ...
bool isFPImmVLDILegal(const APFloat &Imm, EVT VT) const
bool shouldExtendTypeInLibCall(EVT Type) const override
Returns true if arguments should be extended in lib calls.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
bool hasAndNot(SDValue Y) const override
Return true if the target has a bitwise and-not operation: X = ~A & B This can be used to simplify se...
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
unsigned getStackProbeSize(const MachineFunction &MF, Align StackAlign) const
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const override
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
void emitExpandAtomicRMW(AtomicRMWInst *AI) const override
Perform an atomicrmw expansion in a target-specific way.
ISD::NodeType getExtendForAtomicCmpSwapArg() const override
Returns how the platform's atomic compare and swap expects its comparison value to be extended (ZERO_...
LoongArchTargetLowering(const TargetMachine &TM, const LoongArchSubtarget &STI)
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
bool hasAndNotCompare(SDValue Y) const override
Return true if the target should transform: (X & Y) == Y ---> (~X & Y) == 0 (X & Y) !...
SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps) const override
Return a reciprocal estimate value for the input operand.
bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT, const MachineFunction &MF) const override
Returns true if it's reasonable to merge stores to MemVT size.
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context, const Type *RetTy) const override
This hook should be implemented to check whether the return values described by the Outs array can fi...
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array,...
bool mayBeEmittedAsTailCall(const CallInst *CI) const override
Return true if the target may be able to emit the call instruction as a tail call.
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:41
bool hasFeature(unsigned Feature) const
Machine Value Type.
static MVT getFloatingPointVT(unsigned BitWidth)
bool is128BitVector() const
Return true if this is a 128-bit vector type.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isScalableVector() const
Return true if this is a vector value type where the runtime length is machine dependent.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
bool is256BitVector() const
Return true if this is a 256-bit vector type.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
MVT getDoubleNumVectorElementsVT() const
MVT getHalfNumVectorElementsVT() const
Return a VT for a vector type with the same element type but half the number of elements.
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
MVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
void push_back(MachineInstr *MI)
void setCallFrameSize(unsigned N)
Set the call frame size on entry to this basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
Representation of each machine instruction.
bool isImplicitDef() const
LLVM_ABI void collectDebugValues(SmallVectorImpl< MachineInstr * > &DbgValues)
Scan instructions immediately following MI and collect any matching DBG_VALUEs.
const MachineOperand & getOperand(unsigned i) const
LLVM_ABI MachineInstrBundleIterator< MachineInstr > eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
A description of a memory reference used in the backend.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
Flags getFlags() const
Return the raw flags of the source value.
MachineOperand class - Representation of each machine instruction operand.
void setIsKill(bool Val=true)
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
Align getAlign() const
MachineMemOperand * getMemOperand() const
Return the unique MachineMemOperand object describing the memory reference performed by operation.
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
Represent a mutable reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:294
Class to represent pointers.
unsigned getAddressSpace() const
Return the address space of the Pointer type.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:79
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
LLVM_ABI bool isOnlyUserOf(const SDNode *N) const
Return true if this node is the only use of N.
size_t use_size() const
Return the number of uses of this node.
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumOperands() const
Return the number of values used by this operation.
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
bool isUndef() const
Returns true if the node type is UNDEF or POISON.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
uint64_t getScalarValueSizeInBits() const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
LLVM_ABI SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
bool isSafeToSpeculativelyExecute(unsigned Opcode) const
Some opcodes may create immediate undefined behavior when used with some values (integer division-by-...
SDValue getExtractSubvector(const SDLoc &DL, EVT VT, SDValue Vec, unsigned Idx)
Return the VT typed sub-vector of Vec at Idx.
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getInsertSubvector(const SDLoc &DL, SDValue Vec, SDValue SubVec, unsigned Idx)
Insert SubVec at the Idx element of Vec.
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false, SDNodeFlags Flags={})
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
LLVM_ABI SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align DstAlign, Align SrcAlign, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), BatchAAResults *BatchAA=nullptr)
const TargetLowering & getTargetLoweringInfo() const
static constexpr unsigned MaxRecursionDepth
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
LLVM_ABI SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
LLVM_ABI void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
LLVM_ABI SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
LLVM_ABI SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
LLVM_ABI std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
LLVM_ABI bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
LLVM_ABI SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
LLVM_ABI SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
LLVM_ABI SDValue WidenVector(const SDValue &N, const SDLoc &DL)
Widen the vector up to the next power of two using INSERT_SUBVECTOR.
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
LLVM_ABI SDValue getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of float type, to the float type VT, by either extending or rounding (by tr...
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
LLVM_ABI SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
LLVM_ABI SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getRegisterMask(const uint32_t *RegMask)
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVM_ABI SDValue getCondCode(ISD::CondCode Cond)
LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
LLVMContext * getContext() const
LLVM_ABI SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
LLVM_ABI SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
LLVM_ABI std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
static LLVM_ABI bool isReverseMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask swaps the order of elements from exactly one source vector.
ArrayRef< int > getMask() const
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:134
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition SmallSet.h:176
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:184
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void assign(size_type NumElts, ValueParamT Elt)
void reserve(size_type N)
typename SuperClass::const_iterator const_iterator
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StackOffset holds a fixed and a scalable offset in bytes.
Definition TypeSize.h:30
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition StringRef.h:730
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:258
constexpr size_t size() const
Get the string size.
Definition StringRef.h:144
TargetInstrInfo - Interface to description of machine instruction set.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
const TargetMachine & getTargetMachine() const
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
LegalizeTypeAction
This enum indicates whether a type is legal for a target, and if not, what action should be used to...
void setMaxBytesForAlignment(unsigned MaxBytes)
bool isOperationLegalOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal using promotion.
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
virtual TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT) const
Return the preferred vector type legalization action.
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
void setMinCmpXchgSizeInBits(unsigned SizeInBits)
Sets the minimum cmpxchg or ll/sc size supported by the backend.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
std::vector< ArgListEntry > ArgListTy
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Vector Op.
virtual InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const
SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
More limited version of SimplifyDemandedBits that can be used to "lookthrough" ops that don't contrib...
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
TargetLowering(const TargetLowering &)=delete
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::LibcallImpl LibcallImpl, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
Primary interface to the complete machine description for the target machine.
bool useTLSDESC() const
Returns true if this target uses TLS Descriptors.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
bool shouldAssumeDSOLocal(const GlobalValue *GV) const
CodeModel::Model getCodeModel() const
Returns the code model.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetInstrInfo * getInstrInfo() const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
LLVM_ABI unsigned getIntegerBitWidth() const
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:197
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:257
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:313
This class is used to represent EVT's, which are used to parameterize some operations.
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:553
self_iterator getIterator()
Definition ilist_node.h:123
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition CallingConv.h:24
@ PreserveMost
Used for runtime calls that preserve most registers.
Definition CallingConv.h:63
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition CallingConv.h:50
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ PreserveNone
Used for runtime calls that preserve no general registers.
Definition CallingConv.h:90
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
LLVM_ABI bool isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are ~0 ...
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition ISDOpcodes.h:41
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:827
@ STACKRESTORE
STACKRESTORE has two operands: an input chain and a pointer to restore to. It returns an output chain.
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition ISDOpcodes.h:511
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition ISDOpcodes.h:45
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:275
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:600
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:787
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:264
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:861
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:518
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:220
@ GlobalAddress
Definition ISDOpcodes.h:88
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:888
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:584
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:417
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:747
@ MEMBARRIER
MEMBARRIER - Compiler barrier only; generate a no-op.
@ ATOMIC_FENCE
OUTCHAIN = ATOMIC_FENCE(INCHAIN, ordering, scope) This corresponds to the fence instruction.
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition ISDOpcodes.h:918
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:254
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
@ GlobalTLSAddress
Definition ISDOpcodes.h:89
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:852
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition ISDOpcodes.h:715
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:665
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ BR_CC
BR_CC - Conditional branch.
@ BR_JT
BR_JT - Jumptable branch.
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition ISDOpcodes.h:541
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition ISDOpcodes.h:548
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:374
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:804
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:233
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:704
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:769
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:649
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:614
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimumNumber or maximumNumber on two values,...
@ READ_REGISTER
READ_REGISTER, WRITE_REGISTER - This node represents llvm.register on the DAG, which implements the n...
Definition ISDOpcodes.h:139
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:576
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition ISDOpcodes.h:224
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:858
@ DEBUGTRAP
DEBUGTRAP - Trap intended to get the attention of a debugger.
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:819
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values, following IEEE-754 definition...
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:896
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:727
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:986
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:813
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition ISDOpcodes.h:150
@ BF16_TO_FP
BF16_TO_FP, FP_TO_BF16 - These operators are used to perform promotions and truncation for bfloat16.
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition ISDOpcodes.h:110
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:934
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:739
@ TRAP
TRAP - Trapping instruction.
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:205
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
Definition ISDOpcodes.h:710
@ FREEZE
FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or is evaluated to UNDEF),...
Definition ISDOpcodes.h:241
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:565
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:967
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition ISDOpcodes.h:929
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:864
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
@ BRCOND
BRCOND - Conditional branch.
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition ISDOpcodes.h:841
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition ISDOpcodes.h:62
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:365
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition ISDOpcodes.h:722
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:213
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:556
bool isExtVecInRegOpcode(unsigned Opcode)
LLVM_ABI bool isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are 0 o...
LLVM_ABI CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
bool isBitwiseLogicOp(unsigned Opcode)
Whether this is bitwise logic opcode.
LLVM_ABI bool isFreezeUndef(const SDNode *N)
Return true if the specified node is FREEZE(UNDEF).
LLVM_ABI CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LLVM_ABI bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
LLVM_ABI NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode)
Get underlying scalar opcode for VECREDUCE opcode.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > OverloadTys={})
Look up the Function declaration of the intrinsic id in the Module M.
ABI getTargetABI(StringRef ABIName)
InstSeq generateInstSeq(int64_t Val)
LLVM_ABI Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition LLVMContext.h:55
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
Sequence
A sequence of states that a pointer may go through in which an objc_retain and objc_release are actua...
Definition PtrState.h:41
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
This is an optimization pass for GlobalISel generic memory operations.
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
Definition Threading.h:280
@ Offset
Definition DWP.cpp:558
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1738
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
constexpr RegState getKillRegState(bool B)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
bool isIntOrFPConstant(SDValue V)
Return true if V is either an integer or FP constant.
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
Definition bit.h:325
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer that is less than or equal to Value and congruent to Skew modulo Align.
Definition MathExtras.h:546
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
LLVM_ABI bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:337
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
Definition MathExtras.h:273
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1745
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition MathExtras.h:261
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Other
Any other memory.
Definition ModRef.h:68
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr bool isShiftedInt(int64_t x)
Checks if a signed integer is an N bit number shifted left by S.
Definition MathExtras.h:182
constexpr unsigned BitWidth
std::string join_items(Sep Separator, Args &&... Items)
Joins the strings in the parameter pack Items, adding Separator between the elements....
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
PointerUnion< const Value *, const PseudoSourceValue * > ValueType
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:177
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:862
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:77
Extended Value Type.
Definition ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition ValueTypes.h:90
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:418
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:145
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition ValueTypes.h:307
bool bitsLT(EVT VT) const
Return true if this has fewer bits than VT.
Definition ValueTypes.h:323
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:155
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:396
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:408
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:339
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition ValueTypes.h:230
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:61
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:404
static EVT getFloatingPointVT(unsigned BitWidth)
Returns the EVT that represents a floating-point type with the given number of bits.
Definition ValueTypes.h:55
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:176
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:346
bool is256BitVector() const
Return true if this is a 256-bit vector type.
Definition ValueTypes.h:235
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:351
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:165
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:359
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:484
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:160
Align getNonZeroOrigAlign() const
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
void resetAll()
Resets the known state of all bits.
Definition KnownBits.h:72
Matching combinators.
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
static LLVM_ABI MachinePointerInfo getUnknownStack(MachineFunction &MF)
Stack memory without other information.
static LLVM_ABI MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition Alignment.h:106
This represents a list of ValueTypes that has been interned by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
SmallVector< ISD::OutputArg, 32 > Outs
LLVM_ABI SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
This structure is used to pass arguments to makeLibCall function.
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...