LLVM 23.0.0git
LoongArchISelLowering.cpp
Go to the documentation of this file.
1//=- LoongArchISelLowering.cpp - LoongArch DAG Lowering Implementation ---===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that LoongArch uses to lower LLVM code into
10// a selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
15#include "LoongArch.h"
19#include "LoongArchSubtarget.h"
23#include "llvm/ADT/SmallSet.h"
24#include "llvm/ADT/Statistic.h"
30#include "llvm/IR/IRBuilder.h"
32#include "llvm/IR/IntrinsicsLoongArch.h"
34#include "llvm/Support/Debug.h"
39
40using namespace llvm;
41
42#define DEBUG_TYPE "loongarch-isel-lowering"
43
44STATISTIC(NumTailCalls, "Number of tail calls");
45
54
// Argument list of the hidden "loongarch-materialize-float-imm" command-line
// option (the line that opens the declaration is not part of this listing).
// The accepted values bound how many instructions may be spent building a
// floating-point immediate before the constant pool is used instead.
56 "loongarch-materialize-float-imm", cl::Hidden,
57 cl::desc("Maximum number of instructions used (including code sequence "
58 "to generate the value and moving the value to FPR) when "
59 "materializing floating-point immediates (default = 3)"),
61 cl::values(clEnumValN(NoMaterializeFPImm, "0", "Use constant pool"),
63 "Materialize FP immediate within 2 instructions"),
65 "Materialize FP immediate within 3 instructions"),
67 "Materialize FP immediate within 4 instructions"),
69 "Materialize FP immediate within 5 instructions"),
71 "Materialize FP immediate within 6 instructions "
72 "(behaves same as 5 on loongarch64)")));
73
// Hidden boolean option "loongarch-check-zero-division"; it is off unless
// given explicitly and asks for a trap on integer division by zero.
74static cl::opt<bool> ZeroDivCheck("loongarch-check-zero-division", cl::Hidden,
75 cl::desc("Trap on integer division by zero."),
76 cl::init(false));
77
// Tail of a constructor signature followed by its body (presumably the
// LoongArchTargetLowering constructor; the first signature line is not part of
// this listing). The body registers register classes per subtarget feature,
// configures legalization actions, and finishes with atomic-width, alignment
// and strict-FP settings.
// NOTE(review): many statements are absent from this listing; several lines
// below are only the trailing arguments of calls whose first line is missing.
79 const LoongArchSubtarget &STI)
80 : TargetLowering(TM, STI), Subtarget(STI) {
81
82 MVT GRLenVT = Subtarget.getGRLenVT();
83
84 // Set up the register classes.
85
// GPRs always carry GRLenVT; the FP register classes are added only when the
// corresponding 'F' / 'D' feature is present.
86 addRegisterClass(GRLenVT, &LoongArch::GPRRegClass);
87 if (Subtarget.hasBasicF())
88 addRegisterClass(MVT::f32, &LoongArch::FPR32RegClass);
89 if (Subtarget.hasBasicD())
90 addRegisterClass(MVT::f64, &LoongArch::FPR64RegClass);
91
// Vector types registered with the LSX128 and LASX256 register classes below.
92 static const MVT::SimpleValueType LSXVTs[] = {
93 MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32, MVT::v2f64};
94 static const MVT::SimpleValueType LASXVTs[] = {
95 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, MVT::v8f32, MVT::v4f64};
96
97 if (Subtarget.hasExtLSX())
98 for (MVT VT : LSXVTs)
99 addRegisterClass(VT, &LoongArch::LSX128RegClass);
100
101 if (Subtarget.hasExtLASX())
102 for (MVT VT : LASXVTs)
103 addRegisterClass(VT, &LoongArch::LASX256RegClass);
104
105 // Set operations for LA32 and LA64.
106
108 MVT::i1, Promote);
109
116
119 GRLenVT, Custom);
120
122
127
129 setOperationAction(ISD::TRAP, MVT::Other, Legal);
130
134
136
137 // BITREV/REVB requires the 32S feature.
138 if (STI.has32S()) {
139 // Expand bitreverse.i16 with native-width bitrev and shift for now, before
140 // we get to know which of sll and revb.2h is faster.
143
144 // LA32 does not have REVB.2W and REVB.D due to the 64-bit operands, and
145 // the narrower REVB.W does not exist. But LA32 does have REVB.2H, so i16
146 // and i32 could still be byte-swapped relatively cheaply.
148 } else {
156 }
157
164
167
168 // Set operations for LA64 only.
169
170 if (Subtarget.is64Bit()) {
188
192 Custom);
194 }
195
196 // Set operations for LA32 only.
197
198 if (!Subtarget.is64Bit()) {
204 if (Subtarget.hasBasicD())
206 }
207
209
210 static const ISD::CondCode FPCCToExpand[] = {
213
214 // Set operations for 'F' feature.
215
216 if (Subtarget.hasBasicF()) {
// f16/bf16 extending loads and truncating stores are expanded for f32.
217 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
218 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
219 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
220 setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
221 setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
222
241 Subtarget.isSoftFPABI() ? LibCall : Custom);
243 Subtarget.isSoftFPABI() ? LibCall : Custom);
246 Subtarget.isSoftFPABI() ? LibCall : Custom);
247
248 if (Subtarget.is64Bit())
250
251 if (!Subtarget.hasBasicD()) {
253 if (Subtarget.is64Bit()) {
256 }
257 }
258 }
259
260 // Set operations for 'D' feature.
261
262 if (Subtarget.hasBasicD()) {
// f16/f32/bf16 extending loads and truncating stores are expanded for f64.
263 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
264 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
265 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
266 setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
267 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
268 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
269 setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
270
290 Subtarget.isSoftFPABI() ? LibCall : Custom);
293 Subtarget.isSoftFPABI() ? LibCall : Custom);
294
295 if (Subtarget.is64Bit())
297 }
298
299 // Set operations for 'LSX' feature.
300
301 if (Subtarget.hasExtLSX()) {
303 // Expand all truncating stores and extending loads.
304 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
305 setTruncStoreAction(VT, InnerVT, Expand);
308 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
309 }
310 // By default everything must be expanded. Then we will selectively turn
311 // on ones that can be effectively codegen'd.
312 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
314 }
315
316 for (MVT VT : LSXVTs) {
320
324
329 }
330 for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
333 Legal);
335 VT, Legal);
342 Expand);
357 }
358 for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
360 for (MVT VT : {MVT::v8i16, MVT::v4i32, MVT::v2i64})
362 for (MVT VT : {MVT::v4i32, MVT::v2i64}) {
365 }
366 for (MVT VT : {MVT::v4f32, MVT::v2f64}) {
374 VT, Expand);
382 }
// With LSX the scalar f32/f64 rounding operations are marked Legal.
384 setOperationAction(ISD::FCEIL, {MVT::f32, MVT::f64}, Legal);
385 setOperationAction(ISD::FFLOOR, {MVT::f32, MVT::f64}, Legal);
386 setOperationAction(ISD::FTRUNC, {MVT::f32, MVT::f64}, Legal);
387 setOperationAction(ISD::FROUNDEVEN, {MVT::f32, MVT::f64}, Legal);
388
389 for (MVT VT :
390 {MVT::v16i8, MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v8i16, MVT::v4i16,
391 MVT::v2i16, MVT::v4i32, MVT::v2i32, MVT::v2i64}) {
401 }
404 // We want to legalize this to an f64 load rather than an i64 load.
405 setOperationAction(ISD::LOAD, MVT::v2f32, Custom);
406 for (MVT VT : {MVT::v2i64, MVT::v4i32, MVT::v8i16})
408 for (MVT VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64, MVT::v16i32, MVT::v8i64,
409 MVT::v16i64})
411 }
412
413 // Set operations for 'LASX' feature.
414
415 if (Subtarget.hasExtLASX()) {
416 for (MVT VT : LASXVTs) {
420
426
430 }
431 for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) {
434 Legal);
436 VT, Legal);
443 Expand);
459 }
460 for (MVT VT : {MVT::v32i8, MVT::v16i16, MVT::v8i32})
462 for (MVT VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64})
464 for (MVT VT : {MVT::v8i32, MVT::v4i32, MVT::v4i64}) {
467 }
468 for (MVT VT : {MVT::v8f32, MVT::v4f64}) {
476 VT, Expand);
484 }
487 for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16}) {
490 }
491 for (MVT VT :
492 {MVT::v2i64, MVT::v4i32, MVT::v4i64, MVT::v8i16, MVT::v8i32}) {
495 }
496 }
497
498 // Set DAG combine for LA32 and LA64.
499 if (Subtarget.hasBasicF()) {
501 }
502
507
508 // Set DAG combine for 'LSX' feature.
509
510 if (Subtarget.hasExtLSX()) {
515 }
516
517 // Set DAG combine for 'LASX' feature.
518 if (Subtarget.hasExtLASX()) {
523 }
524
525 // Compute derived properties from the register classes.
526 computeRegisterProperties(Subtarget.getRegisterInfo());
527
529
532
// The widest supported atomic access is one general-purpose register (GRLen).
533 setMaxAtomicSizeInBitsSupported(Subtarget.getGRLen());
534
536
537 // Function alignments.
539 // Set preferred alignments.
540 setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment());
541 setPrefLoopAlignment(Subtarget.getPrefLoopAlignment());
542 setMaxBytesForAlignment(Subtarget.getMaxBytesForAlignment());
543
544 // cmpxchg sizes down to 8 bits become legal if LAMCAS is available.
545 if (Subtarget.hasLAMCAS())
547
548 if (Subtarget.hasSCQ()) {
551 }
552
553 // Disable strict node mutation.
554 IsStrictFPEnabled = true;
555}
556
// Remainder of a const member function that takes a GlobalAddressSDNode (the
// line carrying its name is not part of this listing; presumably the
// isOffsetFoldingLegal hook). It always answers false.
558 const GlobalAddressSDNode *GA) const {
559 // In order to maximise the opportunity for common subexpression elimination,
560 // keep a separate ADD node for the global address offset instead of folding
561 // it in the global address node. Later peephole optimisations may choose to
562 // fold it back in when profitable.
563 return false;
564}
565
// Remainder of the custom-lowering dispatcher (its first signature line is not
// part of this listing; presumably LowerOperation). It switches on the opcode
// of Op and forwards to the matching lower* helper; an opcode without a case
// falls out of the switch and yields an empty SDValue.
// NOTE(review): many `case` labels are absent from this listing, so several
// `return` statements below appear without the label that guards them.
567 SelectionDAG &DAG) const {
568 switch (Op.getOpcode()) {
570 return lowerATOMIC_FENCE(Op, DAG);
572 return lowerEH_DWARF_CFA(Op, DAG);
574 return lowerGlobalAddress(Op, DAG);
576 return lowerGlobalTLSAddress(Op, DAG);
578 return lowerINTRINSIC_WO_CHAIN(Op, DAG);
580 return lowerINTRINSIC_W_CHAIN(Op, DAG);
582 return lowerINTRINSIC_VOID(Op, DAG);
584 return lowerBlockAddress(Op, DAG);
585 case ISD::JumpTable:
586 return lowerJumpTable(Op, DAG);
587 case ISD::SHL_PARTS:
588 return lowerShiftLeftParts(Op, DAG);
// Arithmetic and logical right shifts share one helper; the flag selects SRA.
589 case ISD::SRA_PARTS:
590 return lowerShiftRightParts(Op, DAG, true);
591 case ISD::SRL_PARTS:
592 return lowerShiftRightParts(Op, DAG, false);
594 return lowerConstantPool(Op, DAG);
595 case ISD::FP_TO_SINT:
596 return lowerFP_TO_SINT(Op, DAG);
597 case ISD::BITCAST:
598 return lowerBITCAST(Op, DAG);
599 case ISD::UINT_TO_FP:
600 return lowerUINT_TO_FP(Op, DAG);
601 case ISD::SINT_TO_FP:
602 return lowerSINT_TO_FP(Op, DAG);
603 case ISD::VASTART:
604 return lowerVASTART(Op, DAG);
605 case ISD::FRAMEADDR:
606 return lowerFRAMEADDR(Op, DAG);
607 case ISD::RETURNADDR:
608 return lowerRETURNADDR(Op, DAG);
610 return lowerWRITE_REGISTER(Op, DAG);
612 return lowerINSERT_VECTOR_ELT(Op, DAG);
614 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
616 return lowerBUILD_VECTOR(Op, DAG);
618 return lowerCONCAT_VECTORS(Op, DAG);
620 return lowerVECTOR_SHUFFLE(Op, DAG);
621 case ISD::BITREVERSE:
622 return lowerBITREVERSE(Op, DAG);
624 return lowerSCALAR_TO_VECTOR(Op, DAG);
625 case ISD::PREFETCH:
626 return lowerPREFETCH(Op, DAG);
627 case ISD::SELECT:
628 return lowerSELECT(Op, DAG);
629 case ISD::BRCOND:
630 return lowerBRCOND(Op, DAG);
631 case ISD::FP_TO_FP16:
632 return lowerFP_TO_FP16(Op, DAG);
633 case ISD::FP16_TO_FP:
634 return lowerFP16_TO_FP(Op, DAG);
635 case ISD::FP_TO_BF16:
636 return lowerFP_TO_BF16(Op, DAG);
637 case ISD::BF16_TO_FP:
638 return lowerBF16_TO_FP(Op, DAG);
640 return lowerVECREDUCE_ADD(Op, DAG);
// Both rotate directions are handled by the same helper.
641 case ISD::ROTL:
642 case ISD::ROTR:
643 return lowerRotate(Op, DAG);
651 return lowerVECREDUCE(Op, DAG);
652 case ISD::ConstantFP:
653 return lowerConstantFP(Op, DAG);
654 case ISD::SETCC:
655 return lowerSETCC(Op, DAG);
656 case ISD::FP_ROUND:
657 return lowerFP_ROUND(Op, DAG);
658 case ISD::FP_EXTEND:
659 return lowerFP_EXTEND(Op, DAG);
661 return lowerSIGN_EXTEND_VECTOR_INREG(Op, DAG);
663 return lowerDYNAMIC_STACKALLOC(Op, DAG);
664 }
665 return SDValue();
666}
667
668// Helper to attempt to return a cheaper, bit-inverted version of \p V.
// Returns an empty SDValue when none of the patterns below matches. Bitcasts
// on V are looked through first; the patterns recurse through isNOT itself.
// NOTE(review): the signature line is not part of this listing; the body uses
// a value `V` and a SelectionDAG `DAG`.
670 // TODO: don't always ignore oneuse constraints.
671 V = peekThroughBitcasts(V);
672 EVT VT = V.getValueType();
673
674 // Match not(xor X, -1) -> X.
675 if (V.getOpcode() == ISD::XOR &&
676 (ISD::isBuildVectorAllOnes(V.getOperand(1).getNode()) ||
677 isAllOnesConstant(V.getOperand(1))))
678 return V.getOperand(0);
679
680 // Match not(extract_subvector(not(X)) -> extract_subvector(X).
// Only attempted when extracting at index 0 or when the source vector has a
// single use.
681 if (V.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
682 (isNullConstant(V.getOperand(1)) || V.getOperand(0).hasOneUse())) {
683 if (SDValue Not = isNOT(V.getOperand(0), DAG)) {
684 Not = DAG.getBitcast(V.getOperand(0).getValueType(), Not);
685 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(Not), VT, Not,
686 V.getOperand(1));
687 }
688 }
689
690 // Match not(SplatVector(not(X)) -> SplatVector(X).
691 if (V.getOpcode() == ISD::BUILD_VECTOR) {
692 if (SDValue SplatValue =
693 cast<BuildVectorSDNode>(V.getNode())->getSplatValue()) {
// Give up if the splatted scalar has users other than this build_vector.
694 if (!V->isOnlyUserOf(SplatValue.getNode()))
695 return SDValue();
696
697 if (SDValue Not = isNOT(SplatValue, DAG)) {
698 Not = DAG.getBitcast(V.getOperand(0).getValueType(), Not);
699 return DAG.getSplat(VT, SDLoc(Not), Not);
700 }
701 }
702 }
703
704 // Match not(or(not(X),not(Y))) -> and(X, Y).
// Requires a legal type and single-use OR operands; both operands must
// themselves be invertible.
705 if (V.getOpcode() == ISD::OR && DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
706 V.getOperand(0).hasOneUse() && V.getOperand(1).hasOneUse()) {
707 // TODO: Handle cases with single NOT operand -> VANDN
708 if (SDValue Op1 = isNOT(V.getOperand(1), DAG))
709 if (SDValue Op0 = isNOT(V.getOperand(0), DAG))
710 return DAG.getNode(ISD::AND, SDLoc(V), VT, DAG.getBitcast(VT, Op0),
711 DAG.getBitcast(VT, Op1));
712 }
713
714 // TODO: Add more matching patterns. Such as,
715 // not(concat_vectors(not(X), not(Y))) -> concat_vectors(X, Y).
716 // not(slt(C, X)) -> slt(X - 1, C)
717 return SDValue();
718}
719
720// Combine two ISD::FP_ROUND / LoongArchISD::VFCVT nodes with same type to
721// LoongArchISD::VFCVT. For example:
722// x1 = fp_round x, 0
723// y1 = fp_round y, 0
724// z = concat_vectors x1, y1
725// Or
726// x1 = LoongArch::VFCVT undef, x
727// y1 = LoongArch::VFCVT undef, y
728// z = LoongArchISD::VPACKEV y1, x1; or LoongArchISD::VPERMI y1, x1, 68
729// can be combined to:
730// z = LoongArch::VFCVT y, x
// Returns an empty SDValue whenever the pattern does not apply.
// NOTE(review): the first signature line is not part of this listing; the body
// uses a node `N`, a location `DL` and a SelectionDAG `DAG`.
732 const LoongArchSubtarget &Subtarget) {
733 assert(((N->getOpcode() == ISD::CONCAT_VECTORS && N->getNumOperands() == 2) ||
734 (N->getOpcode() == LoongArchISD::VPACKEV) ||
735 (N->getOpcode() == LoongArchISD::VPERMI)) &&
736 "Invalid Node");
737
// Both inputs must be produced by the same opcode, and that opcode must be
// FP_ROUND or VFCVT.
738 SDValue Op0 = peekThroughBitcasts(N->getOperand(0));
739 SDValue Op1 = peekThroughBitcasts(N->getOperand(1));
740 unsigned Opcode0 = Op0.getOpcode();
741 unsigned Opcode1 = Op1.getOpcode();
742 if (Opcode0 != Opcode1)
743 return SDValue();
744
745 if (Opcode0 != ISD::FP_ROUND && Opcode0 != LoongArchISD::VFCVT)
746 return SDValue();
747
748 // Check if two nodes have only one use.
749 if (!Op0.hasOneUse() || !Op1.hasOneUse())
750 return SDValue();
751
752 EVT VT = N.getValueType();
753 EVT SVT0 = Op0.getValueType();
754 EVT SVT1 = Op1.getValueType();
755 // Check if two nodes have the same result type.
756 if (SVT0 != SVT1)
757 return SDValue();
758
759 // Check if two nodes have the same operand type.
760 EVT SSVT0 = Op0.getOperand(0).getValueType();
761 EVT SSVT1 = Op1.getOperand(0).getValueType();
762 if (SSVT0 != SSVT1)
763 return SDValue();
764
// Case 1: concat_vectors of two v4f64 -> v4f32 roundings (LASX only).
765 if (N->getOpcode() == ISD::CONCAT_VECTORS && Opcode0 == ISD::FP_ROUND) {
766 if (Subtarget.hasExtLASX() && VT.is256BitVector() && SVT0 == MVT::v4f32 &&
767 SSVT0 == MVT::v4f64) {
768 // A vector_shuffle is required in the final step, as xvfcvt instruction
769 // operates on each 128-bit segment as a lane.
770 SDValue Res = DAG.getNode(LoongArchISD::VFCVT, DL, MVT::v8f32,
771 Op1.getOperand(0), Op0.getOperand(0));
772 SDValue Undef = DAG.getUNDEF(Res.getValueType());
773 // After VFCVT, the high part of Res comes from the high parts of Op0 and
774 // Op1, and the low part comes from the low parts of Op0 and Op1. However,
775 // the desired order requires Op0 to fully occupy the lower half and Op1
776 // the upper half of Res. The Mask reorders the elements of Res to achieve
777 // this:
778 // - The first four elements (0, 1, 4, 5) come from Op0.
779 // - The next four elements (2, 3, 6, 7) come from Op1.
780 SmallVector<int, 8> Mask = {0, 1, 4, 5, 2, 3, 6, 7};
781 Res = DAG.getVectorShuffle(Res.getValueType(), DL, Res, Undef, Mask);
782 return DAG.getBitcast(VT, Res);
783 }
784 }
785
// Case 2: VPACKEV / VPERMI of two half-filled v2f64 -> v4f32 VFCVT nodes
// (LSX only).
786 if ((N->getOpcode() == LoongArchISD::VPACKEV ||
787 N->getOpcode() == LoongArchISD::VPERMI) &&
788 Opcode0 == LoongArchISD::VFCVT) {
789 // For VPACKEV or VPERMI, check if the first operation of VFCVT is undef.
790 if (!Op0.getOperand(0).isUndef() || !Op1.getOperand(0).isUndef())
791 return SDValue();
792
793 if (!Subtarget.hasExtLSX() || SVT0 != MVT::v4f32 || SSVT0 != MVT::v2f64)
794 return SDValue();
795
796 if (N->getOpcode() == LoongArchISD::VPACKEV &&
797 (VT == MVT::v2i64 || VT == MVT::v2f64)) {
798 SDValue Res = DAG.getNode(LoongArchISD::VFCVT, DL, MVT::v4f32,
799 Op0.getOperand(1), Op1.getOperand(1));
800 return DAG.getBitcast(VT, Res);
801 }
802
803 if (N->getOpcode() == LoongArchISD::VPERMI && VT == MVT::v4f32) {
// Only the immediate 68 (the form shown in the header example) is folded.
804 int64_t Imm = cast<ConstantSDNode>(N->getOperand(2))->getSExtValue();
805 if (Imm != 68)
806 return SDValue();
807 return DAG.getNode(LoongArchISD::VFCVT, DL, MVT::v4f32, Op0.getOperand(1),
808 Op1.getOperand(1));
809 }
810 }
811
812 return SDValue();
813}
814
815SDValue LoongArchTargetLowering::lowerFP_ROUND(SDValue Op,
816 SelectionDAG &DAG) const {
817 SDLoc DL(Op);
818 SDValue In = Op.getOperand(0);
819 MVT VT = Op.getSimpleValueType();
820 MVT SVT = In.getSimpleValueType();
821
822 if (VT == MVT::v4f32 && SVT == MVT::v4f64) {
823 SDValue Lo, Hi;
824 std::tie(Lo, Hi) = DAG.SplitVector(In, DL);
825 return DAG.getNode(LoongArchISD::VFCVT, DL, VT, Hi, Lo);
826 }
827
828 return SDValue();
829}
830
// Custom lowering for vector ISD::FP_EXTEND. Handles v2f32 -> v2f64 (needs
// LSX) and v4f32 -> v4f64 (needs LASX) by emitting LoongArchISD::VFCVTH or
// LoongArchISD::VFCVTL on a vector with twice as many f32 elements; any other
// combination returns an empty SDValue.
831SDValue LoongArchTargetLowering::lowerFP_EXTEND(SDValue Op,
832 SelectionDAG &DAG) const {
833
834 SDLoc DL(Op);
835 EVT VT = Op.getValueType();
836 SDValue Src = Op->getOperand(0);
837 EVT SVT = Src.getValueType();
838
839 bool V2F32ToV2F64 =
840 VT == MVT::v2f64 && SVT == MVT::v2f32 && Subtarget.hasExtLSX();
841 bool V4F32ToV4F64 =
842 VT == MVT::v4f64 && SVT == MVT::v4f32 && Subtarget.hasExtLASX();
843 if (!V2F32ToV2F64 && !V4F32ToV4F64)
844 return SDValue();
845
846 // Check if Op is the high part of vector.
// Returns the full source vector when Op is an EXTRACT_SUBVECTOR starting at
// half the source's element count, otherwise an empty SDValue.
// NOTE(review): the listing skips one line right after the lambda header.
847 auto CheckVecHighPart = [](SDValue Op) {
849 if (Op.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
850 SDValue SOp = Op.getOperand(0);
851 EVT SVT = SOp.getValueType();
852 if (!SVT.isVector() || (SVT.getVectorNumElements() % 2 != 0))
853 return SDValue();
854
855 const uint64_t Imm = Op.getConstantOperandVal(1);
856 if (Imm == SVT.getVectorNumElements() / 2)
857 return SOp;
858 return SDValue();
859 }
860 return SDValue();
861 };
862
863 unsigned Opcode;
864 SDValue VFCVTOp;
// Same element type as the source, twice the element count.
865 EVT WideOpVT = SVT.getSimpleVT().getDoubleNumVectorElementsVT();
866 SDValue ZeroIdx = DAG.getVectorIdxConstant(0, DL);
867
868 // If the operand of ISD::FP_EXTEND comes from the high part of vector,
869 // generate LoongArchISD::VFCVTH, otherwise LoongArchISD::VFCVTL.
870 if (SDValue V = CheckVecHighPart(Src)) {
871 assert(V.getValueSizeInBits() == WideOpVT.getSizeInBits() &&
872 "Unexpected wide vector");
873 Opcode = LoongArchISD::VFCVTH;
874 VFCVTOp = DAG.getBitcast(WideOpVT, V);
875 } else {
// Place the source in the low elements of an otherwise undefined wide vector.
876 Opcode = LoongArchISD::VFCVTL;
877 VFCVTOp = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideOpVT,
878 DAG.getUNDEF(WideOpVT), Src, ZeroIdx);
879 }
880
881 // v2f64 = fp_extend v2f32
882 if (V2F32ToV2F64)
883 return DAG.getNode(Opcode, DL, VT, VFCVTOp);
884
885 // v4f64 = fp_extend v4f32
886 if (V4F32ToV4F64) {
887 // XVFCVT instruction operates on each 128-bit segment as a lane, so a
888 // vector_shuffle is required firstly.
889 SmallVector<int, 8> Mask = {0, 1, 4, 5, 2, 3, 6, 7};
890 SDValue Res = DAG.getVectorShuffle(WideOpVT, DL, VFCVTOp,
891 DAG.getUNDEF(WideOpVT), Mask);
892 Res = DAG.getNode(Opcode, DL, VT, Res);
893 return Res;
894 }
895
896 return SDValue();
897}
898
// Custom lowering for ISD::ConstantFP. When it is cheap enough, the constant
// is built from its integer bit pattern and moved into a floating-point
// register. An empty SDValue is returned for zeros, for values accepted by
// isFPImmVLDILegal, and for values that would exceed the instruction budget
// in MaterializeFPImmInsNum.
899SDValue LoongArchTargetLowering::lowerConstantFP(SDValue Op,
900 SelectionDAG &DAG) const {
901 EVT VT = Op.getValueType();
902 ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op);
903 const APFloat &FPVal = CFP->getValueAPF();
904 SDLoc DL(CFP);
905
906 assert((VT == MVT::f32 && Subtarget.hasBasicF()) ||
907 (VT == MVT::f64 && Subtarget.hasBasicD()));
908
909 // If value is 0.0 or -0.0, just ignore it.
910 if (FPVal.isZero())
911 return SDValue();
912
913 // If lsx enabled, use cheaper 'vldi' instruction if possible.
914 if (isFPImmVLDILegal(FPVal, VT))
915 return SDValue();
916
917 // Construct as integer, and move to float register.
918 APInt INTVal = FPVal.bitcastToAPInt();
919
920 // If more than MaterializeFPImmInsNum instructions will be used to
921 // generate the INTVal and move it to float register, fallback to
922 // use floating point load from the constant pool.
// NOTE(review): `Seq` is not declared in the visible lines; the listing skips
// the line that defines it (presumably the integer materialization sequence
// for INTVal) - confirm against the full source.
// The move into the FPR is counted as 2 instructions for f64 on a 32-bit
// subtarget and as 1 otherwise. The value +1.0 is exempt from the budget.
924 int InsNum = Seq.size() + ((VT == MVT::f64 && !Subtarget.is64Bit()) ? 2 : 1);
925 if (InsNum > MaterializeFPImmInsNum && !FPVal.isExactlyValue(+1.0))
926 return SDValue();
927
928 switch (VT.getSimpleVT().SimpleTy) {
929 default:
930 llvm_unreachable("Unexpected floating point type!");
931 break;
932 case MVT::f32: {
// The i32 bit pattern is any-extended to i64 first on 64-bit subtargets.
933 SDValue NewVal = DAG.getConstant(INTVal, DL, MVT::i32);
934 if (Subtarget.is64Bit())
935 NewVal = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, NewVal);
936 return DAG.getNode(Subtarget.is64Bit() ? LoongArchISD::MOVGR2FR_W_LA64
937 : LoongArchISD::MOVGR2FR_W,
938 DL, VT, NewVal);
939 }
940 case MVT::f64: {
941 if (Subtarget.is64Bit()) {
942 SDValue NewVal = DAG.getConstant(INTVal, DL, MVT::i64);
943 return DAG.getNode(LoongArchISD::MOVGR2FR_D, DL, VT, NewVal);
944 }
// 32-bit subtarget: pass the low and high 32-bit halves separately.
945 SDValue Lo = DAG.getConstant(INTVal.trunc(32), DL, MVT::i32);
946 SDValue Hi = DAG.getConstant(INTVal.lshr(32).trunc(32), DL, MVT::i32);
947 return DAG.getNode(LoongArchISD::MOVGR2FR_D_LO_HI, DL, VT, Lo, Hi);
948 }
949 }
950
951 return SDValue();
952}
953
954// Ensure SETCC result and operand have the same bit width; isel does not
955// support mismatched widths.
956SDValue LoongArchTargetLowering::lowerSETCC(SDValue Op,
957 SelectionDAG &DAG) const {
958 SDLoc DL(Op);
959 EVT ResultVT = Op.getValueType();
960 EVT OperandVT = Op.getOperand(0).getValueType();
961
962 EVT SetCCResultVT =
963 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), OperandVT);
964
965 if (ResultVT == SetCCResultVT)
966 return Op;
967
968 assert(Op.getOperand(0).getValueType() == Op.getOperand(1).getValueType() &&
969 "SETCC operands must have the same type!");
970
971 SDValue SetCCNode =
972 DAG.getNode(ISD::SETCC, DL, SetCCResultVT, Op.getOperand(0),
973 Op.getOperand(1), Op.getOperand(2));
974
975 if (ResultVT.bitsGT(SetCCResultVT))
976 SetCCNode = DAG.getNode(ISD::SIGN_EXTEND, DL, ResultVT, SetCCNode);
977 else if (ResultVT.bitsLT(SetCCResultVT))
978 SetCCNode = DAG.getNode(ISD::TRUNCATE, DL, ResultVT, SetCCNode);
979
980 return SetCCNode;
981}
982
983// Lower sext_invec using vslti instructions.
984// For example:
985// %b = sext <4 x i16> %a to <4 x i32>
986// can be lowered to:
987// VSLTI_H vr2, vr1, 0
988// VILVL.H vr1, vr2, vr1
989SDValue LoongArchTargetLowering::lowerSIGN_EXTEND_VECTOR_INREG(
990 SDValue Op, SelectionDAG &DAG) const {
991 SDLoc DL(Op);
992 SDValue Src = Op.getOperand(0);
993 MVT SrcVT = Src.getSimpleValueType();
994 MVT DstVT = Op.getSimpleValueType();
995
996 if (!SrcVT.is128BitVector())
997 return SDValue();
998
999 // lower to VSLTI + VILVL if extend could be done in single step.
1000 if (DstVT.getScalarSizeInBits() / SrcVT.getScalarSizeInBits() == 2) {
1001 SDValue Zero = DAG.getConstant(0, DL, SrcVT);
1002 SDValue Mask = DAG.getNode(ISD::SETCC, DL, SrcVT, Src, Zero,
1003 DAG.getCondCode(ISD::SETLT));
1004 SDValue LoInterleaved =
1005 DAG.getNode(LoongArchISD::VILVL, DL, SrcVT, Mask, Src);
1006
1007 return DAG.getBitcast(DstVT, LoInterleaved);
1008 }
1009
1010 return SDValue();
1011}
1012
1013// Lower vecreduce_add using vhaddw instructions.
1014// For Example:
1015// call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a)
1016// can be lowered to:
1017// VHADDW_D_W vr0, vr0, vr0
1018// VHADDW_Q_D vr0, vr0, vr0
1019// VPICKVE2GR_D a0, vr0, 0
1020// ADDI_W a0, a0, 0
1021SDValue LoongArchTargetLowering::lowerVECREDUCE_ADD(SDValue Op,
1022 SelectionDAG &DAG) const {
1023
1024 SDLoc DL(Op);
1025 MVT OpVT = Op.getSimpleValueType();
1026 SDValue Val = Op.getOperand(0);
1027
1028 unsigned NumEles = Val.getSimpleValueType().getVectorNumElements();
1029 unsigned EleBits = Val.getSimpleValueType().getScalarSizeInBits();
1030 unsigned ResBits = OpVT.getScalarSizeInBits();
1031
1032 unsigned LegalVecSize = 128;
1033 bool isLASX256Vector =
1034 Subtarget.hasExtLASX() && Val.getValueSizeInBits() == 256;
1035
1036 // Ensure operand type legal or enable it legal.
1037 while (!isTypeLegal(Val.getSimpleValueType())) {
1038 Val = DAG.WidenVector(Val, DL);
1039 }
1040
1041 // NumEles is designed for iterations count, v4i32 for LSX
1042 // and v8i32 for LASX should have the same count.
1043 if (isLASX256Vector) {
1044 NumEles /= 2;
1045 LegalVecSize = 256;
1046 }
1047
1048 for (unsigned i = 1; i < NumEles; i *= 2, EleBits *= 2) {
1049 MVT IntTy = MVT::getIntegerVT(EleBits);
1050 MVT VecTy = MVT::getVectorVT(IntTy, LegalVecSize / EleBits);
1051 Val = DAG.getNode(LoongArchISD::VHADDW, DL, VecTy, Val, Val);
1052 }
1053
1054 if (isLASX256Vector) {
1055 SDValue Tmp = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, Val,
1056 DAG.getConstant(2, DL, Subtarget.getGRLenVT()));
1057 Val = DAG.getNode(ISD::ADD, DL, MVT::v4i64, Tmp, Val);
1058 }
1059
1060 Val = DAG.getBitcast(MVT::getVectorVT(OpVT, LegalVecSize / ResBits), Val);
1061 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Val,
1062 DAG.getConstant(0, DL, Subtarget.getGRLenVT()));
1063}
1064
1065// Lower vecreduce_and/or/xor/[s/u]max/[s/u]min.
1066// For Example:
1067// call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %a)
1068// can be lowered to:
1069// VBSRL_V vr1, vr0, 8
1070// VMAX_W vr0, vr1, vr0
1071// VBSRL_V vr1, vr0, 4
1072// VMAX_W vr0, vr1, vr0
1073// VPICKVE2GR_W a0, vr0, 0
1074// For 256 bit vector, it is illegal and will be spilt into
1075// two 128 bit vector by default then processed by this.
1076SDValue LoongArchTargetLowering::lowerVECREDUCE(SDValue Op,
1077 SelectionDAG &DAG) const {
1078 SDLoc DL(Op);
1079
1080 MVT OpVT = Op.getSimpleValueType();
1081 SDValue Val = Op.getOperand(0);
1082
1083 unsigned NumEles = Val.getSimpleValueType().getVectorNumElements();
1084 unsigned EleBits = Val.getSimpleValueType().getScalarSizeInBits();
1085
1086 // Ensure operand type legal or enable it legal.
1087 while (!isTypeLegal(Val.getSimpleValueType())) {
1088 Val = DAG.WidenVector(Val, DL);
1089 }
1090
1091 unsigned Opcode = ISD::getVecReduceBaseOpcode(Op.getOpcode());
1092 MVT VecTy = Val.getSimpleValueType();
1093 MVT GRLenVT = Subtarget.getGRLenVT();
1094
1095 for (int i = NumEles; i > 1; i /= 2) {
1096 SDValue ShiftAmt = DAG.getConstant(i * EleBits / 16, DL, GRLenVT);
1097 SDValue Tmp = DAG.getNode(LoongArchISD::VBSRL, DL, VecTy, Val, ShiftAmt);
1098 Val = DAG.getNode(Opcode, DL, VecTy, Tmp, Val);
1099 }
1100
1101 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Val,
1102 DAG.getConstant(0, DL, GRLenVT));
1103}
1104
1105SDValue LoongArchTargetLowering::lowerPREFETCH(SDValue Op,
1106 SelectionDAG &DAG) const {
1107 unsigned IsData = Op.getConstantOperandVal(4);
1108
1109 // We don't support non-data prefetch.
1110 // Just preserve the chain.
1111 if (!IsData)
1112 return Op.getOperand(0);
1113
1114 return Op;
1115}
1116
1117SDValue LoongArchTargetLowering::lowerRotate(SDValue Op,
1118 SelectionDAG &DAG) const {
1119 MVT VT = Op.getSimpleValueType();
1120 assert(VT.isVector() && "Unexpected type");
1121
1122 SDLoc DL(Op);
1123 SDValue R = Op.getOperand(0);
1124 SDValue Amt = Op.getOperand(1);
1125 unsigned Opcode = Op.getOpcode();
1126 unsigned EltSizeInBits = VT.getScalarSizeInBits();
1127
1128 auto checkCstSplat = [](SDValue V, APInt &CstSplatValue) {
1129 if (V.getOpcode() != ISD::BUILD_VECTOR)
1130 return false;
1131 if (SDValue SplatValue =
1132 cast<BuildVectorSDNode>(V.getNode())->getSplatValue()) {
1133 if (auto *C = dyn_cast<ConstantSDNode>(SplatValue)) {
1134 CstSplatValue = C->getAPIntValue();
1135 return true;
1136 }
1137 }
1138 return false;
1139 };
1140
1141 // Check for constant splat rotation amount.
1142 APInt CstSplatValue;
1143 bool IsCstSplat = checkCstSplat(Amt, CstSplatValue);
1144 bool isROTL = Opcode == ISD::ROTL;
1145
1146 // Check for splat rotate by zero.
1147 if (IsCstSplat && CstSplatValue.urem(EltSizeInBits) == 0)
1148 return R;
1149
1150 // LoongArch targets always prefer ISD::ROTR.
1151 if (isROTL) {
1152 SDValue Zero = DAG.getConstant(0, DL, VT);
1153 return DAG.getNode(ISD::ROTR, DL, VT, R,
1154 DAG.getNode(ISD::SUB, DL, VT, Zero, Amt));
1155 }
1156
1157 // Rotate by a immediate.
1158 if (IsCstSplat) {
1159 // ISD::ROTR: Attemp to rotate by a positive immediate.
1160 SDValue Bits = DAG.getConstant(EltSizeInBits, DL, VT);
1161 if (SDValue Urem =
1162 DAG.FoldConstantArithmetic(ISD::UREM, DL, VT, {Amt, Bits}))
1163 return DAG.getNode(Opcode, DL, VT, R, Urem);
1164 }
1165
1166 return Op;
1167}
1168
1169// Return true if Val is equal to (setcc LHS, RHS, CC).
1170// Return false if Val is the inverse of (setcc LHS, RHS, CC).
1171// Otherwise, return std::nullopt.
1172static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS,
1173 ISD::CondCode CC, SDValue Val) {
1174 assert(Val->getOpcode() == ISD::SETCC);
1175 SDValue LHS2 = Val.getOperand(0);
1176 SDValue RHS2 = Val.getOperand(1);
1177 ISD::CondCode CC2 = cast<CondCodeSDNode>(Val.getOperand(2))->get();
1178
1179 if (LHS == LHS2 && RHS == RHS2) {
1180 if (CC == CC2)
1181 return true;
1182 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
1183 return false;
1184 } else if (LHS == RHS2 && RHS == LHS2) {
1186 if (CC == CC2)
1187 return true;
1188 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
1189 return false;
1190 }
1191
1192 return std::nullopt;
1193}
1194
// Body of a helper that tries to turn a select node N (operands: condition,
// true value, false value) into plain bitwise/arithmetic nodes; it returns an
// empty SDValue when no pattern applies.
// NOTE(review): the first signature line is not part of this listing
// (presumably combineSelectToBinOp); the body uses a node `N` and a
// SelectionDAG `DAG`. The -c / c-1 rewrites presumably rely on the condition
// being 0 or 1 - confirm against the caller.
1196 const LoongArchSubtarget &Subtarget) {
1197 SDValue CondV = N->getOperand(0);
1198 SDValue TrueV = N->getOperand(1);
1199 SDValue FalseV = N->getOperand(2);
1200 MVT VT = N->getSimpleValueType(0);
1201 SDLoc DL(N);
1202
1203 // (select c, -1, y) -> -c | y
1204 if (isAllOnesConstant(TrueV)) {
1205 SDValue Neg = DAG.getNegative(CondV, DL, VT);
1206 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(FalseV));
1207 }
1208 // (select c, y, -1) -> (c-1) | y
1209 if (isAllOnesConstant(FalseV)) {
1210 SDValue Neg =
1211 DAG.getNode(ISD::ADD, DL, VT, CondV, DAG.getAllOnesConstant(DL, VT));
1212 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(TrueV));
1213 }
1214
1215 // (select c, 0, y) -> (c-1) & y
1216 if (isNullConstant(TrueV)) {
1217 SDValue Neg =
1218 DAG.getNode(ISD::ADD, DL, VT, CondV, DAG.getAllOnesConstant(DL, VT));
1219 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(FalseV));
1220 }
1221 // (select c, y, 0) -> -c & y
1222 if (isNullConstant(FalseV)) {
1223 SDValue Neg = DAG.getNegative(CondV, DL, VT);
1224 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(TrueV));
1225 }
1226
1227 // select c, ~x, x --> xor -c, x
// Applies only when both arms are constants that are bitwise complements.
1228 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
1229 const APInt &TrueVal = TrueV->getAsAPIntVal();
1230 const APInt &FalseVal = FalseV->getAsAPIntVal();
1231 if (~TrueVal == FalseVal) {
1232 SDValue Neg = DAG.getNegative(CondV, DL, VT);
1233 return DAG.getNode(ISD::XOR, DL, VT, Neg, FalseV);
1234 }
1235 }
1236
1237 // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops
1238 // when both truev and falsev are also setcc.
1239 if (CondV.getOpcode() == ISD::SETCC && TrueV.getOpcode() == ISD::SETCC &&
1240 FalseV.getOpcode() == ISD::SETCC) {
1241 SDValue LHS = CondV.getOperand(0);
1242 SDValue RHS = CondV.getOperand(1);
1243 ISD::CondCode CC = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
1244
1245 // (select x, x, y) -> x | y
1246 // (select !x, x, y) -> x & y
1247 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, TrueV)) {
1248 return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, TrueV,
1249 DAG.getFreeze(FalseV));
1250 }
1251 // (select x, y, x) -> x & y
1252 // (select !x, y, x) -> x | y
1253 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, FalseV)) {
1254 return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT,
1255 DAG.getFreeze(TrueV), FalseV);
1256 }
1257 }
1258
1259 return SDValue();
1260}
1261
1262// Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants
1263// into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable.
1264// For now we only consider transformation profitable if `binOp(c0, c1)` ends up
1265// being `0` or `-1`. In such cases we can replace `select` with `and`.
1266// TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize
1267// than `c0`?
// Returns an empty SDValue when the pattern does not match or is not
// considered profitable.
// NOTE(review): the line carrying the function name is not part of this
// listing; the body uses a binary-op node `BO` and a SelectionDAG `DAG`.
1268static SDValue
1270 const LoongArchSubtarget &Subtarget) {
// Find the single-use select among the two operands, trying operand 0 first.
1271 unsigned SelOpNo = 0;
1272 SDValue Sel = BO->getOperand(0);
1273 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
1274 SelOpNo = 1;
1275 Sel = BO->getOperand(1);
1276 }
1277
1278 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
1279 return SDValue();
1280
// Work out which select arm is the constant, trying the true arm first.
1281 unsigned ConstSelOpNo = 1;
1282 unsigned OtherSelOpNo = 2;
1283 if (!isa<ConstantSDNode>(Sel->getOperand(ConstSelOpNo))) {
1284 ConstSelOpNo = 2;
1285 OtherSelOpNo = 1;
1286 }
1287 SDValue ConstSelOp = Sel->getOperand(ConstSelOpNo);
1288 ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(ConstSelOp);
1289 if (!ConstSelOpNode || ConstSelOpNode->isOpaque())
1290 return SDValue();
1291
// The other operand of the binary op must be a non-opaque constant as well.
1292 SDValue ConstBinOp = BO->getOperand(SelOpNo ^ 1);
1293 ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(ConstBinOp);
1294 if (!ConstBinOpNode || ConstBinOpNode->isOpaque())
1295 return SDValue();
1296
1297 SDLoc DL(Sel);
1298 EVT VT = BO->getValueType(0);
1299
// Keep the original operand order of the binary op when folding c0 with c1.
1300 SDValue NewConstOps[2] = {ConstSelOp, ConstBinOp};
1301 if (SelOpNo == 1)
1302 std::swap(NewConstOps[0], NewConstOps[1]);
1303
1304 SDValue NewConstOp =
1305 DAG.FoldConstantArithmetic(BO->getOpcode(), DL, VT, NewConstOps);
1306 if (!NewConstOp)
1307 return SDValue();
1308
// Profitability check: the folded constant has to be 0 or all-ones.
1309 const APInt &NewConstAPInt = NewConstOp->getAsAPIntVal();
1310 if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes())
1311 return SDValue();
1312
1313 SDValue OtherSelOp = Sel->getOperand(OtherSelOpNo);
1314 SDValue NewNonConstOps[2] = {OtherSelOp, ConstBinOp};
1315 if (SelOpNo == 1)
1316 std::swap(NewNonConstOps[0], NewNonConstOps[1]);
1317 SDValue NewNonConstOp = DAG.getNode(BO->getOpcode(), DL, VT, NewNonConstOps);
1318
// Rebuild the select with each arm in the position it had originally.
1319 SDValue NewT = (ConstSelOpNo == 1) ? NewConstOp : NewNonConstOp;
1320 SDValue NewF = (ConstSelOpNo == 1) ? NewNonConstOp : NewConstOp;
1321 return DAG.getSelect(DL, VT, Sel.getOperand(0), NewT, NewF);
1322}
1323
1324// Changes the condition code and swaps operands if necessary, so the SetCC
1325// operation matches one of the comparisons supported directly by branches
1326// in the LoongArch ISA. May adjust compares to favor compare with 0 over
1327// compare with 1/-1.
//
// LHS, RHS and CC are updated in place.
// NOTE(review): listing line 1328 (function name and leading parameters) is
// absent here; the numbering jumps from 1327 to 1329. Confirm against upstream.
1329 ISD::CondCode &CC, SelectionDAG &DAG) {
1330 // If this is a single bit test that can't be handled by ANDI, shift the
1331 // bit to be tested to the MSB and perform a signed compare with 0.
1332 if (isIntEqualitySetCC(CC) && isNullConstant(RHS) &&
1333 LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
1334 isa<ConstantSDNode>(LHS.getOperand(1))) {
1335 uint64_t Mask = LHS.getConstantOperandVal(1);
1336 if ((isPowerOf2_64(Mask) || isMask_64(Mask)) && !isInt<12>(Mask)) {
1337 unsigned ShAmt = 0;
1338 if (isPowerOf2_64(Mask)) {
// Single-bit mask: the tested bit is moved to the top bit, so the equality
// test becomes a sign test (SETEQ -> SETGE, otherwise SETLT).
1339 CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
1340 ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);
1341 } else {
// Low-bits mask: the bits above the mask are shifted out and the original
// equality condition code is kept.
1342 ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Mask);
1343 }
1344
1345 LHS = LHS.getOperand(0);
1346 if (ShAmt != 0)
1347 LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS,
1348 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
1349 return;
1350 }
1351 }
1352
1353 if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
1354 int64_t C = RHSC->getSExtValue();
1355 switch (CC) {
1356 default:
1357 break;
1358 case ISD::SETGT:
1359 // Convert X > -1 to X >= 0.
1360 if (C == -1) {
1361 RHS = DAG.getConstant(0, DL, RHS.getValueType());
1362 CC = ISD::SETGE;
1363 return;
1364 }
1365 break;
1366 case ISD::SETLT:
1367 // Convert X < 1 to 0 >= X.
1368 if (C == 1) {
1369 RHS = LHS;
1370 LHS = DAG.getConstant(0, DL, RHS.getValueType());
1371 CC = ISD::SETGE;
1372 return;
1373 }
1374 break;
1375 }
1376 }
1377
1378 switch (CC) {
1379 default:
1380 break;
1381 case ISD::SETGT:
1382 case ISD::SETLE:
1383 case ISD::SETUGT:
1384 case ISD::SETULE:
// NOTE(review): listing line 1385 is absent between the case labels and the
// operand swap below; whatever statement it holds (presumably the matching
// condition-code update) is not visible here. Confirm against upstream.
1386 std::swap(LHS, RHS);
1387 break;
1388 }
1389}
1390
1391SDValue LoongArchTargetLowering::lowerSELECT(SDValue Op,
1392 SelectionDAG &DAG) const {
1393 SDValue CondV = Op.getOperand(0);
1394 SDValue TrueV = Op.getOperand(1);
1395 SDValue FalseV = Op.getOperand(2);
1396 SDLoc DL(Op);
1397 MVT VT = Op.getSimpleValueType();
1398 MVT GRLenVT = Subtarget.getGRLenVT();
1399
1400 if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
1401 return V;
1402
1403 if (Op.hasOneUse()) {
1404 unsigned UseOpc = Op->user_begin()->getOpcode();
1405 if (isBinOp(UseOpc) && DAG.isSafeToSpeculativelyExecute(UseOpc)) {
1406 SDNode *BinOp = *Op->user_begin();
1407 if (SDValue NewSel = foldBinOpIntoSelectIfProfitable(*Op->user_begin(),
1408 DAG, Subtarget)) {
1409 DAG.ReplaceAllUsesWith(BinOp, &NewSel);
1410 // Opcode check is necessary because foldBinOpIntoSelectIfProfitable
1411 // may return a constant node and cause crash in lowerSELECT.
1412 if (NewSel.getOpcode() == ISD::SELECT)
1413 return lowerSELECT(NewSel, DAG);
1414 return NewSel;
1415 }
1416 }
1417 }
1418
1419 // If the condition is not an integer SETCC which operates on GRLenVT, we need
1420 // to emit a LoongArchISD::SELECT_CC comparing the condition to zero. i.e.:
1421 // (select condv, truev, falsev)
1422 // -> (loongarchisd::select_cc condv, zero, setne, truev, falsev)
1423 if (CondV.getOpcode() != ISD::SETCC ||
1424 CondV.getOperand(0).getSimpleValueType() != GRLenVT) {
1425 SDValue Zero = DAG.getConstant(0, DL, GRLenVT);
1426 SDValue SetNE = DAG.getCondCode(ISD::SETNE);
1427
1428 SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
1429
1430 return DAG.getNode(LoongArchISD::SELECT_CC, DL, VT, Ops);
1431 }
1432
1433 // If the CondV is the output of a SETCC node which operates on GRLenVT
1434 // inputs, then merge the SETCC node into the lowered LoongArchISD::SELECT_CC
1435 // to take advantage of the integer compare+branch instructions. i.e.: (select
1436 // (setcc lhs, rhs, cc), truev, falsev)
1437 // -> (loongarchisd::select_cc lhs, rhs, cc, truev, falsev)
1438 SDValue LHS = CondV.getOperand(0);
1439 SDValue RHS = CondV.getOperand(1);
1440 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
1441
1442 // Special case for a select of 2 constants that have a difference of 1.
1443 // Normally this is done by DAGCombine, but if the select is introduced by
1444 // type legalization or op legalization, we miss it. Restricting to SETLT
1445 // case for now because that is what signed saturating add/sub need.
1446 // FIXME: We don't need the condition to be SETLT or even a SETCC,
1447 // but we would probably want to swap the true/false values if the condition
1448 // is SETGE/SETLE to avoid an XORI.
1449 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
1450 CCVal == ISD::SETLT) {
1451 const APInt &TrueVal = TrueV->getAsAPIntVal();
1452 const APInt &FalseVal = FalseV->getAsAPIntVal();
1453 if (TrueVal - 1 == FalseVal)
1454 return DAG.getNode(ISD::ADD, DL, VT, CondV, FalseV);
1455 if (TrueVal + 1 == FalseVal)
1456 return DAG.getNode(ISD::SUB, DL, VT, FalseV, CondV);
1457 }
1458
1459 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
1460 // 1 < x ? x : 1 -> 0 < x ? x : 1
1461 if (isOneConstant(LHS) && (CCVal == ISD::SETLT || CCVal == ISD::SETULT) &&
1462 RHS == TrueV && LHS == FalseV) {
1463 LHS = DAG.getConstant(0, DL, VT);
1464 // 0 <u x is the same as x != 0.
1465 if (CCVal == ISD::SETULT) {
1466 std::swap(LHS, RHS);
1467 CCVal = ISD::SETNE;
1468 }
1469 }
1470
1471 // x <s -1 ? x : -1 -> x <s 0 ? x : -1
1472 if (isAllOnesConstant(RHS) && CCVal == ISD::SETLT && LHS == TrueV &&
1473 RHS == FalseV) {
1474 RHS = DAG.getConstant(0, DL, VT);
1475 }
1476
1477 SDValue TargetCC = DAG.getCondCode(CCVal);
1478
1479 if (isa<ConstantSDNode>(TrueV) && !isa<ConstantSDNode>(FalseV)) {
1480 // (select (setcc lhs, rhs, CC), constant, falsev)
1481 // -> (select (setcc lhs, rhs, InverseCC), falsev, constant)
1482 std::swap(TrueV, FalseV);
1483 TargetCC = DAG.getCondCode(ISD::getSetCCInverse(CCVal, LHS.getValueType()));
1484 }
1485
1486 SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
1487 return DAG.getNode(LoongArchISD::SELECT_CC, DL, VT, Ops);
1488}
1489
1490SDValue LoongArchTargetLowering::lowerBRCOND(SDValue Op,
1491 SelectionDAG &DAG) const {
1492 SDValue CondV = Op.getOperand(1);
1493 SDLoc DL(Op);
1494 MVT GRLenVT = Subtarget.getGRLenVT();
1495
1496 if (CondV.getOpcode() == ISD::SETCC) {
1497 if (CondV.getOperand(0).getValueType() == GRLenVT) {
1498 SDValue LHS = CondV.getOperand(0);
1499 SDValue RHS = CondV.getOperand(1);
1500 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
1501
1502 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
1503
1504 SDValue TargetCC = DAG.getCondCode(CCVal);
1505 return DAG.getNode(LoongArchISD::BR_CC, DL, Op.getValueType(),
1506 Op.getOperand(0), LHS, RHS, TargetCC,
1507 Op.getOperand(2));
1508 } else if (CondV.getOperand(0).getValueType().isFloatingPoint()) {
1509 return DAG.getNode(LoongArchISD::BRCOND, DL, Op.getValueType(),
1510 Op.getOperand(0), CondV, Op.getOperand(2));
1511 }
1512 }
1513
1514 return DAG.getNode(LoongArchISD::BR_CC, DL, Op.getValueType(),
1515 Op.getOperand(0), CondV, DAG.getConstant(0, DL, GRLenVT),
1516 DAG.getCondCode(ISD::SETNE), Op.getOperand(2));
1517}
1518
1519SDValue
1520LoongArchTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
1521 SelectionDAG &DAG) const {
1522 SDLoc DL(Op);
1523 MVT OpVT = Op.getSimpleValueType();
1524
1525 SDValue Vector = DAG.getUNDEF(OpVT);
1526 SDValue Val = Op.getOperand(0);
1527 SDValue Idx = DAG.getConstant(0, DL, Subtarget.getGRLenVT());
1528
1529 return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, OpVT, Vector, Val, Idx);
1530}
1531
// Custom lowering for a vector ISD::BITREVERSE.
//
// The source is bitcast to a vector of i64 (v2i64 for 128-bit results, v4i64
// otherwise) and each i64 element is reversed on its own: with
// LoongArchISD::BITREV_8B for v16i8/v32i8 results, with ISD::BITREVERSE for
// everything else. For 16- and 32-bit element types the elements inside each
// 64-bit group are then put back in their original order with a shuffle.
// Returns an empty SDValue for byte vectors on non-64-bit subtargets and for
// result types not listed in the final switch.
// NOTE(review): listing lines 1548 and 1570 are absent (numbering gaps); they
// presumably declare the `Ops` and `Mask` containers used below. Confirm
// against upstream.
1532SDValue LoongArchTargetLowering::lowerBITREVERSE(SDValue Op,
1533 SelectionDAG &DAG) const {
1534 EVT ResTy = Op->getValueType(0);
1535 SDValue Src = Op->getOperand(0);
1536 SDLoc DL(Op);
1537
1538 // LoongArchISD::BITREV_8B is not supported on LA32.
1539 if (!Subtarget.is64Bit() && (ResTy == MVT::v16i8 || ResTy == MVT::v32i8))
1540 return SDValue();
1541
1542 EVT NewVT = ResTy.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
1543 unsigned int OrigEltNum = ResTy.getVectorNumElements();
1544 unsigned int NewEltNum = NewVT.getVectorNumElements();
1545
1546 SDValue NewSrc = DAG.getNode(ISD::BITCAST, DL, NewVT, Src);
1547
// Reverse every i64 element separately and collect the results.
1549 for (unsigned int i = 0; i < NewEltNum; i++) {
1550 SDValue Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, NewSrc,
1551 DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
1552 unsigned RevOp = (ResTy == MVT::v16i8 || ResTy == MVT::v32i8)
1553 ? (unsigned)LoongArchISD::BITREV_8B
1554 : (unsigned)ISD::BITREVERSE;
1555 Ops.push_back(DAG.getNode(RevOp, DL, MVT::i64, Op));
1556 }
1557 SDValue Res =
1558 DAG.getNode(ISD::BITCAST, DL, ResTy, DAG.getBuildVector(NewVT, DL, Ops));
1559
1560 switch (ResTy.getSimpleVT().SimpleTy) {
1561 default:
1562 return SDValue();
1563 case MVT::v16i8:
1564 case MVT::v32i8:
1565 return Res;
1566 case MVT::v8i16:
1567 case MVT::v16i16:
1568 case MVT::v4i32:
1569 case MVT::v8i32: {
// Build a mask that lists the elements of each 64-bit group in descending
// order, undoing the element reordering caused by the i64-wide reversal.
1571 for (unsigned int i = 0; i < NewEltNum; i++)
1572 for (int j = OrigEltNum / NewEltNum - 1; j >= 0; j--)
1573 Mask.push_back(j + (OrigEltNum / NewEltNum) * i);
1574 return DAG.getVectorShuffle(ResTy, DL, Res, DAG.getUNDEF(ResTy), Mask);
1575 }
1576 }
1577}
1578
1579// Widen element type to get a new mask value (if possible).
1580// For example:
1581// shufflevector <4 x i32> %a, <4 x i32> %b,
1582// <4 x i32> <i32 6, i32 7, i32 2, i32 3>
1583// is equivalent to:
1584// shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
1585// can be lowered to:
1586// VPACKOD_D vr0, vr0, vr1
//
// Returns an empty SDValue when the element size is 1 bit or wider than 32
// bits, when widenShuffleMaskElts cannot widen the mask, or when the widened
// vector type is not legal.
// NOTE(review): listing line 1587 (function name and leading parameters) is
// absent here; the numbering jumps from 1586 to 1588. Confirm against upstream.
1588 SDValue V1, SDValue V2, SelectionDAG &DAG) {
1589 unsigned EltBits = VT.getScalarSizeInBits();
1590
1591 if (EltBits > 32 || EltBits == 1)
1592 return SDValue();
1593
1594 SmallVector<int, 8> NewMask;
1595 if (widenShuffleMaskElts(Mask, NewMask)) {
// The widened type has elements twice as wide and half as many of them,
// staying floating-point or integer like the original.
1596 MVT NewEltVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(EltBits * 2)
1597 : MVT::getIntegerVT(EltBits * 2);
1598 MVT NewVT = MVT::getVectorVT(NewEltVT, VT.getVectorNumElements() / 2);
1599 if (DAG.getTargetLoweringInfo().isTypeLegal(NewVT)) {
1600 SDValue NewV1 = DAG.getBitcast(NewVT, V1);
1601 SDValue NewV2 = DAG.getBitcast(NewVT, V2);
1602 return DAG.getBitcast(
1603 VT, DAG.getVectorShuffle(NewVT, DL, NewV1, NewV2, NewMask));
1604 }
1605 }
1606
1607 return SDValue();
1608}
1609
1610/// Attempts to match a shuffle mask against the VBSLL, VBSRL, VSLLI and VSRLI
1611/// instruction.
1612// The funciton matches elements from one of the input vector shuffled to the
1613// left or right with zeroable elements 'shifted in'. It handles both the
1614// strictly bit-wise element shifts and the byte shfit across an entire 128-bit
1615// lane.
1616// Mostly copied from X86.
1617static int matchShuffleAsShift(MVT &ShiftVT, unsigned &Opcode,
1618 unsigned ScalarSizeInBits, ArrayRef<int> Mask,
1619 int MaskOffset, const APInt &Zeroable) {
1620 int Size = Mask.size();
1621 unsigned SizeInBits = Size * ScalarSizeInBits;
1622
1623 auto CheckZeros = [&](int Shift, int Scale, bool Left) {
1624 for (int i = 0; i < Size; i += Scale)
1625 for (int j = 0; j < Shift; ++j)
1626 if (!Zeroable[i + j + (Left ? 0 : (Scale - Shift))])
1627 return false;
1628
1629 return true;
1630 };
1631
1632 auto isSequentialOrUndefInRange = [&](unsigned Pos, unsigned Size, int Low,
1633 int Step = 1) {
1634 for (unsigned i = Pos, e = Pos + Size; i != e; ++i, Low += Step)
1635 if (!(Mask[i] == -1 || Mask[i] == Low))
1636 return false;
1637 return true;
1638 };
1639
1640 auto MatchShift = [&](int Shift, int Scale, bool Left) {
1641 for (int i = 0; i != Size; i += Scale) {
1642 unsigned Pos = Left ? i + Shift : i;
1643 unsigned Low = Left ? i : i + Shift;
1644 unsigned Len = Scale - Shift;
1645 if (!isSequentialOrUndefInRange(Pos, Len, Low + MaskOffset))
1646 return -1;
1647 }
1648
1649 int ShiftEltBits = ScalarSizeInBits * Scale;
1650 bool ByteShift = ShiftEltBits > 64;
1651 Opcode = Left ? (ByteShift ? LoongArchISD::VBSLL : LoongArchISD::VSLLI)
1652 : (ByteShift ? LoongArchISD::VBSRL : LoongArchISD::VSRLI);
1653 int ShiftAmt = Shift * ScalarSizeInBits / (ByteShift ? 8 : 1);
1654
1655 // Normalize the scale for byte shifts to still produce an i64 element
1656 // type.
1657 Scale = ByteShift ? Scale / 2 : Scale;
1658
1659 // We need to round trip through the appropriate type for the shift.
1660 MVT ShiftSVT = MVT::getIntegerVT(ScalarSizeInBits * Scale);
1661 ShiftVT = ByteShift ? MVT::getVectorVT(MVT::i8, SizeInBits / 8)
1662 : MVT::getVectorVT(ShiftSVT, Size / Scale);
1663 return (int)ShiftAmt;
1664 };
1665
1666 unsigned MaxWidth = 128;
1667 for (int Scale = 2; Scale * ScalarSizeInBits <= MaxWidth; Scale *= 2)
1668 for (int Shift = 1; Shift != Scale; ++Shift)
1669 for (bool Left : {true, false})
1670 if (CheckZeros(Shift, Scale, Left)) {
1671 int ShiftAmt = MatchShift(Shift, Scale, Left);
1672 if (0 < ShiftAmt)
1673 return ShiftAmt;
1674 }
1675
1676 // no match
1677 return -1;
1678}
1679
1680/// Lower VECTOR_SHUFFLE as shift (if possible).
1681///
1682/// For example:
1683/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
1684/// <4 x i32> <i32 4, i32 0, i32 1, i32 2>
1685/// is lowered to:
1686/// (VBSLL_V $v0, $v0, 4)
1687///
1688/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
1689/// <4 x i32> <i32 4, i32 0, i32 4, i32 2>
1690/// is lowered to:
1691/// (VSLLI_D $v0, $v0, 32)
///
/// The mask is matched against V1 first (mask offset 0) and, if that fails,
/// against V2 (mask offset Size). Returns an empty SDValue when neither
/// matches.
/// NOTE(review): listing line 1692 (function name and leading parameters) is
/// absent here; the numbering jumps from 1691 to 1693. Confirm against
/// upstream.
1693 MVT VT, SDValue V1, SDValue V2,
1694 SelectionDAG &DAG,
1695 const LoongArchSubtarget &Subtarget,
1696 const APInt &Zeroable) {
1697 int Size = Mask.size();
1698 assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");
1699
1700 MVT ShiftVT;
1701 SDValue V = V1;
1702 unsigned Opcode;
1703
1704 // Try to match shuffle against V1 shift.
1705 int ShiftAmt = matchShuffleAsShift(ShiftVT, Opcode, VT.getScalarSizeInBits(),
1706 Mask, 0, Zeroable);
1707
1708 // If V1 failed, try to match shuffle against V2 shift.
1709 if (ShiftAmt < 0) {
1710 ShiftAmt = matchShuffleAsShift(ShiftVT, Opcode, VT.getScalarSizeInBits(),
1711 Mask, Size, Zeroable);
1712 V = V2;
1713 }
1714
1715 if (ShiftAmt < 0)
1716 return SDValue();
1717
1718 assert(DAG.getTargetLoweringInfo().isTypeLegal(ShiftVT) &&
1719 "Illegal integer vector type");
// Perform the shift in ShiftVT and cast the result back to the shuffle type.
1720 V = DAG.getBitcast(ShiftVT, V);
1721 V = DAG.getNode(Opcode, DL, ShiftVT, V,
1722 DAG.getConstant(ShiftAmt, DL, Subtarget.getGRLenVT()));
1723 return DAG.getBitcast(VT, V);
1724}
1725
1726/// Determine whether a range fits a regular pattern of values.
1727/// This function accounts for the possibility of jumping over the End iterator.
///
/// Starting at Begin, every CheckStride-th element must be either -1 or the
/// current expected index; the expected index starts at ExpectedIndex and
/// grows by ExpectedIndexStride after each checked element.
/// NOTE(review): listing lines 1730 and 1732 (function name and the iterator
/// parameters) are absent here; the numbering skips them. Confirm against
/// upstream.
1728template <typename ValType>
1729static bool
1731 unsigned CheckStride,
1733 ValType ExpectedIndex, unsigned ExpectedIndexStride) {
1734 auto &I = Begin;
1735
1736 while (I != End) {
1737 if (*I != -1 && *I != ExpectedIndex)
1738 return false;
1739 ExpectedIndex += ExpectedIndexStride;
1740
1741 // Incrementing past End is undefined behaviour so we must increment one
1742 // step at a time and check for End at each step.
1743 for (unsigned n = 0; n < CheckStride && I != End; ++n, ++I)
1744 ; // Empty loop body.
1745 }
1746 return true;
1747}
1748
1749/// Compute whether each element of a shuffle is zeroable.
1750///
1751/// A "zeroable" vector shuffle element is one which can be lowered to zero.
///
/// On return, KnownUndef has a bit set for every negative mask entry and
/// KnownZero has a bit set for every entry that reads from an input which,
/// after looking through bitcasts, is an all-zeros build vector.
/// NOTE(review): listing line 1752 (function name and leading parameters) is
/// absent here; the numbering jumps from 1751 to 1753. Confirm against
/// upstream.
1753 SDValue V2, APInt &KnownUndef,
1754 APInt &KnownZero) {
1755 int Size = Mask.size();
1756 KnownUndef = KnownZero = APInt::getZero(Size);
1757
1758 V1 = peekThroughBitcasts(V1);
1759 V2 = peekThroughBitcasts(V2);
1760
1761 bool V1IsZero = ISD::isBuildVectorAllZeros(V1.getNode());
1762 bool V2IsZero = ISD::isBuildVectorAllZeros(V2.getNode());
1763
// ScalarSizeInBits is only used by the assertion below; the (void) cast
// keeps it referenced when assertions are compiled out.
1764 int VectorSizeInBits = V1.getValueSizeInBits();
1765 int ScalarSizeInBits = VectorSizeInBits / Size;
1766 assert(!(VectorSizeInBits % ScalarSizeInBits) && "Illegal shuffle mask size");
1767 (void)ScalarSizeInBits;
1768
1769 for (int i = 0; i < Size; ++i) {
1770 int M = Mask[i];
1771 if (M < 0) {
1772 KnownUndef.setBit(i);
1773 continue;
1774 }
// Indices below Size select from V1, indices from Size upwards from V2.
1775 if ((M >= 0 && M < Size && V1IsZero) || (M >= Size && V2IsZero)) {
1776 KnownZero.setBit(i);
1777 continue;
1778 }
1779 }
1780}
1781
1782/// Test whether a shuffle mask is equivalent within each sub-lane.
1783///
1784/// The specific repeated shuffle mask is populated in \p RepeatedMask, as it is
1785/// non-trivial to compute in the face of undef lanes. The representation is
1786/// suitable for use with existing 128-bit shuffles as entries from the second
1787/// vector have been remapped to [LaneSize, 2*LaneSize).
1788static bool isRepeatedShuffleMask(unsigned LaneSizeInBits, MVT VT,
1789 ArrayRef<int> Mask,
1790 SmallVectorImpl<int> &RepeatedMask) {
1791 auto LaneSize = LaneSizeInBits / VT.getScalarSizeInBits();
1792 RepeatedMask.assign(LaneSize, -1);
1793 int Size = Mask.size();
1794 for (int i = 0; i < Size; ++i) {
1795 assert(Mask[i] == -1 || Mask[i] >= 0);
1796 if (Mask[i] < 0)
1797 continue;
1798 if ((Mask[i] % Size) / LaneSize != i / LaneSize)
1799 // This entry crosses lanes, so there is no way to model this shuffle.
1800 return false;
1801
1802 // Ok, handle the in-lane shuffles by detecting if and when they repeat.
1803 // Adjust second vector indices to start at LaneSize instead of Size.
1804 int LocalM =
1805 Mask[i] < Size ? Mask[i] % LaneSize : Mask[i] % LaneSize + LaneSize;
1806 if (RepeatedMask[i % LaneSize] < 0)
1807 // This is the first non-undef entry in this slot of a 128-bit lane.
1808 RepeatedMask[i % LaneSize] = LocalM;
1809 else if (RepeatedMask[i % LaneSize] != LocalM)
1810 // Found a mismatch with the repeated mask.
1811 return false;
1812 }
1813 return true;
1814}
1815
1816/// Attempts to match vector shuffle as byte rotation.
///
/// Returns the rotation amount in bytes (Rotation * Scale, where Scale is 16
/// divided by the number of elements per 128-bit lane), or -1 when the mask is
/// not a rotation. On success V1 and V2 are overwritten with the inputs
/// selected as Lo and Hi.
/// NOTE(review): listing line 1817 (function name and leading parameters) is
/// absent here; the numbering jumps from 1816 to 1818. Confirm against
/// upstream.
1818 ArrayRef<int> Mask) {
1819
1820 SDValue Lo, Hi;
1821 SmallVector<int, 16> RepeatedMask;
1822
// The rotation is matched per 128-bit lane, so the mask must repeat in each.
1823 if (!isRepeatedShuffleMask(128, VT, Mask, RepeatedMask))
1824 return -1;
1825
1826 int NumElts = RepeatedMask.size();
1827 int Rotation = 0;
1828 int Scale = 16 / NumElts;
1829
1830 for (int i = 0; i < NumElts; ++i) {
1831 int M = RepeatedMask[i];
1832 assert((M == -1 || (0 <= M && M < (2 * NumElts))) &&
1833 "Unexpected mask index.");
1834 if (M < 0)
1835 continue;
1836
1837 // Determine where a rotated vector would have started.
1838 int StartIdx = i - (M % NumElts);
// An element that stays at its own index means there is no rotation.
1839 if (StartIdx == 0)
1840 return -1;
1841
1842 // If we found the tail of a vector the rotation must be the missing
1843 // front. If we found the head of a vector, it must be how much of the
1844 // head.
1845 int CandidateRotation = StartIdx < 0 ? -StartIdx : NumElts - StartIdx;
1846
1847 if (Rotation == 0)
1848 Rotation = CandidateRotation;
1849 else if (Rotation != CandidateRotation)
1850 return -1;
1851
1852 // Compute which value this mask is pointing at.
1853 SDValue MaskV = M < NumElts ? V1 : V2;
1854
1855 // Compute which of the two target values this index should be assigned
1856 // to. This reflects whether the high elements are remaining or the low
1857 // elements are remaining.
1858 SDValue &TargetV = StartIdx < 0 ? Hi : Lo;
1859
1860 // Either set up this value if we've not encountered it before, or check
1861 // that it remains consistent.
1862 if (!TargetV)
1863 TargetV = MaskV;
1864 else if (TargetV != MaskV)
1865 return -1;
1866 }
1867
1868 // Check that we successfully analyzed the mask, and normalize the results.
1869 assert(Rotation != 0 && "Failed to locate a viable rotation!");
1870 assert((Lo || Hi) && "Failed to find a rotated input vector!");
// If only one side was seen, use the same input for both.
1871 if (!Lo)
1872 Lo = Hi;
1873 else if (!Hi)
1874 Hi = Lo;
1875
1876 V1 = Lo;
1877 V2 = Hi;
1878
1879 return Rotation * Scale;
1880}
1881
1882/// Lower VECTOR_SHUFFLE as byte rotate (if possible).
1883///
1884/// For example:
1885/// %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b,
1886/// <2 x i32> <i32 3, i32 0>
1887/// is lowered to:
1888/// (VBSRL_V $v1, $v1, 8)
1889/// (VBSLL_V $v0, $v0, 8)
1890/// (VOR_V $v0, $v0, $v1)
///
/// Returns an empty SDValue when matchShuffleAsByteRotate does not report a
/// positive rotation.
/// NOTE(review): listing line 1892 (function name and leading parameters) is
/// absent here; the numbering jumps from 1891 to 1893. Confirm against
/// upstream.
1891static SDValue
1893 SDValue V1, SDValue V2, SelectionDAG &DAG,
1894 const LoongArchSubtarget &Subtarget) {
1895
1896 SDValue Lo = V1, Hi = V2;
1897 int ByteRotation = matchShuffleAsByteRotate(VT, Lo, Hi, Mask);
1898 if (ByteRotation <= 0)
1899 return SDValue();
1900
// The shifts are done on a byte vector of the same total width.
1901 MVT ByteVT = MVT::getVectorVT(MVT::i8, VT.getSizeInBits() / 8);
1902 Lo = DAG.getBitcast(ByteVT, Lo);
1903 Hi = DAG.getBitcast(ByteVT, Hi);
1904
// Lo is shifted left by the complement of the rotation, Hi is shifted right
// by the rotation, and the two halves are OR-ed together.
1905 int LoByteShift = 16 - ByteRotation;
1906 int HiByteShift = ByteRotation;
1907 MVT GRLenVT = Subtarget.getGRLenVT();
1908
1909 SDValue LoShift = DAG.getNode(LoongArchISD::VBSLL, DL, ByteVT, Lo,
1910 DAG.getConstant(LoByteShift, DL, GRLenVT));
1911 SDValue HiShift = DAG.getNode(LoongArchISD::VBSRL, DL, ByteVT, Hi,
1912 DAG.getConstant(HiByteShift, DL, GRLenVT));
1913 return DAG.getBitcast(VT, DAG.getNode(ISD::OR, DL, ByteVT, LoShift, HiShift));
1914}
1915
1916/// Lower VECTOR_SHUFFLE as ZERO_EXTEND or ANY_EXTEND (if possible).
1917///
1918/// For example:
1919/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
1920/// <4 x i32> <i32 0, i32 4, i32 1, i32 4>
1921/// %3 = bitcast <4 x i32> %2 to <2 x i64>
1922/// is lowered to:
1923/// (VREPLI $v1, 0)
1924/// (VILVL $v0, $v1, $v0)
///
/// Returns a zero constant when every element is zeroable, and an empty
/// SDValue when no extension scale matches the mask.
/// NOTE(review): listing line 1925 (function name and leading parameters) is
/// absent here; the numbering jumps from 1924 to 1926. Confirm against
/// upstream.
1926 ArrayRef<int> Mask, MVT VT,
1927 SDValue V1, SDValue V2,
1928 SelectionDAG &DAG,
1929 const APInt &Zeroable) {
1930 int Bits = VT.getSizeInBits();
1931 int EltBits = VT.getScalarSizeInBits();
1932 int NumElements = VT.getVectorNumElements();
1933
1934 if (Zeroable.isAllOnes())
1935 return DAG.getConstant(0, DL, VT);
1936
1937 // Define a helper function to check a particular ext-scale and lower to it if
1938 // valid.
// Note: the lambda captures EltBits and NumElements by reference and modifies
// them in its final loop. That loop is only reached after every failure
// return has been passed, and the lambda's result is then returned to the
// caller, so a failed attempt leaves both values untouched for the next one.
1939 auto Lower = [&](int Scale) -> SDValue {
1940 SDValue InputV;
1941 bool AnyExt = true;
1942 int Offset = 0;
1943 for (int i = 0; i < NumElements; i++) {
1944 int M = Mask[i];
1945 if (M < 0)
1946 continue;
1947 if (i % Scale != 0) {
1948 // Each of the extended elements need to be zeroable.
1949 if (!Zeroable[i])
1950 return SDValue();
1951
// A defined (non-undef) extension element rules out a plain any-extend.
1952 AnyExt = false;
1953 continue;
1954 }
1955
1956 // Each of the base elements needs to be consecutive indices into the
1957 // same input vector.
1958 SDValue V = M < NumElements ? V1 : V2;
1959 M = M % NumElements;
1960 if (!InputV) {
1961 InputV = V;
1962 Offset = M - (i / Scale);
1963
1964 // These offsets can't be handled.
1965 if (Offset % (NumElements / Scale))
1966 return SDValue();
1967 } else if (InputV != V)
1968 return SDValue();
1969
1970 if (M != (Offset + (i / Scale)))
1971 return SDValue(); // Non-consecutive strided elements.
1972 }
1973
1974 // If we fail to find an input, we have a zero-shuffle which should always
1975 // have already been handled.
1976 if (!InputV)
1977 return SDValue();
1978
// Extend by one factor of two per iteration: interleave the input with
// either itself (any-extend) or zero, using VILVH when the remaining offset
// lies in the upper half and VILVL otherwise.
1979 do {
1980 unsigned VilVLoHi = LoongArchISD::VILVL;
1981 if (Offset >= (NumElements / 2)) {
1982 VilVLoHi = LoongArchISD::VILVH;
1983 Offset -= (NumElements / 2);
1984 }
1985
1986 MVT InputVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits), NumElements);
1987 SDValue Ext =
1988 AnyExt ? DAG.getFreeze(InputV) : DAG.getConstant(0, DL, InputVT);
1989 InputV = DAG.getBitcast(InputVT, InputV);
1990 InputV = DAG.getNode(VilVLoHi, DL, InputVT, Ext, InputV);
1991 Scale /= 2;
1992 EltBits *= 2;
1993 NumElements /= 2;
1994 } while (Scale > 1);
1995 return DAG.getBitcast(VT, InputV);
1996 };
1997
1998 // Each iteration, try extending the elements half as much, but into twice as
1999 // many elements.
2000 for (int NumExtElements = Bits / 64; NumExtElements < NumElements;
2001 NumExtElements *= 2) {
2002 if (SDValue V = Lower(NumElements / NumExtElements))
2003 return V;
2004 }
2005 return SDValue();
2006}
2007
2008/// Lower VECTOR_SHUFFLE into VREPLVEI (if possible).
2009///
2010/// VREPLVEI performs vector broadcast based on an element specified by an
2011/// integer immediate, with its mask being similar to:
2012/// <x, x, x, ...>
2013/// where x is any valid index.
2014///
2015/// When undef's appear in the mask they are treated as if they were whatever
2016/// value is necessary in order to fit the above form.
///
/// Returns undef when every mask entry is -1, and an empty SDValue when the
/// defined entries do not all name the same index.
/// NOTE(review): listing line 2018 (function name and leading parameters) is
/// absent here; the numbering jumps from 2017 to 2019. Confirm against
/// upstream.
2017static SDValue
2019 SDValue V1, SelectionDAG &DAG,
2020 const LoongArchSubtarget &Subtarget) {
// The first defined mask entry is the candidate splat index.
2021 int SplatIndex = -1;
2022 for (const auto &M : Mask) {
2023 if (M != -1) {
2024 SplatIndex = M;
2025 break;
2026 }
2027 }
2028
2029 if (SplatIndex == -1)
2030 return DAG.getUNDEF(VT);
2031
2032 assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
// Stride 1 with an index step of 0: every entry must be -1 or SplatIndex.
2033 if (fitsRegularPattern<int>(Mask.begin(), 1, Mask.end(), SplatIndex, 0)) {
2034 return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
2035 DAG.getConstant(SplatIndex, DL, Subtarget.getGRLenVT()));
2036 }
2037
2038 return SDValue();
2039}
2040
2041/// Lower VECTOR_SHUFFLE into VSHUF4I (if possible).
2042///
2043/// VSHUF4I splits the vector into blocks of four elements, then shuffles these
2044/// elements according to a <4 x i2> constant (encoded as an integer immediate).
2045///
2046/// It is therefore possible to lower into VSHUF4I when the mask takes the form:
2047/// <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...>
2048/// When undef's appear they are treated as if they were whatever value is
2049/// necessary in order to fit the above forms.
2050///
2051/// For example:
2052/// %2 = shufflevector <8 x i16> %0, <8 x i16> undef,
2053/// <8 x i32> <i32 3, i32 2, i32 1, i32 0,
2054/// i32 7, i32 6, i32 5, i32 4>
2055/// is lowered to:
2056/// (VSHUF4I_H $v0, $v1, 27)
2057/// where the 27 comes from:
2058/// 3 + (2 << 2) + (1 << 4) + (0 << 6)
///
/// For v2i64/v2f64 the sub-vector size is 2 and the result is a
/// LoongArchISD::VSHUF4I_D node taking both V1 and V2; for every other type
/// the result is a LoongArchISD::VSHUF4I node on V1 only.
/// NOTE(review): listing line 2060 (function name and leading parameters) is
/// absent here; the numbering jumps from 2059 to 2061. Confirm against
/// upstream.
2059static SDValue
2061 SDValue V1, SDValue V2, SelectionDAG &DAG,
2062 const LoongArchSubtarget &Subtarget) {
2063
2064 unsigned SubVecSize = 4;
2065 if (VT == MVT::v2f64 || VT == MVT::v2i64)
2066 SubVecSize = 2;
2067
2068 int SubMask[4] = {-1, -1, -1, -1};
// For each position i inside a block, walk the same position in every block
// and check that all blocks agree on the block-relative index.
2069 for (unsigned i = 0; i < SubVecSize; ++i) {
2070 for (unsigned j = i; j < Mask.size(); j += SubVecSize) {
2071 int M = Mask[j];
2072
2073 // Convert from vector index to 4-element subvector index
2074 // If an index refers to an element outside of the subvector then give up
2075 if (M != -1) {
2076 M -= 4 * (j / SubVecSize);
2077 if (M < 0 || M >= 4)
2078 return SDValue();
2079 }
2080
2081 // If the mask has an undef, replace it with the current index.
2082 // Note that it might still be undef if the current index is also undef
2083 if (SubMask[i] == -1)
2084 SubMask[i] = M;
2085 // Check that non-undef values are the same as in the mask. If they
2086 // aren't then give up
2087 else if (M != -1 && M != SubMask[i])
2088 return SDValue();
2089 }
2090 }
2091
2092 // Calculate the immediate. Replace any remaining undefs with zero
// Iterating from the last position down puts SubMask[0] in the lowest two
// bits of the immediate.
2093 int Imm = 0;
2094 for (int i = SubVecSize - 1; i >= 0; --i) {
2095 int M = SubMask[i];
2096
2097 if (M == -1)
2098 M = 0;
2099
2100 Imm <<= 2;
2101 Imm |= M & 0x3;
2102 }
2103
2104 MVT GRLenVT = Subtarget.getGRLenVT();
2105
2106 // Return vshuf4i.d
2107 if (VT == MVT::v2f64 || VT == MVT::v2i64)
2108 return DAG.getNode(LoongArchISD::VSHUF4I_D, DL, VT, V1, V2,
2109 DAG.getConstant(Imm, DL, GRLenVT));
2110
2111 return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT, V1,
2112 DAG.getConstant(Imm, DL, GRLenVT));
2113}
2114
2115/// Lower VECTOR_SHUFFLE whose result is the reversed source vector.
2116///
2117/// It is possible to optimize a VECTOR_SHUFFLE that performs a vector
2118/// reverse, i.e. whose mask looks like:
2119/// <7, 6, 5, 4, 3, 2, 1, 0>
2120///
2121/// When undef's appear in the mask they are treated as if they were whatever
2122/// value is necessary in order to fit the above forms.
///
/// The reverse is done in two steps: a shuffle on a 4x-wider element type
/// reverses the order of the four-element groups, then a VSHUF4I with
/// immediate 27 (3 + (2 << 2) + (1 << 4) + (0 << 6)) reverses the elements
/// inside each group. Returns an empty SDValue for other types or masks.
/// NOTE(review): listing line 2124 (function name and leading parameters) is
/// absent here; the numbering jumps from 2123 to 2125. Confirm against
/// upstream.
2123static SDValue
2125 SDValue V1, SelectionDAG &DAG,
2126 const LoongArchSubtarget &Subtarget) {
2127 // Only vectors with i8/i16 elements which cannot match other patterns
2128 // directly needs to do this.
2129 if (VT != MVT::v16i8 && VT != MVT::v8i16 && VT != MVT::v32i8 &&
2130 VT != MVT::v16i16)
2131 return SDValue();
2132
2133 if (!ShuffleVectorInst::isReverseMask(Mask, Mask.size()))
2134 return SDValue();
2135
// Each widened element covers four original elements.
2136 int WidenNumElts = VT.getVectorNumElements() / 4;
2137 SmallVector<int, 16> WidenMask(WidenNumElts, -1);
2138 for (int i = 0; i < WidenNumElts; ++i)
2139 WidenMask[i] = WidenNumElts - 1 - i;
2140
2141 MVT WidenVT = MVT::getVectorVT(
2142 VT.getVectorElementType() == MVT::i8 ? MVT::i32 : MVT::i64, WidenNumElts);
2143 SDValue NewV1 = DAG.getBitcast(WidenVT, V1);
2144 SDValue WidenRev = DAG.getVectorShuffle(WidenVT, DL, NewV1,
2145 DAG.getUNDEF(WidenVT), WidenMask);
2146
2147 return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT,
2148 DAG.getBitcast(VT, WidenRev),
2149 DAG.getConstant(27, DL, Subtarget.getGRLenVT()));
2150}
2151
2152/// Lower VECTOR_SHUFFLE into VPACKEV (if possible).
2153///
2154/// VPACKEV interleaves the even elements from each vector.
2155///
2156/// It is possible to lower into VPACKEV when the mask consists of two of the
2157/// following forms interleaved:
2158/// <0, 2, 4, ...>
2159/// <n, n+2, n+4, ...>
2160/// where n is the number of elements in the vector.
2161/// For example:
2162/// <0, 0, 2, 2, 4, 4, ...>
2163/// <0, n, 2, n+2, 4, n+4, ...>
2164///
2165/// When undef's appear in the mask they are treated as if they were whatever
2166/// value is necessary in order to fit the above forms.
///
/// Returns an empty SDValue when either the even or the odd mask positions
/// fit neither form.
/// NOTE(review): listing line 2167 (function name and leading parameters) is
/// absent here; the numbering jumps from 2166 to 2168. Confirm against
/// upstream.
2168 MVT VT, SDValue V1, SDValue V2,
2169 SelectionDAG &DAG) {
2170
2171 const auto &Begin = Mask.begin();
2172 const auto &End = Mask.end();
2173 SDValue OriV1 = V1, OriV2 = V2;
2174
// The even mask positions decide which original input becomes V1.
2175 if (fitsRegularPattern<int>(Begin, 2, End, 0, 2))
2176 V1 = OriV1;
2177 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 2))
2178 V1 = OriV2;
2179 else
2180 return SDValue();
2181
// The odd mask positions decide which original input becomes V2.
2182 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 2))
2183 V2 = OriV1;
2184 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 2))
2185 V2 = OriV2;
2186 else
2187 return SDValue();
2188
// Note the operand order: V2 is passed first, then V1.
2189 return DAG.getNode(LoongArchISD::VPACKEV, DL, VT, V2, V1);
2190}
2191
2192/// Lower VECTOR_SHUFFLE into VPACKOD (if possible).
2193///
2194/// VPACKOD interleaves the odd elements from each vector.
2195///
2196/// It is possible to lower into VPACKOD when the mask consists of two of the
2197/// following forms interleaved:
2198/// <1, 3, 5, ...>
2199/// <n+1, n+3, n+5, ...>
2200/// where n is the number of elements in the vector.
2201/// For example:
2202/// <1, 1, 3, 3, 5, 5, ...>
2203/// <1, n+1, 3, n+3, 5, n+5, ...>
2204///
2205/// When undef's appear in the mask they are treated as if they were whatever
2206/// value is necessary in order to fit the above forms.
///
/// Returns an empty SDValue when either the even or the odd mask positions
/// fit neither form.
/// NOTE(review): listing line 2207 (function name and leading parameters) is
/// absent here; the numbering jumps from 2206 to 2208. Confirm against
/// upstream.
2208 MVT VT, SDValue V1, SDValue V2,
2209 SelectionDAG &DAG) {
2210
2211 const auto &Begin = Mask.begin();
2212 const auto &End = Mask.end();
2213 SDValue OriV1 = V1, OriV2 = V2;
2214
// The even mask positions decide which original input becomes V1.
2215 if (fitsRegularPattern<int>(Begin, 2, End, 1, 2))
2216 V1 = OriV1;
2217 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + 1, 2))
2218 V1 = OriV2;
2219 else
2220 return SDValue();
2221
// The odd mask positions decide which original input becomes V2.
2222 if (fitsRegularPattern<int>(Begin + 1, 2, End, 1, 2))
2223 V2 = OriV1;
2224 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + 1, 2))
2225 V2 = OriV2;
2226 else
2227 return SDValue();
2228
// Note the operand order: V2 is passed first, then V1.
2229 return DAG.getNode(LoongArchISD::VPACKOD, DL, VT, V2, V1);
2230}
2231
2232/// Lower VECTOR_SHUFFLE into VILVH (if possible).
2233///
2234/// VILVH interleaves consecutive elements from the left (highest-indexed) half
2235/// of each vector.
2236///
2237/// It is possible to lower into VILVH when the mask consists of two of the
2238/// following forms interleaved:
2239/// <x, x+1, x+2, ...>
2240/// <n+x, n+x+1, n+x+2, ...>
2241/// where n is the number of elements in the vector and x is half n.
2242/// For example:
2243/// <x, x, x+1, x+1, x+2, x+2, ...>
2244/// <x, n+x, x+1, n+x+1, x+2, n+x+2, ...>
2245///
2246/// When undef's appear in the mask they are treated as if they were whatever
2247/// value is necessary in order to fit the above forms.
///
/// Returns the VILVH node on a match, or an empty SDValue otherwise.
2249 MVT VT, SDValue V1, SDValue V2,
2250 SelectionDAG &DAG) {
2251
2252 const auto &Begin = Mask.begin();
2253 const auto &End = Mask.end();
// HalfSize is the "x" of the doc comment: the first index of the upper half.
2254 unsigned HalfSize = Mask.size() / 2;
2255 SDValue OriV1 = V1, OriV2 = V2;
2256
// Even result positions: consecutive indices starting at HalfSize pick the
// original first operand, starting at Mask.size() + HalfSize pick the second.
2257 if (fitsRegularPattern<int>(Begin, 2, End, HalfSize, 1))
2258 V1 = OriV1;
2259 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + HalfSize, 1))
2260 V1 = OriV2;
2261 else
2262 return SDValue();
2263
// Odd result positions: same classification, starting from Begin + 1.
2264 if (fitsRegularPattern<int>(Begin + 1, 2, End, HalfSize, 1))
2265 V2 = OriV1;
2266 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + HalfSize,
2267 1))
2268 V2 = OriV2;
2269 else
2270 return SDValue();
2271
// Operand order: the odd-position source is passed before the even-position
// source.
2272 return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
2273}
2274
2275/// Lower VECTOR_SHUFFLE into VILVL (if possible).
2276///
2277/// VILVL interleaves consecutive elements from the right (lowest-indexed) half
2278/// of each vector.
2279///
2280/// It is possible to lower into VILVL when the mask consists of two of the
2281/// following forms interleaved:
2282/// <0, 1, 2, ...>
2283/// <n, n+1, n+2, ...>
2284/// where n is the number of elements in the vector.
2285/// For example:
2286/// <0, 0, 1, 1, 2, 2, ...>
2287/// <0, n, 1, n+1, 2, n+2, ...>
2288///
2289/// When undef's appear in the mask they are treated as if they were whatever
2290/// value is necessary in order to fit the above forms.
///
/// Returns the VILVL node on a match, or an empty SDValue otherwise.
2292 MVT VT, SDValue V1, SDValue V2,
2293 SelectionDAG &DAG) {
2294
2295 const auto &Begin = Mask.begin();
2296 const auto &End = Mask.end();
// Remember the incoming operands: V1 and V2 are reassigned below to the
// sources chosen for the even and the odd result positions respectively.
2297 SDValue OriV1 = V1, OriV2 = V2;
2298
// Even result positions: consecutive indices starting at 0 pick the original
// first operand, starting at Mask.size() pick the second.
2299 if (fitsRegularPattern<int>(Begin, 2, End, 0, 1))
2300 V1 = OriV1;
2301 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 1))
2302 V1 = OriV2;
2303 else
2304 return SDValue();
2305
// Odd result positions: same classification, starting from Begin + 1.
2306 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 1))
2307 V2 = OriV1;
2308 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 1))
2309 V2 = OriV2;
2310 else
2311 return SDValue();
2312
// Operand order: the odd-position source is passed before the even-position
// source.
2313 return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
2314}
2315
2316/// Lower VECTOR_SHUFFLE into VPICKEV (if possible).
2317///
2318/// VPICKEV copies the even elements of each vector into the result vector.
2319///
2320/// It is possible to lower into VPICKEV when the mask consists of two of the
2321/// following forms concatenated:
2322/// <0, 2, 4, ...>
2323/// <n, n+2, n+4, ...>
2324/// where n is the number of elements in the vector.
2325/// For example:
2326/// <0, 2, 4, ..., 0, 2, 4, ...>
2327/// <0, 2, 4, ..., n, n+2, n+4, ...>
2328///
2329/// When undef's appear in the mask they are treated as if they were whatever
2330/// value is necessary in order to fit the above forms.
///
/// Returns the VPICKEV node on a match, or an empty SDValue otherwise.
2332 MVT VT, SDValue V1, SDValue V2,
2333 SelectionDAG &DAG) {
2334
2335 const auto &Begin = Mask.begin();
// Mid splits the mask into the two concatenated halves checked below.
2336 const auto &Mid = Mask.begin() + Mask.size() / 2;
2337 const auto &End = Mask.end();
2338 SDValue OriV1 = V1, OriV2 = V2;
2339
// First half of the mask [Begin, Mid): even indices starting at 0 pick the
// original first operand, starting at Mask.size() pick the second.
2340 if (fitsRegularPattern<int>(Begin, 1, Mid, 0, 2))
2341 V1 = OriV1;
2342 else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size(), 2))
2343 V1 = OriV2;
2344 else
2345 return SDValue();
2346
// Second half of the mask [Mid, End): same classification.
2347 if (fitsRegularPattern<int>(Mid, 1, End, 0, 2))
2348 V2 = OriV1;
2349 else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size(), 2))
2350 V2 = OriV2;
2351
2352 else
2353 return SDValue();
2354
// Operand order: the source of the second half is passed before the source of
// the first half.
2355 return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
2356}
2357
2358/// Lower VECTOR_SHUFFLE into VPICKOD (if possible).
2359///
2360/// VPICKOD copies the odd elements of each vector into the result vector.
2361///
2362/// It is possible to lower into VPICKOD when the mask consists of two of the
2363/// following forms concatenated:
2364/// <1, 3, 5, ...>
2365/// <n+1, n+3, n+5, ...>
2366/// where n is the number of elements in the vector.
2367/// For example:
2368/// <1, 3, 5, ..., 1, 3, 5, ...>
2369/// <1, 3, 5, ..., n+1, n+3, n+5, ...>
2370///
2371/// When undef's appear in the mask they are treated as if they were whatever
2372/// value is necessary in order to fit the above forms.
///
/// Returns the VPICKOD node on a match, or an empty SDValue otherwise.
2374 MVT VT, SDValue V1, SDValue V2,
2375 SelectionDAG &DAG) {
2376
2377 const auto &Begin = Mask.begin();
// Mid splits the mask into the two concatenated halves checked below.
2378 const auto &Mid = Mask.begin() + Mask.size() / 2;
2379 const auto &End = Mask.end();
2380 SDValue OriV1 = V1, OriV2 = V2;
2381
// First half of the mask [Begin, Mid): odd indices starting at 1 pick the
// original first operand, starting at Mask.size() + 1 pick the second.
2382 if (fitsRegularPattern<int>(Begin, 1, Mid, 1, 2))
2383 V1 = OriV1;
2384 else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size() + 1, 2))
2385 V1 = OriV2;
2386 else
2387 return SDValue();
2388
// Second half of the mask [Mid, End): same classification.
2389 if (fitsRegularPattern<int>(Mid, 1, End, 1, 2))
2390 V2 = OriV1;
2391 else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size() + 1, 2))
2392 V2 = OriV2;
2393 else
2394 return SDValue();
2395
// Operand order: the source of the second half is passed before the source of
// the first half.
2396 return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
2397}
2398
2399/// Lower VECTOR_SHUFFLE into VEXTRINS (if possible).
2400///
2401/// VEXTRINS copies one element of a vector into any place of the result
2402/// vector and makes no change to the rest elements of the result vector.
2403///
2404/// It is possible to lower into VEXTRINS when the mask takes the form:
2405/// <0, 1, 2, ..., n+i, ..., n-1> or <n, n+1, n+2, ..., i, ..., 2n-1> or
2406/// <0, 1, 2, ..., i, ..., n-1> or <n, n+1, n+2, ..., n+i, ..., 2n-1>
2407/// where n is the number of elements in the vector and i is in [0, n).
2408/// For example:
2409/// <0, 1, 2, 3, 4, 5, 6, 8> , <2, 9, 10, 11, 12, 13, 14, 15> ,
2410/// <0, 1, 2, 6, 4, 5, 6, 7> , <8, 9, 10, 11, 12, 9, 14, 15>
2411///
2412/// When undef's appear in the mask they are treated as if they were whatever
2413/// value is necessary in order to fit the above forms.
///
/// The result is emitted as EXTRACT_VECTOR_ELT + INSERT_VECTOR_ELT nodes rather
/// than a target node. Returns an empty SDValue when the mask does not fit.
2414static SDValue
2416 SDValue V1, SDValue V2, SelectionDAG &DAG,
2417 const LoongArchSubtarget &Subtarget) {
2418 unsigned NumElts = VT.getVectorNumElements();
2419 MVT EltVT = VT.getVectorElementType();
2420 MVT GRLenVT = Subtarget.getGRLenVT();
2421
2422 if (Mask.size() != NumElts)
2423 return SDValue();
2424
// Base is the mask value expected at position 0 of an identity copy:
// 0 means "result is V1 with one element replaced", NumElts means the same
// for V2.
2425 auto tryLowerToExtrAndIns = [&](unsigned Base) -> SDValue {
2426 int DiffCount = 0;
2427 int DiffPos = -1;
// Count the defined positions that deviate from the identity Base + i and
// remember the last one; undef entries never count as a deviation.
2428 for (unsigned i = 0; i < NumElts; ++i) {
2429 if (Mask[i] == -1)
2430 continue;
2431 if (Mask[i] != int(Base + i)) {
2432 ++DiffCount;
2433 DiffPos = int(i);
2434 if (DiffCount > 1)
2435 return SDValue();
2436 }
2437 }
2438
2439 // Need exactly one differing element to lower into VEXTRINS.
2440 if (DiffCount != 1)
2441 return SDValue();
2442
2443 // DiffMask must be in [0, 2N).
2444 int DiffMask = Mask[DiffPos];
2445 if (DiffMask < 0 || DiffMask >= int(2 * NumElts))
2446 return SDValue();
2447
2448 // Determine source vector and source index.
2449 SDValue SrcVec;
2450 unsigned SrcIdx;
2451 if (unsigned(DiffMask) < NumElts) {
2452 SrcVec = V1;
2453 SrcIdx = unsigned(DiffMask);
2454 } else {
2455 SrcVec = V2;
2456 SrcIdx = unsigned(DiffMask) - NumElts;
2457 }
2458
2459 // Replace with EXTRACT_VECTOR_ELT + INSERT_VECTOR_ELT, it will match the
2460 // patterns of VEXTRINS in tablegen.
// Integer elements are extracted as GRLenVT; floating-point elements keep
// their own element type.
2461 SDValue Extracted = DAG.getNode(
2462 ISD::EXTRACT_VECTOR_ELT, DL, EltVT.isFloatingPoint() ? EltVT : GRLenVT,
2463 SrcVec, DAG.getConstant(SrcIdx, DL, GRLenVT));
2464 SDValue Result =
2465 DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, (Base == 0) ? V1 : V2,
2466 Extracted, DAG.getConstant(DiffPos, DL, GRLenVT));
2467
2468 return Result;
2469 };
2470
2471 // Try V1 as the base vector (Base = 0) first, then V2 (Base = n).
2472 if (SDValue Result = tryLowerToExtrAndIns(0))
2473 return Result;
2474 return tryLowerToExtrAndIns(NumElts);
2475}
2476
2477// Check the Mask and then build SrcVec and MaskImm infos which will
2478// be used to build LoongArchISD nodes for VPERMI_W or XVPERMI_W.
2479// On success, return true. Otherwise, return false.
//
// The first four mask entries are processed as two pairs, (0, 1) and (2, 3).
// Both entries of a pair must select from the same input: either V1 (indices
// in [0, 4)) or V2 (indices in [MaskSize, MaskSize + 4)). The chosen input is
// appended to SrcVec and a 2-bit selector per entry is OR'ed into MaskImm, so
// the caller must pass MaskImm initialised to 0. Undef entries (-1) are
// accepted and encoded as selector 0.
//
// For 8-element masks the same immediate is applied to both 128-bit lanes, so
// every defined entry of the upper half must equal the corresponding lower
// entry plus 4. An undef upper entry is compatible with anything. A defined
// upper entry paired with an undef lower entry is rejected: comparing the raw
// values would wrongly accept an upper entry of 3 (-1 + 4), which selects an
// element from the wrong lane.
//
// On a false return SrcVec and MaskImm may already have been partially
// updated.
2482 unsigned &MaskImm) {
2483 unsigned MaskSize = Mask.size();
2484
2485 auto isValid = [&](int M, int Off) {
2486 return (M == -1) || (M >= Off && M < Off + 4);
2487 };
2488
2489 auto buildImm = [&](int MLo, int MHi, unsigned Off, unsigned I) {
2490 auto immPart = [&](int M, unsigned Off) {
2491 return (M == -1 ? 0 : (M - Off)) & 0x3;
2492 };
2493 MaskImm |= immPart(MLo, Off) << (I * 2);
2494 MaskImm |= immPart(MHi, Off) << ((I + 1) * 2);
2495 };
2496
 // Undef-aware check that the upper-lane entry Hi repeats the lower-lane
 // entry Lo shifted by one 128-bit lane (4 elements).
 auto matchesUpperLane = [](int Lo, int Hi) {
 return Hi == -1 || (Lo != -1 && Hi == Lo + 4);
 };

2497 for (unsigned i = 0; i < 4; i += 2) {
2498 int MLo = Mask[i];
2499 int MHi = Mask[i + 1];
2500
2501 if (MaskSize == 8) { // Only v8i32/v8f32 need this check.
2502 int M2Lo = Mask[i + 4];
2503 int M2Hi = Mask[i + 5];
2504 if (!matchesUpperLane(MLo, M2Lo) || !matchesUpperLane(MHi, M2Hi))
2505 return false;
2506 }
2507
2508 if (isValid(MLo, 0) && isValid(MHi, 0)) {
2509 SrcVec.push_back(V1);
2510 buildImm(MLo, MHi, 0, i);
2511 } else if (isValid(MLo, MaskSize) && isValid(MHi, MaskSize)) {
2512 SrcVec.push_back(V2);
2513 buildImm(MLo, MHi, MaskSize, i);
2514 } else {
2515 return false;
2516 }
2517 }
2518
2519 return true;
2520}
2521
2522/// Lower VECTOR_SHUFFLE into VPERMI (if possible).
2523///
2524/// VPERMI selects two elements from each of the two vectors based on the
2525/// mask and places them in the corresponding positions of the result vector
2526/// in order. Only v4i32 and v4f32 types are allowed.
2527///
2528/// It is possible to lower into VPERMI when the mask consists of two of the
2529/// following forms concatenated:
2530/// <i, j, u, v>
2531/// <u, v, i, j>
2532/// where i,j are in [0,4) and u,v are in [4, 8).
2533/// For example:
2534/// <2, 3, 4, 5>
2535/// <5, 7, 0, 2>
2536///
2537/// When undef's appear in the mask they are treated as if they were whatever
2538/// value is necessary in order to fit the above forms.
///
/// Returns the VPERMI node on a match, or an empty SDValue otherwise.
2540 MVT VT, SDValue V1, SDValue V2,
2541 SelectionDAG &DAG,
2542 const LoongArchSubtarget &Subtarget) {
// Only 4 x 32-bit element vectors with a matching mask length are handled.
2543 if ((VT != MVT::v4i32 && VT != MVT::v4f32) ||
2544 Mask.size() != VT.getVectorNumElements())
2545 return SDValue();
2546
// MaskImm must start at 0 because buildVPERMIInfo only ORs bits into it.
2548 unsigned MaskImm = 0;
2549 if (!buildVPERMIInfo(Mask, V1, V2, SrcVec, MaskImm))
2550 return SDValue();
2551
// SrcVec[0] feeds result positions 0-1 and SrcVec[1] positions 2-3; the node
// takes them in reverse order.
2552 return DAG.getNode(LoongArchISD::VPERMI, DL, VT, SrcVec[1], SrcVec[0],
2553 DAG.getConstant(MaskImm, DL, Subtarget.getGRLenVT()));
2554}
2555
2556/// Lower VECTOR_SHUFFLE into VSHUF.
2557///
2558/// This mostly consists of converting the shuffle mask into a BUILD_VECTOR and
2559/// adding it as an operand to the resulting VSHUF.
///
/// Unlike the pattern-specific lowerings, the visible code performs no mask
/// validation and always returns a VSHUF node.
2561 MVT VT, SDValue V1, SDValue V2,
2562 SelectionDAG &DAG,
2563 const LoongArchSubtarget &Subtarget) {
2564
// One GRLenVT constant per mask entry; getSignedConstant is used so that
// undef entries (-1) are representable.
2566 for (auto M : Mask)
2567 Ops.push_back(DAG.getSignedConstant(M, DL, Subtarget.getGRLenVT()));
2568
// The control vector has the same shape as VT, with integer elements.
2569 EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
2570 SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, Ops);
2571
2572 // VECTOR_SHUFFLE concatenates the vectors in a vectorwise fashion.
2573 // <0b00, 0b01> + <0b10, 0b11> -> <0b00, 0b01, 0b10, 0b11>
2574 // VSHUF concatenates the vectors in a bitwise fashion:
2575 // <0b00, 0b01> + <0b10, 0b11> ->
2576 // 0b0100 + 0b1110 -> 0b01001110
2577 // <0b10, 0b11, 0b00, 0b01>
2578 // We must therefore swap the operands to get the correct result.
2579 return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
2580}
2581
2582/// Dispatching routine to lower various 128-bit LoongArch vector shuffles.
2583///
2584/// This routine breaks down the specific type of 128-bit shuffle and
2585/// dispatches to the lowering routines accordingly.
///
/// The candidate lowerings are tried in a fixed order and the first one that
/// produces a node wins. An empty SDValue is returned if none of them matches.
2587 SDValue V1, SDValue V2, SelectionDAG &DAG,
2588 const LoongArchSubtarget &Subtarget) {
2589 assert((VT.SimpleTy == MVT::v16i8 || VT.SimpleTy == MVT::v8i16 ||
2590 VT.SimpleTy == MVT::v4i32 || VT.SimpleTy == MVT::v2i64 ||
2591 VT.SimpleTy == MVT::v4f32 || VT.SimpleTy == MVT::v2f64) &&
2592 "Vector type is unsupported for lsx!");
2594 "Two operands have different types!");
2595 assert(VT.getVectorNumElements() == Mask.size() &&
2596 "Unexpected mask size for shuffle!");
2597 assert(Mask.size() % 2 == 0 && "Expected even mask size.");
2598
// Zeroable marks the result elements that are known undef or known zero; it
// is consumed by the shift and zero/any-extend lowerings below.
2599 APInt KnownUndef, KnownZero;
2600 computeZeroableShuffleElements(Mask, V1, V2, KnownUndef, KnownZero);
2601 APInt Zeroable = KnownUndef | KnownZero;
2602
2603 SDValue Result;
2604 // TODO: Add more comparison patterns.
// Single-input shuffles: try the lowerings that only make sense when the
// second operand is undef.
2605 if (V2.isUndef()) {
2606 if ((Result =
2607 lowerVECTOR_SHUFFLE_VREPLVEI(DL, Mask, VT, V1, DAG, Subtarget)))
2608 return Result;
2609 if ((Result =
2610 lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2611 return Result;
2612 if ((Result =
2613 lowerVECTOR_SHUFFLE_IsReverse(DL, Mask, VT, V1, DAG, Subtarget)))
2614 return Result;
2615
2616 // TODO: This comment may be enabled in the future to better match the
2617 // pattern for instruction selection.
2618 /* V2 = V1; */
2619 }
2620
2621 // It is recommended not to change the pattern comparison order for better
2622 // performance.
2623 if ((Result = lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG)))
2624 return Result;
2625 if ((Result = lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG)))
2626 return Result;
2627 if ((Result = lowerVECTOR_SHUFFLE_VILVH(DL, Mask, VT, V1, V2, DAG)))
2628 return Result;
2629 if ((Result = lowerVECTOR_SHUFFLE_VILVL(DL, Mask, VT, V1, V2, DAG)))
2630 return Result;
2631 if ((Result = lowerVECTOR_SHUFFLE_VPICKEV(DL, Mask, VT, V1, V2, DAG)))
2632 return Result;
2633 if ((Result = lowerVECTOR_SHUFFLE_VPICKOD(DL, Mask, VT, V1, V2, DAG)))
2634 return Result;
// The two-input VSHUF4I attempt is restricted to the 2 x 64-bit types.
2635 if ((VT.SimpleTy == MVT::v2i64 || VT.SimpleTy == MVT::v2f64) &&
2636 (Result =
2637 lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2638 return Result;
2639 if ((Result =
2640 lowerVECTOR_SHUFFLE_VEXTRINS(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2641 return Result;
2642 if ((Result = lowerVECTOR_SHUFFLEAsShift(DL, Mask, VT, V1, V2, DAG, Subtarget,
2643 Zeroable)))
2644 return Result;
2645 if ((Result =
2646 lowerVECTOR_SHUFFLE_VPERMI(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2647 return Result;
2648 if ((Result = lowerVECTOR_SHUFFLEAsZeroOrAnyExtend(DL, Mask, VT, V1, V2, DAG,
2649 Zeroable)))
2650 return Result;
2651 if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, Mask, VT, V1, V2, DAG,
2652 Subtarget)))
2653 return Result;
2654 if (SDValue NewShuffle = widenShuffleMask(DL, Mask, VT, V1, V2, DAG))
2655 return NewShuffle;
// Generic VSHUF lowering is tried last, after every specialised pattern.
2656 if ((Result =
2657 lowerVECTOR_SHUFFLE_VSHUF(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2658 return Result;
2659 return SDValue();
2660}
2661
2662/// Lower VECTOR_SHUFFLE into XVREPLVEI (if possible).
2663///
2664/// It is a XVREPLVEI when the mask is:
2665/// <x, x, x, ..., x+n, x+n, x+n, ...>
2666/// where the number of x is equal to n and n is half the length of vector.
2667///
2668/// When undef's appear in the mask they are treated as if they were whatever
2669/// value is necessary in order to fit the above form.
///
/// Returns UNDEF for an all-undef mask, a VREPLVEI node on a match, and an
/// empty SDValue otherwise.
2670static SDValue
2672 SDValue V1, SelectionDAG &DAG,
2673 const LoongArchSubtarget &Subtarget) {
// The first defined mask entry is the candidate splat index "x".
2674 int SplatIndex = -1;
2675 for (const auto &M : Mask) {
2676 if (M != -1) {
2677 SplatIndex = M;
2678 break;
2679 }
2680 }
2681
// Every entry is undef, so any value is acceptable.
2682 if (SplatIndex == -1)
2683 return DAG.getUNDEF(VT);
2684
2685 const auto &Begin = Mask.begin();
2686 const auto &End = Mask.end();
2687 int HalfSize = Mask.size() / 2;
2688
// The candidate must lie in the lower half of the first input.
2689 if (SplatIndex >= HalfSize)
2690 return SDValue();
2691
// Always true here because SplatIndex < HalfSize was established above.
2692 assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
// Lower half of the mask must repeat SplatIndex (stride 0); upper half must
// repeat SplatIndex + HalfSize.
2693 if (fitsRegularPattern<int>(Begin, 1, End - HalfSize, SplatIndex, 0) &&
2694 fitsRegularPattern<int>(Begin + HalfSize, 1, End, SplatIndex + HalfSize,
2695 0)) {
2696 return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
2697 DAG.getConstant(SplatIndex, DL, Subtarget.getGRLenVT()));
2698 }
2699
2700 return SDValue();
2701}
2702
2703/// Lower VECTOR_SHUFFLE into XVSHUF4I (if possible).
///
/// 4-element masks are lowered to a VSHUF4I_D node here; every other mask size
/// is forwarded to lowerVECTOR_SHUFFLE_VSHUF4I. Returns an empty SDValue when
/// a 4-element mask does not fit.
2704static SDValue
2706 SDValue V1, SDValue V2, SelectionDAG &DAG,
2707 const LoongArchSubtarget &Subtarget) {
2708 // XVSHUF4I_D must be handled separately because it is different from other
2709 // types of [X]VSHUF4I instructions.
2710 if (Mask.size() == 4) {
2711 unsigned MaskImm = 0;
// Walk result positions 1 then 0 so that, after the shifts, position 0 ends up
// in bits [1:0] of the immediate and position 1 in bits [3:2].
2712 for (int i = 1; i >= 0; --i) {
// MLo is the entry for the lower half of the result, MHi the entry at the
// same position in the upper half.
2713 int MLo = Mask[i];
2714 int MHi = Mask[i + 2];
// Lower-half entries may only be 0, 1, 4 or 5; upper-half entries only
// 2, 3, 6 or 7 (undef is always allowed).
2715 if (!(MLo == -1 || (MLo >= 0 && MLo <= 1) || (MLo >= 4 && MLo <= 5)) ||
2716 !(MHi == -1 || (MHi >= 2 && MHi <= 3) || (MHi >= 6 && MHi <= 7)))
2717 return SDValue();
// When both are defined, the upper entry must be the lower entry plus 2.
2718 if (MHi != -1 && MLo != -1 && MHi != MLo + 2)
2719 return SDValue();
2720
2721 MaskImm <<= 2;
// Encode a 2-bit selector from whichever entry is defined: 0-1 for the first
// input, 2-3 for the second. If both are undef the selector stays 0.
2722 if (MLo != -1)
2723 MaskImm |= ((MLo <= 1) ? MLo : (MLo - 2)) & 0x3;
2724 else if (MHi != -1)
2725 MaskImm |= ((MHi <= 3) ? (MHi - 2) : (MHi - 4)) & 0x3;
2726 }
2727
2728 return DAG.getNode(LoongArchISD::VSHUF4I_D, DL, VT, V1, V2,
2729 DAG.getConstant(MaskImm, DL, Subtarget.getGRLenVT()));
2730 }
2731
2732 return lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget);
2733}
2734
2735/// Lower VECTOR_SHUFFLE into XVPERMI (if possible).
///
/// v8i32/v8f32 are lowered to a VPERMI node through buildVPERMIInfo, and
/// v4i64/v4f64 to an XVPERMI node that reads V1 only. Any other type, or a
/// mask that does not fit, yields an empty SDValue.
2736static SDValue
2738 SDValue V1, SDValue V2, SelectionDAG &DAG,
2739 const LoongArchSubtarget &Subtarget) {
2740 MVT GRLenVT = Subtarget.getGRLenVT();
2741 unsigned MaskSize = Mask.size();
2742 if (MaskSize != VT.getVectorNumElements())
2743 return SDValue();
2744
2745 // Consider XVPERMI_W.
2746 if (VT == MVT::v8i32 || VT == MVT::v8f32) {
// MaskImm must start at 0 because buildVPERMIInfo only ORs bits into it.
2748 unsigned MaskImm = 0;
2749 if (!buildVPERMIInfo(Mask, V1, V2, SrcVec, MaskImm))
2750 return SDValue();
2751
// SrcVec[0] feeds result positions 0-1 and SrcVec[1] positions 2-3; the node
// takes them in reverse order.
2752 return DAG.getNode(LoongArchISD::VPERMI, DL, VT, SrcVec[1], SrcVec[0],
2753 DAG.getConstant(MaskImm, DL, GRLenVT));
2754 }
2755
2756 // Consider XVPERMI_D.
2757 if (VT == MVT::v4i64 || VT == MVT::v4f64) {
2758 unsigned MaskImm = 0;
// Each defined entry contributes a 2-bit field at bit position 2 * i; undef
// entries leave their field at 0. Entries that refer to the second input
// (>= MaskSize) cannot be encoded, so the lowering gives up.
2759 for (unsigned i = 0; i < MaskSize; ++i) {
2760 if (Mask[i] == -1)
2761 continue;
2762 if (Mask[i] >= (int)MaskSize)
2763 return SDValue();
2764 MaskImm |= Mask[i] << (i * 2);
2765 }
2766
2767 return DAG.getNode(LoongArchISD::XVPERMI, DL, VT, V1,
2768 DAG.getConstant(MaskImm, DL, GRLenVT));
2769 }
2770
2771 return SDValue();
2772}
2773
2774/// Lower VECTOR_SHUFFLE into XVPERM (if possible).
///
/// Only 8 x 32-bit vectors are handled, and only V1 is read. Returns an empty
/// SDValue when the type does not match or when each 128-bit half of the
/// result reads from a single half of the input.
2776 MVT VT, SDValue V1, SelectionDAG &DAG,
2777 const LoongArchSubtarget &Subtarget) {
2778 // LoongArch LASX only have XVPERM_W.
2779 if (Mask.size() != 8 || (VT != MVT::v8i32 && VT != MVT::v8f32))
2780 return SDValue();
2781
2782 unsigned NumElts = VT.getVectorNumElements();
2783 unsigned HalfSize = NumElts / 2;
// Front*/Back* track whether the first/second half of the mask reads only
// from the low half [0, HalfSize) or only from the high half
// [HalfSize, NumElts) of the input.
2784 bool FrontLo = true, FrontHi = true;
2785 bool BackLo = true, BackHi = true;
2786
// Undef (-1) is compatible with every range.
2787 auto inRange = [](int val, int low, int high) {
2788 return (val == -1) || (val >= low && val < high);
2789 };
2790
2791 for (unsigned i = 0; i < HalfSize; ++i) {
2792 int Fronti = Mask[i];
2793 int Backi = Mask[i + HalfSize];
2794
2795 FrontLo &= inRange(Fronti, 0, HalfSize);
2796 FrontHi &= inRange(Fronti, HalfSize, NumElts);
2797 BackLo &= inRange(Backi, 0, HalfSize);
2798 BackHi &= inRange(Backi, HalfSize, NumElts);
2799 }
2800
2801 // If both the lower and upper 128-bit parts access only one half of the
2802 // vector (either lower or upper), avoid using xvperm.w. The latency of
2803 // xvperm.w(3) is higher than using xvshuf(1) and xvori(1).
2804 if ((FrontLo || FrontHi) && (BackLo || BackHi))
2805 return SDValue();
2806
// Build the v8i32 control vector: undef mask entries become UNDEF operands,
// all others become GRLenVT constants.
2808 MVT GRLenVT = Subtarget.getGRLenVT();
2809 for (unsigned i = 0; i < NumElts; ++i)
2810 Masks.push_back(Mask[i] == -1 ? DAG.getUNDEF(GRLenVT)
2811 : DAG.getConstant(Mask[i], DL, GRLenVT));
2812 SDValue MaskVec = DAG.getBuildVector(MVT::v8i32, DL, Masks);
2813
2814 return DAG.getNode(LoongArchISD::XVPERM, DL, VT, V1, MaskVec);
2815}
2816
2817/// Lower VECTOR_SHUFFLE into XVPACKEV (if possible).
///
/// Thin wrapper: forwards all arguments unchanged to the VPACKEV lowering and
/// returns its result.
2819 MVT VT, SDValue V1, SDValue V2,
2820 SelectionDAG &DAG) {
2821 return lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG);
2822}
2823
2824/// Lower VECTOR_SHUFFLE into XVPACKOD (if possible).
///
/// Thin wrapper: forwards all arguments unchanged to the VPACKOD lowering and
/// returns its result.
2826 MVT VT, SDValue V1, SDValue V2,
2827 SelectionDAG &DAG) {
2828 return lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG);
2829}
2830
2831/// Lower VECTOR_SHUFFLE into XVILVH (if possible).
///
/// The mask is checked as two halves. Within each half, the even and the odd
/// positions must each form a run of consecutive indices taken from the upper
/// part of the corresponding half of one input. Returns the VILVH node on a
/// match, or an empty SDValue otherwise.
2833 MVT VT, SDValue V1, SDValue V2,
2834 SelectionDAG &DAG) {
2835
2836 const auto &Begin = Mask.begin();
2837 const auto &End = Mask.end();
// HalfSize is the element count of one mask half; LeftSize is half of that.
2838 unsigned HalfSize = Mask.size() / 2;
2839 unsigned LeftSize = HalfSize / 2;
2840 SDValue OriV1 = V1, OriV2 = V2;
2841
// Even positions: the first mask half must count up from HalfSize - LeftSize
// and the second half from HalfSize + LeftSize (first input), or the same
// offsets plus Mask.size() (second input).
2842 if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, HalfSize - LeftSize,
2843 1) &&
2844 fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize + LeftSize, 1))
2845 V1 = OriV1;
2846 else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize,
2847 Mask.size() + HalfSize - LeftSize, 1) &&
2848 fitsRegularPattern<int>(Begin + HalfSize, 2, End,
2849 Mask.size() + HalfSize + LeftSize, 1))
2850 V1 = OriV2;
2851 else
2852 return SDValue();
2853
// Odd positions: same classification, starting one element later.
2854 if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, HalfSize - LeftSize,
2855 1) &&
2856 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize + LeftSize,
2857 1))
2858 V2 = OriV1;
2859 else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize,
2860 Mask.size() + HalfSize - LeftSize, 1) &&
2861 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
2862 Mask.size() + HalfSize + LeftSize, 1))
2863 V2 = OriV2;
2864 else
2865 return SDValue();
2866
// Operand order: the odd-position source is passed before the even-position
// source.
2867 return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
2868}
2869
2870/// Lower VECTOR_SHUFFLE into XVILVL (if possible).
///
/// The mask is checked as two halves. Within each half, the even and the odd
/// positions must each form a run of consecutive indices starting at the
/// beginning of the corresponding half of one input. Returns the VILVL node on
/// a match, or an empty SDValue otherwise.
2872 MVT VT, SDValue V1, SDValue V2,
2873 SelectionDAG &DAG) {
2874
2875 const auto &Begin = Mask.begin();
2876 const auto &End = Mask.end();
2877 unsigned HalfSize = Mask.size() / 2;
2878 SDValue OriV1 = V1, OriV2 = V2;
2879
// Even positions: the first mask half must count up from 0 and the second half
// from HalfSize (first input), or from the same offsets plus Mask.size()
// (second input).
2880 if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, 0, 1) &&
2881 fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize, 1))
2882 V1 = OriV1;
2883 else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, Mask.size(), 1) &&
2884 fitsRegularPattern<int>(Begin + HalfSize, 2, End,
2885 Mask.size() + HalfSize, 1))
2886 V1 = OriV2;
2887 else
2888 return SDValue();
2889
// Odd positions: same classification, starting one element later.
2890 if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, 0, 1) &&
2891 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize, 1))
2892 V2 = OriV1;
2893 else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, Mask.size(),
2894 1) &&
2895 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
2896 Mask.size() + HalfSize, 1))
2897 V2 = OriV2;
2898 else
2899 return SDValue();
2900
// Operand order: the odd-position source is passed before the even-position
// source.
2901 return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
2902}
2903
2904/// Lower VECTOR_SHUFFLE into XVPICKEV (if possible).
///
/// The mask is split into four quarters. The first and third quarters select
/// one source and the second and fourth quarters select the other; each
/// quarter must list even indices of the matching half of that source.
/// Returns the VPICKEV node on a match, or an empty SDValue otherwise.
2906 MVT VT, SDValue V1, SDValue V2,
2907 SelectionDAG &DAG) {
2908
// Quarter boundaries of the mask.
2909 const auto &Begin = Mask.begin();
2910 const auto &LeftMid = Mask.begin() + Mask.size() / 4;
2911 const auto &Mid = Mask.begin() + Mask.size() / 2;
2912 const auto &RightMid = Mask.end() - Mask.size() / 4;
2913 const auto &End = Mask.end();
2914 unsigned HalfSize = Mask.size() / 2;
2915 SDValue OriV1 = V1, OriV2 = V2;
2916
// First and third quarters: even indices starting at 0 and at HalfSize (first
// input), or at the same offsets plus Mask.size() (second input).
2917 if (fitsRegularPattern<int>(Begin, 1, LeftMid, 0, 2) &&
2918 fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize, 2))
2919 V1 = OriV1;
2920 else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size(), 2) &&
2921 fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize, 2))
2922 V1 = OriV2;
2923 else
2924 return SDValue();
2925
// Second and fourth quarters: same classification.
2926 if (fitsRegularPattern<int>(LeftMid, 1, Mid, 0, 2) &&
2927 fitsRegularPattern<int>(RightMid, 1, End, HalfSize, 2))
2928 V2 = OriV1;
2929 else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size(), 2) &&
2930 fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize, 2))
2931 V2 = OriV2;
2932
2933 else
2934 return SDValue();
2935
// Operand order: the source of the second/fourth quarters is passed first.
2936 return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
2937}
2938
2939/// Lower VECTOR_SHUFFLE into XVPICKOD (if possible).
///
/// The mask is split into four quarters. The first and third quarters select
/// one source and the second and fourth quarters select the other; each
/// quarter must list odd indices of the matching half of that source.
/// Returns the VPICKOD node on a match, or an empty SDValue otherwise.
2941 MVT VT, SDValue V1, SDValue V2,
2942 SelectionDAG &DAG) {
2943
// Quarter boundaries of the mask.
2944 const auto &Begin = Mask.begin();
2945 const auto &LeftMid = Mask.begin() + Mask.size() / 4;
2946 const auto &Mid = Mask.begin() + Mask.size() / 2;
2947 const auto &RightMid = Mask.end() - Mask.size() / 4;
2948 const auto &End = Mask.end();
2949 unsigned HalfSize = Mask.size() / 2;
2950 SDValue OriV1 = V1, OriV2 = V2;
2951
// First and third quarters: odd indices starting at 1 and at HalfSize + 1
// (first input), or at the same offsets plus Mask.size() (second input).
2952 if (fitsRegularPattern<int>(Begin, 1, LeftMid, 1, 2) &&
2953 fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize + 1, 2))
2954 V1 = OriV1;
2955 else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size() + 1, 2) &&
2956 fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize + 1,
2957 2))
2958 V1 = OriV2;
2959 else
2960 return SDValue();
2961
// Second and fourth quarters: same classification.
2962 if (fitsRegularPattern<int>(LeftMid, 1, Mid, 1, 2) &&
2963 fitsRegularPattern<int>(RightMid, 1, End, HalfSize + 1, 2))
2964 V2 = OriV1;
2965 else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size() + 1, 2) &&
2966 fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize + 1,
2967 2))
2968 V2 = OriV2;
2969 else
2970 return SDValue();
2971
// Operand order: the source of the second/fourth quarters is passed first.
2972 return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
2973}
2974
2975/// Lower VECTOR_SHUFFLE into XVEXTRINS (if possible).
///
/// The mask must be an identity copy of V1 or V2 except for one position in
/// the low half and the position HalfSize above it, which must both be
/// replaced by elements that are also HalfSize apart in the same source.
/// The result is emitted as two EXTRACT_VECTOR_ELT + INSERT_VECTOR_ELT pairs.
/// Returns an empty SDValue when the mask does not fit.
2976static SDValue
2978 SDValue V1, SDValue V2, SelectionDAG &DAG,
2979 const LoongArchSubtarget &Subtarget) {
2980 int NumElts = VT.getVectorNumElements();
2981 int HalfSize = NumElts / 2;
2982 MVT EltVT = VT.getVectorElementType();
2983 MVT GRLenVT = Subtarget.getGRLenVT();
2984
2985 if ((int)Mask.size() != NumElts)
2986 return SDValue();
2987
// Base is the mask value expected at position 0 of an identity copy:
// 0 means "result is V1 with elements replaced", NumElts the same for V2.
2988 auto tryLowerToExtrAndIns = [&](int Base) -> SDValue {
// Collect, in increasing order, the defined positions that deviate from the
// identity Base + i; more than two is an immediate failure.
2989 SmallVector<int> DiffPos;
2990 for (int i = 0; i < NumElts; ++i) {
2991 if (Mask[i] == -1)
2992 continue;
2993 if (Mask[i] != Base + i) {
2994 DiffPos.push_back(i);
2995 if (DiffPos.size() > 2)
2996 return SDValue();
2997 }
2998 }
2999
3000 // Need exactly two differing element to lower into XVEXTRINS.
3001 // If only one differing element, the element at a distance of
3002 // HalfSize from it must be undef.
// The undef partner is added so that DiffPos stays ordered: low-half position
// first, high-half position second.
3003 if (DiffPos.size() == 1) {
3004 if (DiffPos[0] < HalfSize && Mask[DiffPos[0] + HalfSize] == -1)
3005 DiffPos.push_back(DiffPos[0] + HalfSize);
3006 else if (DiffPos[0] >= HalfSize && Mask[DiffPos[0] - HalfSize] == -1)
3007 DiffPos.insert(DiffPos.begin(), DiffPos[0] - HalfSize);
3008 else
3009 return SDValue();
3010 }
3011 if (DiffPos.size() != 2 || DiffPos[1] != DiffPos[0] + HalfSize)
3012 return SDValue();
3013
3014 // DiffMask must be in its low or high part.
3015 int DiffMaskLo = Mask[DiffPos[0]];
3016 int DiffMaskHi = Mask[DiffPos[1]];
// An undef entry is derived from its partner so the range checks below apply
// to both.
3017 DiffMaskLo = DiffMaskLo == -1 ? DiffMaskHi - HalfSize : DiffMaskLo;
3018 DiffMaskHi = DiffMaskHi == -1 ? DiffMaskLo + HalfSize : DiffMaskHi;
3019 if (!(DiffMaskLo >= 0 && DiffMaskLo < HalfSize) &&
3020 !(DiffMaskLo >= NumElts && DiffMaskLo < NumElts + HalfSize))
3021 return SDValue();
3022 if (!(DiffMaskHi >= HalfSize && DiffMaskHi < NumElts) &&
3023 !(DiffMaskHi >= NumElts + HalfSize && DiffMaskHi < 2 * NumElts))
3024 return SDValue();
3025 if (DiffMaskHi != DiffMaskLo + HalfSize)
3026 return SDValue();
3027
3028 // Determine source vector and source index.
3029 SDValue SrcVec = (DiffMaskLo < HalfSize) ? V1 : V2;
3030 int SrcIdxLo =
3031 (DiffMaskLo < HalfSize) ? DiffMaskLo : (DiffMaskLo - NumElts);
3032 bool IsEltFP = EltVT.isFloatingPoint();
3033
3034 // Replace with 2*EXTRACT_VECTOR_ELT + 2*INSERT_VECTOR_ELT, it will match
3035 // the patterns of XVEXTRINS in tablegen.
3036 SDValue BaseVec = (Base == 0) ? V1 : V2;
3037 SDValue EltLo =
3038 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IsEltFP ? EltVT : GRLenVT,
3039 SrcVec, DAG.getConstant(SrcIdxLo, DL, GRLenVT));
3040 SDValue InsLo = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, BaseVec, EltLo,
3041 DAG.getConstant(DiffPos[0], DL, GRLenVT));
3042 SDValue EltHi =
3043 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IsEltFP ? EltVT : GRLenVT,
3044 SrcVec, DAG.getConstant(SrcIdxLo + HalfSize, DL, GRLenVT));
// The second insert is chained on top of the first one (InsLo).
3045 SDValue Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, InsLo, EltHi,
3046 DAG.getConstant(DiffPos[1], DL, GRLenVT));
3047
3048 return Result;
3049 };
3050
3051 // Try V1 as the base vector (Base = 0) first, then V2 (Base = n).
3052 if (SDValue Result = tryLowerToExtrAndIns(0))
3053 return Result;
3054 return tryLowerToExtrAndIns(NumElts);
3055}
3056
3057/// Lower VECTOR_SHUFFLE into XVINSVE0 (if possible).
///
/// Matches masks that copy one input unchanged except for a single position,
/// which receives element 0 of the other input. Returns the XVINSVE0 node on a
/// match, or an empty SDValue otherwise.
3058static SDValue
3060 SDValue V1, SDValue V2, SelectionDAG &DAG,
3061 const LoongArchSubtarget &Subtarget) {
3062 // LoongArch LASX only supports xvinsve0.{w/d}.
3063 if (VT != MVT::v8i32 && VT != MVT::v8f32 && VT != MVT::v4i64 &&
3064 VT != MVT::v4f64)
3065 return SDValue();
3066
3067 MVT GRLenVT = Subtarget.getGRLenVT();
3068 int MaskSize = Mask.size();
3069 assert(MaskSize == (int)VT.getVectorNumElements() && "Unexpected mask size");
3070
3071 // Check if exactly one element of the Mask is replaced by 'Replaced', while
3072 // all other elements are either 'Base + i' or undef (-1). On success, return
3073 // the index of the replaced element. Otherwise, just return -1.
// A mask with no replaced element at all also yields -1, because Idx is never
// assigned.
3074 auto checkReplaceOne = [&](int Base, int Replaced) -> int {
3075 int Idx = -1;
3076 for (int i = 0; i < MaskSize; ++i) {
3077 if (Mask[i] == Base + i || Mask[i] == -1)
3078 continue;
3079 if (Mask[i] != Replaced)
3080 return -1;
3081 if (Idx == -1)
3082 Idx = i;
3083 else
3084 return -1;
3085 }
3086 return Idx;
3087 };
3088
3089 // Case 1: the lowest element of V2 replaces one element in V1.
3090 int Idx = checkReplaceOne(0, MaskSize);
3091 if (Idx != -1)
3092 return DAG.getNode(LoongArchISD::XVINSVE0, DL, VT, V1, V2,
3093 DAG.getConstant(Idx, DL, GRLenVT));
3094
3095 // Case 2: the lowest element of V1 replaces one element in V2.
3096 Idx = checkReplaceOne(MaskSize, 0);
3097 if (Idx != -1)
3098 return DAG.getNode(LoongArchISD::XVINSVE0, DL, VT, V2, V1,
3099 DAG.getConstant(Idx, DL, GRLenVT));
3100
3101 return SDValue();
3102}
3103
3104/// Lower VECTOR_SHUFFLE into XVSHUF (if possible).
///
/// Each half of the mask may only reference the matching half of either input
/// (low half of the mask -> low halves of V1/V2, high half -> high halves).
/// The mask indices are rewritten into per-half selectors and emitted as the
/// control vector of a VSHUF node. Returns an empty SDValue if any entry
/// crosses halves.
3106 MVT VT, SDValue V1, SDValue V2,
3107 SelectionDAG &DAG) {
3108
3109 int MaskSize = Mask.size();
3110 int HalfSize = Mask.size() / 2;
3111 const auto &Begin = Mask.begin();
3112 const auto &Mid = Mask.begin() + HalfSize;
3113 const auto &End = Mask.end();
3114
3115 // VECTOR_SHUFFLE concatenates the vectors:
3116 // <0, 1, 2, 3, 4, 5, 6, 7> + <8, 9, 10, 11, 12, 13, 14, 15>
3117 // shuffling ->
3118 // <0, 1, 2, 3, 8, 9, 10, 11> <4, 5, 6, 7, 12, 13, 14, 15>
3119 //
3120 // XVSHUF concatenates the vectors:
3121 // <a0, a1, a2, a3, b0, b1, b2, b3> + <a4, a5, a6, a7, b4, b5, b6, b7>
3122 // shuffling ->
3123 // <a0, a1, a2, a3, a4, a5, a6, a7> + <b0, b1, b2, b3, b4, b5, b6, b7>
3124 SmallVector<SDValue, 8> MaskAlloc;
// Low half of the mask: indices from the low half of V1 are kept as-is;
// indices from the low half of V2 ([MaskSize, MaskSize + HalfSize)) are
// shifted down by HalfSize. Undef entries are encoded as 0.
3125 for (auto it = Begin; it < Mid; it++) {
3126 if (*it < 0) // UNDEF
3127 MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
3128 else if ((*it >= 0 && *it < HalfSize) ||
3129 (*it >= MaskSize && *it < MaskSize + HalfSize)) {
3130 int M = *it < HalfSize ? *it : *it - HalfSize;
3131 MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
3132 } else
3133 return SDValue();
3134 }
3135 assert((int)MaskAlloc.size() == HalfSize && "xvshuf convert failed!");
3136
// High half of the mask: indices from the high half of V1 are shifted down by
// HalfSize; indices from the high half of V2 are shifted down by MaskSize.
3137 for (auto it = Mid; it < End; it++) {
3138 if (*it < 0) // UNDEF
3139 MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
3140 else if ((*it >= HalfSize && *it < MaskSize) ||
3141 (*it >= MaskSize + HalfSize && *it < MaskSize * 2)) {
3142 int M = *it < MaskSize ? *it - HalfSize : *it - MaskSize;
3143 MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
3144 } else
3145 return SDValue();
3146 }
3147 assert((int)MaskAlloc.size() == MaskSize && "xvshuf convert failed!");
3148
// The control vector has the same shape as VT, with integer elements; the data
// operands are passed as (V2, V1).
3149 EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
3150 SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, MaskAlloc);
3151 return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
3152}
3153
3154 /// Shuffle vectors by lane to generate more optimized instructions.
3155 /// 256-bit shuffles are always considered as 2-lane 128-bit shuffles.
3156 ///
3157 /// Therefore, except for the following four cases, other cases are regarded
3158 /// as cross-lane shuffles, where optimization is relatively limited.
3159 ///
3160 /// - Shuffle high, low lanes of two inputs vector
3161 /// <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 3, 6>
3162 /// - Shuffle low, high lanes of two inputs vector
3163 /// <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 0, 5>
3164 /// - Shuffle low, low lanes of two inputs vector
3165 /// <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 3, 6>
3166 /// - Shuffle high, high lanes of two inputs vector
3167 /// <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 0, 5>
3168 ///
3169 /// The first case is the closest to LoongArch instructions and the other
3170 /// cases need to be converted to it for processing.
3171 ///
3172 /// This function will return true for the last three cases above and will
3173 /// modify V1, V2 and Mask. Otherwise, return false for the first case and
3174 /// cross-lane shuffle cases.
// NOTE(review): the embedded numbering skips 3175, so the line that opens this
// signature (return type and function name) is not visible here.
3176 const SDLoc &DL, MutableArrayRef<int> Mask, MVT VT, SDValue &V1,
3177 SDValue &V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget) {
3178
3179 enum HalfMaskType { HighLaneTy, LowLaneTy, None };
3180
3181 int MaskSize = Mask.size();
3182 int HalfSize = Mask.size() / 2;
3183 MVT GRLenVT = Subtarget.getGRLenVT();
3184
3185 HalfMaskType preMask = None, postMask = None;
3186
// Classify the first half of the mask: HighLaneTy when every defined element
// indexes the first half of V1 or V2, LowLaneTy when every defined element
// indexes the second half of V1 or V2, None otherwise.
3187 if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
3188 return M < 0 || (M >= 0 && M < HalfSize) ||
3189 (M >= MaskSize && M < MaskSize + HalfSize);
3190 }))
3191 preMask = HighLaneTy;
3192 else if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
3193 return M < 0 || (M >= HalfSize && M < MaskSize) ||
3194 (M >= MaskSize + HalfSize && M < MaskSize * 2);
3195 }))
3196 preMask = LowLaneTy;
3197
// Classify the second half of the mask the same way, but test LowLaneTy
// first.
3198 if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
3199 return M < 0 || (M >= HalfSize && M < MaskSize) ||
3200 (M >= MaskSize + HalfSize && M < MaskSize * 2);
3201 }))
3202 postMask = LowLaneTy;
3203 else if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
3204 return M < 0 || (M >= 0 && M < HalfSize) ||
3205 (M >= MaskSize && M < MaskSize + HalfSize);
3206 }))
3207 postMask = HighLaneTy;
3208
3209 // The pre-half of mask is high lane type, and the post-half of mask
3210 // is low lane type, which is closest to the LoongArch instructions.
3211 //
3212 // Note: In the LoongArch architecture, the high lane of mask corresponds
3213 // to the lower 128-bit of vector register, and the low lane of mask
3214 // corresponds the higher 128-bit of vector register.
3215 if (preMask == HighLaneTy && postMask == LowLaneTy) {
3216 return false;
3217 }
// Each remaining in-lane case permutes the inputs as v4i64 with XVPERMI and
// then rebases the affected half of the mask by HalfSize so that it matches
// the permuted inputs. V2 is left untouched when it is undef.
// NOTE(review): the immediates presumably select 64-bit elements two bits at
// a time (0b01001110 = swap halves, 0b11101110 = duplicate the upper half,
// 0b01000100 = duplicate the lower half) — confirm against the XVPERMI
// definition.
3218 if (preMask == LowLaneTy && postMask == HighLaneTy) {
3219 V1 = DAG.getBitcast(MVT::v4i64, V1);
3220 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
3221 DAG.getConstant(0b01001110, DL, GRLenVT));
3222 V1 = DAG.getBitcast(VT, V1);
3223
3224 if (!V2.isUndef()) {
3225 V2 = DAG.getBitcast(MVT::v4i64, V2);
3226 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
3227 DAG.getConstant(0b01001110, DL, GRLenVT));
3228 V2 = DAG.getBitcast(VT, V2);
3229 }
3230
3231 for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
3232 *it = *it < 0 ? *it : *it - HalfSize;
3233 }
3234 for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
3235 *it = *it < 0 ? *it : *it + HalfSize;
3236 }
3237 } else if (preMask == LowLaneTy && postMask == LowLaneTy) {
3238 V1 = DAG.getBitcast(MVT::v4i64, V1);
3239 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
3240 DAG.getConstant(0b11101110, DL, GRLenVT));
3241 V1 = DAG.getBitcast(VT, V1);
3242
3243 if (!V2.isUndef()) {
3244 V2 = DAG.getBitcast(MVT::v4i64, V2);
3245 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
3246 DAG.getConstant(0b11101110, DL, GRLenVT));
3247 V2 = DAG.getBitcast(VT, V2);
3248 }
3249
// Only the first half of the mask needs rebasing in this case.
3250 for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
3251 *it = *it < 0 ? *it : *it - HalfSize;
3252 }
3253 } else if (preMask == HighLaneTy && postMask == HighLaneTy) {
3254 V1 = DAG.getBitcast(MVT::v4i64, V1);
3255 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
3256 DAG.getConstant(0b01000100, DL, GRLenVT));
3257 V1 = DAG.getBitcast(VT, V1);
3258
3259 if (!V2.isUndef()) {
3260 V2 = DAG.getBitcast(MVT::v4i64, V2);
3261 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
3262 DAG.getConstant(0b01000100, DL, GRLenVT));
3263 V2 = DAG.getBitcast(VT, V2);
3264 }
3265
// Only the second half of the mask needs rebasing in this case.
3266 for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
3267 *it = *it < 0 ? *it : *it + HalfSize;
3268 }
3269 } else { // cross-lane
3270 return false;
3271 }
3272
3273 return true;
3274}
3275
3276 /// Lower VECTOR_SHUFFLE as lane permute and then shuffle (if possible).
3277 /// Only for 256-bit vector.
3278 ///
3279 /// For example:
3280 /// %2 = shufflevector <4 x i64> %0, <4 x i64> poison,
3281 /// <4 x i64> <i32 0, i32 3, i32 2, i32 0>
3282 /// is lowered to:
3283 /// (XVPERMI $xr2, $xr0, 78)
3284 /// (XVSHUF $xr1, $xr2, $xr0)
3285 /// (XVORI $xr0, $xr1, 0)
// NOTE(review): the embedded numbering skips 3286, so the line that opens this
// signature (function name and leading parameters) is not visible here.
// V2 is not referenced in the body below.
3287 ArrayRef<int> Mask,
3288 MVT VT, SDValue V1,
3289 SDValue V2,
3290 SelectionDAG &DAG) {
3291 assert(VT.is256BitVector() && "Only for 256-bit vector shuffles!");
3292 int Size = Mask.size();
3293 int LaneSize = Size / 2;
3294
// LaneCrossing[L] records that some element sourced from lane L is placed in
// the other lane of the result.
3295 bool LaneCrossing[2] = {false, false};
3296 for (int i = 0; i < Size; ++i)
3297 if (Mask[i] >= 0 && ((Mask[i] % Size) / LaneSize) != (i / LaneSize))
3298 LaneCrossing[(Mask[i] % Size) / LaneSize] = true;
3299
3300 // Ensure that all lanes are involved.
// (As written, this bails out only when no element crosses lanes at all.)
3301 if (!LaneCrossing[0] && !LaneCrossing[1])
3302 return SDValue();
3303
// Redirect every lane-crossing element to the second shuffle operand
// (index + Size), at the same in-lane offset but inside the destination lane.
3304 SmallVector<int> InLaneMask;
3305 InLaneMask.assign(Mask.begin(), Mask.end());
3306 for (int i = 0; i < Size; ++i) {
3307 int &M = InLaneMask[i];
3308 if (M < 0)
3309 continue;
3310 if (((M % Size) / LaneSize) != (i / LaneSize))
3311 M = (M % LaneSize) + ((i / LaneSize) * LaneSize) + Size;
3312 }
3313
// Build a copy of V1 with its two 128-bit halves swapped (a v4i64 shuffle
// <2, 3, 0, 1>), then shuffle V1 against that copy using the in-lane mask.
3314 SDValue Flipped = DAG.getBitcast(MVT::v4i64, V1);
3315 Flipped = DAG.getVectorShuffle(MVT::v4i64, DL, Flipped,
3316 DAG.getUNDEF(MVT::v4i64), {2, 3, 0, 1});
3317 Flipped = DAG.getBitcast(VT, Flipped);
3318 return DAG.getVectorShuffle(VT, DL, V1, Flipped, InLaneMask);
3319}
3320
3321 /// Dispatching routine to lower various 256-bit LoongArch vector shuffles.
3322 ///
3323 /// This routine breaks down the specific type of 256-bit shuffle and
3324 /// dispatches to the lowering routines accordingly.
///
/// The candidate lowerings are tried in a fixed order and the first one that
/// produces a value wins; an empty SDValue is returned if none applies.
// NOTE(review): the embedded numbering skips 3325 (the line that opens this
// signature) and 3332 (the opening of the second assert below), so both are
// not visible here.
3326 SDValue V1, SDValue V2, SelectionDAG &DAG,
3327 const LoongArchSubtarget &Subtarget) {
3328 assert((VT.SimpleTy == MVT::v32i8 || VT.SimpleTy == MVT::v16i16 ||
3329 VT.SimpleTy == MVT::v8i32 || VT.SimpleTy == MVT::v4i64 ||
3330 VT.SimpleTy == MVT::v8f32 || VT.SimpleTy == MVT::v4f64) &&
3331 "Vector type is unsupported for lasx!");
3333 "Two operands have different types!");
3334 assert(VT.getVectorNumElements() == Mask.size() &&
3335 "Unexpected mask size for shuffle!");
3336 assert(Mask.size() % 2 == 0 && "Expected even mask size.");
3337 assert(Mask.size() >= 4 && "Mask size is less than 4.");
3338
// Zeroable has a bit set for every result element known to be undef or zero;
// it is only consumed by the shift-based lowering further down.
3339 APInt KnownUndef, KnownZero;
3340 computeZeroableShuffleElements(Mask, V1, V2, KnownUndef, KnownZero);
3341 APInt Zeroable = KnownUndef | KnownZero;
3342
3343 SDValue Result;
3344 // TODO: Add more comparison patterns.
// Single-input shuffles get a first pass over patterns that only need V1.
3345 if (V2.isUndef()) {
3346 if ((Result =
3347 lowerVECTOR_SHUFFLE_XVREPLVEI(DL, Mask, VT, V1, DAG, Subtarget)))
3348 return Result;
3349 if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, Mask, VT, V1, V2, DAG,
3350 Subtarget)))
3351 return Result;
3352 // Try to widen vectors to gain more optimization opportunities.
3353 if (SDValue NewShuffle = widenShuffleMask(DL, Mask, VT, V1, V2, DAG))
3354 return NewShuffle;
3355 if ((Result =
3356 lowerVECTOR_SHUFFLE_XVPERMI(DL, Mask, VT, V1, V2, DAG, Subtarget)))
3357 return Result;
3358 if ((Result = lowerVECTOR_SHUFFLE_XVPERM(DL, Mask, VT, V1, DAG, Subtarget)))
3359 return Result;
3360 if ((Result =
3361 lowerVECTOR_SHUFFLE_IsReverse(DL, Mask, VT, V1, DAG, Subtarget)))
3362 return Result;
3363
3364 // TODO: This comment may be enabled in the future to better match the
3365 // pattern for instruction selection.
3366 /* V2 = V1; */
3367 }
3368
3369 // It is recommended not to change the pattern comparison order for better
3370 // performance.
3371 if ((Result = lowerVECTOR_SHUFFLE_XVPACKEV(DL, Mask, VT, V1, V2, DAG)))
3372 return Result;
3373 if ((Result = lowerVECTOR_SHUFFLE_XVPACKOD(DL, Mask, VT, V1, V2, DAG)))
3374 return Result;
3375 if ((Result = lowerVECTOR_SHUFFLE_XVILVH(DL, Mask, VT, V1, V2, DAG)))
3376 return Result;
3377 if ((Result = lowerVECTOR_SHUFFLE_XVILVL(DL, Mask, VT, V1, V2, DAG)))
3378 return Result;
3379 if ((Result = lowerVECTOR_SHUFFLE_XVPICKEV(DL, Mask, VT, V1, V2, DAG)))
3380 return Result;
3381 if ((Result = lowerVECTOR_SHUFFLE_XVPICKOD(DL, Mask, VT, V1, V2, DAG)))
3382 return Result;
// XVSHUF4I is only attempted here for the 4 x 64-bit element types.
3383 if ((VT.SimpleTy == MVT::v4i64 || VT.SimpleTy == MVT::v4f64) &&
3384 (Result =
3385 lowerVECTOR_SHUFFLE_XVSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget)))
3386 return Result;
3387 if ((Result =
3388 lowerVECTOR_SHUFFLE_XVEXTRINS(DL, Mask, VT, V1, V2, DAG, Subtarget)))
3389 return Result;
3390 if ((Result = lowerVECTOR_SHUFFLEAsShift(DL, Mask, VT, V1, V2, DAG, Subtarget,
3391 Zeroable)))
3392 return Result;
3393 if ((Result =
3394 lowerVECTOR_SHUFFLE_XVPERMI(DL, Mask, VT, V1, V2, DAG, Subtarget)))
3395 return Result;
3396 if ((Result =
3397 lowerVECTOR_SHUFFLE_XVINSVE0(DL, Mask, VT, V1, V2, DAG, Subtarget)))
3398 return Result;
3399 if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, Mask, VT, V1, V2, DAG,
3400 Subtarget)))
3401 return Result;
3402
3403 // canonicalize non cross-lane shuffle vector
// On success the canonicalization has rewritten V1, V2 and NewMask, so the
// whole dispatch is retried on the rewritten shuffle.
3404 SmallVector<int> NewMask(Mask);
3405 if (canonicalizeShuffleVectorByLane(DL, NewMask, VT, V1, V2, DAG, Subtarget))
3406 return lower256BitShuffle(DL, NewMask, VT, V1, V2, DAG, Subtarget);
3407
3408 // FIXME: Handling the remaining cases earlier can degrade performance
3409 // in some situations. Further analysis is required to enable more
3410 // effective optimizations.
3411 if (V2.isUndef()) {
3412 if ((Result = lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(DL, NewMask, VT,
3413 V1, V2, DAG)))
3414 return Result;
3415 }
3416
// Last resorts: widen the mask, then fall back to the generic XVSHUF form.
3417 if (SDValue NewShuffle = widenShuffleMask(DL, NewMask, VT, V1, V2, DAG))
3418 return NewShuffle;
3419 if ((Result = lowerVECTOR_SHUFFLE_XVSHUF(DL, NewMask, VT, V1, V2, DAG)))
3420 return Result;
3421
3422 return SDValue();
3423}
3424
3425SDValue LoongArchTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
3426 SelectionDAG &DAG) const {
3427 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
3428 ArrayRef<int> OrigMask = SVOp->getMask();
3429 SDValue V1 = Op.getOperand(0);
3430 SDValue V2 = Op.getOperand(1);
3431 MVT VT = Op.getSimpleValueType();
3432 int NumElements = VT.getVectorNumElements();
3433 SDLoc DL(Op);
3434
3435 bool V1IsUndef = V1.isUndef();
3436 bool V2IsUndef = V2.isUndef();
3437 if (V1IsUndef && V2IsUndef)
3438 return DAG.getUNDEF(VT);
3439
3440 // When we create a shuffle node we put the UNDEF node to second operand,
3441 // but in some cases the first operand may be transformed to UNDEF.
3442 // In this case we should just commute the node.
3443 if (V1IsUndef)
3444 return DAG.getCommutedVectorShuffle(*SVOp);
3445
3446 // Check for non-undef masks pointing at an undef vector and make the masks
3447 // undef as well. This makes it easier to match the shuffle based solely on
3448 // the mask.
3449 if (V2IsUndef &&
3450 any_of(OrigMask, [NumElements](int M) { return M >= NumElements; })) {
3451 SmallVector<int, 8> NewMask(OrigMask);
3452 for (int &M : NewMask)
3453 if (M >= NumElements)
3454 M = -1;
3455 return DAG.getVectorShuffle(VT, DL, V1, V2, NewMask);
3456 }
3457
3458 // Check for illegal shuffle mask element index values.
3459 int MaskUpperLimit = OrigMask.size() * (V2IsUndef ? 1 : 2);
3460 (void)MaskUpperLimit;
3461 assert(llvm::all_of(OrigMask,
3462 [&](int M) { return -1 <= M && M < MaskUpperLimit; }) &&
3463 "Out of bounds shuffle index");
3464
3465 // For each vector width, delegate to a specialized lowering routine.
3466 if (VT.is128BitVector())
3467 return lower128BitShuffle(DL, OrigMask, VT, V1, V2, DAG, Subtarget);
3468
3469 if (VT.is256BitVector())
3470 return lower256BitShuffle(DL, OrigMask, VT, V1, V2, DAG, Subtarget);
3471
3472 return SDValue();
3473}
3474
3475SDValue LoongArchTargetLowering::lowerFP_TO_FP16(SDValue Op,
3476 SelectionDAG &DAG) const {
3477 // Custom lower to ensure the libcall return is passed in an FPR on hard
3478 // float ABIs.
3479 SDLoc DL(Op);
3480 MakeLibCallOptions CallOptions;
3481 SDValue Op0 = Op.getOperand(0);
3482 SDValue Chain = SDValue();
3483 RTLIB::Libcall LC = RTLIB::getFPROUND(Op0.getValueType(), MVT::f16);
3484 SDValue Res;
3485 std::tie(Res, Chain) =
3486 makeLibCall(DAG, LC, MVT::f32, Op0, CallOptions, DL, Chain);
3487 if (Subtarget.is64Bit())
3488 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Res);
3489 return DAG.getBitcast(MVT::i32, Res);
3490}
3491
3492SDValue LoongArchTargetLowering::lowerFP16_TO_FP(SDValue Op,
3493 SelectionDAG &DAG) const {
3494 // Custom lower to ensure the libcall argument is passed in an FPR on hard
3495 // float ABIs.
3496 SDLoc DL(Op);
3497 MakeLibCallOptions CallOptions;
3498 SDValue Op0 = Op.getOperand(0);
3499 SDValue Chain = SDValue();
3500 SDValue Arg = Subtarget.is64Bit() ? DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64,
3501 DL, MVT::f32, Op0)
3502 : DAG.getBitcast(MVT::f32, Op0);
3503 SDValue Res;
3504 std::tie(Res, Chain) = makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Arg,
3505 CallOptions, DL, Chain);
3506 return Res;
3507}
3508
3509SDValue LoongArchTargetLowering::lowerFP_TO_BF16(SDValue Op,
3510 SelectionDAG &DAG) const {
3511 assert(Subtarget.hasBasicF() && "Unexpected custom legalization");
3512 SDLoc DL(Op);
3513 MakeLibCallOptions CallOptions;
3514 RTLIB::Libcall LC =
3515 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16);
3516 SDValue Res =
3517 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
3518 if (Subtarget.is64Bit())
3519 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Res);
3520 return DAG.getBitcast(MVT::i32, Res);
3521}
3522
3523SDValue LoongArchTargetLowering::lowerBF16_TO_FP(SDValue Op,
3524 SelectionDAG &DAG) const {
3525 assert(Subtarget.hasBasicF() && "Unexpected custom legalization");
3526 MVT VT = Op.getSimpleValueType();
3527 SDLoc DL(Op);
3528 Op = DAG.getNode(
3529 ISD::SHL, DL, Op.getOperand(0).getValueType(), Op.getOperand(0),
3530 DAG.getShiftAmountConstant(16, Op.getOperand(0).getValueType(), DL));
3531 SDValue Res = Subtarget.is64Bit() ? DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64,
3532 DL, MVT::f32, Op)
3533 : DAG.getBitcast(MVT::f32, Op);
3534 if (VT != MVT::f32)
3535 return DAG.getNode(ISD::FP_EXTEND, DL, VT, Res);
3536 return Res;
3537}
3538
3539 // Lower BUILD_VECTOR as broadcast load (if possible).
3540 // For example:
3541 // %a = load i8, ptr %ptr
3542 // %b = build_vector %a, %a, %a, %a
3543 // is lowered to :
3544 // (VLDREPL_B $a0, 0)
//
// Returns the VLDREPL node on success and an empty SDValue when the operands
// are not all the same load, the load has other users, or its extension
// type / memory width does not fit.
// NOTE(review): the embedded numbering skips 3545, so the line that opens this
// signature (function name and the BVOp parameter) is not visible here.
3546 const SDLoc &DL,
3547 SelectionDAG &DAG) {
3548 MVT VT = BVOp->getSimpleValueType(0);
3549 int NumOps = BVOp->getNumOperands();
3550
3551 assert((VT.is128BitVector() || VT.is256BitVector()) &&
3552 "Unsupported vector type for broadcast.");
3553
3554 SDValue IdentitySrc;
3555 bool IsIdeneity = true;
3556
// Every operand must be a LOAD and must equal the first operand.
3557 for (int i = 0; i != NumOps; i++) {
3558 SDValue Op = BVOp->getOperand(i);
3559 if (Op.getOpcode() != ISD::LOAD || (IdentitySrc && Op != IdentitySrc)) {
3560 IsIdeneity = false;
3561 break;
3562 }
3563 IdentitySrc = BVOp->getOperand(0);
3564 }
3565
3566 // make sure that this load is valid and only has one user.
3567 if (!IsIdeneity || !IdentitySrc || !BVOp->isOnlyUserOf(IdentitySrc.getNode()))
3568 return SDValue();
3569
3570 auto *LN = cast<LoadSDNode>(IdentitySrc);
3571 auto ExtType = LN->getExtensionType();
3572
// Only any-extending or non-extending loads whose in-memory scalar width
// equals the vector element width are converted.
3573 if ((ExtType == ISD::EXTLOAD || ExtType == ISD::NON_EXTLOAD) &&
3574 VT.getScalarSizeInBits() == LN->getMemoryVT().getScalarSizeInBits()) {
3575 // Indexed loads and stores are not supported on LoongArch.
3576 assert(LN->isUnindexed() && "Unexpected indexed load.");
3577
3578 SDVTList Tys = DAG.getVTList(VT, MVT::Other);
3579 // The offset operand of unindexed load is always undefined, so there is
3580 // no need to pass it to VLDREPL.
3581 SDValue Ops[] = {LN->getChain(), LN->getBasePtr()};
3582 SDValue BCast = DAG.getNode(LoongArchISD::VLDREPL, DL, Tys, Ops);
// Redirect users of the original load's chain result to the new node's chain.
3583 DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BCast.getValue(1));
3584 return BCast;
3585 }
3586 return SDValue();
3587}
3588
3589 // Sequentially insert elements from Ops into Vector, from low to high indices.
3590 // Note: Ops can have fewer elements than Vector.
//
// Vector is updated in place; undef entries of Ops are skipped, leaving the
// corresponding elements of Vector untouched. Ops[0] is read unconditionally,
// so Ops must not be empty.
// NOTE(review): the embedded numbering skips 3591, so the line that opens this
// signature (function name and leading parameters) is not visible here.
3592 const LoongArchSubtarget &Subtarget, SDValue &Vector,
3593 EVT ResTy) {
3594 assert(Ops.size() <= ResTy.getVectorNumElements());
3595
// Element 0 is materialized with SCALAR_TO_VECTOR, which replaces whatever
// value Vector held on entry.
3596 SDValue Op0 = Ops[0];
3597 if (!Op0.isUndef())
3598 Vector = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, ResTy, Op0);
// The remaining elements are chained in with INSERT_VECTOR_ELT.
3599 for (unsigned i = 1; i < Ops.size(); ++i) {
3600 SDValue Opi = Ops[i];
3601 if (Opi.isUndef())
3602 continue;
3603 Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector, Opi,
3604 DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
3605 }
3606}
3607
3608 // Build a ResTy subvector from Node, taking NumElts elements starting at index
3609 // 'first'.
//
// NumElts is the element count of ResTy. The result starts out as UNDEF and is
// populated through fillVector, so undef operands of Node stay undef.
// NOTE(review): the embedded numbering skips 3610, so the line that opens this
// signature (function name and the Node parameter) is not visible here.
3611 SelectionDAG &DAG, SDLoc DL,
3612 const LoongArchSubtarget &Subtarget,
3613 EVT ResTy, unsigned first) {
3614 unsigned NumElts = ResTy.getVectorNumElements();
3615
3616 assert(first + NumElts <= Node->getSimpleValueType(0).getVectorNumElements());
3617
// Copy the operand window [first, first + NumElts) of Node.
3618 SmallVector<SDValue, 16> Ops(Node->op_begin() + first,
3619 Node->op_begin() + first + NumElts);
3620 SDValue Vector = DAG.getUNDEF(ResTy);
3621 fillVector(Ops, DAG, DL, Subtarget, Vector, ResTy);
3622 return Vector;
3623}
3624
// Custom-lower BUILD_VECTOR for 128-bit (LSX) and 256-bit (LASX) vectors.
//
// Strategies are tried in this order:
//   1. broadcast load of a single repeated load operand;
//   2. constant splat of 8/16/32/64-bit elements;
//   3. non-constant splat (returned unchanged);
//   4. "mostly one constant": splat the constant, then insert the
//      non-constant elements;
//   5. no constants at all: broadcast a repeated sub-sequence, or fill the
//      vector with INSERT_VECTOR_ELT (split into halves for 256-bit vectors).
// An empty SDValue is returned when none of them applies.
3625SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
3626 SelectionDAG &DAG) const {
3627 BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op);
3628 MVT VT = Node->getSimpleValueType(0);
3629 EVT ResTy = Op->getValueType(0);
3630 unsigned NumElts = ResTy.getVectorNumElements();
3631 SDLoc DL(Op);
3632 APInt SplatValue, SplatUndef;
3633 unsigned SplatBitSize;
3634 bool HasAnyUndefs;
3635 bool IsConstant = false;
3636 bool UseSameConstant = true;
3637 SDValue ConstantValue;
3638 bool Is128Vec = ResTy.is128BitVector();
3639 bool Is256Vec = ResTy.is256BitVector();
3640
// Bail out unless the type is a 128-bit vector with LSX available or a
// 256-bit vector with LASX available.
3641 if ((!Subtarget.hasExtLSX() || !Is128Vec) &&
3642 (!Subtarget.hasExtLASX() || !Is256Vec))
3643 return SDValue();
3644
3645 if (SDValue Result = lowerBUILD_VECTORAsBroadCastLoad(Node, DL, DAG))
3646 return Result;
3647
3648 if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
3649 /*MinSplatBits=*/8) &&
3650 SplatBitSize <= 64) {
3651 // We can only cope with 8, 16, 32, or 64-bit elements.
3652 if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 &&
3653 SplatBitSize != 64)
3654 return SDValue();
3655
3656 if (SplatBitSize == 64 && !Subtarget.is64Bit()) {
3657 // We can only handle 64-bit elements that are within
3658 // the signed 10-bit range or match vldi patterns on 32-bit targets.
3659 // See the BUILD_VECTOR case in LoongArchDAGToDAGISel::Select().
3660 if (!SplatValue.isSignedIntN(10) &&
3661 !isImmVLDILegalForMode1(SplatValue, SplatBitSize).first)
3662 return SDValue();
3663 if ((Is128Vec && ResTy == MVT::v4i32) ||
3664 (Is256Vec && ResTy == MVT::v8i32))
3665 return Op;
3666 }
3667
3668 EVT ViaVecTy;
3669
// Pick the integer vector type whose element width equals the splat width.
3670 switch (SplatBitSize) {
3671 default:
3672 return SDValue();
3673 case 8:
3674 ViaVecTy = Is128Vec ? MVT::v16i8 : MVT::v32i8;
3675 break;
3676 case 16:
3677 ViaVecTy = Is128Vec ? MVT::v8i16 : MVT::v16i16;
3678 break;
3679 case 32:
3680 ViaVecTy = Is128Vec ? MVT::v4i32 : MVT::v8i32;
3681 break;
3682 case 64:
3683 ViaVecTy = Is128Vec ? MVT::v2i64 : MVT::v4i64;
3684 break;
3685 }
3686
3687 // SelectionDAG::getConstant will promote SplatValue appropriately.
3688 SDValue Result = DAG.getConstant(SplatValue, DL, ViaVecTy);
3689
3690 // Bitcast to the type we originally wanted.
3691 if (ViaVecTy != ResTy)
3692 Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result);
3693
3694 return Result;
3695 }
3696
// A splat with no undef elements is returned unchanged.
3697 if (DAG.isSplatValue(Op, /*AllowUndefs=*/false))
3698 return Op;
3699
// Scan the operands: IsConstant is set if any operand is an int/FP constant,
// and UseSameConstant stays true only if all constant operands are identical.
3700 for (unsigned i = 0; i < NumElts; ++i) {
3701 SDValue Opi = Node->getOperand(i);
3702 if (isIntOrFPConstant(Opi)) {
3703 IsConstant = true;
3704 if (!ConstantValue.getNode())
3705 ConstantValue = Opi;
3706 else if (ConstantValue != Opi)
3707 UseSameConstant = false;
3708 }
3709 }
3710
3711 // If the type of BUILD_VECTOR is v2f64, custom legalizing it has no benefits.
3712 if (IsConstant && UseSameConstant && ResTy != MVT::v2f64) {
// Splat the shared constant, then overwrite every non-constant position.
3713 SDValue Result = DAG.getSplatBuildVector(ResTy, DL, ConstantValue);
3714 for (unsigned i = 0; i < NumElts; ++i) {
3715 SDValue Opi = Node->getOperand(i);
3716 if (!isIntOrFPConstant(Opi))
3717 Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Result, Opi,
3718 DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
3719 }
3720 return Result;
3721 }
3722
3723 if (!IsConstant) {
3724 // If the BUILD_VECTOR has a repeated pattern, use INSERT_VECTOR_ELT to fill
3725 // the sub-sequence of the vector and then broadcast the sub-sequence.
3726 //
3727 // TODO: If the BUILD_VECTOR contains undef elements, consider falling
3728 // back to use INSERT_VECTOR_ELT to materialize the vector, because it
3729 // generates worse code in some cases. This could be further optimized
3730 // with more consideration.
// NOTE(review): the embedded numbering skips 3731 here (presumably the
// declaration of Sequence) and 3739 below (the 256-bit arm of the FillTy
// conditional); neither line is visible in this listing.
3732 BitVector UndefElements;
3733 if (Node->getRepeatedSequence(Sequence, &UndefElements) &&
3734 UndefElements.count() == 0) {
3735 // Using LSX instructions to fill the sub-sequence of 256-bits vector,
3736 // because the high part can be simply treated as undef.
3737 SDValue Vector = DAG.getUNDEF(ResTy);
3738 EVT FillTy = Is256Vec
3740 : ResTy;
3741 SDValue FillVec =
3742 Is256Vec ? DAG.getExtractSubvector(DL, FillTy, Vector, 0) : Vector;
3743
3744 fillVector(Sequence, DAG, DL, Subtarget, FillVec, FillTy);
3745
// View the vector as SplatLen integer elements, each as wide as the whole
// repeated sub-sequence, so that broadcasting element 0 repeats the
// sub-sequence.
3746 unsigned SeqLen = Sequence.size();
3747 unsigned SplatLen = NumElts / SeqLen;
3748 MVT SplatEltTy = MVT::getIntegerVT(VT.getScalarSizeInBits() * SeqLen);
3749 MVT SplatTy = MVT::getVectorVT(SplatEltTy, SplatLen);
3750
3751 // If size of the sub-sequence is half of a 256-bits vector, bitcast the
3752 // vector to v4i64 type in order to match the pattern of XVREPLVE0Q.
3753 if (SplatEltTy == MVT::i128)
3754 SplatTy = MVT::v4i64;
3755
3756 SDValue SplatVec;
3757 SDValue SrcVec = DAG.getBitcast(
3758 SplatTy,
3759 Is256Vec ? DAG.getInsertSubvector(DL, Vector, FillVec, 0) : FillVec);
3760 if (Is256Vec) {
3761 SplatVec =
3762 DAG.getNode((SplatEltTy == MVT::i128) ? LoongArchISD::XVREPLVE0Q
3763 : LoongArchISD::XVREPLVE0,
3764 DL, SplatTy, SrcVec);
3765 } else {
3766 SplatVec = DAG.getNode(LoongArchISD::VREPLVEI, DL, SplatTy, SrcVec,
3767 DAG.getConstant(0, DL, Subtarget.getGRLenVT()));
3768 }
3769
3770 return DAG.getBitcast(ResTy, SplatVec);
3771 }
3772
3773 // Use INSERT_VECTOR_ELT operations rather than expand to stores, because
3774 // using memory operations is much slower.
3775 //
3776 // For 256-bit vectors, normally split into two halves and concatenate.
3777 // Special case: for v8i32/v8f32/v4i64/v4f64, if the upper half has only
3778 // one non-undef element, skip splitting to avoid a worse result.
3779 if (ResTy == MVT::v8i32 || ResTy == MVT::v8f32 || ResTy == MVT::v4i64 ||
3780 ResTy == MVT::v4f64) {
// Count non-undef operands in the upper half, stopping once a second one is
// found.
3781 unsigned NonUndefCount = 0;
3782 for (unsigned i = NumElts / 2; i < NumElts; ++i) {
3783 if (!Node->getOperand(i).isUndef()) {
3784 ++NonUndefCount;
3785 if (NonUndefCount > 1)
3786 break;
3787 }
3788 }
3789 if (NonUndefCount == 1)
3790 return fillSubVectorFromBuildVector(Node, DAG, DL, Subtarget, ResTy, 0);
3791 }
3792
// General case: build the 128-bit vector directly, or build both halves of a
// 256-bit vector separately and concatenate them.
3793 EVT VecTy =
3794 Is256Vec ? ResTy.getHalfNumVectorElementsVT(*DAG.getContext()) : ResTy;
3795 SDValue Vector =
3796 fillSubVectorFromBuildVector(Node, DAG, DL, Subtarget, VecTy, 0);
3797
3798 if (Is128Vec)
3799 return Vector;
3800
3801 SDValue VectorHi = fillSubVectorFromBuildVector(Node, DAG, DL, Subtarget,
3802 VecTy, NumElts / 2);
3803
3804 return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResTy, Vector, VectorHi);
3805 }
3806
3807 return SDValue();
3808}
3809
3810SDValue LoongArchTargetLowering::lowerCONCAT_VECTORS(SDValue Op,
3811 SelectionDAG &DAG) const {
3812 SDLoc DL(Op);
3813 MVT ResVT = Op.getSimpleValueType();
3814 assert(ResVT.is256BitVector() && Op.getNumOperands() == 2);
3815
3816 unsigned NumOperands = Op.getNumOperands();
3817 unsigned NumFreezeUndef = 0;
3818 unsigned NumZero = 0;
3819 unsigned NumNonZero = 0;
3820 unsigned NonZeros = 0;
3821 SmallSet<SDValue, 4> Undefs;
3822 for (unsigned i = 0; i != NumOperands; ++i) {
3823 SDValue SubVec = Op.getOperand(i);
3824 if (SubVec.isUndef())
3825 continue;
3826 if (ISD::isFreezeUndef(SubVec.getNode())) {
3827 // If the freeze(undef) has multiple uses then we must fold to zero.
3828 if (SubVec.hasOneUse()) {
3829 ++NumFreezeUndef;
3830 } else {
3831 ++NumZero;
3832 Undefs.insert(SubVec);
3833 }
3834 } else if (ISD::isBuildVectorAllZeros(SubVec.getNode()))
3835 ++NumZero;
3836 else {
3837 assert(i < sizeof(NonZeros) * CHAR_BIT); // Ensure the shift is in range.
3838 NonZeros |= 1 << i;
3839 ++NumNonZero;
3840 }
3841 }
3842
3843 // If we have more than 2 non-zeros, build each half separately.
3844 if (NumNonZero > 2) {
3845 MVT HalfVT = ResVT.getHalfNumVectorElementsVT();
3846 ArrayRef<SDUse> Ops = Op->ops();
3847 SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
3848 Ops.slice(0, NumOperands / 2));
3849 SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
3850 Ops.slice(NumOperands / 2));
3851 return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi);
3852 }
3853
3854 // Otherwise, build it up through insert_subvectors.
3855 SDValue Vec = NumZero ? DAG.getConstant(0, DL, ResVT)
3856 : (NumFreezeUndef ? DAG.getFreeze(DAG.getUNDEF(ResVT))
3857 : DAG.getUNDEF(ResVT));
3858
3859 // Replace Undef operands with ZeroVector.
3860 for (SDValue U : Undefs)
3861 DAG.ReplaceAllUsesWith(U, DAG.getConstant(0, DL, U.getSimpleValueType()));
3862
3863 MVT SubVT = Op.getOperand(0).getSimpleValueType();
3864 unsigned NumSubElems = SubVT.getVectorNumElements();
3865 for (unsigned i = 0; i != NumOperands; ++i) {
3866 if ((NonZeros & (1 << i)) == 0)
3867 continue;
3868
3869 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ResVT, Vec, Op.getOperand(i),
3870 DAG.getVectorIdxConstant(i * NumSubElems, DL));
3871 }
3872
3873 return Vec;
3874}
3875
3876SDValue
3877LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
3878 SelectionDAG &DAG) const {
3879 MVT EltVT = Op.getSimpleValueType();
3880 SDValue Vec = Op->getOperand(0);
3881 EVT VecTy = Vec->getValueType(0);
3882 SDValue Idx = Op->getOperand(1);
3883 SDLoc DL(Op);
3884 MVT GRLenVT = Subtarget.getGRLenVT();
3885
3886 assert(VecTy.is256BitVector() && "Unexpected EXTRACT_VECTOR_ELT vector type");
3887
3888 if (isa<ConstantSDNode>(Idx))
3889 return Op;
3890
3891 switch (VecTy.getSimpleVT().SimpleTy) {
3892 default:
3893 llvm_unreachable("Unexpected type");
3894 case MVT::v32i8:
3895 case MVT::v16i16:
3896 case MVT::v4i64:
3897 case MVT::v4f64: {
3898 // Extract the high half subvector and place it to the low half of a new
3899 // vector. It doesn't matter what the high half of the new vector is.
3900 EVT HalfTy = VecTy.getHalfNumVectorElementsVT(*DAG.getContext());
3901 SDValue VecHi =
3902 DAG.getExtractSubvector(DL, HalfTy, Vec, HalfTy.getVectorNumElements());
3903 SDValue TmpVec =
3904 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecTy, DAG.getUNDEF(VecTy),
3905 VecHi, DAG.getConstant(0, DL, GRLenVT));
3906
3907 // Shuffle the origin Vec and the TmpVec using MaskVec, the lowest element
3908 // of MaskVec is Idx, the rest do not matter. ResVec[0] will hold the
3909 // desired element.
3910 SDValue IdxCp =
3911 Subtarget.is64Bit()
3912 ? DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Idx)
3913 : DAG.getBitcast(MVT::f32, Idx);
3914 SDValue IdxVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v8f32, IdxCp);
3915 SDValue MaskVec =
3916 DAG.getBitcast((VecTy == MVT::v4f64) ? MVT::v4i64 : VecTy, IdxVec);
3917 SDValue ResVec =
3918 DAG.getNode(LoongArchISD::VSHUF, DL, VecTy, MaskVec, TmpVec, Vec);
3919
3920 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, ResVec,
3921 DAG.getConstant(0, DL, GRLenVT));
3922 }
3923 case MVT::v8i32:
3924 case MVT::v8f32: {
3925 SDValue SplatIdx = DAG.getSplatBuildVector(MVT::v8i32, DL, Idx);
3926 SDValue SplatValue =
3927 DAG.getNode(LoongArchISD::XVPERM, DL, VecTy, Vec, SplatIdx);
3928
3929 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, SplatValue,
3930 DAG.getConstant(0, DL, GRLenVT));
3931 }
3932 }
3933}
3934
3935SDValue
3936LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
3937 SelectionDAG &DAG) const {
3938 MVT VT = Op.getSimpleValueType();
3939 MVT EltVT = VT.getVectorElementType();
3940 unsigned NumElts = VT.getVectorNumElements();
3941 unsigned EltSizeInBits = EltVT.getScalarSizeInBits();
3942 SDLoc DL(Op);
3943 SDValue Op0 = Op.getOperand(0);
3944 SDValue Op1 = Op.getOperand(1);
3945 SDValue Op2 = Op.getOperand(2);
3946
3947 if (isa<ConstantSDNode>(Op2))
3948 return Op;
3949
3950 MVT IdxTy = MVT::getIntegerVT(EltSizeInBits);
3951 MVT IdxVTy = MVT::getVectorVT(IdxTy, NumElts);
3952
3953 if (!isTypeLegal(VT) || !isTypeLegal(IdxVTy))
3954 return SDValue();
3955
3956 SDValue SplatElt = DAG.getSplatBuildVector(VT, DL, Op1);
3957 SmallVector<SDValue, 32> RawIndices;
3958 SDValue SplatIdx;
3959 SDValue Indices;
3960
3961 if (!Subtarget.is64Bit() && IdxTy == MVT::i64) {
3962 MVT PairVTy = MVT::getVectorVT(MVT::i32, NumElts * 2);
3963 for (unsigned i = 0; i < NumElts; ++i) {
3964 RawIndices.push_back(Op2);
3965 RawIndices.push_back(DAG.getConstant(0, DL, MVT::i32));
3966 }
3967 SplatIdx = DAG.getBuildVector(PairVTy, DL, RawIndices);
3968 SplatIdx = DAG.getBitcast(IdxVTy, SplatIdx);
3969
3970 RawIndices.clear();
3971 for (unsigned i = 0; i < NumElts; ++i) {
3972 RawIndices.push_back(DAG.getConstant(i, DL, MVT::i32));
3973 RawIndices.push_back(DAG.getConstant(0, DL, MVT::i32));
3974 }
3975 Indices = DAG.getBuildVector(PairVTy, DL, RawIndices);
3976 Indices = DAG.getBitcast(IdxVTy, Indices);
3977 } else {
3978 SplatIdx = DAG.getSplatBuildVector(IdxVTy, DL, Op2);
3979
3980 for (unsigned i = 0; i < NumElts; ++i)
3981 RawIndices.push_back(DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
3982 Indices = DAG.getBuildVector(IdxVTy, DL, RawIndices);
3983 }
3984
3985 // insert vec, elt, idx
3986 // =>
3987 // select (splatidx == {0,1,2...}) ? splatelt : vec
3988 SDValue SelectCC =
3989 DAG.getSetCC(DL, IdxVTy, SplatIdx, Indices, ISD::CondCode::SETEQ);
3990 return DAG.getNode(ISD::VSELECT, DL, VT, SelectCC, SplatElt, Op0);
3991}
3992
3993SDValue LoongArchTargetLowering::lowerATOMIC_FENCE(SDValue Op,
3994 SelectionDAG &DAG) const {
3995 SDLoc DL(Op);
3996 SyncScope::ID FenceSSID =
3997 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
3998
3999 // singlethread fences only synchronize with signal handlers on the same
4000 // thread and thus only need to preserve instruction order, not actually
4001 // enforce memory ordering.
4002 if (FenceSSID == SyncScope::SingleThread)
4003 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
4004 return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
4005
4006 return Op;
4007}
4008
4009SDValue LoongArchTargetLowering::lowerWRITE_REGISTER(SDValue Op,
4010 SelectionDAG &DAG) const {
4011
4012 if (Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i32) {
4013 DAG.getContext()->emitError(
4014 "On LA64, only 64-bit registers can be written.");
4015 return Op.getOperand(0);
4016 }
4017
4018 if (!Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i64) {
4019 DAG.getContext()->emitError(
4020 "On LA32, only 32-bit registers can be written.");
4021 return Op.getOperand(0);
4022 }
4023
4024 return Op;
4025}
4026
// Custom lowering for a frame-address query.
//
// Operand 0 is the requested depth and must be a constant; otherwise an error
// is emitted and an empty SDValue is returned. Depth 0 yields a copy of the
// frame register. For each additional level the code loads a new frame address
// from (current frame address - 2 * GRLen/8).
4027SDValue LoongArchTargetLowering::lowerFRAMEADDR(SDValue Op,
4028 SelectionDAG &DAG) const {
4029 if (!isa<ConstantSDNode>(Op.getOperand(0))) {
4030 DAG.getContext()->emitError("argument to '__builtin_frame_address' must "
4031 "be a constant integer");
4032 return SDValue();
4033 }
4034
4035 MachineFunction &MF = DAG.getMachineFunction();
// NOTE(review): the embedded listing numbering jumps from 4035 to 4037 here, so
// one source line appears to have been dropped by the extraction — confirm
// against the upstream file.
4037 Register FrameReg = Subtarget.getRegisterInfo()->getFrameRegister(MF);
4038 EVT VT = Op.getValueType();
4039 SDLoc DL(Op);
// Start from the current frame register, read off the entry node.
4040 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
4041 unsigned Depth = Op.getConstantOperandVal(0);
4042 int GRLenInBytes = Subtarget.getGRLen() / 8;
4043
// Walk up Depth frames; each step loads the next frame address from two
// GRLen-sized slots below the current one (presumably where the previous
// frame pointer is spilled — TODO confirm against the frame layout).
4044 while (Depth--) {
4045 int Offset = -(GRLenInBytes * 2);
4046 SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
4047 DAG.getSignedConstant(Offset, DL, VT));
4048 FrameAddr =
4049 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
4050 }
4051 return FrameAddr;
4052}
4053
// Custom lowering for a return-address query.
//
// Only depth 0 (constant operand 0) is handled; any other depth emits an error
// and returns an empty SDValue. For depth 0 the return-address register is
// added as a function live-in and its value is copied out with type GRLenVT.
4054SDValue LoongArchTargetLowering::lowerRETURNADDR(SDValue Op,
4055 SelectionDAG &DAG) const {
4056 // Currently only support lowering return address for current frame.
4057 if (Op.getConstantOperandVal(0) != 0) {
4058 DAG.getContext()->emitError(
4059 "return address can only be determined for the current frame");
4060 return SDValue();
4061 }
4062
4063 MachineFunction &MF = DAG.getMachineFunction();
// NOTE(review): the embedded listing numbering jumps from 4063 to 4065 here, so
// one source line appears to have been dropped by the extraction — confirm
// against the upstream file.
4065 MVT GRLenVT = Subtarget.getGRLenVT();
4066
4067 // Return the value of the return address register, marking it an implicit
4068 // live-in.
4069 Register Reg = MF.addLiveIn(Subtarget.getRegisterInfo()->getRARegister(),
4070 getRegClassFor(GRLenVT));
4071 return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), Reg, GRLenVT);
4072}
4073
4074SDValue LoongArchTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
4075 SelectionDAG &DAG) const {
4076 MachineFunction &MF = DAG.getMachineFunction();
4077 auto Size = Subtarget.getGRLen() / 8;
4078 auto FI = MF.getFrameInfo().CreateFixedObject(Size, 0, false);
4079 return DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
4080}
4081
// Custom lowering for va_start.
//
// Builds a frame-index node for the function's VarArgsFrameIndex and stores it
// to the address in operand 1, chained on operand 0. Operand 2 supplies the IR
// value used for the store's MachinePointerInfo.
4082SDValue LoongArchTargetLowering::lowerVASTART(SDValue Op,
4083 SelectionDAG &DAG) const {
4084 MachineFunction &MF = DAG.getMachineFunction();
4085 auto *FuncInfo = MF.getInfo<LoongArchMachineFunctionInfo>();
4086
4087 SDLoc DL(Op);
4088 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
// NOTE(review): the embedded listing numbering jumps from 4088 to 4090 here;
// the remaining argument(s) of the getFrameIndex call above were dropped by
// the extraction — confirm against the upstream file.
4090
4091 // vastart just stores the address of the VarArgsFrameIndex slot into the
4092 // memory location argument.
4093 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4094 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
4095 MachinePointerInfo(SV));
4096}
4097
4098SDValue LoongArchTargetLowering::lowerUINT_TO_FP(SDValue Op,
4099 SelectionDAG &DAG) const {
4100 assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
4101 !Subtarget.hasBasicD() && "unexpected target features");
4102
4103 SDLoc DL(Op);
4104 SDValue Op0 = Op.getOperand(0);
4105 if (Op0->getOpcode() == ISD::AND) {
4106 auto *C = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
4107 if (C && C->getZExtValue() < UINT64_C(0xFFFFFFFF))
4108 return Op;
4109 }
4110
4111 if (Op0->getOpcode() == LoongArchISD::BSTRPICK &&
4112 Op0.getConstantOperandVal(1) < UINT64_C(0X1F) &&
4113 Op0.getConstantOperandVal(2) == UINT64_C(0))
4114 return Op;
4115
4116 if (Op0.getOpcode() == ISD::AssertZext &&
4117 dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLT(MVT::i32))
4118 return Op;
4119
4120 EVT OpVT = Op0.getValueType();
4121 EVT RetVT = Op.getValueType();
4122 RTLIB::Libcall LC = RTLIB::getUINTTOFP(OpVT, RetVT);
4123 MakeLibCallOptions CallOptions;
4124 CallOptions.setTypeListBeforeSoften(OpVT, RetVT);
4125 SDValue Chain = SDValue();
4127 std::tie(Result, Chain) =
4128 makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
4129 return Result;
4130}
4131
// Custom lowering for ISD::SINT_TO_FP.
//
// Only expected on a 64-bit subtarget that has the basic 'F' feature but not
// 'D' (asserted). When the operand matches the sign-extension check below the
// node is returned unchanged; otherwise the conversion becomes the matching
// RTLIB signed-int-to-FP libcall and the call's result value is returned.
4132SDValue LoongArchTargetLowering::lowerSINT_TO_FP(SDValue Op,
4133 SelectionDAG &DAG) const {
4134 assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
4135 !Subtarget.hasBasicD() && "unexpected target features");
4136
4137 SDLoc DL(Op);
4138 SDValue Op0 = Op.getOperand(0);
4139
// NOTE(review): the embedded listing numbering jumps from 4140 to 4142, so the
// second alternative of this condition was dropped by the extraction — confirm
// against the upstream file.
// NOTE(review): the dyn_cast<VTSDNode> result below is dereferenced without a
// null check; cast<> looks like the intended form — confirm.
4140 if ((Op0.getOpcode() == ISD::AssertSext ||
4142 dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLE(MVT::i32))
4143 return Op;
4144
// Fall back to the runtime library conversion routine.
4145 EVT OpVT = Op0.getValueType();
4146 EVT RetVT = Op.getValueType();
4147 RTLIB::Libcall LC = RTLIB::getSINTTOFP(OpVT, RetVT);
4148 MakeLibCallOptions CallOptions;
4149 CallOptions.setTypeListBeforeSoften(OpVT, RetVT);
4150 SDValue Chain = SDValue();
// NOTE(review): listing line 4151 is missing here (presumably the declaration
// of Result, which is assigned and returned below) — confirm.
4152 std::tie(Result, Chain) =
4153 makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
4154 return Result;
4155}
4156
4157SDValue LoongArchTargetLowering::lowerBITCAST(SDValue Op,
4158 SelectionDAG &DAG) const {
4159
4160 SDLoc DL(Op);
4161 EVT VT = Op.getValueType();
4162 SDValue Op0 = Op.getOperand(0);
4163 EVT Op0VT = Op0.getValueType();
4164
4165 if (Op.getValueType() == MVT::f32 && Op0VT == MVT::i32 &&
4166 Subtarget.is64Bit() && Subtarget.hasBasicF()) {
4167 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
4168 return DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, NewOp0);
4169 }
4170 if (VT == MVT::f64 && Op0VT == MVT::i64 && !Subtarget.is64Bit()) {
4171 SDValue Lo, Hi;
4172 std::tie(Lo, Hi) = DAG.SplitScalar(Op0, DL, MVT::i32, MVT::i32);
4173 return DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64, Lo, Hi);
4174 }
4175 return Op;
4176}
4177
4178SDValue LoongArchTargetLowering::lowerFP_TO_SINT(SDValue Op,
4179 SelectionDAG &DAG) const {
4180
4181 SDLoc DL(Op);
4182 SDValue Op0 = Op.getOperand(0);
4183
4184 if (Op0.getValueType() == MVT::f16)
4185 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
4186
4187 if (Op.getValueSizeInBits() > 32 && Subtarget.hasBasicF() &&
4188 !Subtarget.hasBasicD()) {
4189 SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, MVT::f32, Op0);
4190 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Dst);
4191 }
4192
4193 EVT FPTy = EVT::getFloatingPointVT(Op.getValueSizeInBits());
4194 SDValue Trunc = DAG.getNode(LoongArchISD::FTINT, DL, FPTy, Op0);
4195 return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Trunc);
4196}
4197
4199 SelectionDAG &DAG, unsigned Flags) {
// NOTE(review): the first line of this signature (listing line 4198) is not
// present in this listing; this is presumably one of the getTargetNode()
// overloads that getAddr() calls — confirm against the upstream file.
// Wraps the node's global value in a target global address with offset 0 and
// the given target flags.
4200 return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
4201}
4202
4204 SelectionDAG &DAG, unsigned Flags) {
// NOTE(review): the first line of this signature (listing line 4203) is not
// present in this listing; this is presumably one of the getTargetNode()
// overloads that getAddr() calls — confirm against the upstream file.
// Wraps the node's block address in a target block address, keeping the
// node's offset and applying the given target flags.
4205 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
4206 Flags);
4207}
4208
4210 SelectionDAG &DAG, unsigned Flags) {
// NOTE(review): the first line of this signature (listing line 4209) is not
// present in this listing; this is presumably one of the getTargetNode()
// overloads that getAddr() calls — confirm against the upstream file.
// Wraps the node's constant in a target constant-pool entry, keeping the
// node's alignment and offset and applying the given target flags.
4211 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
4212 N->getOffset(), Flags);
4213}
4214
4216 SelectionDAG &DAG, unsigned Flags) {
// NOTE(review): the first line of this signature (listing line 4215) is not
// present in this listing; this is presumably one of the getTargetNode()
// overloads that getAddr() calls — confirm against the upstream file.
// Wraps the node's jump-table index in a target jump-table node with the
// given target flags.
4217 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
4218}
4219
// Materializes the address of the symbol-like node N.
//
// A target node is built for N with flags 0 and fed to one of the PseudoLA_*
// machine nodes, selected by the code model M and by IsLocal:
//   * Large:         PseudoLA_PCREL_LARGE (local) / PseudoLA_GOT_LARGE
//   * Small, Medium: PseudoLA_PCREL (local)       / PseudoLA_GOT
// Any other code model is a fatal error. For the non-local (GOT) forms a
// memory operand is attached to the resulting machine node.
4220template <class NodeTy>
4221SDValue LoongArchTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
// NOTE(review): the embedded listing numbering jumps from 4221 to 4223 here;
// the parameter declaring `M` (switched on below) was dropped by the
// extraction — confirm against the upstream file.
4223 bool IsLocal) const {
4224 SDLoc DL(N);
4225 EVT Ty = getPointerTy(DAG.getDataLayout());
4226 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
4227 SDValue Load;
4228
4229 switch (M) {
4230 default:
4231 report_fatal_error("Unsupported code model");
4232
4233 case CodeModel::Large: {
4234 assert(Subtarget.is64Bit() && "Large code model requires LA64");
4235
4236 // This is not actually used, but is necessary for successfully matching
4237 // the PseudoLA_*_LARGE nodes.
4238 SDValue Tmp = DAG.getConstant(0, DL, Ty);
4239 if (IsLocal) {
4240 // This generates the pattern (PseudoLA_PCREL_LARGE tmp sym), that
4241 // eventually becomes the desired 5-insn code sequence.
4242 Load = SDValue(DAG.getMachineNode(LoongArch::PseudoLA_PCREL_LARGE, DL, Ty,
4243 Tmp, Addr),
4244 0);
4245 } else {
4246 // This generates the pattern (PseudoLA_GOT_LARGE tmp sym), that
4247 // eventually becomes the desired 5-insn code sequence.
4248 Load = SDValue(
4249 DAG.getMachineNode(LoongArch::PseudoLA_GOT_LARGE, DL, Ty, Tmp, Addr),
4250 0);
4251 }
4252 break;
4253 }
4254
4255 case CodeModel::Small:
4256 case CodeModel::Medium:
4257 if (IsLocal) {
4258 // This generates the pattern (PseudoLA_PCREL sym), which
4259 //
4260 // for la32r expands to:
4261 // (addi.w (pcaddu12i %pcadd_hi20(sym)) %pcadd_lo12(.Lpcadd_hi)).
4262 //
4263 // for la32s and la64 expands to:
4264 // (addi.w/d (pcalau12i %pc_hi20(sym)) %pc_lo12(sym)).
4265 Load = SDValue(
4266 DAG.getMachineNode(LoongArch::PseudoLA_PCREL, DL, Ty, Addr), 0);
4267 } else {
4268 // This generates the pattern (PseudoLA_GOT sym), which
4269 //
4270 // for la32r expands to:
4271 // (ld.w (pcaddu12i %got_pcadd_hi20(sym)) %pcadd_lo12(.Lpcadd_hi)).
4272 //
4273 // for la32s and la64 expands to:
4274 // (ld.w/d (pcalau12i %got_pc_hi20(sym)) %got_pc_lo12(sym)).
4275 Load =
4276 SDValue(DAG.getMachineNode(LoongArch::PseudoLA_GOT, DL, Ty, Addr), 0);
4277 }
4278 }
4279
4280 if (!IsLocal) {
4281 // Mark the load instruction as invariant to enable hoisting in MachineLICM.
4282 MachineFunction &MF = DAG.getMachineFunction();
4283 MachineMemOperand *MemOp = MF.getMachineMemOperand(
// NOTE(review): listing lines 4284-4286 (the leading arguments of
// getMachineMemOperand, presumably the pointer info and flags) are missing
// from this listing — confirm against the upstream file.
4287 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
4288 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
4289 }
4290
4291 return Load;
4292}
4293
4294SDValue LoongArchTargetLowering::lowerBlockAddress(SDValue Op,
4295 SelectionDAG &DAG) const {
4296 return getAddr(cast<BlockAddressSDNode>(Op), DAG,
4297 DAG.getTarget().getCodeModel());
4298}
4299
4300SDValue LoongArchTargetLowering::lowerJumpTable(SDValue Op,
4301 SelectionDAG &DAG) const {
4302 return getAddr(cast<JumpTableSDNode>(Op), DAG,
4303 DAG.getTarget().getCodeModel());
4304}
4305
4306SDValue LoongArchTargetLowering::lowerConstantPool(SDValue Op,
4307 SelectionDAG &DAG) const {
4308 return getAddr(cast<ConstantPoolSDNode>(Op), DAG,
4309 DAG.getTarget().getCodeModel());
4310}
4311
4312SDValue LoongArchTargetLowering::lowerGlobalAddress(SDValue Op,
4313 SelectionDAG &DAG) const {
4314 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
4315 assert(N->getOffset() == 0 && "unexpected offset in global node");
4316 auto CM = DAG.getTarget().getCodeModel();
4317 const GlobalValue *GV = N->getGlobal();
4318
4319 if (GV->isDSOLocal() && isa<GlobalVariable>(GV)) {
4320 if (auto GCM = dyn_cast<GlobalVariable>(GV)->getCodeModel())
4321 CM = *GCM;
4322 }
4323
4324 return getAddr(N, DAG, CM, GV->isDSOLocal());
4325}
4326
// Builds the address of a TLS global using the machine pseudo Opc.
//
// The pseudo yields an offset; PseudoLA_TLS_IE_LARGE receives an extra dummy
// operand. For PseudoLA_TLS_LE outside the large code model the offset is
// returned directly. Otherwise, when UseGOT is set a memory operand is
// attached to the pseudo, and the result is offset + register R2.
4327SDValue LoongArchTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
4328 SelectionDAG &DAG,
4329 unsigned Opc, bool UseGOT,
4330 bool Large) const {
4331 SDLoc DL(N);
4332 EVT Ty = getPointerTy(DAG.getDataLayout());
4333 MVT GRLenVT = Subtarget.getGRLenVT();
4334
4335 // This is not actually used, but is necessary for successfully matching the
4336 // PseudoLA_*_LARGE nodes.
4337 SDValue Tmp = DAG.getConstant(0, DL, Ty);
4338 SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
4339
4340 // Only IE needs an extra argument for large code model.
4341 SDValue Offset = Opc == LoongArch::PseudoLA_TLS_IE_LARGE
4342 ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
4343 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
4344
4345 // If it is LE for normal/medium code model, the add tp operation will occur
4346 // during the pseudo-instruction expansion.
4347 if (Opc == LoongArch::PseudoLA_TLS_LE && !Large)
4348 return Offset;
4349
4350 if (UseGOT) {
4351 // Mark the load instruction as invariant to enable hoisting in MachineLICM.
4352 MachineFunction &MF = DAG.getMachineFunction();
4353 MachineMemOperand *MemOp = MF.getMachineMemOperand(
// NOTE(review): listing lines 4354-4356 (the leading arguments of
// getMachineMemOperand, presumably the pointer info and flags) are missing
// from this listing — confirm against the upstream file.
4357 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
4358 DAG.setNodeMemRefs(cast<MachineSDNode>(Offset.getNode()), {MemOp});
4359 }
4360
4361 // Add the thread pointer.
4362 return DAG.getNode(ISD::ADD, DL, Ty, Offset,
4363 DAG.getRegister(LoongArch::R2, GRLenVT));
4364}
4365
// Builds the address of a TLS global by calling __tls_get_addr.
//
// The machine pseudo Opc (given an extra dummy operand when Large is set)
// produces the single call argument. The call uses the C calling convention
// and returns a pointer-sized integer; the function returns the first element
// of the pair produced by LowerCallTo.
4366SDValue LoongArchTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
4367 SelectionDAG &DAG,
4368 unsigned Opc,
4369 bool Large) const {
4370 SDLoc DL(N);
4371 EVT Ty = getPointerTy(DAG.getDataLayout());
4372 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
4373
4374 // This is not actually used, but is necessary for successfully matching the
4375 // PseudoLA_*_LARGE nodes.
4376 SDValue Tmp = DAG.getConstant(0, DL, Ty);
4377
4378 // Use a PC-relative addressing mode to access the dynamic GOT address.
4379 SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
4380 SDValue Load = Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
4381 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
4382
4383 // Prepare argument list to generate call.
// NOTE(review): listing line 4384 is missing here (presumably the declaration
// of Args, which is used below) — confirm against the upstream file.
4385 Args.emplace_back(Load, CallTy);
4386
4387 // Setup call to __tls_get_addr.
4388 TargetLowering::CallLoweringInfo CLI(DAG);
4389 CLI.setDebugLoc(DL)
4390 .setChain(DAG.getEntryNode())
4391 .setLibCallee(CallingConv::C, CallTy,
4392 DAG.getExternalSymbol("__tls_get_addr", Ty),
4393 std::move(Args));
4394
4395 return LowerCallTo(CLI).first;
4396}
4397
4398SDValue LoongArchTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
4399 SelectionDAG &DAG, unsigned Opc,
4400 bool Large) const {
4401 SDLoc DL(N);
4402 EVT Ty = getPointerTy(DAG.getDataLayout());
4403 const GlobalValue *GV = N->getGlobal();
4404
4405 // This is not actually used, but is necessary for successfully matching the
4406 // PseudoLA_*_LARGE nodes.
4407 SDValue Tmp = DAG.getConstant(0, DL, Ty);
4408
4409 // Use a PC-relative addressing mode to access the global dynamic GOT address.
4410 // This generates the pattern (PseudoLA_TLS_DESC_PC{,LARGE} sym).
4411 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
4412 return Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
4413 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
4414}
4415
// Custom lowering for a TLS global address.
//
// Dispatches on the TLS model of the referenced global to getDynamicTLSAddr()
// or getStaticTLSAddr(); paths that break out of the switch use
// getTLSDescAddr(). The large code model requires a 64-bit subtarget and the
// node must carry a zero offset (both asserted); emulated TLS is rejected with
// a fatal usage error.
4416SDValue
4417LoongArchTargetLowering::lowerGlobalTLSAddress(SDValue Op,
4418 SelectionDAG &DAG) const {
// NOTE(review): listing lines 4419-4420 are missing here (presumably the
// condition guarding the fatal error below) — confirm against the upstream
// file.
4421 report_fatal_error("In GHC calling convention TLS is not supported");
4422
4423 bool Large = DAG.getTarget().getCodeModel() == CodeModel::Large;
4424 assert((!Large || Subtarget.is64Bit()) && "Large code model requires LA64");
4425
4426 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
4427 assert(N->getOffset() == 0 && "unexpected offset in global node");
4428
4429 if (DAG.getTarget().useEmulatedTLS())
4430 reportFatalUsageError("the emulated TLS is prohibited");
4431
4432 bool IsDesc = DAG.getTarget().useTLSDESC();
4433
// NOTE(review): the case labels of this switch (listing lines 4435, 4445, 4454
// and 4460) are missing from this listing; judging by the comments and the
// pseudos used they are presumably the general-dynamic, local-dynamic,
// initial-exec and local-exec models, in that order — confirm.
4434 switch (getTargetMachine().getTLSModel(N->getGlobal())) {
4436 // In this model, application code calls the dynamic linker function
4437 // __tls_get_addr to locate TLS offsets into the dynamic thread vector at
4438 // runtime.
4439 if (!IsDesc)
4440 return getDynamicTLSAddr(N, DAG,
4441 Large ? LoongArch::PseudoLA_TLS_GD_LARGE
4442 : LoongArch::PseudoLA_TLS_GD,
4443 Large);
4444 break;
4446 // Same as GeneralDynamic, except for assembly modifiers and relocation
4447 // records.
4448 if (!IsDesc)
4449 return getDynamicTLSAddr(N, DAG,
4450 Large ? LoongArch::PseudoLA_TLS_LD_LARGE
4451 : LoongArch::PseudoLA_TLS_LD,
4452 Large);
4453 break;
4455 // This model uses the GOT to resolve TLS offsets.
4456 return getStaticTLSAddr(N, DAG,
4457 Large ? LoongArch::PseudoLA_TLS_IE_LARGE
4458 : LoongArch::PseudoLA_TLS_IE,
4459 /*UseGOT=*/true, Large);
4461 // This model is used when static linking as the TLS offsets are resolved
4462 // during program linking.
4463 //
4464 // This node doesn't need an extra argument for the large code model.
4465 return getStaticTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_LE,
4466 /*UseGOT=*/false, Large);
4467 }
4468
// Reached only through the `break`s above, i.e. when TLS descriptors are in
// use for one of the two dynamic cases.
4469 return getTLSDescAddr(N, DAG,
4470 Large ? LoongArch::PseudoLA_TLS_DESC_LARGE
4471 : LoongArch::PseudoLA_TLS_DESC,
4472 Large);
4473}
4474
// Validates that operand ImmOp of Op, which must be a ConstantSDNode, fits in
// N bits: as a signed value when IsSigned is true, as an unsigned value
// otherwise. On failure an "argument out of range" error is emitted and an
// UNDEF node of Op's value type is returned; on success an empty SDValue is
// returned.
4475template <unsigned N>
// NOTE(review): listing line 4476 (the first line of the signature, which
// declares Op and ImmOp) is missing from this listing — confirm against the
// upstream file.
4477 SelectionDAG &DAG, bool IsSigned = false) {
4478 auto *CImm = cast<ConstantSDNode>(Op->getOperand(ImmOp));
4479 // Check the ImmArg.
4480 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
4481 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
4482 DAG.getContext()->emitError(Op->getOperationName(0) +
4483 ": argument out of range.");
4484 return DAG.getNode(ISD::UNDEF, SDLoc(Op), Op.getValueType());
4485 }
4486 return SDValue();
4487}
4488
// Custom lowering for chain-less intrinsics.
//
// Constant operand 0 is the intrinsic ID. Intrinsic::thread_pointer becomes
// register R2 typed as the target pointer type. The LSX/LASX intrinsics listed
// below only have their immediate operand range-checked through
// checkIntrinsicImmArg<Bits>(Op, OperandIndex, DAG[, IsSigned]); the value
// returned by that helper is returned as is. Every other intrinsic yields an
// empty SDValue.
4489SDValue
4490LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
4491 SelectionDAG &DAG) const {
4492 switch (Op.getConstantOperandVal(0)) {
4493 default:
4494 return SDValue(); // Don't custom lower most intrinsics.
4495 case Intrinsic::thread_pointer: {
4496 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4497 return DAG.getRegister(LoongArch::R2, PtrVT);
4498 }
// 1-bit unsigned immediate in operand 2.
4499 case Intrinsic::loongarch_lsx_vpickve2gr_d:
4500 case Intrinsic::loongarch_lsx_vpickve2gr_du:
4501 case Intrinsic::loongarch_lsx_vreplvei_d:
4502 case Intrinsic::loongarch_lasx_xvrepl128vei_d:
4503 return checkIntrinsicImmArg<1>(Op, 2, DAG);
// 2-bit unsigned immediate in operand 2.
4504 case Intrinsic::loongarch_lsx_vreplvei_w:
4505 case Intrinsic::loongarch_lasx_xvrepl128vei_w:
4506 case Intrinsic::loongarch_lasx_xvpickve2gr_d:
4507 case Intrinsic::loongarch_lasx_xvpickve2gr_du:
4508 case Intrinsic::loongarch_lasx_xvpickve_d:
4509 case Intrinsic::loongarch_lasx_xvpickve_d_f:
4510 return checkIntrinsicImmArg<2>(Op, 2, DAG);
// 2-bit unsigned immediate in operand 3.
4511 case Intrinsic::loongarch_lasx_xvinsve0_d:
4512 return checkIntrinsicImmArg<2>(Op, 3, DAG);
// 3-bit unsigned immediate in operand 2.
4513 case Intrinsic::loongarch_lsx_vsat_b:
4514 case Intrinsic::loongarch_lsx_vsat_bu:
4515 case Intrinsic::loongarch_lsx_vrotri_b:
4516 case Intrinsic::loongarch_lsx_vsllwil_h_b:
4517 case Intrinsic::loongarch_lsx_vsllwil_hu_bu:
4518 case Intrinsic::loongarch_lsx_vsrlri_b:
4519 case Intrinsic::loongarch_lsx_vsrari_b:
4520 case Intrinsic::loongarch_lsx_vreplvei_h:
4521 case Intrinsic::loongarch_lasx_xvsat_b:
4522 case Intrinsic::loongarch_lasx_xvsat_bu:
4523 case Intrinsic::loongarch_lasx_xvrotri_b:
4524 case Intrinsic::loongarch_lasx_xvsllwil_h_b:
4525 case Intrinsic::loongarch_lasx_xvsllwil_hu_bu:
4526 case Intrinsic::loongarch_lasx_xvsrlri_b:
4527 case Intrinsic::loongarch_lasx_xvsrari_b:
4528 case Intrinsic::loongarch_lasx_xvrepl128vei_h:
4529 case Intrinsic::loongarch_lasx_xvpickve_w:
4530 case Intrinsic::loongarch_lasx_xvpickve_w_f:
4531 return checkIntrinsicImmArg<3>(Op, 2, DAG);
// 3-bit unsigned immediate in operand 3.
4532 case Intrinsic::loongarch_lasx_xvinsve0_w:
4533 return checkIntrinsicImmArg<3>(Op, 3, DAG);
// 4-bit unsigned immediate in operand 2.
4534 case Intrinsic::loongarch_lsx_vsat_h:
4535 case Intrinsic::loongarch_lsx_vsat_hu:
4536 case Intrinsic::loongarch_lsx_vrotri_h:
4537 case Intrinsic::loongarch_lsx_vsllwil_w_h:
4538 case Intrinsic::loongarch_lsx_vsllwil_wu_hu:
4539 case Intrinsic::loongarch_lsx_vsrlri_h:
4540 case Intrinsic::loongarch_lsx_vsrari_h:
4541 case Intrinsic::loongarch_lsx_vreplvei_b:
4542 case Intrinsic::loongarch_lasx_xvsat_h:
4543 case Intrinsic::loongarch_lasx_xvsat_hu:
4544 case Intrinsic::loongarch_lasx_xvrotri_h:
4545 case Intrinsic::loongarch_lasx_xvsllwil_w_h:
4546 case Intrinsic::loongarch_lasx_xvsllwil_wu_hu:
4547 case Intrinsic::loongarch_lasx_xvsrlri_h:
4548 case Intrinsic::loongarch_lasx_xvsrari_h:
4549 case Intrinsic::loongarch_lasx_xvrepl128vei_b:
4550 return checkIntrinsicImmArg<4>(Op, 2, DAG);
// 4-bit unsigned immediate in operand 3.
4551 case Intrinsic::loongarch_lsx_vsrlni_b_h:
4552 case Intrinsic::loongarch_lsx_vsrani_b_h:
4553 case Intrinsic::loongarch_lsx_vsrlrni_b_h:
4554 case Intrinsic::loongarch_lsx_vsrarni_b_h:
4555 case Intrinsic::loongarch_lsx_vssrlni_b_h:
4556 case Intrinsic::loongarch_lsx_vssrani_b_h:
4557 case Intrinsic::loongarch_lsx_vssrlni_bu_h:
4558 case Intrinsic::loongarch_lsx_vssrani_bu_h:
4559 case Intrinsic::loongarch_lsx_vssrlrni_b_h:
4560 case Intrinsic::loongarch_lsx_vssrarni_b_h:
4561 case Intrinsic::loongarch_lsx_vssrlrni_bu_h:
4562 case Intrinsic::loongarch_lsx_vssrarni_bu_h:
4563 case Intrinsic::loongarch_lasx_xvsrlni_b_h:
4564 case Intrinsic::loongarch_lasx_xvsrani_b_h:
4565 case Intrinsic::loongarch_lasx_xvsrlrni_b_h:
4566 case Intrinsic::loongarch_lasx_xvsrarni_b_h:
4567 case Intrinsic::loongarch_lasx_xvssrlni_b_h:
4568 case Intrinsic::loongarch_lasx_xvssrani_b_h:
4569 case Intrinsic::loongarch_lasx_xvssrlni_bu_h:
4570 case Intrinsic::loongarch_lasx_xvssrani_bu_h:
4571 case Intrinsic::loongarch_lasx_xvssrlrni_b_h:
4572 case Intrinsic::loongarch_lasx_xvssrarni_b_h:
4573 case Intrinsic::loongarch_lasx_xvssrlrni_bu_h:
4574 case Intrinsic::loongarch_lasx_xvssrarni_bu_h:
4575 return checkIntrinsicImmArg<4>(Op, 3, DAG);
// 5-bit unsigned immediate in operand 2.
4576 case Intrinsic::loongarch_lsx_vsat_w:
4577 case Intrinsic::loongarch_lsx_vsat_wu:
4578 case Intrinsic::loongarch_lsx_vrotri_w:
4579 case Intrinsic::loongarch_lsx_vsllwil_d_w:
4580 case Intrinsic::loongarch_lsx_vsllwil_du_wu:
4581 case Intrinsic::loongarch_lsx_vsrlri_w:
4582 case Intrinsic::loongarch_lsx_vsrari_w:
4583 case Intrinsic::loongarch_lsx_vslei_bu:
4584 case Intrinsic::loongarch_lsx_vslei_hu:
4585 case Intrinsic::loongarch_lsx_vslei_wu:
4586 case Intrinsic::loongarch_lsx_vslei_du:
4587 case Intrinsic::loongarch_lsx_vslti_bu:
4588 case Intrinsic::loongarch_lsx_vslti_hu:
4589 case Intrinsic::loongarch_lsx_vslti_wu:
4590 case Intrinsic::loongarch_lsx_vslti_du:
4591 case Intrinsic::loongarch_lsx_vbsll_v:
4592 case Intrinsic::loongarch_lsx_vbsrl_v:
4593 case Intrinsic::loongarch_lasx_xvsat_w:
4594 case Intrinsic::loongarch_lasx_xvsat_wu:
4595 case Intrinsic::loongarch_lasx_xvrotri_w:
4596 case Intrinsic::loongarch_lasx_xvsllwil_d_w:
4597 case Intrinsic::loongarch_lasx_xvsllwil_du_wu:
4598 case Intrinsic::loongarch_lasx_xvsrlri_w:
4599 case Intrinsic::loongarch_lasx_xvsrari_w:
4600 case Intrinsic::loongarch_lasx_xvslei_bu:
4601 case Intrinsic::loongarch_lasx_xvslei_hu:
4602 case Intrinsic::loongarch_lasx_xvslei_wu:
4603 case Intrinsic::loongarch_lasx_xvslei_du:
4604 case Intrinsic::loongarch_lasx_xvslti_bu:
4605 case Intrinsic::loongarch_lasx_xvslti_hu:
4606 case Intrinsic::loongarch_lasx_xvslti_wu:
4607 case Intrinsic::loongarch_lasx_xvslti_du:
4608 case Intrinsic::loongarch_lasx_xvbsll_v:
4609 case Intrinsic::loongarch_lasx_xvbsrl_v:
4610 return checkIntrinsicImmArg<5>(Op, 2, DAG);
// 5-bit signed immediate in operand 2.
4611 case Intrinsic::loongarch_lsx_vseqi_b:
4612 case Intrinsic::loongarch_lsx_vseqi_h:
4613 case Intrinsic::loongarch_lsx_vseqi_w:
4614 case Intrinsic::loongarch_lsx_vseqi_d:
4615 case Intrinsic::loongarch_lsx_vslei_b:
4616 case Intrinsic::loongarch_lsx_vslei_h:
4617 case Intrinsic::loongarch_lsx_vslei_w:
4618 case Intrinsic::loongarch_lsx_vslei_d:
4619 case Intrinsic::loongarch_lsx_vslti_b:
4620 case Intrinsic::loongarch_lsx_vslti_h:
4621 case Intrinsic::loongarch_lsx_vslti_w:
4622 case Intrinsic::loongarch_lsx_vslti_d:
4623 case Intrinsic::loongarch_lasx_xvseqi_b:
4624 case Intrinsic::loongarch_lasx_xvseqi_h:
4625 case Intrinsic::loongarch_lasx_xvseqi_w:
4626 case Intrinsic::loongarch_lasx_xvseqi_d:
4627 case Intrinsic::loongarch_lasx_xvslei_b:
4628 case Intrinsic::loongarch_lasx_xvslei_h:
4629 case Intrinsic::loongarch_lasx_xvslei_w:
4630 case Intrinsic::loongarch_lasx_xvslei_d:
4631 case Intrinsic::loongarch_lasx_xvslti_b:
4632 case Intrinsic::loongarch_lasx_xvslti_h:
4633 case Intrinsic::loongarch_lasx_xvslti_w:
4634 case Intrinsic::loongarch_lasx_xvslti_d:
4635 return checkIntrinsicImmArg<5>(Op, 2, DAG, /*IsSigned=*/true);
// 5-bit unsigned immediate in operand 3.
4636 case Intrinsic::loongarch_lsx_vsrlni_h_w:
4637 case Intrinsic::loongarch_lsx_vsrani_h_w:
4638 case Intrinsic::loongarch_lsx_vsrlrni_h_w:
4639 case Intrinsic::loongarch_lsx_vsrarni_h_w:
4640 case Intrinsic::loongarch_lsx_vssrlni_h_w:
4641 case Intrinsic::loongarch_lsx_vssrani_h_w:
4642 case Intrinsic::loongarch_lsx_vssrlni_hu_w:
4643 case Intrinsic::loongarch_lsx_vssrani_hu_w:
4644 case Intrinsic::loongarch_lsx_vssrlrni_h_w:
4645 case Intrinsic::loongarch_lsx_vssrarni_h_w:
4646 case Intrinsic::loongarch_lsx_vssrlrni_hu_w:
4647 case Intrinsic::loongarch_lsx_vssrarni_hu_w:
4648 case Intrinsic::loongarch_lsx_vfrstpi_b:
4649 case Intrinsic::loongarch_lsx_vfrstpi_h:
4650 case Intrinsic::loongarch_lasx_xvsrlni_h_w:
4651 case Intrinsic::loongarch_lasx_xvsrani_h_w:
4652 case Intrinsic::loongarch_lasx_xvsrlrni_h_w:
4653 case Intrinsic::loongarch_lasx_xvsrarni_h_w:
4654 case Intrinsic::loongarch_lasx_xvssrlni_h_w:
4655 case Intrinsic::loongarch_lasx_xvssrani_h_w:
4656 case Intrinsic::loongarch_lasx_xvssrlni_hu_w:
4657 case Intrinsic::loongarch_lasx_xvssrani_hu_w:
4658 case Intrinsic::loongarch_lasx_xvssrlrni_h_w:
4659 case Intrinsic::loongarch_lasx_xvssrarni_h_w:
4660 case Intrinsic::loongarch_lasx_xvssrlrni_hu_w:
4661 case Intrinsic::loongarch_lasx_xvssrarni_hu_w:
4662 case Intrinsic::loongarch_lasx_xvfrstpi_b:
4663 case Intrinsic::loongarch_lasx_xvfrstpi_h:
4664 return checkIntrinsicImmArg<5>(Op, 3, DAG);
// 6-bit unsigned immediate in operand 2.
4665 case Intrinsic::loongarch_lsx_vsat_d:
4666 case Intrinsic::loongarch_lsx_vsat_du:
4667 case Intrinsic::loongarch_lsx_vrotri_d:
4668 case Intrinsic::loongarch_lsx_vsrlri_d:
4669 case Intrinsic::loongarch_lsx_vsrari_d:
4670 case Intrinsic::loongarch_lasx_xvsat_d:
4671 case Intrinsic::loongarch_lasx_xvsat_du:
4672 case Intrinsic::loongarch_lasx_xvrotri_d:
4673 case Intrinsic::loongarch_lasx_xvsrlri_d:
4674 case Intrinsic::loongarch_lasx_xvsrari_d:
4675 return checkIntrinsicImmArg<6>(Op, 2, DAG);
// 6-bit unsigned immediate in operand 3.
4676 case Intrinsic::loongarch_lsx_vsrlni_w_d:
4677 case Intrinsic::loongarch_lsx_vsrani_w_d:
4678 case Intrinsic::loongarch_lsx_vsrlrni_w_d:
4679 case Intrinsic::loongarch_lsx_vsrarni_w_d:
4680 case Intrinsic::loongarch_lsx_vssrlni_w_d:
4681 case Intrinsic::loongarch_lsx_vssrani_w_d:
4682 case Intrinsic::loongarch_lsx_vssrlni_wu_d:
4683 case Intrinsic::loongarch_lsx_vssrani_wu_d:
4684 case Intrinsic::loongarch_lsx_vssrlrni_w_d:
4685 case Intrinsic::loongarch_lsx_vssrarni_w_d:
4686 case Intrinsic::loongarch_lsx_vssrlrni_wu_d:
4687 case Intrinsic::loongarch_lsx_vssrarni_wu_d:
4688 case Intrinsic::loongarch_lasx_xvsrlni_w_d:
4689 case Intrinsic::loongarch_lasx_xvsrani_w_d:
4690 case Intrinsic::loongarch_lasx_xvsrlrni_w_d:
4691 case Intrinsic::loongarch_lasx_xvsrarni_w_d:
4692 case Intrinsic::loongarch_lasx_xvssrlni_w_d:
4693 case Intrinsic::loongarch_lasx_xvssrani_w_d:
4694 case Intrinsic::loongarch_lasx_xvssrlni_wu_d:
4695 case Intrinsic::loongarch_lasx_xvssrani_wu_d:
4696 case Intrinsic::loongarch_lasx_xvssrlrni_w_d:
4697 case Intrinsic::loongarch_lasx_xvssrarni_w_d:
4698 case Intrinsic::loongarch_lasx_xvssrlrni_wu_d:
4699 case Intrinsic::loongarch_lasx_xvssrarni_wu_d:
4700 return checkIntrinsicImmArg<6>(Op, 3, DAG);
// 7-bit unsigned immediate in operand 3.
4701 case Intrinsic::loongarch_lsx_vsrlni_d_q:
4702 case Intrinsic::loongarch_lsx_vsrani_d_q:
4703 case Intrinsic::loongarch_lsx_vsrlrni_d_q:
4704 case Intrinsic::loongarch_lsx_vsrarni_d_q:
4705 case Intrinsic::loongarch_lsx_vssrlni_d_q:
4706 case Intrinsic::loongarch_lsx_vssrani_d_q:
4707 case Intrinsic::loongarch_lsx_vssrlni_du_q:
4708 case Intrinsic::loongarch_lsx_vssrani_du_q:
4709 case Intrinsic::loongarch_lsx_vssrlrni_d_q:
4710 case Intrinsic::loongarch_lsx_vssrarni_d_q:
4711 case Intrinsic::loongarch_lsx_vssrlrni_du_q:
4712 case Intrinsic::loongarch_lsx_vssrarni_du_q:
4713 case Intrinsic::loongarch_lasx_xvsrlni_d_q:
4714 case Intrinsic::loongarch_lasx_xvsrani_d_q:
4715 case Intrinsic::loongarch_lasx_xvsrlrni_d_q:
4716 case Intrinsic::loongarch_lasx_xvsrarni_d_q:
4717 case Intrinsic::loongarch_lasx_xvssrlni_d_q:
4718 case Intrinsic::loongarch_lasx_xvssrani_d_q:
4719 case Intrinsic::loongarch_lasx_xvssrlni_du_q:
4720 case Intrinsic::loongarch_lasx_xvssrani_du_q:
4721 case Intrinsic::loongarch_lasx_xvssrlrni_d_q:
4722 case Intrinsic::loongarch_lasx_xvssrarni_d_q:
4723 case Intrinsic::loongarch_lasx_xvssrlrni_du_q:
4724 case Intrinsic::loongarch_lasx_xvssrarni_du_q:
4725 return checkIntrinsicImmArg<7>(Op, 3, DAG);
// 8-bit unsigned immediate in operand 2.
4726 case Intrinsic::loongarch_lsx_vnori_b:
4727 case Intrinsic::loongarch_lsx_vshuf4i_b:
4728 case Intrinsic::loongarch_lsx_vshuf4i_h:
4729 case Intrinsic::loongarch_lsx_vshuf4i_w:
4730 case Intrinsic::loongarch_lasx_xvnori_b:
4731 case Intrinsic::loongarch_lasx_xvshuf4i_b:
4732 case Intrinsic::loongarch_lasx_xvshuf4i_h:
4733 case Intrinsic::loongarch_lasx_xvshuf4i_w:
4734 case Intrinsic::loongarch_lasx_xvpermi_d:
4735 return checkIntrinsicImmArg<8>(Op, 2, DAG);
// 8-bit unsigned immediate in operand 3.
4736 case Intrinsic::loongarch_lsx_vshuf4i_d:
4737 case Intrinsic::loongarch_lsx_vpermi_w:
4738 case Intrinsic::loongarch_lsx_vbitseli_b:
4739 case Intrinsic::loongarch_lsx_vextrins_b:
4740 case Intrinsic::loongarch_lsx_vextrins_h:
4741 case Intrinsic::loongarch_lsx_vextrins_w:
4742 case Intrinsic::loongarch_lsx_vextrins_d:
4743 case Intrinsic::loongarch_lasx_xvshuf4i_d:
4744 case Intrinsic::loongarch_lasx_xvpermi_w:
4745 case Intrinsic::loongarch_lasx_xvpermi_q:
4746 case Intrinsic::loongarch_lasx_xvbitseli_b:
4747 case Intrinsic::loongarch_lasx_xvextrins_b:
4748 case Intrinsic::loongarch_lasx_xvextrins_h:
4749 case Intrinsic::loongarch_lasx_xvextrins_w:
4750 case Intrinsic::loongarch_lasx_xvextrins_d:
4751 return checkIntrinsicImmArg<8>(Op, 3, DAG);
// 10-bit signed immediate in operand 1.
4752 case Intrinsic::loongarch_lsx_vrepli_b:
4753 case Intrinsic::loongarch_lsx_vrepli_h:
4754 case Intrinsic::loongarch_lsx_vrepli_w:
4755 case Intrinsic::loongarch_lsx_vrepli_d:
4756 case Intrinsic::loongarch_lasx_xvrepli_b:
4757 case Intrinsic::loongarch_lasx_xvrepli_h:
4758 case Intrinsic::loongarch_lasx_xvrepli_w:
4759 case Intrinsic::loongarch_lasx_xvrepli_d:
4760 return checkIntrinsicImmArg<10>(Op, 1, DAG, /*IsSigned=*/true);
// 13-bit signed immediate in operand 1.
4761 case Intrinsic::loongarch_lsx_vldi:
4762 case Intrinsic::loongarch_lasx_xvldi:
4763 return checkIntrinsicImmArg<13>(Op, 1, DAG, /*IsSigned=*/true);
4764 }
4765}
4766
4767// Helper that emits an error message for an intrinsic with a chain and returns
4768// the merged values of an UNDEF (of Op's value type) and the chain (operand 0).
// The message is "<operation name>: <ErrorMsg>.".
// NOTE(review): listing line 4769 (the first line of the signature, which
// declares Op) is missing from this listing — confirm against the upstream
// file.
4770 StringRef ErrorMsg,
4771 SelectionDAG &DAG) {
4772 DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
4773 return DAG.getMergeValues({DAG.getUNDEF(Op.getValueType()), Op.getOperand(0)},
4774 SDLoc(Op));
4775}
4776
4777SDValue
4778LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
4779 SelectionDAG &DAG) const {
4780 SDLoc DL(Op);
4781 MVT GRLenVT = Subtarget.getGRLenVT();
4782 EVT VT = Op.getValueType();
4783 SDValue Chain = Op.getOperand(0);
4784 const StringRef ErrorMsgOOR = "argument out of range";
4785 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
4786 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
4787
4788 switch (Op.getConstantOperandVal(1)) {
4789 default:
4790 return Op;
4791 case Intrinsic::loongarch_crc_w_b_w:
4792 case Intrinsic::loongarch_crc_w_h_w:
4793 case Intrinsic::loongarch_crc_w_w_w:
4794 case Intrinsic::loongarch_crc_w_d_w:
4795 case Intrinsic::loongarch_crcc_w_b_w:
4796 case Intrinsic::loongarch_crcc_w_h_w:
4797 case Intrinsic::loongarch_crcc_w_w_w:
4798 case Intrinsic::loongarch_crcc_w_d_w:
4799 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqLA64, DAG);
4800 case Intrinsic::loongarch_csrrd_w:
4801 case Intrinsic::loongarch_csrrd_d: {
4802 unsigned Imm = Op.getConstantOperandVal(2);
4803 return !isUInt<14>(Imm)
4804 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4805 : DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
4806 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
4807 }
4808 case Intrinsic::loongarch_csrwr_w:
4809 case Intrinsic::loongarch_csrwr_d: {
4810 unsigned Imm = Op.getConstantOperandVal(3);
4811 return !isUInt<14>(Imm)
4812 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4813 : DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
4814 {Chain, Op.getOperand(2),
4815 DAG.getConstant(Imm, DL, GRLenVT)});
4816 }
4817 case Intrinsic::loongarch_csrxchg_w:
4818 case Intrinsic::loongarch_csrxchg_d: {
4819 unsigned Imm = Op.getConstantOperandVal(4);
4820 return !isUInt<14>(Imm)
4821 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4822 : DAG.getNode(LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
4823 {Chain, Op.getOperand(2), Op.getOperand(3),
4824 DAG.getConstant(Imm, DL, GRLenVT)});
4825 }
4826 case Intrinsic::loongarch_iocsrrd_d: {
4827 return DAG.getNode(
4828 LoongArchISD::IOCSRRD_D, DL, {GRLenVT, MVT::Other},
4829 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2))});
4830 }
4831#define IOCSRRD_CASE(NAME, NODE) \
4832 case Intrinsic::loongarch_##NAME: { \
4833 return DAG.getNode(LoongArchISD::NODE, DL, {GRLenVT, MVT::Other}, \
4834 {Chain, Op.getOperand(2)}); \
4835 }
4836 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
4837 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
4838 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
4839#undef IOCSRRD_CASE
4840 case Intrinsic::loongarch_cpucfg: {
4841 return DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
4842 {Chain, Op.getOperand(2)});
4843 }
4844 case Intrinsic::loongarch_lddir_d: {
4845 unsigned Imm = Op.getConstantOperandVal(3);
4846 return !isUInt<8>(Imm)
4847 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4848 : Op;
4849 }
4850 case Intrinsic::loongarch_movfcsr2gr: {
4851 if (!Subtarget.hasBasicF())
4852 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqF, DAG);
4853 unsigned Imm = Op.getConstantOperandVal(2);
4854 return !isUInt<2>(Imm)
4855 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4856 : DAG.getNode(LoongArchISD::MOVFCSR2GR, DL, {VT, MVT::Other},
4857 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
4858 }
4859 case Intrinsic::loongarch_lsx_vld:
4860 case Intrinsic::loongarch_lsx_vldrepl_b:
4861 case Intrinsic::loongarch_lasx_xvld:
4862 case Intrinsic::loongarch_lasx_xvldrepl_b:
4863 return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
4864 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4865 : SDValue();
4866 case Intrinsic::loongarch_lsx_vldrepl_h:
4867 case Intrinsic::loongarch_lasx_xvldrepl_h:
4868 return !isShiftedInt<11, 1>(
4869 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
4871 Op, "argument out of range or not a multiple of 2", DAG)
4872 : SDValue();
4873 case Intrinsic::loongarch_lsx_vldrepl_w:
4874 case Intrinsic::loongarch_lasx_xvldrepl_w:
4875 return !isShiftedInt<10, 2>(
4876 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
4878 Op, "argument out of range or not a multiple of 4", DAG)
4879 : SDValue();
4880 case Intrinsic::loongarch_lsx_vldrepl_d:
4881 case Intrinsic::loongarch_lasx_xvldrepl_d:
4882 return !isShiftedInt<9, 3>(
4883 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
4885 Op, "argument out of range or not a multiple of 8", DAG)
4886 : SDValue();
4887 }
4888}
4889
4890// Helper function that emits an error message for intrinsics with a void
4891// return value and returns the chain.
// NOTE(review): the line carrying the function name and its leading parameters
// is absent from this listing. Call sites in this file spell the call as
// emitIntrinsicErrorMessage(Op, <message>, DAG) -- confirm the exact
// declaration against the upstream file.
4893 SelectionDAG &DAG) {
4894
// The diagnostic text is "<operation name>: <ErrorMsg>.".
4895 DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
// Operand 0 of Op is handed back as the only result.
4896 return Op.getOperand(0);
4897}
4898
4899SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op,
4900 SelectionDAG &DAG) const {
4901 SDLoc DL(Op);
4902 MVT GRLenVT = Subtarget.getGRLenVT();
4903 SDValue Chain = Op.getOperand(0);
4904 uint64_t IntrinsicEnum = Op.getConstantOperandVal(1);
4905 SDValue Op2 = Op.getOperand(2);
4906 const StringRef ErrorMsgOOR = "argument out of range";
4907 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
4908 const StringRef ErrorMsgReqLA32 = "requires loongarch32";
4909 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
4910
4911 switch (IntrinsicEnum) {
4912 default:
4913 // TODO: Add more Intrinsics.
4914 return SDValue();
4915 case Intrinsic::loongarch_cacop_d:
4916 case Intrinsic::loongarch_cacop_w: {
4917 if (IntrinsicEnum == Intrinsic::loongarch_cacop_d && !Subtarget.is64Bit())
4918 return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG);
4919 if (IntrinsicEnum == Intrinsic::loongarch_cacop_w && Subtarget.is64Bit())
4920 return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA32, DAG);
4921 // call void @llvm.loongarch.cacop.[d/w](uimm5, rj, simm12)
4922 unsigned Imm1 = Op2->getAsZExtVal();
4923 int Imm2 = cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue();
4924 if (!isUInt<5>(Imm1) || !isInt<12>(Imm2))
4925 return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
4926 return Op;
4927 }
4928 case Intrinsic::loongarch_dbar: {
4929 unsigned Imm = Op2->getAsZExtVal();
4930 return !isUInt<15>(Imm)
4931 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4932 : DAG.getNode(LoongArchISD::DBAR, DL, MVT::Other, Chain,
4933 DAG.getConstant(Imm, DL, GRLenVT));
4934 }
4935 case Intrinsic::loongarch_ibar: {
4936 unsigned Imm = Op2->getAsZExtVal();
4937 return !isUInt<15>(Imm)
4938 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4939 : DAG.getNode(LoongArchISD::IBAR, DL, MVT::Other, Chain,
4940 DAG.getConstant(Imm, DL, GRLenVT));
4941 }
4942 case Intrinsic::loongarch_break: {
4943 unsigned Imm = Op2->getAsZExtVal();
4944 return !isUInt<15>(Imm)
4945 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4946 : DAG.getNode(LoongArchISD::BREAK, DL, MVT::Other, Chain,
4947 DAG.getConstant(Imm, DL, GRLenVT));
4948 }
4949 case Intrinsic::loongarch_movgr2fcsr: {
4950 if (!Subtarget.hasBasicF())
4951 return emitIntrinsicErrorMessage(Op, ErrorMsgReqF, DAG);
4952 unsigned Imm = Op2->getAsZExtVal();
4953 return !isUInt<2>(Imm)
4954 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4955 : DAG.getNode(LoongArchISD::MOVGR2FCSR, DL, MVT::Other, Chain,
4956 DAG.getConstant(Imm, DL, GRLenVT),
4957 DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT,
4958 Op.getOperand(3)));
4959 }
4960 case Intrinsic::loongarch_syscall: {
4961 unsigned Imm = Op2->getAsZExtVal();
4962 return !isUInt<15>(Imm)
4963 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4964 : DAG.getNode(LoongArchISD::SYSCALL, DL, MVT::Other, Chain,
4965 DAG.getConstant(Imm, DL, GRLenVT));
4966 }
4967#define IOCSRWR_CASE(NAME, NODE) \
4968 case Intrinsic::loongarch_##NAME: { \
4969 SDValue Op3 = Op.getOperand(3); \
4970 return Subtarget.is64Bit() \
4971 ? DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, \
4972 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
4973 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op3)) \
4974 : DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, Op2, \
4975 Op3); \
4976 }
4977 IOCSRWR_CASE(iocsrwr_b, IOCSRWR_B);
4978 IOCSRWR_CASE(iocsrwr_h, IOCSRWR_H);
4979 IOCSRWR_CASE(iocsrwr_w, IOCSRWR_W);
4980#undef IOCSRWR_CASE
4981 case Intrinsic::loongarch_iocsrwr_d: {
4982 return !Subtarget.is64Bit()
4983 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
4984 : DAG.getNode(LoongArchISD::IOCSRWR_D, DL, MVT::Other, Chain,
4985 Op2,
4986 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
4987 Op.getOperand(3)));
4988 }
4989#define ASRT_LE_GT_CASE(NAME) \
4990 case Intrinsic::loongarch_##NAME: { \
4991 return !Subtarget.is64Bit() \
4992 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG) \
4993 : Op; \
4994 }
4995 ASRT_LE_GT_CASE(asrtle_d)
4996 ASRT_LE_GT_CASE(asrtgt_d)
4997#undef ASRT_LE_GT_CASE
4998 case Intrinsic::loongarch_ldpte_d: {
4999 unsigned Imm = Op.getConstantOperandVal(3);
5000 return !Subtarget.is64Bit()
5001 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
5002 : !isUInt<8>(Imm) ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
5003 : Op;
5004 }
5005 case Intrinsic::loongarch_lsx_vst:
5006 case Intrinsic::loongarch_lasx_xvst:
5007 return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue())
5008 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
5009 : SDValue();
5010 case Intrinsic::loongarch_lasx_xvstelm_b:
5011 return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
5012 !isUInt<5>(Op.getConstantOperandVal(5)))
5013 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
5014 : SDValue();
5015 case Intrinsic::loongarch_lsx_vstelm_b:
5016 return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
5017 !isUInt<4>(Op.getConstantOperandVal(5)))
5018 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
5019 : SDValue();
5020 case Intrinsic::loongarch_lasx_xvstelm_h:
5021 return (!isShiftedInt<8, 1>(
5022 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
5023 !isUInt<4>(Op.getConstantOperandVal(5)))
5025 Op, "argument out of range or not a multiple of 2", DAG)
5026 : SDValue();
5027 case Intrinsic::loongarch_lsx_vstelm_h:
5028 return (!isShiftedInt<8, 1>(
5029 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
5030 !isUInt<3>(Op.getConstantOperandVal(5)))
5032 Op, "argument out of range or not a multiple of 2", DAG)
5033 : SDValue();
5034 case Intrinsic::loongarch_lasx_xvstelm_w:
5035 return (!isShiftedInt<8, 2>(
5036 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
5037 !isUInt<3>(Op.getConstantOperandVal(5)))
5039 Op, "argument out of range or not a multiple of 4", DAG)
5040 : SDValue();
5041 case Intrinsic::loongarch_lsx_vstelm_w:
5042 return (!isShiftedInt<8, 2>(
5043 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
5044 !isUInt<2>(Op.getConstantOperandVal(5)))
5046 Op, "argument out of range or not a multiple of 4", DAG)
5047 : SDValue();
5048 case Intrinsic::loongarch_lasx_xvstelm_d:
5049 return (!isShiftedInt<8, 3>(
5050 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
5051 !isUInt<2>(Op.getConstantOperandVal(5)))
5053 Op, "argument out of range or not a multiple of 8", DAG)
5054 : SDValue();
5055 case Intrinsic::loongarch_lsx_vstelm_d:
5056 return (!isShiftedInt<8, 3>(
5057 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
5058 !isUInt<1>(Op.getConstantOperandVal(5)))
5060 Op, "argument out of range or not a multiple of 8", DAG)
5061 : SDValue();
5062 }
5063}
5064
5065SDValue LoongArchTargetLowering::lowerShiftLeftParts(SDValue Op,
5066 SelectionDAG &DAG) const {
5067 SDLoc DL(Op);
5068 SDValue Lo = Op.getOperand(0);
5069 SDValue Hi = Op.getOperand(1);
5070 SDValue Shamt = Op.getOperand(2);
5071 EVT VT = Lo.getValueType();
5072
5073 // if Shamt-GRLen < 0: // Shamt < GRLen
5074 // Lo = Lo << Shamt
5075 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (GRLen-1 ^ Shamt))
5076 // else:
5077 // Lo = 0
5078 // Hi = Lo << (Shamt-GRLen)
5079
5080 SDValue Zero = DAG.getConstant(0, DL, VT);
5081 SDValue One = DAG.getConstant(1, DL, VT);
5082 SDValue MinusGRLen =
5083 DAG.getSignedConstant(-(int)Subtarget.getGRLen(), DL, VT);
5084 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
5085 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
5086 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
5087
5088 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
5089 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
5090 SDValue ShiftRightLo =
5091 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, GRLenMinus1Shamt);
5092 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
5093 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
5094 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusGRLen);
5095
5096 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
5097
5098 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
5099 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
5100
5101 SDValue Parts[2] = {Lo, Hi};
5102 return DAG.getMergeValues(Parts, DL);
5103}
5104
5105SDValue LoongArchTargetLowering::lowerShiftRightParts(SDValue Op,
5106 SelectionDAG &DAG,
5107 bool IsSRA) const {
5108 SDLoc DL(Op);
5109 SDValue Lo = Op.getOperand(0);
5110 SDValue Hi = Op.getOperand(1);
5111 SDValue Shamt = Op.getOperand(2);
5112 EVT VT = Lo.getValueType();
5113
5114 // SRA expansion:
5115 // if Shamt-GRLen < 0: // Shamt < GRLen
5116 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
5117 // Hi = Hi >>s Shamt
5118 // else:
5119 // Lo = Hi >>s (Shamt-GRLen);
5120 // Hi = Hi >>s (GRLen-1)
5121 //
5122 // SRL expansion:
5123 // if Shamt-GRLen < 0: // Shamt < GRLen
5124 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
5125 // Hi = Hi >>u Shamt
5126 // else:
5127 // Lo = Hi >>u (Shamt-GRLen);
5128 // Hi = 0;
5129
5130 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
5131
5132 SDValue Zero = DAG.getConstant(0, DL, VT);
5133 SDValue One = DAG.getConstant(1, DL, VT);
5134 SDValue MinusGRLen =
5135 DAG.getSignedConstant(-(int)Subtarget.getGRLen(), DL, VT);
5136 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
5137 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
5138 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
5139
5140 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
5141 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
5142 SDValue ShiftLeftHi =
5143 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, GRLenMinus1Shamt);
5144 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
5145 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
5146 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusGRLen);
5147 SDValue HiFalse =
5148 IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, GRLenMinus1) : Zero;
5149
5150 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
5151
5152 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
5153 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
5154
5155 SDValue Parts[2] = {Lo, Hi};
5156 return DAG.getMergeValues(Parts, DL);
5157}
5158
5159// Returns the opcode of the target-specific SDNode that implements the 32-bit
5160// form of the given Opcode.
5161static unsigned getLoongArchWOpcode(unsigned Opcode) {
5162 switch (Opcode) {
5163 default:
5164 llvm_unreachable("Unexpected opcode");
5165 case ISD::SDIV:
5166 return LoongArchISD::DIV_W;
5167 case ISD::UDIV:
5168 return LoongArchISD::DIV_WU;
5169 case ISD::SREM:
5170 return LoongArchISD::MOD_W;
5171 case ISD::UREM:
5172 return LoongArchISD::MOD_WU;
5173 case ISD::SHL:
5174 return LoongArchISD::SLL_W;
5175 case ISD::SRA:
5176 return LoongArchISD::SRA_W;
5177 case ISD::SRL:
5178 return LoongArchISD::SRL_W;
5179 case ISD::ROTL:
5180 case ISD::ROTR:
5181 return LoongArchISD::ROTR_W;
5182 case ISD::CTTZ:
5183 return LoongArchISD::CTZ_W;
5184 case ISD::CTLZ:
5185 return LoongArchISD::CLZ_W;
5186 }
5187}
5188
5189// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
5190// node. Because i8/i16/i32 isn't a legal type for LA64, these operations would
5191// otherwise be promoted to i64, making it difficult to select the
5192// SLL_W/.../*W later on because the fact that the operation was originally of
5193// type i8/i16/i32 is lost.
// NOTE(review): the line carrying the function name and its leading parameters
// is absent from this listing. Call sites in this file pass (N, DAG, NumOp) and
// optionally an extension opcode -- confirm the exact declaration against the
// upstream file.
5195 unsigned ExtOpc = ISD::ANY_EXTEND) {
5196 SDLoc DL(N);
5197 unsigned WOpcode = getLoongArchWOpcode(N->getOpcode());
5198 SDValue NewOp0, NewRes;
5199
// NumOp is the number of operands of N that are extended to i64 and passed on
// to the W-form node.
5200 switch (NumOp) {
5201 default:
5202 llvm_unreachable("Unexpected NumOp");
5203 case 1: {
5204 NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
5205 NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0);
5206 break;
5207 }
5208 case 2: {
5209 NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
5210 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
// getLoongArchWOpcode maps ROTL to the rotate-right node, so the amount is
// rewritten as (32 - amount) here.
5211 if (N->getOpcode() == ISD::ROTL) {
5212 SDValue TmpOp = DAG.getConstant(32, DL, MVT::i64);
5213 NewOp1 = DAG.getNode(ISD::SUB, DL, MVT::i64, TmpOp, NewOp1);
5214 }
5215 NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
5216 break;
5217 }
5218 // TODO:Handle more NumOp.
5219 }
5220
5221 // ReplaceNodeResults requires we maintain the same type for the return
5222 // value.
5223 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
5224}
5225
5226// Converts the given 32-bit operation to an i64 operation with sign-extension
5227// semantics to reduce the number of sign-extension instructions.
// NOTE(review): the declaration line is absent from this listing. The call site
// in this file spells the call as customLegalizeToWOpWithSExt(N, DAG) --
// confirm the exact declaration against the upstream file.
5229 SDLoc DL(N);
// Both operands are any-extended to i64 and N's own opcode is re-issued at i64.
5230 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
5231 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
5232 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
// The i64 result is sign-extended from its low 32 bits, then truncated back to
// i32 for the caller.
5233 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
5234 DAG.getValueType(MVT::i32));
5235 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
5236}
5237
5238// Helper function that emits an error message for intrinsics with/without a
5239// chain and pushes an UNDEF, followed by the chain when WithChain is set.
// NOTE(review): the lines carrying the function name and its leading parameters
// are absent from this listing. Call sites in this file pass
// (N, Results, DAG, <message>[, WithChain]) -- confirm the exact declaration
// against the upstream file.
5242 StringRef ErrorMsg, bool WithChain = true) {
// The diagnostic text is "<operation name>: <ErrorMsg>.".
5243 DAG.getContext()->emitError(N->getOperationName(0) + ": " + ErrorMsg + ".");
5244 Results.push_back(DAG.getUNDEF(N->getValueType(0)));
5245 if (!WithChain)
5246 return;
// Operand 0 of N is pushed as the second result.
5247 Results.push_back(N->getOperand(0));
5248}
5249
// Replaces the result of an element-pick intrinsic: checks that the index in
// operand 2 fits in N unsigned bits, builds a ResOp node of GRLen type from
// the vector in operand 1, and pushes that node truncated to the intrinsic's
// original result type.
// NOTE(review): three lines are absent from this listing -- the one carrying
// the function name, the start of the error call inside the range check, and
// the final operand of the ResOp node. Confirm all three against the upstream
// file before relying on this block.
5250template <unsigned N>
5251static void
5253 SelectionDAG &DAG, const LoongArchSubtarget &Subtarget,
5254 unsigned ResOp) {
5255 const StringRef ErrorMsgOOR = "argument out of range";
5256 unsigned Imm = Node->getConstantOperandVal(2);
5257 if (!isUInt<N>(Imm)) {
5259 /*WithChain=*/false);
5260 return;
5261 }
5262 SDLoc DL(Node);
5263 SDValue Vec = Node->getOperand(1);
5264
5265 SDValue PickElt =
5266 DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec,
5267 DAG.getConstant(Imm, DL, Subtarget.getGRLenVT()),
5269 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, Node->getValueType(0),
5270 PickElt.getValue(0)));
5271}
5272
// Replaces the result of a vector condition intrinsic: builds a ResOp node of
// GRLen type from the vector in operand 1 and pushes it truncated to the
// intrinsic's original result type.
// NOTE(review): the lines carrying the function name and its leading parameters
// are absent from this listing. Call sites in this file pass
// (N, Results, DAG, Subtarget, <LoongArchISD opcode>) -- confirm the exact
// declaration against the upstream file.
5275 SelectionDAG &DAG,
5276 const LoongArchSubtarget &Subtarget,
5277 unsigned ResOp) {
5278 SDLoc DL(N);
5279 SDValue Vec = N->getOperand(1);
5280
5281 SDValue CB = DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec);
5282 Results.push_back(
5283 DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), CB.getValue(0)));
5284}
5285
// Dispatches on the intrinsic ID held in operand 0 of N and forwards to the
// element-pick or vector-condition helper with the matching LoongArchISD
// opcode. Any other ID is treated as unreachable.
// NOTE(review): the line carrying the function name and its leading parameters
// is absent from this listing, and the name is not visible anywhere in this
// part of the file -- confirm the declaration against the upstream file.
5286static void
5288 SelectionDAG &DAG,
5289 const LoongArchSubtarget &Subtarget) {
5290 switch (N->getConstantOperandVal(0)) {
5291 default:
5292 llvm_unreachable("Unexpected Intrinsic.");
// Element picks: the template argument is the number of unsigned bits allowed
// for the element index. The first group uses the sign-extending node, the
// "u" group the zero-extending one.
5293 case Intrinsic::loongarch_lsx_vpickve2gr_b:
5294 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
5295 LoongArchISD::VPICK_SEXT_ELT);
5296 break;
5297 case Intrinsic::loongarch_lsx_vpickve2gr_h:
5298 case Intrinsic::loongarch_lasx_xvpickve2gr_w:
5299 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
5300 LoongArchISD::VPICK_SEXT_ELT);
5301 break;
5302 case Intrinsic::loongarch_lsx_vpickve2gr_w:
5303 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
5304 LoongArchISD::VPICK_SEXT_ELT);
5305 break;
5306 case Intrinsic::loongarch_lsx_vpickve2gr_bu:
5307 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
5308 LoongArchISD::VPICK_ZEXT_ELT);
5309 break;
5310 case Intrinsic::loongarch_lsx_vpickve2gr_hu:
5311 case Intrinsic::loongarch_lasx_xvpickve2gr_wu:
5312 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
5313 LoongArchISD::VPICK_ZEXT_ELT);
5314 break;
5315 case Intrinsic::loongarch_lsx_vpickve2gr_wu:
5316 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
5317 LoongArchISD::VPICK_ZEXT_ELT);
5318 break;
// Vector conditions: the per-element-size bz forms map to VALL_ZERO and the
// whole-vector bz_v form to VANY_ZERO; the bnz forms map to VALL_NONZERO and
// VANY_NONZERO in the same way.
5319 case Intrinsic::loongarch_lsx_bz_b:
5320 case Intrinsic::loongarch_lsx_bz_h:
5321 case Intrinsic::loongarch_lsx_bz_w:
5322 case Intrinsic::loongarch_lsx_bz_d:
5323 case Intrinsic::loongarch_lasx_xbz_b:
5324 case Intrinsic::loongarch_lasx_xbz_h:
5325 case Intrinsic::loongarch_lasx_xbz_w:
5326 case Intrinsic::loongarch_lasx_xbz_d:
5327 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
5328 LoongArchISD::VALL_ZERO);
5329 break;
5330 case Intrinsic::loongarch_lsx_bz_v:
5331 case Intrinsic::loongarch_lasx_xbz_v:
5332 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
5333 LoongArchISD::VANY_ZERO);
5334 break;
5335 case Intrinsic::loongarch_lsx_bnz_b:
5336 case Intrinsic::loongarch_lsx_bnz_h:
5337 case Intrinsic::loongarch_lsx_bnz_w:
5338 case Intrinsic::loongarch_lsx_bnz_d:
5339 case Intrinsic::loongarch_lasx_xbnz_b:
5340 case Intrinsic::loongarch_lasx_xbnz_h:
5341 case Intrinsic::loongarch_lasx_xbnz_w:
5342 case Intrinsic::loongarch_lasx_xbnz_d:
5343 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
5344 LoongArchISD::VALL_NONZERO);
5345 break;
5346 case Intrinsic::loongarch_lsx_bnz_v:
5347 case Intrinsic::loongarch_lasx_xbnz_v:
5348 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
5349 LoongArchISD::VANY_NONZERO);
5350 break;
5351 }
5352}
5353
// Replaces the results of a 128-bit atomic compare-and-swap node with a
// machine pseudo-instruction that operates on i64 halves.
// NOTE(review): the lines carrying the function name and its leading parameters
// are absent from this listing, as are the case labels inside the ordering
// switch below -- confirm both against the upstream file.
5356 SelectionDAG &DAG) {
5357 assert(N->getValueType(0) == MVT::i128 &&
5358 "AtomicCmpSwap on types less than 128 should be legal");
5359 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
5360
// The pseudo opcode depends on the merged atomic ordering of the memory
// operand. NOTE(review): which orderings select which pseudo is not visible
// here because the case labels are missing.
5361 unsigned Opcode;
5362 switch (MemOp->getMergedOrdering()) {
5366 Opcode = LoongArch::PseudoCmpXchg128Acquire;
5367 break;
5370 Opcode = LoongArch::PseudoCmpXchg128;
5371 break;
5372 default:
5373 llvm_unreachable("Unexpected ordering!");
5374 }
5375
5376 SDLoc DL(N);
// Operands 2 and 3 of N are each split into two i64 halves. The pseudo takes
// operand 1 of N, the four halves, and finally operand 0 of N.
5377 auto CmpVal = DAG.SplitScalar(N->getOperand(2), DL, MVT::i64, MVT::i64);
5378 auto NewVal = DAG.SplitScalar(N->getOperand(3), DL, MVT::i64, MVT::i64);
5379 SDValue Ops[] = {N->getOperand(1), CmpVal.first, CmpVal.second,
5380 NewVal.first, NewVal.second, N->getOperand(0)};
5381
5382 SDNode *CmpSwap = DAG.getMachineNode(
5383 Opcode, SDLoc(N), DAG.getVTList(MVT::i64, MVT::i64, MVT::i64, MVT::Other),
5384 Ops);
5385 DAG.setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
// Results 0 and 1 of the pseudo are paired into the i128 value. Result 3, the
// MVT::Other entry of the VT list, is pushed as the second result; result 2 is
// not used here.
5386 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128,
5387 SDValue(CmpSwap, 0), SDValue(CmpSwap, 1)));
5388 Results.push_back(SDValue(CmpSwap, 3));
5389}
5390
5393 SDLoc DL(N);
5394 EVT VT = N->getValueType(0);
5395 switch (N->getOpcode()) {
5396 default:
5397 llvm_unreachable("Don't know how to legalize this operation");
5398 case ISD::ADD:
5399 case ISD::SUB:
5400 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
5401 "Unexpected custom legalisation");
5402 Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
5403 break;
5404 case ISD::SDIV:
5405 case ISD::UDIV:
5406 case ISD::SREM:
5407 case ISD::UREM:
5408 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
5409 "Unexpected custom legalisation");
5410 Results.push_back(customLegalizeToWOp(N, DAG, 2,
5411 Subtarget.hasDiv32() && VT == MVT::i32
5413 : ISD::SIGN_EXTEND));
5414 break;
5415 case ISD::SHL:
5416 case ISD::SRA:
5417 case ISD::SRL:
5418 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
5419 "Unexpected custom legalisation");
5420 if (N->getOperand(1).getOpcode() != ISD::Constant) {
5421 Results.push_back(customLegalizeToWOp(N, DAG, 2));
5422 break;
5423 }
5424 break;
5425 case ISD::ROTL:
5426 case ISD::ROTR:
5427 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
5428 "Unexpected custom legalisation");
5429 Results.push_back(customLegalizeToWOp(N, DAG, 2));
5430 break;
5431 case ISD::LOAD: {
5432 // Use an f64 load and a scalar_to_vector for v2f32 loads. This avoids
5433 // scalarizing in 32-bit mode. In 64-bit mode this avoids a int->fp
5434 // cast since type legalization will try to use an i64 load.
5435 MVT VT = N->getSimpleValueType(0);
5436 assert(VT == MVT::v2f32 && Subtarget.hasExtLSX() &&
5437 "Unexpected custom legalisation");
5439 "Unexpected type action!");
5440 if (!ISD::isNON_EXTLoad(N))
5441 return;
5442 auto *Ld = cast<LoadSDNode>(N);
5443 SDValue Res = DAG.getLoad(MVT::f64, DL, Ld->getChain(), Ld->getBasePtr(),
5444 Ld->getPointerInfo(), Ld->getBaseAlign(),
5445 Ld->getMemOperand()->getFlags());
5446 SDValue Chain = Res.getValue(1);
5447 MVT VecVT = MVT::getVectorVT(MVT::f64, 2);
5448 Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, Res);
5449 EVT WideVT = getTypeToTransformTo(*DAG.getContext(), VT);
5450 Res = DAG.getBitcast(WideVT, Res);
5451 Results.push_back(Res);
5452 Results.push_back(Chain);
5453 break;
5454 }
5455 case ISD::FP_TO_SINT: {
5456 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
5457 "Unexpected custom legalisation");
5458 SDValue Src = N->getOperand(0);
5459 EVT FVT = EVT::getFloatingPointVT(N->getValueSizeInBits(0));
5460 if (getTypeAction(*DAG.getContext(), Src.getValueType()) !=
5462 if (!isTypeLegal(Src.getValueType()))
5463 return;
5464 if (Src.getValueType() == MVT::f16)
5465 Src = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Src);
5466 SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, FVT, Src);
5467 Results.push_back(DAG.getNode(ISD::BITCAST, DL, VT, Dst));
5468 return;
5469 }
5470 // If the FP type needs to be softened, emit a library call using the 'si'
5471 // version. If we left it to default legalization we'd end up with 'di'.
5472 RTLIB::Libcall LC;
5473 LC = RTLIB::getFPTOSINT(Src.getValueType(), VT);
5474 MakeLibCallOptions CallOptions;
5475 EVT OpVT = Src.getValueType();
5476 CallOptions.setTypeListBeforeSoften(OpVT, VT);
5477 SDValue Chain = SDValue();
5478 SDValue Result;
5479 std::tie(Result, Chain) =
5480 makeLibCall(DAG, LC, VT, Src, CallOptions, DL, Chain);
5481 Results.push_back(Result);
5482 break;
5483 }
5484 case ISD::BITCAST: {
5485 SDValue Src = N->getOperand(0);
5486 EVT SrcVT = Src.getValueType();
5487 if (VT == MVT::i32 && SrcVT == MVT::f32 && Subtarget.is64Bit() &&
5488 Subtarget.hasBasicF()) {
5489 SDValue Dst =
5490 DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Src);
5491 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Dst));
5492 } else if (VT == MVT::i64 && SrcVT == MVT::f64 && !Subtarget.is64Bit()) {
5493 SDValue NewReg = DAG.getNode(LoongArchISD::SPLIT_PAIR_F64, DL,
5494 DAG.getVTList(MVT::i32, MVT::i32), Src);
5495 SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,
5496 NewReg.getValue(0), NewReg.getValue(1));
5497 Results.push_back(RetReg);
5498 }
5499 break;
5500 }
5501 case ISD::FP_TO_UINT: {
5502 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
5503 "Unexpected custom legalisation");
5504 auto &TLI = DAG.getTargetLoweringInfo();
5505 SDValue Tmp1, Tmp2;
5506 TLI.expandFP_TO_UINT(N, Tmp1, Tmp2, DAG);
5507 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Tmp1));
5508 break;
5509 }
5510 case ISD::FP_ROUND: {
5511 assert(VT == MVT::v2f32 && Subtarget.hasExtLSX() &&
5512 "Unexpected custom legalisation");
5513 // On LSX platforms, rounding from v2f64 to v4f32 (after legalization from
5514 // v2f32) is scalarized. Add a customized v2f32 widening to convert it into
5515 // a target-specific LoongArchISD::VFCVT to optimize it.
5516 SDValue Op0 = N->getOperand(0);
5517 EVT OpVT = Op0.getValueType();
5518 if (OpVT == MVT::v2f64) {
5519 SDValue Undef = DAG.getUNDEF(OpVT);
5520 SDValue Dst =
5521 DAG.getNode(LoongArchISD::VFCVT, DL, MVT::v4f32, Undef, Op0);
5522 Results.push_back(Dst);
5523 }
5524 break;
5525 }
5526 case ISD::BSWAP: {
5527 SDValue Src = N->getOperand(0);
5528 assert((VT == MVT::i16 || VT == MVT::i32) &&
5529 "Unexpected custom legalization");
5530 MVT GRLenVT = Subtarget.getGRLenVT();
5531 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
5532 SDValue Tmp;
5533 switch (VT.getSizeInBits()) {
5534 default:
5535 llvm_unreachable("Unexpected operand width");
5536 case 16:
5537 Tmp = DAG.getNode(LoongArchISD::REVB_2H, DL, GRLenVT, NewSrc);
5538 break;
5539 case 32:
5540 // Only LA64 will get to here due to the size mismatch between VT and
5541 // GRLenVT, LA32 lowering is directly defined in LoongArchInstrInfo.
5542 Tmp = DAG.getNode(LoongArchISD::REVB_2W, DL, GRLenVT, NewSrc);
5543 break;
5544 }
5545 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
5546 break;
5547 }
5548 case ISD::BITREVERSE: {
5549 SDValue Src = N->getOperand(0);
5550 assert((VT == MVT::i8 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
5551 "Unexpected custom legalization");
5552 MVT GRLenVT = Subtarget.getGRLenVT();
5553 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
5554 SDValue Tmp;
5555 switch (VT.getSizeInBits()) {
5556 default:
5557 llvm_unreachable("Unexpected operand width");
5558 case 8:
5559 Tmp = DAG.getNode(LoongArchISD::BITREV_4B, DL, GRLenVT, NewSrc);
5560 break;
5561 case 32:
5562 Tmp = DAG.getNode(LoongArchISD::BITREV_W, DL, GRLenVT, NewSrc);
5563 break;
5564 }
5565 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
5566 break;
5567 }
5568 case ISD::CTLZ:
5569 case ISD::CTTZ: {
5570 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
5571 "Unexpected custom legalisation");
5572 Results.push_back(customLegalizeToWOp(N, DAG, 1));
5573 break;
5574 }
5576 SDValue Chain = N->getOperand(0);
5577 SDValue Op2 = N->getOperand(2);
5578 MVT GRLenVT = Subtarget.getGRLenVT();
5579 const StringRef ErrorMsgOOR = "argument out of range";
5580 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
5581 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
5582
5583 switch (N->getConstantOperandVal(1)) {
5584 default:
5585 llvm_unreachable("Unexpected Intrinsic.");
5586 case Intrinsic::loongarch_movfcsr2gr: {
5587 if (!Subtarget.hasBasicF()) {
5588 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqF);
5589 return;
5590 }
5591 unsigned Imm = Op2->getAsZExtVal();
5592 if (!isUInt<2>(Imm)) {
5593 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
5594 return;
5595 }
5596 SDValue MOVFCSR2GRResults = DAG.getNode(
5597 LoongArchISD::MOVFCSR2GR, SDLoc(N), {MVT::i64, MVT::Other},
5598 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
5599 Results.push_back(
5600 DAG.getNode(ISD::TRUNCATE, DL, VT, MOVFCSR2GRResults.getValue(0)));
5601 Results.push_back(MOVFCSR2GRResults.getValue(1));
5602 break;
5603 }
5604#define CRC_CASE_EXT_BINARYOP(NAME, NODE) \
5605 case Intrinsic::loongarch_##NAME: { \
5606 SDValue NODE = DAG.getNode( \
5607 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
5608 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
5609 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
5610 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
5611 Results.push_back(NODE.getValue(1)); \
5612 break; \
5613 }
5614 CRC_CASE_EXT_BINARYOP(crc_w_b_w, CRC_W_B_W)
5615 CRC_CASE_EXT_BINARYOP(crc_w_h_w, CRC_W_H_W)
5616 CRC_CASE_EXT_BINARYOP(crc_w_w_w, CRC_W_W_W)
5617 CRC_CASE_EXT_BINARYOP(crcc_w_b_w, CRCC_W_B_W)
5618 CRC_CASE_EXT_BINARYOP(crcc_w_h_w, CRCC_W_H_W)
5619 CRC_CASE_EXT_BINARYOP(crcc_w_w_w, CRCC_W_W_W)
5620#undef CRC_CASE_EXT_BINARYOP
5621
5622#define CRC_CASE_EXT_UNARYOP(NAME, NODE) \
5623 case Intrinsic::loongarch_##NAME: { \
5624 SDValue NODE = DAG.getNode( \
5625 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
5626 {Chain, Op2, \
5627 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
5628 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
5629 Results.push_back(NODE.getValue(1)); \
5630 break; \
5631 }
5632 CRC_CASE_EXT_UNARYOP(crc_w_d_w, CRC_W_D_W)
5633 CRC_CASE_EXT_UNARYOP(crcc_w_d_w, CRCC_W_D_W)
5634#undef CRC_CASE_EXT_UNARYOP
5635#define CSR_CASE(ID) \
5636 case Intrinsic::loongarch_##ID: { \
5637 if (!Subtarget.is64Bit()) \
5638 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64); \
5639 break; \
5640 }
5641 CSR_CASE(csrrd_d);
5642 CSR_CASE(csrwr_d);
5643 CSR_CASE(csrxchg_d);
5644 CSR_CASE(iocsrrd_d);
5645#undef CSR_CASE
5646 case Intrinsic::loongarch_csrrd_w: {
5647 unsigned Imm = Op2->getAsZExtVal();
5648 if (!isUInt<14>(Imm)) {
5649 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
5650 return;
5651 }
5652 SDValue CSRRDResults =
5653 DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
5654 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
5655 Results.push_back(
5656 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRRDResults.getValue(0)));
5657 Results.push_back(CSRRDResults.getValue(1));
5658 break;
5659 }
5660 case Intrinsic::loongarch_csrwr_w: {
5661 unsigned Imm = N->getConstantOperandVal(3);
5662 if (!isUInt<14>(Imm)) {
5663 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
5664 return;
5665 }
5666 SDValue CSRWRResults =
5667 DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
5668 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
5669 DAG.getConstant(Imm, DL, GRLenVT)});
5670 Results.push_back(
5671 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRWRResults.getValue(0)));
5672 Results.push_back(CSRWRResults.getValue(1));
5673 break;
5674 }
5675 case Intrinsic::loongarch_csrxchg_w: {
5676 unsigned Imm = N->getConstantOperandVal(4);
5677 if (!isUInt<14>(Imm)) {
5678 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
5679 return;
5680 }
5681 SDValue CSRXCHGResults = DAG.getNode(
5682 LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
5683 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
5684 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3)),
5685 DAG.getConstant(Imm, DL, GRLenVT)});
5686 Results.push_back(
5687 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRXCHGResults.getValue(0)));
5688 Results.push_back(CSRXCHGResults.getValue(1));
5689 break;
5690 }
5691#define IOCSRRD_CASE(NAME, NODE) \
5692 case Intrinsic::loongarch_##NAME: { \
5693 SDValue IOCSRRDResults = \
5694 DAG.getNode(LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
5695 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)}); \
5696 Results.push_back( \
5697 DAG.getNode(ISD::TRUNCATE, DL, VT, IOCSRRDResults.getValue(0))); \
5698 Results.push_back(IOCSRRDResults.getValue(1)); \
5699 break; \
5700 }
5701 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
5702 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
5703 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
5704#undef IOCSRRD_CASE
5705 case Intrinsic::loongarch_cpucfg: {
5706 SDValue CPUCFGResults =
5707 DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
5708 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)});
5709 Results.push_back(
5710 DAG.getNode(ISD::TRUNCATE, DL, VT, CPUCFGResults.getValue(0)));
5711 Results.push_back(CPUCFGResults.getValue(1));
5712 break;
5713 }
5714 case Intrinsic::loongarch_lddir_d: {
5715 if (!Subtarget.is64Bit()) {
5716 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64);
5717 return;
5718 }
5719 break;
5720 }
5721 }
5722 break;
5723 }
5724 case ISD::READ_REGISTER: {
5725 if (Subtarget.is64Bit())
5726 DAG.getContext()->emitError(
5727 "On LA64, only 64-bit registers can be read.");
5728 else
5729 DAG.getContext()->emitError(
5730 "On LA32, only 32-bit registers can be read.");
5731 Results.push_back(DAG.getUNDEF(VT));
5732 Results.push_back(N->getOperand(0));
5733 break;
5734 }
5736 replaceINTRINSIC_WO_CHAINResults(N, Results, DAG, Subtarget);
5737 break;
5738 }
5739 case ISD::LROUND: {
5740 SDValue Op0 = N->getOperand(0);
5741 EVT OpVT = Op0.getValueType();
5742 RTLIB::Libcall LC =
5743 OpVT == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
5744 MakeLibCallOptions CallOptions;
5745 CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64);
5746 SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first;
5747 Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
5748 Results.push_back(Result);
5749 break;
5750 }
5751 case ISD::ATOMIC_CMP_SWAP: {
5753 break;
5754 }
5755 case ISD::TRUNCATE: {
5756 MVT VT = N->getSimpleValueType(0);
5757 if (getTypeAction(*DAG.getContext(), VT) != TypeWidenVector)
5758 return;
5759
5760 MVT WidenVT = getTypeToTransformTo(*DAG.getContext(), VT).getSimpleVT();
5761 SDValue In = N->getOperand(0);
5762 EVT InVT = In.getValueType();
5763 EVT InEltVT = InVT.getVectorElementType();
5764 EVT EltVT = VT.getVectorElementType();
5765 unsigned MinElts = VT.getVectorNumElements();
5766 unsigned WidenNumElts = WidenVT.getVectorNumElements();
5767 unsigned InBits = InVT.getSizeInBits();
5768
5769 if ((128 % InBits) == 0 && WidenVT.is128BitVector()) {
5770 if ((InEltVT.getSizeInBits() % EltVT.getSizeInBits()) == 0) {
5771 int Scale = InEltVT.getSizeInBits() / EltVT.getSizeInBits();
5772 SmallVector<int, 16> TruncMask(WidenNumElts, -1);
5773 for (unsigned I = 0; I < MinElts; ++I)
5774 TruncMask[I] = Scale * I;
5775
5776 unsigned WidenNumElts = 128 / In.getScalarValueSizeInBits();
5777 MVT SVT = In.getSimpleValueType().getScalarType();
5778 MVT VT = MVT::getVectorVT(SVT, WidenNumElts);
5779 SDValue WidenIn =
5780 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), In,
5781 DAG.getVectorIdxConstant(0, DL));
5782 assert(isTypeLegal(WidenVT) && isTypeLegal(WidenIn.getValueType()) &&
5783 "Illegal vector type in truncation");
5784 WidenIn = DAG.getBitcast(WidenVT, WidenIn);
5785 Results.push_back(
5786 DAG.getVectorShuffle(WidenVT, DL, WidenIn, WidenIn, TruncMask));
5787 return;
5788 }
5789 }
5790
5791 break;
5792 }
5793 case ISD::SIGN_EXTEND: {
5794 // LASX has native VEXT2XV_* for sign extension.
5795 if (!Subtarget.hasExtLSX() || Subtarget.hasExtLASX())
5796 return;
5797
5798 EVT DstVT = N->getValueType(0);
5799 SDValue Src = N->getOperand(0);
5800 MVT SrcVT = Src.getSimpleValueType();
5801
5802 unsigned SrcEltBits = SrcVT.getScalarSizeInBits();
5803 unsigned DstEltBits = DstVT.getScalarSizeInBits();
5804 unsigned NumElts = DstVT.getVectorNumElements();
5805
5806 if (SrcVT.getSizeInBits() > 128)
5807 return;
5808
5809 if (!DstVT.isVector() || DstVT.getSizeInBits() <= 128)
5810 return;
5811
5812 // Legalize and extend the src to 128-bit first.
5813 if (SrcVT.getSizeInBits() < 128) {
5814 unsigned WidenSrcElts = 128 / SrcEltBits;
5815 MVT WidenSrcVT = MVT::getVectorVT(SrcVT.getScalarType(), WidenSrcElts);
5816 Src = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WidenSrcVT,
5817 DAG.getUNDEF(WidenSrcVT), Src,
5818 DAG.getVectorIdxConstant(0, DL));
5819 SrcVT = WidenSrcVT;
5820
5821 unsigned FirstStageEltBits = 128 / NumElts;
5822 MVT FirstStageEltVT = MVT::getIntegerVT(FirstStageEltBits);
5823 MVT FirstStageVT = MVT::getVectorVT(FirstStageEltVT, NumElts);
5824 Src = DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, DL, FirstStageVT, Src);
5825 SrcVT = FirstStageVT;
5826 SrcEltBits = FirstStageEltBits;
5827 }
5828
5830 Blocks.push_back(Src);
5831
5832 // Sign-extend the src by using SLTI + VILVL + VILVH recursively.
5833 while (SrcEltBits < DstEltBits) {
5834 unsigned NextEltBits = SrcEltBits * 2;
5835 MVT NextEltVT = MVT::getIntegerVT(NextEltBits);
5836 unsigned CurEltsPerBlock = SrcVT.getVectorNumElements();
5837 unsigned NextEltsPerBlock = CurEltsPerBlock / 2;
5838 MVT NextBlockVT = MVT::getVectorVT(NextEltVT, NextEltsPerBlock);
5839
5840 SmallVector<SDValue, 8> NextBlocks;
5841 NextBlocks.reserve(Blocks.size() * 2);
5842 for (SDValue Block : Blocks) {
5843 SDValue Zero = DAG.getConstant(0, DL, SrcVT);
5844 SDValue Mask = DAG.getNode(ISD::SETCC, DL, SrcVT, Block, Zero,
5845 DAG.getCondCode(ISD::SETLT));
5846 SDValue LoInterleaved =
5847 DAG.getNode(LoongArchISD::VILVL, DL, SrcVT, Mask, Block);
5848 SDValue HiInterleaved =
5849 DAG.getNode(LoongArchISD::VILVH, DL, SrcVT, Mask, Block);
5850
5851 NextBlocks.push_back(DAG.getBitcast(NextBlockVT, LoInterleaved));
5852 NextBlocks.push_back(DAG.getBitcast(NextBlockVT, HiInterleaved));
5853 }
5854
5855 Blocks = std::move(NextBlocks);
5856 SrcVT = NextBlockVT;
5857 SrcEltBits = NextEltBits;
5858 }
5859
5860 Results.push_back(DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Blocks));
5861 break;
5862 }
5863 }
5864}
5865
5866/// Try to fold: (and (xor X, -1), Y) -> (vandn X, Y).
///
/// Only 128-bit and 256-bit vector result types are handled. An empty SDValue
/// is returned for every other type, and also when isNOT() recognizes neither
/// operand of the AND. On success both operands are bitcast to the result
/// type and a LoongArchISD::VANDN node is returned whose first operand is the
/// value produced by isNOT().
5868 SelectionDAG &DAG) {
5869 assert(N->getOpcode() == ISD::AND && "Unexpected opcode combine into ANDN");
5870
5871 MVT VT = N->getSimpleValueType(0);
5872 if (!VT.is128BitVector() && !VT.is256BitVector())
5873 return SDValue();
5874
5875 SDValue X, Y;
5876 SDValue N0 = N->getOperand(0);
5877 SDValue N1 = N->getOperand(1);
5878
  // AND is commutative, so the inverted operand may sit on either side.
  // X receives whatever isNOT() returned; Y is the remaining AND operand.
5879 if (SDValue Not = isNOT(N0, DAG)) {
5880 X = Not;
5881 Y = N1;
5882 } else if (SDValue Not = isNOT(N1, DAG)) {
5883 X = Not;
5884 Y = N0;
5885 } else
5886 return SDValue();
5887
  // Give both operands the AND's result type before building the node.
5888 X = DAG.getBitcast(VT, X);
5889 Y = DAG.getBitcast(VT, Y);
5890 return DAG.getNode(LoongArchISD::VANDN, DL, VT, X, Y);
5891}
5892
// Reports whether N is a constant splat and, on success, stores the splatted
// constant in SplatValue.
//
// The answer comes directly from isConstantSplat() on `Node`, queried with
// MinSizeInBits as the minimum splat element size and with IsBigEndian set to
// false. A null `Node` yields false.
// NOTE(review): the declaration of `Node` is not visible in this listing --
// presumably it is N viewed as a BUILD_VECTOR node; confirm.
5893static bool isConstantSplatVector(SDValue N, APInt &SplatValue,
5894 unsigned MinSizeInBits) {
5897
5898 if (!Node)
5899 return false;
5900
  // Outputs of isConstantSplat() that this helper does not hand back to the
  // caller.
5901 APInt SplatUndef;
5902 unsigned SplatBitSize;
5903 bool HasAnyUndefs;
5904
5905 return Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
5906 HasAnyUndefs, MinSizeInBits,
5907 /*IsBigEndian=*/false);
5908}
5909
5912 const LoongArchSubtarget &Subtarget) {
  // Folds "shift X right by a constant splat, then add the last bit that was
  // shifted out" into one LoongArchISD::VSRLR (for srl) or VSRAR (for sra)
  // node. The fold is attempted only when DCI.isBeforeLegalizeOps() is false
  // and the result is a legal vector type with integer elements. An empty
  // SDValue is returned whenever any part of the pattern fails to match.
5913 if (DCI.isBeforeLegalizeOps())
5914 return SDValue();
5915
5916 EVT VT = N->getValueType(0);
5917 if (!VT.isVector())
5918 return SDValue();
5919
5920 if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
5921 return SDValue();
5922
5923 EVT EltVT = VT.getVectorElementType();
5924 if (!EltVT.isInteger())
5925 return SDValue();
5926
5927 // match:
5928 //
5929 // add
5930 // (and
5931 // (srl X, shift-1) / X
5932 // 1)
5933 // (srl/sra X, shift)
5934
5935 SDValue Add0 = N->getOperand(0);
5936 SDValue Add1 = N->getOperand(1);
5937 SDValue And;
5938 SDValue Shr;
5939
  // The AND may be either operand of the add; the other one must then be the
  // shift.
5940 if (Add0.getOpcode() == ISD::AND) {
5941 And = Add0;
5942 Shr = Add1;
5943 } else if (Add1.getOpcode() == ISD::AND) {
5944 And = Add1;
5945 Shr = Add0;
5946 } else {
5947 return SDValue();
5948 }
5949
5950 // match:
5951 //
5952 // srl/sra X, shift
5953
5954 if (Shr.getOpcode() != ISD::SRL && Shr.getOpcode() != ISD::SRA)
5955 return SDValue();
5956
5957 SDValue X = Shr.getOperand(0);
5958 SDValue Shift = Shr.getOperand(1);
5959 APInt ShiftVal;
5960
  // The shift amount must be the same constant in every lane.
5961 if (!isConstantSplatVector(Shift, ShiftVal, EltVT.getSizeInBits()))
5962 return SDValue();
5963
  // A shift by zero leaves no "shift-1" bit to round with.
5964 if (ShiftVal == 0)
5965 return SDValue();
5966
5967 // match:
5968 //
5969 // and
5970 // (srl X, shift-1) / X
5971 // 1
5972
  // Only operand 1 of the AND is inspected for the constant 1.
5973 SDValue One = And.getOperand(1);
5974 APInt SplatVal;
5975
5976 if (!isConstantSplatVector(One, SplatVal, EltVT.getSizeInBits()))
5977 return SDValue();
5978
5979 if (SplatVal != 1)
5980 return SDValue();
5981
5982 if (And.getOperand(0) == X) {
5983 // match:
5984 //
5985 // shift == 1
5986
    // The AND masks X directly, i.e. the rounding bit is bit 0, which is only
    // the last shifted-out bit when the shift amount is exactly 1.
5987 if (ShiftVal != 1)
5988 return SDValue();
5989 } else {
5990 // match:
5991 //
5992 // srl X, shift-1
5993
5994 SDValue Srl = And.getOperand(0);
5995
    // The rounding bit is always extracted with a logical shift, even when
    // the outer shift is arithmetic.
5996 if (Srl.getOpcode() != ISD::SRL)
5997 return SDValue();
5998
5999 if (Srl.getOperand(0) != X)
6000 return SDValue();
6001
6002 // match:
6003 //
6004 // shift-1
6005
6006 SDValue ShiftMinus1 = Srl.getOperand(1);
6007
    // SplatVal is reused here: it now receives the inner shift amount.
6008 if (!isConstantSplatVector(ShiftMinus1, SplatVal, EltVT.getSizeInBits()))
6009 return SDValue();
6010
6011 if (ShiftVal != (SplatVal + 1))
6012 return SDValue();
6013 }
6014
6015 // We matched a rounded right shift pattern and can lower it
6016 // to a single vector rounded shift instruction.
6017
6018 SDLoc DL(N);
6019 return DAG.getNode(Shr.getOpcode() == ISD::SRL ? LoongArchISD::VSRLR
6020 : LoongArchISD::VSRAR,
6021 DL, VT, X, Shift);
6022}
6023
6026 const LoongArchSubtarget &Subtarget) {
  // Combines an AND node once DCI.isBeforeLegalizeOps() is false.
  //
  // combineAndNotIntoVANDN() is tried first. Otherwise, on subtargets with
  // the 32S feature, an AND whose second operand is a constant shifted mask
  // is rewritten to LoongArchISD::BSTRPICK, followed by a SHL when the mask
  // does not start at bit 0. An empty SDValue is returned when no rewrite
  // applies.
6027 if (DCI.isBeforeLegalizeOps())
6028 return SDValue();
6029
6030 SDValue FirstOperand = N->getOperand(0);
6031 SDValue SecondOperand = N->getOperand(1);
6032 unsigned FirstOperandOpc = FirstOperand.getOpcode();
6033 EVT ValTy = N->getValueType(0);
6034 SDLoc DL(N);
6035 uint64_t lsb, msb;
6036 unsigned SMIdx, SMLen;
6037 ConstantSDNode *CN;
6038 SDValue NewOperand;
6039 MVT GRLenVT = Subtarget.getGRLenVT();
6040
6041 if (SDValue R = combineAndNotIntoVANDN(N, DL, DAG))
6042 return R;
6043
6044 // BSTRPICK requires the 32S feature.
6045 if (!Subtarget.has32S())
6046 return SDValue();
6047
6048 // Op's second operand must be a shifted mask.
  // On success SMIdx holds the index of the lowest set bit of the mask and
  // SMLen the number of consecutive set bits.
6049 if (!(CN = dyn_cast<ConstantSDNode>(SecondOperand)) ||
6050 !isShiftedMask_64(CN->getZExtValue(), SMIdx, SMLen))
6051 return SDValue();
6052
6053 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL) {
6054 // Pattern match BSTRPICK.
6055 // $dst = and ((sra or srl) $src , lsb), (2**len - 1)
6056 // => BSTRPICK $dst, $src, msb, lsb
6057 // where msb = lsb + len - 1
6058
6059 // The second operand of the shift must be an immediate.
    // From here on CN refers to the shift amount, no longer to the AND mask.
6060 if (!(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))))
6061 return SDValue();
6062
6063 lsb = CN->getZExtValue();
6064
6065 // Return if the shifted mask does not start at bit 0 or the sum of its
6066 // length and lsb exceeds the word's size.
6067 if (SMIdx != 0 || lsb + SMLen > ValTy.getSizeInBits())
6068 return SDValue();
6069
6070 NewOperand = FirstOperand.getOperand(0);
6071 } else {
6072 // Pattern match BSTRPICK.
6073 // $dst = and $src, (2**len- 1) , if len > 12
6074 // => BSTRPICK $dst, $src, msb, lsb
6075 // where lsb = 0 and msb = len - 1
6076
6077 // If the mask is <= 0xfff, andi can be used instead.
6078 if (CN->getZExtValue() <= 0xfff)
6079 return SDValue();
6080
6081 // Return if the MSB exceeds.
6082 if (SMIdx + SMLen > ValTy.getSizeInBits())
6083 return SDValue();
6084
6085 if (SMIdx > 0) {
6086 // Omit if the constant has more than 2 uses. This is a conservative
6087 // decision. Whether it is a win depends on the HW microarchitecture.
6088 // However it should always be better for 1 and 2 uses.
6089 if (CN->use_size() > 2)
6090 return SDValue();
6091 // Return if the constant can be composed by a single LU12I.W.
6092 if ((CN->getZExtValue() & 0xfff) == 0)
6093 return SDValue();
6094 // Return if the constant can be composed by a single ADDI with
6095 // the zero register.
6096 if (CN->getSExtValue() >= -2048 && CN->getSExtValue() < 0)
6097 return SDValue();
6098 }
6099
6100 lsb = SMIdx;
6101 NewOperand = FirstOperand;
6102 }
6103
6104 msb = lsb + SMLen - 1;
6105 SDValue NR0 = DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy, NewOperand,
6106 DAG.getConstant(msb, DL, GRLenVT),
6107 DAG.getConstant(lsb, DL, GRLenVT));
  // In the shift form, or when the mask starts at bit 0, the BSTRPICK alone
  // is the result.
6108 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL || lsb == 0)
6109 return NR0;
6110 // Try to optimize to
6111 // bstrpick $Rd, $Rs, msb, lsb
6112 // slli $Rd, $Rd, lsb
6113 return DAG.getNode(ISD::SHL, DL, ValTy, NR0,
6114 DAG.getConstant(lsb, DL, GRLenVT));
6115}
6116
6119 const LoongArchSubtarget &Subtarget) {
  // Rewrites a logical right shift of a masked value into a single
  // LoongArchISD::BSTRPICK node (see the pattern below). Returns an empty
  // SDValue when the subtarget lacks 32S, when DCI.isBeforeLegalizeOps() is
  // true, or when the pattern does not match.
6120 // BSTRPICK requires the 32S feature.
6121 if (!Subtarget.has32S())
6122 return SDValue();
6123
6124 if (DCI.isBeforeLegalizeOps())
6125 return SDValue();
6126
6127 // $dst = srl (and $src, Mask), Shamt
6128 // =>
6129 // BSTRPICK $dst, $src, MaskIdx+MaskLen-1, Shamt
6130 // when Mask is a shifted mask, and MaskIdx <= Shamt <= MaskIdx+MaskLen-1
6131 //
6132
6133 SDValue FirstOperand = N->getOperand(0);
6134 ConstantSDNode *CN;
6135 EVT ValTy = N->getValueType(0);
6136 SDLoc DL(N);
6137 MVT GRLenVT = Subtarget.getGRLenVT();
6138 unsigned MaskIdx, MaskLen;
6139 uint64_t Shamt;
6140
6141 // The first operand must be an AND and the second operand of the AND must be
6142 // a shifted mask.
6143 if (FirstOperand.getOpcode() != ISD::AND ||
6144 !(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))) ||
6145 !isShiftedMask_64(CN->getZExtValue(), MaskIdx, MaskLen))
6146 return SDValue();
6147
6148 // The second operand (shift amount) must be an immediate.
  // CN is reused: it now refers to the shift amount rather than the mask.
6149 if (!(CN = dyn_cast<ConstantSDNode>(N->getOperand(1))))
6150 return SDValue();
6151
6152 Shamt = CN->getZExtValue();
  // The shift amount must fall inside the masked bit range.
6153 if (MaskIdx <= Shamt && Shamt <= MaskIdx + MaskLen - 1)
6154 return DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy,
6155 FirstOperand->getOperand(0),
6156 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
6157 DAG.getConstant(Shamt, DL, GRLenVT));
6158
6159 return SDValue();
6160}
6161
6162// Helper to peek through bitops/trunc/setcc to determine size of source vector.
6163// Allows BITCASTCombine to determine what size vector generated a <X x i1>.
//
// Returns true when every leaf reached from Src is either a SETCC/TRUNCATE
// whose input is exactly Size bits wide, or an all-zeros / all-ones
// BUILD_VECTOR. Any opcode not listed in the switch yields false.
6164static bool checkBitcastSrcVectorSize(SDValue Src, unsigned Size,
6165 unsigned Depth) {
6166 // Limit recursion.
  // NOTE(review): the condition guarding this early return is not visible in
  // this listing -- presumably a bound on Depth; confirm against the file.
6168 return false;
6169 switch (Src.getOpcode()) {
6170 case ISD::SETCC:
6171 case ISD::TRUNCATE:
6172 return Src.getOperand(0).getValueSizeInBits() == Size;
6173 case ISD::FREEZE:
6174 return checkBitcastSrcVectorSize(Src.getOperand(0), Size, Depth + 1);
6175 case ISD::AND:
6176 case ISD::XOR:
6177 case ISD::OR:
  // Both inputs of a bitwise operation must qualify.
6178 return checkBitcastSrcVectorSize(Src.getOperand(0), Size, Depth + 1) &&
6179 checkBitcastSrcVectorSize(Src.getOperand(1), Size, Depth + 1);
6180 case ISD::SELECT:
6181 case ISD::VSELECT:
  // The condition must have i1 elements; both selected values must qualify.
6182 return Src.getOperand(0).getScalarValueSizeInBits() == 1 &&
6183 checkBitcastSrcVectorSize(Src.getOperand(1), Size, Depth + 1) &&
6184 checkBitcastSrcVectorSize(Src.getOperand(2), Size, Depth + 1);
6185 case ISD::BUILD_VECTOR:
6186 return ISD::isBuildVectorAllZeros(Src.getNode()) ||
6187 ISD::isBuildVectorAllOnes(Src.getNode());
6188 }
6189 return false;
6190}
6191
6192// Helper to push sign extension of vXi1 SETCC result through bitops.
//
// SETCC, FREEZE, TRUNCATE and BUILD_VECTOR nodes are sign-extended to SExtVT
// directly. AND/XOR/OR and SELECT/VSELECT are rebuilt in SExtVT with their
// value operands extended recursively; a select keeps its original condition
// operand. Any other opcode hits llvm_unreachable.
6194 SDValue Src, const SDLoc &DL) {
6195 switch (Src.getOpcode()) {
6196 case ISD::SETCC:
6197 case ISD::FREEZE:
6198 case ISD::TRUNCATE:
6199 case ISD::BUILD_VECTOR:
6200 return DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, Src);
6201 case ISD::AND:
6202 case ISD::XOR:
6203 case ISD::OR:
6204 return DAG.getNode(
6205 Src.getOpcode(), DL, SExtVT,
6206 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(0), DL),
6207 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(1), DL));
6208 case ISD::SELECT:
6209 case ISD::VSELECT:
6210 return DAG.getSelect(
6211 DL, SExtVT, Src.getOperand(0),
6212 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(1), DL),
6213 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(2), DL));
6214 }
6215 llvm_unreachable("Unexpected node type for vXi1 sign extension");
6216}
6217
// Folds a bitcast of a single-use SETCC into one [X]VMSK* mask node when the
// comparison is against an all-zeros or all-ones vector in one of the forms
// listed in the switch below.
//
// The LSX node is chosen for 128-bit compare operands and the LASX node for
// 256-bit ones (the latter additionally requires 32S). The mask is produced
// in GRLenVT, converted with getZExtOrTrunc to `T`, and bitcast to the
// result type of N. Returns an empty SDValue when nothing matches.
6218static SDValue
6221 const LoongArchSubtarget &Subtarget) {
6222 SDLoc DL(N);
6223 EVT VT = N->getValueType(0);
6224 SDValue Src = N->getOperand(0);
6225 EVT SrcVT = Src.getValueType();
6226
6227 if (Src.getOpcode() != ISD::SETCC || !Src.hasOneUse())
6228 return SDValue();
6229
6230 bool UseLASX;
  // ISD::DELETED_NODE serves as the "no opcode selected" marker.
6231 unsigned Opc = ISD::DELETED_NODE;
6232 EVT CmpVT = Src.getOperand(0).getValueType();
6233 EVT EltVT = CmpVT.getVectorElementType();
6234
6235 if (Subtarget.hasExtLSX() && CmpVT.getSizeInBits() == 128)
6236 UseLASX = false;
6237 else if (Subtarget.has32S() && Subtarget.hasExtLASX() &&
6238 CmpVT.getSizeInBits() == 256)
6239 UseLASX = true;
6240 else
6241 return SDValue();
6242
6243 SDValue SrcN1 = Src.getOperand(1);
6244 switch (cast<CondCodeSDNode>(Src.getOperand(2))->get()) {
6245 default:
6246 break;
6247 case ISD::SETEQ:
6248 // x == 0 => not (vmsknez.b x)
    // (selected here as [X]VMSKEQZ)
6249 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
6250 Opc = UseLASX ? LoongArchISD::XVMSKEQZ : LoongArchISD::VMSKEQZ;
6251 break;
6252 case ISD::SETGT:
6253 // x > -1 => vmskgez.b x
6254 if (ISD::isBuildVectorAllOnes(SrcN1.getNode()) && EltVT == MVT::i8)
6255 Opc = UseLASX ? LoongArchISD::XVMSKGEZ : LoongArchISD::VMSKGEZ;
6256 break;
6257 case ISD::SETGE:
6258 // x >= 0 => vmskgez.b x
6259 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
6260 Opc = UseLASX ? LoongArchISD::XVMSKGEZ : LoongArchISD::VMSKGEZ;
6261 break;
6262 case ISD::SETLT:
6263 // x < 0 => vmskltz.{b,h,w,d} x
6264 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) &&
6265 (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
6266 EltVT == MVT::i64))
6267 Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
6268 break;
6269 case ISD::SETLE:
6270 // x <= -1 => vmskltz.{b,h,w,d} x
6271 if (ISD::isBuildVectorAllOnes(SrcN1.getNode()) &&
6272 (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
6273 EltVT == MVT::i64))
6274 Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
6275 break;
6276 case ISD::SETNE:
6277 // x != 0 => vmsknez.b x
6278 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
6279 Opc = UseLASX ? LoongArchISD::XVMSKNEZ : LoongArchISD::VMSKNEZ;
6280 break;
6281 }
6282
6283 if (Opc == ISD::DELETED_NODE)
6284 return SDValue();
6285
6286 SDValue V = DAG.getNode(Opc, DL, Subtarget.getGRLenVT(), Src.getOperand(0));
  // NOTE(review): the declaration of `T` is not visible in this listing --
  // presumably an integer type with one bit per element of SrcVT; confirm.
6288 V = DAG.getZExtOrTrunc(V, DL, T);
6289 return DAG.getBitcast(VT, V);
6290}
6291
6294 const LoongArchSubtarget &Subtarget) {
  // Combines a bitcast whose source is a vector of i1. It runs only while
  // DCI.isBeforeLegalizeOps() is true.
  //
  // performSETCC_BITCASTCombine() is tried first. Otherwise the i1 vector is
  // sign-extended to a 128-bit or 256-bit integer vector and its sign bits
  // are collected with [X]VMSKLTZ. Returns an empty SDValue for unsupported
  // source types.
6295 SDLoc DL(N);
6296 EVT VT = N->getValueType(0);
6297 SDValue Src = N->getOperand(0);
6298 EVT SrcVT = Src.getValueType();
6299 MVT GRLenVT = Subtarget.getGRLenVT();
6300
6301 if (!DCI.isBeforeLegalizeOps())
6302 return SDValue();
6303
6304 if (!SrcVT.isSimple() || SrcVT.getScalarType() != MVT::i1)
6305 return SDValue();
6306
6307 // Combine SETCC and BITCAST into [X]VMSK{LT,GE,NE} when possible
6308 SDValue Res = performSETCC_BITCASTCombine(N, DAG, DCI, Subtarget);
6309 if (Res)
6310 return Res;
6311
6312 // Generate vXi1 using [X]VMSKLTZ
6313 MVT SExtVT;
6314 unsigned Opc;
6315 bool UseLASX = false;
6316 bool PropagateSExt = false;
6317
  // Give up on single-use compares whose operands are wider than 256 bits.
6318 if (Src.getOpcode() == ISD::SETCC && Src.hasOneUse()) {
6319 EVT CmpVT = Src.getOperand(0).getValueType();
6320 if (CmpVT.getSizeInBits() > 256)
6321 return SDValue();
6322 }
6323
  // Choose the type to sign-extend to. The default for each case is a
  // 128-bit vector; with LASX, and when the i1 vector derives from 256-bit
  // values, the 256-bit type with the same element count is used and the
  // extension is pushed through the producing bit operations.
6324 switch (SrcVT.getSimpleVT().SimpleTy) {
6325 default:
6326 return SDValue();
6327 case MVT::v2i1:
6328 SExtVT = MVT::v2i64;
6329 break;
6330 case MVT::v4i1:
6331 SExtVT = MVT::v4i32;
6332 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
6333 SExtVT = MVT::v4i64;
6334 UseLASX = true;
6335 PropagateSExt = true;
6336 }
6337 break;
6338 case MVT::v8i1:
6339 SExtVT = MVT::v8i16;
6340 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
6341 SExtVT = MVT::v8i32;
6342 UseLASX = true;
6343 PropagateSExt = true;
6344 }
6345 break;
6346 case MVT::v16i1:
6347 SExtVT = MVT::v16i8;
6348 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
6349 SExtVT = MVT::v16i16;
6350 UseLASX = true;
6351 PropagateSExt = true;
6352 }
6353 break;
6354 case MVT::v32i1:
  // v32i1 is always extended to the 256-bit v32i8.
6355 SExtVT = MVT::v32i8;
6356 UseLASX = true;
6357 break;
6358 };
6359 Src = PropagateSExt ? signExtendBitcastSrcVector(DAG, SExtVT, Src, DL)
6360 : DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, Src);
6361
6362 SDValue V;
6363 if (!Subtarget.has32S() || !Subtarget.hasExtLASX()) {
    // Without 32S or LASX a v32i8 value is split in two: each half gets its
    // own VMSKLTZ and the high half's mask is shifted left by 16 before the
    // two are ORed together. Any other LASX-sized case is abandoned.
6364 if (Src.getSimpleValueType() == MVT::v32i8) {
6365 SDValue Lo, Hi;
6366 std::tie(Lo, Hi) = DAG.SplitVector(Src, DL);
6367 Lo = DAG.getNode(LoongArchISD::VMSKLTZ, DL, GRLenVT, Lo);
6368 Hi = DAG.getNode(LoongArchISD::VMSKLTZ, DL, GRLenVT, Hi);
6369 Hi = DAG.getNode(ISD::SHL, DL, GRLenVT, Hi,
6370 DAG.getShiftAmountConstant(16, GRLenVT, DL));
6371 V = DAG.getNode(ISD::OR, DL, GRLenVT, Lo, Hi);
6372 } else if (UseLASX) {
6373 return SDValue();
6374 }
6375 }
6376
  // V is still empty unless the split path above produced it.
6377 if (!V) {
6378 Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
6379 V = DAG.getNode(Opc, DL, GRLenVT, Src);
6380 }
6381
  // NOTE(review): the declaration of `T` is not visible in this listing --
  // presumably an integer type with one bit per element of SrcVT; confirm.
6383 V = DAG.getZExtOrTrunc(V, DL, T);
6384 return DAG.getBitcast(VT, V);
6385}
6386
6389 const LoongArchSubtarget &Subtarget) {
  // Tries to rewrite a 32-bit or 64-bit OR node into LoongArchISD::BSTRINS
  // using the eight patterns documented below.
  //
  // Patterns 1-7 are tried with the operands in their original order and then
  // swapped (label Retry). Pattern 8 is then tried in both orders as well
  // (label Retry2). Returns an empty SDValue when the subtarget lacks 32S,
  // when DCI.isBeforeLegalizeOps() is true, when the width is not 32 or 64,
  // or when no pattern matches.
6390 MVT GRLenVT = Subtarget.getGRLenVT();
6391 EVT ValTy = N->getValueType(0);
6392 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
6393 ConstantSDNode *CN0, *CN1;
6394 SDLoc DL(N);
6395 unsigned ValBits = ValTy.getSizeInBits();
6396 unsigned MaskIdx0, MaskLen0, MaskIdx1, MaskLen1;
6397 unsigned Shamt;
6398 bool SwapAndRetried = false;
6399
6400 // BSTRPICK requires the 32S feature.
6401 if (!Subtarget.has32S())
6402 return SDValue();
6403
6404 if (DCI.isBeforeLegalizeOps())
6405 return SDValue();
6406
6407 if (ValBits != 32 && ValBits != 64)
6408 return SDValue();
6409
6410Retry:
6411 // 1st pattern to match BSTRINS:
6412 // R = or (and X, mask0), (and (shl Y, lsb), mask1)
6413 // where mask1 = (2**size - 1) << lsb, mask0 = ~mask1
6414 // =>
6415 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
  // Note that CN1 is reassigned within the condition: first to the mask of
  // the inner AND, then to the shift amount of the SHL.
6416 if (N0.getOpcode() == ISD::AND &&
6417 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
6418 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
6419 N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL &&
6420 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
6421 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
6422 MaskIdx0 == MaskIdx1 && MaskLen0 == MaskLen1 &&
6423 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
6424 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
6425 (MaskIdx0 + MaskLen0 <= ValBits)) {
6426 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 1\n");
6427 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
6428 N1.getOperand(0).getOperand(0),
6429 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
6430 DAG.getConstant(MaskIdx0, DL, GRLenVT));
6431 }
6432
6433 // 2nd pattern to match BSTRINS:
6434 // R = or (and X, mask0), (shl (and Y, mask1), lsb)
6435 // where mask1 = (2**size - 1), mask0 = ~(mask1 << lsb)
6436 // =>
6437 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
6438 if (N0.getOpcode() == ISD::AND &&
6439 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
6440 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
6441 N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
6442 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
6443 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
6444 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
6445 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
6446 MaskLen0 == MaskLen1 && MaskIdx1 == 0 &&
6447 (MaskIdx0 + MaskLen0 <= ValBits)) {
6448 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 2\n");
6449 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
6450 N1.getOperand(0).getOperand(0),
6451 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
6452 DAG.getConstant(MaskIdx0, DL, GRLenVT));
6453 }
6454
6455 // 3rd pattern to match BSTRINS:
6456 // R = or (and X, mask0), (and Y, mask1)
6457 // where ~mask0 = (2**size - 1) << lsb, mask0 & mask1 = 0
6458 // =>
6459 // R = BSTRINS X, (shr (and Y, mask1), lsb), msb, lsb
6460 // where msb = lsb + size - 1
  // NOTE(review): the range check here is against 64 rather than ValBits,
  // unlike patterns 1, 2 and 4; for 32-bit values the msb below is computed
  // from (MaskLen0 & 31) instead. Confirm this is intentional.
6461 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
6462 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
6463 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
6464 (MaskIdx0 + MaskLen0 <= 64) &&
6465 (CN1 = dyn_cast<ConstantSDNode>(N1->getOperand(1))) &&
6466 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
6467 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 3\n");
6468 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
6469 DAG.getNode(ISD::SRL, DL, N1->getValueType(0), N1,
6470 DAG.getConstant(MaskIdx0, DL, GRLenVT)),
6471 DAG.getConstant(ValBits == 32
6472 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
6473 : (MaskIdx0 + MaskLen0 - 1),
6474 DL, GRLenVT),
6475 DAG.getConstant(MaskIdx0, DL, GRLenVT));
6476 }
6477
6478 // 4th pattern to match BSTRINS:
6479 // R = or (and X, mask), (shl Y, shamt)
6480 // where mask = (2**shamt - 1)
6481 // =>
6482 // R = BSTRINS X, Y, ValBits - 1, shamt
6483 // where ValBits = 32 or 64
6484 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::SHL &&
6485 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
6486 isShiftedMask_64(CN0->getZExtValue(), MaskIdx0, MaskLen0) &&
6487 MaskIdx0 == 0 && (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
6488 (Shamt = CN1->getZExtValue()) == MaskLen0 &&
6489 (MaskIdx0 + MaskLen0 <= ValBits)) {
6490 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 4\n");
6491 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
6492 N1.getOperand(0),
6493 DAG.getConstant((ValBits - 1), DL, GRLenVT),
6494 DAG.getConstant(Shamt, DL, GRLenVT));
6495 }
6496
6497 // 5th pattern to match BSTRINS:
6498 // R = or (and X, mask), const
6499 // where ~mask = (2**size - 1) << lsb, mask & const = 0
6500 // =>
6501 // R = BSTRINS X, (const >> lsb), msb, lsb
6502 // where msb = lsb + size - 1
6503 if (N0.getOpcode() == ISD::AND &&
6504 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
6505 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
6506 (CN1 = dyn_cast<ConstantSDNode>(N1)) &&
6507 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
6508 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 5\n");
6509 return DAG.getNode(
6510 LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
6511 DAG.getSignedConstant(CN1->getSExtValue() >> MaskIdx0, DL, ValTy),
6512 DAG.getConstant(ValBits == 32 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
6513 : (MaskIdx0 + MaskLen0 - 1),
6514 DL, GRLenVT),
6515 DAG.getConstant(MaskIdx0, DL, GRLenVT));
6516 }
6517
6518 // 6th pattern.
6519 // a = b | ((c & mask) << shamt), where all positions in b to be overwritten
6520 // by the incoming bits are known to be zero.
6521 // =>
6522 // a = BSTRINS b, c, shamt + MaskLen - 1, shamt
6523 //
6524 // Note that the 1st pattern is a special situation of the 6th, i.e. the 6th
6525 // pattern is more common than the 1st. So we put the 1st before the 6th in
6526 // order to match as many nodes as possible.
  // NOTE(review): structurally this pattern (shl of and) mirrors the 2nd
  // pattern, while the 7th (and of shl) mirrors the 1st; the cross-references
  // in this note and in the one on the 7th pattern look swapped -- confirm.
6527 ConstantSDNode *CNMask, *CNShamt;
6528 unsigned MaskIdx, MaskLen;
6529 if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
6530 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
6531 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
6532 MaskIdx == 0 && (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
6533 CNShamt->getZExtValue() + MaskLen <= ValBits) {
6534 Shamt = CNShamt->getZExtValue();
    // ShMask marks the bit positions of N0 that the insertion overwrites.
6535 APInt ShMask(ValBits, CNMask->getZExtValue() << Shamt);
6536 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
6537 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 6\n");
6538 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
6539 N1.getOperand(0).getOperand(0),
6540 DAG.getConstant(Shamt + MaskLen - 1, DL, GRLenVT),
6541 DAG.getConstant(Shamt, DL, GRLenVT));
6542 }
6543 }
6544
6545 // 7th pattern.
6546 // a = b | ((c << shamt) & shifted_mask), where all positions in b to be
6547 // overwritten by the incoming bits are known to be zero.
6548 // =>
6549 // a = BSTRINS b, c, MaskIdx + MaskLen - 1, MaskIdx
6550 //
6551 // Similarly, the 7th pattern is more common than the 2nd. So we put the 2nd
6552 // before the 7th in order to match as many nodes as possible.
6553 if (N1.getOpcode() == ISD::AND &&
6554 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
6555 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
6556 N1.getOperand(0).getOpcode() == ISD::SHL &&
6557 (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
6558 CNShamt->getZExtValue() == MaskIdx) {
6559 APInt ShMask(ValBits, CNMask->getZExtValue());
6560 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
6561 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 7\n");
6562 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
6563 N1.getOperand(0).getOperand(0),
6564 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
6565 DAG.getConstant(MaskIdx, DL, GRLenVT));
6566 }
6567 }
6568
6569 // (or a, b) and (or b, a) are equivalent, so swap the operands and retry.
6570 if (!SwapAndRetried) {
6571 std::swap(N0, N1);
6572 SwapAndRetried = true;
6573 goto Retry;
6574 }
6575
  // Reset the flag so pattern 8 also gets one attempt per operand order. At
  // this point N0/N1 are in swapped order relative to the original node.
6576 SwapAndRetried = false;
6577Retry2:
6578 // 8th pattern.
6579 // a = b | (c & shifted_mask), where all positions in b to be overwritten by
6580 // the incoming bits are known to be zero.
6581 // =>
6582 // a = BSTRINS b, c >> MaskIdx, MaskIdx + MaskLen - 1, MaskIdx
6583 //
6584 // Similarly, the 8th pattern is more common than the 4th and 5th patterns. So
6585 // we put it here in order to match as many nodes as possible or generate less
6586 // instructions.
6587 if (N1.getOpcode() == ISD::AND &&
6588 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
6589 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen)) {
6590 APInt ShMask(ValBits, CNMask->getZExtValue());
6591 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
6592 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 8\n");
6593 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
6594 DAG.getNode(ISD::SRL, DL, N1->getValueType(0),
6595 N1->getOperand(0),
6596 DAG.getConstant(MaskIdx, DL, GRLenVT)),
6597 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
6598 DAG.getConstant(MaskIdx, DL, GRLenVT));
6599 }
6600 }
6601 // Swap N0/N1 and retry.
6602 if (!SwapAndRetried) {
6603 std::swap(N0, N1);
6604 SwapAndRetried = true;
6605 goto Retry2;
6606 }
6607
6608 return SDValue();
6609}
6610
6611static bool checkValueWidth(SDValue V, ISD::LoadExtType &ExtType) {
6612 ExtType = ISD::NON_EXTLOAD;
6613
6614 switch (V.getNode()->getOpcode()) {
6615 case ISD::LOAD: {
6616 LoadSDNode *LoadNode = cast<LoadSDNode>(V.getNode());
6617 if ((LoadNode->getMemoryVT() == MVT::i8) ||
6618 (LoadNode->getMemoryVT() == MVT::i16)) {
6619 ExtType = LoadNode->getExtensionType();
6620 return true;
6621 }
6622 return false;
6623 }
6624 case ISD::AssertSext: {
6625 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
6626 if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
6627 ExtType = ISD::SEXTLOAD;
6628 return true;
6629 }
6630 return false;
6631 }
6632 case ISD::AssertZext: {
6633 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
6634 if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
6635 ExtType = ISD::ZEXTLOAD;
6636 return true;
6637 }
6638 return false;
6639 }
6640 default:
6641 return false;
6642 }
6643
6644 return false;
6645}
6646
6647// Eliminate redundant truncation and zero-extension nodes.
6648// * Case 1:
6649// +------------+ +------------+ +------------+
6650// | Input1 | | Input2 | | CC |
6651// +------------+ +------------+ +------------+
6652// | | |
6653// V V +----+
6654// +------------+ +------------+ |
6655// | TRUNCATE | | TRUNCATE | |
6656// +------------+ +------------+ |
6657// | | |
6658// V V |
6659// +------------+ +------------+ |
6660// | ZERO_EXT | | ZERO_EXT | |
6661// +------------+ +------------+ |
6662// | | |
6663// | +-------------+ |
6664// V V | |
6665// +----------------+ | |
6666// | AND | | |
6667// +----------------+ | |
6668// | | |
6669// +---------------+ | |
6670// | | |
6671// V V V
6672// +-------------+
6673// | CMP |
6674// +-------------+
6675// * Case 2:
6676// +------------+ +------------+ +-------------+ +------------+ +------------+
6677// | Input1 | | Input2 | | Constant -1 | | Constant 0 | | CC |
6678// +------------+ +------------+ +-------------+ +------------+ +------------+
6679// | | | | |
6680// V | | | |
6681// +------------+ | | | |
6682// | XOR |<---------------------+ | |
6683// +------------+ | | |
6684// | | | |
6685// V V +---------------+ |
6686// +------------+ +------------+ | |
6687// | TRUNCATE | | TRUNCATE | | +-------------------------+
6688// +------------+ +------------+ | |
6689// | | | |
6690// V V | |
6691// +------------+ +------------+ | |
6692// | ZERO_EXT | | ZERO_EXT | | |
6693// +------------+ +------------+ | |
6694// | | | |
6695// V V | |
6696// +----------------+ | |
6697// | AND | | |
6698// +----------------+ | |
6699// | | |
6700// +---------------+ | |
6701// | | |
6702// V V V
6703// +-------------+
6704// | CMP |
6705// +-------------+
6708 const LoongArchSubtarget &Subtarget) {
6709 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
6710
6711 SDNode *AndNode = N->getOperand(0).getNode();
6712 if (AndNode->getOpcode() != ISD::AND)
6713 return SDValue();
6714
6715 SDValue AndInputValue2 = AndNode->getOperand(1);
6716 if (AndInputValue2.getOpcode() != ISD::ZERO_EXTEND)
6717 return SDValue();
6718
6719 SDValue CmpInputValue = N->getOperand(1);
6720 SDValue AndInputValue1 = AndNode->getOperand(0);
6721 if (AndInputValue1.getOpcode() == ISD::XOR) {
6722 if (CC != ISD::SETEQ && CC != ISD::SETNE)
6723 return SDValue();
6724 ConstantSDNode *CN = dyn_cast<ConstantSDNode>(AndInputValue1.getOperand(1));
6725 if (!CN || !CN->isAllOnes())
6726 return SDValue();
6727 CN = dyn_cast<ConstantSDNode>(CmpInputValue);
6728 if (!CN || !CN->isZero())
6729 return SDValue();
6730 AndInputValue1 = AndInputValue1.getOperand(0);
6731 if (AndInputValue1.getOpcode() != ISD::ZERO_EXTEND)
6732 return SDValue();
6733 } else if (AndInputValue1.getOpcode() == ISD::ZERO_EXTEND) {
6734 if (AndInputValue2 != CmpInputValue)
6735 return SDValue();
6736 } else {
6737 return SDValue();
6738 }
6739
6740 SDValue TruncValue1 = AndInputValue1.getNode()->getOperand(0);
6741 if (TruncValue1.getOpcode() != ISD::TRUNCATE)
6742 return SDValue();
6743
6744 SDValue TruncValue2 = AndInputValue2.getNode()->getOperand(0);
6745 if (TruncValue2.getOpcode() != ISD::TRUNCATE)
6746 return SDValue();
6747
6748 SDValue TruncInputValue1 = TruncValue1.getNode()->getOperand(0);
6749 SDValue TruncInputValue2 = TruncValue2.getNode()->getOperand(0);
6750 ISD::LoadExtType ExtType1;
6751 ISD::LoadExtType ExtType2;
6752
6753 if (!checkValueWidth(TruncInputValue1, ExtType1) ||
6754 !checkValueWidth(TruncInputValue2, ExtType2))
6755 return SDValue();
6756
6757 if (TruncInputValue1->getValueType(0) != TruncInputValue2->getValueType(0) ||
6758 AndNode->getValueType(0) != TruncInputValue1->getValueType(0))
6759 return SDValue();
6760
6761 if ((ExtType2 != ISD::ZEXTLOAD) &&
6762 ((ExtType2 != ISD::SEXTLOAD) && (ExtType1 != ISD::SEXTLOAD)))
6763 return SDValue();
6764
6765 // These truncation and zero-extension nodes are not necessary, remove them.
6766 SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N), AndNode->getValueType(0),
6767 TruncInputValue1, TruncInputValue2);
6768 SDValue NewSetCC =
6769 DAG.getSetCC(SDLoc(N), N->getValueType(0), NewAnd, TruncInputValue2, CC);
6770 DAG.ReplaceAllUsesWith(N, NewSetCC.getNode());
6771 return SDValue(N, 0);
6772}
6773
6774// Combine (loongarch_bitrev_w (loongarch_revb_2w X)) to loongarch_bitrev_4b.
6777 const LoongArchSubtarget &Subtarget) {
6778 if (DCI.isBeforeLegalizeOps())
6779 return SDValue();
6780
6781 SDValue Src = N->getOperand(0);
6782 if (Src.getOpcode() != LoongArchISD::REVB_2W)
6783 return SDValue();
6784
6785 return DAG.getNode(LoongArchISD::BITREV_4B, SDLoc(N), N->getValueType(0),
6786 Src.getOperand(0));
6787}
6788
6789// Perform common combines for BR_CC and SELECT_CC conditions.
6790static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL,
6791 SelectionDAG &DAG, const LoongArchSubtarget &Subtarget) {
6792 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
6793
6794 // As far as arithmetic right shift always saves the sign,
6795 // shift can be omitted.
6796 // Fold setlt (sra X, N), 0 -> setlt X, 0 and
6797 // setge (sra X, N), 0 -> setge X, 0
6798 if (isNullConstant(RHS) && (CCVal == ISD::SETGE || CCVal == ISD::SETLT) &&
6799 LHS.getOpcode() == ISD::SRA) {
6800 LHS = LHS.getOperand(0);
6801 return true;
6802 }
6803
6804 if (!ISD::isIntEqualitySetCC(CCVal))
6805 return false;
6806
6807 // Fold ((setlt X, Y), 0, ne) -> (X, Y, lt)
6808 // Sometimes the setcc is introduced after br_cc/select_cc has been formed.
6809 if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
6810 LHS.getOperand(0).getValueType() == Subtarget.getGRLenVT()) {
6811 // If we're looking for eq 0 instead of ne 0, we need to invert the
6812 // condition.
6813 bool Invert = CCVal == ISD::SETEQ;
6814 CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
6815 if (Invert)
6816 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
6817
6818 RHS = LHS.getOperand(1);
6819 LHS = LHS.getOperand(0);
6820 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
6821
6822 CC = DAG.getCondCode(CCVal);
6823 return true;
6824 }
6825
6826 // Fold ((srl (and X, 1<<C), C), 0, eq/ne) -> ((shl X, GRLen-1-C), 0, ge/lt)
6827 if (isNullConstant(RHS) && LHS.getOpcode() == ISD::SRL && LHS.hasOneUse() &&
6828 LHS.getOperand(1).getOpcode() == ISD::Constant) {
6829 SDValue LHS0 = LHS.getOperand(0);
6830 if (LHS0.getOpcode() == ISD::AND &&
6831 LHS0.getOperand(1).getOpcode() == ISD::Constant) {
6832 uint64_t Mask = LHS0.getConstantOperandVal(1);
6833 uint64_t ShAmt = LHS.getConstantOperandVal(1);
6834 if (isPowerOf2_64(Mask) && Log2_64(Mask) == ShAmt) {
6835 CCVal = CCVal == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
6836 CC = DAG.getCondCode(CCVal);
6837
6838 ShAmt = LHS.getValueSizeInBits() - 1 - ShAmt;
6839 LHS = LHS0.getOperand(0);
6840 if (ShAmt != 0)
6841 LHS =
6842 DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS0.getOperand(0),
6843 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
6844 return true;
6845 }
6846 }
6847 }
6848
6849 // (X, 1, setne) -> (X, 0, seteq) if we can prove X is 0/1.
6850 // This can occur when legalizing some floating point comparisons.
6851 APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
6852 if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
6853 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
6854 CC = DAG.getCondCode(CCVal);
6855 RHS = DAG.getConstant(0, DL, LHS.getValueType());
6856 return true;
6857 }
6858
6859 return false;
6860}
6861
6864 const LoongArchSubtarget &Subtarget) {
6865 SDValue LHS = N->getOperand(1);
6866 SDValue RHS = N->getOperand(2);
6867 SDValue CC = N->getOperand(3);
6868 SDLoc DL(N);
6869
6870 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
6871 return DAG.getNode(LoongArchISD::BR_CC, DL, N->getValueType(0),
6872 N->getOperand(0), LHS, RHS, CC, N->getOperand(4));
6873
6874 return SDValue();
6875}
6876
6879 const LoongArchSubtarget &Subtarget) {
6880 // Transform
6881 SDValue LHS = N->getOperand(0);
6882 SDValue RHS = N->getOperand(1);
6883 SDValue CC = N->getOperand(2);
6884 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
6885 SDValue TrueV = N->getOperand(3);
6886 SDValue FalseV = N->getOperand(4);
6887 SDLoc DL(N);
6888 EVT VT = N->getValueType(0);
6889
6890 // If the True and False values are the same, we don't need a select_cc.
6891 if (TrueV == FalseV)
6892 return TrueV;
6893
6894 // (select (x < 0), y, z) -> x >> (GRLEN - 1) & (y - z) + z
6895 // (select (x >= 0), y, z) -> x >> (GRLEN - 1) & (z - y) + y
6896 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
6898 (CCVal == ISD::CondCode::SETLT || CCVal == ISD::CondCode::SETGE)) {
6899 if (CCVal == ISD::CondCode::SETGE)
6900 std::swap(TrueV, FalseV);
6901
6902 int64_t TrueSImm = cast<ConstantSDNode>(TrueV)->getSExtValue();
6903 int64_t FalseSImm = cast<ConstantSDNode>(FalseV)->getSExtValue();
6904 // Only handle simm12, if it is not in this range, it can be considered as
6905 // register.
6906 if (isInt<12>(TrueSImm) && isInt<12>(FalseSImm) &&
6907 isInt<12>(TrueSImm - FalseSImm)) {
6908 SDValue SRA =
6909 DAG.getNode(ISD::SRA, DL, VT, LHS,
6910 DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT));
6911 SDValue AND =
6912 DAG.getNode(ISD::AND, DL, VT, SRA,
6913 DAG.getSignedConstant(TrueSImm - FalseSImm, DL, VT));
6914 return DAG.getNode(ISD::ADD, DL, VT, AND, FalseV);
6915 }
6916
6917 if (CCVal == ISD::CondCode::SETGE)
6918 std::swap(TrueV, FalseV);
6919 }
6920
6921 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
6922 return DAG.getNode(LoongArchISD::SELECT_CC, DL, N->getValueType(0),
6923 {LHS, RHS, CC, TrueV, FalseV});
6924
6925 return SDValue();
6926}
6927
6928template <unsigned N>
6930 SelectionDAG &DAG,
6931 const LoongArchSubtarget &Subtarget,
6932 bool IsSigned = false) {
6933 SDLoc DL(Node);
6934 auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
6935 // Check the ImmArg.
6936 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
6937 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
6938 DAG.getContext()->emitError(Node->getOperationName(0) +
6939 ": argument out of range.");
6940 return DAG.getNode(ISD::UNDEF, DL, Subtarget.getGRLenVT());
6941 }
6942 return DAG.getConstant(CImm->getZExtValue(), DL, Subtarget.getGRLenVT());
6943}
6944
6945template <unsigned N>
6946static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp,
6947 SelectionDAG &DAG, bool IsSigned = false) {
6948 SDLoc DL(Node);
6949 EVT ResTy = Node->getValueType(0);
6950 auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
6951
6952 // Check the ImmArg.
6953 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
6954 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
6955 DAG.getContext()->emitError(Node->getOperationName(0) +
6956 ": argument out of range.");
6957 return DAG.getNode(ISD::UNDEF, DL, ResTy);
6958 }
6959 return DAG.getConstant(
6961 IsSigned ? CImm->getSExtValue() : CImm->getZExtValue(), IsSigned),
6962 DL, ResTy);
6963}
6964
6966 SDLoc DL(Node);
6967 EVT ResTy = Node->getValueType(0);
6968 SDValue Vec = Node->getOperand(2);
6969 SDValue Mask = DAG.getConstant(Vec.getScalarValueSizeInBits() - 1, DL, ResTy);
6970 return DAG.getNode(ISD::AND, DL, ResTy, Vec, Mask);
6971}
6972
6974 SDLoc DL(Node);
6975 EVT ResTy = Node->getValueType(0);
6976 SDValue One = DAG.getConstant(1, DL, ResTy);
6977 SDValue Bit =
6978 DAG.getNode(ISD::SHL, DL, ResTy, One, truncateVecElts(Node, DAG));
6979
6980 return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1),
6981 DAG.getNOT(DL, Bit, ResTy));
6982}
6983
6984template <unsigned N>
6986 SDLoc DL(Node);
6987 EVT ResTy = Node->getValueType(0);
6988 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
6989 // Check the unsigned ImmArg.
6990 if (!isUInt<N>(CImm->getZExtValue())) {
6991 DAG.getContext()->emitError(Node->getOperationName(0) +
6992 ": argument out of range.");
6993 return DAG.getNode(ISD::UNDEF, DL, ResTy);
6994 }
6995
6996 APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
6997 SDValue Mask = DAG.getConstant(~BitImm, DL, ResTy);
6998
6999 return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1), Mask);
7000}
7001
7002template <unsigned N>
7004 SDLoc DL(Node);
7005 EVT ResTy = Node->getValueType(0);
7006 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
7007 // Check the unsigned ImmArg.
7008 if (!isUInt<N>(CImm->getZExtValue())) {
7009 DAG.getContext()->emitError(Node->getOperationName(0) +
7010 ": argument out of range.");
7011 return DAG.getNode(ISD::UNDEF, DL, ResTy);
7012 }
7013
7014 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
7015 SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
7016 return DAG.getNode(ISD::OR, DL, ResTy, Node->getOperand(1), BitImm);
7017}
7018
7019template <unsigned N>
7021 SDLoc DL(Node);
7022 EVT ResTy = Node->getValueType(0);
7023 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
7024 // Check the unsigned ImmArg.
7025 if (!isUInt<N>(CImm->getZExtValue())) {
7026 DAG.getContext()->emitError(Node->getOperationName(0) +
7027 ": argument out of range.");
7028 return DAG.getNode(ISD::UNDEF, DL, ResTy);
7029 }
7030
7031 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
7032 SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
7033 return DAG.getNode(ISD::XOR, DL, ResTy, Node->getOperand(1), BitImm);
7034}
7035
7036template <unsigned W>
7038 unsigned ResOp) {
7039 unsigned Imm = N->getConstantOperandVal(2);
7040 if (!isUInt<W>(Imm)) {
7041 const StringRef ErrorMsg = "argument out of range";
7042 DAG.getContext()->emitError(N->getOperationName(0) + ": " + ErrorMsg + ".");
7043 return DAG.getUNDEF(N->getValueType(0));
7044 }
7045 SDLoc DL(N);
7046 SDValue Vec = N->getOperand(1);
7047 SDValue Idx = DAG.getConstant(Imm, DL, MVT::i32);
7049 return DAG.getNode(ResOp, DL, N->getValueType(0), Vec, Idx, EltVT);
7050}
7051
7052static SDValue
7055 const LoongArchSubtarget &Subtarget) {
7056 SDLoc DL(N);
7057 switch (N->getConstantOperandVal(0)) {
7058 default:
7059 break;
7060 case Intrinsic::loongarch_lsx_vadd_b:
7061 case Intrinsic::loongarch_lsx_vadd_h:
7062 case Intrinsic::loongarch_lsx_vadd_w:
7063 case Intrinsic::loongarch_lsx_vadd_d:
7064 case Intrinsic::loongarch_lasx_xvadd_b:
7065 case Intrinsic::loongarch_lasx_xvadd_h:
7066 case Intrinsic::loongarch_lasx_xvadd_w:
7067 case Intrinsic::loongarch_lasx_xvadd_d:
7068 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
7069 N->getOperand(2));
7070 case Intrinsic::loongarch_lsx_vaddi_bu:
7071 case Intrinsic::loongarch_lsx_vaddi_hu:
7072 case Intrinsic::loongarch_lsx_vaddi_wu:
7073 case Intrinsic::loongarch_lsx_vaddi_du:
7074 case Intrinsic::loongarch_lasx_xvaddi_bu:
7075 case Intrinsic::loongarch_lasx_xvaddi_hu:
7076 case Intrinsic::loongarch_lasx_xvaddi_wu:
7077 case Intrinsic::loongarch_lasx_xvaddi_du:
7078 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
7079 lowerVectorSplatImm<5>(N, 2, DAG));
7080 case Intrinsic::loongarch_lsx_vsub_b:
7081 case Intrinsic::loongarch_lsx_vsub_h:
7082 case Intrinsic::loongarch_lsx_vsub_w:
7083 case Intrinsic::loongarch_lsx_vsub_d:
7084 case Intrinsic::loongarch_lasx_xvsub_b:
7085 case Intrinsic::loongarch_lasx_xvsub_h:
7086 case Intrinsic::loongarch_lasx_xvsub_w:
7087 case Intrinsic::loongarch_lasx_xvsub_d:
7088 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
7089 N->getOperand(2));
7090 case Intrinsic::loongarch_lsx_vsubi_bu:
7091 case Intrinsic::loongarch_lsx_vsubi_hu:
7092 case Intrinsic::loongarch_lsx_vsubi_wu:
7093 case Intrinsic::loongarch_lsx_vsubi_du:
7094 case Intrinsic::loongarch_lasx_xvsubi_bu:
7095 case Intrinsic::loongarch_lasx_xvsubi_hu:
7096 case Intrinsic::loongarch_lasx_xvsubi_wu:
7097 case Intrinsic::loongarch_lasx_xvsubi_du:
7098 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
7099 lowerVectorSplatImm<5>(N, 2, DAG));
7100 case Intrinsic::loongarch_lsx_vneg_b:
7101 case Intrinsic::loongarch_lsx_vneg_h:
7102 case Intrinsic::loongarch_lsx_vneg_w:
7103 case Intrinsic::loongarch_lsx_vneg_d:
7104 case Intrinsic::loongarch_lasx_xvneg_b:
7105 case Intrinsic::loongarch_lasx_xvneg_h:
7106 case Intrinsic::loongarch_lasx_xvneg_w:
7107 case Intrinsic::loongarch_lasx_xvneg_d:
7108 return DAG.getNode(
7109 ISD::SUB, DL, N->getValueType(0),
7110 DAG.getConstant(
7111 APInt(N->getValueType(0).getScalarType().getSizeInBits(), 0,
7112 /*isSigned=*/true),
7113 SDLoc(N), N->getValueType(0)),
7114 N->getOperand(1));
7115 case Intrinsic::loongarch_lsx_vmax_b:
7116 case Intrinsic::loongarch_lsx_vmax_h:
7117 case Intrinsic::loongarch_lsx_vmax_w:
7118 case Intrinsic::loongarch_lsx_vmax_d:
7119 case Intrinsic::loongarch_lasx_xvmax_b:
7120 case Intrinsic::loongarch_lasx_xvmax_h:
7121 case Intrinsic::loongarch_lasx_xvmax_w:
7122 case Intrinsic::loongarch_lasx_xvmax_d:
7123 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
7124 N->getOperand(2));
7125 case Intrinsic::loongarch_lsx_vmax_bu:
7126 case Intrinsic::loongarch_lsx_vmax_hu:
7127 case Intrinsic::loongarch_lsx_vmax_wu:
7128 case Intrinsic::loongarch_lsx_vmax_du:
7129 case Intrinsic::loongarch_lasx_xvmax_bu:
7130 case Intrinsic::loongarch_lasx_xvmax_hu:
7131 case Intrinsic::loongarch_lasx_xvmax_wu:
7132 case Intrinsic::loongarch_lasx_xvmax_du:
7133 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
7134 N->getOperand(2));
7135 case Intrinsic::loongarch_lsx_vmaxi_b:
7136 case Intrinsic::loongarch_lsx_vmaxi_h:
7137 case Intrinsic::loongarch_lsx_vmaxi_w:
7138 case Intrinsic::loongarch_lsx_vmaxi_d:
7139 case Intrinsic::loongarch_lasx_xvmaxi_b:
7140 case Intrinsic::loongarch_lasx_xvmaxi_h:
7141 case Intrinsic::loongarch_lasx_xvmaxi_w:
7142 case Intrinsic::loongarch_lasx_xvmaxi_d:
7143 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
7144 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
7145 case Intrinsic::loongarch_lsx_vmaxi_bu:
7146 case Intrinsic::loongarch_lsx_vmaxi_hu:
7147 case Intrinsic::loongarch_lsx_vmaxi_wu:
7148 case Intrinsic::loongarch_lsx_vmaxi_du:
7149 case Intrinsic::loongarch_lasx_xvmaxi_bu:
7150 case Intrinsic::loongarch_lasx_xvmaxi_hu:
7151 case Intrinsic::loongarch_lasx_xvmaxi_wu:
7152 case Intrinsic::loongarch_lasx_xvmaxi_du:
7153 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
7154 lowerVectorSplatImm<5>(N, 2, DAG));
7155 case Intrinsic::loongarch_lsx_vmin_b:
7156 case Intrinsic::loongarch_lsx_vmin_h:
7157 case Intrinsic::loongarch_lsx_vmin_w:
7158 case Intrinsic::loongarch_lsx_vmin_d:
7159 case Intrinsic::loongarch_lasx_xvmin_b:
7160 case Intrinsic::loongarch_lasx_xvmin_h:
7161 case Intrinsic::loongarch_lasx_xvmin_w:
7162 case Intrinsic::loongarch_lasx_xvmin_d:
7163 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
7164 N->getOperand(2));
7165 case Intrinsic::loongarch_lsx_vmin_bu:
7166 case Intrinsic::loongarch_lsx_vmin_hu:
7167 case Intrinsic::loongarch_lsx_vmin_wu:
7168 case Intrinsic::loongarch_lsx_vmin_du:
7169 case Intrinsic::loongarch_lasx_xvmin_bu:
7170 case Intrinsic::loongarch_lasx_xvmin_hu:
7171 case Intrinsic::loongarch_lasx_xvmin_wu:
7172 case Intrinsic::loongarch_lasx_xvmin_du:
7173 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
7174 N->getOperand(2));
7175 case Intrinsic::loongarch_lsx_vmini_b:
7176 case Intrinsic::loongarch_lsx_vmini_h:
7177 case Intrinsic::loongarch_lsx_vmini_w:
7178 case Intrinsic::loongarch_lsx_vmini_d:
7179 case Intrinsic::loongarch_lasx_xvmini_b:
7180 case Intrinsic::loongarch_lasx_xvmini_h:
7181 case Intrinsic::loongarch_lasx_xvmini_w:
7182 case Intrinsic::loongarch_lasx_xvmini_d:
7183 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
7184 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
7185 case Intrinsic::loongarch_lsx_vmini_bu:
7186 case Intrinsic::loongarch_lsx_vmini_hu:
7187 case Intrinsic::loongarch_lsx_vmini_wu:
7188 case Intrinsic::loongarch_lsx_vmini_du:
7189 case Intrinsic::loongarch_lasx_xvmini_bu:
7190 case Intrinsic::loongarch_lasx_xvmini_hu:
7191 case Intrinsic::loongarch_lasx_xvmini_wu:
7192 case Intrinsic::loongarch_lasx_xvmini_du:
7193 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
7194 lowerVectorSplatImm<5>(N, 2, DAG));
7195 case Intrinsic::loongarch_lsx_vmul_b:
7196 case Intrinsic::loongarch_lsx_vmul_h:
7197 case Intrinsic::loongarch_lsx_vmul_w:
7198 case Intrinsic::loongarch_lsx_vmul_d:
7199 case Intrinsic::loongarch_lasx_xvmul_b:
7200 case Intrinsic::loongarch_lasx_xvmul_h:
7201 case Intrinsic::loongarch_lasx_xvmul_w:
7202 case Intrinsic::loongarch_lasx_xvmul_d:
7203 return DAG.getNode(ISD::MUL, DL, N->getValueType(0), N->getOperand(1),
7204 N->getOperand(2));
7205 case Intrinsic::loongarch_lsx_vmadd_b:
7206 case Intrinsic::loongarch_lsx_vmadd_h:
7207 case Intrinsic::loongarch_lsx_vmadd_w:
7208 case Intrinsic::loongarch_lsx_vmadd_d:
7209 case Intrinsic::loongarch_lasx_xvmadd_b:
7210 case Intrinsic::loongarch_lasx_xvmadd_h:
7211 case Intrinsic::loongarch_lasx_xvmadd_w:
7212 case Intrinsic::loongarch_lasx_xvmadd_d: {
7213 EVT ResTy = N->getValueType(0);
7214 return DAG.getNode(ISD::ADD, SDLoc(N), ResTy, N->getOperand(1),
7215 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
7216 N->getOperand(3)));
7217 }
7218 case Intrinsic::loongarch_lsx_vmsub_b:
7219 case Intrinsic::loongarch_lsx_vmsub_h:
7220 case Intrinsic::loongarch_lsx_vmsub_w:
7221 case Intrinsic::loongarch_lsx_vmsub_d:
7222 case Intrinsic::loongarch_lasx_xvmsub_b:
7223 case Intrinsic::loongarch_lasx_xvmsub_h:
7224 case Intrinsic::loongarch_lasx_xvmsub_w:
7225 case Intrinsic::loongarch_lasx_xvmsub_d: {
7226 EVT ResTy = N->getValueType(0);
7227 return DAG.getNode(ISD::SUB, SDLoc(N), ResTy, N->getOperand(1),
7228 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
7229 N->getOperand(3)));
7230 }
7231 case Intrinsic::loongarch_lsx_vdiv_b:
7232 case Intrinsic::loongarch_lsx_vdiv_h:
7233 case Intrinsic::loongarch_lsx_vdiv_w:
7234 case Intrinsic::loongarch_lsx_vdiv_d:
7235 case Intrinsic::loongarch_lasx_xvdiv_b:
7236 case Intrinsic::loongarch_lasx_xvdiv_h:
7237 case Intrinsic::loongarch_lasx_xvdiv_w:
7238 case Intrinsic::loongarch_lasx_xvdiv_d:
7239 return DAG.getNode(ISD::SDIV, DL, N->getValueType(0), N->getOperand(1),
7240 N->getOperand(2));
7241 case Intrinsic::loongarch_lsx_vdiv_bu:
7242 case Intrinsic::loongarch_lsx_vdiv_hu:
7243 case Intrinsic::loongarch_lsx_vdiv_wu:
7244 case Intrinsic::loongarch_lsx_vdiv_du:
7245 case Intrinsic::loongarch_lasx_xvdiv_bu:
7246 case Intrinsic::loongarch_lasx_xvdiv_hu:
7247 case Intrinsic::loongarch_lasx_xvdiv_wu:
7248 case Intrinsic::loongarch_lasx_xvdiv_du:
7249 return DAG.getNode(ISD::UDIV, DL, N->getValueType(0), N->getOperand(1),
7250 N->getOperand(2));
7251 case Intrinsic::loongarch_lsx_vmod_b:
7252 case Intrinsic::loongarch_lsx_vmod_h:
7253 case Intrinsic::loongarch_lsx_vmod_w:
7254 case Intrinsic::loongarch_lsx_vmod_d:
7255 case Intrinsic::loongarch_lasx_xvmod_b:
7256 case Intrinsic::loongarch_lasx_xvmod_h:
7257 case Intrinsic::loongarch_lasx_xvmod_w:
7258 case Intrinsic::loongarch_lasx_xvmod_d:
7259 return DAG.getNode(ISD::SREM, DL, N->getValueType(0), N->getOperand(1),
7260 N->getOperand(2));
7261 case Intrinsic::loongarch_lsx_vmod_bu:
7262 case Intrinsic::loongarch_lsx_vmod_hu:
7263 case Intrinsic::loongarch_lsx_vmod_wu:
7264 case Intrinsic::loongarch_lsx_vmod_du:
7265 case Intrinsic::loongarch_lasx_xvmod_bu:
7266 case Intrinsic::loongarch_lasx_xvmod_hu:
7267 case Intrinsic::loongarch_lasx_xvmod_wu:
7268 case Intrinsic::loongarch_lasx_xvmod_du:
7269 return DAG.getNode(ISD::UREM, DL, N->getValueType(0), N->getOperand(1),
7270 N->getOperand(2));
7271 case Intrinsic::loongarch_lsx_vand_v:
7272 case Intrinsic::loongarch_lasx_xvand_v:
7273 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
7274 N->getOperand(2));
7275 case Intrinsic::loongarch_lsx_vor_v:
7276 case Intrinsic::loongarch_lasx_xvor_v:
7277 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
7278 N->getOperand(2));
7279 case Intrinsic::loongarch_lsx_vxor_v:
7280 case Intrinsic::loongarch_lasx_xvxor_v:
7281 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
7282 N->getOperand(2));
7283 case Intrinsic::loongarch_lsx_vnor_v:
7284 case Intrinsic::loongarch_lasx_xvnor_v: {
7285 SDValue Res = DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
7286 N->getOperand(2));
7287 return DAG.getNOT(DL, Res, Res->getValueType(0));
7288 }
7289 case Intrinsic::loongarch_lsx_vandi_b:
7290 case Intrinsic::loongarch_lasx_xvandi_b:
7291 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
7292 lowerVectorSplatImm<8>(N, 2, DAG));
7293 case Intrinsic::loongarch_lsx_vori_b:
7294 case Intrinsic::loongarch_lasx_xvori_b:
7295 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
7296 lowerVectorSplatImm<8>(N, 2, DAG));
7297 case Intrinsic::loongarch_lsx_vxori_b:
7298 case Intrinsic::loongarch_lasx_xvxori_b:
7299 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
7300 lowerVectorSplatImm<8>(N, 2, DAG));
7301 case Intrinsic::loongarch_lsx_vsll_b:
7302 case Intrinsic::loongarch_lsx_vsll_h:
7303 case Intrinsic::loongarch_lsx_vsll_w:
7304 case Intrinsic::loongarch_lsx_vsll_d:
7305 case Intrinsic::loongarch_lasx_xvsll_b:
7306 case Intrinsic::loongarch_lasx_xvsll_h:
7307 case Intrinsic::loongarch_lasx_xvsll_w:
7308 case Intrinsic::loongarch_lasx_xvsll_d:
7309 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
7310 truncateVecElts(N, DAG));
7311 case Intrinsic::loongarch_lsx_vslli_b:
7312 case Intrinsic::loongarch_lasx_xvslli_b:
7313 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
7314 lowerVectorSplatImm<3>(N, 2, DAG));
7315 case Intrinsic::loongarch_lsx_vslli_h:
7316 case Intrinsic::loongarch_lasx_xvslli_h:
7317 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
7318 lowerVectorSplatImm<4>(N, 2, DAG));
7319 case Intrinsic::loongarch_lsx_vslli_w:
7320 case Intrinsic::loongarch_lasx_xvslli_w:
7321 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
7322 lowerVectorSplatImm<5>(N, 2, DAG));
7323 case Intrinsic::loongarch_lsx_vslli_d:
7324 case Intrinsic::loongarch_lasx_xvslli_d:
7325 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
7326 lowerVectorSplatImm<6>(N, 2, DAG));
7327 case Intrinsic::loongarch_lsx_vsrl_b:
7328 case Intrinsic::loongarch_lsx_vsrl_h:
7329 case Intrinsic::loongarch_lsx_vsrl_w:
7330 case Intrinsic::loongarch_lsx_vsrl_d:
7331 case Intrinsic::loongarch_lasx_xvsrl_b:
7332 case Intrinsic::loongarch_lasx_xvsrl_h:
7333 case Intrinsic::loongarch_lasx_xvsrl_w:
7334 case Intrinsic::loongarch_lasx_xvsrl_d:
7335 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
7336 truncateVecElts(N, DAG));
7337 case Intrinsic::loongarch_lsx_vsrli_b:
7338 case Intrinsic::loongarch_lasx_xvsrli_b:
7339 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
7340 lowerVectorSplatImm<3>(N, 2, DAG));
7341 case Intrinsic::loongarch_lsx_vsrli_h:
7342 case Intrinsic::loongarch_lasx_xvsrli_h:
7343 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
7344 lowerVectorSplatImm<4>(N, 2, DAG));
7345 case Intrinsic::loongarch_lsx_vsrli_w:
7346 case Intrinsic::loongarch_lasx_xvsrli_w:
7347 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
7348 lowerVectorSplatImm<5>(N, 2, DAG));
7349 case Intrinsic::loongarch_lsx_vsrli_d:
7350 case Intrinsic::loongarch_lasx_xvsrli_d:
7351 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
7352 lowerVectorSplatImm<6>(N, 2, DAG));
7353 case Intrinsic::loongarch_lsx_vsra_b:
7354 case Intrinsic::loongarch_lsx_vsra_h:
7355 case Intrinsic::loongarch_lsx_vsra_w:
7356 case Intrinsic::loongarch_lsx_vsra_d:
7357 case Intrinsic::loongarch_lasx_xvsra_b:
7358 case Intrinsic::loongarch_lasx_xvsra_h:
7359 case Intrinsic::loongarch_lasx_xvsra_w:
7360 case Intrinsic::loongarch_lasx_xvsra_d:
7361 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
7362 truncateVecElts(N, DAG));
7363 case Intrinsic::loongarch_lsx_vsrai_b:
7364 case Intrinsic::loongarch_lasx_xvsrai_b:
7365 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
7366 lowerVectorSplatImm<3>(N, 2, DAG));
7367 case Intrinsic::loongarch_lsx_vsrai_h:
7368 case Intrinsic::loongarch_lasx_xvsrai_h:
7369 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
7370 lowerVectorSplatImm<4>(N, 2, DAG));
7371 case Intrinsic::loongarch_lsx_vsrai_w:
7372 case Intrinsic::loongarch_lasx_xvsrai_w:
7373 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
7374 lowerVectorSplatImm<5>(N, 2, DAG));
7375 case Intrinsic::loongarch_lsx_vsrai_d:
7376 case Intrinsic::loongarch_lasx_xvsrai_d:
7377 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
7378 lowerVectorSplatImm<6>(N, 2, DAG));
7379 case Intrinsic::loongarch_lsx_vclz_b:
7380 case Intrinsic::loongarch_lsx_vclz_h:
7381 case Intrinsic::loongarch_lsx_vclz_w:
7382 case Intrinsic::loongarch_lsx_vclz_d:
7383 case Intrinsic::loongarch_lasx_xvclz_b:
7384 case Intrinsic::loongarch_lasx_xvclz_h:
7385 case Intrinsic::loongarch_lasx_xvclz_w:
7386 case Intrinsic::loongarch_lasx_xvclz_d:
7387 return DAG.getNode(ISD::CTLZ, DL, N->getValueType(0), N->getOperand(1));
7388 case Intrinsic::loongarch_lsx_vpcnt_b:
7389 case Intrinsic::loongarch_lsx_vpcnt_h:
7390 case Intrinsic::loongarch_lsx_vpcnt_w:
7391 case Intrinsic::loongarch_lsx_vpcnt_d:
7392 case Intrinsic::loongarch_lasx_xvpcnt_b:
7393 case Intrinsic::loongarch_lasx_xvpcnt_h:
7394 case Intrinsic::loongarch_lasx_xvpcnt_w:
7395 case Intrinsic::loongarch_lasx_xvpcnt_d:
7396 return DAG.getNode(ISD::CTPOP, DL, N->getValueType(0), N->getOperand(1));
7397 case Intrinsic::loongarch_lsx_vbitclr_b:
7398 case Intrinsic::loongarch_lsx_vbitclr_h:
7399 case Intrinsic::loongarch_lsx_vbitclr_w:
7400 case Intrinsic::loongarch_lsx_vbitclr_d:
7401 case Intrinsic::loongarch_lasx_xvbitclr_b:
7402 case Intrinsic::loongarch_lasx_xvbitclr_h:
7403 case Intrinsic::loongarch_lasx_xvbitclr_w:
7404 case Intrinsic::loongarch_lasx_xvbitclr_d:
7405 return lowerVectorBitClear(N, DAG);
7406 case Intrinsic::loongarch_lsx_vbitclri_b:
7407 case Intrinsic::loongarch_lasx_xvbitclri_b:
7408 return lowerVectorBitClearImm<3>(N, DAG);
7409 case Intrinsic::loongarch_lsx_vbitclri_h:
7410 case Intrinsic::loongarch_lasx_xvbitclri_h:
7411 return lowerVectorBitClearImm<4>(N, DAG);
7412 case Intrinsic::loongarch_lsx_vbitclri_w:
7413 case Intrinsic::loongarch_lasx_xvbitclri_w:
7414 return lowerVectorBitClearImm<5>(N, DAG);
7415 case Intrinsic::loongarch_lsx_vbitclri_d:
7416 case Intrinsic::loongarch_lasx_xvbitclri_d:
7417 return lowerVectorBitClearImm<6>(N, DAG);
7418 case Intrinsic::loongarch_lsx_vbitset_b:
7419 case Intrinsic::loongarch_lsx_vbitset_h:
7420 case Intrinsic::loongarch_lsx_vbitset_w:
7421 case Intrinsic::loongarch_lsx_vbitset_d:
7422 case Intrinsic::loongarch_lasx_xvbitset_b:
7423 case Intrinsic::loongarch_lasx_xvbitset_h:
7424 case Intrinsic::loongarch_lasx_xvbitset_w:
7425 case Intrinsic::loongarch_lasx_xvbitset_d: {
7426 EVT VecTy = N->getValueType(0);
7427 SDValue One = DAG.getConstant(1, DL, VecTy);
7428 return DAG.getNode(
7429 ISD::OR, DL, VecTy, N->getOperand(1),
7430 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
7431 }
7432 case Intrinsic::loongarch_lsx_vbitseti_b:
7433 case Intrinsic::loongarch_lasx_xvbitseti_b:
7434 return lowerVectorBitSetImm<3>(N, DAG);
7435 case Intrinsic::loongarch_lsx_vbitseti_h:
7436 case Intrinsic::loongarch_lasx_xvbitseti_h:
7437 return lowerVectorBitSetImm<4>(N, DAG);
7438 case Intrinsic::loongarch_lsx_vbitseti_w:
7439 case Intrinsic::loongarch_lasx_xvbitseti_w:
7440 return lowerVectorBitSetImm<5>(N, DAG);
7441 case Intrinsic::loongarch_lsx_vbitseti_d:
7442 case Intrinsic::loongarch_lasx_xvbitseti_d:
7443 return lowerVectorBitSetImm<6>(N, DAG);
7444 case Intrinsic::loongarch_lsx_vbitrev_b:
7445 case Intrinsic::loongarch_lsx_vbitrev_h:
7446 case Intrinsic::loongarch_lsx_vbitrev_w:
7447 case Intrinsic::loongarch_lsx_vbitrev_d:
7448 case Intrinsic::loongarch_lasx_xvbitrev_b:
7449 case Intrinsic::loongarch_lasx_xvbitrev_h:
7450 case Intrinsic::loongarch_lasx_xvbitrev_w:
7451 case Intrinsic::loongarch_lasx_xvbitrev_d: {
7452 EVT VecTy = N->getValueType(0);
7453 SDValue One = DAG.getConstant(1, DL, VecTy);
7454 return DAG.getNode(
7455 ISD::XOR, DL, VecTy, N->getOperand(1),
7456 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
7457 }
7458 case Intrinsic::loongarch_lsx_vbitrevi_b:
7459 case Intrinsic::loongarch_lasx_xvbitrevi_b:
7460 return lowerVectorBitRevImm<3>(N, DAG);
7461 case Intrinsic::loongarch_lsx_vbitrevi_h:
7462 case Intrinsic::loongarch_lasx_xvbitrevi_h:
7463 return lowerVectorBitRevImm<4>(N, DAG);
7464 case Intrinsic::loongarch_lsx_vbitrevi_w:
7465 case Intrinsic::loongarch_lasx_xvbitrevi_w:
7466 return lowerVectorBitRevImm<5>(N, DAG);
7467 case Intrinsic::loongarch_lsx_vbitrevi_d:
7468 case Intrinsic::loongarch_lasx_xvbitrevi_d:
7469 return lowerVectorBitRevImm<6>(N, DAG);
7470 case Intrinsic::loongarch_lsx_vfadd_s:
7471 case Intrinsic::loongarch_lsx_vfadd_d:
7472 case Intrinsic::loongarch_lasx_xvfadd_s:
7473 case Intrinsic::loongarch_lasx_xvfadd_d:
7474 return DAG.getNode(ISD::FADD, DL, N->getValueType(0), N->getOperand(1),
7475 N->getOperand(2));
7476 case Intrinsic::loongarch_lsx_vfsub_s:
7477 case Intrinsic::loongarch_lsx_vfsub_d:
7478 case Intrinsic::loongarch_lasx_xvfsub_s:
7479 case Intrinsic::loongarch_lasx_xvfsub_d:
7480 return DAG.getNode(ISD::FSUB, DL, N->getValueType(0), N->getOperand(1),
7481 N->getOperand(2));
7482 case Intrinsic::loongarch_lsx_vfmul_s:
7483 case Intrinsic::loongarch_lsx_vfmul_d:
7484 case Intrinsic::loongarch_lasx_xvfmul_s:
7485 case Intrinsic::loongarch_lasx_xvfmul_d:
7486 return DAG.getNode(ISD::FMUL, DL, N->getValueType(0), N->getOperand(1),
7487 N->getOperand(2));
7488 case Intrinsic::loongarch_lsx_vfdiv_s:
7489 case Intrinsic::loongarch_lsx_vfdiv_d:
7490 case Intrinsic::loongarch_lasx_xvfdiv_s:
7491 case Intrinsic::loongarch_lasx_xvfdiv_d:
7492 return DAG.getNode(ISD::FDIV, DL, N->getValueType(0), N->getOperand(1),
7493 N->getOperand(2));
7494 case Intrinsic::loongarch_lsx_vfmadd_s:
7495 case Intrinsic::loongarch_lsx_vfmadd_d:
7496 case Intrinsic::loongarch_lasx_xvfmadd_s:
7497 case Intrinsic::loongarch_lasx_xvfmadd_d:
7498 return DAG.getNode(ISD::FMA, DL, N->getValueType(0), N->getOperand(1),
7499 N->getOperand(2), N->getOperand(3));
7500 case Intrinsic::loongarch_lsx_vinsgr2vr_b:
7501 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
7502 N->getOperand(1), N->getOperand(2),
7503 legalizeIntrinsicImmArg<4>(N, 3, DAG, Subtarget));
7504 case Intrinsic::loongarch_lsx_vinsgr2vr_h:
7505 case Intrinsic::loongarch_lasx_xvinsgr2vr_w:
7506 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
7507 N->getOperand(1), N->getOperand(2),
7508 legalizeIntrinsicImmArg<3>(N, 3, DAG, Subtarget));
7509 case Intrinsic::loongarch_lsx_vinsgr2vr_w:
7510 case Intrinsic::loongarch_lasx_xvinsgr2vr_d:
7511 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
7512 N->getOperand(1), N->getOperand(2),
7513 legalizeIntrinsicImmArg<2>(N, 3, DAG, Subtarget));
7514 case Intrinsic::loongarch_lsx_vinsgr2vr_d:
7515 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
7516 N->getOperand(1), N->getOperand(2),
7517 legalizeIntrinsicImmArg<1>(N, 3, DAG, Subtarget));
7518 case Intrinsic::loongarch_lsx_vreplgr2vr_b:
7519 case Intrinsic::loongarch_lsx_vreplgr2vr_h:
7520 case Intrinsic::loongarch_lsx_vreplgr2vr_w:
7521 case Intrinsic::loongarch_lsx_vreplgr2vr_d:
7522 case Intrinsic::loongarch_lasx_xvreplgr2vr_b:
7523 case Intrinsic::loongarch_lasx_xvreplgr2vr_h:
7524 case Intrinsic::loongarch_lasx_xvreplgr2vr_w:
7525 case Intrinsic::loongarch_lasx_xvreplgr2vr_d:
7526 return DAG.getNode(LoongArchISD::VREPLGR2VR, DL, N->getValueType(0),
7527 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
7528 N->getOperand(1)));
7529 case Intrinsic::loongarch_lsx_vreplve_b:
7530 case Intrinsic::loongarch_lsx_vreplve_h:
7531 case Intrinsic::loongarch_lsx_vreplve_w:
7532 case Intrinsic::loongarch_lsx_vreplve_d:
7533 case Intrinsic::loongarch_lasx_xvreplve_b:
7534 case Intrinsic::loongarch_lasx_xvreplve_h:
7535 case Intrinsic::loongarch_lasx_xvreplve_w:
7536 case Intrinsic::loongarch_lasx_xvreplve_d:
7537 return DAG.getNode(LoongArchISD::VREPLVE, DL, N->getValueType(0),
7538 N->getOperand(1),
7539 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
7540 N->getOperand(2)));
7541 case Intrinsic::loongarch_lsx_vpickve2gr_b:
7542 if (!Subtarget.is64Bit())
7543 return lowerVectorPickVE2GR<4>(N, DAG, LoongArchISD::VPICK_SEXT_ELT);
7544 break;
7545 case Intrinsic::loongarch_lsx_vpickve2gr_h:
7546 case Intrinsic::loongarch_lasx_xvpickve2gr_w:
7547 if (!Subtarget.is64Bit())
7548 return lowerVectorPickVE2GR<3>(N, DAG, LoongArchISD::VPICK_SEXT_ELT);
7549 break;
7550 case Intrinsic::loongarch_lsx_vpickve2gr_w:
7551 if (!Subtarget.is64Bit())
7552 return lowerVectorPickVE2GR<2>(N, DAG, LoongArchISD::VPICK_SEXT_ELT);
7553 break;
7554 case Intrinsic::loongarch_lsx_vpickve2gr_bu:
7555 if (!Subtarget.is64Bit())
7556 return lowerVectorPickVE2GR<4>(N, DAG, LoongArchISD::VPICK_ZEXT_ELT);
7557 break;
7558 case Intrinsic::loongarch_lsx_vpickve2gr_hu:
7559 case Intrinsic::loongarch_lasx_xvpickve2gr_wu:
7560 if (!Subtarget.is64Bit())
7561 return lowerVectorPickVE2GR<3>(N, DAG, LoongArchISD::VPICK_ZEXT_ELT);
7562 break;
7563 case Intrinsic::loongarch_lsx_vpickve2gr_wu:
7564 if (!Subtarget.is64Bit())
7565 return lowerVectorPickVE2GR<2>(N, DAG, LoongArchISD::VPICK_ZEXT_ELT);
7566 break;
7567 case Intrinsic::loongarch_lsx_bz_b:
7568 case Intrinsic::loongarch_lsx_bz_h:
7569 case Intrinsic::loongarch_lsx_bz_w:
7570 case Intrinsic::loongarch_lsx_bz_d:
7571 case Intrinsic::loongarch_lasx_xbz_b:
7572 case Intrinsic::loongarch_lasx_xbz_h:
7573 case Intrinsic::loongarch_lasx_xbz_w:
7574 case Intrinsic::loongarch_lasx_xbz_d:
7575 if (!Subtarget.is64Bit())
7576 return DAG.getNode(LoongArchISD::VALL_ZERO, DL, N->getValueType(0),
7577 N->getOperand(1));
7578 break;
7579 case Intrinsic::loongarch_lsx_bz_v:
7580 case Intrinsic::loongarch_lasx_xbz_v:
7581 if (!Subtarget.is64Bit())
7582 return DAG.getNode(LoongArchISD::VANY_ZERO, DL, N->getValueType(0),
7583 N->getOperand(1));
7584 break;
7585 case Intrinsic::loongarch_lsx_bnz_b:
7586 case Intrinsic::loongarch_lsx_bnz_h:
7587 case Intrinsic::loongarch_lsx_bnz_w:
7588 case Intrinsic::loongarch_lsx_bnz_d:
7589 case Intrinsic::loongarch_lasx_xbnz_b:
7590 case Intrinsic::loongarch_lasx_xbnz_h:
7591 case Intrinsic::loongarch_lasx_xbnz_w:
7592 case Intrinsic::loongarch_lasx_xbnz_d:
7593 if (!Subtarget.is64Bit())
7594 return DAG.getNode(LoongArchISD::VALL_NONZERO, DL, N->getValueType(0),
7595 N->getOperand(1));
7596 break;
7597 case Intrinsic::loongarch_lsx_bnz_v:
7598 case Intrinsic::loongarch_lasx_xbnz_v:
7599 if (!Subtarget.is64Bit())
7600 return DAG.getNode(LoongArchISD::VANY_NONZERO, DL, N->getValueType(0),
7601 N->getOperand(1));
7602 break;
7603 case Intrinsic::loongarch_lasx_concat_128_s:
7604 case Intrinsic::loongarch_lasx_concat_128_d:
7605 case Intrinsic::loongarch_lasx_concat_128:
7606 return DAG.getNode(ISD::CONCAT_VECTORS, DL, N->getValueType(0),
7607 N->getOperand(1), N->getOperand(2));
7608 }
7609 return SDValue();
7610}
7611
// DAG combine for LoongArchISD::MOVGR2FR_W_LA64 nodes. When operand 0 of N is
// a MOVFR2GR_S_LA64 node, the value feeding that inner node is returned as the
// replacement; in every other case an empty SDValue is returned.
// NOTE(review): the opening line(s) of the signature are missing from this
// listing; the combine dispatcher calls this function as
// performMOVGR2FR_WCombine(N, DAG, DCI, Subtarget).
7614 const LoongArchSubtarget &Subtarget) {
7615 // If the input to MOVGR2FR_W_LA64 is just MOVFR2GR_S_LA64 then the
7616 // conversion is unnecessary and can be replaced with the
7617 // MOVFR2GR_S_LA64 operand.
7618 SDValue Op0 = N->getOperand(0);
7619 if (Op0.getOpcode() == LoongArchISD::MOVFR2GR_S_LA64)
7620 return Op0.getOperand(0);
7621 return SDValue();
7622 }
7623
// DAG combine for LoongArchISD::MOVFR2GR_S_LA64 nodes. When operand 0 of N is
// a MOVGR2FR_W_LA64 node, the value feeding that inner node is returned as the
// replacement (after asserting its type equals N's result type); otherwise an
// empty SDValue is returned.
// NOTE(review): the opening line(s) of the signature are missing from this
// listing; the combine dispatcher calls this function as
// performMOVFR2GR_SCombine(N, DAG, DCI, Subtarget).
7626 const LoongArchSubtarget &Subtarget) {
7627 // If the input to MOVFR2GR_S_LA64 is just MOVGR2FR_W_LA64 then the
7628 // conversion is unnecessary and can be replaced with the MOVGR2FR_W_LA64
7629 // operand.
7630 SDValue Op0 = N->getOperand(0);
7631 if (Op0->getOpcode() == LoongArchISD::MOVGR2FR_W_LA64) {
7632 assert(Op0.getOperand(0).getValueType() == N->getSimpleValueType(0) &&
7633 "Unexpected value type!");
7634 return Op0.getOperand(0);
7635 }
7636 return SDValue();
7637 }
7638
// DAG combine used for LoongArchISD::VMSKLTZ and LoongArchISD::XVMSKLTZ nodes.
// It asks TargetLowering::SimplifyDemandedBits to simplify N with every bit of
// the result's scalar width marked as demanded. Returns SDValue(N, 0) when
// that call reports a simplification, and an empty SDValue otherwise.
// NOTE(review): the opening line(s) of the signature are missing from this
// listing; the combine dispatcher calls this function as
// performVMSKLTZCombine(N, DAG, DCI, Subtarget).
7641 const LoongArchSubtarget &Subtarget) {
7642 MVT VT = N->getSimpleValueType(0);
7643 unsigned NumBits = VT.getScalarSizeInBits();
7644
7645 // Simplify the inputs.
7646 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// All NumBits bits are demanded, so nothing is assumed dead up front.
7647 APInt DemandedMask(APInt::getAllOnes(NumBits));
7648 if (TLI.SimplifyDemandedBits(SDValue(N, 0), DemandedMask, DCI))
7649 return SDValue(N, 0);
7650
7651 return SDValue();
7652 }
7653
// DAG combine for LoongArchISD::SPLIT_PAIR_F64 nodes. Three folds are tried in
// order, each replacing both results of N through DCI.CombineTo:
//   1. operand is BUILD_PAIR_F64   -> forward that node's two operands;
//   2. operand is undef            -> two undef i32 values;
//   3. operand is the constant C   -> the low and high 32 bits of C's bit
//      pattern as two i32 constants.
// An empty SDValue is returned when none of them applies.
// NOTE(review): the signature lines between the return type and the last
// parameter are missing from this listing; the combine dispatcher calls this
// function as performSPLIT_PAIR_F64Combine(N, DAG, DCI, Subtarget).
7654 static SDValue
7657 const LoongArchSubtarget &Subtarget) {
7658 SDValue Op0 = N->getOperand(0);
7659 SDLoc DL(N);
7660
7661 // If the input to SplitPairF64 is just BuildPairF64 then the operation is
7662 // redundant. Instead, use BuildPairF64's operands directly.
7663 if (Op0->getOpcode() == LoongArchISD::BUILD_PAIR_F64)
7664 return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
7665
7666 if (Op0->isUndef()) {
7667 SDValue Lo = DAG.getUNDEF(MVT::i32);
7668 SDValue Hi = DAG.getUNDEF(MVT::i32);
7669 return DCI.CombineTo(N, Lo, Hi);
7670 }
7671
7672 // It's cheaper to materialise two 32-bit integers than to load a double
7673 // from the constant pool and transfer it to integer registers through the
7674 // stack.
// NOTE(review): the statement that opens this block and defines C (original
// line 7675) is missing from this listing; C exposes getValueAPF(), so it is
// presumably a floating-point constant node derived from Op0 - confirm.
7676 APInt V = C->getValueAPF().bitcastToAPInt();
7677 SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
7678 SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
7679 return DCI.CombineTo(N, Lo, Hi);
7680 }
7681
7682 return SDValue();
7683 }
7684
7685 /// Do target-specific dag combines on LoongArchISD::VANDN nodes.
// Judging by the folds below, VANDN(a, b) is treated as AND(NOT(a), b). The
// folds are tried top to bottom and the first match wins; an empty SDValue is
// returned when nothing matches.
// NOTE(review): the signature lines (original 7686-7687) and the guard
// conditions of the "VANDN(0, x)", "VANDN(x, 0)" and "VANDN(x, -1)" folds
// (original 7700, 7704, 7708) are missing from this listing, so only their
// results are visible here.
7688 const LoongArchSubtarget &Subtarget) {
7689 SDValue N0 = N->getOperand(0);
7690 SDValue N1 = N->getOperand(1);
7691 MVT VT = N->getSimpleValueType(0);
7692 SDLoc DL(N);
7693
7694 // VANDN(undef, x) -> 0
7695 // VANDN(x, undef) -> 0
7696 if (N0.isUndef() || N1.isUndef())
7697 return DAG.getConstant(0, DL, VT);
7698
7699 // VANDN(0, x) -> x
7701 return N1;
7702
7703 // VANDN(x, 0) -> 0
7705 return DAG.getConstant(0, DL, VT);
7706
7707 // VANDN(x, -1) -> NOT(x) -> XOR(x, -1)
7709 return DAG.getNOT(DL, N0, VT);
7710
7711 // Turn VANDN back to AND if input is inverted.
7712 if (SDValue Not = isNOT(N0, DAG))
7713 return DAG.getNode(ISD::AND, DL, VT, DAG.getBitcast(VT, Not), N1);
7714
7715 // Folds for better commutativity:
7716 if (N1->hasOneUse()) {
7717 // VANDN(x,NOT(y)) -> AND(NOT(x),NOT(y)) -> NOT(OR(X,Y)).
7718 if (SDValue Not = isNOT(N1, DAG))
7719 return DAG.getNOT(
7720 DL, DAG.getNode(ISD::OR, DL, VT, N0, DAG.getBitcast(VT, Not)), VT);
7721
7722 // VANDN(x, SplatVector(Imm)) -> AND(NOT(x), NOT(SplatVector(~Imm)))
7723 // -> NOT(OR(x, SplatVector(~Imm)))
7724 // Combination is performed only when VT is v16i8/v32i8, using `vnori.b` to
7725 // gain benefits.
7726 if (!DCI.isBeforeLegalizeOps() && (VT == MVT::v16i8 || VT == MVT::v32i8) &&
7727 N1.getOpcode() == ISD::BUILD_VECTOR) {
7728 if (SDValue SplatValue =
7729 cast<BuildVectorSDNode>(N1.getNode())->getSplatValue()) {
// Give up when the splatted scalar has users other than this BUILD_VECTOR.
7730 if (!N1->isOnlyUserOf(SplatValue.getNode()))
7731 return SDValue();
7732
7733 if (auto *C = dyn_cast<ConstantSDNode>(SplatValue)) {
// Bitwise complement of the splat constant, truncated to 8 bits.
7734 uint8_t NCVal = static_cast<uint8_t>(~(C->getSExtValue()));
7735 SDValue Not =
7736 DAG.getSplat(VT, DL, DAG.getTargetConstant(NCVal, DL, MVT::i8));
7737 return DAG.getNOT(
7738 DL, DAG.getNode(ISD::OR, DL, VT, N0, DAG.getBitcast(VT, Not)),
7739 VT);
7740 }
7741 }
7742 }
7743 }
7744
7745 return SDValue();
7746 }
7747
// DAG combine for ISD::SINT_TO_FP nodes. When the converted value comes
// straight from a plain, non-volatile, single-use load of the same bit width
// as the f32/f64 result, the load is re-created with the result type VT and
// the conversion is emitted as LoongArchISD::SITOF on that new load.
// Returns an empty SDValue when any precondition below fails.
// NOTE(review): the opening line(s) of the signature are missing from this
// listing; the combine dispatcher calls this function as
// performSINT_TO_FPCombine(N, DAG, DCI, Subtarget).
7750 const LoongArchSubtarget &Subtarget) {
7751 SDLoc DL(N);
7752 EVT VT = N->getValueType(0);
7753
// Only f32/f64 results are handled, and only when the subtarget reports the
// matching hasBasicF()/hasBasicD() feature.
7754 if (VT != MVT::f32 && VT != MVT::f64)
7755 return SDValue();
7756 if (VT == MVT::f32 && !Subtarget.hasBasicF())
7757 return SDValue();
7758 if (VT == MVT::f64 && !Subtarget.hasBasicD())
7759 return SDValue();
7760
7761 // Only optimize when the source and destination types have the same width.
7762 if (VT.getSizeInBits() != N->getOperand(0).getValueSizeInBits())
7763 return SDValue();
7764
7765 SDValue Src = N->getOperand(0);
7766 // If the result of an integer load is only used by an integer-to-float
7767 // conversion, use a fp load instead. This eliminates an integer-to-float-move
7768 // (movgr2fr) instruction.
7769 if (ISD::isNormalLoad(Src.getNode()) && Src.hasOneUse() &&
7770 // Do not change the width of a volatile load. This condition check is
7771 // inspired by AArch64.
7772 !cast<LoadSDNode>(Src)->isVolatile()) {
7773 LoadSDNode *LN0 = cast<LoadSDNode>(Src);
// The new load reuses the old load's chain, base pointer, pointer info,
// alignment and memory-operand flags; only the loaded type changes to VT.
7774 SDValue Load = DAG.getLoad(VT, DL, LN0->getChain(), LN0->getBasePtr(),
7775 LN0->getPointerInfo(), LN0->getAlign(),
7776 LN0->getMemOperand()->getFlags());
7777
7778 // Make sure successors of the original load stay after it by updating them
7779 // to use the new Chain.
7780 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), Load.getValue(1));
7781 return DAG.getNode(LoongArchISD::SITOF, SDLoc(N), VT, Load);
7782 }
7783
7784 return SDValue();
7785 }
7786
7787 // Try to widen AND, OR and XOR nodes to VT in order to remove casts around
7788 // logical operations, like in the example below.
7789 // or (and (truncate x, truncate y)),
7790 // (xor (truncate z, build_vector (constants)))
7791 // Given a target type \p VT, we generate
7792 // or (and x, y), (xor z, zext(build_vector (constants)))
7793 // given x, y and z are of type \p VT. We can do so, if operands are either
7794 // truncates from VT types, the second operand is a vector of constants, can
7795 // be recursively promoted or is an existing extension we can extend further.
//
// Returns the rebuilt node of type VT on success and an empty SDValue as soon
// as either operand cannot be widened.
// NOTE(review): three original lines are missing from this listing: the first
// signature line (7796), the condition of the recursion limit (7801) and the
// call that initializes Cst for the constant case (7844). Going by the
// recursive calls below, the parameters are (N, DL, VT, DAG, Subtarget, Depth).
7797 SelectionDAG &DAG,
7798 const LoongArchSubtarget &Subtarget,
7799 unsigned Depth) {
7800 // Limit recursion to avoid excessive compile times.
7802 return SDValue();
7803
7804 if (!ISD::isBitwiseLogicOp(N.getOpcode()))
7805 return SDValue();
7806
7807 SDValue N0 = N.getOperand(0);
7808 SDValue N1 = N.getOperand(1);
7809
// The same logic opcode must be legal (or promotable) on the wide type VT.
7810 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
7811 if (!TLI.isOperationLegalOrPromote(N.getOpcode(), VT))
7812 return SDValue();
7813
// Left operand: first try to widen it recursively, then fall back to
// stripping a truncate whose source already has type VT.
7814 if (SDValue NN0 =
7815 PromoteMaskArithmetic(N0, DL, VT, DAG, Subtarget, Depth + 1))
7816 N0 = NN0;
7817 else {
7818 // The left side has to be a 'trunc'.
7819 bool LHSTrunc = N0.getOpcode() == ISD::TRUNCATE &&
7820 N0.getOperand(0).getValueType() == VT;
7821 if (LHSTrunc)
7822 N0 = N0.getOperand(0);
7823 else
7824 return SDValue();
7825 }
7826
// Right operand: same as the left, plus two more accepted forms (a single-use
// *_EXTEND_VECTOR_INREG re-emitted at VT for 256-bit LASX types, and Cst).
7827 if (SDValue NN1 =
7828 PromoteMaskArithmetic(N1, DL, VT, DAG, Subtarget, Depth + 1))
7829 N1 = NN1;
7830 else {
7831 // The right side has to be a 'trunc', a (foldable) constant or an
7832 // existing extension we can extend further.
7833 bool RHSTrunc = N1.getOpcode() == ISD::TRUNCATE &&
7834 N1.getOperand(0).getValueType() == VT;
7835 if (RHSTrunc)
7836 N1 = N1.getOperand(0);
7837 else if (ISD::isExtVecInRegOpcode(N1.getOpcode()) && VT.is256BitVector() &&
7838 Subtarget.hasExtLASX() && N1.hasOneUse())
7839 N1 = DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0));
7840 // On 32-bit platform, i64 is an illegal integer scalar type, and
7841 // FoldConstantArithmetic will fail for v4i64. This may be optimized in the
7842 // future.
7843 else if (SDValue Cst =
7845 N1 = Cst;
7846 else
7847 return SDValue();
7848 }
7849
7850 return DAG.getNode(N.getOpcode(), DL, VT, N0, N1);
7851 }
7852
7853 // On LASX the type v4i1/v8i1/v16i1 may be legalized to v4i32/v8i16/v16i8, which
7854 // is LSX-sized register. In most cases we actually compare or select LASX-sized
7855 // registers and mixing the two types creates horrible code. This method
7856 // optimizes some of the transition sequences.
//
// N must be an ANY_EXTEND, ZERO_EXTEND or SIGN_EXTEND node with a vector
// result (both are asserted). The extend's operand is widened to N's type via
// the recursive PromoteMaskArithmetic overload, then the extension semantics
// are restored: nothing extra for ANY_EXTEND, a zero-extend-in-reg from the
// narrow type for ZERO_EXTEND, and SIGN_EXTEND_INREG for SIGN_EXTEND.
// Returns an empty SDValue when LASX is unavailable, the result is not a
// 256-bit vector, or the operand cannot be widened.
// NOTE(review): the first signature line (original 7857) is missing from this
// listing; performEXTENDCombine calls this overload as
// PromoteMaskArithmetic(SDValue(N, 0), DL, DAG, Subtarget).
7858 SelectionDAG &DAG,
7859 const LoongArchSubtarget &Subtarget) {
7860 EVT VT = N.getValueType();
7861 assert(VT.isVector() && "Expected vector type");
7862 assert((N.getOpcode() == ISD::ANY_EXTEND ||
7863 N.getOpcode() == ISD::ZERO_EXTEND ||
7864 N.getOpcode() == ISD::SIGN_EXTEND) &&
7865 "Invalid Node");
7866
7867 if (!Subtarget.hasExtLASX() || !VT.is256BitVector())
7868 return SDValue();
7869
7870 SDValue Narrow = N.getOperand(0);
7871 EVT NarrowVT = Narrow.getValueType();
7872
7873 // Generate the wide operation.
7874 SDValue Op = PromoteMaskArithmetic(Narrow, DL, VT, DAG, Subtarget, 0);
7875 if (!Op)
7876 return SDValue();
7877 switch (N.getOpcode()) {
7878 default:
7879 llvm_unreachable("Unexpected opcode");
7880 case ISD::ANY_EXTEND:
7881 return Op;
7882 case ISD::ZERO_EXTEND:
7883 return DAG.getZeroExtendInReg(Op, DL, NarrowVT);
7884 case ISD::SIGN_EXTEND:
7885 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
7886 DAG.getValueType(NarrowVT));
7887 }
7888 }
7889
// DAG combine shared by ISD::ANY_EXTEND, ISD::ZERO_EXTEND and ISD::SIGN_EXTEND
// nodes. For vector results it forwards to the four-argument
// PromoteMaskArithmetic overload and returns its result when non-empty; in all
// other cases it returns an empty SDValue.
// NOTE(review): the opening line(s) of the signature are missing from this
// listing; the combine dispatcher calls this function as
// performEXTENDCombine(N, DAG, DCI, Subtarget).
7892 const LoongArchSubtarget &Subtarget) {
7893 EVT VT = N->getValueType(0);
7894 SDLoc DL(N);
7895
7896 if (VT.isVector())
7897 if (SDValue R = PromoteMaskArithmetic(SDValue(N, 0), DL, DAG, Subtarget))
7898 return R;
7899
7900 return SDValue();
7901 }
7902
// DAG combine for vector-concatenation nodes. When the result is a vector and
// N has exactly two operands, it forwards to combineFP_ROUND and returns that
// result when non-empty; otherwise it returns an empty SDValue.
// NOTE(review): the signature lines between the return type and the last
// parameter are missing from this listing; the combine dispatcher calls this
// function as performCONCAT_VECTORSCombine(N, DAG, DCI, Subtarget).
// combineFP_ROUND is defined outside this excerpt, so what it matches is not
// documented here.
7903 static SDValue
7906 const LoongArchSubtarget &Subtarget) {
7907 SDLoc DL(N);
7908 EVT VT = N->getValueType(0);
7909
7910 if (VT.isVector() && N->getNumOperands() == 2)
7911 if (SDValue R = combineFP_ROUND(SDValue(N, 0), DL, DAG, Subtarget))
7912 return R;
7913
7914 return SDValue();
7915 }
7916
// DAG combine for ISD::VSELECT nodes that recognizes an open-coded rounding
// right shift and replaces it with one LoongArchISD::VSRLR (logical) or
// LoongArchISD::VSRAR (arithmetic) node. The matched shape is
//
//   vselect (setcc Shift, 0, seteq),
//           X,
//           add (and (srl X, Shift-1), 1), (srl/sra X, Shift)
//
// where the two ADD operands may appear in either order. The combine only
// runs once DCI.isBeforeLegalizeOps() is false, and only for legal vector
// types with integer elements. Any mismatch returns an empty SDValue.
// NOTE(review): the opening signature line(s) (original 7917-7918) and the
// final condition that validates the second operand of the "shift-1" ADD
// (original 8036) are missing from this listing. The combine dispatcher calls
// this function as performVSELECTCombine(N, DAG, DCI, Subtarget).
7919 const LoongArchSubtarget &Subtarget) {
7920 if (DCI.isBeforeLegalizeOps())
7921 return SDValue();
7922
7923 EVT VT = N->getValueType(0);
7924 if (!VT.isVector())
7925 return SDValue();
7926
7927 if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
7928 return SDValue();
7929
7930 EVT EltVT = VT.getVectorElementType();
7931 if (!EltVT.isInteger())
7932 return SDValue();
7933
7934 SDValue Cond = N->getOperand(0);
7935 SDValue TrueVal = N->getOperand(1);
7936 SDValue FalseVal = N->getOperand(2);
7937
7938 // match:
7939 //
7940 // vselect (setcc shift, 0, seteq),
7941 // x,
7942 // rounded_shift
7943
7944 if (Cond.getOpcode() != ISD::SETCC)
7945 return SDValue();
7946
7947 if (!ISD::isConstantSplatVectorAllZeros(Cond.getOperand(1).getNode()))
7948 return SDValue();
7949
7950 auto *CC = cast<CondCodeSDNode>(Cond.getOperand(2));
7951 if (CC->get() != ISD::SETEQ)
7952 return SDValue();
7953
7954 SDValue Shift = Cond.getOperand(0);
7955
7956 // True branch must be original value:
7957 //
7958 // vselect cond, x, ...
7959
7960 SDValue X = TrueVal;
7961
7962 // Now match rounded shift pattern:
7963 //
7964 // add
7965 // (and
7966 // (srl X, shift-1)
7967 // 1)
7968 // (srl/sra X, shift)
7969
7970 if (FalseVal.getOpcode() != ISD::ADD)
7971 return SDValue();
7972
7973 SDValue Add0 = FalseVal.getOperand(0);
7974 SDValue Add1 = FalseVal.getOperand(1);
7975 SDValue And;
7976 SDValue Shr;
7977
// ADD is commutative: accept the AND on either side and treat the other
// operand as the plain shift.
7978 if (Add0.getOpcode() == ISD::AND) {
7979 And = Add0;
7980 Shr = Add1;
7981 } else if (Add1.getOpcode() == ISD::AND) {
7982 And = Add1;
7983 Shr = Add0;
7984 } else {
7985 return SDValue();
7986 }
7987
7988 // match:
7989 //
7990 // srl/sra X, shift
7991
7992 if (Shr.getOpcode() != ISD::SRL && Shr.getOpcode() != ISD::SRA)
7993 return SDValue();
7994
7995 if (Shr.getOperand(0) != X)
7996 return SDValue();
7997
7998 if (Shr.getOperand(1) != Shift)
7999 return SDValue();
8000
8001 // match:
8002 //
8003 // and
8004 // (srl X, shift-1)
8005 // 1
8006
// Only operand 0 of the AND is examined as the shift and only operand 1 as
// the constant; the swapped operand order is not tried here.
8007 SDValue Srl = And.getOperand(0);
8008 SDValue One = And.getOperand(1);
8009 APInt SplatVal;
8010
8011 if (Srl.getOpcode() != ISD::SRL)
8012 return SDValue();
8013
8014 One = peekThroughBitcasts(One);
8015 if (!isConstantSplatVector(One, SplatVal, EltVT.getSizeInBits()))
8016 return SDValue();
8017
8018 if (SplatVal != 1)
8019 return SDValue();
8020
8021 if (Srl.getOperand(0) != X)
8022 return SDValue();
8023
8024 // match:
8025 //
8026 // shift-1
8027
8028 SDValue ShiftMinus1 = Srl.getOperand(1);
8029
8030 if (ShiftMinus1.getOpcode() != ISD::ADD)
8031 return SDValue();
8032
8033 if (ShiftMinus1.getOperand(0) != Shift)
8034 return SDValue();
8035
8037 return SDValue();
8038
8039 // We matched a rounded right shift pattern and can lower it
8040 // to a single vector rounded shift instruction.
8041
8042 SDLoc DL(N);
8043 return DAG.getNode(Shr.getOpcode() == ISD::SRL ? LoongArchISD::VSRLR
8044 : LoongArchISD::VSRAR,
8045 DL, VT, X, Shift);
8046 }
8047
// Target DAG-combine dispatcher: switches on N's opcode and forwards to the
// matching perform*Combine helper, passing the SelectionDAG taken from DCI and
// this lowering object's Subtarget. Opcodes without a case, and the
// VPACKEV/VPERMI case when combineFP_ROUND yields nothing, return an empty
// SDValue.
// NOTE(review): the first signature line (original 8048) is missing from this
// listing, as are the case labels that precede the
// performINTRINSIC_WO_CHAINCombine and performCONCAT_VECTORSCombine calls
// (original 8078 and 8091).
8049 DAGCombinerInfo &DCI) const {
8050 SelectionDAG &DAG = DCI.DAG;
8051 switch (N->getOpcode()) {
8052 default:
8053 break;
8054 case ISD::ADD:
8055 return performADDCombine(N, DAG, DCI, Subtarget);
8056 case ISD::AND:
8057 return performANDCombine(N, DAG, DCI, Subtarget);
8058 case ISD::OR:
8059 return performORCombine(N, DAG, DCI, Subtarget);
8060 case ISD::SETCC:
8061 return performSETCCCombine(N, DAG, DCI, Subtarget);
8062 case ISD::SRL:
8063 return performSRLCombine(N, DAG, DCI, Subtarget);
8064 case ISD::BITCAST:
8065 return performBITCASTCombine(N, DAG, DCI, Subtarget);
8066 case ISD::ANY_EXTEND:
8067 case ISD::ZERO_EXTEND:
8068 case ISD::SIGN_EXTEND:
8069 return performEXTENDCombine(N, DAG, DCI, Subtarget);
8070 case ISD::SINT_TO_FP:
8071 return performSINT_TO_FPCombine(N, DAG, DCI, Subtarget);
8072 case LoongArchISD::BITREV_W:
8073 return performBITREV_WCombine(N, DAG, DCI, Subtarget);
8074 case LoongArchISD::BR_CC:
8075 return performBR_CCCombine(N, DAG, DCI, Subtarget);
8076 case LoongArchISD::SELECT_CC:
8077 return performSELECT_CCCombine(N, DAG, DCI, Subtarget);
8079 return performINTRINSIC_WO_CHAINCombine(N, DAG, DCI, Subtarget);
8080 case LoongArchISD::MOVGR2FR_W_LA64:
8081 return performMOVGR2FR_WCombine(N, DAG, DCI, Subtarget);
8082 case LoongArchISD::MOVFR2GR_S_LA64:
8083 return performMOVFR2GR_SCombine(N, DAG, DCI, Subtarget);
8084 case LoongArchISD::VMSKLTZ:
8085 case LoongArchISD::XVMSKLTZ:
8086 return performVMSKLTZCombine(N, DAG, DCI, Subtarget);
8087 case LoongArchISD::SPLIT_PAIR_F64:
8088 return performSPLIT_PAIR_F64Combine(N, DAG, DCI, Subtarget);
8089 case LoongArchISD::VANDN:
8090 return performVANDNCombine(N, DAG, DCI, Subtarget);
8092 return performCONCAT_VECTORSCombine(N, DAG, DCI, Subtarget);
8093 case ISD::VSELECT:
8094 return performVSELECTCombine(N, DAG, DCI, Subtarget);
// These two opcodes share one attempt at combineFP_ROUND and fall out of the
// switch to the empty result when it does not apply.
8095 case LoongArchISD::VPACKEV:
8096 case LoongArchISD::VPERMI:
8097 if (SDValue Result =
8098 combineFP_ROUND(SDValue(N, 0), SDLoc(N), DAG, Subtarget))
8099 return Result;
8100 }
8101 return SDValue();
8102 }
8103
// Inserts a division-by-zero check after the div/mod instruction MI when the
// hidden "loongarch-check-zero-division" option (ZeroDivCheck) is enabled.
// With the option off, MBB is returned untouched. Otherwise everything after
// MI is moved into a new SinkMBB, MBB ends with a BNE that skips to SinkMBB
// when the divisor (MI operand 2) is non-zero, and the new BreakMBB in between
// holds "break 7". Returns SinkMBB in that case.
// NOTE(review): the signature lines (original 8104-8105) are missing from this
// listing; the body uses the names MI and MBB for the instruction and its
// parent block.
8106 if (!ZeroDivCheck)
8107 return MBB;
8108
8109 // Build instructions:
8110 // MBB:
8111 // div(or mod) $dst, $dividend, $divisor
8112 // bne $divisor, $zero, SinkMBB
8113 // BreakMBB:
8114 // break 7 // BRK_DIVZERO
8115 // SinkMBB:
8116 // fallthrough
8117 const BasicBlock *LLVM_BB = MBB->getBasicBlock();
8118 MachineFunction::iterator It = ++MBB->getIterator();
8119 MachineFunction *MF = MBB->getParent();
8120 auto BreakMBB = MF->CreateMachineBasicBlock(LLVM_BB);
8121 auto SinkMBB = MF->CreateMachineBasicBlock(LLVM_BB);
// Both new blocks go right after MBB, in the order BreakMBB, SinkMBB.
8122 MF->insert(It, BreakMBB);
8123 MF->insert(It, SinkMBB);
8124
8125 // Transfer the remainder of MBB and its successor edges to SinkMBB.
8126 SinkMBB->splice(SinkMBB->end(), MBB, std::next(MI.getIterator()), MBB->end());
8127 SinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
8128
8129 const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
8130 DebugLoc DL = MI.getDebugLoc();
8131 MachineOperand &Divisor = MI.getOperand(2);
8132 Register DivisorReg = Divisor.getReg();
8133
8134 // MBB:
// The BNE inherits the kill state of MI's divisor operand, since it is now
// the later reader of that register.
8135 BuildMI(MBB, DL, TII.get(LoongArch::BNE))
8136 .addReg(DivisorReg, getKillRegState(Divisor.isKill()))
8137 .addReg(LoongArch::R0)
8138 .addMBB(SinkMBB);
8139 MBB->addSuccessor(BreakMBB);
8140 MBB->addSuccessor(SinkMBB);
8141
8142 // BreakMBB:
8143 // See linux header file arch/loongarch/include/uapi/asm/break.h for the
8144 // definition of BRK_DIVZERO.
8145 BuildMI(BreakMBB, DL, TII.get(LoongArch::BREAK)).addImm(7 /*BRK_DIVZERO*/);
8146 BreakMBB->addSuccessor(SinkMBB);
8147
8148 // Clear Divisor's kill flag.
8149 Divisor.setIsKill(false);
8150
8151 return SinkMBB;
8152 }
8153
// Expands the PseudoVBZ*/PseudoVBNZ* (LSX) and PseudoXVBZ*/PseudoXVBNZ* (LASX)
// pseudos into real control flow that produces 0 or 1 in MI's operand 0:
//
//   BB:      FCC = <CondOpc> MI.operand(1) ; bcnez FCC, TrueBB
//   FalseBB: RD1 = addi.w $zero, 0 ; b SinkBB
//   TrueBB:  RD2 = addi.w $zero, 1          (no branch is emitted here;
//                                            SinkBB is laid out right after)
//   SinkBB:  dst = PHI [RD1, FalseBB], [RD2, TrueBB] ; rest of the old BB
//
// MI is erased and SinkBB is returned. Any other opcode hits
// llvm_unreachable.
// NOTE(review): the first signature line (original 8155), the definitions of
// MRI and It (original 8226-8227) and the statement following the splice
// (original 8240) are missing from this listing.
8154 static MachineBasicBlock *
8156 const LoongArchSubtarget &Subtarget) {
// Map each pseudo onto the instruction that sets the condition flag.
8157 unsigned CondOpc;
8158 switch (MI.getOpcode()) {
8159 default:
8160 llvm_unreachable("Unexpected opcode");
8161 case LoongArch::PseudoVBZ:
8162 CondOpc = LoongArch::VSETEQZ_V;
8163 break;
8164 case LoongArch::PseudoVBZ_B:
8165 CondOpc = LoongArch::VSETANYEQZ_B;
8166 break;
8167 case LoongArch::PseudoVBZ_H:
8168 CondOpc = LoongArch::VSETANYEQZ_H;
8169 break;
8170 case LoongArch::PseudoVBZ_W:
8171 CondOpc = LoongArch::VSETANYEQZ_W;
8172 break;
8173 case LoongArch::PseudoVBZ_D:
8174 CondOpc = LoongArch::VSETANYEQZ_D;
8175 break;
8176 case LoongArch::PseudoVBNZ:
8177 CondOpc = LoongArch::VSETNEZ_V;
8178 break;
8179 case LoongArch::PseudoVBNZ_B:
8180 CondOpc = LoongArch::VSETALLNEZ_B;
8181 break;
8182 case LoongArch::PseudoVBNZ_H:
8183 CondOpc = LoongArch::VSETALLNEZ_H;
8184 break;
8185 case LoongArch::PseudoVBNZ_W:
8186 CondOpc = LoongArch::VSETALLNEZ_W;
8187 break;
8188 case LoongArch::PseudoVBNZ_D:
8189 CondOpc = LoongArch::VSETALLNEZ_D;
8190 break;
8191 case LoongArch::PseudoXVBZ:
8192 CondOpc = LoongArch::XVSETEQZ_V;
8193 break;
8194 case LoongArch::PseudoXVBZ_B:
8195 CondOpc = LoongArch::XVSETANYEQZ_B;
8196 break;
8197 case LoongArch::PseudoXVBZ_H:
8198 CondOpc = LoongArch::XVSETANYEQZ_H;
8199 break;
8200 case LoongArch::PseudoXVBZ_W:
8201 CondOpc = LoongArch::XVSETANYEQZ_W;
8202 break;
8203 case LoongArch::PseudoXVBZ_D:
8204 CondOpc = LoongArch::XVSETANYEQZ_D;
8205 break;
8206 case LoongArch::PseudoXVBNZ:
8207 CondOpc = LoongArch::XVSETNEZ_V;
8208 break;
8209 case LoongArch::PseudoXVBNZ_B:
8210 CondOpc = LoongArch::XVSETALLNEZ_B;
8211 break;
8212 case LoongArch::PseudoXVBNZ_H:
8213 CondOpc = LoongArch::XVSETALLNEZ_H;
8214 break;
8215 case LoongArch::PseudoXVBNZ_W:
8216 CondOpc = LoongArch::XVSETALLNEZ_W;
8217 break;
8218 case LoongArch::PseudoXVBNZ_D:
8219 CondOpc = LoongArch::XVSETALLNEZ_D;
8220 break;
8221 }
8222
8223 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
8224 const BasicBlock *LLVM_BB = BB->getBasicBlock();
8225 DebugLoc DL = MI.getDebugLoc();
8228
8229 MachineFunction *F = BB->getParent();
8230 MachineBasicBlock *FalseBB = F->CreateMachineBasicBlock(LLVM_BB);
8231 MachineBasicBlock *TrueBB = F->CreateMachineBasicBlock(LLVM_BB);
8232 MachineBasicBlock *SinkBB = F->CreateMachineBasicBlock(LLVM_BB);
8233
// Layout order is FalseBB, TrueBB, SinkBB, all inserted at It.
8234 F->insert(It, FalseBB);
8235 F->insert(It, TrueBB);
8236 F->insert(It, SinkBB);
8237
8238 // Transfer the remainder of BB and its successor edges to SinkBB.
8239 SinkBB->splice(SinkBB->end(), BB, std::next(MI.getIterator()), BB->end());
8241
8242 // Insert the real instruction to BB.
8243 Register FCC = MRI.createVirtualRegister(&LoongArch::CFRRegClass);
8244 BuildMI(BB, DL, TII->get(CondOpc), FCC).addReg(MI.getOperand(1).getReg());
8245
8246 // Insert branch.
8247 BuildMI(BB, DL, TII->get(LoongArch::BCNEZ)).addReg(FCC).addMBB(TrueBB);
8248 BB->addSuccessor(FalseBB);
8249 BB->addSuccessor(TrueBB);
8250
8251 // FalseBB.
8252 Register RD1 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
8253 BuildMI(FalseBB, DL, TII->get(LoongArch::ADDI_W), RD1)
8254 .addReg(LoongArch::R0)
8255 .addImm(0);
8256 BuildMI(FalseBB, DL, TII->get(LoongArch::PseudoBR)).addMBB(SinkBB);
8257 FalseBB->addSuccessor(SinkBB);
8258
8259 // TrueBB.
8260 Register RD2 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
8261 BuildMI(TrueBB, DL, TII->get(LoongArch::ADDI_W), RD2)
8262 .addReg(LoongArch::R0)
8263 .addImm(1);
8264 TrueBB->addSuccessor(SinkBB);
8265
8266 // SinkBB: merge the results.
8267 BuildMI(*SinkBB, SinkBB->begin(), DL, TII->get(LoongArch::PHI),
8268 MI.getOperand(0).getReg())
8269 .addReg(RD1)
8270 .addMBB(FalseBB)
8271 .addReg(RD2)
8272 .addMBB(TrueBB);
8273
8274 // The pseudo instruction is gone now.
8275 MI.eraseFromParent();
8276 return SinkBB;
8277 }
8278
// Expands PseudoXVINSGR2VR_B / PseudoXVINSGR2VR_H, which insert the scalar Elt
// (operand 2) at element Idx (operand 3) of the 256-bit vector XSrc
// (operand 1), writing the result to XDst (operand 0). Two sequences exist:
//   * XSrc is a virtual register defined by an IMPLICIT_DEF and Idx is below
//     HalfSize: copy the sub_128 part of XSrc, insert with the 128-bit
//     VINSGR2VR_B/_H, then SUBREG_TO_REG the result into XDst.
//   * otherwise: broadcast Elt with XVREPLGR2VR_B/_H, combine it with XSrc
//     through XVPERMI_Q, and finish with XVEXTRINS_B/_H.
// MI is erased and BB is returned unchanged. Any other opcode hits
// llvm_unreachable.
// NOTE(review): the first signature line (original 8280) and the definition
// of MRI (original 8303) are missing from this listing.
8279 static MachineBasicBlock *
8281 const LoongArchSubtarget &Subtarget) {
8282 unsigned InsOp;
8283 unsigned BroadcastOp;
8284 unsigned HalfSize;
8285 switch (MI.getOpcode()) {
8286 default:
8287 llvm_unreachable("Unexpected opcode");
8288 case LoongArch::PseudoXVINSGR2VR_B:
8289 HalfSize = 16;
8290 BroadcastOp = LoongArch::XVREPLGR2VR_B;
8291 InsOp = LoongArch::XVEXTRINS_B;
8292 break;
8293 case LoongArch::PseudoXVINSGR2VR_H:
8294 HalfSize = 8;
8295 BroadcastOp = LoongArch::XVREPLGR2VR_H;
8296 InsOp = LoongArch::XVEXTRINS_H;
8297 break;
8298 }
8299 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
8300 const TargetRegisterClass *RC = &LoongArch::LASX256RegClass;
8301 const TargetRegisterClass *SubRC = &LoongArch::LSX128RegClass;
8302 DebugLoc DL = MI.getDebugLoc();
8304 // XDst = vector_insert XSrc, Elt, Idx
8305 Register XDst = MI.getOperand(0).getReg();
8306 Register XSrc = MI.getOperand(1).getReg();
8307 Register Elt = MI.getOperand(2).getReg();
8308 unsigned Idx = MI.getOperand(3).getImm();
8309
8310 if (XSrc.isVirtual() && MRI.getVRegDef(XSrc)->isImplicitDef() &&
8311 Idx < HalfSize) {
8312 Register ScratchSubReg1 = MRI.createVirtualRegister(SubRC);
8313 Register ScratchSubReg2 = MRI.createVirtualRegister(SubRC);
8314
8315 BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), ScratchSubReg1)
8316 .addReg(XSrc, {}, LoongArch::sub_128);
// HalfSize is 8 only for the _H pseudo, so it doubles as the element-width
// selector for the 128-bit insert opcode.
8317 BuildMI(*BB, MI, DL,
8318 TII->get(HalfSize == 8 ? LoongArch::VINSGR2VR_H
8319 : LoongArch::VINSGR2VR_B),
8320 ScratchSubReg2)
8321 .addReg(ScratchSubReg1)
8322 .addReg(Elt)
8323 .addImm(Idx);
8324
8325 BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), XDst)
8326 .addReg(ScratchSubReg2)
8327 .addImm(LoongArch::sub_128);
8328 } else {
8329 Register ScratchReg1 = MRI.createVirtualRegister(RC);
8330 Register ScratchReg2 = MRI.createVirtualRegister(RC);
8331
8332 BuildMI(*BB, MI, DL, TII->get(BroadcastOp), ScratchReg1).addReg(Elt);
8333
// The XVPERMI_Q immediate depends on which half Idx falls in: 48 for the
// upper half (Idx >= HalfSize), 18 for the lower half.
8334 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), ScratchReg2)
8335 .addReg(ScratchReg1)
8336 .addReg(XSrc)
8337 .addImm(Idx >= HalfSize ? 48 : 18);
8338
// The index is first reduced to its position within a half; multiplying by 17
// (16 + 1) then places that value in both 4-bit fields of the immediate.
8339 BuildMI(*BB, MI, DL, TII->get(InsOp), XDst)
8340 .addReg(XSrc)
8341 .addReg(ScratchReg2)
8342 .addImm((Idx >= HalfSize ? Idx - HalfSize : Idx) * 17);
8343 }
8344
8345 MI.eraseFromParent();
8346 return BB;
8347 }
8348
// Expands the scalar PseudoCTPOP_B/_H/_W/_D pseudos (plus the _H_LA32 and
// _W_LA32 variants) into three LSX instructions emitted before MI:
//   ScratchReg1 = VREPLGR2VR_x Src        ; broadcast the scalar
//   ScratchReg2 = VPCNT_x      ScratchReg1 ; per-element population count
//   Dst         = VPICKVE2GR_x ScratchReg2, 0 ; read element 0 back
// Asserts that the subtarget has LSX, erases MI and returns BB unchanged. Any
// other opcode hits llvm_unreachable.
// NOTE(review): the opening signature line(s) (original 8349-8350) and the
// definition of MRI (original 8356) are missing from this listing.
8351 const LoongArchSubtarget &Subtarget) {
8352 assert(Subtarget.hasExtLSX());
8353 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
8354 const TargetRegisterClass *RC = &LoongArch::LSX128RegClass;
8355 DebugLoc DL = MI.getDebugLoc();
8357 Register Dst = MI.getOperand(0).getReg();
8358 Register Src = MI.getOperand(1).getReg();
8359
// Pick the broadcast, count and extract opcodes for the element width.
8360 unsigned BroadcastOp, CTOp, PickOp;
8361 switch (MI.getOpcode()) {
8362 default:
8363 llvm_unreachable("Unexpected opcode");
8364 case LoongArch::PseudoCTPOP_B:
8365 BroadcastOp = LoongArch::VREPLGR2VR_B;
8366 CTOp = LoongArch::VPCNT_B;
8367 PickOp = LoongArch::VPICKVE2GR_B;
8368 break;
8369 case LoongArch::PseudoCTPOP_H:
8370 case LoongArch::PseudoCTPOP_H_LA32:
8371 BroadcastOp = LoongArch::VREPLGR2VR_H;
8372 CTOp = LoongArch::VPCNT_H;
8373 PickOp = LoongArch::VPICKVE2GR_H;
8374 break;
8375 case LoongArch::PseudoCTPOP_W:
8376 case LoongArch::PseudoCTPOP_W_LA32:
8377 BroadcastOp = LoongArch::VREPLGR2VR_W;
8378 CTOp = LoongArch::VPCNT_W;
8379 PickOp = LoongArch::VPICKVE2GR_W;
8380 break;
8381 case LoongArch::PseudoCTPOP_D:
8382 BroadcastOp = LoongArch::VREPLGR2VR_D;
8383 CTOp = LoongArch::VPCNT_D;
8384 PickOp = LoongArch::VPICKVE2GR_D;
8385 break;
8386 }
8387
8388 Register ScratchReg1 = MRI.createVirtualRegister(RC);
8389 Register ScratchReg2 = MRI.createVirtualRegister(RC);
8390 BuildMI(*BB, MI, DL, TII->get(BroadcastOp), ScratchReg1).addReg(Src);
8391 BuildMI(*BB, MI, DL, TII->get(CTOp), ScratchReg2).addReg(ScratchReg1);
8392 BuildMI(*BB, MI, DL, TII->get(PickOp), Dst).addReg(ScratchReg2).addImm(0);
8393
8394 MI.eraseFromParent();
8395 return BB;
8396 }
8397
// Custom-inserter expansion for the Pseudo[X]VMSK{LTZ,GEZ,EQZ,NEZ}_* pseudos
// (see the dispatch in EmitInstrWithCustomInserter). A vector mask instruction
// is emitted on the source vector and the resulting mask is then moved into
// the destination GPR. The pseudo is erased and BB is returned.
// NOTE(review): the function-name line and the declaration of MRI are not
// visible in this listing.
8398 static MachineBasicBlock *
8400 const LoongArchSubtarget &Subtarget) {
8401 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
8402 const TargetRegisterClass *RC = &LoongArch::LSX128RegClass;
8403 const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
8405 Register Dst = MI.getOperand(0).getReg();
8406 Register Src = MI.getOperand(1).getReg();
8407 DebugLoc DL = MI.getDebugLoc();
// Defaults describe the 128-bit byte-element case; the switch below overrides
// RC for the 256-bit (XV*) pseudos and EleBits for wider elements.
8408 unsigned EleBits = 8;
// Non-zero only for the EQZ pseudos, which are built as NOR(VMSKNZ, VMSKNZ).
8409 unsigned NotOpc = 0;
8410 unsigned MskOpc;
8411
8412 switch (MI.getOpcode()) {
8413 default:
8414 llvm_unreachable("Unexpected opcode");
8415 case LoongArch::PseudoVMSKLTZ_B:
8416 MskOpc = LoongArch::VMSKLTZ_B;
8417 break;
8418 case LoongArch::PseudoVMSKLTZ_H:
8419 MskOpc = LoongArch::VMSKLTZ_H;
8420 EleBits = 16;
8421 break;
8422 case LoongArch::PseudoVMSKLTZ_W:
8423 MskOpc = LoongArch::VMSKLTZ_W;
8424 EleBits = 32;
8425 break;
8426 case LoongArch::PseudoVMSKLTZ_D:
8427 MskOpc = LoongArch::VMSKLTZ_D;
8428 EleBits = 64;
8429 break;
8430 case LoongArch::PseudoVMSKGEZ_B:
8431 MskOpc = LoongArch::VMSKGEZ_B;
8432 break;
8433 case LoongArch::PseudoVMSKEQZ_B:
8434 MskOpc = LoongArch::VMSKNZ_B;
8435 NotOpc = LoongArch::VNOR_V;
8436 break;
8437 case LoongArch::PseudoVMSKNEZ_B:
8438 MskOpc = LoongArch::VMSKNZ_B;
8439 break;
8440 case LoongArch::PseudoXVMSKLTZ_B:
8441 MskOpc = LoongArch::XVMSKLTZ_B;
8442 RC = &LoongArch::LASX256RegClass;
8443 break;
8444 case LoongArch::PseudoXVMSKLTZ_H:
8445 MskOpc = LoongArch::XVMSKLTZ_H;
8446 RC = &LoongArch::LASX256RegClass;
8447 EleBits = 16;
8448 break;
8449 case LoongArch::PseudoXVMSKLTZ_W:
8450 MskOpc = LoongArch::XVMSKLTZ_W;
8451 RC = &LoongArch::LASX256RegClass;
8452 EleBits = 32;
8453 break;
8454 case LoongArch::PseudoXVMSKLTZ_D:
8455 MskOpc = LoongArch::XVMSKLTZ_D;
8456 RC = &LoongArch::LASX256RegClass;
8457 EleBits = 64;
8458 break;
8459 case LoongArch::PseudoXVMSKGEZ_B:
8460 MskOpc = LoongArch::XVMSKGEZ_B;
8461 RC = &LoongArch::LASX256RegClass;
8462 break;
8463 case LoongArch::PseudoXVMSKEQZ_B:
8464 MskOpc = LoongArch::XVMSKNZ_B;
8465 NotOpc = LoongArch::XVNOR_V;
8466 RC = &LoongArch::LASX256RegClass;
8467 break;
8468 case LoongArch::PseudoXVMSKNEZ_B:
8469 MskOpc = LoongArch::XVMSKNZ_B;
8470 RC = &LoongArch::LASX256RegClass;
8471 break;
8472 }
8473
// Compute the mask vector. For the EQZ pseudos the "non-zero" mask is computed
// into Tmp first and then inverted by NOR-ing Tmp with itself.
8474 Register Msk = MRI.createVirtualRegister(RC);
8475 if (NotOpc) {
8476 Register Tmp = MRI.createVirtualRegister(RC);
8477 BuildMI(*BB, MI, DL, TII->get(MskOpc), Tmp).addReg(Src);
8478 BuildMI(*BB, MI, DL, TII->get(NotOpc), Msk)
8479 .addReg(Tmp, RegState::Kill)
8480 .addReg(Tmp, RegState::Kill);
8481 } else {
8482 BuildMI(*BB, MI, DL, TII->get(MskOpc), Msk).addReg(Src);
8483 }
8484
// 256-bit case: read word 0 and word 4 of the mask vector into two GPRs (Lo,
// Hi) and combine them with a bit-field insert covering bits
// [256/EleBits - 1 : 128/EleBits].
// NOTE(review): the register operands of the BSTRINS instruction are on lines
// missing from this listing; presumably Lo is the base and Hi the inserted
// field - confirm against the full source.
8485 if (TRI->getRegSizeInBits(*RC) > 128) {
8486 Register Lo = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
8487 Register Hi = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
8488 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPICKVE2GR_WU), Lo)
8489 .addReg(Msk)
8490 .addImm(0);
8491 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPICKVE2GR_WU), Hi)
8492 .addReg(Msk, RegState::Kill)
8493 .addImm(4);
8494 BuildMI(*BB, MI, DL,
8495 TII->get(Subtarget.is64Bit() ? LoongArch::BSTRINS_D
8496 : LoongArch::BSTRINS_W),
8497 Dst)
8500 .addImm(256 / EleBits - 1)
8501 .addImm(128 / EleBits);
8502 } else {
// 128-bit case: the mask is read directly from halfword 0 (zero-extended).
8503 BuildMI(*BB, MI, DL, TII->get(LoongArch::VPICKVE2GR_HU), Dst)
8504 .addReg(Msk, RegState::Kill)
8505 .addImm(0);
8506 }
8507
8508 MI.eraseFromParent();
8509 return BB;
8510}
8511
// Expands SplitPairF64Pseudo: copies the source FP register (operand 2) into
// two GPRs, the low part into operand 0 via MOVFR2GR_S_64 and the high part
// into operand 1 via MOVFRH2GR_S. The pseudo is erased and BB is returned.
// NOTE(review): the function-name line and the declaration of TII are not
// visible in this listing.
8512 static MachineBasicBlock *
8514 const LoongArchSubtarget &Subtarget) {
8515 assert(MI.getOpcode() == LoongArch::SplitPairF64Pseudo &&
8516 "Unexpected instruction");
8517
8518 MachineFunction &MF = *BB->getParent();
8519 DebugLoc DL = MI.getDebugLoc();
8521 Register LoReg = MI.getOperand(0).getReg();
8522 Register HiReg = MI.getOperand(1).getReg();
8523 Register SrcReg = MI.getOperand(2).getReg();
8524
// SrcReg is read twice; only the second (last) read inherits the pseudo's kill
// flag so the register stays live across the first move.
8525 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVFR2GR_S_64), LoReg).addReg(SrcReg);
8526 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVFRH2GR_S), HiReg)
8527 .addReg(SrcReg, getKillRegState(MI.getOperand(2).isKill()));
8528 MI.eraseFromParent(); // The pseudo instruction is gone now.
8529 return BB;
8530}
8531
// Expands BuildPairF64Pseudo: assembles the destination FP register (operand
// 0) from two GPRs. The low GPR (operand 1) is moved into a temporary FPR64
// with MOVGR2FR_W_64, then MOVGR2FRH_W combines that temporary with the high
// GPR (operand 2) to produce the destination. The pseudo is erased and BB is
// returned.
// NOTE(review): the function-name line and the declarations of TII and MRI are
// not visible in this listing.
8532 static MachineBasicBlock *
8534 const LoongArchSubtarget &Subtarget) {
8535 assert(MI.getOpcode() == LoongArch::BuildPairF64Pseudo &&
8536 "Unexpected instruction");
8537
8538 MachineFunction &MF = *BB->getParent();
8539 DebugLoc DL = MI.getDebugLoc();
8542 Register TmpReg = MRI.createVirtualRegister(&LoongArch::FPR64RegClass);
8543 Register DstReg = MI.getOperand(0).getReg();
8544 Register LoReg = MI.getOperand(1).getReg();
8545 Register HiReg = MI.getOperand(2).getReg();
8546
// Kill flags of the pseudo's inputs are forwarded to the instruction that
// consumes each input; TmpReg is always killed by its single use.
8547 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVGR2FR_W_64), TmpReg)
8548 .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()));
8549 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVGR2FRH_W), DstReg)
8550 .addReg(TmpReg, RegState::Kill)
8551 .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()));
8552 MI.eraseFromParent(); // The pseudo instruction is gone now.
8553 return BB;
8554}
8555
// Returns true only when MI is a Select_GPR_Using_CC_GPR pseudo, i.e. the one
// select pseudo that emitSelectPseudo knows how to expand; every other opcode
// yields false.
// NOTE(review): the signature line is not visible in this listing.
8557 switch (MI.getOpcode()) {
8558 default:
8559 return false;
8560 case LoongArch::Select_GPR_Using_CC_GPR:
8561 return true;
8562 }
8563}
8564
// Expands a Select_* pseudo (and any directly following selects that share its
// condition) into a branch plus PHI nodes. Operand layout used below:
//   0 = result, 1 = LHS, 2 = RHS (register or immediate), 3 = branch opcode,
//   4 = value when the branch is taken, 5 = value when it falls through.
// Returns TailMBB, the new block that holds the PHIs and everything that
// followed the last expanded select.
// NOTE(review): the function-name line and the declaration of the insertion
// iterator `I` are not visible in this listing.
8565 static MachineBasicBlock *
8567 const LoongArchSubtarget &Subtarget) {
8568 // To "insert" Select_* instructions, we actually have to insert the triangle
8569 // control-flow pattern. The incoming instructions know the destination vreg
8570 // to set, the condition code register to branch on, the true/false values to
8571 // select between, and the condcode to use to select the appropriate branch.
8572 //
8573 // We produce the following control flow:
8574 // HeadMBB
8575 // | \
8576 // | IfFalseMBB
8577 // | /
8578 // TailMBB
8579 //
8580 // When we find a sequence of selects we attempt to optimize their emission
8581 // by sharing the control flow. Currently we only handle cases where we have
8582 // multiple selects with the exact same condition (same LHS, RHS and CC).
8583 // The selects may be interleaved with other instructions if the other
8584 // instructions meet some requirements we deem safe:
8585 // - They are not pseudo instructions.
8586 // - They are debug instructions. Otherwise,
8587 // - They do not have side-effects, do not access memory and their inputs do
8588 // not depend on the results of the select pseudo-instructions.
8589 // The TrueV/FalseV operands of the selects cannot depend on the result of
8590 // previous selects in the sequence.
8591 // These conditions could be further relaxed. See the X86 target for a
8592 // related approach and more information.
8593
// RHS stays default-constructed when operand 2 is an immediate; in that case
// no later select can join the sequence because the scan below requires a
// register RHS.
8594 Register LHS = MI.getOperand(1).getReg();
8595 Register RHS;
8596 if (MI.getOperand(2).isReg())
8597 RHS = MI.getOperand(2).getReg();
8598 auto CC = static_cast<unsigned>(MI.getOperand(3).getImm());
8599
8600 SmallVector<MachineInstr *, 4> SelectDebugValues;
8601 SmallSet<Register, 4> SelectDests;
8602 SelectDests.insert(MI.getOperand(0).getReg());
8603
// Scan forward from MI to find the last select that can share this control
// flow. The scan stops at the first select with a different condition or whose
// inputs use an earlier select's result, and at the first non-select
// instruction that has side effects, touches memory, uses a custom inserter,
// or reads a select result.
8604 MachineInstr *LastSelectPseudo = &MI;
8605 for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
8606 SequenceMBBI != E; ++SequenceMBBI) {
8607 if (SequenceMBBI->isDebugInstr())
8608 continue;
8609 if (isSelectPseudo(*SequenceMBBI)) {
8610 if (SequenceMBBI->getOperand(1).getReg() != LHS ||
8611 !SequenceMBBI->getOperand(2).isReg() ||
8612 SequenceMBBI->getOperand(2).getReg() != RHS ||
8613 SequenceMBBI->getOperand(3).getImm() != CC ||
8614 SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
8615 SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
8616 break;
8617 LastSelectPseudo = &*SequenceMBBI;
8618 SequenceMBBI->collectDebugValues(SelectDebugValues);
8619 SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
8620 continue;
8621 }
8622 if (SequenceMBBI->hasUnmodeledSideEffects() ||
8623 SequenceMBBI->mayLoadOrStore() ||
8624 SequenceMBBI->usesCustomInsertionHook())
8625 break;
8626 if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
8627 return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
8628 }))
8629 break;
8630 }
8631
8632 const LoongArchInstrInfo &TII = *Subtarget.getInstrInfo();
8633 const BasicBlock *LLVM_BB = BB->getBasicBlock();
8634 DebugLoc DL = MI.getDebugLoc();
8636
8637 MachineBasicBlock *HeadMBB = BB;
8638 MachineFunction *F = BB->getParent();
8639 MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
8640 MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
8641
8642 F->insert(I, IfFalseMBB);
8643 F->insert(I, TailMBB);
8644
8645 // Set the call frame size on entry to the new basic blocks.
8646 unsigned CallFrameSize = TII.getCallFrameSizeAt(*LastSelectPseudo);
8647 IfFalseMBB->setCallFrameSize(CallFrameSize);
8648 TailMBB->setCallFrameSize(CallFrameSize);
8649
8650 // Transfer debug instructions associated with the selects to TailMBB.
8651 for (MachineInstr *DebugInstr : SelectDebugValues) {
8652 TailMBB->push_back(DebugInstr->removeFromParent());
8653 }
8654
8655 // Move all instructions after the sequence to TailMBB.
8656 TailMBB->splice(TailMBB->end(), HeadMBB,
8657 std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
8658 // Update machine-CFG edges by transferring all successors of the current
8659 // block to the new block which will contain the Phi nodes for the selects.
8660 TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
8661 // Set the successors for HeadMBB.
8662 HeadMBB->addSuccessor(IfFalseMBB);
8663 HeadMBB->addSuccessor(TailMBB);
8664
// CC is used directly as the opcode of the conditional branch. A taken branch
// jumps straight to TailMBB, otherwise control falls into IfFalseMBB.
8665 // Insert appropriate branch.
8666 if (MI.getOperand(2).isImm())
8667 BuildMI(HeadMBB, DL, TII.get(CC))
8668 .addReg(LHS)
8669 .addImm(MI.getOperand(2).getImm())
8670 .addMBB(TailMBB);
8671 else
8672 BuildMI(HeadMBB, DL, TII.get(CC)).addReg(LHS).addReg(RHS).addMBB(TailMBB);
8673
8674 // IfFalseMBB just falls through to TailMBB.
8675 IfFalseMBB->addSuccessor(TailMBB);
8676
// Walk [MI, LastSelectPseudo] in HeadMBB, replacing each select with a PHI at
// the start of TailMBB. `Next` is captured before the erase so the iteration
// survives removal of the current instruction.
8677 // Create PHIs for all of the select pseudo-instructions.
8678 auto SelectMBBI = MI.getIterator();
8679 auto SelectEnd = std::next(LastSelectPseudo->getIterator());
8680 auto InsertionPoint = TailMBB->begin();
8681 while (SelectMBBI != SelectEnd) {
8682 auto Next = std::next(SelectMBBI);
8683 if (isSelectPseudo(*SelectMBBI)) {
8684 // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
8685 BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
8686 TII.get(LoongArch::PHI), SelectMBBI->getOperand(0).getReg())
8687 .addReg(SelectMBBI->getOperand(4).getReg())
8688 .addMBB(HeadMBB)
8689 .addReg(SelectMBBI->getOperand(5).getReg())
8690 .addMBB(IfFalseMBB);
8691 SelectMBBI->eraseFromParent();
8692 }
8693 SelectMBBI = Next;
8694 }
8695
// PHIs were just created, so the function can no longer claim to be PHI-free.
8696 F->getProperties().resetNoPHIs();
8697 return TailMBB;
8698}
8699
// Dispatches each instruction that requests custom insertion to the routine
// that expands it, and returns the basic block in which insertion should
// continue (as returned by that routine). Any opcode not listed here is a
// programming error.
8700 MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter(
8701 MachineInstr &MI, MachineBasicBlock *BB) const {
8702 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
8703 DebugLoc DL = MI.getDebugLoc();
8704
8705 switch (MI.getOpcode()) {
8706 default:
8707 llvm_unreachable("Unexpected instr type to insert");
8708 case LoongArch::DIV_W:
8709 case LoongArch::DIV_WU:
8710 case LoongArch::MOD_W:
8711 case LoongArch::MOD_WU:
8712 case LoongArch::DIV_D:
8713 case LoongArch::DIV_DU:
8714 case LoongArch::MOD_D:
8715 case LoongArch::MOD_DU:
8716 return insertDivByZeroTrap(MI, BB);
// NOTE(review): this break is unreachable after the return above.
8717 break;
// WRFCSR: operand 0 is the FCSR index (added to FCSR0 to form the physical
// destination register), operand 1 the GPR holding the value to write.
8718 case LoongArch::WRFCSR: {
8719 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVGR2FCSR),
8720 LoongArch::FCSR0 + MI.getOperand(0).getImm())
8721 .addReg(MI.getOperand(1).getReg());
8722 MI.eraseFromParent();
8723 return BB;
8724 }
// RDFCSR: operand 0 is the destination GPR, operand 1 the FCSR index. The
// FCSR source operand is flagged undef.
8725 case LoongArch::RDFCSR: {
8726 MachineInstr *ReadFCSR =
8727 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVFCSR2GR),
8728 MI.getOperand(0).getReg())
8729 .addReg(LoongArch::FCSR0 + MI.getOperand(1).getImm());
8730 ReadFCSR->getOperand(1).setIsUndef();
8731 MI.eraseFromParent();
8732 return BB;
8733 }
8734 case LoongArch::Select_GPR_Using_CC_GPR:
8735 return emitSelectPseudo(MI, BB, Subtarget);
8736 case LoongArch::BuildPairF64Pseudo:
8737 return emitBuildPairF64Pseudo(MI, BB, Subtarget);
8738 case LoongArch::SplitPairF64Pseudo:
8739 return emitSplitPairF64Pseudo(MI, BB, Subtarget);
8740 case LoongArch::PseudoVBZ:
8741 case LoongArch::PseudoVBZ_B:
8742 case LoongArch::PseudoVBZ_H:
8743 case LoongArch::PseudoVBZ_W:
8744 case LoongArch::PseudoVBZ_D:
8745 case LoongArch::PseudoVBNZ:
8746 case LoongArch::PseudoVBNZ_B:
8747 case LoongArch::PseudoVBNZ_H:
8748 case LoongArch::PseudoVBNZ_W:
8749 case LoongArch::PseudoVBNZ_D:
8750 case LoongArch::PseudoXVBZ:
8751 case LoongArch::PseudoXVBZ_B:
8752 case LoongArch::PseudoXVBZ_H:
8753 case LoongArch::PseudoXVBZ_W:
8754 case LoongArch::PseudoXVBZ_D:
8755 case LoongArch::PseudoXVBNZ:
8756 case LoongArch::PseudoXVBNZ_B:
8757 case LoongArch::PseudoXVBNZ_H:
8758 case LoongArch::PseudoXVBNZ_W:
8759 case LoongArch::PseudoXVBNZ_D:
8760 return emitVecCondBranchPseudo(MI, BB, Subtarget);
8761 case LoongArch::PseudoXVINSGR2VR_B:
8762 case LoongArch::PseudoXVINSGR2VR_H:
8763 return emitPseudoXVINSGR2VR(MI, BB, Subtarget);
8764 case LoongArch::PseudoCTPOP_B:
8765 case LoongArch::PseudoCTPOP_H:
8766 case LoongArch::PseudoCTPOP_W:
8767 case LoongArch::PseudoCTPOP_D:
8768 case LoongArch::PseudoCTPOP_H_LA32:
8769 case LoongArch::PseudoCTPOP_W_LA32:
8770 return emitPseudoCTPOP(MI, BB, Subtarget);
8771 case LoongArch::PseudoVMSKLTZ_B:
8772 case LoongArch::PseudoVMSKLTZ_H:
8773 case LoongArch::PseudoVMSKLTZ_W:
8774 case LoongArch::PseudoVMSKLTZ_D:
8775 case LoongArch::PseudoVMSKGEZ_B:
8776 case LoongArch::PseudoVMSKEQZ_B:
8777 case LoongArch::PseudoVMSKNEZ_B:
8778 case LoongArch::PseudoXVMSKLTZ_B:
8779 case LoongArch::PseudoXVMSKLTZ_H:
8780 case LoongArch::PseudoXVMSKLTZ_W:
8781 case LoongArch::PseudoXVMSKLTZ_D:
8782 case LoongArch::PseudoXVMSKGEZ_B:
8783 case LoongArch::PseudoXVMSKEQZ_B:
8784 case LoongArch::PseudoXVMSKNEZ_B:
8785 return emitPseudoVMSKCOND(MI, BB, Subtarget);
8786 case TargetOpcode::STATEPOINT:
8787 // STATEPOINT is a pseudo instruction which has no implicit defs/uses
8788 // while bl call instruction (where statepoint will be lowered at the
8789 // end) has implicit def. This def is early-clobber as it will be set at
8790 // the moment of the call and earlier than any use is read.
8791 // Add this implicit dead def here as a workaround.
// NOTE(review): the line that opens the operand-construction call is missing
// from this listing; the visible arguments describe an implicit, dead,
// early-clobber def of R1.
8792 MI.addOperand(*MI.getMF(),
8794 LoongArch::R1, /*isDef*/ true,
8795 /*isImp*/ true, /*isKill*/ false, /*isDead*/ true,
8796 /*isUndef*/ false, /*isEarlyClobber*/ true));
8797 if (!Subtarget.is64Bit())
8798 report_fatal_error("STATEPOINT is only supported on 64-bit targets");
8799 return emitPatchPoint(MI, BB);
8800 case LoongArch::PROBED_STACKALLOC_DYN:
8801 return emitDynamicProbedAlloc(MI, BB);
8802 }
8803}
8804
8806 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
8807 unsigned *Fast) const {
// Misaligned accesses are reported as legal only when the subtarget has the
// UAL feature. VT, AddrSpace, Alignment and Flags are not consulted. When the
// answer is "yes" and the caller passed a non-null Fast, *Fast is set to 1.
// NOTE(review): the line carrying the function name is not visible in this
// listing.
8808 if (!Subtarget.hasUAL())
8809 return false;
8810
8811 // TODO: set reasonable speed number.
8812 if (Fast)
8813 *Fast = 1;
8814 return true;
8815}
8816
8817//===----------------------------------------------------------------------===//
8818// Calling Convention Implementation
8819//===----------------------------------------------------------------------===//
8820
8821// Eight general-purpose registers a0-a7 used for passing integer arguments,
8822// with a0-a1 reused to return values. Generally, the GPRs are used to pass
8823// fixed-point arguments, and floating-point arguments when no FPR is available
8824// or with soft float ABI.
8825const MCPhysReg ArgGPRs[] = {LoongArch::R4, LoongArch::R5, LoongArch::R6,
8826 LoongArch::R7, LoongArch::R8, LoongArch::R9,
8827 LoongArch::R10, LoongArch::R11};
8828
8829// PreserveNone calling convention:
8830// Arguments may be passed in any general-purpose registers except:
8831// - R1 : return address register
8832// - R22 : frame pointer
8833// - R31 : base pointer
8834//
8835// All general-purpose registers are treated as caller-saved,
8836// except R1 (RA) and R22 (FP).
8837//
8838// Non-volatile registers are allocated first so that a function
8839// can call normal functions without having to spill and reload
8840// argument registers.
8842 LoongArch::R23, LoongArch::R24, LoongArch::R25, LoongArch::R26,
8843 LoongArch::R27, LoongArch::R28, LoongArch::R29, LoongArch::R30,
8844 LoongArch::R4, LoongArch::R5, LoongArch::R6, LoongArch::R7,
8845 LoongArch::R8, LoongArch::R9, LoongArch::R10, LoongArch::R11,
8846 LoongArch::R12, LoongArch::R13, LoongArch::R14, LoongArch::R15,
8847 LoongArch::R16, LoongArch::R17, LoongArch::R18, LoongArch::R19,
8848 LoongArch::R20};
8849
8850// Eight floating-point registers fa0-fa7 used for passing floating-point
8851// arguments, and fa0-fa1 are also used to return values.
8852const MCPhysReg ArgFPR32s[] = {LoongArch::F0, LoongArch::F1, LoongArch::F2,
8853 LoongArch::F3, LoongArch::F4, LoongArch::F5,
8854 LoongArch::F6, LoongArch::F7};
8855// FPR32 and FPR64 alias each other.
8857 LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, LoongArch::F3_64,
8858 LoongArch::F4_64, LoongArch::F5_64, LoongArch::F6_64, LoongArch::F7_64};
8859
// Eight 128-bit vector registers vr0-vr7; CC_LoongArch allocates from this
// list for values whose type is a 128-bit vector.
8860 const MCPhysReg ArgVRs[] = {LoongArch::VR0, LoongArch::VR1, LoongArch::VR2,
8861 LoongArch::VR3, LoongArch::VR4, LoongArch::VR5,
8862 LoongArch::VR6, LoongArch::VR7};
8863
// Eight 256-bit vector registers xr0-xr7; CC_LoongArch allocates from this
// list for values whose type is a 256-bit vector.
8864 const MCPhysReg ArgXRs[] = {LoongArch::XR0, LoongArch::XR1, LoongArch::XR2,
8865 LoongArch::XR3, LoongArch::XR4, LoongArch::XR5,
8866 LoongArch::XR6, LoongArch::XR7};
8867
// Allocates the next free argument GPR for the calling convention recorded in
// State. One convention (its case label is on a line missing from this
// listing - presumably the preserve_none convention, TODO confirm) draws from
// PreserveNoneArgGPRs for non-variadic calls; variadic calls under that
// convention and every other convention draw from ArgGPRs.
// NOTE(review): the signature line is not visible in this listing.
8869 switch (State.getCallingConv()) {
8871 if (!State.isVarArg())
8872 return State.AllocateReg(PreserveNoneArgGPRs);
8873 [[fallthrough]];
8874 default:
8875 return State.AllocateReg(ArgGPRs);
8876 }
8877}
8878
8879 // Pass a 2*GRLen argument that has been split into two GRLen values through
8880 // registers or the stack as necessary.
// VA1/ArgFlags1 describe the first half, ValNo2/ValVT2/LocVT2/ArgFlags2 the
// second. The resulting locations are appended to State. Three outcomes are
// possible: both halves in GPRs, first half in a GPR and second on the stack,
// or both on the stack. Always returns false (success).
8881 static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State,
8882 CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1,
8883 unsigned ValNo2, MVT ValVT2, MVT LocVT2,
8884 ISD::ArgFlagsTy ArgFlags2) {
8885 unsigned GRLenInBytes = GRLen / 8;
8886 if (Register Reg = allocateArgGPR(State)) {
8887 // At least one half can be passed via register.
8888 State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
8889 VA1.getLocVT(), CCValAssign::Full));
8890 } else {
8891 // Both halves must be passed on the stack, with proper alignment.
// Only the first slot honours the original argument alignment (but never less
// than GRLen bytes); the second slot directly follows with GRLen alignment.
8892 Align StackAlign =
8893 std::max(Align(GRLenInBytes), ArgFlags1.getNonZeroOrigAlign());
8894 State.addLoc(
8896 State.AllocateStack(GRLenInBytes, StackAlign),
8897 VA1.getLocVT(), CCValAssign::Full));
8898 State.addLoc(CCValAssign::getMem(
8899 ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
8900 LocVT2, CCValAssign::Full));
8901 return false;
8902 }
8903 if (Register Reg = allocateArgGPR(State)) {
8904 // The second half can also be passed via register.
8905 State.addLoc(
8906 CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
8907 } else {
8908 // The second half is passed via the stack, without additional alignment.
8909 State.addLoc(CCValAssign::getMem(
8910 ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
8911 LocVT2, CCValAssign::Full));
8912 }
8913 return false;
8914 }
8915
8916 // Implements the LoongArch calling convention. Returns true upon failure.
// Called once per (legalised) argument or return value part. On success the
// chosen location(s) are appended to State, or - for a split integer that is
// not yet complete - recorded in State's pending lists. The only failure path
// in this function is a return value that needs more than two parts.
// NOTE(review): the signature line and the case labels of the ABI switch are
// not visible in this listing.
8918 unsigned ValNo, MVT ValVT,
8919 CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
8920 CCState &State, bool IsRet, Type *OrigTy) {
8921 unsigned GRLen = DL.getLargestLegalIntTypeSizeInBits();
// NOTE(review): the assert message below contains a typo ("Unspport").
8922 assert((GRLen == 32 || GRLen == 64) && "Unspport GRLen");
8923 MVT GRLenVT = GRLen == 32 ? MVT::i32 : MVT::i64;
8924 MVT LocVT = ValVT;
8925
8926 // Any return value split into more than two values can't be returned
8927 // directly.
8928 if (IsRet && ValNo > 1)
8929 return true;
8930
8931 // If passing a variadic argument, or if no FPR is available.
8932 bool UseGPRForFloat = true;
8933
// For one group of ABIs (labels not visible here) floating-point values use
// FPRs unless the argument is variadic; the remaining group keeps the default
// of passing them in GPRs.
8934 switch (ABI) {
8935 default:
8936 llvm_unreachable("Unexpected ABI");
8937 break;
8942 UseGPRForFloat = ArgFlags.isVarArg();
8943 break;
8946 break;
8947 }
8948
8949 // If this is a variadic argument, the LoongArch calling convention requires
8950 // that it is assigned an 'even' or 'aligned' register if it has (2*GRLen)/8
8951 // byte alignment. An aligned register should be used regardless of whether
8952 // the original argument was split during legalisation or not. The argument
8953 // will not be passed by registers if the original type is larger than
8954 // 2*GRLen, so the register alignment rule does not apply.
8955 unsigned TwoGRLenInBytes = (2 * GRLen) / 8;
8956 if (ArgFlags.isVarArg() &&
8957 ArgFlags.getNonZeroOrigAlign() == TwoGRLenInBytes &&
8958 DL.getTypeAllocSize(OrigTy) == TwoGRLenInBytes) {
8959 unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
8960 // Skip 'odd' register if necessary.
8961 if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1)
8962 State.AllocateReg(ArgGPRs);
8963 }
8964
8965 SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
8966 SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
8967 State.getPendingArgFlags();
8968
8969 assert(PendingLocs.size() == PendingArgFlags.size() &&
8970 "PendingLocs and PendingArgFlags out of sync");
8971
// Once all eight FP argument registers are taken, further FP values fall back
// to GPRs (or the stack).
8972 // FPR32 and FPR64 alias each other.
8973 if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s))
8974 UseGPRForFloat = true;
8975
8976 if (UseGPRForFloat && ValVT == MVT::f32) {
8977 LocVT = GRLenVT;
8978 LocInfo = CCValAssign::BCvt;
8979 } else if (UseGPRForFloat && GRLen == 64 && ValVT == MVT::f64) {
8980 LocVT = MVT::i64;
8981 LocInfo = CCValAssign::BCvt;
8982 } else if (UseGPRForFloat && GRLen == 32 && ValVT == MVT::f64) {
8983 // Handle passing f64 on LA32D with a soft float ABI or when floating point
8984 // registers are exhausted.
8985 assert(PendingLocs.empty() && "Can't lower f64 if it is split");
8986 // Depending on available argument GPRS, f64 may be passed in a pair of
8987 // GPRs, split between a GPR and the stack, or passed completely on the
8988 // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
8989 // cases.
8990 MCRegister Reg = allocateArgGPR(State);
8991 if (!Reg) {
// No GPR left: the whole f64 goes to an 8-byte aligned stack slot.
8992 int64_t StackOffset = State.AllocateStack(8, Align(8));
8993 State.addLoc(
8994 CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
8995 return false;
8996 }
// Low half in Reg; the high half gets a second custom location that shares the
// same ValNo (either another GPR or a 4-byte stack slot).
8997 LocVT = MVT::i32;
8998 State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
8999 MCRegister HiReg = allocateArgGPR(State);
9000 if (HiReg) {
9001 State.addLoc(
9002 CCValAssign::getCustomReg(ValNo, ValVT, HiReg, LocVT, LocInfo));
9003 } else {
9004 int64_t StackOffset = State.AllocateStack(4, Align(4));
9005 State.addLoc(
9006 CCValAssign::getCustomMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
9007 }
9008 return false;
9009 }
9010
9011 // Split arguments might be passed indirectly, so keep track of the pending
9012 // values.
9013 if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
9014 LocVT = GRLenVT;
9015 LocInfo = CCValAssign::Indirect;
9016 PendingLocs.push_back(
9017 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
9018 PendingArgFlags.push_back(ArgFlags);
// Nothing is assigned until the final piece of the split value arrives.
9019 if (!ArgFlags.isSplitEnd()) {
9020 return false;
9021 }
9022 }
9023
9024 // If the split argument only had two elements, it should be passed directly
9025 // in registers or on the stack.
9026 if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&
9027 PendingLocs.size() <= 2) {
9028 assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
9029 // Apply the normal calling convention rules to the first half of the
9030 // split argument.
9031 CCValAssign VA = PendingLocs[0];
9032 ISD::ArgFlagsTy AF = PendingArgFlags[0];
9033 PendingLocs.clear();
9034 PendingArgFlags.clear();
9035 return CC_LoongArchAssign2GRLen(GRLen, State, VA, AF, ValNo, ValVT, LocVT,
9036 ArgFlags);
9037 }
9038
9039 // Allocate to a register if possible, or else a stack slot.
9040 Register Reg;
9041 unsigned StoreSizeBytes = GRLen / 8;
9042 Align StackAlign = Align(GRLen / 8);
9043
9044 if (ValVT == MVT::f32 && !UseGPRForFloat) {
9045 Reg = State.AllocateReg(ArgFPR32s);
9046 } else if (ValVT == MVT::f64 && !UseGPRForFloat) {
9047 Reg = State.AllocateReg(ArgFPR64s);
9048 } else if (ValVT.is128BitVector()) {
9049 Reg = State.AllocateReg(ArgVRs);
// Vectors never take the GPR path; clearing the flag also disarms the
// LocVT == GRLenVT assertion further down.
9050 UseGPRForFloat = false;
9051 StoreSizeBytes = 16;
9052 StackAlign = Align(16);
9053 } else if (ValVT.is256BitVector()) {
9054 Reg = State.AllocateReg(ArgXRs);
9055 UseGPRForFloat = false;
9056 StoreSizeBytes = 32;
9057 StackAlign = Align(32);
9058 } else {
9059 Reg = allocateArgGPR(State);
9060 }
9061
// NOTE(review): AllocateStack's result is stored as int64_t in the LA32 f64
// path above but narrowed to unsigned here - confirm this is intentional.
9062 unsigned StackOffset =
9063 Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign);
9064
9065 // If we reach this point and PendingLocs is non-empty, we must be at the
9066 // end of a split argument that must be passed indirectly.
9067 if (!PendingLocs.empty()) {
9068 assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
9069 assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
// Every pending piece is given the same location: the single register or stack
// slot allocated just above.
9070 for (auto &It : PendingLocs) {
9071 if (Reg)
9072 It.convertToReg(Reg);
9073 else
9074 It.convertToMem(StackOffset);
9075 State.addLoc(It);
9076 }
9077 PendingLocs.clear();
9078 PendingArgFlags.clear();
9079 return false;
9080 }
9081 assert((!UseGPRForFloat || LocVT == GRLenVT) &&
9082 "Expected an GRLenVT at this stage");
9083
9084 if (Reg) {
9085 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
9086 return false;
9087 }
9088
9089 // When a floating-point value is passed on the stack, no bit-cast is needed.
9090 if (ValVT.isFloatingPoint()) {
9091 LocVT = ValVT;
9092 LocInfo = CCValAssign::Full;
9093 }
9094
9095 State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
9096 return false;
9097 }
9098
// Runs the assignment function Fn over every entry of Ins, accumulating the
// resulting locations in CCInfo. The IR type handed to Fn is the function's
// return type when IsRet is set, the matching parameter type when the entry
// corresponds to an original argument, and null otherwise. A failure reported
// by Fn is treated as unreachable.
// NOTE(review): the line declaring ABI is not visible in this listing.
9099 void LoongArchTargetLowering::analyzeInputArgs(
9100 MachineFunction &MF, CCState &CCInfo,
9101 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
9102 LoongArchCCAssignFn Fn) const {
9103 FunctionType *FType = MF.getFunction().getFunctionType();
9104 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
9105 MVT ArgVT = Ins[i].VT;
9106 Type *ArgTy = nullptr;
9107 if (IsRet)
9108 ArgTy = FType->getReturnType();
9109 else if (Ins[i].isOrigArg())
9110 ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
9112 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
// The loop index doubles as the value number passed to Fn.
9113 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Ins[i].Flags,
9114 CCInfo, IsRet, ArgTy)) {
9115 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " << ArgVT
9116 << '\n');
9117 llvm_unreachable("");
9118 }
9119 }
9120 }
9121
// Runs the assignment function Fn over every entry of Outs, accumulating the
// resulting locations in CCInfo. When CLI is non-null the original IR type of
// each value is looked up in CLI's argument list; otherwise Fn receives a null
// type. A failure reported by Fn is treated as unreachable.
// NOTE(review): the line declaring ABI is not visible in this listing.
9122 void LoongArchTargetLowering::analyzeOutputArgs(
9123 MachineFunction &MF, CCState &CCInfo,
9124 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
9125 CallLoweringInfo *CLI, LoongArchCCAssignFn Fn) const {
9126 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
9127 MVT ArgVT = Outs[i].VT;
9128 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
9130 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
// The loop index doubles as the value number passed to Fn.
9131 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Outs[i].Flags,
9132 CCInfo, IsRet, OrigTy)) {
9133 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " << ArgVT
9134 << "\n");
9135 llvm_unreachable("");
9136 }
9137 }
9138 }
9139
9140 // Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
9141 // values.
// Full locations are returned unchanged. BCvt locations are converted back to
// the value type: an f32 carried in an i64 uses the dedicated MOVGR2FR_W_LA64
// node, every other combination uses a plain BITCAST.
// NOTE(review): the signature line and one line inside the Full case are not
// visible in this listing.
9143 const CCValAssign &VA, const SDLoc &DL) {
9144 switch (VA.getLocInfo()) {
9145 default:
9146 llvm_unreachable("Unexpected CCValAssign::LocInfo");
9147 case CCValAssign::Full:
9149 break;
9150 case CCValAssign::BCvt:
9151 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
9152 Val = DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Val);
9153 else
9154 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
9155 break;
9156 }
9157 return Val;
9158 }
9159
9161 const CCValAssign &VA, const SDLoc &DL,
9162 const ISD::InputArg &In,
9163 const LoongArchTargetLowering &TLI) {
// Materialises an incoming value that VA assigned to a physical register: a
// fresh virtual register of the class matching the location type is created,
// registered as a live-in for the physical register, copied out of, and the
// copy is finally converted to the value type.
// NOTE(review): the signature line, the declarations of MF/RegInfo and the
// declaration of LAFI are on lines not visible in this listing.
9166 EVT LocVT = VA.getLocVT();
9167 SDValue Val;
9168 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
9169 Register VReg = RegInfo.createVirtualRegister(RC);
9170 RegInfo.addLiveIn(VA.getLocReg(), VReg);
9171 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
9172
9173 // If input is sign extended from 32 bits, note it for the OptW pass.
9174 if (In.isOrigArg()) {
9175 Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex());
9176 if (OrigArg->getType()->isIntegerTy()) {
9177 unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
9178 // An input zero extended from i31 can also be considered sign extended.
9179 if ((BitWidth <= 32 && In.Flags.isSExt()) ||
9180 (BitWidth < 32 && In.Flags.isZExt())) {
9183 LAFI->addSExt32Register(VReg);
9184 }
9185 }
9186 }
9187
9188 return convertLocVTToValVT(DAG, Val, VA, DL);
9189 }
9190
9191 // The caller is responsible for loading the full value if the argument is
9192 // passed with CCValAssign::Indirect.
// Loads an incoming value that VA assigned to the stack. An immutable fixed
// frame object sized for the value type is created at VA's memory offset and
// a non-extending load of the location type is issued from it.
// NOTE(review): the signature line, the declaration of MF, a case label
// between Full and BCvt, and the trailing arguments of two calls are on lines
// not visible in this listing.
9194 const CCValAssign &VA, const SDLoc &DL) {
9196 MachineFrameInfo &MFI = MF.getFrameInfo();
9197 EVT ValVT = VA.getValVT();
9198 int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
9199 /*IsImmutable=*/true);
9200 SDValue FIN = DAG.getFrameIndex(
9202
9203 ISD::LoadExtType ExtType;
9204 switch (VA.getLocInfo()) {
9205 default:
9206 llvm_unreachable("Unexpected CCValAssign::LocInfo");
9207 case CCValAssign::Full:
9209 case CCValAssign::BCvt:
9210 ExtType = ISD::NON_EXTLOAD;
9211 break;
9212 }
9213 return DAG.getExtLoad(
9214 ExtType, DL, VA.getLocVT(), Chain, FIN,
9216 }
9217
9219 const CCValAssign &VA,
9220 const CCValAssign &HiVA,
9221 const SDLoc &DL) {
// Rebuilds an incoming f64 that was passed as two i32 halves. VA describes the
// low half, which must be in a register; HiVA describes the high half, which
// is either in another GPR or in a 4-byte stack slot. The halves are joined
// with a BUILD_PAIR_F64 node.
// NOTE(review): the signature line, the declarations of MF/RegInfo and the
// trailing arguments of the stack load are on lines not visible in this
// listing.
9222 assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
9223 "Unexpected VA");
9225 MachineFrameInfo &MFI = MF.getFrameInfo();
9227
9228 assert(VA.isRegLoc() && "Expected register VA assignment");
9229
9230 Register LoVReg = RegInfo.createVirtualRegister(&LoongArch::GPRRegClass);
9231 RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
9232 SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
9233 SDValue Hi;
9234 if (HiVA.isMemLoc()) {
9235 // Second half of f64 is passed on the stack.
9236 int FI = MFI.CreateFixedObject(4, HiVA.getLocMemOffset(),
9237 /*IsImmutable=*/true);
9238 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
9239 Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
9241 } else {
9242 // Second half of f64 is passed in another GPR.
9243 Register HiVReg = RegInfo.createVirtualRegister(&LoongArch::GPRRegClass);
9244 RegInfo.addLiveIn(HiVA.getLocReg(), HiVReg);
9245 Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
9246 }
9247 return DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64, Lo, Hi);
9248 }
9249
9251 const CCValAssign &VA, const SDLoc &DL) {
// Converts Val from its value type to the location type chosen by VA. Full
// locations are returned unchanged. BCvt locations are bit-converted: an f32
// carried in an i64 uses the dedicated MOVFR2GR_S_LA64 node, every other
// combination uses a plain BITCAST to the location type.
// NOTE(review): the signature line is not visible in this listing.
9252 EVT LocVT = VA.getLocVT();
9253
9254 switch (VA.getLocInfo()) {
9255 default:
9256 llvm_unreachable("Unexpected CCValAssign::LocInfo");
9257 case CCValAssign::Full:
9258 break;
9259 case CCValAssign::BCvt:
9260 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
9261 Val = DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Val);
9262 else
9263 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
9264 break;
9265 }
9266 return Val;
9267 }
9268
9269static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
9270 CCValAssign::LocInfo LocInfo,
9271 ISD::ArgFlagsTy ArgFlags, Type *OrigTy,
9272 CCState &State) {
9273 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
9274 // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, SpLim
9275 // s0 s1 s2 s3 s4 s5 s6 s7 s8
9276 static const MCPhysReg GPRList[] = {
9277 LoongArch::R23, LoongArch::R24, LoongArch::R25,
9278 LoongArch::R26, LoongArch::R27, LoongArch::R28,
9279 LoongArch::R29, LoongArch::R30, LoongArch::R31};
9280 if (MCRegister Reg = State.AllocateReg(GPRList)) {
9281 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
9282 return false;
9283 }
9284 }
9285
9286 if (LocVT == MVT::f32) {
9287 // Pass in STG registers: F1, F2, F3, F4
9288 // fs0,fs1,fs2,fs3
9289 static const MCPhysReg FPR32List[] = {LoongArch::F24, LoongArch::F25,
9290 LoongArch::F26, LoongArch::F27};
9291 if (MCRegister Reg = State.AllocateReg(FPR32List)) {
9292 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
9293 return false;
9294 }
9295 }
9296
9297 if (LocVT == MVT::f64) {
9298 // Pass in STG registers: D1, D2, D3, D4
9299 // fs4,fs5,fs6,fs7
9300 static const MCPhysReg FPR64List[] = {LoongArch::F28_64, LoongArch::F29_64,
9301 LoongArch::F30_64, LoongArch::F31_64};
9302 if (MCRegister Reg = State.AllocateReg(FPR64List)) {
9303 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
9304 return false;
9305 }
9306 }
9307
9308 report_fatal_error("No registers left in GHC calling convention");
9309 return true;
9310}
9311
9312// Transform physical registers into virtual registers.
// Appends the lowered incoming values to InVals and returns the resulting
// chain (a TokenFactor that also covers the vararg register stores when any
// were emitted).
9314 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
9315 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
9316 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
9317
9319
9320 switch (CallConv) {
9321 default:
9322 llvm_unreachable("Unsupported calling convention");
9323 case CallingConv::C:
9324 case CallingConv::Fast:
9327 break;
9328 case CallingConv::GHC:
9329 if (!MF.getSubtarget().hasFeature(LoongArch::FeatureBasicF) ||
9330 !MF.getSubtarget().hasFeature(LoongArch::FeatureBasicD))
9332 "GHC calling convention requires the F and D extensions");
9333 }
9334
9335 const Function &Func = MF.getFunction();
9336 EVT PtrVT = getPointerTy(DAG.getDataLayout());
9337 MVT GRLenVT = Subtarget.getGRLenVT();
9338 unsigned GRLenInBytes = Subtarget.getGRLen() / 8;
9339
9340 // Check if this function has any musttail calls. If so, incoming indirect
9341 // arg pointers must be saved in virtual registers so they survive across
9342 // basic blocks (the SelectionDAG is cleared between BBs). Only do this
9343 // when needed to avoid adding register pressure to non-musttail functions.
9344 bool HasMusttail = llvm::any_of(Func, [](const BasicBlock &BB) {
9345 return llvm::any_of(BB, [](const Instruction &I) {
9346 if (const auto *CI = dyn_cast<CallInst>(&I))
9347 return CI->isMustTailCall();
9348 return false;
9349 });
9350 });
9351 // Used with varargs to accumulate store chains.
9352 std::vector<SDValue> OutChains;
9353
9354 // Assign locations to all of the incoming arguments.
9356 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
9357
9358 if (CallConv == CallingConv::GHC)
9360 else
9361 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false, CC_LoongArch);
9362
     // i walks ArgLocs while InsIdx walks Ins. The two indices drift apart
     // when a split f64 consumes a second ArgLocs entry (ArgLocs[++i] below)
     // without consuming a second Ins entry.
9363 for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) {
9364 CCValAssign &VA = ArgLocs[i];
9365 SDValue ArgValue;
9366 // Passing f64 on LA32D with a soft float ABI must be handled as a special
9367 // case.
9368 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
9369 assert(VA.needsCustom());
9370 ArgValue = unpackF64OnLA32DSoftABI(DAG, Chain, VA, ArgLocs[++i], DL);
9371 } else if (VA.isRegLoc())
9372 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[InsIdx], *this);
9373 else
9374 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
9375 if (VA.getLocInfo() == CCValAssign::Indirect) {
9376 // If the original argument was split and passed by reference, we need to
9377 // load all parts of it here (using the same address).
9378 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
9380 unsigned ArgIndex = Ins[InsIdx].OrigArgIndex;
     // Remember the incoming pointer in a fresh virtual register, keyed by
     // the original argument index, for later use by musttail call lowering.
9381 if (HasMusttail) {
9384 Register VReg =
9385 MF.getRegInfo().createVirtualRegister(&LoongArch::GPRRegClass);
9386 Chain = DAG.getCopyToReg(Chain, DL, VReg, ArgValue);
9387 LAFI->setIncomingIndirectArg(ArgIndex, VReg);
9388 }
9389 unsigned ArgPartOffset = Ins[InsIdx].PartOffset;
9390 assert(ArgPartOffset == 0);
     // Consume every following part that belongs to the same original
     // argument, loading each from ArgValue plus its part offset.
9391 while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) {
9392 CCValAssign &PartVA = ArgLocs[i + 1];
9393 unsigned PartOffset = Ins[InsIdx + 1].PartOffset - ArgPartOffset;
9394 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
9395 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
9396 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
9398 ++i;
9399 ++InsIdx;
9400 }
9401 continue;
9402 }
9403 InVals.push_back(ArgValue);
9404 }
9405
9406 if (IsVarArg) {
9408 unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
9409 const TargetRegisterClass *RC = &LoongArch::GPRRegClass;
9410 MachineFrameInfo &MFI = MF.getFrameInfo();
9411 MachineRegisterInfo &RegInfo = MF.getRegInfo();
9412 auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();
9413
9414 // Offset of the first variable argument from stack pointer, and size of
9415 // the vararg save area. For now, the varargs save area is either zero or
9416 // large enough to hold a0-a7.
9417 int VaArgOffset, VarArgsSaveSize;
9418
9419 // If all registers are allocated, then all varargs must be passed on the
9420 // stack and we don't need to save any argregs.
9421 if (ArgRegs.size() == Idx) {
9422 VaArgOffset = CCInfo.getStackSize();
9423 VarArgsSaveSize = 0;
9424 } else {
9425 VarArgsSaveSize = GRLenInBytes * (ArgRegs.size() - Idx);
9426 VaArgOffset = -VarArgsSaveSize;
9427 }
9428
9429 // Record the frame index of the first variable argument
9430 // which is a value necessary to VASTART.
9431 int FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
9432 LoongArchFI->setVarArgsFrameIndex(FI);
9433
9434 // If saving an odd number of registers then create an extra stack slot to
9435 // ensure that the frame pointer is 2*GRLen-aligned, which in turn ensures
9436 // offsets to even-numbered registers remain 2*GRLen-aligned.
9437 if (Idx % 2) {
9438 MFI.CreateFixedObject(GRLenInBytes, VaArgOffset - (int)GRLenInBytes,
9439 true);
9440 VarArgsSaveSize += GRLenInBytes;
9441 }
9442
9443 // Copy the integer registers that may have been used for passing varargs
9444 // to the vararg save area.
9445 for (unsigned I = Idx; I < ArgRegs.size();
9446 ++I, VaArgOffset += GRLenInBytes) {
9447 const Register Reg = RegInfo.createVirtualRegister(RC);
9448 RegInfo.addLiveIn(ArgRegs[I], Reg);
9449 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, GRLenVT);
9450 FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
9451 SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
9452 SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
     // Clear the IR value attached to the store's memory operand.
9454 cast<StoreSDNode>(Store.getNode())
9455 ->getMemOperand()
9456 ->setValue((Value *)nullptr);
9457 OutChains.push_back(Store);
9458 }
9459 LoongArchFI->setVarArgsSaveSize(VarArgsSaveSize);
9460 }
9461
9462 // All stores are grouped in one node to allow the matching between
9463 // the size of Ins and InVals. This only happens for vararg functions.
9464 if (!OutChains.empty()) {
9465 OutChains.push_back(Chain);
9466 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
9467 }
9468
9469 return Chain;
9470}
9471
     // Defers entirely to the tail-call marker carried by the call instruction.
9473 return CI->isTailCall();
9474}
9475
9476// Check if the return value is used as only a return value, as otherwise
9477// we can't perform a tail-call.
// On success, Chain is overwritten with operand 0 of the CopyToReg that
// consumes N's result; on failure Chain is left untouched.
9479 SDValue &Chain) const {
     // N must produce exactly one value, and that value must have exactly
     // one use.
9480 if (N->getNumValues() != 1)
9481 return false;
9482 if (!N->hasNUsesOfValue(1, 0))
9483 return false;
9484
9485 SDNode *Copy = *N->user_begin();
9486 if (Copy->getOpcode() != ISD::CopyToReg)
9487 return false;
9488
9489 // If the ISD::CopyToReg has a glue operand, we conservatively assume it
9490 // isn't safe to perform a tail call.
9491 if (Copy->getGluedNode())
9492 return false;
9493
9494 // The copy must be used by a LoongArchISD::RET, and nothing else.
9495 bool HasRet = false;
9496 for (SDNode *Node : Copy->users()) {
9497 if (Node->getOpcode() != LoongArchISD::RET)
9498 return false;
9499 HasRet = true;
9500 }
9501
     // A copy with no users at all does not qualify either.
9502 if (!HasRet)
9503 return false;
9504
9505 Chain = Copy->getOperand(0);
9506 return true;
9507}
9508
9509// Check whether the call is eligible for tail call optimization.
9510bool LoongArchTargetLowering::isEligibleForTailCallOptimization(
9511 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
9512 const SmallVectorImpl<CCValAssign> &ArgLocs) const {
9513
9514 auto CalleeCC = CLI.CallConv;
9515 auto &Outs = CLI.Outs;
9516 auto &Caller = MF.getFunction();
9517 auto CallerCC = Caller.getCallingConv();
9518
9519 bool IsMustTail = CLI.CB && CLI.CB->isMustTailCall();
9520
9521 // Byval parameters hand the function a pointer directly into the stack area
9522 // we want to reuse during a tail call. Working around this *is* possible
9523 // but less efficient and uglier in LowerCall. For musttail, there is no
9524 // workaround today: a byval arg requires a local copy that becomes invalid
9525 // after the tail call deallocates the caller's frame, so rejecting here
9526 // (and triggering reportFatalInternalError in LowerCall) is safer than
9527 // miscompiling.
9528 for (auto &Arg : Outs)
9529 if (Arg.Flags.isByVal())
9530 return false;
9531
9532 // musttail bypasses the remaining checks: the checks either reject cases
9533 // we handle specially (indirect args are forwarded via incoming pointers,
9534 // stack-passed args reuse the matching incoming layout, sret is forwarded
9535 // like any other pointer arg) or are optimizations not applicable to
9536 // mandatory tail calls.
9537 if (IsMustTail)
9538 return true;
9539
9540 // Do not tail call opt if the stack is used to pass parameters.
9541 if (CCInfo.getStackSize() != 0)
9542 return false;
9543
9544 // Do not tail call opt if any parameters need to be passed indirectly.
9545 for (auto &VA : ArgLocs)
9546 if (VA.getLocInfo() == CCValAssign::Indirect)
9547 return false;
9548
9549 // Do not tail call opt if either caller or callee uses struct return
9550 // semantics.
9551 auto IsCallerStructRet = Caller.hasStructRetAttr();
9552 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
9553 if (IsCallerStructRet || IsCalleeStructRet)
9554 return false;
9555
9556 // The callee has to preserve all registers the caller needs to preserve.
9557 const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
9558 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
9559 if (CalleeCC != CallerCC) {
9560 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
9561 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
9562 return false;
9563 }
9564 return true;
9565}
9566
     // Map VT to its IR type and ask the DataLayout for that type's preferred
     // alignment.
9568 return DAG.getDataLayout().getPrefTypeAlign(
9569 VT.getTypeForEVT(*DAG.getContext()));
9570}
9571
9572// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
9573// and output parameter nodes.
// For a tail call the call node itself is returned and neither callseq
// marker is emitted; otherwise the call results are appended to InVals and
// the final chain is returned.
9574SDValue
9576 SmallVectorImpl<SDValue> &InVals) const {
9577 SelectionDAG &DAG = CLI.DAG;
9578 SDLoc &DL = CLI.DL;
9580 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
9582 SDValue Chain = CLI.Chain;
9583 SDValue Callee = CLI.Callee;
9584 CallingConv::ID CallConv = CLI.CallConv;
9585 bool IsVarArg = CLI.IsVarArg;
9586 EVT PtrVT = getPointerTy(DAG.getDataLayout());
9587 MVT GRLenVT = Subtarget.getGRLenVT();
     // Reference: the eligibility decision below is written back into CLI.
9588 bool &IsTailCall = CLI.IsTailCall;
9589
9591
9592 // Analyze the operands of the call, assigning locations to each operand.
9594 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
9595
9596 if (CallConv == CallingConv::GHC)
9597 ArgCCInfo.AnalyzeCallOperands(Outs, CC_LoongArch_GHC);
9598 else
9599 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI, CC_LoongArch);
9600
9601 // Check if it's really possible to do a tail call.
9602 if (IsTailCall)
9603 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
9604
     // A musttail call site that could not be turned into a tail call is a
     // hard error rather than a silent fallback to a normal call.
9605 if (IsTailCall)
9606 ++NumTailCalls;
9607 else if (CLI.CB && CLI.CB->isMustTailCall())
9608 report_fatal_error("failed to perform tail call elimination on a call "
9609 "site marked musttail");
9610
9611 // Get a count of how many bytes are to be pushed on the stack.
9612 unsigned NumBytes = ArgCCInfo.getStackSize();
9613
9614 // Create local copies for byval args.
9615 SmallVector<SDValue> ByValArgs;
9616 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
9617 ISD::ArgFlagsTy Flags = Outs[i].Flags;
9618 if (!Flags.isByVal())
9619 continue;
9620
9621 SDValue Arg = OutVals[i];
9622 unsigned Size = Flags.getByValSize();
9623 Align Alignment = Flags.getNonZeroByValAlign();
9624
9625 int FI =
9626 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
9627 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
9628 SDValue SizeNode = DAG.getConstant(Size, DL, GRLenVT);
9629
9630 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
9631 /*IsVolatile=*/false,
9632 /*AlwaysInline=*/false, /*CI=*/nullptr, std::nullopt,
9634 ByValArgs.push_back(FIPtr);
9635 }
9636
9637 if (!IsTailCall)
9638 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
9639
9640 // Copy argument values to their designated locations.
9642 SmallVector<SDValue> MemOpChains;
9643 SDValue StackPtr;
     // i walks ArgLocs, OutIdx walks Outs/OutVals, and j walks ByValArgs in
     // the order the local copies were created above.
9644 for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e;
9645 ++i, ++OutIdx) {
9646 CCValAssign &VA = ArgLocs[i];
9647 SDValue ArgValue = OutVals[OutIdx];
9648 ISD::ArgFlagsTy Flags = Outs[OutIdx].Flags;
9649
9650 // Handle passing f64 on LA32D with a soft float ABI as a special case.
9651 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
9652 assert(VA.isRegLoc() && "Expected register VA assignment");
9653 assert(VA.needsCustom());
9654 SDValue SplitF64 =
9655 DAG.getNode(LoongArchISD::SPLIT_PAIR_F64, DL,
9656 DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
9657 SDValue Lo = SplitF64.getValue(0);
9658 SDValue Hi = SplitF64.getValue(1);
9659
9660 Register RegLo = VA.getLocReg();
9661 RegsToPass.push_back(std::make_pair(RegLo, Lo));
9662
9663 // Get the CCValAssign for the Hi part.
9664 CCValAssign &HiVA = ArgLocs[++i];
9665
9666 if (HiVA.isMemLoc()) {
9667 // Second half of f64 is passed on the stack.
9668 if (!StackPtr.getNode())
9669 StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT);
9671 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
9672 DAG.getIntPtrConstant(HiVA.getLocMemOffset(), DL));
9673 // Emit the store.
9674 MemOpChains.push_back(DAG.getStore(
9675 Chain, DL, Hi, Address,
9677 } else {
9678 // Second half of f64 is passed in another GPR.
9679 Register RegHigh = HiVA.getLocReg();
9680 RegsToPass.push_back(std::make_pair(RegHigh, Hi));
9681 }
9682 continue;
9683 }
9684
9685 // Promote the value if needed.
9686 // For now, only handle fully promoted and indirect arguments.
9687 if (VA.getLocInfo() == CCValAssign::Indirect) {
9688 // For musttail calls, reuse incoming indirect pointers instead of
9689 // creating new stack temporaries. The incoming pointers point to the
9690 // caller's caller's frame, which remains valid after a tail call.
9691 if (IsTailCall && CLI.CB && CLI.CB->isMustTailCall()) {
9694 unsigned CallArgIdx = Outs[OutIdx].OrigArgIndex;
9695
9696 // Resolve which formal parameter is being passed at this call
9697 // position.
9698 //
9699 // FIXME: Ins[].OrigArgIndex is Argument::getArgNo() (unfiltered),
9700 // but Outs[].OrigArgIndex is an index into a filtered arg list
9701 // (empty types removed, via CallLoweringInfo in the target-
9702 // independent layer). IncomingIndirectArgs is keyed by the
9703 // caller's unfiltered Argument::getArgNo(), so we have to walk
9704 // the caller's formals (same filter) to translate the index.
9705 // This target-independent asymmetry should be normalized so
9706 // backends do not need to re-derive the mapping.
9707 //
9708 // Steps:
9709 // 1. Find the call operand at filtered position CallArgIdx.
9710 // 2. If it is an Argument, use getArgNo() directly (same filter
9711 // for caller formals and call operands).
9712 // 3. Otherwise (computed value), walk the caller's formals and
9713 // skip empty types to map the filtered index to getArgNo().
9714 const Argument *FormalArg = nullptr;
9715 unsigned FilteredIdx = 0;
9716 for (const auto &CallArg : CLI.CB->args()) {
9717 if (CallArg->getType()->isEmptyTy())
9718 continue;
9719 if (FilteredIdx == CallArgIdx) {
9720 FormalArg = dyn_cast<Argument>(CallArg);
9721 break;
9722 }
9723 ++FilteredIdx;
9724 }
9725
9726 // For forwarded args, getArgNo() gives the unfiltered index directly.
9727 // For computed args, walk the caller's formals to resolve it.
9728 unsigned FormalArgIdx = CallArgIdx;
9729 if (FormalArg) {
9730 FormalArgIdx = FormalArg->getArgNo();
9731 } else {
9732 FilteredIdx = 0;
9733 for (const auto &Arg : MF.getFunction().args()) {
9734 if (Arg.getType()->isEmptyTy())
9735 continue;
9736 if (FilteredIdx == CallArgIdx) {
9737 FormalArgIdx = Arg.getArgNo();
9738 break;
9739 }
9740 ++FilteredIdx;
9741 }
9742 }
9743
9744 Register VReg = LAFI->getIncomingIndirectArg(FormalArgIdx);
9745 SDValue CopyOp = DAG.getCopyFromReg(Chain, DL, VReg, PtrVT);
9746 // Thread the CopyFromReg output chain through MemOpChains so the
9747 // TokenFactor below sequences the copy with any stores we emit
9748 // for this argument.
9749 MemOpChains.push_back(CopyOp.getValue(1));
9750 SDValue IncomingPtr = CopyOp;
9751
9752 if (!FormalArg) {
9753 // Computed value: store into the incoming indirect pointer for the
9754 // same-position formal parameter (musttail guarantees matching
9755 // prototypes, so types match). The pointer survives the tail call
9756 // since it points to the caller's caller's frame.
9757 //
9758 // The data-flow edge through IncomingPtr already prevents the
9759 // store from being scheduled before the CopyFromReg. Threading
9760 // CopyOp.getValue(1) (the copy's output chain) into the store
9761 // makes that ordering explicit on the chain edge as well, which
9762 // is the convention for memory ops chaining off their producers.
9763 MemOpChains.push_back(
9764 DAG.getStore(CopyOp.getValue(1), DL, ArgValue, IncomingPtr,
9766 // Store any split parts at their respective offsets.
9767 unsigned ArgPartOffset = Outs[OutIdx].PartOffset;
9768 while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == CallArgIdx) {
9769 SDValue PartValue = OutVals[OutIdx + 1];
9770 unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset;
9771 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
9772 SDValue Addr =
9773 DAG.getNode(ISD::ADD, DL, PtrVT, IncomingPtr, Offset);
9774 MemOpChains.push_back(
9775 DAG.getStore(CopyOp.getValue(1), DL, PartValue, Addr,
9777 ++i;
9778 ++OutIdx;
9779 }
9780 }
9781 ArgValue = IncomingPtr;
9782
9783 // Skip any remaining split parts (for forwarded args, they are
9784 // covered by the forwarded pointer).
9785 while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == CallArgIdx) {
9786 ++i;
9787 ++OutIdx;
9788 }
9789 } else {
9790 // Store the argument in a stack slot and pass its address.
9791 Align StackAlign =
9792 std::max(getPrefTypeAlign(Outs[OutIdx].ArgVT, DAG),
9793 getPrefTypeAlign(ArgValue.getValueType(), DAG));
9794 TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
9795 // If the original argument was split and passed by reference, we need
9796 // to store the required parts of it here (and pass just one address).
9797 unsigned ArgIndex = Outs[OutIdx].OrigArgIndex;
9798 unsigned ArgPartOffset = Outs[OutIdx].PartOffset;
9799 assert(ArgPartOffset == 0);
9800 // Calculate the total size to store. We don't have access to what we're
9801 // actually storing other than performing the loop and collecting the
9802 // info.
9804 while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) {
9805 SDValue PartValue = OutVals[OutIdx + 1];
9806 unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset;
9807 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
9808 EVT PartVT = PartValue.getValueType();
9809 StoredSize += PartVT.getStoreSize();
9810 StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
9811 Parts.push_back(std::make_pair(PartValue, Offset));
9812 ++i;
9813 ++OutIdx;
9814 }
9815 SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
9816 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
9817 MemOpChains.push_back(
9818 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
9820 for (const auto &Part : Parts) {
9821 SDValue PartValue = Part.first;
9822 SDValue PartOffset = Part.second;
9824 DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
9825 MemOpChains.push_back(
9826 DAG.getStore(Chain, DL, PartValue, Address,
9828 }
9829 ArgValue = SpillSlot;
9830 }
9831 } else {
9832 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL);
9833 }
9834
9835 // Use local copy if it is a byval arg.
9836 if (Flags.isByVal())
9837 ArgValue = ByValArgs[j++];
9838
9839 if (VA.isRegLoc()) {
9840 // Queue up the argument copies and emit them at the end.
9841 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
9842 } else {
9843 assert(VA.isMemLoc() && "Argument not register or memory");
9844 assert((!IsTailCall || (CLI.CB && CLI.CB->isMustTailCall())) &&
9845 "Tail call not allowed if stack is used for passing parameters");
9846
9847 // Work out the address of the stack slot.
9848 if (!StackPtr.getNode())
9849 StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT);
9851 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
9853
9854 // Emit the store.
9855 MemOpChains.push_back(
9856 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
9857 }
9858 }
9859
9860 // Join the stores, which are independent of one another.
9861 if (!MemOpChains.empty())
9862 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
9863
9864 SDValue Glue;
9865
9866 // Build a sequence of copy-to-reg nodes, chained and glued together.
9867 for (auto &Reg : RegsToPass) {
9868 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
9869 Glue = Chain.getValue(1);
9870 }
9871
9872 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
9873 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
9874 // split it and then direct call can be matched by PseudoCALL_SMALL.
9876 const GlobalValue *GV = S->getGlobal();
9877 unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(GV)
9880 Callee = DAG.getTargetGlobalAddress(S->getGlobal(), DL, PtrVT, 0, OpFlags);
9881 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
9882 unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(nullptr)
9885 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);
9886 }
9887
9888 // The first call operand is the chain and the second is the target address.
9890 Ops.push_back(Chain);
9891 Ops.push_back(Callee);
9892
9893 // Add argument registers to the end of the list so that they are
9894 // known live into the call.
9895 for (auto &Reg : RegsToPass)
9896 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
9897
9898 if (!IsTailCall) {
9899 // Add a register mask operand representing the call-preserved registers.
9900 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
9901 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
9902 assert(Mask && "Missing call preserved mask for calling convention");
9903 Ops.push_back(DAG.getRegisterMask(Mask));
9904 }
9905
9906 // Glue the call to the argument copies, if any.
9907 if (Glue.getNode())
9908 Ops.push_back(Glue);
9909
9910 // Emit the call.
     // The node opcode depends on both the code model and whether this is a
     // tail call.
9911 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
9912 unsigned Op;
9913 switch (DAG.getTarget().getCodeModel()) {
9914 default:
9915 report_fatal_error("Unsupported code model");
9916 case CodeModel::Small:
9917 Op = IsTailCall ? LoongArchISD::TAIL : LoongArchISD::CALL;
9918 break;
9919 case CodeModel::Medium:
9920 Op = IsTailCall ? LoongArchISD::TAIL_MEDIUM : LoongArchISD::CALL_MEDIUM;
9921 break;
9922 case CodeModel::Large:
9923 assert(Subtarget.is64Bit() && "Large code model requires LA64");
9924 Op = IsTailCall ? LoongArchISD::TAIL_LARGE : LoongArchISD::CALL_LARGE;
9925 break;
9926 }
9927
     // Tail calls stop here: the call node is the result, and no CALLSEQ_END
     // or result copies follow.
9928 if (IsTailCall) {
9930 SDValue Ret = DAG.getNode(Op, DL, NodeTys, Ops);
9931 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
9932 return Ret;
9933 }
9934
9935 Chain = DAG.getNode(Op, DL, NodeTys, Ops);
9936 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
9937 Glue = Chain.getValue(1);
9938
9939 // Mark the end of the call, which is glued to the call itself.
9940 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
9941 Glue = Chain.getValue(1);
9942
9943 // Assign locations to each value returned by this call.
9945 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
9946 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_LoongArch);
9947
9948 // Copy all of the result registers out of their specified physreg.
9949 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
9950 auto &VA = RVLocs[i];
9951 // Copy the value out.
9952 SDValue RetValue =
9953 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
9954 // Glue the RetValue to the end of the call sequence.
9955 Chain = RetValue.getValue(1);
9956 Glue = RetValue.getValue(2);
9957
     // An f64 returned in two i32 locations: read the next RVLocs entry as
     // the second half and rebuild the f64 from both.
9958 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
9959 assert(VA.needsCustom());
9960 SDValue RetValue2 = DAG.getCopyFromReg(Chain, DL, RVLocs[++i].getLocReg(),
9961 MVT::i32, Glue);
9962 Chain = RetValue2.getValue(1);
9963 Glue = RetValue2.getValue(2);
9964 RetValue = DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64,
9965 RetValue, RetValue2);
9966 } else
9967 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL);
9968
9969 InVals.push_back(RetValue);
9970 }
9971
9972 return Chain;
9973}
9974
9976 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
9977 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context,
9978 const Type *RetTy) const {
9980 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
9981
     // Run CC_LoongArch over every return value with IsRet set. A true result
     // from it for any value makes this function answer false; otherwise the
     // answer is true.
9982 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
9983 LoongArchABI::ABI ABI =
9984 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
9985 if (CC_LoongArch(MF.getDataLayout(), ABI, i, Outs[i].VT, CCValAssign::Full,
9986 Outs[i].Flags, CCInfo, /*IsRet=*/true, nullptr))
9987 return false;
9988 }
9989 return true;
9990}
9991
9993 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
9995 const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
9996 SelectionDAG &DAG) const {
     // Copies each return value into its assigned register and produces the
     // LoongArchISD::RET node whose operands are the chain, the registers
     // used, and the trailing glue (if any).
9997 // Stores the assignment of the return value to a location.
9999
10000 // Info about the registers and stack slot.
10001 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
10002 *DAG.getContext());
10003
10004 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
10005 nullptr, CC_LoongArch);
10006 if (CallConv == CallingConv::GHC && !RVLocs.empty())
10007 report_fatal_error("GHC functions return void only");
10008 SDValue Glue;
      // Slot 0 is a placeholder for the chain; it is overwritten once all
      // copies have been emitted.
10009 SmallVector<SDValue, 4> RetOps(1, Chain);
10010
10011 // Copy the result values into the output registers.
      // i walks RVLocs and OutIdx walks OutVals; a split f64 consumes two
      // RVLocs entries but only one OutVals entry.
10012 for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) {
10013 SDValue Val = OutVals[OutIdx];
10014 CCValAssign &VA = RVLocs[i];
10015 assert(VA.isRegLoc() && "Can only return in registers!");
10016
10017 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
10018 // Handle returning f64 on LA32D with a soft float ABI.
10019 assert(VA.isRegLoc() && "Expected return via registers");
10020 assert(VA.needsCustom());
10021 SDValue SplitF64 = DAG.getNode(LoongArchISD::SPLIT_PAIR_F64, DL,
10022 DAG.getVTList(MVT::i32, MVT::i32), Val);
10023 SDValue Lo = SplitF64.getValue(0);
10024 SDValue Hi = SplitF64.getValue(1);
10025 Register RegLo = VA.getLocReg();
10026 Register RegHi = RVLocs[++i].getLocReg();
10027
10028 Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
10029 Glue = Chain.getValue(1);
10030 RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
10031 Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
10032 Glue = Chain.getValue(1);
10033 RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
10034 } else {
10035 // Handle a 'normal' return.
10036 Val = convertValVTToLocVT(DAG, Val, VA, DL);
10037 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
10038
10039 // Guarantee that all emitted copies are stuck together.
10040 Glue = Chain.getValue(1);
10041 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
10042 }
10043 }
10044
10045 RetOps[0] = Chain; // Update chain.
10046
10047 // Add the glue node if we have it.
10048 if (Glue.getNode())
10049 RetOps.push_back(Glue);
10050
10051 return DAG.getNode(LoongArchISD::RET, DL, MVT::Other, RetOps);
10052}
10053
10054// Check if a constant splat can be generated using [x]vldi, where imm[12] == 1.
10055// Note: The following prefixes are excluded:
10056// imm[11:8] == 4'b0000, 4'b0100, 4'b1000
10057// as they can be represented using [x]vrepli.[whb]
// Returns a pair: whether a matching encoding exists, and the encoded
// immediate (left at 0 when there is none). Each accepted pattern below is
// labelled with the imm[11:8] prefix it produces; the low 8 bits carry the
// payload extracted from the splat value.
10059 const APInt &SplatValue, const unsigned SplatBitSize) const {
10060 uint64_t RequiredImm = 0;
10061 uint64_t V = SplatValue.getZExtValue();
10062 if (SplatBitSize == 16 && !(V & 0x00FF)) {
10063 // 4'b0101
10064 RequiredImm = (0b10101 << 8) | (V >> 8);
10065 return {true, RequiredImm};
10066 } else if (SplatBitSize == 32) {
10067 // 4'b0001
10068 if (!(V & 0xFFFF00FF)) {
10069 RequiredImm = (0b10001 << 8) | (V >> 8);
10070 return {true, RequiredImm};
10071 }
10072 // 4'b0010
10073 if (!(V & 0xFF00FFFF)) {
10074 RequiredImm = (0b10010 << 8) | (V >> 16);
10075 return {true, RequiredImm};
10076 }
10077 // 4'b0011
10078 if (!(V & 0x00FFFFFF)) {
10079 RequiredImm = (0b10011 << 8) | (V >> 24);
10080 return {true, RequiredImm};
10081 }
10082 // 4'b0110
10083 if ((V & 0xFFFF00FF) == 0xFF) {
10084 RequiredImm = (0b10110 << 8) | (V >> 8);
10085 return {true, RequiredImm};
10086 }
10087 // 4'b0111
10088 if ((V & 0xFF00FFFF) == 0xFFFF) {
10089 RequiredImm = (0b10111 << 8) | (V >> 16);
10090 return {true, RequiredImm};
10091 }
10092 // 4'b1010
10093 if ((V & 0x7E07FFFF) == 0x3E000000 || (V & 0x7E07FFFF) == 0x40000000) {
10094 RequiredImm =
10095 (0b11010 << 8) | (((V >> 24) & 0xC0) ^ 0x40) | ((V >> 19) & 0x3F);
10096 return {true, RequiredImm};
10097 }
10098 } else if (SplatBitSize == 64) {
10099 // 4'b1011
10100 if ((V & 0xFFFFFFFF7E07FFFFULL) == 0x3E000000ULL ||
10101 (V & 0xFFFFFFFF7E07FFFFULL) == 0x40000000ULL) {
10102 RequiredImm =
10103 (0b11011 << 8) | (((V >> 24) & 0xC0) ^ 0x40) | ((V >> 19) & 0x3F);
10104 return {true, RequiredImm};
10105 }
10106 // 4'b1100
10107 if ((V & 0x7FC0FFFFFFFFFFFFULL) == 0x4000000000000000ULL ||
10108 (V & 0x7FC0FFFFFFFFFFFFULL) == 0x3FC0000000000000ULL) {
10109 RequiredImm =
10110 (0b11100 << 8) | (((V >> 56) & 0xC0) ^ 0x40) | ((V >> 48) & 0x3F);
10111 return {true, RequiredImm};
10112 }
10113 // 4'b1001
      // Helper: succeeds only when every one of the 8 bytes of x is 0x00 or
      // 0xFF; bit i of the result is set when byte i (counting from the least
      // significant byte) is 0xFF.
10114 auto sameBitsPreByte = [](uint64_t x) -> std::pair<bool, uint8_t> {
10115 uint8_t res = 0;
10116 for (int i = 0; i < 8; ++i) {
10117 uint8_t byte = x & 0xFF;
10118 if (byte == 0 || byte == 0xFF)
10119 res |= ((byte & 1) << i);
10120 else
10121 return {false, 0};
10122 x >>= 8;
10123 }
10124 return {true, res};
10125 };
10126 auto [IsSame, Suffix] = sameBitsPreByte(V);
10127 if (IsSame) {
10128 RequiredImm = (0b11001 << 8) | Suffix;
10129 return {true, RequiredImm};
10130 }
10131 }
10132 return {false, RequiredImm};
10133}
10134
10136 EVT VT) const {
      // Reports whether the bit pattern of Imm matches one of the two masked
      // patterns accepted for its type. Without LSX the answer is always
      // false, as it is for any type other than f32 and f64.
10137 if (!Subtarget.hasExtLSX())
10138 return false;
10139
10140 if (VT == MVT::f32) {
10141 uint64_t masked = Imm.bitcastToAPInt().getZExtValue() & 0x7e07ffff;
10142 return (masked == 0x3e000000 || masked == 0x40000000);
10143 }
10144
10145 if (VT == MVT::f64) {
10146 uint64_t masked = Imm.bitcastToAPInt().getZExtValue() & 0x7fc0ffffffffffff;
10147 return (masked == 0x3fc0000000000000 || masked == 0x4000000000000000);
10148 }
10149
10150 return false;
10151}
10152
10153bool LoongArchTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
10154 bool ForCodeSize) const {
10155 // TODO: Maybe need more checks here after vector extension is supported.
10156 if (VT == MVT::f32 && !Subtarget.hasBasicF())
10157 return false;
10158 if (VT == MVT::f64 && !Subtarget.hasBasicD())
10159 return false;
10160 return (Imm.isZero() || Imm.isExactlyValue(1.0) || isFPImmVLDILegal(Imm, VT));
10161}
10162
10164 return true;
10165}
10166
10168 return true;
10169}
10170
10171bool LoongArchTargetLowering::shouldInsertFencesForAtomic(
10172 const Instruction *I) const {
10173 if (!Subtarget.is64Bit())
10174 return isa<LoadInst>(I) || isa<StoreInst>(I);
10175
10176 if (isa<LoadInst>(I))
10177 return true;
10178
10179 // On LA64, atomic store operations with IntegerBitWidth of 32 and 64 do not
10180 // require fences beacuse we can use amswap_db.[w/d].
10181 Type *Ty = I->getOperand(0)->getType();
10182 if (isa<StoreInst>(I) && Ty->isIntegerTy()) {
10183 unsigned Size = Ty->getIntegerBitWidth();
10184 return (Size == 8 || Size == 16);
10185 }
10186
10187 return false;
10188}
10189
10191 LLVMContext &Context,
10192 EVT VT) const {
10193 if (!VT.isVector())
10194 return getPointerTy(DL);
10196}
10197
10199 unsigned AddressSpace, EVT MemVT, const MachineFunction &MF) const {
// Returns whether a merged store of type MemVT fits the widest allowed width.
// The baseline limit is 64 bits on 64-bit subtargets and 32 bits otherwise;
// it is raised to the vector width only when implicit float use is allowed.
10200 // Do not merge to float value size (128 or 256 bits) if no implicit
10201 // float attribute is set.
10202 bool NoFloat = MF.getFunction().hasFnAttribute(Attribute::NoImplicitFloat);
10203 unsigned MaxIntSize = Subtarget.is64Bit() ? 64 : 32;
10204 if (NoFloat)
10205 return MemVT.getSizeInBits() <= MaxIntSize;
10206
10207 // Make sure we don't merge greater than our maximum supported vector width.
10208 if (Subtarget.hasExtLASX())
10209 MaxIntSize = 256;
10210 else if (Subtarget.hasExtLSX())
10211 MaxIntSize = 128;
10212
10213 return MemVT.getSizeInBits() <= MaxIntSize;
10214}
10215
10217 EVT VT = Y.getValueType();
10218
10219 if (VT.isVector())
10220 return Subtarget.hasExtLSX() && VT.isInteger();
10221
10222 return VT.isScalarInteger() && !isa<ConstantSDNode>(Y);
10223}
10224
10227 MachineFunction &MF, unsigned Intrinsic) const {
10228 switch (Intrinsic) {
10229 default:
10230 return;
10231 case Intrinsic::loongarch_masked_atomicrmw_xchg_i32:
10232 case Intrinsic::loongarch_masked_atomicrmw_add_i32:
10233 case Intrinsic::loongarch_masked_atomicrmw_sub_i32:
10234 case Intrinsic::loongarch_masked_atomicrmw_nand_i32: {
10235 IntrinsicInfo Info;
10237 Info.memVT = MVT::i32;
10238 Info.ptrVal = I.getArgOperand(0);
10239 Info.offset = 0;
10240 Info.align = Align(4);
10243 Infos.push_back(Info);
10244 return;
10245 // TODO: Add more Intrinsics later.
10246 }
10247 }
10248}
10249
10250// When -mlamcas is enabled, MinCmpXchgSizeInBits will be set to 8,
10251// atomicrmw and/or/xor operations with operands less than 32 bits cannot be
10252// expanded to am{and/or/xor}[_db].w through AtomicExpandPass. To prevent
10253// regression, we need to implement it manually.
10256
10258 Op == AtomicRMWInst::And) &&
10259 "Unable to expand");
10260 unsigned MinWordSize = 4;
10261
10262 IRBuilder<> Builder(AI);
10263 LLVMContext &Ctx = Builder.getContext();
10264 const DataLayout &DL = AI->getDataLayout();
10265 Type *ValueType = AI->getType();
10266 Type *WordType = Type::getIntNTy(Ctx, MinWordSize * 8);
10267
10268 Value *Addr = AI->getPointerOperand();
10269 PointerType *PtrTy = cast<PointerType>(Addr->getType());
10270 IntegerType *IntTy = DL.getIndexType(Ctx, PtrTy->getAddressSpace());
10271
10272 Value *AlignedAddr = Builder.CreateIntrinsic(
10273 Intrinsic::ptrmask, {PtrTy, IntTy},
10274 {Addr, ConstantInt::get(IntTy, ~(uint64_t)(MinWordSize - 1))}, nullptr,
10275 "AlignedAddr");
10276
10277 Value *AddrInt = Builder.CreatePtrToInt(Addr, IntTy);
10278 Value *PtrLSB = Builder.CreateAnd(AddrInt, MinWordSize - 1, "PtrLSB");
10279 Value *ShiftAmt = Builder.CreateShl(PtrLSB, 3);
10280 ShiftAmt = Builder.CreateTrunc(ShiftAmt, WordType, "ShiftAmt");
10281 Value *Mask = Builder.CreateShl(
10282 ConstantInt::get(WordType,
10283 (1 << (DL.getTypeStoreSize(ValueType) * 8)) - 1),
10284 ShiftAmt, "Mask");
10285 Value *Inv_Mask = Builder.CreateNot(Mask, "Inv_Mask");
10286 Value *ValOperand_Shifted =
10287 Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), WordType),
10288 ShiftAmt, "ValOperand_Shifted");
10289 Value *NewOperand;
10290 if (Op == AtomicRMWInst::And)
10291 NewOperand = Builder.CreateOr(ValOperand_Shifted, Inv_Mask, "AndOperand");
10292 else
10293 NewOperand = ValOperand_Shifted;
10294
10295 AtomicRMWInst *NewAI =
10296 Builder.CreateAtomicRMW(Op, AlignedAddr, NewOperand, Align(MinWordSize),
10297 AI->getOrdering(), AI->getSyncScopeID());
10298
10299 Value *Shift = Builder.CreateLShr(NewAI, ShiftAmt, "shifted");
10300 Value *Trunc = Builder.CreateTrunc(Shift, ValueType, "extracted");
10301 Value *FinalOldResult = Builder.CreateBitCast(Trunc, ValueType);
10302 AI->replaceAllUsesWith(FinalOldResult);
10303 AI->eraseFromParent();
10304}
10305
10308 const AtomicRMWInst *AI) const {
10309 // TODO: Add more AtomicRMWInst that needs to be extended.
10310
10311 // Since floating-point operation requires a non-trivial set of data
10312 // operations, use CmpXChg to expand.
10313 if (AI->isFloatingPointOperation() ||
10319
10320 if (Subtarget.hasLAM_BH() && Subtarget.is64Bit() &&
10323 AI->getOperation() == AtomicRMWInst::Sub)) {
10325 }
10326
10327 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
10328 if (Subtarget.hasLAMCAS()) {
10329 if (Size < 32 && (AI->getOperation() == AtomicRMWInst::And ||
10333 if (AI->getOperation() == AtomicRMWInst::Nand || Size < 32)
10335 }
10336
10337 if (Size == 8 || Size == 16)
10340}
10341
10342static Intrinsic::ID
10344 AtomicRMWInst::BinOp BinOp) {
10345 if (GRLen == 64) {
10346 switch (BinOp) {
10347 default:
10348 llvm_unreachable("Unexpected AtomicRMW BinOp");
10350 return Intrinsic::loongarch_masked_atomicrmw_xchg_i64;
10351 case AtomicRMWInst::Add:
10352 return Intrinsic::loongarch_masked_atomicrmw_add_i64;
10353 case AtomicRMWInst::Sub:
10354 return Intrinsic::loongarch_masked_atomicrmw_sub_i64;
10356 return Intrinsic::loongarch_masked_atomicrmw_nand_i64;
10358 return Intrinsic::loongarch_masked_atomicrmw_umax_i64;
10360 return Intrinsic::loongarch_masked_atomicrmw_umin_i64;
10361 case AtomicRMWInst::Max:
10362 return Intrinsic::loongarch_masked_atomicrmw_max_i64;
10363 case AtomicRMWInst::Min:
10364 return Intrinsic::loongarch_masked_atomicrmw_min_i64;
10365 // TODO: support other AtomicRMWInst.
10366 }
10367 }
10368
10369 if (GRLen == 32) {
10370 switch (BinOp) {
10371 default:
10372 llvm_unreachable("Unexpected AtomicRMW BinOp");
10374 return Intrinsic::loongarch_masked_atomicrmw_xchg_i32;
10375 case AtomicRMWInst::Add:
10376 return Intrinsic::loongarch_masked_atomicrmw_add_i32;
10377 case AtomicRMWInst::Sub:
10378 return Intrinsic::loongarch_masked_atomicrmw_sub_i32;
10380 return Intrinsic::loongarch_masked_atomicrmw_nand_i32;
10382 return Intrinsic::loongarch_masked_atomicrmw_umax_i32;
10384 return Intrinsic::loongarch_masked_atomicrmw_umin_i32;
10385 case AtomicRMWInst::Max:
10386 return Intrinsic::loongarch_masked_atomicrmw_max_i32;
10387 case AtomicRMWInst::Min:
10388 return Intrinsic::loongarch_masked_atomicrmw_min_i32;
10389 // TODO: support other AtomicRMWInst.
10390 }
10391 }
10392
10393 llvm_unreachable("Unexpected GRLen\n");
10394}
10395
10398 const AtomicCmpXchgInst *CI) const {
10399
10400 if (Subtarget.hasLAMCAS())
10402
10404 if (Size == 8 || Size == 16)
10407}
10408
10410 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
10411 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
// Emits a call to the masked cmpxchg intrinsic (the _i32 or _i64 variant,
// chosen by GRLen) and returns its result as an i32 when GRLen is 64.
// The Ord parameter is not read in this body; only the failure ordering
// taken from CI is forwarded.
10412 unsigned GRLen = Subtarget.getGRLen();
10413 AtomicOrdering FailOrd = CI->getFailureOrdering();
// The failure ordering is passed to the intrinsic as a GRLen-wide integer.
10414 Value *FailureOrdering =
10415 Builder.getIntN(Subtarget.getGRLen(), static_cast<uint64_t>(FailOrd));
10416 Intrinsic::ID CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i32;
10417 if (GRLen == 64) {
// The _i64 variant is fed i64 operands, so sign-extend the value operands
// and the mask.
10418 CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i64;
10419 CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
10420 NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
10421 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
10422 }
// The intrinsic is overloaded on the pointer type only.
10423 Type *Tys[] = {AlignedAddr->getType()};
10424 Value *Result = Builder.CreateIntrinsic(
10425 CmpXchgIntrID, Tys, {AlignedAddr, CmpVal, NewVal, Mask, FailureOrdering});
// Narrow the i64 result back to i32 for the caller.
10426 if (GRLen == 64)
10427 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
10428 return Result;
10429}
10430
10432 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
10433 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
10434 // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
10435 // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
10436 // mask, as this produces better code than the LL/SC loop emitted by
10437 // int_loongarch_masked_atomicrmw_xchg.
10438 if (AI->getOperation() == AtomicRMWInst::Xchg &&
10441 if (CVal->isZero())
10442 return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,
10443 Builder.CreateNot(Mask, "Inv_Mask"),
10444 AI->getAlign(), Ord);
10445 if (CVal->isMinusOne())
10446 return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,
10447 AI->getAlign(), Ord);
10448 }
10449
10450 unsigned GRLen = Subtarget.getGRLen();
10451 Value *Ordering =
10452 Builder.getIntN(GRLen, static_cast<uint64_t>(AI->getOrdering()));
10453 Type *Tys[] = {AlignedAddr->getType()};
10455 AI->getModule(),
10457
10458 if (GRLen == 64) {
10459 Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
10460 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
10461 ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
10462 }
10463
10464 Value *Result;
10465
10466 // Must pass the shift amount needed to sign extend the loaded value prior
10467 // to performing a signed comparison for min/max. ShiftAmt is the number of
10468 // bits to shift the value into position. Pass GRLen-ShiftAmt-ValWidth, which
10469 // is the number of bits to left+right shift the value in order to
10470 // sign-extend.
10471 if (AI->getOperation() == AtomicRMWInst::Min ||
10473 const DataLayout &DL = AI->getDataLayout();
10474 unsigned ValWidth =
10475 DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
10476 Value *SextShamt =
10477 Builder.CreateSub(Builder.getIntN(GRLen, GRLen - ValWidth), ShiftAmt);
10478 Result = Builder.CreateCall(LlwOpScwLoop,
10479 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
10480 } else {
10481 Result =
10482 Builder.CreateCall(LlwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
10483 }
10484
10485 if (GRLen == 64)
10486 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
10487 return Result;
10488}
10489
10491 const MachineFunction &MF, EVT VT) const {
// Returns true only when the scalar (element) type of VT is f32 or f64.
// Vector types are judged by their element type; non-simple types are
// rejected. MF is not consulted.
10492 VT = VT.getScalarType();
10493
10494 if (!VT.isSimple())
10495 return false;
10496
10497 switch (VT.getSimpleVT().SimpleTy) {
10498 case MVT::f32:
10499 case MVT::f64:
10500 return true;
10501 default:
10502 break;
10503 }
10504
10505 return false;
10506}
10507
10509 const Constant *PersonalityFn) const {
// Always returns register R4; PersonalityFn is ignored.
10510 return LoongArch::R4;
10511}
10512
10514 const Constant *PersonalityFn) const {
// Always returns register R5; PersonalityFn is ignored.
10515 return LoongArch::R5;
10516}
10517
10518//===----------------------------------------------------------------------===//
10519// Target Optimization Hooks
10520//===----------------------------------------------------------------------===//
10521
10523 const LoongArchSubtarget &Subtarget) {
// Default number of refinement steps for an estimate of type VT: 2 when the
// scalar type is f64, otherwise 1. Subtarget is not consulted in this body.
10524 // The relative accuracy of the FRECIPE feature's instructions is 2^-14.
10525 // IEEE float has 23 digits and double has 52 digits.
10526 int RefinementSteps = VT.getScalarType() == MVT::f64 ? 2 : 1;
10527 return RefinementSteps;
10528}
10529
10530static bool
// Precondition (asserted): the subtarget has the FRECIPE feature.
// Returns whether VT is one of the types accepted for reciprocal estimates,
// given the subtarget's feature set. Non-simple types are rejected.
10532 assert(Subtarget.hasFrecipe() &&
10533 "Reciprocal estimate queried on unsupported target");
10534
10535 if (!VT.isSimple())
10536 return false;
10537
10538 switch (VT.getSimpleVT().SimpleTy) {
10539 case MVT::f32:
10540 // f32 is the base type for reciprocal estimate instructions.
10541 return true;
10542
10543 case MVT::f64:
// f64 additionally needs the basic D feature.
10544 return Subtarget.hasBasicD();
10545
10546 case MVT::v4f32:
10547 case MVT::v2f64:
// 128-bit vector types need LSX.
10548 return Subtarget.hasExtLSX();
10549
10550 case MVT::v8f32:
10551 case MVT::v4f64:
// 256-bit vector types need LASX.
10552 return Subtarget.hasExtLASX();
10553
10554 default:
10555 return false;
10556 }
10557}
10558
10560 SelectionDAG &DAG, int Enabled,
10561 int &RefinementSteps,
10562 bool &UseOneConstNR,
10563 bool Reciprocal) const {
10565 "Enabled should never be Disabled here");
10566
10567 if (!Subtarget.hasFrecipe())
10568 return SDValue();
10569
10570 SDLoc DL(Operand);
10571 EVT VT = Operand.getValueType();
10572
10573 // Check supported types.
10574 if (!isSupportedReciprocalEstimateType(VT, Subtarget))
10575 return SDValue();
10576
10577 // Handle refinement steps.
10578 if (RefinementSteps == ReciprocalEstimate::Unspecified)
10579 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
10580
10581 // LoongArch only has FRSQRTE which is 1.0 / sqrt(x).
10582 UseOneConstNR = false;
10583 SDValue Rsqrt = DAG.getNode(LoongArchISD::FRSQRTE, DL, VT, Operand);
10584
10585 // If the caller wants 1.0 / sqrt(x), or if further refinement steps
10586 // are needed (which rely on the reciprocal form), return the raw reciprocal
10587 // estimate.
10588 if (Reciprocal || RefinementSteps > 0)
10589 return Rsqrt;
10590
10591 // Otherwise, return sqrt(x) by multiplying with the operand.
10592 return DAG.getNode(ISD::FMUL, DL, VT, Operand, Rsqrt);
10593}
10594
10596 SelectionDAG &DAG,
10597 int Enabled,
10598 int &RefinementSteps) const {
10600 "Enabled should never be Disabled here");
10601
10602 if (!Subtarget.hasFrecipe())
10603 return SDValue();
10604
10605 SDLoc DL(Operand);
10606 EVT VT = Operand.getValueType();
10607
10608 // Check supported types.
10609 if (!isSupportedReciprocalEstimateType(VT, Subtarget))
10610 return SDValue();
10611
10612 if (RefinementSteps == ReciprocalEstimate::Unspecified)
10613 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
10614
10615 // FRECIPE computes 1.0 / x.
10616 return DAG.getNode(LoongArchISD::FRECIPE, DL, VT, Operand);
10617}
10618
10619//===----------------------------------------------------------------------===//
10620// LoongArch Inline Assembly Support
10621//===----------------------------------------------------------------------===//
10622
10624LoongArchTargetLowering::getConstraintType(StringRef Constraint) const {
// Classifies an inline-asm constraint string. Single-letter and two-letter
// LoongArch constraints are handled here; anything else is delegated to the
// generic TargetLowering implementation.
10625 // LoongArch specific constraints in GCC: config/loongarch/constraints.md
10626 //
10627 // 'f': A floating-point register (if available).
10628 // 'k': A memory operand whose address is formed by a base register and
10629 // (optionally scaled) index register.
10630 // 'l': A signed 16-bit constant.
10631 // 'm': A memory operand whose address is formed by a base register and
10632 // offset that is suitable for use in instructions with the same
10633 // addressing mode as st.w and ld.w.
10634 // 'q': A general-purpose register except for $r0 and $r1 (for the csrxchg
10635 // instruction)
10636 // 'I': A signed 12-bit constant (for arithmetic instructions).
10637 // 'J': Integer zero.
10638 // 'K': An unsigned 12-bit constant (for logic instructions).
10639 // "ZB": An address that is held in a general-purpose register. The offset is
10640 // zero.
10641 // "ZC": A memory operand whose address is formed by a base register and
10642 // offset that is suitable for use in instructions with the same
10643 // addressing mode as ll.w and sc.w.
10644 if (Constraint.size() == 1) {
10645 switch (Constraint[0]) {
10646 default:
// Unrecognized single letters fall through to the generic handling below.
10647 break;
10648 case 'f':
10649 case 'q':
10650 return C_RegisterClass;
10651 case 'l':
10652 case 'I':
10653 case 'J':
10654 case 'K':
10655 return C_Immediate;
10656 case 'k':
10657 return C_Memory;
10658 }
10659 }
10660
10661 if (Constraint == "ZC" || Constraint == "ZB")
10662 return C_Memory;
10663
10664 // 'm' is handled here.
10665 return TargetLowering::getConstraintType(Constraint);
10666}
10667
10668InlineAsm::ConstraintCode LoongArchTargetLowering::getInlineAsmMemConstraint(
10669 StringRef ConstraintCode) const {
10670 return StringSwitch<InlineAsm::ConstraintCode>(ConstraintCode)
10674 .Default(TargetLowering::getInlineAsmMemConstraint(ConstraintCode));
10675}
10676
10677std::pair<unsigned, const TargetRegisterClass *>
10678LoongArchTargetLowering::getRegForInlineAsmConstraint(
10679 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
10680 // First, see if this is a constraint that directly corresponds to a LoongArch
10681 // register class.
10682 if (Constraint.size() == 1) {
10683 switch (Constraint[0]) {
10684 case 'r':
10685 // TODO: Support fixed vectors up to GRLen?
10686 if (VT.isVector())
10687 break;
10688 return std::make_pair(0U, &LoongArch::GPRRegClass);
10689 case 'q':
10690 return std::make_pair(0U, &LoongArch::GPRNoR0R1RegClass);
10691 case 'f':
10692 if (Subtarget.hasBasicF() && VT == MVT::f32)
10693 return std::make_pair(0U, &LoongArch::FPR32RegClass);
10694 if (Subtarget.hasBasicD() && VT == MVT::f64)
10695 return std::make_pair(0U, &LoongArch::FPR64RegClass);
10696 if (Subtarget.hasExtLSX() &&
10697 TRI->isTypeLegalForClass(LoongArch::LSX128RegClass, VT))
10698 return std::make_pair(0U, &LoongArch::LSX128RegClass);
10699 if (Subtarget.hasExtLASX() &&
10700 TRI->isTypeLegalForClass(LoongArch::LASX256RegClass, VT))
10701 return std::make_pair(0U, &LoongArch::LASX256RegClass);
10702 break;
10703 default:
10704 break;
10705 }
10706 }
10707
10708 // TargetLowering::getRegForInlineAsmConstraint uses the name of the TableGen
10709 // record (e.g. the "R0" in `def R0`) to choose registers for InlineAsm
10710 // constraints while the official register name is prefixed with a '$'. So we
10711 // clip the '$' from the original constraint string (e.g. {$r0} to {r0}.)
10712 // before it being parsed. And TargetLowering::getRegForInlineAsmConstraint is
10713 // case insensitive, so no need to convert the constraint to upper case here.
10714 //
10715 // For now, no need to support ABI names (e.g. `$a0`) as clang will correctly
10716 // decode the usage of register name aliases into their official names. And
10717 // AFAIK, the not yet upstreamed `rustc` for LoongArch will always use
10718 // official register names.
10719 if (Constraint.starts_with("{$r") || Constraint.starts_with("{$f") ||
10720 Constraint.starts_with("{$vr") || Constraint.starts_with("{$xr")) {
10721 bool IsFP = Constraint[2] == 'f';
10722 std::pair<StringRef, StringRef> Temp = Constraint.split('$');
10723 std::pair<unsigned, const TargetRegisterClass *> R;
10725 TRI, join_items("", Temp.first, Temp.second), VT);
10726 // Match those names to the widest floating point register type available.
10727 if (IsFP) {
10728 unsigned RegNo = R.first;
10729 if (LoongArch::F0 <= RegNo && RegNo <= LoongArch::F31) {
10730 if (Subtarget.hasBasicD() && (VT == MVT::f64 || VT == MVT::Other)) {
10731 unsigned DReg = RegNo - LoongArch::F0 + LoongArch::F0_64;
10732 return std::make_pair(DReg, &LoongArch::FPR64RegClass);
10733 }
10734 }
10735 }
10736 return R;
10737 }
10738
10739 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
10740}
10741
10742void LoongArchTargetLowering::LowerAsmOperandForConstraint(
10743 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
10744 SelectionDAG &DAG) const {
10745 // Currently only support length 1 constraints.
10746 if (Constraint.size() == 1) {
10747 switch (Constraint[0]) {
10748 case 'l':
10749 // Validate & create a 16-bit signed immediate operand.
10750 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
10751 uint64_t CVal = C->getSExtValue();
10752 if (isInt<16>(CVal))
10753 Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op),
10754 Subtarget.getGRLenVT()));
10755 }
10756 return;
10757 case 'I':
10758 // Validate & create a 12-bit signed immediate operand.
10759 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
10760 uint64_t CVal = C->getSExtValue();
10761 if (isInt<12>(CVal))
10762 Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op),
10763 Subtarget.getGRLenVT()));
10764 }
10765 return;
10766 case 'J':
10767 // Validate & create an integer zero operand.
10768 if (auto *C = dyn_cast<ConstantSDNode>(Op))
10769 if (C->getZExtValue() == 0)
10770 Ops.push_back(
10771 DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getGRLenVT()));
10772 return;
10773 case 'K':
10774 // Validate & create a 12-bit unsigned immediate operand.
10775 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
10776 uint64_t CVal = C->getZExtValue();
10777 if (isUInt<12>(CVal))
10778 Ops.push_back(
10779 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT()));
10780 }
10781 return;
10782 default:
10783 break;
10784 }
10785 }
10787}
10788
10789#define GET_REGISTER_MATCHER
10790#include "LoongArchGenAsmMatcher.inc"
10791
10794 const MachineFunction &MF) const {
// Resolves a register name to a Register. Only the text after the first '$'
// in RegName is matched, first against alternate names and then against
// primary names.
// NOTE(review): if RegName contains no '$', the second half of split() is
// presumably empty, so nothing matches — confirm against StringRef::split.
10795 std::pair<StringRef, StringRef> Name = StringRef(RegName).split('$');
10796 std::string NewRegName = Name.second.str();
10797 Register Reg = MatchRegisterAltName(NewRegName);
10798 if (!Reg)
10799 Reg = MatchRegisterName(NewRegName);
// No match: return the invalid register to the caller.
10800 if (!Reg)
10801 return Reg;
// Only registers reserved for this function may be obtained by name; any
// other register is a fatal error.
10802 BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
10803 if (!ReservedRegs.test(Reg))
10804 report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
10805 StringRef(RegName) + "\"."));
10806 return Reg;
10807}
10808
10810 EVT VT, SDValue C) const {
// Returns true when a multiply of type VT by the constant C matches one of
// the shift/add/sub patterns below. Non-constant C, non-scalar-integer VT,
// and VT wider than GRLen all yield false.
10811 // TODO: Support vectors.
10812 if (!VT.isScalarInteger())
10813 return false;
10814
10815 // Omit the optimization if the data size exceeds GRLen.
10816 if (VT.getSizeInBits() > Subtarget.getGRLen())
10817 return false;
10818
10819 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
10820 const APInt &Imm = ConstNode->getAPIntValue();
10821 // Break MUL into (SLLI + ADD/SUB) or ALSL.
10822 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
10823 (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
10824 return true;
10825 // Break MUL into (ALSL x, (SLLI x, imm0), imm1).
10826 if (ConstNode->hasOneUse() &&
10827 ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
10828 (Imm - 8).isPowerOf2() || (Imm - 16).isPowerOf2()))
10829 return true;
10830 // Break (MUL x, imm) into (ADD (SLLI x, s0), (SLLI x, s1)),
10831 // in which the immediate has two set bits. Or Break (MUL x, imm)
10832 // into (SUB (SLLI x, s0), (SLLI x, s1)), in which the immediate
10833 // equals to (1 << s0) - (1 << s1).
// This case is only tried for single-use constants outside [-2048, 4095].
10834 if (ConstNode->hasOneUse() && !(Imm.sge(-2048) && Imm.sle(4095))) {
10835 unsigned Shifts = Imm.countr_zero();
10836 // Reject immediates which can be composed via a single LUI.
10837 if (Shifts >= 12)
10838 return false;
10839 // Reject multiplications that can be optimized to
10840 // (SLLI (ALSL x, x, 1/2/3/4), s).
10841 APInt ImmPop = Imm.ashr(Shifts);
10842 if (ImmPop == 3 || ImmPop == 5 || ImmPop == 9 || ImmPop == 17)
10843 return false;
10844 // We do not consider the case `(-Imm - ImmSmall).isPowerOf2()`,
10845 // since it needs one more instruction than other 3 cases.
// ImmSmall is the lowest set bit of Imm; Shifts < 12 here, so the shift
// cannot overflow.
10846 APInt ImmSmall = APInt(Imm.getBitWidth(), 1ULL << Shifts, true);
10847 if ((Imm - ImmSmall).isPowerOf2() || (Imm + ImmSmall).isPowerOf2() ||
10848 (ImmSmall - Imm).isPowerOf2())
10849 return true;
10850 }
10851 }
10852
10853 return false;
10854}
10855
10857 const AddrMode &AM,
10858 Type *Ty, unsigned AS,
10859 Instruction *I) const {
// Returns whether the addressing mode AM is accepted. Only AM's BaseGV,
// BaseOffs, Scale and HasBaseReg fields are inspected; Ty, AS and I are not
// read in this body.
10860 // LoongArch has four basic addressing modes:
10861 // 1. reg
10862 // 2. reg + 12-bit signed offset
10863 // 3. reg + 14-bit signed offset left-shifted by 2
10864 // 4. reg1 + reg2
10865 // TODO: Add more checks after support vector extension.
10866
10867 // No global is ever allowed as a base.
10868 if (AM.BaseGV)
10869 return false;
10870
10871 // Require a 12-bit signed offset or 14-bit signed offset left-shifted by 2
10872 // with `UAL` feature.
10873 if (!isInt<12>(AM.BaseOffs) &&
10874 !(isShiftedInt<14, 2>(AM.BaseOffs) && Subtarget.hasUAL()))
10875 return false;
10876
10877 switch (AM.Scale) {
10878 case 0:
10879 // "r+i" or just "i", depending on HasBaseReg.
10880 break;
10881 case 1:
10882 // "r+r+i" is not allowed.
10883 if (AM.HasBaseReg && AM.BaseOffs)
10884 return false;
10885 // Otherwise we have "r+r" or "r+i".
10886 break;
10887 case 2:
10888 // "2*r+r" or "2*r+i" is not allowed.
10889 if (AM.HasBaseReg || AM.BaseOffs)
10890 return false;
10891 // Allow "2*r" as "r+r".
10892 break;
10893 default:
// Any other scale factor is rejected.
10894 return false;
10895 }
10896
10897 return true;
10898}
10899
10901 return isInt<12>(Imm);
10902}
10903
10905 return isInt<12>(Imm);
10906}
10907
10909 // Zexts are free if they can be combined with a load.
10910 // Don't advertise i32->i64 zextload as being free for LA64. It interacts
10911 // poorly with type legalization of compares preferring sext.
10912 if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
10913 EVT MemVT = LD->getMemoryVT();
10914 if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
10915 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
10916 LD->getExtensionType() == ISD::ZEXTLOAD))
10917 return true;
10918 }
10919
10920 return TargetLowering::isZExtFree(Val, VT2);
10921}
10922
10924 EVT DstVT) const {
// True only on 64-bit subtargets for the exact pair SrcVT == i32 and
// DstVT == i64.
10925 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
10926}
10927
10929 return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
10930}
10931
10933 // TODO: Support vectors.
10934 if (Y.getValueType().isVector())
10935 return false;
10936
10937 return !isa<ConstantSDNode>(Y);
10938}
10939
10941 // LAMCAS will use amcas[_DB].{b/h/w/d} which does not require extension.
10942 return Subtarget.hasLAMCAS() ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND;
10943}
10944
10946 Type *Ty, bool IsSigned) const {
// On 64-bit subtargets a 32-bit integer type always reports true, whatever
// IsSigned says; every other case simply returns IsSigned.
10947 if (Subtarget.is64Bit() && Ty->isIntegerTy(32))
10948 return true;
10949
10950 return IsSigned;
10951}
10952
10954 // Return false to suppress the unnecessary extensions if the LibCall
10955 // arguments or return value is a float narrower than GRLEN on a soft FP ABI.
10956 if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
10957 Type.getSizeInBits() < Subtarget.getGRLen()))
10958 return false;
10959 return true;
10960}
10961
10962// memcpy, and other memory intrinsics, typically try to use wider load/store
10963// if the source/dest is aligned and the copy size is large enough. We therefore
10964// want to align such objects passed to memory intrinsics.
10966 unsigned &MinSize,
10967 Align &PrefAlign) const {
// Only calls that are memory intrinsics qualify. MinSize and PrefAlign are
// left untouched when false is returned.
10968 if (!isa<MemIntrinsic>(CI))
10969 return false;
10970
// Both outputs are set to 8 on 64-bit subtargets and to 4 otherwise.
10971 if (Subtarget.is64Bit()) {
10972 MinSize = 8;
10973 PrefAlign = Align(8);
10974 } else {
10975 MinSize = 4;
10976 PrefAlign = Align(4);
10977 }
10978
10979 return true;
10980}
10981
10984 if (!VT.isScalableVector() && VT.getVectorNumElements() != 1 &&
10985 VT.getVectorElementType() != MVT::i1)
10986 return TypeWidenVector;
10987
10989}
10990
10991bool LoongArchTargetLowering::splitValueIntoRegisterParts(
10992 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
10993 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
10994 bool IsABIRegCopy = CC.has_value();
10995 EVT ValueVT = Val.getValueType();
10996
10997 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
10998 PartVT == MVT::f32) {
10999 // Cast the [b]f16 to i16, extend to i32, pad with ones to make a float
11000 // nan, and cast to f32.
11001 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);
11002 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);
11003 Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val,
11004 DAG.getConstant(0xFFFF0000, DL, MVT::i32));
11005 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
11006 Parts[0] = Val;
11007 return true;
11008 }
11009
11010 return false;
11011}
11012
11013SDValue LoongArchTargetLowering::joinRegisterPartsIntoValue(
11014 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
11015 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
11016 bool IsABIRegCopy = CC.has_value();
11017
11018 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
11019 PartVT == MVT::f32) {
11020 SDValue Val = Parts[0];
11021
11022 // Cast the f32 to i32, truncate to i16, and cast back to [b]f16.
11023 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
11024 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);
11025 Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
11026 return Val;
11027 }
11028
11029 return SDValue();
11030}
11031
11032MVT LoongArchTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
11033 CallingConv::ID CC,
11034 EVT VT) const {
11035 // Use f32 to pass f16.
11036 if (VT == MVT::f16 && Subtarget.hasBasicF())
11037 return MVT::f32;
11038
11040}
11041
11042unsigned LoongArchTargetLowering::getNumRegistersForCallingConv(
11043 LLVMContext &Context, CallingConv::ID CC, EVT VT) const {
11044 // Use f32 to pass f16.
11045 if (VT == MVT::f16 && Subtarget.hasBasicF())
11046 return 1;
11047
11049}
11050
11052 const SDValue Op, KnownBits &Known, const APInt &DemandedElts,
11053 const SelectionDAG &DAG, unsigned Depth) const {
// Fills Known for target-specific nodes. Known is reset first, so every
// opcode not handled below reports nothing known. DemandedElts, DAG and
// Depth are not read in this body.
11054 unsigned Opc = Op.getOpcode();
11055 Known.resetAll();
11056 switch (Opc) {
11057 default:
11058 break;
11059 case LoongArchISD::VPICK_ZEXT_ELT: {
// Operand 2 carries the element type as a VTSDNode; every result bit at or
// above that type's scalar width is marked known zero.
11060 assert(isa<VTSDNode>(Op->getOperand(2)) && "Unexpected operand!");
11061 EVT VT = cast<VTSDNode>(Op->getOperand(2))->getVT();
11062 unsigned VTBits = VT.getScalarSizeInBits();
11063 assert(Known.getBitWidth() >= VTBits && "Unexpected width!");
11064 Known.Zero.setBitsFrom(VTBits);
11065 break;
11066 }
11067 }
11068}
11069
11071 SDValue Op, const APInt &OriginalDemandedBits,
11072 const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
11073 unsigned Depth) const {
11074 EVT VT = Op.getValueType();
11075 unsigned BitWidth = OriginalDemandedBits.getBitWidth();
11076 unsigned Opc = Op.getOpcode();
11077 switch (Opc) {
11078 default:
11079 break;
11080 case LoongArchISD::VMSKLTZ:
11081 case LoongArchISD::XVMSKLTZ: {
11082 SDValue Src = Op.getOperand(0);
11083 MVT SrcVT = Src.getSimpleValueType();
11084 unsigned SrcBits = SrcVT.getScalarSizeInBits();
11085 unsigned NumElts = SrcVT.getVectorNumElements();
11086
11087 // If we don't need the sign bits at all just return zero.
11088 if (OriginalDemandedBits.countr_zero() >= NumElts)
11089 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
11090
11091 // Only demand the vector elements of the sign bits we need.
11092 APInt KnownUndef, KnownZero;
11093 APInt DemandedElts = OriginalDemandedBits.zextOrTrunc(NumElts);
11094 if (SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef, KnownZero,
11095 TLO, Depth + 1))
11096 return true;
11097
11098 Known.Zero = KnownZero.zext(BitWidth);
11099 Known.Zero.setHighBits(BitWidth - NumElts);
11100
11101 // [X]VMSKLTZ only uses the MSB from each vector element.
11102 KnownBits KnownSrc;
11103 APInt DemandedSrcBits = APInt::getSignMask(SrcBits);
11104 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, KnownSrc, TLO,
11105 Depth + 1))
11106 return true;
11107
11108 if (KnownSrc.One[SrcBits - 1])
11109 Known.One.setLowBits(NumElts);
11110 else if (KnownSrc.Zero[SrcBits - 1])
11111 Known.Zero.setLowBits(NumElts);
11112
11113 // Attempt to avoid multi-use ops if we don't need anything from it.
11115 Src, DemandedSrcBits, DemandedElts, TLO.DAG, Depth + 1))
11116 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewSrc));
11117 return false;
11118 }
11119 }
11120
11122 Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth);
11123}
11124
11126 unsigned Opc = VecOp.getOpcode();
11127
11128 // Assume target opcodes can't be scalarized.
11129 // TODO - do we have any exceptions?
11130 if (Opc >= ISD::BUILTIN_OP_END || !isBinOp(Opc))
11131 return false;
11132
11133 // If the vector op is not supported, try to convert to scalar.
11134 EVT VecVT = VecOp.getValueType();
11136 return true;
11137
11138 // If the vector op is supported, but the scalar op is not, the transform may
11139 // not be worthwhile.
11140 EVT ScalarVT = VecVT.getScalarType();
11141 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT);
11142}
11143
11145 unsigned Index) const {
11147 return false;
11148
11149 // Extract a 128-bit subvector from index 0 of a 256-bit vector is free.
11150 return Index == 0;
11151}
11152
11154 unsigned Index) const {
11155 EVT EltVT = VT.getScalarType();
11156
11157 // Extract a scalar FP value from index 0 of a vector is free.
11158 return (EltVT == MVT::f32 || EltVT == MVT::f64) && Index == 0;
11159}
11160
11162 const MachineFunction &MF) const {
11163
11164 // If the function specifically requests inline stack probes, emit them.
11165 if (MF.getFunction().hasFnAttribute("probe-stack"))
11166 return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
11167 "inline-asm";
11168
11169 return false;
11170}
11171
11173 Align StackAlign) const {
11174 // The default stack probe size is 4096 if the function has no
11175 // stack-probe-size attribute.
11176 const Function &Fn = MF.getFunction();
11177 unsigned StackProbeSize =
11178 Fn.getFnAttributeAsParsedInteger("stack-probe-size", 4096);
11179 // Round down to the stack alignment.
11180 StackProbeSize = alignDown(StackProbeSize, StackAlign.value());
11181 return StackProbeSize ? StackProbeSize : StackAlign.value();
11182}
11183
/// Custom lowering for a dynamic stack allocation node when the function
/// uses inline stack probes.
///
/// \param Op  The allocation node. Operand 0 is read as the chain, operand 1
///            as the allocation size, and operand 2 as a constant alignment.
/// \param DAG The SelectionDAG used to build the replacement nodes.
/// \returns   An empty SDValue when hasInlineStackProbe(MF) is false;
///            otherwise the merged pair {new SP value, output chain}.
///
/// NOTE(review): the declaration of `MF` is not visible in this listing
/// (listing line 11187 is absent) -- presumably it is obtained from `DAG`;
/// confirm against the real file.
11184SDValue
11185LoongArchTargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op,
11186 SelectionDAG &DAG) const {
// Without inline stack probing there is nothing special to do here; an empty
// SDValue is returned (presumably so the caller falls back to its default
// handling -- TODO confirm).
11188 if (!hasInlineStackProbe(MF))
11189 return SDValue();
11190
11191 const MVT GRLenVT = Subtarget.getGRLenVT();
11192 // Get the inputs.
11193 SDValue Chain = Op.getOperand(0);
11194 SDValue Size = Op.getOperand(1);
11195
// Operand 2 must be a ConstantSDNode (cast<> asserts otherwise); it is read
// as an optional alignment via getMaybeAlignValue().
11196 const MaybeAlign Align =
11197 cast<ConstantSDNode>(Op.getOperand(2))->getMaybeAlignValue();
11198 const SDLoc dl(Op);
11199 const EVT VT = Op.getValueType();
11200
11201 // Construct the new SP value in a GPR.
// Read the current value of R3 and continue on the chain produced by that
// copy (result 1 of the CopyFromReg node).
11202 SDValue SP = DAG.getCopyFromReg(Chain, dl, LoongArch::R3, GRLenVT);
11203 Chain = SP.getValue(1);
// New SP = old SP - Size.
11204 SP = DAG.getNode(ISD::SUB, dl, GRLenVT, SP, Size);
// When an alignment was supplied, round the new SP down by masking it with
// the negated alignment value.
// NOTE(review): the SUB is built with GRLenVT but the AND with VT; this
// assumes the two types are the same -- TODO confirm.
11205 if (Align)
11206 SP = DAG.getNode(ISD::AND, dl, VT, SP.getValue(0),
11207 DAG.getSignedConstant(-Align->value(), dl, VT));
11208
11209 // Set the real SP to the new value with a probing loop.
// The PROBED_ALLOCA node takes the chain and the target SP and yields only a
// chain (MVT::Other).
11210 Chain = DAG.getNode(LoongArchISD::PROBED_ALLOCA, dl, MVT::Other, Chain, SP);
// Results: value 0 is the new SP, value 1 is the updated chain.
11211 return DAG.getMergeValues({SP, Chain}, dl);
11212}
11213
11216 MachineBasicBlock *MBB) const {
11217 MachineFunction &MF = *MBB->getParent();
11218 MachineBasicBlock::iterator MBBI = MI.getIterator();
11219 DebugLoc DL = MBB->findDebugLoc(MBBI);
11220 const Register TargetReg = MI.getOperand(0).getReg();
11221
11222 const LoongArchInstrInfo *TII = Subtarget.getInstrInfo();
11223 const bool IsLA64 = Subtarget.is64Bit();
11224 const Align StackAlign = Subtarget.getFrameLowering()->getStackAlign();
11225 const LoongArchTargetLowering *TLI = Subtarget.getTargetLowering();
11226 const uint64_t ProbeSize = TLI->getStackProbeSize(MF, StackAlign);
11227
11228 MachineFunction::iterator MBBInsertPoint = std::next(MBB->getIterator());
11229 MachineBasicBlock *const LoopTestMBB =
11230 MF.CreateMachineBasicBlock(MBB->getBasicBlock());
11231 MF.insert(MBBInsertPoint, LoopTestMBB);
11232 MachineBasicBlock *const ExitMBB =
11233 MF.CreateMachineBasicBlock(MBB->getBasicBlock());
11234 MF.insert(MBBInsertPoint, ExitMBB);
11235 const Register SPReg = LoongArch::R3;
11236 const Register ScratchReg =
11237 MF.getRegInfo().createVirtualRegister(&LoongArch::GPRRegClass);
11238
11239 // ScratchReg = ProbeSize
11240 TII->movImm(*MBB, MBBI, DL, ScratchReg, ProbeSize, MachineInstr::NoFlags);
11241
11242 // LoopTest:
11243 // sub.{w/d} $sp, $sp, ScratchReg
11244 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL,
11245 TII->get(IsLA64 ? LoongArch::SUB_D : LoongArch::SUB_W), SPReg)
11246 .addReg(SPReg)
11247 .addReg(ScratchReg);
11248
11249 // st.{w/d} $zero, $sp, 0
11250 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL,
11251 TII->get(IsLA64 ? LoongArch::ST_D : LoongArch::ST_W))
11252 .addReg(LoongArch::R0)
11253 .addReg(SPReg)
11254 .addImm(0);
11255
11256 // bltu TargetReg, $sp, LoopTest
11257 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(LoongArch::BLTU))
11258 .addReg(TargetReg)
11259 .addReg(SPReg)
11260 .addMBB(LoopTestMBB);
11261
11262 // move $sp, TargetReg
11263 BuildMI(*ExitMBB, ExitMBB->end(), DL, TII->get(LoongArch::OR), SPReg)
11264 .addReg(TargetReg)
11265 .addReg(LoongArch::R0);
11266
11267 ExitMBB->splice(ExitMBB->end(), MBB, std::next(MBBI), MBB->end());
11269
11270 LoopTestMBB->addSuccessor(ExitMBB);
11271 LoopTestMBB->addSuccessor(LoopTestMBB);
11272 MBB->addSuccessor(LoopTestMBB);
11273
11274 MI.eraseFromParent();
11275 MF.getInfo<LoongArchMachineFunctionInfo>()->setDynamicAllocation();
11276 return ExitMBB->begin()->getParent();
11277}
static MCRegister MatchRegisterName(StringRef Name)
static bool checkValueWidth(SDValue V, unsigned width, ISD::LoadExtType &ExtType)
static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
return SDValue()
static SDValue performANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue performSELECT_CCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue performSETCCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static MCRegister MatchRegisterAltName(StringRef Name)
Maps from the set of all alternative register names to a register number.
Function Alias Analysis Results
static uint64_t getConstant(const Value *IndexValue)
static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG, unsigned Flags)
#define X(NUM, ENUM, NAME)
Definition ELF.h:853
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static MachineBasicBlock * emitSelectPseudo(MachineInstr &MI, MachineBasicBlock *BB, unsigned Opcode)
static SDValue unpackFromRegLoc(const CSKYSubtarget &Subtarget, SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define RegName(no)
static SDValue performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
const MCPhysReg ArgFPR32s[]
static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Dispatching routine to lower various 128-bit LoongArch vector shuffles.
static SDValue lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVSHUF4I (if possible).
const MCPhysReg ArgVRs[]
static SDValue lowerVECTOR_SHUFFLE_VPICKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPICKEV (if possible).
static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_XVPICKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPICKOD (if possible).
static SDValue unpackF64OnLA32DSoftABI(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const CCValAssign &HiVA, const SDLoc &DL)
static bool fitsRegularPattern(typename SmallVectorImpl< ValType >::const_iterator Begin, unsigned CheckStride, typename SmallVectorImpl< ValType >::const_iterator End, ValType ExpectedIndex, unsigned ExpectedIndexStride)
Determine whether a range fits a regular pattern of values.
static SDValue lowerVECTOR_SHUFFLE_IsReverse(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE whose result is the reversed source vector.
static SDValue PromoteMaskArithmetic(SDValue N, const SDLoc &DL, EVT VT, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, unsigned Depth)
static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg, SelectionDAG &DAG)
static cl::opt< bool > ZeroDivCheck("loongarch-check-zero-division", cl::Hidden, cl::desc("Trap on integer division by zero."), cl::init(false))
static SDValue lowerVECTOR_SHUFFLE_XVPERMI(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVPERMI (if possible).
static SDValue lowerVECTOR_SHUFFLE_VSHUF(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into VSHUF.
static int getEstimateRefinementSteps(EVT VT, const LoongArchSubtarget &Subtarget)
static bool isSupportedReciprocalEstimateType(EVT VT, const LoongArchSubtarget &Subtarget)
static void emitErrorAndReplaceIntrinsicResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, StringRef ErrorMsg, bool WithChain=true)
static SDValue lowerVECTOR_SHUFFLEAsByteRotate(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE as byte rotate (if possible).
static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp, SelectionDAG &DAG, bool IsSigned=false)
static SDValue lowerVECTOR_SHUFFLE_XVINSVE0(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVINSVE0 (if possible).
static SDValue performMOVFR2GR_SCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_VILVH(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VILVH (if possible).
static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI, unsigned ValNo, MVT ValVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsRet, Type *OrigTy)
static SDValue performVSELECTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG)
static SDValue performSPLIT_PAIR_F64Combine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue performBITCASTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static MachineBasicBlock * emitSplitPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue lowerVectorBitSetImm(SDNode *Node, SelectionDAG &DAG)
static SDValue performSETCC_BITCASTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
@ NoMaterializeFPImm
@ MaterializeFPImm2Ins
@ MaterializeFPImm5Ins
@ MaterializeFPImm6Ins
@ MaterializeFPImm3Ins
@ MaterializeFPImm4Ins
static SDValue performEXTENDCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_XVPACKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPACKOD (if possible).
static bool buildVPERMIInfo(ArrayRef< int > Mask, SDValue V1, SDValue V2, SmallVectorImpl< SDValue > &SrcVec, unsigned &MaskImm)
static std::optional< bool > matchSetCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue Val)
static SDValue combineAndNotIntoVANDN(SDNode *N, const SDLoc &DL, SelectionDAG &DAG)
Try to fold: (and (xor X, -1), Y) -> (vandn X, Y).
static SDValue lowerBUILD_VECTORAsBroadCastLoad(BuildVectorSDNode *BVOp, const SDLoc &DL, SelectionDAG &DAG)
#define CRC_CASE_EXT_BINARYOP(NAME, NODE)
static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG)
static bool checkBitcastSrcVectorSize(SDValue Src, unsigned Size, unsigned Depth)
static bool isConstantSplatVector(SDValue N, APInt &SplatValue, unsigned MinSizeInBits)
static SDValue lowerVECTOR_SHUFFLEAsShift(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, const APInt &Zeroable)
Lower VECTOR_SHUFFLE as shift (if possible).
static SDValue lowerVECTOR_SHUFFLE_VSHUF4I(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into VSHUF4I (if possible).
static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG)
static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
static MachineBasicBlock * insertDivByZeroTrap(MachineInstr &MI, MachineBasicBlock *MBB)
static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG)
static SDValue lowerVECTOR_SHUFFLE_VEXTRINS(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into VEXTRINS (if possible).
static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG)
static SDValue lowerVECTOR_SHUFFLE_VPACKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPACKEV (if possible).
static MachineBasicBlock * emitPseudoVMSKCOND(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue performSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue performVANDNCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
Do target-specific dag combines on LoongArchISD::VANDN nodes.
static void replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, unsigned ResOp)
static SDValue lowerVECTOR_SHUFFLEAsZeroOrAnyExtend(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const APInt &Zeroable)
Lower VECTOR_SHUFFLE as ZERO_EXTEND or ANY_EXTEND (if possible).
static SDValue legalizeIntrinsicImmArg(SDNode *Node, unsigned ImmOp, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, bool IsSigned=false)
static cl::opt< MaterializeFPImm > MaterializeFPImmInsNum("loongarch-materialize-float-imm", cl::Hidden, cl::desc("Maximum number of instructions used (including code sequence " "to generate the value and moving the value to FPR) when " "materializing floating-point immediates (default = 3)"), cl::init(MaterializeFPImm3Ins), cl::values(clEnumValN(NoMaterializeFPImm, "0", "Use constant pool"), clEnumValN(MaterializeFPImm2Ins, "2", "Materialize FP immediate within 2 instructions"), clEnumValN(MaterializeFPImm3Ins, "3", "Materialize FP immediate within 3 instructions"), clEnumValN(MaterializeFPImm4Ins, "4", "Materialize FP immediate within 4 instructions"), clEnumValN(MaterializeFPImm5Ins, "5", "Materialize FP immediate within 5 instructions"), clEnumValN(MaterializeFPImm6Ins, "6", "Materialize FP immediate within 6 instructions " "(behaves same as 5 on loongarch64)")))
static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op, StringRef ErrorMsg, SelectionDAG &DAG)
const MCPhysReg ArgXRs[]
static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State, CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2, MVT ValVT2, MVT LocVT2, ISD::ArgFlagsTy ArgFlags2)
static unsigned getLoongArchWOpcode(unsigned Opcode)
const MCPhysReg ArgFPR64s[]
static MachineBasicBlock * emitPseudoCTPOP(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue performMOVGR2FR_WCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
#define IOCSRWR_CASE(NAME, NODE)
#define CRC_CASE_EXT_UNARYOP(NAME, NODE)
static SDValue lowerVECTOR_SHUFFLE_VPACKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPACKOD (if possible).
static SDValue signExtendBitcastSrcVector(SelectionDAG &DAG, EVT SExtVT, SDValue Src, const SDLoc &DL)
static SDValue isNOT(SDValue V, SelectionDAG &DAG)
static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Dispatching routine to lower various 256-bit LoongArch vector shuffles.
static SDValue lowerVECTOR_SHUFFLE_VREPLVEI(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into VREPLVEI (if possible).
static MachineBasicBlock * emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
const MCPhysReg PreserveNoneArgGPRs[]
static void fillVector(ArrayRef< SDValue > Ops, SelectionDAG &DAG, SDLoc DL, const LoongArchSubtarget &Subtarget, SDValue &Vector, EVT ResTy)
static SDValue fillSubVectorFromBuildVector(BuildVectorSDNode *Node, SelectionDAG &DAG, SDLoc DL, const LoongArchSubtarget &Subtarget, EVT ResTy, unsigned first)
static bool isSelectPseudo(MachineInstr &MI)
static SDValue foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp, SelectionDAG &DAG, bool IsSigned=false)
const MCPhysReg ArgGPRs[]
static SDValue lowerVECTOR_SHUFFLE_XVPERM(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVPERM (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVILVL(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVILVL (if possible).
static SDValue lowerVECTOR_SHUFFLE_VPERMI(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into VPERMI (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVEXTRINS(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVEXTRINS (if possible).
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp, unsigned ExtOpc=ISD::ANY_EXTEND)
static void replaceVecCondBranchResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, unsigned ResOp)
#define ASRT_LE_GT_CASE(NAME)
static SDValue lowerVECTOR_SHUFFLE_XVPACKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPACKEV (if possible).
static SDValue performBR_CCCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static void computeZeroableShuffleElements(ArrayRef< int > Mask, SDValue V1, SDValue V2, APInt &KnownUndef, APInt &KnownZero)
Compute whether each element of a shuffle is zeroable.
static SDValue combineFP_ROUND(SDValue N, const SDLoc &DL, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue widenShuffleMask(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
static MachineBasicBlock * emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static bool canonicalizeShuffleVectorByLane(const SDLoc &DL, MutableArrayRef< int > Mask, MVT VT, SDValue &V1, SDValue &V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Shuffle vectors by lane to generate more optimized instructions.
static SDValue lowerVECTOR_SHUFFLE_XVILVH(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVILVH (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVSHUF (if possible).
static void replaceCMP_XCHG_128Results(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG)
static SDValue lowerVectorPickVE2GR(SDNode *N, SelectionDAG &DAG, unsigned ResOp)
static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
#define IOCSRRD_CASE(NAME, NODE)
static int matchShuffleAsByteRotate(MVT VT, SDValue &V1, SDValue &V2, ArrayRef< int > Mask)
Attempts to match vector shuffle as byte rotation.
static SDValue lowerVECTOR_SHUFFLE_XVPICKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPICKEV (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVREPLVEI (if possible).
static int matchShuffleAsShift(MVT &ShiftVT, unsigned &Opcode, unsigned ScalarSizeInBits, ArrayRef< int > Mask, int MaskOffset, const APInt &Zeroable)
Attempts to match a shuffle mask against the VBSLL, VBSRL, VSLLI and VSRLI instructions.
static SDValue lowerVECTOR_SHUFFLE_VILVL(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VILVL (if possible).
static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG)
static MachineBasicBlock * emitBuildPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE as lane permute and then shuffle (if possible).
static SDValue performVMSKLTZCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static void replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
#define CSR_CASE(ID)
static SDValue lowerVECTOR_SHUFFLE_VPICKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPICKOD (if possible).
static Intrinsic::ID getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen, AtomicRMWInst::BinOp BinOp)
static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS, ISD::CondCode &CC, SelectionDAG &DAG)
static Register allocateArgGPR(CCState &State)
static bool isRepeatedShuffleMask(unsigned LaneSizeInBits, MVT VT, ArrayRef< int > Mask, SmallVectorImpl< int > &RepeatedMask)
Test whether a shuffle mask is equivalent within each sub-lane.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define T
static CodeModel::Model getCodeModel(const PPCSubtarget &S, const TargetMachine &TM, const MachineOperand &MO)
static constexpr MCPhysReg SPReg
const SmallVectorImpl< MachineOperand > & Cond
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
This file defines the SmallSet class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
This file contains some functions that are useful when dealing with strings.
#define LLVM_DEBUG(...)
Definition Debug.h:119
static bool inRange(const MCExpr *Expr, int64_t MinValue, int64_t MaxValue, bool AllowSymbol=false)
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static bool isSequentialOrUndefInRange(ArrayRef< int > Mask, unsigned Pos, unsigned Size, int Low, int Step=1)
Return true if every element in Mask, beginning from position Pos and ending in Pos + Size,...
Value * RHS
Value * LHS
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
Definition APFloat.h:1521
bool isZero() const
Definition APFloat.h:1534
APInt bitcastToAPInt() const
Definition APFloat.h:1430
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1055
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:230
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1563
void setHighBits(unsigned hiBits)
Set the top hiBits bits.
Definition APInt.h:1414
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition APInt.h:1408
LLVM_ABI APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition APInt.cpp:1076
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:968
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition APInt.h:1353
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:372
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition APInt.cpp:1709
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1511
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1662
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition APInt.h:436
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1264
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:201
void setLowBits(unsigned loBits)
Set the bottom loBits bits.
Definition APInt.h:1411
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition APInt.h:287
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1585
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:858
This class represents an incoming formal argument to a Function.
Definition Argument.h:32
unsigned getArgNo() const
Return the index of this formal argument in its containing function.
Definition Argument.h:50
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
Get the array size.
Definition ArrayRef.h:141
An instruction that atomically checks whether a specified value is in a memory location,...
AtomicOrdering getFailureOrdering() const
Returns the failure ordering constraint of this cmpxchg instruction.
an instruction that atomically reads a memory location, combines it with another value,...
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ Add
*p = old + v
@ USubCond
Subtract only if no unsigned overflow.
@ Min
*p = old <signed v ? old : v
@ Sub
*p = old - v
@ And
*p = old & v
@ Xor
*p = old ^ v
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ UIncWrap
Increment one up to a maximum value.
@ Max
*p = old >signed v ? old : v
@ UMin
*p = old <unsigned v ? old : v
@ UMax
*p = old >unsigned v ? old : v
@ UDecWrap
Decrement one until a minimum value or zero.
@ Nand
*p = ~(old & v)
Value * getPointerOperand()
bool isFloatingPointOperation() const
BinOp getOperation() const
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this rmw instruction.
AtomicOrdering getOrdering() const
Returns the ordering constraint of this rmw instruction.
LLVM_ABI StringRef getValueAsString() const
Return the attribute's value as a string.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
bool test(unsigned Idx) const
Returns true if bit Idx is set.
Definition BitVector.h:482
size_type count() const
Returns the number of bits which are set.
Definition BitVector.h:181
A "pseudo-class" with methods for operating on BUILD_VECTORs.
CCState - This class holds information needed while lowering arguments and return values.
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
LLVM_ABI void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeCallOperands - Analyze the outgoing arguments to a call, incorporating info about the passed v...
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
LLVM_ABI void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
CCValAssign - Represent assignment of one arg/retval to a location.
static CCValAssign getPending(unsigned ValNo, MVT ValVT, MVT LocVT, LocInfo HTP, unsigned ExtraInfo=0)
Register getLocReg() const
LocInfo getLocInfo() const
static CCValAssign getReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP, bool IsCustom=false)
static CCValAssign getCustomReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP)
static CCValAssign getMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP, bool IsCustom=false)
bool needsCustom() const
int64_t getLocMemOffset() const
unsigned getValNo() const
static CCValAssign getCustomMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP)
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
LLVM_ABI bool isMustTailCall() const
Tests if this call site must be tail call optimized.
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
const APFloat & getValueAPF() const
This is the shared class of boolean and integer constants.
Definition Constants.h:87
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition Constants.h:231
bool isZero() const
This is just a convenience method to make client code smaller for a common case.
Definition Constants.h:219
uint64_t getZExtValue() const
int64_t getSExtValue() const
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string, and methods for querying it.
Definition DataLayout.h:64
unsigned getPointerSizeInBits(unsigned AS=0) const
The size in bits of the pointer representation in a given address space.
Definition DataLayout.h:501
LLVM_ABI Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
A debug info location.
Definition DebugLoc.h:124
FunctionType * getFunctionType() const
Returns the FunctionType of this function.
Definition Function.h:211
iterator_range< arg_iterator > args()
Definition Function.h:892
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition Function.cpp:763
uint64_t getFnAttributeAsParsedInteger(StringRef Kind, uint64_t Default=0) const
For a string attribute Kind, parse attribute as an integer.
Definition Function.cpp:775
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this functio...
Definition Function.h:272
Argument * getArg(unsigned i) const
Definition Function.h:886
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:728
bool isDSOLocal() const
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2858
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Class to represent integer types.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI void emitError(const Instruction *I, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location inf...
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
LoongArchMachineFunctionInfo - This class is derived from MachineFunctionInfo and contains private Lo...
void setIncomingIndirectArg(unsigned ArgIndex, Register Reg)
Register getIncomingIndirectArg(unsigned ArgIndex) const
const LoongArchRegisterInfo * getRegisterInfo() const override
const LoongArchInstrInfo * getInstrInfo() const override
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override
Return true if result of the specified node is used by a return node only.
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps, bool &UseOneConstNR, bool Reciprocal) const override
Hooks for building estimates in place of slower divisions and square roots.
bool isLegalICmpImmediate(int64_t Imm) const override
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(const AtomicCmpXchgInst *CI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
Return the ValueType of the result of SETCC operations.
std::pair< bool, uint64_t > isImmVLDILegalForMode1(const APInt &SplatValue, const unsigned SplatBitSize) const
Check if a constant splat can be generated using [x]vldi, where imm[12] is 1.
void getTgtMemIntrinsic(SmallVectorImpl< IntrinsicInfo > &Infos, const CallBase &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
bool hasInlineStackProbe(const MachineFunction &MF) const override
True if stack clash protection is enabled for this function.
SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
bool isExtractVecEltCheap(EVT VT, unsigned Index) const override
Return true if extraction of a scalar element from the given vector type at the given index is cheap.
LegalizeTypeAction getPreferredVectorAction(MVT VT) const override
Return the preferred vector type legalization action.
bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override
Return true if sign-extension from SrcVT to DstVT is cheaper than zero-extension.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Determine if the target supports unaligned memory accesses.
bool isCheapToSpeculateCtlz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic ctlz.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
bool shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize, Align &PrefAlign) const override
Return true if the pointer arguments to CI should be aligned by aligning the object whose address is ...
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const override
Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type from this source type with ...
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
bool signExtendConstant(const ConstantInt *CI) const override
Return true if this constant should be sign extended when promoting to a larger type.
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(const AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool isLegalAddImmediate(int64_t Imm) const override
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
bool isCheapToSpeculateCttz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic cttz.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
MachineBasicBlock * emitDynamicProbedAlloc(MachineInstr &MI, MachineBasicBlock *MBB) const
bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const override
Returns true if arguments should be sign-extended in lib calls.
bool shouldScalarizeBinop(SDValue VecOp) const override
Try to convert an extract element of a vector binary operation into an extract element followed by a ...
bool isFPImmVLDILegal(const APFloat &Imm, EVT VT) const
bool shouldExtendTypeInLibCall(EVT Type) const override
Returns true if arguments should be extended in lib calls.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
bool hasAndNot(SDValue Y) const override
Return true if the target has a bitwise and-not operation: X = ~A & B. This can be used to simplify se...
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
unsigned getStackProbeSize(const MachineFunction &MF, Align StackAlign) const
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const override
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
void emitExpandAtomicRMW(AtomicRMWInst *AI) const override
Perform an atomicrmw expansion in a target-specific way.
ISD::NodeType getExtendForAtomicCmpSwapArg() const override
Returns how the platform's atomic compare and swap expects its comparison value to be extended (ZERO_...
LoongArchTargetLowering(const TargetMachine &TM, const LoongArchSubtarget &STI)
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
bool hasAndNotCompare(SDValue Y) const override
Return true if the target should transform: (X & Y) == Y ---> (~X & Y) == 0 (X & Y) !...
SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps) const override
Return a reciprocal estimate value for the input operand.
bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT, const MachineFunction &MF) const override
Returns true if it's reasonable to merge stores to MemVT size.
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context, const Type *RetTy) const override
This hook should be implemented to check whether the return values described by the Outs array can fi...
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array,...
bool mayBeEmittedAsTailCall(const CallInst *CI) const override
Return true if the target may be able to emit the call instruction as a tail call.
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:41
bool hasFeature(unsigned Feature) const
Machine Value Type.
static MVT getFloatingPointVT(unsigned BitWidth)
bool is128BitVector() const
Return true if this is a 128-bit vector type.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isScalableVector() const
Return true if this is a vector value type where the runtime length is machine dependent.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
bool is256BitVector() const
Return true if this is a 256-bit vector type.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
MVT getDoubleNumVectorElementsVT() const
MVT getHalfNumVectorElementsVT() const
Return a VT for a vector type with the same element type but half the number of elements.
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
MVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
void push_back(MachineInstr *MI)
void setCallFrameSize(unsigned N)
Set the call frame size on entry to this basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
Representation of each machine instruction.
bool isImplicitDef() const
LLVM_ABI void collectDebugValues(SmallVectorImpl< MachineInstr * > &DbgValues)
Scan instructions immediately following MI and collect any matching DBG_VALUEs.
const MachineOperand & getOperand(unsigned i) const
LLVM_ABI MachineInstrBundleIterator< MachineInstr > eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
A description of a memory reference used in the backend.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
Flags getFlags() const
Return the raw flags of the source value.
MachineOperand class - Representation of each machine instruction operand.
void setIsKill(bool Val=true)
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
Align getAlign() const
MachineMemOperand * getMemOperand() const
Return the unique MachineMemOperand object describing the memory reference performed by operation.
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
Represent a mutable reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:294
Class to represent pointers.
unsigned getAddressSpace() const
Return the address space of the Pointer type.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:79
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
LLVM_ABI bool isOnlyUserOf(const SDNode *N) const
Return true if this node is the only use of N.
size_t use_size() const
Return the number of uses of this node.
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumOperands() const
Return the number of values used by this operation.
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
bool isUndef() const
Returns true if the node type is UNDEF or POISON.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
uint64_t getScalarValueSizeInBits() const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
LLVM_ABI SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
bool isSafeToSpeculativelyExecute(unsigned Opcode) const
Some opcodes may create immediate undefined behavior when used with some values (integer division-by-...
SDValue getExtractSubvector(const SDLoc &DL, EVT VT, SDValue Vec, unsigned Idx)
Return the VT typed sub-vector of Vec at Idx.
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getInsertSubvector(const SDLoc &DL, SDValue Vec, SDValue SubVec, unsigned Idx)
Insert SubVec at the Idx element of Vec.
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false, SDNodeFlags Flags={})
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
LLVM_ABI SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), BatchAAResults *BatchAA=nullptr)
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
static constexpr unsigned MaxRecursionDepth
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
LLVM_ABI SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
LLVM_ABI void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
LLVM_ABI SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
LLVM_ABI SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
LLVM_ABI std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
LLVM_ABI SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
LLVM_ABI SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
LLVM_ABI SDValue WidenVector(const SDValue &N, const SDLoc &DL)
Widen the vector up to the next power of two using INSERT_SUBVECTOR.
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
LLVM_ABI SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
LLVM_ABI SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getRegisterMask(const uint32_t *RegMask)
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVM_ABI SDValue getCondCode(ISD::CondCode Cond)
LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
LLVMContext * getContext() const
LLVM_ABI SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
LLVM_ABI SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
LLVM_ABI std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
static LLVM_ABI bool isReverseMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask swaps the order of elements from exactly one source vector.
ArrayRef< int > getMask() const
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:134
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition SmallSet.h:176
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:184
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void assign(size_type NumElts, ValueParamT Elt)
void reserve(size_type N)
typename SuperClass::const_iterator const_iterator
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StackOffset holds a fixed and a scalable offset in bytes.
Definition TypeSize.h:30
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition StringRef.h:730
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:258
constexpr size_t size() const
Get the string size.
Definition StringRef.h:144
TargetInstrInfo - Interface to description of machine instruction set.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
const TargetMachine & getTargetMachine() const
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
LegalizeTypeAction
This enum indicates whether types are legal for a target, and if not, what action should be used to...
void setMaxBytesForAlignment(unsigned MaxBytes)
bool isOperationLegalOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal using promotion.
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
virtual TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT) const
Return the preferred vector type legalization action.
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
void setMinCmpXchgSizeInBits(unsigned SizeInBits)
Sets the minimum cmpxchg or ll/sc size supported by the backend.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
std::vector< ArgListEntry > ArgListTy
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Vector Op.
virtual InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const
SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
More limited version of SimplifyDemandedBits that can be used to "lookthrough" ops that don't contrib...
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
TargetLowering(const TargetLowering &)=delete
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::LibcallImpl LibcallImpl, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
Primary interface to the complete machine description for the target machine.
bool useTLSDESC() const
Returns true if this target uses TLS Descriptors.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
bool shouldAssumeDSOLocal(const GlobalValue *GV) const
CodeModel::Model getCodeModel() const
Returns the code model.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetInstrInfo * getInstrInfo() const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
LLVM_ABI unsigned getIntegerBitWidth() const
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:197
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:257
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:313
This class is used to represent EVT's, which are used to parameterize some operations.
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:552
self_iterator getIterator()
Definition ilist_node.h:123
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ PreserveMost
Used for runtime calls that preserve most registers.
Definition CallingConv.h:63
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition CallingConv.h:50
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ PreserveNone
Used for runtime calls that preserve no general registers.
Definition CallingConv.h:90
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
LLVM_ABI bool isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are ~0 ...
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition ISDOpcodes.h:41
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:823
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to; it returns an output chain.
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition ISDOpcodes.h:511
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition ISDOpcodes.h:45
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:275
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:600
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:783
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:264
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:857
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:518
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:220
@ GlobalAddress
Definition ISDOpcodes.h:88
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:884
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:584
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:417
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:747
@ MEMBARRIER
MEMBARRIER - Compiler barrier only; generate a no-op.
@ ATOMIC_FENCE
OUTCHAIN = ATOMIC_FENCE(INCHAIN, ordering, scope) This corresponds to the fence instruction.
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition ISDOpcodes.h:914
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition ISDOpcodes.h:997
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:254
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
@ GlobalTLSAddress
Definition ISDOpcodes.h:89
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:848
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition ISDOpcodes.h:715
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:665
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ BR_CC
BR_CC - Conditional branch.
@ BR_JT
BR_JT - Jumptable branch.
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition ISDOpcodes.h:541
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition ISDOpcodes.h:548
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:374
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:800
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:233
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:704
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:769
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:649
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:614
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimumNumber or maximumNumber on two values,...
@ READ_REGISTER
READ_REGISTER, WRITE_REGISTER - This node represents llvm.register on the DAG, which implements the n...
Definition ISDOpcodes.h:139
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:576
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition ISDOpcodes.h:224
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:854
@ DEBUGTRAP
DEBUGTRAP - Trap intended to get the attention of a debugger.
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:815
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values, following IEEE-754 definition...
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:892
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:727
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:982
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:809
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition ISDOpcodes.h:150
@ BF16_TO_FP
BF16_TO_FP, FP_TO_BF16 - These operators are used to perform promotions and truncation for bfloat16.
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition ISDOpcodes.h:110
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:930
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:739
@ TRAP
TRAP - Trapping instruction.
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:205
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
Definition ISDOpcodes.h:710
@ FREEZE
FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or is evaluated to UNDEF),...
Definition ISDOpcodes.h:241
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:565
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:963
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition ISDOpcodes.h:925
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:860
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
@ BRCOND
BRCOND - Conditional branch.
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition ISDOpcodes.h:837
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition ISDOpcodes.h:62
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:365
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition ISDOpcodes.h:722
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:213
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:556
bool isExtVecInRegOpcode(unsigned Opcode)
LLVM_ABI bool isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are 0 o...
LLVM_ABI CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
bool isBitwiseLogicOp(unsigned Opcode)
Whether this is bitwise logic opcode.
LLVM_ABI bool isFreezeUndef(const SDNode *N)
Return true if the specified node is FREEZE(UNDEF).
LLVM_ABI CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LLVM_ABI bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
LLVM_ABI NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode)
Get underlying scalar opcode for VECREDUCE opcode.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > OverloadTys={})
Look up the Function declaration of the intrinsic id in the Module M.
ABI getTargetABI(StringRef ABIName)
InstSeq generateInstSeq(int64_t Val)
LLVM_ABI Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition LLVMContext.h:55
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
Sequence
A sequence of states that a pointer may go through in which an objc_retain and objc_release are actua...
Definition PtrState.h:41
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
This is an optimization pass for GlobalISel generic memory operations.
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
Definition Threading.h:280
@ Offset
Definition DWP.cpp:558
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1738
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
constexpr RegState getKillRegState(bool B)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
bool isIntOrFPConstant(SDValue V)
Return true if V is either an integer or FP constant.
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
Definition bit.h:325
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value and is Skew mod Align.
Definition MathExtras.h:546
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
LLVM_ABI bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:337
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
Definition MathExtras.h:273
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1745
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition MathExtras.h:261
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Other
Any other memory.
Definition ModRef.h:68
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr bool isShiftedInt(int64_t x)
Checks if a signed integer is an N bit number shifted left by S.
Definition MathExtras.h:182
constexpr unsigned BitWidth
std::string join_items(Sep Separator, Args &&... Items)
Joins the strings in the parameter pack Items, adding Separator between the elements....
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
PointerUnion< const Value *, const PseudoSourceValue * > ValueType
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:177
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:876
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:77
Extended Value Type.
Definition ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition ValueTypes.h:90
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:418
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:145
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition ValueTypes.h:307
bool bitsLT(EVT VT) const
Return true if this has fewer bits than VT.
Definition ValueTypes.h:323
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:155
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:396
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:408
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:339
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition ValueTypes.h:230
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:61
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:404
static EVT getFloatingPointVT(unsigned BitWidth)
Returns the EVT that represents a floating-point type with the given number of bits.
Definition ValueTypes.h:55
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:176
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:346
bool is256BitVector() const
Return true if this is a 256-bit vector type.
Definition ValueTypes.h:235
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:351
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:165
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:359
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:484
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:160
Align getNonZeroOrigAlign() const
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
void resetAll()
Resets the known state of all bits.
Definition KnownBits.h:72
Matching combinators.
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
static LLVM_ABI MachinePointerInfo getUnknownStack(MachineFunction &MF)
Stack memory without other information.
static LLVM_ABI MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition Alignment.h:106
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
SmallVector< ISD::OutputArg, 32 > Outs
LLVM_ABI SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
This structure is used to pass arguments to makeLibCall function.
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...