1//=- LoongArchISelLowering.cpp - LoongArch DAG Lowering Implementation ---===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that LoongArch uses to lower LLVM code into
10// a selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
15#include "LoongArch.h"
19#include "LoongArchSubtarget.h"
23#include "llvm/ADT/SmallSet.h"
24#include "llvm/ADT/Statistic.h"
30#include "llvm/IR/IRBuilder.h"
32#include "llvm/IR/IntrinsicsLoongArch.h"
34#include "llvm/Support/Debug.h"
39
40using namespace llvm;
41
42#define DEBUG_TYPE "loongarch-isel-lowering"
43
44STATISTIC(NumTailCalls, "Number of tail calls");
45
54
56 "loongarch-materialize-float-imm", cl::Hidden,
57 cl::desc("Maximum number of instructions used (including code sequence "
58 "to generate the value and moving the value to FPR) when "
59 "materializing floating-point immediates (default = 3)"),
61 cl::values(clEnumValN(NoMaterializeFPImm, "0", "Use constant pool"),
63 "Materialize FP immediate within 2 instructions"),
65 "Materialize FP immediate within 3 instructions"),
67 "Materialize FP immediate within 4 instructions"),
69 "Materialize FP immediate within 5 instructions"),
71 "Materialize FP immediate within 6 instructions "
72 "(behaves same as 5 on loongarch64)")));
73
74static cl::opt<bool> ZeroDivCheck("loongarch-check-zero-division", cl::Hidden,
75 cl::desc("Trap on integer division by zero."),
76 cl::init(false));
77
79 const LoongArchSubtarget &STI)
80 : TargetLowering(TM, STI), Subtarget(STI) {
81
82 MVT GRLenVT = Subtarget.getGRLenVT();
83
84 // Set up the register classes.
85
86 addRegisterClass(GRLenVT, &LoongArch::GPRRegClass);
87 if (Subtarget.hasBasicF())
88 addRegisterClass(MVT::f32, &LoongArch::FPR32RegClass);
89 if (Subtarget.hasBasicD())
90 addRegisterClass(MVT::f64, &LoongArch::FPR64RegClass);
91
92 static const MVT::SimpleValueType LSXVTs[] = {
93 MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32, MVT::v2f64};
94 static const MVT::SimpleValueType LASXVTs[] = {
95 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, MVT::v8f32, MVT::v4f64};
96
97 if (Subtarget.hasExtLSX())
98 for (MVT VT : LSXVTs)
99 addRegisterClass(VT, &LoongArch::LSX128RegClass);
100
101 if (Subtarget.hasExtLASX())
102 for (MVT VT : LASXVTs)
103 addRegisterClass(VT, &LoongArch::LASX256RegClass);
104
105 // Set operations for LA32 and LA64.
106
108 MVT::i1, Promote);
109
116
119 GRLenVT, Custom);
120
122
127
129 setOperationAction(ISD::TRAP, MVT::Other, Legal);
130
134
136
137 // BITREV/REVB requires the 32S feature.
138 if (STI.has32S()) {
139 // Expand bitreverse.i16 with native-width bitrev and shift for now, before
140 // we know which of sll and revb.2h is faster.
143
144 // LA32 does not have REVB.2W and REVB.D due to the 64-bit operands, and
145 // the narrower REVB.W does not exist. But LA32 does have REVB.2H, so i16
146 // and i32 could still be byte-swapped relatively cheaply.
148 } else {
156 }
157
164
167
168 // Set operations for LA64 only.
169
170 if (Subtarget.is64Bit()) {
188
192 Custom);
194 }
195
196 // Set operations for LA32 only.
197
198 if (!Subtarget.is64Bit()) {
204 if (Subtarget.hasBasicD())
206 }
207
209
210 static const ISD::CondCode FPCCToExpand[] = {
213
214 // Set operations for 'F' feature.
215
216 if (Subtarget.hasBasicF()) {
217 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
218 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
219 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
220 setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
221 setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
222
241 Subtarget.isSoftFPABI() ? LibCall : Custom);
243 Subtarget.isSoftFPABI() ? LibCall : Custom);
246 Subtarget.isSoftFPABI() ? LibCall : Custom);
247
248 if (Subtarget.is64Bit())
250
251 if (!Subtarget.hasBasicD()) {
253 if (Subtarget.is64Bit()) {
256 }
257 }
258 }
259
260 // Set operations for 'D' feature.
261
262 if (Subtarget.hasBasicD()) {
263 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
264 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
265 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
266 setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
267 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
268 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
269 setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
270
290 Subtarget.isSoftFPABI() ? LibCall : Custom);
293 Subtarget.isSoftFPABI() ? LibCall : Custom);
294
295 if (Subtarget.is64Bit())
297 }
298
299 // Set operations for 'LSX' feature.
300
301 if (Subtarget.hasExtLSX()) {
303 // Expand all truncating stores and extending loads.
304 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
305 setTruncStoreAction(VT, InnerVT, Expand);
308 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
309 }
310 // By default everything must be expanded. Then we will selectively turn
311 // on ones that can be effectively codegen'd.
312 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
314 }
315
316 for (MVT VT : LSXVTs) {
320
324
329 }
330 for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
333 Legal);
335 VT, Legal);
342 Expand);
353 }
354 for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
356 for (MVT VT : {MVT::v8i16, MVT::v4i32, MVT::v2i64})
358 for (MVT VT : {MVT::v4i32, MVT::v2i64}) {
361 }
362 for (MVT VT : {MVT::v4f32, MVT::v2f64}) {
370 VT, Expand);
378 }
380 setOperationAction(ISD::FCEIL, {MVT::f32, MVT::f64}, Legal);
381 setOperationAction(ISD::FFLOOR, {MVT::f32, MVT::f64}, Legal);
382 setOperationAction(ISD::FTRUNC, {MVT::f32, MVT::f64}, Legal);
383 setOperationAction(ISD::FROUNDEVEN, {MVT::f32, MVT::f64}, Legal);
384
385 for (MVT VT :
386 {MVT::v16i8, MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v8i16, MVT::v4i16,
387 MVT::v2i16, MVT::v4i32, MVT::v2i32, MVT::v2i64}) {
397 }
398 }
399
400 // Set operations for 'LASX' feature.
401
402 if (Subtarget.hasExtLASX()) {
403 for (MVT VT : LASXVTs) {
407
413
417 }
418 for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) {
421 Legal);
423 VT, Legal);
430 Expand);
442 }
443 for (MVT VT : {MVT::v32i8, MVT::v16i16, MVT::v8i32})
445 for (MVT VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64})
447 for (MVT VT : {MVT::v8i32, MVT::v4i32, MVT::v4i64}) {
450 }
451 for (MVT VT : {MVT::v8f32, MVT::v4f64}) {
459 VT, Expand);
467 }
468 }
469
470 // Set DAG combine for LA32 and LA64.
471 if (Subtarget.hasBasicF()) {
473 }
474
479
480 // Set DAG combine for 'LSX' feature.
481
482 if (Subtarget.hasExtLSX()) {
485 }
486
487 // Compute derived properties from the register classes.
488 computeRegisterProperties(Subtarget.getRegisterInfo());
489
491
494
495 setMaxAtomicSizeInBitsSupported(Subtarget.getGRLen());
496
498
499 // Function alignments.
501 // Set preferred alignments.
502 setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment());
503 setPrefLoopAlignment(Subtarget.getPrefLoopAlignment());
504 setMaxBytesForAlignment(Subtarget.getMaxBytesForAlignment());
505
506 // cmpxchg sizes down to 8 bits become legal if LAMCAS is available.
507 if (Subtarget.hasLAMCAS())
509
510 if (Subtarget.hasSCQ()) {
513 }
514
515 // Disable strict node mutation.
516 IsStrictFPEnabled = true;
517}
518
520 const GlobalAddressSDNode *GA) const {
521 // In order to maximise the opportunity for common subexpression elimination,
522 // keep a separate ADD node for the global address offset instead of folding
523 // it into the global address node. Later peephole optimisations may choose to
524 // fold it back in when profitable.
525 return false;
526}
527
529 SelectionDAG &DAG) const {
530 switch (Op.getOpcode()) {
532 return lowerATOMIC_FENCE(Op, DAG);
534 return lowerEH_DWARF_CFA(Op, DAG);
536 return lowerGlobalAddress(Op, DAG);
538 return lowerGlobalTLSAddress(Op, DAG);
540 return lowerINTRINSIC_WO_CHAIN(Op, DAG);
542 return lowerINTRINSIC_W_CHAIN(Op, DAG);
544 return lowerINTRINSIC_VOID(Op, DAG);
546 return lowerBlockAddress(Op, DAG);
547 case ISD::JumpTable:
548 return lowerJumpTable(Op, DAG);
549 case ISD::SHL_PARTS:
550 return lowerShiftLeftParts(Op, DAG);
551 case ISD::SRA_PARTS:
552 return lowerShiftRightParts(Op, DAG, true);
553 case ISD::SRL_PARTS:
554 return lowerShiftRightParts(Op, DAG, false);
556 return lowerConstantPool(Op, DAG);
557 case ISD::FP_TO_SINT:
558 return lowerFP_TO_SINT(Op, DAG);
559 case ISD::BITCAST:
560 return lowerBITCAST(Op, DAG);
561 case ISD::UINT_TO_FP:
562 return lowerUINT_TO_FP(Op, DAG);
563 case ISD::SINT_TO_FP:
564 return lowerSINT_TO_FP(Op, DAG);
565 case ISD::VASTART:
566 return lowerVASTART(Op, DAG);
567 case ISD::FRAMEADDR:
568 return lowerFRAMEADDR(Op, DAG);
569 case ISD::RETURNADDR:
570 return lowerRETURNADDR(Op, DAG);
572 return lowerWRITE_REGISTER(Op, DAG);
574 return lowerINSERT_VECTOR_ELT(Op, DAG);
576 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
578 return lowerBUILD_VECTOR(Op, DAG);
580 return lowerCONCAT_VECTORS(Op, DAG);
582 return lowerVECTOR_SHUFFLE(Op, DAG);
583 case ISD::BITREVERSE:
584 return lowerBITREVERSE(Op, DAG);
586 return lowerSCALAR_TO_VECTOR(Op, DAG);
587 case ISD::PREFETCH:
588 return lowerPREFETCH(Op, DAG);
589 case ISD::SELECT:
590 return lowerSELECT(Op, DAG);
591 case ISD::BRCOND:
592 return lowerBRCOND(Op, DAG);
593 case ISD::FP_TO_FP16:
594 return lowerFP_TO_FP16(Op, DAG);
595 case ISD::FP16_TO_FP:
596 return lowerFP16_TO_FP(Op, DAG);
597 case ISD::FP_TO_BF16:
598 return lowerFP_TO_BF16(Op, DAG);
599 case ISD::BF16_TO_FP:
600 return lowerBF16_TO_FP(Op, DAG);
602 return lowerVECREDUCE_ADD(Op, DAG);
603 case ISD::ROTL:
604 case ISD::ROTR:
605 return lowerRotate(Op, DAG);
613 return lowerVECREDUCE(Op, DAG);
614 case ISD::ConstantFP:
615 return lowerConstantFP(Op, DAG);
616 case ISD::SETCC:
617 return lowerSETCC(Op, DAG);
618 }
619 return SDValue();
620}
621
622// Helper to attempt to return a cheaper, bit-inverted version of \p V.
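// For example, isNOT((xor X, -1), DAG) returns X, which lets callers fold the
// inversion into and-not style patterns.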
624 // TODO: don't always ignore oneuse constraints.
625 V = peekThroughBitcasts(V);
626 EVT VT = V.getValueType();
627
628 // Match not(xor X, -1) -> X.
629 if (V.getOpcode() == ISD::XOR &&
630 (ISD::isBuildVectorAllOnes(V.getOperand(1).getNode()) ||
631 isAllOnesConstant(V.getOperand(1))))
632 return V.getOperand(0);
633
634   // Match not(extract_subvector(not(X))) -> extract_subvector(X).
635 if (V.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
636 (isNullConstant(V.getOperand(1)) || V.getOperand(0).hasOneUse())) {
637 if (SDValue Not = isNOT(V.getOperand(0), DAG)) {
638 Not = DAG.getBitcast(V.getOperand(0).getValueType(), Not);
639 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(Not), VT, Not,
640 V.getOperand(1));
641 }
642 }
643
644   // Match not(SplatVector(not(X))) -> SplatVector(X).
645 if (V.getOpcode() == ISD::BUILD_VECTOR) {
646 if (SDValue SplatValue =
647 cast<BuildVectorSDNode>(V.getNode())->getSplatValue()) {
648 if (!V->isOnlyUserOf(SplatValue.getNode()))
649 return SDValue();
650
651 if (SDValue Not = isNOT(SplatValue, DAG)) {
652 Not = DAG.getBitcast(V.getOperand(0).getValueType(), Not);
653 return DAG.getSplat(VT, SDLoc(Not), Not);
654 }
655 }
656 }
657
658 // Match not(or(not(X),not(Y))) -> and(X, Y).
659 if (V.getOpcode() == ISD::OR && DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
660 V.getOperand(0).hasOneUse() && V.getOperand(1).hasOneUse()) {
661 // TODO: Handle cases with single NOT operand -> VANDN
662 if (SDValue Op1 = isNOT(V.getOperand(1), DAG))
663 if (SDValue Op0 = isNOT(V.getOperand(0), DAG))
664 return DAG.getNode(ISD::AND, SDLoc(V), VT, DAG.getBitcast(VT, Op0),
665 DAG.getBitcast(VT, Op1));
666 }
667
668 // TODO: Add more matching patterns. Such as,
669 // not(concat_vectors(not(X), not(Y))) -> concat_vectors(X, Y).
670 // not(slt(C, X)) -> slt(X - 1, C)
671
672 return SDValue();
673}
674
675SDValue LoongArchTargetLowering::lowerConstantFP(SDValue Op,
676 SelectionDAG &DAG) const {
677 EVT VT = Op.getValueType();
678 ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op);
679 const APFloat &FPVal = CFP->getValueAPF();
680 SDLoc DL(CFP);
681
682 assert((VT == MVT::f32 && Subtarget.hasBasicF()) ||
683 (VT == MVT::f64 && Subtarget.hasBasicD()));
684
685 // If value is 0.0 or -0.0, just ignore it.
686 if (FPVal.isZero())
687 return SDValue();
688
689   // If LSX is enabled, use the cheaper 'vldi' instruction if possible.
690 if (isFPImmVLDILegal(FPVal, VT))
691 return SDValue();
692
693   // Construct the value as an integer and move it to a float register.
694 APInt INTVal = FPVal.bitcastToAPInt();
695
696   // If more than MaterializeFPImmInsNum instructions would be used to
697   // generate the INTVal and move it to a float register, fall back to a
698   // floating-point load from the constant pool.
700 int InsNum = Seq.size() + ((VT == MVT::f64 && !Subtarget.is64Bit()) ? 2 : 1);
701 if (InsNum > MaterializeFPImmInsNum && !FPVal.isExactlyValue(+1.0))
702 return SDValue();
703
704 switch (VT.getSimpleVT().SimpleTy) {
705 default:
706 llvm_unreachable("Unexpected floating point type!");
707 break;
708 case MVT::f32: {
709 SDValue NewVal = DAG.getConstant(INTVal, DL, MVT::i32);
710 if (Subtarget.is64Bit())
711 NewVal = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, NewVal);
712 return DAG.getNode(Subtarget.is64Bit() ? LoongArchISD::MOVGR2FR_W_LA64
713 : LoongArchISD::MOVGR2FR_W,
714 DL, VT, NewVal);
715 }
716 case MVT::f64: {
717 if (Subtarget.is64Bit()) {
718 SDValue NewVal = DAG.getConstant(INTVal, DL, MVT::i64);
719 return DAG.getNode(LoongArchISD::MOVGR2FR_D, DL, VT, NewVal);
720 }
721 SDValue Lo = DAG.getConstant(INTVal.trunc(32), DL, MVT::i32);
722 SDValue Hi = DAG.getConstant(INTVal.lshr(32).trunc(32), DL, MVT::i32);
723 return DAG.getNode(LoongArchISD::MOVGR2FR_D_LO_HI, DL, VT, Lo, Hi);
724 }
725 }
726
727 return SDValue();
728}
729
730// Ensure SETCC result and operand have the same bit width; isel does not
731// support mismatched widths.
732SDValue LoongArchTargetLowering::lowerSETCC(SDValue Op,
733 SelectionDAG &DAG) const {
734 SDLoc DL(Op);
735 EVT ResultVT = Op.getValueType();
736 EVT OperandVT = Op.getOperand(0).getValueType();
737
738 EVT SetCCResultVT =
739 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), OperandVT);
740
741 if (ResultVT == SetCCResultVT)
742 return Op;
743
744 assert(Op.getOperand(0).getValueType() == Op.getOperand(1).getValueType() &&
745 "SETCC operands must have the same type!");
746
747 SDValue SetCCNode =
748 DAG.getNode(ISD::SETCC, DL, SetCCResultVT, Op.getOperand(0),
749 Op.getOperand(1), Op.getOperand(2));
750
751 if (ResultVT.bitsGT(SetCCResultVT))
752 SetCCNode = DAG.getNode(ISD::SIGN_EXTEND, DL, ResultVT, SetCCNode);
753 else if (ResultVT.bitsLT(SetCCResultVT))
754 SetCCNode = DAG.getNode(ISD::TRUNCATE, DL, ResultVT, SetCCNode);
755
756 return SetCCNode;
757}
758
759// Lower vecreduce_add using vhaddw instructions.
760// For Example:
761// call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a)
762// can be lowered to:
763// VHADDW_D_W vr0, vr0, vr0
764// VHADDW_Q_D vr0, vr0, vr0
765// VPICKVE2GR_D a0, vr0, 0
766// ADDI_W a0, a0, 0
767SDValue LoongArchTargetLowering::lowerVECREDUCE_ADD(SDValue Op,
768 SelectionDAG &DAG) const {
769
770 SDLoc DL(Op);
771 MVT OpVT = Op.getSimpleValueType();
772 SDValue Val = Op.getOperand(0);
773
774 unsigned NumEles = Val.getSimpleValueType().getVectorNumElements();
775 unsigned EleBits = Val.getSimpleValueType().getScalarSizeInBits();
776 unsigned ResBits = OpVT.getScalarSizeInBits();
777
778 unsigned LegalVecSize = 128;
779 bool isLASX256Vector =
780 Subtarget.hasExtLASX() && Val.getValueSizeInBits() == 256;
781
782   // Ensure the operand type is legal by widening the vector until it is.
783 while (!isTypeLegal(Val.getSimpleValueType())) {
784 Val = DAG.WidenVector(Val, DL);
785 }
786
787   // NumEles is used as the iteration count; v4i32 for LSX and v8i32 for
788   // LASX should end up with the same count.
789 if (isLASX256Vector) {
790 NumEles /= 2;
791 LegalVecSize = 256;
792 }
793
794 for (unsigned i = 1; i < NumEles; i *= 2, EleBits *= 2) {
795 MVT IntTy = MVT::getIntegerVT(EleBits);
796 MVT VecTy = MVT::getVectorVT(IntTy, LegalVecSize / EleBits);
797 Val = DAG.getNode(LoongArchISD::VHADDW, DL, VecTy, Val, Val);
798 }
799
800 if (isLASX256Vector) {
801 SDValue Tmp = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, Val,
802 DAG.getConstant(2, DL, Subtarget.getGRLenVT()));
803 Val = DAG.getNode(ISD::ADD, DL, MVT::v4i64, Tmp, Val);
804 }
805
806 Val = DAG.getBitcast(MVT::getVectorVT(OpVT, LegalVecSize / ResBits), Val);
807 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Val,
808 DAG.getConstant(0, DL, Subtarget.getGRLenVT()));
809}
810
811// Lower vecreduce_and/or/xor/[s/u]max/[s/u]min.
812// For Example:
813// call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %a)
814// can be lowered to:
815// VBSRL_V vr1, vr0, 8
816// VMAX_W vr0, vr1, vr0
817// VBSRL_V vr1, vr0, 4
818// VMAX_W vr0, vr1, vr0
819// VPICKVE2GR_W a0, vr0, 0
820// For a 256-bit vector, the type is illegal and will be split into two
821// 128-bit vectors by default, then processed by this function.
822SDValue LoongArchTargetLowering::lowerVECREDUCE(SDValue Op,
823 SelectionDAG &DAG) const {
824 SDLoc DL(Op);
825
826 MVT OpVT = Op.getSimpleValueType();
827 SDValue Val = Op.getOperand(0);
828
829 unsigned NumEles = Val.getSimpleValueType().getVectorNumElements();
830 unsigned EleBits = Val.getSimpleValueType().getScalarSizeInBits();
831
832   // Ensure the operand type is legal by widening the vector until it is.
833 while (!isTypeLegal(Val.getSimpleValueType())) {
834 Val = DAG.WidenVector(Val, DL);
835 }
836
837 unsigned Opcode = ISD::getVecReduceBaseOpcode(Op.getOpcode());
838 MVT VecTy = Val.getSimpleValueType();
839 MVT GRLenVT = Subtarget.getGRLenVT();
840
841 for (int i = NumEles; i > 1; i /= 2) {
842 SDValue ShiftAmt = DAG.getConstant(i * EleBits / 16, DL, GRLenVT);
843 SDValue Tmp = DAG.getNode(LoongArchISD::VBSRL, DL, VecTy, Val, ShiftAmt);
844 Val = DAG.getNode(Opcode, DL, VecTy, Tmp, Val);
845 }
846
847 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Val,
848 DAG.getConstant(0, DL, GRLenVT));
849}
850
851SDValue LoongArchTargetLowering::lowerPREFETCH(SDValue Op,
852 SelectionDAG &DAG) const {
853 unsigned IsData = Op.getConstantOperandVal(4);
854
855 // We don't support non-data prefetch.
856 // Just preserve the chain.
857 if (!IsData)
858 return Op.getOperand(0);
859
860 return Op;
861}
862
863SDValue LoongArchTargetLowering::lowerRotate(SDValue Op,
864 SelectionDAG &DAG) const {
865 MVT VT = Op.getSimpleValueType();
866 assert(VT.isVector() && "Unexpected type");
867
868 SDLoc DL(Op);
869 SDValue R = Op.getOperand(0);
870 SDValue Amt = Op.getOperand(1);
871 unsigned Opcode = Op.getOpcode();
872 unsigned EltSizeInBits = VT.getScalarSizeInBits();
873
874 auto checkCstSplat = [](SDValue V, APInt &CstSplatValue) {
875 if (V.getOpcode() != ISD::BUILD_VECTOR)
876 return false;
877 if (SDValue SplatValue =
878 cast<BuildVectorSDNode>(V.getNode())->getSplatValue()) {
879 if (auto *C = dyn_cast<ConstantSDNode>(SplatValue)) {
880 CstSplatValue = C->getAPIntValue();
881 return true;
882 }
883 }
884 return false;
885 };
886
887 // Check for constant splat rotation amount.
888 APInt CstSplatValue;
889 bool IsCstSplat = checkCstSplat(Amt, CstSplatValue);
890 bool isROTL = Opcode == ISD::ROTL;
891
892 // Check for splat rotate by zero.
893 if (IsCstSplat && CstSplatValue.urem(EltSizeInBits) == 0)
894 return R;
895
896 // LoongArch targets always prefer ISD::ROTR.
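  // rotl(x, amt) is equivalent to rotr(x, -amt) modulo the element size, so
  // the rotate amount is simply negated.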
897 if (isROTL) {
898 SDValue Zero = DAG.getConstant(0, DL, VT);
899 return DAG.getNode(ISD::ROTR, DL, VT, R,
900 DAG.getNode(ISD::SUB, DL, VT, Zero, Amt));
901 }
902
903   // Rotate by an immediate.
904 if (IsCstSplat) {
905     // ISD::ROTR: Attempt to rotate by a positive immediate.
906 SDValue Bits = DAG.getConstant(EltSizeInBits, DL, VT);
907 if (SDValue Urem =
908 DAG.FoldConstantArithmetic(ISD::UREM, DL, VT, {Amt, Bits}))
909 return DAG.getNode(Opcode, DL, VT, R, Urem);
910 }
911
912 return Op;
913}
914
915// Return true if Val is equal to (setcc LHS, RHS, CC).
916// Return false if Val is the inverse of (setcc LHS, RHS, CC).
917// Otherwise, return std::nullopt.
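// For example, matchSetCC(a, b, SETLT, (setcc a, b, SETGE)) returns false,
// because SETGE is the inverse of SETLT for the same operands.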
918static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS,
919 ISD::CondCode CC, SDValue Val) {
920 assert(Val->getOpcode() == ISD::SETCC);
921 SDValue LHS2 = Val.getOperand(0);
922 SDValue RHS2 = Val.getOperand(1);
923 ISD::CondCode CC2 = cast<CondCodeSDNode>(Val.getOperand(2))->get();
924
925 if (LHS == LHS2 && RHS == RHS2) {
926 if (CC == CC2)
927 return true;
928 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
929 return false;
930 } else if (LHS == RHS2 && RHS == LHS2) {
932 if (CC == CC2)
933 return true;
934 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
935 return false;
936 }
937
938 return std::nullopt;
939}
940
942 const LoongArchSubtarget &Subtarget) {
943 SDValue CondV = N->getOperand(0);
944 SDValue TrueV = N->getOperand(1);
945 SDValue FalseV = N->getOperand(2);
946 MVT VT = N->getSimpleValueType(0);
947 SDLoc DL(N);
948
949 // (select c, -1, y) -> -c | y
950 if (isAllOnesConstant(TrueV)) {
951 SDValue Neg = DAG.getNegative(CondV, DL, VT);
952 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(FalseV));
953 }
954 // (select c, y, -1) -> (c-1) | y
955 if (isAllOnesConstant(FalseV)) {
956 SDValue Neg =
957 DAG.getNode(ISD::ADD, DL, VT, CondV, DAG.getAllOnesConstant(DL, VT));
958 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(TrueV));
959 }
960
961 // (select c, 0, y) -> (c-1) & y
962 if (isNullConstant(TrueV)) {
963 SDValue Neg =
964 DAG.getNode(ISD::ADD, DL, VT, CondV, DAG.getAllOnesConstant(DL, VT));
965 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(FalseV));
966 }
967 // (select c, y, 0) -> -c & y
968 if (isNullConstant(FalseV)) {
969 SDValue Neg = DAG.getNegative(CondV, DL, VT);
970 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(TrueV));
971 }
972
973 // select c, ~x, x --> xor -c, x
974 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
975 const APInt &TrueVal = TrueV->getAsAPIntVal();
976 const APInt &FalseVal = FalseV->getAsAPIntVal();
977 if (~TrueVal == FalseVal) {
978 SDValue Neg = DAG.getNegative(CondV, DL, VT);
979 return DAG.getNode(ISD::XOR, DL, VT, Neg, FalseV);
980 }
981 }
982
983 // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops
984 // when both truev and falsev are also setcc.
985 if (CondV.getOpcode() == ISD::SETCC && TrueV.getOpcode() == ISD::SETCC &&
986 FalseV.getOpcode() == ISD::SETCC) {
987 SDValue LHS = CondV.getOperand(0);
988 SDValue RHS = CondV.getOperand(1);
989 ISD::CondCode CC = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
990
991 // (select x, x, y) -> x | y
992 // (select !x, x, y) -> x & y
993 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, TrueV)) {
994 return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, TrueV,
995 DAG.getFreeze(FalseV));
996 }
997 // (select x, y, x) -> x & y
998 // (select !x, y, x) -> x | y
999 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, FalseV)) {
1000 return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT,
1001 DAG.getFreeze(TrueV), FalseV);
1002 }
1003 }
1004
1005 return SDValue();
1006}
1007
1008// Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants
1009// into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable.
1010// For now we only consider the transformation profitable if `binOp(c0, c1)`
1011// ends up being `0` or `-1`. In such cases we can replace `select` with `and`.
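// For example, (and (select cond, x, 0), 1) becomes
// (select cond, (and x, 1), 0), since binOp(c0, c1) folds to 0.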
1012// TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize
1013// than `c0`?
1014static SDValue
1016 const LoongArchSubtarget &Subtarget) {
1017 unsigned SelOpNo = 0;
1018 SDValue Sel = BO->getOperand(0);
1019 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
1020 SelOpNo = 1;
1021 Sel = BO->getOperand(1);
1022 }
1023
1024 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
1025 return SDValue();
1026
1027 unsigned ConstSelOpNo = 1;
1028 unsigned OtherSelOpNo = 2;
1029 if (!isa<ConstantSDNode>(Sel->getOperand(ConstSelOpNo))) {
1030 ConstSelOpNo = 2;
1031 OtherSelOpNo = 1;
1032 }
1033 SDValue ConstSelOp = Sel->getOperand(ConstSelOpNo);
1034 ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(ConstSelOp);
1035 if (!ConstSelOpNode || ConstSelOpNode->isOpaque())
1036 return SDValue();
1037
1038 SDValue ConstBinOp = BO->getOperand(SelOpNo ^ 1);
1039 ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(ConstBinOp);
1040 if (!ConstBinOpNode || ConstBinOpNode->isOpaque())
1041 return SDValue();
1042
1043 SDLoc DL(Sel);
1044 EVT VT = BO->getValueType(0);
1045
1046 SDValue NewConstOps[2] = {ConstSelOp, ConstBinOp};
1047 if (SelOpNo == 1)
1048 std::swap(NewConstOps[0], NewConstOps[1]);
1049
1050 SDValue NewConstOp =
1051 DAG.FoldConstantArithmetic(BO->getOpcode(), DL, VT, NewConstOps);
1052 if (!NewConstOp)
1053 return SDValue();
1054
1055 const APInt &NewConstAPInt = NewConstOp->getAsAPIntVal();
1056 if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes())
1057 return SDValue();
1058
1059 SDValue OtherSelOp = Sel->getOperand(OtherSelOpNo);
1060 SDValue NewNonConstOps[2] = {OtherSelOp, ConstBinOp};
1061 if (SelOpNo == 1)
1062 std::swap(NewNonConstOps[0], NewNonConstOps[1]);
1063 SDValue NewNonConstOp = DAG.getNode(BO->getOpcode(), DL, VT, NewNonConstOps);
1064
1065 SDValue NewT = (ConstSelOpNo == 1) ? NewConstOp : NewNonConstOp;
1066 SDValue NewF = (ConstSelOpNo == 1) ? NewNonConstOp : NewConstOp;
1067 return DAG.getSelect(DL, VT, Sel.getOperand(0), NewT, NewF);
1068}
1069
1070// Changes the condition code and swaps operands if necessary, so the SetCC
1071// operation matches one of the comparisons supported directly by branches
1072// in the LoongArch ISA. May adjust compares to favor compare with 0 over
1073// compare with 1/-1.
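// For example, (setcc X, -1, setgt) is rewritten to (setcc X, 0, setge), and
// (setcc X, 1, setlt) to (setcc 0, X, setge).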
1075 ISD::CondCode &CC, SelectionDAG &DAG) {
1076 // If this is a single bit test that can't be handled by ANDI, shift the
1077 // bit to be tested to the MSB and perform a signed compare with 0.
1078 if (isIntEqualitySetCC(CC) && isNullConstant(RHS) &&
1079 LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
1080 isa<ConstantSDNode>(LHS.getOperand(1))) {
1081 uint64_t Mask = LHS.getConstantOperandVal(1);
1082 if ((isPowerOf2_64(Mask) || isMask_64(Mask)) && !isInt<12>(Mask)) {
1083 unsigned ShAmt = 0;
1084 if (isPowerOf2_64(Mask)) {
1085 CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
1086 ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);
1087 } else {
1088 ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Mask);
1089 }
1090
1091 LHS = LHS.getOperand(0);
1092 if (ShAmt != 0)
1093 LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS,
1094 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
1095 return;
1096 }
1097 }
1098
1099 if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
1100 int64_t C = RHSC->getSExtValue();
1101 switch (CC) {
1102 default:
1103 break;
1104 case ISD::SETGT:
1105 // Convert X > -1 to X >= 0.
1106 if (C == -1) {
1107 RHS = DAG.getConstant(0, DL, RHS.getValueType());
1108 CC = ISD::SETGE;
1109 return;
1110 }
1111 break;
1112 case ISD::SETLT:
1113 // Convert X < 1 to 0 >= X.
1114 if (C == 1) {
1115 RHS = LHS;
1116 LHS = DAG.getConstant(0, DL, RHS.getValueType());
1117 CC = ISD::SETGE;
1118 return;
1119 }
1120 break;
1121 }
1122 }
1123
1124 switch (CC) {
1125 default:
1126 break;
1127 case ISD::SETGT:
1128 case ISD::SETLE:
1129 case ISD::SETUGT:
1130 case ISD::SETULE:
1132 std::swap(LHS, RHS);
1133 break;
1134 }
1135}
1136
1137SDValue LoongArchTargetLowering::lowerSELECT(SDValue Op,
1138 SelectionDAG &DAG) const {
1139 SDValue CondV = Op.getOperand(0);
1140 SDValue TrueV = Op.getOperand(1);
1141 SDValue FalseV = Op.getOperand(2);
1142 SDLoc DL(Op);
1143 MVT VT = Op.getSimpleValueType();
1144 MVT GRLenVT = Subtarget.getGRLenVT();
1145
1146 if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
1147 return V;
1148
1149 if (Op.hasOneUse()) {
1150 unsigned UseOpc = Op->user_begin()->getOpcode();
1151 if (isBinOp(UseOpc) && DAG.isSafeToSpeculativelyExecute(UseOpc)) {
1152 SDNode *BinOp = *Op->user_begin();
1153 if (SDValue NewSel = foldBinOpIntoSelectIfProfitable(*Op->user_begin(),
1154 DAG, Subtarget)) {
1155 DAG.ReplaceAllUsesWith(BinOp, &NewSel);
1156 // Opcode check is necessary because foldBinOpIntoSelectIfProfitable
1157       // may return a constant node and cause a crash in lowerSELECT.
1158 if (NewSel.getOpcode() == ISD::SELECT)
1159 return lowerSELECT(NewSel, DAG);
1160 return NewSel;
1161 }
1162 }
1163 }
1164
1165 // If the condition is not an integer SETCC which operates on GRLenVT, we need
1166 // to emit a LoongArchISD::SELECT_CC comparing the condition to zero. i.e.:
1167 // (select condv, truev, falsev)
1168 // -> (loongarchisd::select_cc condv, zero, setne, truev, falsev)
1169 if (CondV.getOpcode() != ISD::SETCC ||
1170 CondV.getOperand(0).getSimpleValueType() != GRLenVT) {
1171 SDValue Zero = DAG.getConstant(0, DL, GRLenVT);
1172 SDValue SetNE = DAG.getCondCode(ISD::SETNE);
1173
1174 SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
1175
1176 return DAG.getNode(LoongArchISD::SELECT_CC, DL, VT, Ops);
1177 }
1178
1179 // If the CondV is the output of a SETCC node which operates on GRLenVT
1180 // inputs, then merge the SETCC node into the lowered LoongArchISD::SELECT_CC
1181 // to take advantage of the integer compare+branch instructions. i.e.: (select
1182 // (setcc lhs, rhs, cc), truev, falsev)
1183 // -> (loongarchisd::select_cc lhs, rhs, cc, truev, falsev)
1184 SDValue LHS = CondV.getOperand(0);
1185 SDValue RHS = CondV.getOperand(1);
1186 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
1187
1188 // Special case for a select of 2 constants that have a difference of 1.
1189 // Normally this is done by DAGCombine, but if the select is introduced by
1190 // type legalization or op legalization, we miss it. Restricting to SETLT
1191 // case for now because that is what signed saturating add/sub need.
1192 // FIXME: We don't need the condition to be SETLT or even a SETCC,
1193 // but we would probably want to swap the true/false values if the condition
1194 // is SETGE/SETLE to avoid an XORI.
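  // For example, (select (setlt a, b), 5, 4) becomes (add (setlt a, b), 4).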
1195 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
1196 CCVal == ISD::SETLT) {
1197 const APInt &TrueVal = TrueV->getAsAPIntVal();
1198 const APInt &FalseVal = FalseV->getAsAPIntVal();
1199 if (TrueVal - 1 == FalseVal)
1200 return DAG.getNode(ISD::ADD, DL, VT, CondV, FalseV);
1201 if (TrueVal + 1 == FalseVal)
1202 return DAG.getNode(ISD::SUB, DL, VT, FalseV, CondV);
1203 }
1204
1205 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
1206 // 1 < x ? x : 1 -> 0 < x ? x : 1
1207 if (isOneConstant(LHS) && (CCVal == ISD::SETLT || CCVal == ISD::SETULT) &&
1208 RHS == TrueV && LHS == FalseV) {
1209 LHS = DAG.getConstant(0, DL, VT);
1210 // 0 <u x is the same as x != 0.
1211 if (CCVal == ISD::SETULT) {
1212 std::swap(LHS, RHS);
1213 CCVal = ISD::SETNE;
1214 }
1215 }
1216
1217 // x <s -1 ? x : -1 -> x <s 0 ? x : -1
1218 if (isAllOnesConstant(RHS) && CCVal == ISD::SETLT && LHS == TrueV &&
1219 RHS == FalseV) {
1220 RHS = DAG.getConstant(0, DL, VT);
1221 }
1222
1223 SDValue TargetCC = DAG.getCondCode(CCVal);
1224
1225 if (isa<ConstantSDNode>(TrueV) && !isa<ConstantSDNode>(FalseV)) {
1226 // (select (setcc lhs, rhs, CC), constant, falsev)
1227 // -> (select (setcc lhs, rhs, InverseCC), falsev, constant)
1228 std::swap(TrueV, FalseV);
1229 TargetCC = DAG.getCondCode(ISD::getSetCCInverse(CCVal, LHS.getValueType()));
1230 }
1231
1232 SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
1233 return DAG.getNode(LoongArchISD::SELECT_CC, DL, VT, Ops);
1234}
1235
1236SDValue LoongArchTargetLowering::lowerBRCOND(SDValue Op,
1237 SelectionDAG &DAG) const {
1238 SDValue CondV = Op.getOperand(1);
1239 SDLoc DL(Op);
1240 MVT GRLenVT = Subtarget.getGRLenVT();
1241
1242 if (CondV.getOpcode() == ISD::SETCC) {
1243 if (CondV.getOperand(0).getValueType() == GRLenVT) {
1244 SDValue LHS = CondV.getOperand(0);
1245 SDValue RHS = CondV.getOperand(1);
1246 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
1247
1248 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
1249
1250 SDValue TargetCC = DAG.getCondCode(CCVal);
1251 return DAG.getNode(LoongArchISD::BR_CC, DL, Op.getValueType(),
1252 Op.getOperand(0), LHS, RHS, TargetCC,
1253 Op.getOperand(2));
1254 } else if (CondV.getOperand(0).getValueType().isFloatingPoint()) {
1255 return DAG.getNode(LoongArchISD::BRCOND, DL, Op.getValueType(),
1256 Op.getOperand(0), CondV, Op.getOperand(2));
1257 }
1258 }
1259
1260 return DAG.getNode(LoongArchISD::BR_CC, DL, Op.getValueType(),
1261 Op.getOperand(0), CondV, DAG.getConstant(0, DL, GRLenVT),
1262 DAG.getCondCode(ISD::SETNE), Op.getOperand(2));
1263}
1264
1265SDValue
1266LoongArchTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
1267 SelectionDAG &DAG) const {
1268 SDLoc DL(Op);
1269 MVT OpVT = Op.getSimpleValueType();
1270
1271 SDValue Vector = DAG.getUNDEF(OpVT);
1272 SDValue Val = Op.getOperand(0);
1273 SDValue Idx = DAG.getConstant(0, DL, Subtarget.getGRLenVT());
1274
1275 return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, OpVT, Vector, Val, Idx);
1276}
1277
1278SDValue LoongArchTargetLowering::lowerBITREVERSE(SDValue Op,
1279 SelectionDAG &DAG) const {
1280 EVT ResTy = Op->getValueType(0);
1281 SDValue Src = Op->getOperand(0);
1282 SDLoc DL(Op);
1283
1284 // LoongArchISD::BITREV_8B is not supported on LA32.
1285 if (!Subtarget.is64Bit() && (ResTy == MVT::v16i8 || ResTy == MVT::v32i8))
1286 return SDValue();
1287
1288 EVT NewVT = ResTy.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
1289 unsigned int OrigEltNum = ResTy.getVectorNumElements();
1290 unsigned int NewEltNum = NewVT.getVectorNumElements();
1291
1292 SDValue NewSrc = DAG.getNode(ISD::BITCAST, DL, NewVT, Src);
1293
1295 for (unsigned int i = 0; i < NewEltNum; i++) {
1296 SDValue Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, NewSrc,
1297 DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
1298 unsigned RevOp = (ResTy == MVT::v16i8 || ResTy == MVT::v32i8)
1299 ? (unsigned)LoongArchISD::BITREV_8B
1300 : (unsigned)ISD::BITREVERSE;
1301 Ops.push_back(DAG.getNode(RevOp, DL, MVT::i64, Op));
1302 }
1303 SDValue Res =
1304 DAG.getNode(ISD::BITCAST, DL, ResTy, DAG.getBuildVector(NewVT, DL, Ops));
1305
1306 switch (ResTy.getSimpleVT().SimpleTy) {
1307 default:
1308 return SDValue();
1309 case MVT::v16i8:
1310 case MVT::v32i8:
1311 return Res;
1312 case MVT::v8i16:
1313 case MVT::v16i16:
1314 case MVT::v4i32:
1315 case MVT::v8i32: {
1317 for (unsigned int i = 0; i < NewEltNum; i++)
1318 for (int j = OrigEltNum / NewEltNum - 1; j >= 0; j--)
1319 Mask.push_back(j + (OrigEltNum / NewEltNum) * i);
1320 return DAG.getVectorShuffle(ResTy, DL, Res, DAG.getUNDEF(ResTy), Mask);
1321 }
1322 }
1323}
1324
1325// Widen element type to get a new mask value (if possible).
1326// For example:
1327// shufflevector <4 x i32> %a, <4 x i32> %b,
1328// <4 x i32> <i32 6, i32 7, i32 2, i32 3>
1329// is equivalent to:
1330// shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
1331// can be lowered to:
1332// VPACKOD_D vr0, vr0, vr1
1334 SDValue V1, SDValue V2, SelectionDAG &DAG) {
1335 unsigned EltBits = VT.getScalarSizeInBits();
1336
1337 if (EltBits > 32 || EltBits == 1)
1338 return SDValue();
1339
1340 SmallVector<int, 8> NewMask;
1341 if (widenShuffleMaskElts(Mask, NewMask)) {
1342 MVT NewEltVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(EltBits * 2)
1343 : MVT::getIntegerVT(EltBits * 2);
1344 MVT NewVT = MVT::getVectorVT(NewEltVT, VT.getVectorNumElements() / 2);
1345 if (DAG.getTargetLoweringInfo().isTypeLegal(NewVT)) {
1346 SDValue NewV1 = DAG.getBitcast(NewVT, V1);
1347 SDValue NewV2 = DAG.getBitcast(NewVT, V2);
1348 return DAG.getBitcast(
1349 VT, DAG.getVectorShuffle(NewVT, DL, NewV1, NewV2, NewMask));
1350 }
1351 }
1352
1353 return SDValue();
1354}
1355
1356/// Attempts to match a shuffle mask against the VBSLL, VBSRL, VSLLI and VSRLI
1357/// instructions.
1358// The function matches elements from one of the input vectors shuffled to the
1359// left or right with zeroable elements 'shifted in'. It handles both the
1360// strictly bit-wise element shifts and the byte shift across an entire 128-bit
1361// lane.
1362// Mostly copied from X86.
1363static int matchShuffleAsShift(MVT &ShiftVT, unsigned &Opcode,
1364 unsigned ScalarSizeInBits, ArrayRef<int> Mask,
1365 int MaskOffset, const APInt &Zeroable) {
1366 int Size = Mask.size();
1367 unsigned SizeInBits = Size * ScalarSizeInBits;
1368
1369 auto CheckZeros = [&](int Shift, int Scale, bool Left) {
1370 for (int i = 0; i < Size; i += Scale)
1371 for (int j = 0; j < Shift; ++j)
1372 if (!Zeroable[i + j + (Left ? 0 : (Scale - Shift))])
1373 return false;
1374
1375 return true;
1376 };
1377
1378 auto isSequentialOrUndefInRange = [&](unsigned Pos, unsigned Size, int Low,
1379 int Step = 1) {
1380 for (unsigned i = Pos, e = Pos + Size; i != e; ++i, Low += Step)
1381 if (!(Mask[i] == -1 || Mask[i] == Low))
1382 return false;
1383 return true;
1384 };
1385
1386 auto MatchShift = [&](int Shift, int Scale, bool Left) {
1387 for (int i = 0; i != Size; i += Scale) {
1388 unsigned Pos = Left ? i + Shift : i;
1389 unsigned Low = Left ? i : i + Shift;
1390 unsigned Len = Scale - Shift;
1391 if (!isSequentialOrUndefInRange(Pos, Len, Low + MaskOffset))
1392 return -1;
1393 }
1394
1395 int ShiftEltBits = ScalarSizeInBits * Scale;
1396 bool ByteShift = ShiftEltBits > 64;
1397 Opcode = Left ? (ByteShift ? LoongArchISD::VBSLL : LoongArchISD::VSLLI)
1398 : (ByteShift ? LoongArchISD::VBSRL : LoongArchISD::VSRLI);
1399 int ShiftAmt = Shift * ScalarSizeInBits / (ByteShift ? 8 : 1);
1400
1401 // Normalize the scale for byte shifts to still produce an i64 element
1402 // type.
1403 Scale = ByteShift ? Scale / 2 : Scale;
1404
1405 // We need to round trip through the appropriate type for the shift.
1406 MVT ShiftSVT = MVT::getIntegerVT(ScalarSizeInBits * Scale);
1407 ShiftVT = ByteShift ? MVT::getVectorVT(MVT::i8, SizeInBits / 8)
1408 : MVT::getVectorVT(ShiftSVT, Size / Scale);
1409 return (int)ShiftAmt;
1410 };
1411
1412 unsigned MaxWidth = 128;
1413 for (int Scale = 2; Scale * ScalarSizeInBits <= MaxWidth; Scale *= 2)
1414 for (int Shift = 1; Shift != Scale; ++Shift)
1415 for (bool Left : {true, false})
1416 if (CheckZeros(Shift, Scale, Left)) {
1417 int ShiftAmt = MatchShift(Shift, Scale, Left);
1418 if (0 < ShiftAmt)
1419 return ShiftAmt;
1420 }
1421
1422 // no match
1423 return -1;
1424}
1425
1426/// Lower VECTOR_SHUFFLE as shift (if possible).
1427///
1428/// For example:
1429/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
1430/// <4 x i32> <i32 4, i32 0, i32 1, i32 2>
1431/// is lowered to:
1432/// (VBSLL_V $v0, $v0, 4)
1433///
1434/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
1435/// <4 x i32> <i32 4, i32 0, i32 4, i32 2>
1436/// is lowered to:
1437/// (VSLLI_D $v0, $v0, 32)
1439 MVT VT, SDValue V1, SDValue V2,
1440 SelectionDAG &DAG,
1441 const LoongArchSubtarget &Subtarget,
1442 const APInt &Zeroable) {
1443 int Size = Mask.size();
1444 assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");
1445
1446 MVT ShiftVT;
1447 SDValue V = V1;
1448 unsigned Opcode;
1449
1450 // Try to match shuffle against V1 shift.
1451 int ShiftAmt = matchShuffleAsShift(ShiftVT, Opcode, VT.getScalarSizeInBits(),
1452 Mask, 0, Zeroable);
1453
1454 // If V1 failed, try to match shuffle against V2 shift.
1455 if (ShiftAmt < 0) {
1456 ShiftAmt = matchShuffleAsShift(ShiftVT, Opcode, VT.getScalarSizeInBits(),
1457 Mask, Size, Zeroable);
1458 V = V2;
1459 }
1460
1461 if (ShiftAmt < 0)
1462 return SDValue();
1463
1464 assert(DAG.getTargetLoweringInfo().isTypeLegal(ShiftVT) &&
1465 "Illegal integer vector type");
1466 V = DAG.getBitcast(ShiftVT, V);
1467 V = DAG.getNode(Opcode, DL, ShiftVT, V,
1468 DAG.getConstant(ShiftAmt, DL, Subtarget.getGRLenVT()));
1469 return DAG.getBitcast(VT, V);
1470}
1471
1472/// Determine whether a range fits a regular pattern of values.
1473/// This function accounts for the possibility of jumping over the End iterator.
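/// For example, a mask of <0, 2, 4, 6> fits with CheckStride == 1,
/// ExpectedIndex == 0 and ExpectedIndexStride == 2; undef (-1) entries always
/// match.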
1474template <typename ValType>
1475static bool
1477 unsigned CheckStride,
1479 ValType ExpectedIndex, unsigned ExpectedIndexStride) {
1480 auto &I = Begin;
1481
1482 while (I != End) {
1483 if (*I != -1 && *I != ExpectedIndex)
1484 return false;
1485 ExpectedIndex += ExpectedIndexStride;
1486
1487 // Incrementing past End is undefined behaviour so we must increment one
1488 // step at a time and check for End at each step.
1489 for (unsigned n = 0; n < CheckStride && I != End; ++n, ++I)
1490 ; // Empty loop body.
1491 }
1492 return true;
1493}
1494
1495/// Compute whether each element of a shuffle is zeroable.
1496///
1497/// A "zeroable" vector shuffle element is one which can be lowered to zero.
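/// For example, when \p V2 is a build_vector of all zeros, any mask index that
/// selects from V2 (i.e. index >= Mask.size()) is reported in \p KnownZero.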
1499 SDValue V2, APInt &KnownUndef,
1500 APInt &KnownZero) {
1501 int Size = Mask.size();
1502 KnownUndef = KnownZero = APInt::getZero(Size);
1503
1504 V1 = peekThroughBitcasts(V1);
1505 V2 = peekThroughBitcasts(V2);
1506
1507 bool V1IsZero = ISD::isBuildVectorAllZeros(V1.getNode());
1508 bool V2IsZero = ISD::isBuildVectorAllZeros(V2.getNode());
1509
1510 int VectorSizeInBits = V1.getValueSizeInBits();
1511 int ScalarSizeInBits = VectorSizeInBits / Size;
1512 assert(!(VectorSizeInBits % ScalarSizeInBits) && "Illegal shuffle mask size");
1513 (void)ScalarSizeInBits;
1514
1515 for (int i = 0; i < Size; ++i) {
1516 int M = Mask[i];
1517 if (M < 0) {
1518 KnownUndef.setBit(i);
1519 continue;
1520 }
1521 if ((M >= 0 && M < Size && V1IsZero) || (M >= Size && V2IsZero)) {
1522 KnownZero.setBit(i);
1523 continue;
1524 }
1525 }
1526}
1527
1528/// Test whether a shuffle mask is equivalent within each sub-lane.
1529///
1530/// The specific repeated shuffle mask is populated in \p RepeatedMask, as it is
1531/// non-trivial to compute in the face of undef lanes. The representation is
1532/// suitable for use with existing 128-bit shuffles as entries from the second
1533/// vector have been remapped to [LaneSize, 2*LaneSize).
1534static bool isRepeatedShuffleMask(unsigned LaneSizeInBits, MVT VT,
1535 ArrayRef<int> Mask,
1536 SmallVectorImpl<int> &RepeatedMask) {
1537 auto LaneSize = LaneSizeInBits / VT.getScalarSizeInBits();
1538 RepeatedMask.assign(LaneSize, -1);
1539 int Size = Mask.size();
1540 for (int i = 0; i < Size; ++i) {
1541 assert(Mask[i] == -1 || Mask[i] >= 0);
1542 if (Mask[i] < 0)
1543 continue;
1544 if ((Mask[i] % Size) / LaneSize != i / LaneSize)
1545 // This entry crosses lanes, so there is no way to model this shuffle.
1546 return false;
1547
1548 // Ok, handle the in-lane shuffles by detecting if and when they repeat.
1549 // Adjust second vector indices to start at LaneSize instead of Size.
1550 int LocalM =
1551 Mask[i] < Size ? Mask[i] % LaneSize : Mask[i] % LaneSize + LaneSize;
1552 if (RepeatedMask[i % LaneSize] < 0)
1553 // This is the first non-undef entry in this slot of a 128-bit lane.
1554 RepeatedMask[i % LaneSize] = LocalM;
1555 else if (RepeatedMask[i % LaneSize] != LocalM)
1556 // Found a mismatch with the repeated mask.
1557 return false;
1558 }
1559 return true;
1560}
1561
1562/// Attempts to match vector shuffle as byte rotation.
1564 ArrayRef<int> Mask) {
1565
1566 SDValue Lo, Hi;
1567 SmallVector<int, 16> RepeatedMask;
1568
1569 if (!isRepeatedShuffleMask(128, VT, Mask, RepeatedMask))
1570 return -1;
1571
1572 int NumElts = RepeatedMask.size();
1573 int Rotation = 0;
1574 int Scale = 16 / NumElts;
1575
1576 for (int i = 0; i < NumElts; ++i) {
1577 int M = RepeatedMask[i];
1578 assert((M == -1 || (0 <= M && M < (2 * NumElts))) &&
1579 "Unexpected mask index.");
1580 if (M < 0)
1581 continue;
1582
1583 // Determine where a rotated vector would have started.
1584 int StartIdx = i - (M % NumElts);
1585 if (StartIdx == 0)
1586 return -1;
1587
1588     // If we found the tail of a vector, the rotation must be the size of the
1589     // missing front. If we found the head of a vector, it must be how much of
1590     // the head is present.
1591 int CandidateRotation = StartIdx < 0 ? -StartIdx : NumElts - StartIdx;
1592
1593 if (Rotation == 0)
1594 Rotation = CandidateRotation;
1595 else if (Rotation != CandidateRotation)
1596 return -1;
1597
1598 // Compute which value this mask is pointing at.
1599 SDValue MaskV = M < NumElts ? V1 : V2;
1600
1601 // Compute which of the two target values this index should be assigned
1602 // to. This reflects whether the high elements are remaining or the low
1603 // elements are remaining.
1604 SDValue &TargetV = StartIdx < 0 ? Hi : Lo;
1605
1606 // Either set up this value if we've not encountered it before, or check
1607 // that it remains consistent.
1608 if (!TargetV)
1609 TargetV = MaskV;
1610 else if (TargetV != MaskV)
1611 return -1;
1612 }
1613
1614 // Check that we successfully analyzed the mask, and normalize the results.
1615 assert(Rotation != 0 && "Failed to locate a viable rotation!");
1616 assert((Lo || Hi) && "Failed to find a rotated input vector!");
1617 if (!Lo)
1618 Lo = Hi;
1619 else if (!Hi)
1620 Hi = Lo;
1621
1622 V1 = Lo;
1623 V2 = Hi;
1624
1625 return Rotation * Scale;
1626}
1627
1628/// Lower VECTOR_SHUFFLE as byte rotate (if possible).
1629///
1630/// For example:
1631/// %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b,
1632/// <2 x i32> <i32 3, i32 0>
1633/// is lowered to:
1634/// (VBSRL_V $v1, $v1, 8)
1635/// (VBSLL_V $v0, $v0, 8)
1636/// (VOR_V $v0, $V0, $v1)
1637static SDValue
1639 SDValue V1, SDValue V2, SelectionDAG &DAG,
1640 const LoongArchSubtarget &Subtarget) {
1641
1642 SDValue Lo = V1, Hi = V2;
1643 int ByteRotation = matchShuffleAsByteRotate(VT, Lo, Hi, Mask);
1644 if (ByteRotation <= 0)
1645 return SDValue();
1646
1647 MVT ByteVT = MVT::getVectorVT(MVT::i8, VT.getSizeInBits() / 8);
1648 Lo = DAG.getBitcast(ByteVT, Lo);
1649 Hi = DAG.getBitcast(ByteVT, Hi);
1650
1651 int LoByteShift = 16 - ByteRotation;
1652 int HiByteShift = ByteRotation;
1653 MVT GRLenVT = Subtarget.getGRLenVT();
1654
1655 SDValue LoShift = DAG.getNode(LoongArchISD::VBSLL, DL, ByteVT, Lo,
1656 DAG.getConstant(LoByteShift, DL, GRLenVT));
1657 SDValue HiShift = DAG.getNode(LoongArchISD::VBSRL, DL, ByteVT, Hi,
1658 DAG.getConstant(HiByteShift, DL, GRLenVT));
1659 return DAG.getBitcast(VT, DAG.getNode(ISD::OR, DL, ByteVT, LoShift, HiShift));
1660}
1661
1662/// Lower VECTOR_SHUFFLE as ZERO_EXTEND Or ANY_EXTEND (if possible).
1663///
1664/// For example:
1665/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
1666/// <4 x i32> <i32 0, i32 4, i32 1, i32 4>
1667/// %3 = bitcast <4 x i32> %2 to <2 x i64>
1668/// is lowered to:
1669/// (VREPLI $v1, 0)
1670/// (VILVL $v0, $v1, $v0)
1672 ArrayRef<int> Mask, MVT VT,
1673 SDValue V1, SDValue V2,
1674 SelectionDAG &DAG,
1675 const APInt &Zeroable) {
1676 int Bits = VT.getSizeInBits();
1677 int EltBits = VT.getScalarSizeInBits();
1678 int NumElements = VT.getVectorNumElements();
1679
1680 if (Zeroable.isAllOnes())
1681 return DAG.getConstant(0, DL, VT);
1682
1683 // Define a helper function to check a particular ext-scale and lower to it if
1684 // valid.
1685 auto Lower = [&](int Scale) -> SDValue {
1686 SDValue InputV;
1687 bool AnyExt = true;
1688 int Offset = 0;
1689 for (int i = 0; i < NumElements; i++) {
1690 int M = Mask[i];
1691 if (M < 0)
1692 continue;
1693 if (i % Scale != 0) {
1694 // Each of the extended elements need to be zeroable.
1695 if (!Zeroable[i])
1696 return SDValue();
1697
1698 AnyExt = false;
1699 continue;
1700 }
1701
1702 // Each of the base elements needs to be consecutive indices into the
1703 // same input vector.
1704 SDValue V = M < NumElements ? V1 : V2;
1705 M = M % NumElements;
1706 if (!InputV) {
1707 InputV = V;
1708 Offset = M - (i / Scale);
1709
1710       // These offsets can't be handled.
1711 if (Offset % (NumElements / Scale))
1712 return SDValue();
1713 } else if (InputV != V)
1714 return SDValue();
1715
1716 if (M != (Offset + (i / Scale)))
1717 return SDValue(); // Non-consecutive strided elements.
1718 }
1719
1720 // If we fail to find an input, we have a zero-shuffle which should always
1721 // have already been handled.
1722 if (!InputV)
1723 return SDValue();
1724
1725 do {
1726 unsigned VilVLoHi = LoongArchISD::VILVL;
1727 if (Offset >= (NumElements / 2)) {
1728 VilVLoHi = LoongArchISD::VILVH;
1729 Offset -= (NumElements / 2);
1730 }
1731
1732 MVT InputVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits), NumElements);
1733 SDValue Ext =
1734 AnyExt ? DAG.getFreeze(InputV) : DAG.getConstant(0, DL, InputVT);
1735 InputV = DAG.getBitcast(InputVT, InputV);
1736 InputV = DAG.getNode(VilVLoHi, DL, InputVT, Ext, InputV);
1737 Scale /= 2;
1738 EltBits *= 2;
1739 NumElements /= 2;
1740 } while (Scale > 1);
1741 return DAG.getBitcast(VT, InputV);
1742 };
1743
1744 // Each iteration, try extending the elements half as much, but into twice as
1745 // many elements.
1746 for (int NumExtElements = Bits / 64; NumExtElements < NumElements;
1747 NumExtElements *= 2) {
1748 if (SDValue V = Lower(NumElements / NumExtElements))
1749 return V;
1750 }
1751 return SDValue();
1752}
1753
1754/// Lower VECTOR_SHUFFLE into VREPLVEI (if possible).
1755///
1756/// VREPLVEI performs vector broadcast based on an element specified by an
1757/// integer immediate, with its mask being similar to:
1758/// <x, x, x, ...>
1759/// where x is any valid index.
1760///
1761/// When undef's appear in the mask they are treated as if they were whatever
1762/// value is necessary in order to fit the above form.
1763static SDValue
1765 SDValue V1, SelectionDAG &DAG,
1766 const LoongArchSubtarget &Subtarget) {
1767 int SplatIndex = -1;
1768 for (const auto &M : Mask) {
1769 if (M != -1) {
1770 SplatIndex = M;
1771 break;
1772 }
1773 }
1774
1775 if (SplatIndex == -1)
1776 return DAG.getUNDEF(VT);
1777
1778 assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
1779 if (fitsRegularPattern<int>(Mask.begin(), 1, Mask.end(), SplatIndex, 0)) {
1780 return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
1781 DAG.getConstant(SplatIndex, DL, Subtarget.getGRLenVT()));
1782 }
1783
1784 return SDValue();
1785}
1786
1787/// Lower VECTOR_SHUFFLE into VSHUF4I (if possible).
1788///
1789/// VSHUF4I splits the vector into blocks of four elements, then shuffles these
1790/// elements according to a <4 x i2> constant (encoded as an integer immediate).
1791///
1792/// It is therefore possible to lower into VSHUF4I when the mask takes the form:
1793/// <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...>
1794/// When undef's appear they are treated as if they were whatever value is
1795/// necessary in order to fit the above forms.
1796///
1797/// For example:
1798/// %2 = shufflevector <8 x i16> %0, <8 x i16> undef,
1799/// <8 x i32> <i32 3, i32 2, i32 1, i32 0,
1800/// i32 7, i32 6, i32 5, i32 4>
1801/// is lowered to:
1802/// (VSHUF4I_H $v0, $v1, 27)
1803/// where the 27 comes from:
1804/// 3 + (2 << 2) + (1 << 4) + (0 << 6)
1805static SDValue
1807 SDValue V1, SDValue V2, SelectionDAG &DAG,
1808 const LoongArchSubtarget &Subtarget) {
1809
1810 unsigned SubVecSize = 4;
1811 if (VT == MVT::v2f64 || VT == MVT::v2i64)
1812 SubVecSize = 2;
1813
1814 int SubMask[4] = {-1, -1, -1, -1};
1815 for (unsigned i = 0; i < SubVecSize; ++i) {
1816 for (unsigned j = i; j < Mask.size(); j += SubVecSize) {
1817 int M = Mask[j];
1818
1819 // Convert from vector index to 4-element subvector index
1820 // If an index refers to an element outside of the subvector then give up
1821 if (M != -1) {
1822 M -= 4 * (j / SubVecSize);
1823 if (M < 0 || M >= 4)
1824 return SDValue();
1825 }
1826
1827 // If the mask has an undef, replace it with the current index.
1828 // Note that it might still be undef if the current index is also undef
1829 if (SubMask[i] == -1)
1830 SubMask[i] = M;
1831 // Check that non-undef values are the same as in the mask. If they
1832 // aren't then give up
1833 else if (M != -1 && M != SubMask[i])
1834 return SDValue();
1835 }
1836 }
1837
1838 // Calculate the immediate. Replace any remaining undefs with zero
1839 int Imm = 0;
1840 for (int i = SubVecSize - 1; i >= 0; --i) {
1841 int M = SubMask[i];
1842
1843 if (M == -1)
1844 M = 0;
1845
1846 Imm <<= 2;
1847 Imm |= M & 0x3;
1848 }
1849
1850 MVT GRLenVT = Subtarget.getGRLenVT();
1851
1852 // Return vshuf4i.d
1853 if (VT == MVT::v2f64 || VT == MVT::v2i64)
1854 return DAG.getNode(LoongArchISD::VSHUF4I_D, DL, VT, V1, V2,
1855 DAG.getConstant(Imm, DL, GRLenVT));
1856
1857 return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT, V1,
1858 DAG.getConstant(Imm, DL, GRLenVT));
1859}
1860
1861/// Lower VECTOR_SHUFFLE whose result is the reversed source vector.
1862///
1863/// It is possible to do optimization for VECTOR_SHUFFLE performing vector
1864/// reverse whose mask likes:
1865/// <7, 6, 5, 4, 3, 2, 1, 0>
1866///
1867/// When undef's appear in the mask they are treated as if they were whatever
1868/// value is necessary in order to fit the above forms.
1869static SDValue
1871 SDValue V1, SelectionDAG &DAG,
1872 const LoongArchSubtarget &Subtarget) {
1873 // Only vectors with i8/i16 elements which cannot match other patterns
1874   // directly need to do this.
1875 if (VT != MVT::v16i8 && VT != MVT::v8i16 && VT != MVT::v32i8 &&
1876 VT != MVT::v16i16)
1877 return SDValue();
1878
1879 if (!ShuffleVectorInst::isReverseMask(Mask, Mask.size()))
1880 return SDValue();
1881
1882 int WidenNumElts = VT.getVectorNumElements() / 4;
1883 SmallVector<int, 16> WidenMask(WidenNumElts, -1);
1884 for (int i = 0; i < WidenNumElts; ++i)
1885 WidenMask[i] = WidenNumElts - 1 - i;
1886
1887 MVT WidenVT = MVT::getVectorVT(
1888 VT.getVectorElementType() == MVT::i8 ? MVT::i32 : MVT::i64, WidenNumElts);
1889 SDValue NewV1 = DAG.getBitcast(WidenVT, V1);
1890 SDValue WidenRev = DAG.getVectorShuffle(WidenVT, DL, NewV1,
1891 DAG.getUNDEF(WidenVT), WidenMask);
1892
1893 return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT,
1894 DAG.getBitcast(VT, WidenRev),
1895 DAG.getConstant(27, DL, Subtarget.getGRLenVT()));
1896}
1897
1898/// Lower VECTOR_SHUFFLE into VPACKEV (if possible).
1899///
1900/// VPACKEV interleaves the even elements from each vector.
1901///
1902/// It is possible to lower into VPACKEV when the mask consists of two of the
1903/// following forms interleaved:
1904/// <0, 2, 4, ...>
1905/// <n, n+2, n+4, ...>
1906/// where n is the number of elements in the vector.
1907/// For example:
1908/// <0, 0, 2, 2, 4, 4, ...>
1909/// <0, n, 2, n+2, 4, n+4, ...>
1910///
1911/// When undef's appear in the mask they are treated as if they were whatever
1912/// value is necessary in order to fit the above forms.
1914 MVT VT, SDValue V1, SDValue V2,
1915 SelectionDAG &DAG) {
1916
1917 const auto &Begin = Mask.begin();
1918 const auto &End = Mask.end();
1919 SDValue OriV1 = V1, OriV2 = V2;
1920
1921 if (fitsRegularPattern<int>(Begin, 2, End, 0, 2))
1922 V1 = OriV1;
1923 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 2))
1924 V1 = OriV2;
1925 else
1926 return SDValue();
1927
1928 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 2))
1929 V2 = OriV1;
1930 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 2))
1931 V2 = OriV2;
1932 else
1933 return SDValue();
1934
1935 return DAG.getNode(LoongArchISD::VPACKEV, DL, VT, V2, V1);
1936}
1937
1938/// Lower VECTOR_SHUFFLE into VPACKOD (if possible).
1939///
1940/// VPACKOD interleaves the odd elements from each vector.
1941///
1942/// It is possible to lower into VPACKOD when the mask consists of two of the
1943/// following forms interleaved:
1944/// <1, 3, 5, ...>
1945/// <n+1, n+3, n+5, ...>
1946/// where n is the number of elements in the vector.
1947/// For example:
1948/// <1, 1, 3, 3, 5, 5, ...>
1949/// <1, n+1, 3, n+3, 5, n+5, ...>
1950///
1951/// When undef's appear in the mask they are treated as if they were whatever
1952/// value is necessary in order to fit the above forms.
1954 MVT VT, SDValue V1, SDValue V2,
1955 SelectionDAG &DAG) {
1956
1957 const auto &Begin = Mask.begin();
1958 const auto &End = Mask.end();
1959 SDValue OriV1 = V1, OriV2 = V2;
1960
1961 if (fitsRegularPattern<int>(Begin, 2, End, 1, 2))
1962 V1 = OriV1;
1963 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + 1, 2))
1964 V1 = OriV2;
1965 else
1966 return SDValue();
1967
1968 if (fitsRegularPattern<int>(Begin + 1, 2, End, 1, 2))
1969 V2 = OriV1;
1970 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + 1, 2))
1971 V2 = OriV2;
1972 else
1973 return SDValue();
1974
1975 return DAG.getNode(LoongArchISD::VPACKOD, DL, VT, V2, V1);
1976}
1977
1978/// Lower VECTOR_SHUFFLE into VILVH (if possible).
1979///
1980/// VILVH interleaves consecutive elements from the left (highest-indexed) half
1981/// of each vector.
1982///
1983/// It is possible to lower into VILVH when the mask consists of two of the
1984/// following forms interleaved:
1985/// <x, x+1, x+2, ...>
1986/// <n+x, n+x+1, n+x+2, ...>
1987/// where n is the number of elements in the vector and x is half n.
1988/// For example:
1989/// <x, x, x+1, x+1, x+2, x+2, ...>
1990/// <x, n+x, x+1, n+x+1, x+2, n+x+2, ...>
1991///
1992/// When undef's appear in the mask they are treated as if they were whatever
1993/// value is necessary in order to fit the above forms.
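///
/// Illustrative example (not part of the original source): for v4i32
/// (n = 4, x = 2), the mask <2, 6, 3, 7> interleaves the high halves of both
/// inputs and is matched here, producing a single vilvh.w.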
1994static SDValue lowerVECTOR_SHUFFLE_VILVH(const SDLoc &DL, ArrayRef<int> Mask,
1995 MVT VT, SDValue V1, SDValue V2,
1996 SelectionDAG &DAG) {
1997
1998 const auto &Begin = Mask.begin();
1999 const auto &End = Mask.end();
2000 unsigned HalfSize = Mask.size() / 2;
2001 SDValue OriV1 = V1, OriV2 = V2;
2002
2003 if (fitsRegularPattern<int>(Begin, 2, End, HalfSize, 1))
2004 V1 = OriV1;
2005 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + HalfSize, 1))
2006 V1 = OriV2;
2007 else
2008 return SDValue();
2009
2010 if (fitsRegularPattern<int>(Begin + 1, 2, End, HalfSize, 1))
2011 V2 = OriV1;
2012 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + HalfSize,
2013 1))
2014 V2 = OriV2;
2015 else
2016 return SDValue();
2017
2018 return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
2019}
2020
2021/// Lower VECTOR_SHUFFLE into VILVL (if possible).
2022///
2023/// VILVL interleaves consecutive elements from the right (lowest-indexed) half
2024/// of each vector.
2025///
2026/// It is possible to lower into VILVL when the mask consists of two of the
2027/// following forms interleaved:
2028/// <0, 1, 2, ...>
2029/// <n, n+1, n+2, ...>
2030/// where n is the number of elements in the vector.
2031/// For example:
2032/// <0, 0, 1, 1, 2, 2, ...>
2033/// <0, n, 1, n+1, 2, n+2, ...>
2034///
2035/// When undef's appear in the mask they are treated as if they were whatever
2036/// value is necessary in order to fit the above forms.
2037static SDValue lowerVECTOR_SHUFFLE_VILVL(const SDLoc &DL, ArrayRef<int> Mask,
2038 MVT VT, SDValue V1, SDValue V2,
2039 SelectionDAG &DAG) {
2040
2041 const auto &Begin = Mask.begin();
2042 const auto &End = Mask.end();
2043 SDValue OriV1 = V1, OriV2 = V2;
2044
2045 if (fitsRegularPattern<int>(Begin, 2, End, 0, 1))
2046 V1 = OriV1;
2047 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 1))
2048 V1 = OriV2;
2049 else
2050 return SDValue();
2051
2052 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 1))
2053 V2 = OriV1;
2054 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 1))
2055 V2 = OriV2;
2056 else
2057 return SDValue();
2058
2059 return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
2060}
2061
2062/// Lower VECTOR_SHUFFLE into VPICKEV (if possible).
2063///
2064/// VPICKEV copies the even elements of each vector into the result vector.
2065///
2066/// It is possible to lower into VPICKEV when the mask consists of two of the
2067/// following forms concatenated:
2068/// <0, 2, 4, ...>
2069/// <n, n+2, n+4, ...>
2070/// where n is the number of elements in the vector.
2071/// For example:
2072/// <0, 2, 4, ..., 0, 2, 4, ...>
2073/// <0, 2, 4, ..., n, n+2, n+4, ...>
2074///
2075/// When undef's appear in the mask they are treated as if they were whatever
2076/// value is necessary in order to fit the above forms.
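///
/// Illustrative example (not part of the original source): for v4i32, the
/// mask <0, 2, 4, 6> concatenates the even elements of the first input with
/// those of the second and is matched here, producing a single vpickev.w.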
2077static SDValue lowerVECTOR_SHUFFLE_VPICKEV(const SDLoc &DL, ArrayRef<int> Mask,
2078 MVT VT, SDValue V1, SDValue V2,
2079 SelectionDAG &DAG) {
2080
2081 const auto &Begin = Mask.begin();
2082 const auto &Mid = Mask.begin() + Mask.size() / 2;
2083 const auto &End = Mask.end();
2084 SDValue OriV1 = V1, OriV2 = V2;
2085
2086 if (fitsRegularPattern<int>(Begin, 1, Mid, 0, 2))
2087 V1 = OriV1;
2088 else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size(), 2))
2089 V1 = OriV2;
2090 else
2091 return SDValue();
2092
2093 if (fitsRegularPattern<int>(Mid, 1, End, 0, 2))
2094 V2 = OriV1;
2095 else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size(), 2))
2096 V2 = OriV2;
2097
2098 else
2099 return SDValue();
2100
2101 return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
2102}
2103
2104/// Lower VECTOR_SHUFFLE into VPICKOD (if possible).
2105///
2106/// VPICKOD copies the odd elements of each vector into the result vector.
2107///
2108/// It is possible to lower into VPICKOD when the mask consists of two of the
2109/// following forms concatenated:
2110/// <1, 3, 5, ...>
2111/// <n+1, n+3, n+5, ...>
2112/// where n is the number of elements in the vector.
2113/// For example:
2114/// <1, 3, 5, ..., 1, 3, 5, ...>
2115/// <1, 3, 5, ..., n+1, n+3, n+5, ...>
2116///
2117/// When undef's appear in the mask they are treated as if they were whatever
2118/// value is necessary in order to fit the above forms.
2119static SDValue lowerVECTOR_SHUFFLE_VPICKOD(const SDLoc &DL, ArrayRef<int> Mask,
2120 MVT VT, SDValue V1, SDValue V2,
2121 SelectionDAG &DAG) {
2122
2123 const auto &Begin = Mask.begin();
2124 const auto &Mid = Mask.begin() + Mask.size() / 2;
2125 const auto &End = Mask.end();
2126 SDValue OriV1 = V1, OriV2 = V2;
2127
2128 if (fitsRegularPattern<int>(Begin, 1, Mid, 1, 2))
2129 V1 = OriV1;
2130 else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size() + 1, 2))
2131 V1 = OriV2;
2132 else
2133 return SDValue();
2134
2135 if (fitsRegularPattern<int>(Mid, 1, End, 1, 2))
2136 V2 = OriV1;
2137 else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size() + 1, 2))
2138 V2 = OriV2;
2139 else
2140 return SDValue();
2141
2142 return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
2143}
2144
2145/// Lower VECTOR_SHUFFLE into VSHUF.
2146///
2147/// This mostly consists of converting the shuffle mask into a BUILD_VECTOR and
2148/// adding it as an operand to the resulting VSHUF.
2149static SDValue lowerVECTOR_SHUFFLE_VSHUF(const SDLoc &DL, ArrayRef<int> Mask,
2150 MVT VT, SDValue V1, SDValue V2,
2151 SelectionDAG &DAG,
2152 const LoongArchSubtarget &Subtarget) {
2153
2154 SmallVector<SDValue, 16> Ops;
2155 for (auto M : Mask)
2156 Ops.push_back(DAG.getSignedConstant(M, DL, Subtarget.getGRLenVT()));
2157
2158 EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
2159 SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, Ops);
2160
2161 // VECTOR_SHUFFLE concatenates the vectors in a vectorwise fashion.
2162 // <0b00, 0b01> + <0b10, 0b11> -> <0b00, 0b01, 0b10, 0b11>
2163 // VSHUF concatenates the vectors in a bitwise fashion:
2164 // <0b00, 0b01> + <0b10, 0b11> ->
2165 // 0b0100 + 0b1110 -> 0b01001110
2166 // <0b10, 0b11, 0b00, 0b01>
2167 // We must therefore swap the operands to get the correct result.
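  // For instance (illustrative), a v4i32 mask <0, 5, 2, 7> is emitted as
  // VSHUF(MaskVec, V2, V1); the swapped operand order compensates for the
  // different concatenation, so mask values below Mask.size() still select
  // elements of V1 and the larger values select elements of V2.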
2168 return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
2169}
2170
2171/// Dispatching routine to lower various 128-bit LoongArch vector shuffles.
2172///
2173/// This routine breaks down the specific type of 128-bit shuffle and
2174/// dispatches to the lowering routines accordingly.
2175static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2176 SDValue V1, SDValue V2, SelectionDAG &DAG,
2177 const LoongArchSubtarget &Subtarget) {
2178 assert((VT.SimpleTy == MVT::v16i8 || VT.SimpleTy == MVT::v8i16 ||
2179 VT.SimpleTy == MVT::v4i32 || VT.SimpleTy == MVT::v2i64 ||
2180 VT.SimpleTy == MVT::v4f32 || VT.SimpleTy == MVT::v2f64) &&
2181 "Vector type is unsupported for lsx!");
2183 "Two operands have different types!");
2184 assert(VT.getVectorNumElements() == Mask.size() &&
2185 "Unexpected mask size for shuffle!");
2186 assert(Mask.size() % 2 == 0 && "Expected even mask size.");
2187
2188 APInt KnownUndef, KnownZero;
2189 computeZeroableShuffleElements(Mask, V1, V2, KnownUndef, KnownZero);
2190 APInt Zeroable = KnownUndef | KnownZero;
2191
2192 SDValue Result;
2193 // TODO: Add more comparison patterns.
2194 if (V2.isUndef()) {
2195 if ((Result =
2196 lowerVECTOR_SHUFFLE_VREPLVEI(DL, Mask, VT, V1, DAG, Subtarget)))
2197 return Result;
2198 if ((Result =
2199 lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2200 return Result;
2201 if ((Result =
2202 lowerVECTOR_SHUFFLE_IsReverse(DL, Mask, VT, V1, DAG, Subtarget)))
2203 return Result;
2204
2205 // TODO: This commented-out assignment may be enabled in the future to
2206 // better match the pattern for instruction selection.
2207 /* V2 = V1; */
2208 }
2209
2210 // It is recommended not to change the pattern comparison order for better
2211 // performance.
2212 if ((Result = lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG)))
2213 return Result;
2214 if ((Result = lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG)))
2215 return Result;
2216 if ((Result = lowerVECTOR_SHUFFLE_VILVH(DL, Mask, VT, V1, V2, DAG)))
2217 return Result;
2218 if ((Result = lowerVECTOR_SHUFFLE_VILVL(DL, Mask, VT, V1, V2, DAG)))
2219 return Result;
2220 if ((Result = lowerVECTOR_SHUFFLE_VPICKEV(DL, Mask, VT, V1, V2, DAG)))
2221 return Result;
2222 if ((Result = lowerVECTOR_SHUFFLE_VPICKOD(DL, Mask, VT, V1, V2, DAG)))
2223 return Result;
2224 if ((VT.SimpleTy == MVT::v2i64 || VT.SimpleTy == MVT::v2f64) &&
2225 (Result =
2226 lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2227 return Result;
2228 if ((Result = lowerVECTOR_SHUFFLEAsZeroOrAnyExtend(DL, Mask, VT, V1, V2, DAG,
2229 Zeroable)))
2230 return Result;
2231 if ((Result = lowerVECTOR_SHUFFLEAsShift(DL, Mask, VT, V1, V2, DAG, Subtarget,
2232 Zeroable)))
2233 return Result;
2234 if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, Mask, VT, V1, V2, DAG,
2235 Subtarget)))
2236 return Result;
2237 if (SDValue NewShuffle = widenShuffleMask(DL, Mask, VT, V1, V2, DAG))
2238 return NewShuffle;
2239 if ((Result =
2240 lowerVECTOR_SHUFFLE_VSHUF(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2241 return Result;
2242 return SDValue();
2243}
2244
2245/// Lower VECTOR_SHUFFLE into XVREPLVEI (if possible).
2246///
2247/// It is an XVREPLVEI when the mask is:
2248/// <x, x, x, ..., x+n, x+n, x+n, ...>
2249/// where the number of x is equal to n and n is half the length of the vector.
2250///
2251/// When undef's appear in the mask they are treated as if they were whatever
2252/// value is necessary in order to fit the above form.
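///
/// Illustrative example (not part of the original source): for v8i32, the
/// mask <1, 1, 1, 1, 5, 5, 5, 5> replicates element 1 within each 128-bit
/// half and is matched here, producing xvreplvei.w with immediate 1.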
2253static SDValue
2254lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2255 SDValue V1, SelectionDAG &DAG,
2256 const LoongArchSubtarget &Subtarget) {
2257 int SplatIndex = -1;
2258 for (const auto &M : Mask) {
2259 if (M != -1) {
2260 SplatIndex = M;
2261 break;
2262 }
2263 }
2264
2265 if (SplatIndex == -1)
2266 return DAG.getUNDEF(VT);
2267
2268 const auto &Begin = Mask.begin();
2269 const auto &End = Mask.end();
2270 int HalfSize = Mask.size() / 2;
2271
2272 if (SplatIndex >= HalfSize)
2273 return SDValue();
2274
2275 assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
2276 if (fitsRegularPattern<int>(Begin, 1, End - HalfSize, SplatIndex, 0) &&
2277 fitsRegularPattern<int>(Begin + HalfSize, 1, End, SplatIndex + HalfSize,
2278 0)) {
2279 return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
2280 DAG.getConstant(SplatIndex, DL, Subtarget.getGRLenVT()));
2281 }
2282
2283 return SDValue();
2284}
2285
2286/// Lower VECTOR_SHUFFLE into XVSHUF4I (if possible).
2287static SDValue
2288lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2289 SDValue V1, SDValue V2, SelectionDAG &DAG,
2290 const LoongArchSubtarget &Subtarget) {
2291 // When the size is less than or equal to 4, lower cost instructions may be
2292 // used.
2293 if (Mask.size() <= 4)
2294 return SDValue();
2295 return lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget);
2296}
2297
2298/// Lower VECTOR_SHUFFLE into XVPERMI (if possible).
2299static SDValue
2300lowerVECTOR_SHUFFLE_XVPERMI(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2301 SDValue V1, SelectionDAG &DAG,
2302 const LoongArchSubtarget &Subtarget) {
2303 // Only consider XVPERMI_D.
2304 if (Mask.size() != 4 || (VT != MVT::v4i64 && VT != MVT::v4f64))
2305 return SDValue();
2306
2307 unsigned MaskImm = 0;
2308 for (unsigned i = 0; i < Mask.size(); ++i) {
2309 if (Mask[i] == -1)
2310 continue;
2311 MaskImm |= Mask[i] << (i * 2);
2312 }
2313
2314 return DAG.getNode(LoongArchISD::XVPERMI, DL, VT, V1,
2315 DAG.getConstant(MaskImm, DL, Subtarget.getGRLenVT()));
2316}
2317
2318/// Lower VECTOR_SHUFFLE into XVPERM (if possible).
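///
/// Illustrative example (not part of the original source): a single-source
/// v8i32 shuffle whose mask crosses the 128-bit lanes in both halves, such as
/// <7, 0, 5, 2, 6, 1, 4, 3>, cannot be handled by the cheaper per-lane
/// patterns, so the mask is materialized in a vector register and xvperm.w is
/// emitted.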
2319static SDValue lowerVECTOR_SHUFFLE_XVPERM(const SDLoc &DL, ArrayRef<int> Mask,
2320 MVT VT, SDValue V1, SelectionDAG &DAG,
2321 const LoongArchSubtarget &Subtarget) {
2322 // LoongArch LASX only has XVPERM_W.
2323 if (Mask.size() != 8 || (VT != MVT::v8i32 && VT != MVT::v8f32))
2324 return SDValue();
2325
2326 unsigned NumElts = VT.getVectorNumElements();
2327 unsigned HalfSize = NumElts / 2;
2328 bool FrontLo = true, FrontHi = true;
2329 bool BackLo = true, BackHi = true;
2330
2331 auto inRange = [](int val, int low, int high) {
2332 return (val == -1) || (val >= low && val < high);
2333 };
2334
2335 for (unsigned i = 0; i < HalfSize; ++i) {
2336 int Fronti = Mask[i];
2337 int Backi = Mask[i + HalfSize];
2338
2339 FrontLo &= inRange(Fronti, 0, HalfSize);
2340 FrontHi &= inRange(Fronti, HalfSize, NumElts);
2341 BackLo &= inRange(Backi, 0, HalfSize);
2342 BackHi &= inRange(Backi, HalfSize, NumElts);
2343 }
2344
2345 // If both the lower and upper 128-bit parts access only one half of the
2346 // vector (either lower or upper), avoid using xvperm.w. The latency of
2347 // xvperm.w(3) is higher than using xvshuf(1) and xvori(1).
2348 if ((FrontLo || FrontHi) && (BackLo || BackHi))
2349 return SDValue();
2350
2351 SmallVector<SDValue, 8> Masks;
2352 MVT GRLenVT = Subtarget.getGRLenVT();
2353 for (unsigned i = 0; i < NumElts; ++i)
2354 Masks.push_back(Mask[i] == -1 ? DAG.getUNDEF(GRLenVT)
2355 : DAG.getConstant(Mask[i], DL, GRLenVT));
2356 SDValue MaskVec = DAG.getBuildVector(MVT::v8i32, DL, Masks);
2357
2358 return DAG.getNode(LoongArchISD::XVPERM, DL, VT, V1, MaskVec);
2359}
2360
2361/// Lower VECTOR_SHUFFLE into XVPACKEV (if possible).
2362static SDValue lowerVECTOR_SHUFFLE_XVPACKEV(const SDLoc &DL, ArrayRef<int> Mask,
2363 MVT VT, SDValue V1, SDValue V2,
2364 SelectionDAG &DAG) {
2365 return lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG);
2366}
2367
2368/// Lower VECTOR_SHUFFLE into XVPACKOD (if possible).
2369static SDValue lowerVECTOR_SHUFFLE_XVPACKOD(const SDLoc &DL, ArrayRef<int> Mask,
2370 MVT VT, SDValue V1, SDValue V2,
2371 SelectionDAG &DAG) {
2372 return lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG);
2373}
2374
2375/// Lower VECTOR_SHUFFLE into XVILVH (if possible).
2376static SDValue lowerVECTOR_SHUFFLE_XVILVH(const SDLoc &DL, ArrayRef<int> Mask,
2377 MVT VT, SDValue V1, SDValue V2,
2378 SelectionDAG &DAG) {
2379
2380 const auto &Begin = Mask.begin();
2381 const auto &End = Mask.end();
2382 unsigned HalfSize = Mask.size() / 2;
2383 unsigned LeftSize = HalfSize / 2;
2384 SDValue OriV1 = V1, OriV2 = V2;
2385
2386 if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, HalfSize - LeftSize,
2387 1) &&
2388 fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize + LeftSize, 1))
2389 V1 = OriV1;
2390 else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize,
2391 Mask.size() + HalfSize - LeftSize, 1) &&
2392 fitsRegularPattern<int>(Begin + HalfSize, 2, End,
2393 Mask.size() + HalfSize + LeftSize, 1))
2394 V1 = OriV2;
2395 else
2396 return SDValue();
2397
2398 if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, HalfSize - LeftSize,
2399 1) &&
2400 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize + LeftSize,
2401 1))
2402 V2 = OriV1;
2403 else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize,
2404 Mask.size() + HalfSize - LeftSize, 1) &&
2405 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
2406 Mask.size() + HalfSize + LeftSize, 1))
2407 V2 = OriV2;
2408 else
2409 return SDValue();
2410
2411 return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
2412}
2413
2414/// Lower VECTOR_SHUFFLE into XVILVL (if possible).
2415static SDValue lowerVECTOR_SHUFFLE_XVILVL(const SDLoc &DL, ArrayRef<int> Mask,
2416 MVT VT, SDValue V1, SDValue V2,
2417 SelectionDAG &DAG) {
2418
2419 const auto &Begin = Mask.begin();
2420 const auto &End = Mask.end();
2421 unsigned HalfSize = Mask.size() / 2;
2422 SDValue OriV1 = V1, OriV2 = V2;
2423
2424 if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, 0, 1) &&
2425 fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize, 1))
2426 V1 = OriV1;
2427 else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, Mask.size(), 1) &&
2428 fitsRegularPattern<int>(Begin + HalfSize, 2, End,
2429 Mask.size() + HalfSize, 1))
2430 V1 = OriV2;
2431 else
2432 return SDValue();
2433
2434 if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, 0, 1) &&
2435 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize, 1))
2436 V2 = OriV1;
2437 else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, Mask.size(),
2438 1) &&
2439 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
2440 Mask.size() + HalfSize, 1))
2441 V2 = OriV2;
2442 else
2443 return SDValue();
2444
2445 return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
2446}
2447
2448/// Lower VECTOR_SHUFFLE into XVPICKEV (if possible).
2449static SDValue lowerVECTOR_SHUFFLE_XVPICKEV(const SDLoc &DL, ArrayRef<int> Mask,
2450 MVT VT, SDValue V1, SDValue V2,
2451 SelectionDAG &DAG) {
2452
2453 const auto &Begin = Mask.begin();
2454 const auto &LeftMid = Mask.begin() + Mask.size() / 4;
2455 const auto &Mid = Mask.begin() + Mask.size() / 2;
2456 const auto &RightMid = Mask.end() - Mask.size() / 4;
2457 const auto &End = Mask.end();
2458 unsigned HalfSize = Mask.size() / 2;
2459 SDValue OriV1 = V1, OriV2 = V2;
2460
2461 if (fitsRegularPattern<int>(Begin, 1, LeftMid, 0, 2) &&
2462 fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize, 2))
2463 V1 = OriV1;
2464 else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size(), 2) &&
2465 fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize, 2))
2466 V1 = OriV2;
2467 else
2468 return SDValue();
2469
2470 if (fitsRegularPattern<int>(LeftMid, 1, Mid, 0, 2) &&
2471 fitsRegularPattern<int>(RightMid, 1, End, HalfSize, 2))
2472 V2 = OriV1;
2473 else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size(), 2) &&
2474 fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize, 2))
2475 V2 = OriV2;
2476
2477 else
2478 return SDValue();
2479
2480 return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
2481}
2482
2483/// Lower VECTOR_SHUFFLE into XVPICKOD (if possible).
2484static SDValue lowerVECTOR_SHUFFLE_XVPICKOD(const SDLoc &DL, ArrayRef<int> Mask,
2485 MVT VT, SDValue V1, SDValue V2,
2486 SelectionDAG &DAG) {
2487
2488 const auto &Begin = Mask.begin();
2489 const auto &LeftMid = Mask.begin() + Mask.size() / 4;
2490 const auto &Mid = Mask.begin() + Mask.size() / 2;
2491 const auto &RightMid = Mask.end() - Mask.size() / 4;
2492 const auto &End = Mask.end();
2493 unsigned HalfSize = Mask.size() / 2;
2494 SDValue OriV1 = V1, OriV2 = V2;
2495
2496 if (fitsRegularPattern<int>(Begin, 1, LeftMid, 1, 2) &&
2497 fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize + 1, 2))
2498 V1 = OriV1;
2499 else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size() + 1, 2) &&
2500 fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize + 1,
2501 2))
2502 V1 = OriV2;
2503 else
2504 return SDValue();
2505
2506 if (fitsRegularPattern<int>(LeftMid, 1, Mid, 1, 2) &&
2507 fitsRegularPattern<int>(RightMid, 1, End, HalfSize + 1, 2))
2508 V2 = OriV1;
2509 else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size() + 1, 2) &&
2510 fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize + 1,
2511 2))
2512 V2 = OriV2;
2513 else
2514 return SDValue();
2515
2516 return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
2517}
2518
2519/// Lower VECTOR_SHUFFLE into XVINSVE0 (if possible).
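///
/// Illustrative example (not part of the original source): for v8i32, the
/// mask <0, 1, 2, 8, 4, 5, 6, 7> keeps V1 except that element 3 is replaced
/// by the lowest element of V2, which is matched here as xvinsve0.w with
/// index 3.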
2520static SDValue
2521lowerVECTOR_SHUFFLE_XVINSVE0(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2522 SDValue V1, SDValue V2, SelectionDAG &DAG,
2523 const LoongArchSubtarget &Subtarget) {
2524 // LoongArch LASX only supports xvinsve0.{w/d}.
2525 if (VT != MVT::v8i32 && VT != MVT::v8f32 && VT != MVT::v4i64 &&
2526 VT != MVT::v4f64)
2527 return SDValue();
2528
2529 MVT GRLenVT = Subtarget.getGRLenVT();
2530 int MaskSize = Mask.size();
2531 assert(MaskSize == (int)VT.getVectorNumElements() && "Unexpected mask size");
2532
2533 // Check if exactly one element of the Mask is replaced by 'Replaced', while
2534 // all other elements are either 'Base + i' or undef (-1). On success, return
2535 // the index of the replaced element. Otherwise, just return -1.
2536 auto checkReplaceOne = [&](int Base, int Replaced) -> int {
2537 int Idx = -1;
2538 for (int i = 0; i < MaskSize; ++i) {
2539 if (Mask[i] == Base + i || Mask[i] == -1)
2540 continue;
2541 if (Mask[i] != Replaced)
2542 return -1;
2543 if (Idx == -1)
2544 Idx = i;
2545 else
2546 return -1;
2547 }
2548 return Idx;
2549 };
2550
2551 // Case 1: the lowest element of V2 replaces one element in V1.
2552 int Idx = checkReplaceOne(0, MaskSize);
2553 if (Idx != -1)
2554 return DAG.getNode(LoongArchISD::XVINSVE0, DL, VT, V1, V2,
2555 DAG.getConstant(Idx, DL, GRLenVT));
2556
2557 // Case 2: the lowest element of V1 replaces one element in V2.
2558 Idx = checkReplaceOne(MaskSize, 0);
2559 if (Idx != -1)
2560 return DAG.getNode(LoongArchISD::XVINSVE0, DL, VT, V2, V1,
2561 DAG.getConstant(Idx, DL, GRLenVT));
2562
2563 return SDValue();
2564}
2565
2566/// Lower VECTOR_SHUFFLE into XVSHUF (if possible).
2567static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, ArrayRef<int> Mask,
2568 MVT VT, SDValue V1, SDValue V2,
2569 SelectionDAG &DAG) {
2570
2571 int MaskSize = Mask.size();
2572 int HalfSize = Mask.size() / 2;
2573 const auto &Begin = Mask.begin();
2574 const auto &Mid = Mask.begin() + HalfSize;
2575 const auto &End = Mask.end();
2576
2577 // VECTOR_SHUFFLE concatenates the vectors:
2578 // <0, 1, 2, 3, 4, 5, 6, 7> + <8, 9, 10, 11, 12, 13, 14, 15>
2579 // shuffling ->
2580 // <0, 1, 2, 3, 8, 9, 10, 11> <4, 5, 6, 7, 12, 13, 14, 15>
2581 //
2582 // XVSHUF concatenates the vectors:
2583 // <a0, a1, a2, a3, b0, b1, b2, b3> + <a4, a5, a6, a7, b4, b5, b6, b7>
2584 // shuffling ->
2585 // <a0, a1, a2, a3, a4, a5, a6, a7> + <b0, b1, b2, b3, b4, b5, b6, b7>
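  //
  // Worked example (illustrative, not part of the original source): for v8i32
  // with mask <0, 9, 2, 11, 4, 13, 6, 15>, the first half only references
  // elements 0..3 of V1 and 8..11 of V2 and is remapped to <0, 5, 2, 7>; the
  // second half only references elements 4..7 and 12..15 and is likewise
  // remapped to <0, 5, 2, 7>, giving the per-lane indices xvshuf expects.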
2586 SmallVector<SDValue, 8> MaskAlloc;
2587 for (auto it = Begin; it < Mid; it++) {
2588 if (*it < 0) // UNDEF
2589 MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
2590 else if ((*it >= 0 && *it < HalfSize) ||
2591 (*it >= MaskSize && *it < MaskSize + HalfSize)) {
2592 int M = *it < HalfSize ? *it : *it - HalfSize;
2593 MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
2594 } else
2595 return SDValue();
2596 }
2597 assert((int)MaskAlloc.size() == HalfSize && "xvshuf convert failed!");
2598
2599 for (auto it = Mid; it < End; it++) {
2600 if (*it < 0) // UNDEF
2601 MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
2602 else if ((*it >= HalfSize && *it < MaskSize) ||
2603 (*it >= MaskSize + HalfSize && *it < MaskSize * 2)) {
2604 int M = *it < MaskSize ? *it - HalfSize : *it - MaskSize;
2605 MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
2606 } else
2607 return SDValue();
2608 }
2609 assert((int)MaskAlloc.size() == MaskSize && "xvshuf convert failed!");
2610
2611 EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
2612 SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, MaskAlloc);
2613 return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
2614}
2615
2616/// Shuffle vectors by lane to generate more optimized instructions.
2617/// 256-bit shuffles are always considered as 2-lane 128-bit shuffles.
2618///
2619/// Therefore, except for the following four cases, other cases are regarded
2620/// as cross-lane shuffles, where optimization is relatively limited.
2621///
2622/// - Shuffle high, low lanes of the two input vectors
2623/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 3, 6>
2624/// - Shuffle low, high lanes of the two input vectors
2625/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 0, 5>
2626/// - Shuffle low, low lanes of the two input vectors
2627/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 3, 6>
2628/// - Shuffle high, high lanes of the two input vectors
2629/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 0, 5>
2630///
2631/// The first case is the closest to LoongArch instructions and the other
2632/// cases need to be converted to it for processing.
2633///
2634/// This function will return true for the last three cases above and will
2635/// modify V1, V2 and Mask. Otherwise, return false for the first case and
2636/// cross-lane shuffle cases.
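///
/// Note on the immediates used below (illustrative): xvpermi.d selects 64-bit
/// elements via 2-bit fields, so 0b01001110 swaps the two 128-bit halves,
/// 0b11101110 copies the high half into both halves, and 0b01000100 copies
/// the low half into both halves.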
2637static bool canonicalizeShuffleVectorByLane(
2638 const SDLoc &DL, MutableArrayRef<int> Mask, MVT VT, SDValue &V1,
2639 SDValue &V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget) {
2640
2641 enum HalfMaskType { HighLaneTy, LowLaneTy, None };
2642
2643 int MaskSize = Mask.size();
2644 int HalfSize = Mask.size() / 2;
2645 MVT GRLenVT = Subtarget.getGRLenVT();
2646
2647 HalfMaskType preMask = None, postMask = None;
2648
2649 if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
2650 return M < 0 || (M >= 0 && M < HalfSize) ||
2651 (M >= MaskSize && M < MaskSize + HalfSize);
2652 }))
2653 preMask = HighLaneTy;
2654 else if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
2655 return M < 0 || (M >= HalfSize && M < MaskSize) ||
2656 (M >= MaskSize + HalfSize && M < MaskSize * 2);
2657 }))
2658 preMask = LowLaneTy;
2659
2660 if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
2661 return M < 0 || (M >= HalfSize && M < MaskSize) ||
2662 (M >= MaskSize + HalfSize && M < MaskSize * 2);
2663 }))
2664 postMask = LowLaneTy;
2665 else if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
2666 return M < 0 || (M >= 0 && M < HalfSize) ||
2667 (M >= MaskSize && M < MaskSize + HalfSize);
2668 }))
2669 postMask = HighLaneTy;
2670
2671 // The pre-half of mask is high lane type, and the post-half of mask
2672 // is low lane type, which is closest to the LoongArch instructions.
2673 //
2674 // Note: In the LoongArch architecture, the high lane of mask corresponds
2675 // to the lower 128-bit of vector register, and the low lane of mask
2676 // corresponds to the higher 128-bit of the vector register.
2677 if (preMask == HighLaneTy && postMask == LowLaneTy) {
2678 return false;
2679 }
2680 if (preMask == LowLaneTy && postMask == HighLaneTy) {
2681 V1 = DAG.getBitcast(MVT::v4i64, V1);
2682 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
2683 DAG.getConstant(0b01001110, DL, GRLenVT));
2684 V1 = DAG.getBitcast(VT, V1);
2685
2686 if (!V2.isUndef()) {
2687 V2 = DAG.getBitcast(MVT::v4i64, V2);
2688 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
2689 DAG.getConstant(0b01001110, DL, GRLenVT));
2690 V2 = DAG.getBitcast(VT, V2);
2691 }
2692
2693 for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
2694 *it = *it < 0 ? *it : *it - HalfSize;
2695 }
2696 for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
2697 *it = *it < 0 ? *it : *it + HalfSize;
2698 }
2699 } else if (preMask == LowLaneTy && postMask == LowLaneTy) {
2700 V1 = DAG.getBitcast(MVT::v4i64, V1);
2701 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
2702 DAG.getConstant(0b11101110, DL, GRLenVT));
2703 V1 = DAG.getBitcast(VT, V1);
2704
2705 if (!V2.isUndef()) {
2706 V2 = DAG.getBitcast(MVT::v4i64, V2);
2707 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
2708 DAG.getConstant(0b11101110, DL, GRLenVT));
2709 V2 = DAG.getBitcast(VT, V2);
2710 }
2711
2712 for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
2713 *it = *it < 0 ? *it : *it - HalfSize;
2714 }
2715 } else if (preMask == HighLaneTy && postMask == HighLaneTy) {
2716 V1 = DAG.getBitcast(MVT::v4i64, V1);
2717 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
2718 DAG.getConstant(0b01000100, DL, GRLenVT));
2719 V1 = DAG.getBitcast(VT, V1);
2720
2721 if (!V2.isUndef()) {
2722 V2 = DAG.getBitcast(MVT::v4i64, V2);
2723 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
2724 DAG.getConstant(0b01000100, DL, GRLenVT));
2725 V2 = DAG.getBitcast(VT, V2);
2726 }
2727
2728 for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
2729 *it = *it < 0 ? *it : *it + HalfSize;
2730 }
2731 } else { // cross-lane
2732 return false;
2733 }
2734
2735 return true;
2736}
2737
2738/// Lower VECTOR_SHUFFLE as lane permute and then shuffle (if possible).
2739/// Only for 256-bit vector.
2740///
2741/// For example:
2742/// %2 = shufflevector <4 x i64> %0, <4 x i64> poison,
2743/// <4 x i32> <i32 0, i32 3, i32 2, i32 0>
2744/// is lowered to:
2745/// (XVPERMI $xr2, $xr0, 78)
2746/// (XVSHUF $xr1, $xr2, $xr0)
2747/// (XVORI $xr0, $xr1, 0)
2748static SDValue lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(const SDLoc &DL,
2749 ArrayRef<int> Mask,
2750 MVT VT, SDValue V1,
2751 SDValue V2,
2752 SelectionDAG &DAG) {
2753 assert(VT.is256BitVector() && "Only for 256-bit vector shuffles!");
2754 int Size = Mask.size();
2755 int LaneSize = Size / 2;
2756
2757 bool LaneCrossing[2] = {false, false};
2758 for (int i = 0; i < Size; ++i)
2759 if (Mask[i] >= 0 && ((Mask[i] % Size) / LaneSize) != (i / LaneSize))
2760 LaneCrossing[(Mask[i] % Size) / LaneSize] = true;
2761
2762 // Ensure that all lanes are involved.
2763 if (!LaneCrossing[0] && !LaneCrossing[1])
2764 return SDValue();
2765
2766 SmallVector<int> InLaneMask;
2767 InLaneMask.assign(Mask.begin(), Mask.end());
2768 for (int i = 0; i < Size; ++i) {
2769 int &M = InLaneMask[i];
2770 if (M < 0)
2771 continue;
2772 if (((M % Size) / LaneSize) != (i / LaneSize))
2773 M = (M % LaneSize) + ((i / LaneSize) * LaneSize) + Size;
2774 }
2775
2776 SDValue Flipped = DAG.getBitcast(MVT::v4i64, V1);
2777 Flipped = DAG.getVectorShuffle(MVT::v4i64, DL, Flipped,
2778 DAG.getUNDEF(MVT::v4i64), {2, 3, 0, 1});
2779 Flipped = DAG.getBitcast(VT, Flipped);
2780 return DAG.getVectorShuffle(VT, DL, V1, Flipped, InLaneMask);
2781}
2782
2783/// Dispatching routine to lower various 256-bit LoongArch vector shuffles.
2784///
2785/// This routine breaks down the specific type of 256-bit shuffle and
2786/// dispatches to the lowering routines accordingly.
2787static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2788 SDValue V1, SDValue V2, SelectionDAG &DAG,
2789 const LoongArchSubtarget &Subtarget) {
2790 assert((VT.SimpleTy == MVT::v32i8 || VT.SimpleTy == MVT::v16i16 ||
2791 VT.SimpleTy == MVT::v8i32 || VT.SimpleTy == MVT::v4i64 ||
2792 VT.SimpleTy == MVT::v8f32 || VT.SimpleTy == MVT::v4f64) &&
2793 "Vector type is unsupported for lasx!");
2795 "Two operands have different types!");
2796 assert(VT.getVectorNumElements() == Mask.size() &&
2797 "Unexpected mask size for shuffle!");
2798 assert(Mask.size() % 2 == 0 && "Expected even mask size.");
2799 assert(Mask.size() >= 4 && "Mask size is less than 4.");
2800
2801 APInt KnownUndef, KnownZero;
2802 computeZeroableShuffleElements(Mask, V1, V2, KnownUndef, KnownZero);
2803 APInt Zeroable = KnownUndef | KnownZero;
2804
2805 SDValue Result;
2806 // TODO: Add more comparison patterns.
2807 if (V2.isUndef()) {
2808 if ((Result =
2809 lowerVECTOR_SHUFFLE_XVREPLVEI(DL, Mask, VT, V1, DAG, Subtarget)))
2810 return Result;
2811 if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, Mask, VT, V1, V2, DAG,
2812 Subtarget)))
2813 return Result;
2814 // Try to widen vectors to gain more optimization opportunities.
2815 if (SDValue NewShuffle = widenShuffleMask(DL, Mask, VT, V1, V2, DAG))
2816 return NewShuffle;
2817 if ((Result =
2818 lowerVECTOR_SHUFFLE_XVPERMI(DL, Mask, VT, V1, DAG, Subtarget)))
2819 return Result;
2820 if ((Result = lowerVECTOR_SHUFFLE_XVPERM(DL, Mask, VT, V1, DAG, Subtarget)))
2821 return Result;
2822 if ((Result =
2823 lowerVECTOR_SHUFFLE_IsReverse(DL, Mask, VT, V1, DAG, Subtarget)))
2824 return Result;
2825
2826 // TODO: This commented-out assignment may be enabled in the future to
2827 // better match the pattern for instruction selection.
2828 /* V2 = V1; */
2829 }
2830
2831 // It is recommended not to change the pattern comparison order for better
2832 // performance.
2833 if ((Result = lowerVECTOR_SHUFFLE_XVPACKEV(DL, Mask, VT, V1, V2, DAG)))
2834 return Result;
2835 if ((Result = lowerVECTOR_SHUFFLE_XVPACKOD(DL, Mask, VT, V1, V2, DAG)))
2836 return Result;
2837 if ((Result = lowerVECTOR_SHUFFLE_XVILVH(DL, Mask, VT, V1, V2, DAG)))
2838 return Result;
2839 if ((Result = lowerVECTOR_SHUFFLE_XVILVL(DL, Mask, VT, V1, V2, DAG)))
2840 return Result;
2841 if ((Result = lowerVECTOR_SHUFFLE_XVPICKEV(DL, Mask, VT, V1, V2, DAG)))
2842 return Result;
2843 if ((Result = lowerVECTOR_SHUFFLE_XVPICKOD(DL, Mask, VT, V1, V2, DAG)))
2844 return Result;
2845 if ((Result = lowerVECTOR_SHUFFLEAsShift(DL, Mask, VT, V1, V2, DAG, Subtarget,
2846 Zeroable)))
2847 return Result;
2848 if ((Result =
2849 lowerVECTOR_SHUFFLE_XVINSVE0(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2850 return Result;
2851 if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, Mask, VT, V1, V2, DAG,
2852 Subtarget)))
2853 return Result;
2854
2855 // Canonicalize non-cross-lane shuffle vectors.
2856 SmallVector<int> NewMask(Mask);
2857 if (canonicalizeShuffleVectorByLane(DL, NewMask, VT, V1, V2, DAG, Subtarget))
2858 return lower256BitShuffle(DL, NewMask, VT, V1, V2, DAG, Subtarget);
2859
2860 // FIXME: Handling the remaining cases earlier can degrade performance
2861 // in some situations. Further analysis is required to enable more
2862 // effective optimizations.
2863 if (V2.isUndef()) {
2864 if ((Result = lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(DL, NewMask, VT,
2865 V1, V2, DAG)))
2866 return Result;
2867 }
2868
2869 if (SDValue NewShuffle = widenShuffleMask(DL, NewMask, VT, V1, V2, DAG))
2870 return NewShuffle;
2871 if ((Result = lowerVECTOR_SHUFFLE_XVSHUF(DL, NewMask, VT, V1, V2, DAG)))
2872 return Result;
2873
2874 return SDValue();
2875}
2876
2877SDValue LoongArchTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
2878 SelectionDAG &DAG) const {
2879 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
2880 ArrayRef<int> OrigMask = SVOp->getMask();
2881 SDValue V1 = Op.getOperand(0);
2882 SDValue V2 = Op.getOperand(1);
2883 MVT VT = Op.getSimpleValueType();
2884 int NumElements = VT.getVectorNumElements();
2885 SDLoc DL(Op);
2886
2887 bool V1IsUndef = V1.isUndef();
2888 bool V2IsUndef = V2.isUndef();
2889 if (V1IsUndef && V2IsUndef)
2890 return DAG.getUNDEF(VT);
2891
2892 // When we create a shuffle node we put the UNDEF node to second operand,
2893 // but in some cases the first operand may be transformed to UNDEF.
2894 // In this case we should just commute the node.
2895 if (V1IsUndef)
2896 return DAG.getCommutedVectorShuffle(*SVOp);
2897
2898 // Check for non-undef masks pointing at an undef vector and make the masks
2899 // undef as well. This makes it easier to match the shuffle based solely on
2900 // the mask.
2901 if (V2IsUndef &&
2902 any_of(OrigMask, [NumElements](int M) { return M >= NumElements; })) {
2903 SmallVector<int, 8> NewMask(OrigMask);
2904 for (int &M : NewMask)
2905 if (M >= NumElements)
2906 M = -1;
2907 return DAG.getVectorShuffle(VT, DL, V1, V2, NewMask);
2908 }
2909
2910 // Check for illegal shuffle mask element index values.
2911 int MaskUpperLimit = OrigMask.size() * (V2IsUndef ? 1 : 2);
2912 (void)MaskUpperLimit;
2913 assert(llvm::all_of(OrigMask,
2914 [&](int M) { return -1 <= M && M < MaskUpperLimit; }) &&
2915 "Out of bounds shuffle index");
2916
2917 // For each vector width, delegate to a specialized lowering routine.
2918 if (VT.is128BitVector())
2919 return lower128BitShuffle(DL, OrigMask, VT, V1, V2, DAG, Subtarget);
2920
2921 if (VT.is256BitVector())
2922 return lower256BitShuffle(DL, OrigMask, VT, V1, V2, DAG, Subtarget);
2923
2924 return SDValue();
2925}
2926
2927SDValue LoongArchTargetLowering::lowerFP_TO_FP16(SDValue Op,
2928 SelectionDAG &DAG) const {
2929 // Custom lower to ensure the libcall return is passed in an FPR on hard
2930 // float ABIs.
2931 SDLoc DL(Op);
2932 MakeLibCallOptions CallOptions;
2933 SDValue Op0 = Op.getOperand(0);
2934 SDValue Chain = SDValue();
2935 RTLIB::Libcall LC = RTLIB::getFPROUND(Op0.getValueType(), MVT::f16);
2936 SDValue Res;
2937 std::tie(Res, Chain) =
2938 makeLibCall(DAG, LC, MVT::f32, Op0, CallOptions, DL, Chain);
2939 if (Subtarget.is64Bit())
2940 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Res);
2941 return DAG.getBitcast(MVT::i32, Res);
2942}
2943
2944SDValue LoongArchTargetLowering::lowerFP16_TO_FP(SDValue Op,
2945 SelectionDAG &DAG) const {
2946 // Custom lower to ensure the libcall argument is passed in an FPR on hard
2947 // float ABIs.
2948 SDLoc DL(Op);
2949 MakeLibCallOptions CallOptions;
2950 SDValue Op0 = Op.getOperand(0);
2951 SDValue Chain = SDValue();
2952 SDValue Arg = Subtarget.is64Bit() ? DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64,
2953 DL, MVT::f32, Op0)
2954 : DAG.getBitcast(MVT::f32, Op0);
2955 SDValue Res;
2956 std::tie(Res, Chain) = makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Arg,
2957 CallOptions, DL, Chain);
2958 return Res;
2959}
2960
2961SDValue LoongArchTargetLowering::lowerFP_TO_BF16(SDValue Op,
2962 SelectionDAG &DAG) const {
2963 assert(Subtarget.hasBasicF() && "Unexpected custom legalization");
2964 SDLoc DL(Op);
2965 MakeLibCallOptions CallOptions;
2966 RTLIB::Libcall LC =
2967 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16);
2968 SDValue Res =
2969 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
2970 if (Subtarget.is64Bit())
2971 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Res);
2972 return DAG.getBitcast(MVT::i32, Res);
2973}
2974
2975SDValue LoongArchTargetLowering::lowerBF16_TO_FP(SDValue Op,
2976 SelectionDAG &DAG) const {
2977 assert(Subtarget.hasBasicF() && "Unexpected custom legalization");
2978 MVT VT = Op.getSimpleValueType();
2979 SDLoc DL(Op);
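  // bf16 reuses the upper 16 bits of the f32 encoding, so the extension is a
  // 16-bit left shift of the integer bits followed by a move to an FPR. For
  // example (illustrative), bf16 0x3F80 (1.0) becomes the f32 bit pattern
  // 0x3F800000, i.e. 1.0f.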
2980 Op = DAG.getNode(
2981 ISD::SHL, DL, Op.getOperand(0).getValueType(), Op.getOperand(0),
2982 DAG.getShiftAmountConstant(16, Op.getOperand(0).getValueType(), DL));
2983 SDValue Res = Subtarget.is64Bit() ? DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64,
2984 DL, MVT::f32, Op)
2985 : DAG.getBitcast(MVT::f32, Op);
2986 if (VT != MVT::f32)
2987 return DAG.getNode(ISD::FP_EXTEND, DL, VT, Res);
2988 return Res;
2989}
2990
2991// Lower BUILD_VECTOR as broadcast load (if possible).
2992// For example:
2993// %a = load i8, ptr %ptr
2994// %b = build_vector %a, %a, %a, %a
2995// is lowered to:
2996// (VLDREPL_B $a0, 0)
2997static SDValue lowerBUILD_VECTORAsBroadCastLoad(BuildVectorSDNode *BVOp,
2998 const SDLoc &DL,
2999 SelectionDAG &DAG) {
3000 MVT VT = BVOp->getSimpleValueType(0);
3001 int NumOps = BVOp->getNumOperands();
3002
3003 assert((VT.is128BitVector() || VT.is256BitVector()) &&
3004 "Unsupported vector type for broadcast.");
3005
3006 SDValue IdentitySrc;
3007 bool IsIdeneity = true;
3008
3009 for (int i = 0; i != NumOps; i++) {
3010 SDValue Op = BVOp->getOperand(i);
3011 if (Op.getOpcode() != ISD::LOAD || (IdentitySrc && Op != IdentitySrc)) {
3012 IsIdeneity = false;
3013 break;
3014 }
3015 IdentitySrc = BVOp->getOperand(0);
3016 }
3017
3018 // make sure that this load is valid and only has one user.
3019 if (!IsIdeneity || !IdentitySrc || !BVOp->isOnlyUserOf(IdentitySrc.getNode()))
3020 return SDValue();
3021
3022 auto *LN = cast<LoadSDNode>(IdentitySrc);
3023 auto ExtType = LN->getExtensionType();
3024
3025 if ((ExtType == ISD::EXTLOAD || ExtType == ISD::NON_EXTLOAD) &&
3026 VT.getScalarSizeInBits() == LN->getMemoryVT().getScalarSizeInBits()) {
3027 // Indexed loads and stores are not supported on LoongArch.
3028 assert(LN->isUnindexed() && "Unexpected indexed load.");
3029
3030 SDVTList Tys = DAG.getVTList(VT, MVT::Other);
3031 // The offset operand of unindexed load is always undefined, so there is
3032 // no need to pass it to VLDREPL.
3033 SDValue Ops[] = {LN->getChain(), LN->getBasePtr()};
3034 SDValue BCast = DAG.getNode(LoongArchISD::VLDREPL, DL, Tys, Ops);
3035 DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BCast.getValue(1));
3036 return BCast;
3037 }
3038 return SDValue();
3039}
3040
3041// Sequentially insert elements from Ops into Vector, from low to high indices.
3042// Note: Ops can have fewer elements than Vector.
3043static void fillVector(ArrayRef<SDValue> Ops, SelectionDAG &DAG, SDLoc DL,
3044 const LoongArchSubtarget &Subtarget, SDValue &Vector,
3045 EVT ResTy) {
3046 assert(Ops.size() <= ResTy.getVectorNumElements());
3047
3048 SDValue Op0 = Ops[0];
3049 if (!Op0.isUndef())
3050 Vector = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, ResTy, Op0);
3051 for (unsigned i = 1; i < Ops.size(); ++i) {
3052 SDValue Opi = Ops[i];
3053 if (Opi.isUndef())
3054 continue;
3055 Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector, Opi,
3056 DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
3057 }
3058}
3059
3060// Build a ResTy subvector from Node, taking NumElts elements starting at index
3061// 'first'.
3062static SDValue fillSubVectorFromBuildVector(BuildVectorSDNode *Node,
3063 SelectionDAG &DAG, SDLoc DL,
3064 const LoongArchSubtarget &Subtarget,
3065 EVT ResTy, unsigned first) {
3066 unsigned NumElts = ResTy.getVectorNumElements();
3067
3068 assert(first + NumElts <= Node->getSimpleValueType(0).getVectorNumElements());
3069
3070 SmallVector<SDValue, 16> Ops(Node->op_begin() + first,
3071 Node->op_begin() + first + NumElts);
3072 SDValue Vector = DAG.getUNDEF(ResTy);
3073 fillVector(Ops, DAG, DL, Subtarget, Vector, ResTy);
3074 return Vector;
3075}
3076
3077SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
3078 SelectionDAG &DAG) const {
3079 BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op);
3080 MVT VT = Node->getSimpleValueType(0);
3081 EVT ResTy = Op->getValueType(0);
3082 unsigned NumElts = ResTy.getVectorNumElements();
3083 SDLoc DL(Op);
3084 APInt SplatValue, SplatUndef;
3085 unsigned SplatBitSize;
3086 bool HasAnyUndefs;
3087 bool IsConstant = false;
3088 bool UseSameConstant = true;
3089 SDValue ConstantValue;
3090 bool Is128Vec = ResTy.is128BitVector();
3091 bool Is256Vec = ResTy.is256BitVector();
3092
3093 if ((!Subtarget.hasExtLSX() || !Is128Vec) &&
3094 (!Subtarget.hasExtLASX() || !Is256Vec))
3095 return SDValue();
3096
3097 if (SDValue Result = lowerBUILD_VECTORAsBroadCastLoad(Node, DL, DAG))
3098 return Result;
3099
3100 if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
3101 /*MinSplatBits=*/8) &&
3102 SplatBitSize <= 64) {
3103 // We can only cope with 8, 16, 32, or 64-bit elements.
3104 if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 &&
3105 SplatBitSize != 64)
3106 return SDValue();
3107
3108 if (SplatBitSize == 64 && !Subtarget.is64Bit()) {
3109 // We can only handle 64-bit elements that are within
3110 // the signed 10-bit range or match vldi patterns on 32-bit targets.
3111 // See the BUILD_VECTOR case in LoongArchDAGToDAGISel::Select().
3112 if (!SplatValue.isSignedIntN(10) &&
3113 !isImmVLDILegalForMode1(SplatValue, SplatBitSize).first)
3114 return SDValue();
3115 if ((Is128Vec && ResTy == MVT::v4i32) ||
3116 (Is256Vec && ResTy == MVT::v8i32))
3117 return Op;
3118 }
3119
3120 EVT ViaVecTy;
3121
3122 switch (SplatBitSize) {
3123 default:
3124 return SDValue();
3125 case 8:
3126 ViaVecTy = Is128Vec ? MVT::v16i8 : MVT::v32i8;
3127 break;
3128 case 16:
3129 ViaVecTy = Is128Vec ? MVT::v8i16 : MVT::v16i16;
3130 break;
3131 case 32:
3132 ViaVecTy = Is128Vec ? MVT::v4i32 : MVT::v8i32;
3133 break;
3134 case 64:
3135 ViaVecTy = Is128Vec ? MVT::v2i64 : MVT::v4i64;
3136 break;
3137 }
3138
3139 // SelectionDAG::getConstant will promote SplatValue appropriately.
3140 SDValue Result = DAG.getConstant(SplatValue, DL, ViaVecTy);
3141
3142 // Bitcast to the type we originally wanted.
3143 if (ViaVecTy != ResTy)
3144 Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result);
3145
3146 return Result;
3147 }
3148
3149 if (DAG.isSplatValue(Op, /*AllowUndefs=*/false))
3150 return Op;
3151
3152 for (unsigned i = 0; i < NumElts; ++i) {
3153 SDValue Opi = Node->getOperand(i);
3154 if (isIntOrFPConstant(Opi)) {
3155 IsConstant = true;
3156 if (!ConstantValue.getNode())
3157 ConstantValue = Opi;
3158 else if (ConstantValue != Opi)
3159 UseSameConstant = false;
3160 }
3161 }
3162
3163 // If the type of BUILD_VECTOR is v2f64, custom legalizing it has no benefits.
3164 if (IsConstant && UseSameConstant && ResTy != MVT::v2f64) {
3165 SDValue Result = DAG.getSplatBuildVector(ResTy, DL, ConstantValue);
3166 for (unsigned i = 0; i < NumElts; ++i) {
3167 SDValue Opi = Node->getOperand(i);
3168 if (!isIntOrFPConstant(Opi))
3169 Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Result, Opi,
3170 DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
3171 }
3172 return Result;
3173 }
3174
3175 if (!IsConstant) {
3176 // If the BUILD_VECTOR has a repeated pattern, use INSERT_VECTOR_ELT to fill
3177 // the sub-sequence of the vector and then broadcast the sub-sequence.
3178 //
3179 // TODO: If the BUILD_VECTOR contains undef elements, consider falling
3180 // back to use INSERT_VECTOR_ELT to materialize the vector, because it
3181 // generates worse code in some cases. This could be further optimized
3182 // with more consideration.
3183 SmallVector<SDValue> Sequence;
3184 BitVector UndefElements;
3185 if (Node->getRepeatedSequence(Sequence, &UndefElements) &&
3186 UndefElements.count() == 0) {
3187 // Use LSX instructions to fill the sub-sequence of a 256-bit vector,
3188 // because the high part can be simply treated as undef.
3189 SDValue Vector = DAG.getUNDEF(ResTy);
3190 EVT FillTy = Is256Vec
3191 ? ResTy.getHalfNumVectorElementsVT(*DAG.getContext())
3192 : ResTy;
3193 SDValue FillVec =
3194 Is256Vec ? DAG.getExtractSubvector(DL, FillTy, Vector, 0) : Vector;
3195
3196 fillVector(Sequence, DAG, DL, Subtarget, FillVec, FillTy);
3197
3198 unsigned SeqLen = Sequence.size();
3199 unsigned SplatLen = NumElts / SeqLen;
3200 MVT SplatEltTy = MVT::getIntegerVT(VT.getScalarSizeInBits() * SeqLen);
3201 MVT SplatTy = MVT::getVectorVT(SplatEltTy, SplatLen);
3202
3203 // If the size of the sub-sequence is half of a 256-bit vector, bitcast the
3204 // vector to v4i64 type in order to match the pattern of XVREPLVE0Q.
3205 if (SplatEltTy == MVT::i128)
3206 SplatTy = MVT::v4i64;
3207
3208 SDValue SplatVec;
3209 SDValue SrcVec = DAG.getBitcast(
3210 SplatTy,
3211 Is256Vec ? DAG.getInsertSubvector(DL, Vector, FillVec, 0) : FillVec);
3212 if (Is256Vec) {
3213 SplatVec =
3214 DAG.getNode((SplatEltTy == MVT::i128) ? LoongArchISD::XVREPLVE0Q
3215 : LoongArchISD::XVREPLVE0,
3216 DL, SplatTy, SrcVec);
3217 } else {
3218 SplatVec = DAG.getNode(LoongArchISD::VREPLVEI, DL, SplatTy, SrcVec,
3219 DAG.getConstant(0, DL, Subtarget.getGRLenVT()));
3220 }
3221
3222 return DAG.getBitcast(ResTy, SplatVec);
3223 }
3224
3225 // Use INSERT_VECTOR_ELT operations rather than expanding to stores,
3226 // because the cost is much lower than using memory operations.
3227 //
3228 // For 256-bit vectors, normally split into two halves and concatenate.
3229 // Special case: for v8i32/v8f32/v4i64/v4f64, if the upper half has only
3230 // one non-undef element, skip splitting to avoid a worse result.
3231 if (ResTy == MVT::v8i32 || ResTy == MVT::v8f32 || ResTy == MVT::v4i64 ||
3232 ResTy == MVT::v4f64) {
3233 unsigned NonUndefCount = 0;
3234 for (unsigned i = NumElts / 2; i < NumElts; ++i) {
3235 if (!Node->getOperand(i).isUndef()) {
3236 ++NonUndefCount;
3237 if (NonUndefCount > 1)
3238 break;
3239 }
3240 }
3241 if (NonUndefCount == 1)
3242 return fillSubVectorFromBuildVector(Node, DAG, DL, Subtarget, ResTy, 0);
3243 }
3244
3245 EVT VecTy =
3246 Is256Vec ? ResTy.getHalfNumVectorElementsVT(*DAG.getContext()) : ResTy;
3247 SDValue Vector =
3248 fillSubVectorFromBuildVector(Node, DAG, DL, Subtarget, VecTy, 0);
3249
3250 if (Is128Vec)
3251 return Vector;
3252
3253 SDValue VectorHi = fillSubVectorFromBuildVector(Node, DAG, DL, Subtarget,
3254 VecTy, NumElts / 2);
3255
3256 return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResTy, Vector, VectorHi);
3257 }
3258
3259 return SDValue();
3260}
3261
3262SDValue LoongArchTargetLowering::lowerCONCAT_VECTORS(SDValue Op,
3263 SelectionDAG &DAG) const {
3264 SDLoc DL(Op);
3265 MVT ResVT = Op.getSimpleValueType();
3266 assert(ResVT.is256BitVector() && Op.getNumOperands() == 2);
3267
3268 unsigned NumOperands = Op.getNumOperands();
3269 unsigned NumFreezeUndef = 0;
3270 unsigned NumZero = 0;
3271 unsigned NumNonZero = 0;
3272 unsigned NonZeros = 0;
3273 SmallSet<SDValue, 4> Undefs;
3274 for (unsigned i = 0; i != NumOperands; ++i) {
3275 SDValue SubVec = Op.getOperand(i);
3276 if (SubVec.isUndef())
3277 continue;
3278 if (ISD::isFreezeUndef(SubVec.getNode())) {
3279 // If the freeze(undef) has multiple uses then we must fold to zero.
3280 if (SubVec.hasOneUse()) {
3281 ++NumFreezeUndef;
3282 } else {
3283 ++NumZero;
3284 Undefs.insert(SubVec);
3285 }
3286 } else if (ISD::isBuildVectorAllZeros(SubVec.getNode()))
3287 ++NumZero;
3288 else {
3289 assert(i < sizeof(NonZeros) * CHAR_BIT); // Ensure the shift is in range.
3290 NonZeros |= 1 << i;
3291 ++NumNonZero;
3292 }
3293 }
3294
3295 // If we have more than 2 non-zeros, build each half separately.
3296 if (NumNonZero > 2) {
3297 MVT HalfVT = ResVT.getHalfNumVectorElementsVT();
3298 ArrayRef<SDUse> Ops = Op->ops();
3299 SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
3300 Ops.slice(0, NumOperands / 2));
3301 SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
3302 Ops.slice(NumOperands / 2));
3303 return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi);
3304 }
3305
3306 // Otherwise, build it up through insert_subvectors.
3307 SDValue Vec = NumZero ? DAG.getConstant(0, DL, ResVT)
3308 : (NumFreezeUndef ? DAG.getFreeze(DAG.getUNDEF(ResVT))
3309 : DAG.getUNDEF(ResVT));
3310
3311 // Replace Undef operands with ZeroVector.
3312 for (SDValue U : Undefs)
3313 DAG.ReplaceAllUsesWith(U, DAG.getConstant(0, DL, U.getSimpleValueType()));
3314
3315 MVT SubVT = Op.getOperand(0).getSimpleValueType();
3316 unsigned NumSubElems = SubVT.getVectorNumElements();
3317 for (unsigned i = 0; i != NumOperands; ++i) {
3318 if ((NonZeros & (1 << i)) == 0)
3319 continue;
3320
3321 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ResVT, Vec, Op.getOperand(i),
3322 DAG.getVectorIdxConstant(i * NumSubElems, DL));
3323 }
3324
3325 return Vec;
3326}
3327
3328SDValue
3329LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
3330 SelectionDAG &DAG) const {
3331 MVT EltVT = Op.getSimpleValueType();
3332 SDValue Vec = Op->getOperand(0);
3333 EVT VecTy = Vec->getValueType(0);
3334 SDValue Idx = Op->getOperand(1);
3335 SDLoc DL(Op);
3336 MVT GRLenVT = Subtarget.getGRLenVT();
3337
3338 assert(VecTy.is256BitVector() && "Unexpected EXTRACT_VECTOR_ELT vector type");
3339
3340 if (isa<ConstantSDNode>(Idx))
3341 return Op;
3342
3343 switch (VecTy.getSimpleVT().SimpleTy) {
3344 default:
3345 llvm_unreachable("Unexpected type");
3346 case MVT::v32i8:
3347 case MVT::v16i16:
3348 case MVT::v4i64:
3349 case MVT::v4f64: {
3350 // Extract the high half subvector and place it to the low half of a new
3351 // vector. It doesn't matter what the high half of the new vector is.
3352 EVT HalfTy = VecTy.getHalfNumVectorElementsVT(*DAG.getContext());
3353 SDValue VecHi =
3354 DAG.getExtractSubvector(DL, HalfTy, Vec, HalfTy.getVectorNumElements());
3355 SDValue TmpVec =
3356 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecTy, DAG.getUNDEF(VecTy),
3357 VecHi, DAG.getConstant(0, DL, GRLenVT));
3358
3359 // Shuffle the original Vec and the TmpVec using MaskVec. The lowest
3360 // element of MaskVec is Idx; the rest do not matter. ResVec[0] will hold
3361 // the desired element.
3362 SDValue IdxCp =
3363 Subtarget.is64Bit()
3364 ? DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Idx)
3365 : DAG.getBitcast(MVT::f32, Idx);
3366 SDValue IdxVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v8f32, IdxCp);
3367 SDValue MaskVec =
3368 DAG.getBitcast((VecTy == MVT::v4f64) ? MVT::v4i64 : VecTy, IdxVec);
3369 SDValue ResVec =
3370 DAG.getNode(LoongArchISD::VSHUF, DL, VecTy, MaskVec, TmpVec, Vec);
3371
3372 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, ResVec,
3373 DAG.getConstant(0, DL, GRLenVT));
3374 }
3375 case MVT::v8i32:
3376 case MVT::v8f32: {
3377 SDValue SplatIdx = DAG.getSplatBuildVector(MVT::v8i32, DL, Idx);
3378 SDValue SplatValue =
3379 DAG.getNode(LoongArchISD::XVPERM, DL, VecTy, Vec, SplatIdx);
3380
3381 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, SplatValue,
3382 DAG.getConstant(0, DL, GRLenVT));
3383 }
3384 }
3385}
3386
3387SDValue
3388LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
3389 SelectionDAG &DAG) const {
3390 MVT VT = Op.getSimpleValueType();
3391 MVT EltVT = VT.getVectorElementType();
3392 unsigned NumElts = VT.getVectorNumElements();
3393 unsigned EltSizeInBits = EltVT.getScalarSizeInBits();
3394 SDLoc DL(Op);
3395 SDValue Op0 = Op.getOperand(0);
3396 SDValue Op1 = Op.getOperand(1);
3397 SDValue Op2 = Op.getOperand(2);
3398
3399 if (isa<ConstantSDNode>(Op2))
3400 return Op;
3401
3402 MVT IdxTy = MVT::getIntegerVT(EltSizeInBits);
3403 MVT IdxVTy = MVT::getVectorVT(IdxTy, NumElts);
3404
3405 if (!isTypeLegal(VT) || !isTypeLegal(IdxVTy))
3406 return SDValue();
3407
3408 SDValue SplatElt = DAG.getSplatBuildVector(VT, DL, Op1);
3409 SmallVector<SDValue, 32> RawIndices;
3410 SDValue SplatIdx;
3411 SDValue Indices;
3412
3413 if (!Subtarget.is64Bit() && IdxTy == MVT::i64) {
3414 MVT PairVTy = MVT::getVectorVT(MVT::i32, NumElts * 2);
3415 for (unsigned i = 0; i < NumElts; ++i) {
3416 RawIndices.push_back(Op2);
3417 RawIndices.push_back(DAG.getConstant(0, DL, MVT::i32));
3418 }
3419 SplatIdx = DAG.getBuildVector(PairVTy, DL, RawIndices);
3420 SplatIdx = DAG.getBitcast(IdxVTy, SplatIdx);
3421
3422 RawIndices.clear();
3423 for (unsigned i = 0; i < NumElts; ++i) {
3424 RawIndices.push_back(DAG.getConstant(i, DL, MVT::i32));
3425 RawIndices.push_back(DAG.getConstant(0, DL, MVT::i32));
3426 }
3427 Indices = DAG.getBuildVector(PairVTy, DL, RawIndices);
3428 Indices = DAG.getBitcast(IdxVTy, Indices);
3429 } else {
3430 SplatIdx = DAG.getSplatBuildVector(IdxVTy, DL, Op2);
3431
3432 for (unsigned i = 0; i < NumElts; ++i)
3433 RawIndices.push_back(DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
3434 Indices = DAG.getBuildVector(IdxVTy, DL, RawIndices);
3435 }
3436
3437 // insert vec, elt, idx
3438 // =>
3439 // select (splatidx == {0,1,2...}) ? splatelt : vec
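  //
  // Worked example (illustrative, not part of the original source): inserting
  // into a v4i32 at runtime index 2 builds SplatIdx = <2, 2, 2, 2> and
  // Indices = <0, 1, 2, 3>; the SETEQ yields <0, 0, -1, 0>, so the VSELECT
  // takes the new element only in lane 2 and keeps the original vector
  // elsewhere.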
3440 SDValue SelectCC =
3441 DAG.getSetCC(DL, IdxVTy, SplatIdx, Indices, ISD::CondCode::SETEQ);
3442 return DAG.getNode(ISD::VSELECT, DL, VT, SelectCC, SplatElt, Op0);
3443}
3444
3445SDValue LoongArchTargetLowering::lowerATOMIC_FENCE(SDValue Op,
3446 SelectionDAG &DAG) const {
3447 SDLoc DL(Op);
3448 SyncScope::ID FenceSSID =
3449 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
3450
3451 // singlethread fences only synchronize with signal handlers on the same
3452 // thread and thus only need to preserve instruction order, not actually
3453 // enforce memory ordering.
3454 if (FenceSSID == SyncScope::SingleThread)
3455 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
3456 return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
3457
3458 return Op;
3459}
3460
3461SDValue LoongArchTargetLowering::lowerWRITE_REGISTER(SDValue Op,
3462 SelectionDAG &DAG) const {
3463
3464 if (Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i32) {
3465 DAG.getContext()->emitError(
3466 "On LA64, only 64-bit registers can be written.");
3467 return Op.getOperand(0);
3468 }
3469
3470 if (!Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i64) {
3471 DAG.getContext()->emitError(
3472 "On LA32, only 32-bit registers can be written.");
3473 return Op.getOperand(0);
3474 }
3475
3476 return Op;
3477}
3478
3479SDValue LoongArchTargetLowering::lowerFRAMEADDR(SDValue Op,
3480 SelectionDAG &DAG) const {
3481 if (!isa<ConstantSDNode>(Op.getOperand(0))) {
3482 DAG.getContext()->emitError("argument to '__builtin_frame_address' must "
3483 "be a constant integer");
3484 return SDValue();
3485 }
3486
3487 MachineFunction &MF = DAG.getMachineFunction();
3488 MF.getFrameInfo().setFrameAddressIsTaken(true);
3489 Register FrameReg = Subtarget.getRegisterInfo()->getFrameRegister(MF);
3490 EVT VT = Op.getValueType();
3491 SDLoc DL(Op);
3492 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
3493 unsigned Depth = Op.getConstantOperandVal(0);
3494 int GRLenInBytes = Subtarget.getGRLen() / 8;
3495
3496 while (Depth--) {
3497 int Offset = -(GRLenInBytes * 2);
3498 SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
3499 DAG.getSignedConstant(Offset, DL, VT));
3500 FrameAddr =
3501 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
3502 }
3503 return FrameAddr;
3504}
3505
3506SDValue LoongArchTargetLowering::lowerRETURNADDR(SDValue Op,
3507 SelectionDAG &DAG) const {
3508 // Currently only support lowering return address for current frame.
3509 if (Op.getConstantOperandVal(0) != 0) {
3510 DAG.getContext()->emitError(
3511 "return address can only be determined for the current frame");
3512 return SDValue();
3513 }
3514
3515 MachineFunction &MF = DAG.getMachineFunction();
3516 MF.getFrameInfo().setReturnAddressIsTaken(true);
3517 MVT GRLenVT = Subtarget.getGRLenVT();
3518
3519 // Return the value of the return address register, marking it an implicit
3520 // live-in.
3521 Register Reg = MF.addLiveIn(Subtarget.getRegisterInfo()->getRARegister(),
3522 getRegClassFor(GRLenVT));
3523 return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), Reg, GRLenVT);
3524}
3525
3526SDValue LoongArchTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
3527 SelectionDAG &DAG) const {
3528 MachineFunction &MF = DAG.getMachineFunction();
3529 auto Size = Subtarget.getGRLen() / 8;
3530 auto FI = MF.getFrameInfo().CreateFixedObject(Size, 0, false);
3531 return DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
3532}
3533
3534SDValue LoongArchTargetLowering::lowerVASTART(SDValue Op,
3535 SelectionDAG &DAG) const {
3536 MachineFunction &MF = DAG.getMachineFunction();
3537 auto *FuncInfo = MF.getInfo<LoongArchMachineFunctionInfo>();
3538
3539 SDLoc DL(Op);
3540 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
3541 getPointerTy(DAG.getDataLayout()));
3542
3543 // vastart just stores the address of the VarArgsFrameIndex slot into the
3544 // memory location argument.
3545 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3546 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
3547 MachinePointerInfo(SV));
3548}
3549
3550SDValue LoongArchTargetLowering::lowerUINT_TO_FP(SDValue Op,
3551 SelectionDAG &DAG) const {
3552 assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
3553 !Subtarget.hasBasicD() && "unexpected target features");
3554
3555 SDLoc DL(Op);
3556 SDValue Op0 = Op.getOperand(0);
3557 if (Op0->getOpcode() == ISD::AND) {
3558 auto *C = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
3559 if (C && C->getZExtValue() < UINT64_C(0xFFFFFFFF))
3560 return Op;
3561 }
3562
3563 if (Op0->getOpcode() == LoongArchISD::BSTRPICK &&
3564 Op0.getConstantOperandVal(1) < UINT64_C(0X1F) &&
3565 Op0.getConstantOperandVal(2) == UINT64_C(0))
3566 return Op;
3567
3568 if (Op0.getOpcode() == ISD::AssertZext &&
3569 dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLT(MVT::i32))
3570 return Op;
3571
3572 EVT OpVT = Op0.getValueType();
3573 EVT RetVT = Op.getValueType();
3574 RTLIB::Libcall LC = RTLIB::getUINTTOFP(OpVT, RetVT);
3575 MakeLibCallOptions CallOptions;
3576 CallOptions.setTypeListBeforeSoften(OpVT, RetVT);
3577 SDValue Chain = SDValue();
3578 SDValue Result;
3579 std::tie(Result, Chain) =
3580 makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
3581 return Result;
3582}
3583
3584SDValue LoongArchTargetLowering::lowerSINT_TO_FP(SDValue Op,
3585 SelectionDAG &DAG) const {
3586 assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
3587 !Subtarget.hasBasicD() && "unexpected target features");
3588
3589 SDLoc DL(Op);
3590 SDValue Op0 = Op.getOperand(0);
3591
3592 if ((Op0.getOpcode() == ISD::AssertSext ||
3593 Op0.getOpcode() == ISD::SIGN_EXTEND_INREG) &&
3594 dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLE(MVT::i32))
3595 return Op;
3596
3597 EVT OpVT = Op0.getValueType();
3598 EVT RetVT = Op.getValueType();
3599 RTLIB::Libcall LC = RTLIB::getSINTTOFP(OpVT, RetVT);
3600 MakeLibCallOptions CallOptions;
3601 CallOptions.setTypeListBeforeSoften(OpVT, RetVT);
3602 SDValue Chain = SDValue();
3603 SDValue Result;
3604 std::tie(Result, Chain) =
3605 makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
3606 return Result;
3607}
3608
3609SDValue LoongArchTargetLowering::lowerBITCAST(SDValue Op,
3610 SelectionDAG &DAG) const {
3611
3612 SDLoc DL(Op);
3613 EVT VT = Op.getValueType();
3614 SDValue Op0 = Op.getOperand(0);
3615 EVT Op0VT = Op0.getValueType();
3616
3617 if (Op.getValueType() == MVT::f32 && Op0VT == MVT::i32 &&
3618 Subtarget.is64Bit() && Subtarget.hasBasicF()) {
3619 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
3620 return DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, NewOp0);
3621 }
3622 if (VT == MVT::f64 && Op0VT == MVT::i64 && !Subtarget.is64Bit()) {
3623 SDValue Lo, Hi;
3624 std::tie(Lo, Hi) = DAG.SplitScalar(Op0, DL, MVT::i32, MVT::i32);
3625 return DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64, Lo, Hi);
3626 }
3627 return Op;
3628}
3629
3630SDValue LoongArchTargetLowering::lowerFP_TO_SINT(SDValue Op,
3631 SelectionDAG &DAG) const {
3632
3633 SDLoc DL(Op);
3634 SDValue Op0 = Op.getOperand(0);
3635
3636 if (Op0.getValueType() == MVT::f16)
3637 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
3638
3639 if (Op.getValueSizeInBits() > 32 && Subtarget.hasBasicF() &&
3640 !Subtarget.hasBasicD()) {
3641 SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, MVT::f32, Op0);
3642 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Dst);
3643 }
3644
3645 EVT FPTy = EVT::getFloatingPointVT(Op.getValueSizeInBits());
3646 SDValue Trunc = DAG.getNode(LoongArchISD::FTINT, DL, FPTy, Op0);
3647 return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Trunc);
3648}
3649
3650 static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty,
3651 SelectionDAG &DAG, unsigned Flags) {
3652 return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
3653}
3654
3655 static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty,
3656 SelectionDAG &DAG, unsigned Flags) {
3657 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
3658 Flags);
3659}
3660
3661 static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty,
3662 SelectionDAG &DAG, unsigned Flags) {
3663 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
3664 N->getOffset(), Flags);
3665}
3666
3667 static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty,
3668 SelectionDAG &DAG, unsigned Flags) {
3669 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
3670}
3671
3672template <class NodeTy>
3673SDValue LoongArchTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
3674 CodeModel::Model M,
3675 bool IsLocal) const {
3676 SDLoc DL(N);
3677 EVT Ty = getPointerTy(DAG.getDataLayout());
3678 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
3679 SDValue Load;
3680
3681 switch (M) {
3682 default:
3683 report_fatal_error("Unsupported code model");
3684
3685 case CodeModel::Large: {
3686 assert(Subtarget.is64Bit() && "Large code model requires LA64");
3687
3688 // This is not actually used, but is necessary for successfully matching
3689 // the PseudoLA_*_LARGE nodes.
3690 SDValue Tmp = DAG.getConstant(0, DL, Ty);
3691 if (IsLocal) {
3692 // This generates the pattern (PseudoLA_PCREL_LARGE tmp sym), that
3693 // eventually becomes the desired 5-insn code sequence.
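// With the large code model this roughly expands to:
// pcalau12i $dst, %pc_hi20(sym)
// addi.d $tmp, $zero, %pc_lo12(sym)
// lu32i.d $tmp, %pc64_lo20(sym)
// lu52i.d $tmp, %pc64_hi12(sym)
// add.d $dst, $dst, $tmp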
3694 Load = SDValue(DAG.getMachineNode(LoongArch::PseudoLA_PCREL_LARGE, DL, Ty,
3695 Tmp, Addr),
3696 0);
3697 } else {
3698 // This generates the pattern (PseudoLA_GOT_LARGE tmp sym), that
3699 // eventually becomes the desired 5-insn code sequence.
3700 Load = SDValue(
3701 DAG.getMachineNode(LoongArch::PseudoLA_GOT_LARGE, DL, Ty, Tmp, Addr),
3702 0);
3703 }
3704 break;
3705 }
3706
3707 case CodeModel::Small:
3708 case CodeModel::Medium:
3709 if (IsLocal) {
3710 // This generates the pattern (PseudoLA_PCREL sym), which
3711 //
3712 // for la32r expands to:
3713 // (addi.w (pcaddu12i %pcadd_hi20(sym)) %pcadd_lo12(.Lpcadd_hi)).
3714 //
3715 // for la32s and la64 expands to:
3716 // (addi.w/d (pcalau12i %pc_hi20(sym)) %pc_lo12(sym)).
3717 Load = SDValue(
3718 DAG.getMachineNode(LoongArch::PseudoLA_PCREL, DL, Ty, Addr), 0);
3719 } else {
3720 // This generates the pattern (PseudoLA_GOT sym), which
3721 //
3722 // for la32r expands to:
3723 // (ld.w (pcaddu12i %got_pcadd_hi20(sym)) %pcadd_lo12(.Lpcadd_hi)).
3724 //
3725 // for la32s and la64 expands to:
3726 // (ld.w/d (pcalau12i %got_pc_hi20(sym)) %got_pc_lo12(sym)).
3727 Load =
3728 SDValue(DAG.getMachineNode(LoongArch::PseudoLA_GOT, DL, Ty, Addr), 0);
3729 }
3730 }
3731
3732 if (!IsLocal) {
3733 // Mark the load instruction as invariant to enable hoisting in MachineLICM.
3734 MachineFunction &MF = DAG.getMachineFunction();
3735 MachineMemOperand *MemOp = MF.getMachineMemOperand(
3736 MachinePointerInfo::getGOT(MF),
3737 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
3738 MachineMemOperand::MOInvariant,
3739 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
3740 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
3741 }
3742
3743 return Load;
3744}
3745
3746SDValue LoongArchTargetLowering::lowerBlockAddress(SDValue Op,
3747 SelectionDAG &DAG) const {
3748 return getAddr(cast<BlockAddressSDNode>(Op), DAG,
3749 DAG.getTarget().getCodeModel());
3750}
3751
3752SDValue LoongArchTargetLowering::lowerJumpTable(SDValue Op,
3753 SelectionDAG &DAG) const {
3754 return getAddr(cast<JumpTableSDNode>(Op), DAG,
3755 DAG.getTarget().getCodeModel());
3756}
3757
3758SDValue LoongArchTargetLowering::lowerConstantPool(SDValue Op,
3759 SelectionDAG &DAG) const {
3760 return getAddr(cast<ConstantPoolSDNode>(Op), DAG,
3761 DAG.getTarget().getCodeModel());
3762}
3763
3764SDValue LoongArchTargetLowering::lowerGlobalAddress(SDValue Op,
3765 SelectionDAG &DAG) const {
3766 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
3767 assert(N->getOffset() == 0 && "unexpected offset in global node");
3768 auto CM = DAG.getTarget().getCodeModel();
3769 const GlobalValue *GV = N->getGlobal();
3770
3771 if (GV->isDSOLocal() && isa<GlobalVariable>(GV)) {
3772 if (auto GCM = dyn_cast<GlobalVariable>(GV)->getCodeModel())
3773 CM = *GCM;
3774 }
3775
3776 return getAddr(N, DAG, CM, GV->isDSOLocal());
3777}
3778
3779SDValue LoongArchTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
3780 SelectionDAG &DAG,
3781 unsigned Opc, bool UseGOT,
3782 bool Large) const {
3783 SDLoc DL(N);
3784 EVT Ty = getPointerTy(DAG.getDataLayout());
3785 MVT GRLenVT = Subtarget.getGRLenVT();
3786
3787 // This is not actually used, but is necessary for successfully matching the
3788 // PseudoLA_*_LARGE nodes.
3789 SDValue Tmp = DAG.getConstant(0, DL, Ty);
3790 SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
3791
3792 // Only IE needs an extra argument for large code model.
3793 SDValue Offset = Opc == LoongArch::PseudoLA_TLS_IE_LARGE
3794 ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
3795 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
3796
3797 // For LE under the normal/medium code model, the add-tp operation is emitted
3798 // during pseudo-instruction expansion.
3799 if (Opc == LoongArch::PseudoLA_TLS_LE && !Large)
3800 return Offset;
3801
3802 if (UseGOT) {
3803 // Mark the load instruction as invariant to enable hoisting in MachineLICM.
3804 MachineFunction &MF = DAG.getMachineFunction();
3805 MachineMemOperand *MemOp = MF.getMachineMemOperand(
3806 MachinePointerInfo::getGOT(MF),
3807 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
3808 MachineMemOperand::MOInvariant,
3809 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
3810 DAG.setNodeMemRefs(cast<MachineSDNode>(Offset.getNode()), {MemOp});
3811 }
3812
3813 // Add the thread pointer.
3814 return DAG.getNode(ISD::ADD, DL, Ty, Offset,
3815 DAG.getRegister(LoongArch::R2, GRLenVT));
3816}
3817
3818SDValue LoongArchTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
3819 SelectionDAG &DAG,
3820 unsigned Opc,
3821 bool Large) const {
3822 SDLoc DL(N);
3823 EVT Ty = getPointerTy(DAG.getDataLayout());
3824 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
3825
3826 // This is not actually used, but is necessary for successfully matching the
3827 // PseudoLA_*_LARGE nodes.
3828 SDValue Tmp = DAG.getConstant(0, DL, Ty);
3829
3830 // Use a PC-relative addressing mode to access the dynamic GOT address.
3831 SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
3832 SDValue Load = Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
3833 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
3834
3835 // Prepare argument list to generate call.
3836 ArgListTy Args;
3837 Args.emplace_back(Load, CallTy);
3838
3839 // Setup call to __tls_get_addr.
3840 TargetLowering::CallLoweringInfo CLI(DAG);
3841 CLI.setDebugLoc(DL)
3842 .setChain(DAG.getEntryNode())
3843 .setLibCallee(CallingConv::C, CallTy,
3844 DAG.getExternalSymbol("__tls_get_addr", Ty),
3845 std::move(Args));
3846
3847 return LowerCallTo(CLI).first;
3848}
3849
3850SDValue LoongArchTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
3851 SelectionDAG &DAG, unsigned Opc,
3852 bool Large) const {
3853 SDLoc DL(N);
3854 EVT Ty = getPointerTy(DAG.getDataLayout());
3855 const GlobalValue *GV = N->getGlobal();
3856
3857 // This is not actually used, but is necessary for successfully matching the
3858 // PseudoLA_*_LARGE nodes.
3859 SDValue Tmp = DAG.getConstant(0, DL, Ty);
3860
3861 // Use a PC-relative addressing mode to access the global dynamic GOT address.
3862 // This generates the pattern (PseudoLA_TLS_DESC_PC{,LARGE} sym).
3863 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
3864 return Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
3865 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
3866}
3867
3868SDValue
3869LoongArchTargetLowering::lowerGlobalTLSAddress(SDValue Op,
3870 SelectionDAG &DAG) const {
3871 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
3872 CallingConv::GHC)
3873 report_fatal_error("In GHC calling convention TLS is not supported");
3874
3875 bool Large = DAG.getTarget().getCodeModel() == CodeModel::Large;
3876 assert((!Large || Subtarget.is64Bit()) && "Large code model requires LA64");
3877
3878 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
3879 assert(N->getOffset() == 0 && "unexpected offset in global node");
3880
3881 if (DAG.getTarget().useEmulatedTLS())
3882 reportFatalUsageError("the emulated TLS is prohibited");
3883
3884 bool IsDesc = DAG.getTarget().useTLSDESC();
3885
3886 switch (getTargetMachine().getTLSModel(N->getGlobal())) {
3887 case TLSModel::GeneralDynamic:
3888 // In this model, application code calls the dynamic linker function
3889 // __tls_get_addr to locate TLS offsets into the dynamic thread vector at
3890 // runtime.
3891 if (!IsDesc)
3892 return getDynamicTLSAddr(N, DAG,
3893 Large ? LoongArch::PseudoLA_TLS_GD_LARGE
3894 : LoongArch::PseudoLA_TLS_GD,
3895 Large);
3896 break;
3897 case TLSModel::LocalDynamic:
3898 // Same as GeneralDynamic, except for assembly modifiers and relocation
3899 // records.
3900 if (!IsDesc)
3901 return getDynamicTLSAddr(N, DAG,
3902 Large ? LoongArch::PseudoLA_TLS_LD_LARGE
3903 : LoongArch::PseudoLA_TLS_LD,
3904 Large);
3905 break;
3906 case TLSModel::InitialExec:
3907 // This model uses the GOT to resolve TLS offsets.
3908 return getStaticTLSAddr(N, DAG,
3909 Large ? LoongArch::PseudoLA_TLS_IE_LARGE
3910 : LoongArch::PseudoLA_TLS_IE,
3911 /*UseGOT=*/true, Large);
3912 case TLSModel::LocalExec:
3913 // This model is used when static linking as the TLS offsets are resolved
3914 // during program linking.
3915 //
3916 // This node doesn't need an extra argument for the large code model.
3917 return getStaticTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_LE,
3918 /*UseGOT=*/false, Large);
3919 }
3920
3921 return getTLSDescAddr(N, DAG,
3922 Large ? LoongArch::PseudoLA_TLS_DESC_LARGE
3923 : LoongArch::PseudoLA_TLS_DESC,
3924 Large);
3925}
3926
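// Verifies that operand ImmOp of the intrinsic is an immediate fitting in N
// bits (signed when IsSigned is set) and emits a diagnostic plus an UNDEF
// result otherwise. For example, loongarch_lsx_vsat_b takes a uimm3, so it is
// checked with checkIntrinsicImmArg<3>(Op, 2, DAG) below and any immediate
// outside [0, 7] is rejected as "argument out of range".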
3927template <unsigned N>
3928 static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp,
3929 SelectionDAG &DAG, bool IsSigned = false) {
3930 auto *CImm = cast<ConstantSDNode>(Op->getOperand(ImmOp));
3931 // Check the ImmArg.
3932 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
3933 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
3934 DAG.getContext()->emitError(Op->getOperationName(0) +
3935 ": argument out of range.");
3936 return DAG.getNode(ISD::UNDEF, SDLoc(Op), Op.getValueType());
3937 }
3938 return SDValue();
3939}
3940
3941SDValue
3942LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
3943 SelectionDAG &DAG) const {
3944 switch (Op.getConstantOperandVal(0)) {
3945 default:
3946 return SDValue(); // Don't custom lower most intrinsics.
3947 case Intrinsic::thread_pointer: {
3948 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3949 return DAG.getRegister(LoongArch::R2, PtrVT);
3950 }
3951 case Intrinsic::loongarch_lsx_vpickve2gr_d:
3952 case Intrinsic::loongarch_lsx_vpickve2gr_du:
3953 case Intrinsic::loongarch_lsx_vreplvei_d:
3954 case Intrinsic::loongarch_lasx_xvrepl128vei_d:
3955 return checkIntrinsicImmArg<1>(Op, 2, DAG);
3956 case Intrinsic::loongarch_lsx_vreplvei_w:
3957 case Intrinsic::loongarch_lasx_xvrepl128vei_w:
3958 case Intrinsic::loongarch_lasx_xvpickve2gr_d:
3959 case Intrinsic::loongarch_lasx_xvpickve2gr_du:
3960 case Intrinsic::loongarch_lasx_xvpickve_d:
3961 case Intrinsic::loongarch_lasx_xvpickve_d_f:
3962 return checkIntrinsicImmArg<2>(Op, 2, DAG);
3963 case Intrinsic::loongarch_lasx_xvinsve0_d:
3964 return checkIntrinsicImmArg<2>(Op, 3, DAG);
3965 case Intrinsic::loongarch_lsx_vsat_b:
3966 case Intrinsic::loongarch_lsx_vsat_bu:
3967 case Intrinsic::loongarch_lsx_vrotri_b:
3968 case Intrinsic::loongarch_lsx_vsllwil_h_b:
3969 case Intrinsic::loongarch_lsx_vsllwil_hu_bu:
3970 case Intrinsic::loongarch_lsx_vsrlri_b:
3971 case Intrinsic::loongarch_lsx_vsrari_b:
3972 case Intrinsic::loongarch_lsx_vreplvei_h:
3973 case Intrinsic::loongarch_lasx_xvsat_b:
3974 case Intrinsic::loongarch_lasx_xvsat_bu:
3975 case Intrinsic::loongarch_lasx_xvrotri_b:
3976 case Intrinsic::loongarch_lasx_xvsllwil_h_b:
3977 case Intrinsic::loongarch_lasx_xvsllwil_hu_bu:
3978 case Intrinsic::loongarch_lasx_xvsrlri_b:
3979 case Intrinsic::loongarch_lasx_xvsrari_b:
3980 case Intrinsic::loongarch_lasx_xvrepl128vei_h:
3981 case Intrinsic::loongarch_lasx_xvpickve_w:
3982 case Intrinsic::loongarch_lasx_xvpickve_w_f:
3983 return checkIntrinsicImmArg<3>(Op, 2, DAG);
3984 case Intrinsic::loongarch_lasx_xvinsve0_w:
3985 return checkIntrinsicImmArg<3>(Op, 3, DAG);
3986 case Intrinsic::loongarch_lsx_vsat_h:
3987 case Intrinsic::loongarch_lsx_vsat_hu:
3988 case Intrinsic::loongarch_lsx_vrotri_h:
3989 case Intrinsic::loongarch_lsx_vsllwil_w_h:
3990 case Intrinsic::loongarch_lsx_vsllwil_wu_hu:
3991 case Intrinsic::loongarch_lsx_vsrlri_h:
3992 case Intrinsic::loongarch_lsx_vsrari_h:
3993 case Intrinsic::loongarch_lsx_vreplvei_b:
3994 case Intrinsic::loongarch_lasx_xvsat_h:
3995 case Intrinsic::loongarch_lasx_xvsat_hu:
3996 case Intrinsic::loongarch_lasx_xvrotri_h:
3997 case Intrinsic::loongarch_lasx_xvsllwil_w_h:
3998 case Intrinsic::loongarch_lasx_xvsllwil_wu_hu:
3999 case Intrinsic::loongarch_lasx_xvsrlri_h:
4000 case Intrinsic::loongarch_lasx_xvsrari_h:
4001 case Intrinsic::loongarch_lasx_xvrepl128vei_b:
4002 return checkIntrinsicImmArg<4>(Op, 2, DAG);
4003 case Intrinsic::loongarch_lsx_vsrlni_b_h:
4004 case Intrinsic::loongarch_lsx_vsrani_b_h:
4005 case Intrinsic::loongarch_lsx_vsrlrni_b_h:
4006 case Intrinsic::loongarch_lsx_vsrarni_b_h:
4007 case Intrinsic::loongarch_lsx_vssrlni_b_h:
4008 case Intrinsic::loongarch_lsx_vssrani_b_h:
4009 case Intrinsic::loongarch_lsx_vssrlni_bu_h:
4010 case Intrinsic::loongarch_lsx_vssrani_bu_h:
4011 case Intrinsic::loongarch_lsx_vssrlrni_b_h:
4012 case Intrinsic::loongarch_lsx_vssrarni_b_h:
4013 case Intrinsic::loongarch_lsx_vssrlrni_bu_h:
4014 case Intrinsic::loongarch_lsx_vssrarni_bu_h:
4015 case Intrinsic::loongarch_lasx_xvsrlni_b_h:
4016 case Intrinsic::loongarch_lasx_xvsrani_b_h:
4017 case Intrinsic::loongarch_lasx_xvsrlrni_b_h:
4018 case Intrinsic::loongarch_lasx_xvsrarni_b_h:
4019 case Intrinsic::loongarch_lasx_xvssrlni_b_h:
4020 case Intrinsic::loongarch_lasx_xvssrani_b_h:
4021 case Intrinsic::loongarch_lasx_xvssrlni_bu_h:
4022 case Intrinsic::loongarch_lasx_xvssrani_bu_h:
4023 case Intrinsic::loongarch_lasx_xvssrlrni_b_h:
4024 case Intrinsic::loongarch_lasx_xvssrarni_b_h:
4025 case Intrinsic::loongarch_lasx_xvssrlrni_bu_h:
4026 case Intrinsic::loongarch_lasx_xvssrarni_bu_h:
4027 return checkIntrinsicImmArg<4>(Op, 3, DAG);
4028 case Intrinsic::loongarch_lsx_vsat_w:
4029 case Intrinsic::loongarch_lsx_vsat_wu:
4030 case Intrinsic::loongarch_lsx_vrotri_w:
4031 case Intrinsic::loongarch_lsx_vsllwil_d_w:
4032 case Intrinsic::loongarch_lsx_vsllwil_du_wu:
4033 case Intrinsic::loongarch_lsx_vsrlri_w:
4034 case Intrinsic::loongarch_lsx_vsrari_w:
4035 case Intrinsic::loongarch_lsx_vslei_bu:
4036 case Intrinsic::loongarch_lsx_vslei_hu:
4037 case Intrinsic::loongarch_lsx_vslei_wu:
4038 case Intrinsic::loongarch_lsx_vslei_du:
4039 case Intrinsic::loongarch_lsx_vslti_bu:
4040 case Intrinsic::loongarch_lsx_vslti_hu:
4041 case Intrinsic::loongarch_lsx_vslti_wu:
4042 case Intrinsic::loongarch_lsx_vslti_du:
4043 case Intrinsic::loongarch_lsx_vbsll_v:
4044 case Intrinsic::loongarch_lsx_vbsrl_v:
4045 case Intrinsic::loongarch_lasx_xvsat_w:
4046 case Intrinsic::loongarch_lasx_xvsat_wu:
4047 case Intrinsic::loongarch_lasx_xvrotri_w:
4048 case Intrinsic::loongarch_lasx_xvsllwil_d_w:
4049 case Intrinsic::loongarch_lasx_xvsllwil_du_wu:
4050 case Intrinsic::loongarch_lasx_xvsrlri_w:
4051 case Intrinsic::loongarch_lasx_xvsrari_w:
4052 case Intrinsic::loongarch_lasx_xvslei_bu:
4053 case Intrinsic::loongarch_lasx_xvslei_hu:
4054 case Intrinsic::loongarch_lasx_xvslei_wu:
4055 case Intrinsic::loongarch_lasx_xvslei_du:
4056 case Intrinsic::loongarch_lasx_xvslti_bu:
4057 case Intrinsic::loongarch_lasx_xvslti_hu:
4058 case Intrinsic::loongarch_lasx_xvslti_wu:
4059 case Intrinsic::loongarch_lasx_xvslti_du:
4060 case Intrinsic::loongarch_lasx_xvbsll_v:
4061 case Intrinsic::loongarch_lasx_xvbsrl_v:
4062 return checkIntrinsicImmArg<5>(Op, 2, DAG);
4063 case Intrinsic::loongarch_lsx_vseqi_b:
4064 case Intrinsic::loongarch_lsx_vseqi_h:
4065 case Intrinsic::loongarch_lsx_vseqi_w:
4066 case Intrinsic::loongarch_lsx_vseqi_d:
4067 case Intrinsic::loongarch_lsx_vslei_b:
4068 case Intrinsic::loongarch_lsx_vslei_h:
4069 case Intrinsic::loongarch_lsx_vslei_w:
4070 case Intrinsic::loongarch_lsx_vslei_d:
4071 case Intrinsic::loongarch_lsx_vslti_b:
4072 case Intrinsic::loongarch_lsx_vslti_h:
4073 case Intrinsic::loongarch_lsx_vslti_w:
4074 case Intrinsic::loongarch_lsx_vslti_d:
4075 case Intrinsic::loongarch_lasx_xvseqi_b:
4076 case Intrinsic::loongarch_lasx_xvseqi_h:
4077 case Intrinsic::loongarch_lasx_xvseqi_w:
4078 case Intrinsic::loongarch_lasx_xvseqi_d:
4079 case Intrinsic::loongarch_lasx_xvslei_b:
4080 case Intrinsic::loongarch_lasx_xvslei_h:
4081 case Intrinsic::loongarch_lasx_xvslei_w:
4082 case Intrinsic::loongarch_lasx_xvslei_d:
4083 case Intrinsic::loongarch_lasx_xvslti_b:
4084 case Intrinsic::loongarch_lasx_xvslti_h:
4085 case Intrinsic::loongarch_lasx_xvslti_w:
4086 case Intrinsic::loongarch_lasx_xvslti_d:
4087 return checkIntrinsicImmArg<5>(Op, 2, DAG, /*IsSigned=*/true);
4088 case Intrinsic::loongarch_lsx_vsrlni_h_w:
4089 case Intrinsic::loongarch_lsx_vsrani_h_w:
4090 case Intrinsic::loongarch_lsx_vsrlrni_h_w:
4091 case Intrinsic::loongarch_lsx_vsrarni_h_w:
4092 case Intrinsic::loongarch_lsx_vssrlni_h_w:
4093 case Intrinsic::loongarch_lsx_vssrani_h_w:
4094 case Intrinsic::loongarch_lsx_vssrlni_hu_w:
4095 case Intrinsic::loongarch_lsx_vssrani_hu_w:
4096 case Intrinsic::loongarch_lsx_vssrlrni_h_w:
4097 case Intrinsic::loongarch_lsx_vssrarni_h_w:
4098 case Intrinsic::loongarch_lsx_vssrlrni_hu_w:
4099 case Intrinsic::loongarch_lsx_vssrarni_hu_w:
4100 case Intrinsic::loongarch_lsx_vfrstpi_b:
4101 case Intrinsic::loongarch_lsx_vfrstpi_h:
4102 case Intrinsic::loongarch_lasx_xvsrlni_h_w:
4103 case Intrinsic::loongarch_lasx_xvsrani_h_w:
4104 case Intrinsic::loongarch_lasx_xvsrlrni_h_w:
4105 case Intrinsic::loongarch_lasx_xvsrarni_h_w:
4106 case Intrinsic::loongarch_lasx_xvssrlni_h_w:
4107 case Intrinsic::loongarch_lasx_xvssrani_h_w:
4108 case Intrinsic::loongarch_lasx_xvssrlni_hu_w:
4109 case Intrinsic::loongarch_lasx_xvssrani_hu_w:
4110 case Intrinsic::loongarch_lasx_xvssrlrni_h_w:
4111 case Intrinsic::loongarch_lasx_xvssrarni_h_w:
4112 case Intrinsic::loongarch_lasx_xvssrlrni_hu_w:
4113 case Intrinsic::loongarch_lasx_xvssrarni_hu_w:
4114 case Intrinsic::loongarch_lasx_xvfrstpi_b:
4115 case Intrinsic::loongarch_lasx_xvfrstpi_h:
4116 return checkIntrinsicImmArg<5>(Op, 3, DAG);
4117 case Intrinsic::loongarch_lsx_vsat_d:
4118 case Intrinsic::loongarch_lsx_vsat_du:
4119 case Intrinsic::loongarch_lsx_vrotri_d:
4120 case Intrinsic::loongarch_lsx_vsrlri_d:
4121 case Intrinsic::loongarch_lsx_vsrari_d:
4122 case Intrinsic::loongarch_lasx_xvsat_d:
4123 case Intrinsic::loongarch_lasx_xvsat_du:
4124 case Intrinsic::loongarch_lasx_xvrotri_d:
4125 case Intrinsic::loongarch_lasx_xvsrlri_d:
4126 case Intrinsic::loongarch_lasx_xvsrari_d:
4127 return checkIntrinsicImmArg<6>(Op, 2, DAG);
4128 case Intrinsic::loongarch_lsx_vsrlni_w_d:
4129 case Intrinsic::loongarch_lsx_vsrani_w_d:
4130 case Intrinsic::loongarch_lsx_vsrlrni_w_d:
4131 case Intrinsic::loongarch_lsx_vsrarni_w_d:
4132 case Intrinsic::loongarch_lsx_vssrlni_w_d:
4133 case Intrinsic::loongarch_lsx_vssrani_w_d:
4134 case Intrinsic::loongarch_lsx_vssrlni_wu_d:
4135 case Intrinsic::loongarch_lsx_vssrani_wu_d:
4136 case Intrinsic::loongarch_lsx_vssrlrni_w_d:
4137 case Intrinsic::loongarch_lsx_vssrarni_w_d:
4138 case Intrinsic::loongarch_lsx_vssrlrni_wu_d:
4139 case Intrinsic::loongarch_lsx_vssrarni_wu_d:
4140 case Intrinsic::loongarch_lasx_xvsrlni_w_d:
4141 case Intrinsic::loongarch_lasx_xvsrani_w_d:
4142 case Intrinsic::loongarch_lasx_xvsrlrni_w_d:
4143 case Intrinsic::loongarch_lasx_xvsrarni_w_d:
4144 case Intrinsic::loongarch_lasx_xvssrlni_w_d:
4145 case Intrinsic::loongarch_lasx_xvssrani_w_d:
4146 case Intrinsic::loongarch_lasx_xvssrlni_wu_d:
4147 case Intrinsic::loongarch_lasx_xvssrani_wu_d:
4148 case Intrinsic::loongarch_lasx_xvssrlrni_w_d:
4149 case Intrinsic::loongarch_lasx_xvssrarni_w_d:
4150 case Intrinsic::loongarch_lasx_xvssrlrni_wu_d:
4151 case Intrinsic::loongarch_lasx_xvssrarni_wu_d:
4152 return checkIntrinsicImmArg<6>(Op, 3, DAG);
4153 case Intrinsic::loongarch_lsx_vsrlni_d_q:
4154 case Intrinsic::loongarch_lsx_vsrani_d_q:
4155 case Intrinsic::loongarch_lsx_vsrlrni_d_q:
4156 case Intrinsic::loongarch_lsx_vsrarni_d_q:
4157 case Intrinsic::loongarch_lsx_vssrlni_d_q:
4158 case Intrinsic::loongarch_lsx_vssrani_d_q:
4159 case Intrinsic::loongarch_lsx_vssrlni_du_q:
4160 case Intrinsic::loongarch_lsx_vssrani_du_q:
4161 case Intrinsic::loongarch_lsx_vssrlrni_d_q:
4162 case Intrinsic::loongarch_lsx_vssrarni_d_q:
4163 case Intrinsic::loongarch_lsx_vssrlrni_du_q:
4164 case Intrinsic::loongarch_lsx_vssrarni_du_q:
4165 case Intrinsic::loongarch_lasx_xvsrlni_d_q:
4166 case Intrinsic::loongarch_lasx_xvsrani_d_q:
4167 case Intrinsic::loongarch_lasx_xvsrlrni_d_q:
4168 case Intrinsic::loongarch_lasx_xvsrarni_d_q:
4169 case Intrinsic::loongarch_lasx_xvssrlni_d_q:
4170 case Intrinsic::loongarch_lasx_xvssrani_d_q:
4171 case Intrinsic::loongarch_lasx_xvssrlni_du_q:
4172 case Intrinsic::loongarch_lasx_xvssrani_du_q:
4173 case Intrinsic::loongarch_lasx_xvssrlrni_d_q:
4174 case Intrinsic::loongarch_lasx_xvssrarni_d_q:
4175 case Intrinsic::loongarch_lasx_xvssrlrni_du_q:
4176 case Intrinsic::loongarch_lasx_xvssrarni_du_q:
4177 return checkIntrinsicImmArg<7>(Op, 3, DAG);
4178 case Intrinsic::loongarch_lsx_vnori_b:
4179 case Intrinsic::loongarch_lsx_vshuf4i_b:
4180 case Intrinsic::loongarch_lsx_vshuf4i_h:
4181 case Intrinsic::loongarch_lsx_vshuf4i_w:
4182 case Intrinsic::loongarch_lasx_xvnori_b:
4183 case Intrinsic::loongarch_lasx_xvshuf4i_b:
4184 case Intrinsic::loongarch_lasx_xvshuf4i_h:
4185 case Intrinsic::loongarch_lasx_xvshuf4i_w:
4186 case Intrinsic::loongarch_lasx_xvpermi_d:
4187 return checkIntrinsicImmArg<8>(Op, 2, DAG);
4188 case Intrinsic::loongarch_lsx_vshuf4i_d:
4189 case Intrinsic::loongarch_lsx_vpermi_w:
4190 case Intrinsic::loongarch_lsx_vbitseli_b:
4191 case Intrinsic::loongarch_lsx_vextrins_b:
4192 case Intrinsic::loongarch_lsx_vextrins_h:
4193 case Intrinsic::loongarch_lsx_vextrins_w:
4194 case Intrinsic::loongarch_lsx_vextrins_d:
4195 case Intrinsic::loongarch_lasx_xvshuf4i_d:
4196 case Intrinsic::loongarch_lasx_xvpermi_w:
4197 case Intrinsic::loongarch_lasx_xvpermi_q:
4198 case Intrinsic::loongarch_lasx_xvbitseli_b:
4199 case Intrinsic::loongarch_lasx_xvextrins_b:
4200 case Intrinsic::loongarch_lasx_xvextrins_h:
4201 case Intrinsic::loongarch_lasx_xvextrins_w:
4202 case Intrinsic::loongarch_lasx_xvextrins_d:
4203 return checkIntrinsicImmArg<8>(Op, 3, DAG);
4204 case Intrinsic::loongarch_lsx_vrepli_b:
4205 case Intrinsic::loongarch_lsx_vrepli_h:
4206 case Intrinsic::loongarch_lsx_vrepli_w:
4207 case Intrinsic::loongarch_lsx_vrepli_d:
4208 case Intrinsic::loongarch_lasx_xvrepli_b:
4209 case Intrinsic::loongarch_lasx_xvrepli_h:
4210 case Intrinsic::loongarch_lasx_xvrepli_w:
4211 case Intrinsic::loongarch_lasx_xvrepli_d:
4212 return checkIntrinsicImmArg<10>(Op, 1, DAG, /*IsSigned=*/true);
4213 case Intrinsic::loongarch_lsx_vldi:
4214 case Intrinsic::loongarch_lasx_xvldi:
4215 return checkIntrinsicImmArg<13>(Op, 1, DAG, /*IsSigned=*/true);
4216 }
4217}
4218
4219 // Helper function that emits an error message for intrinsics with a chain and
4220 // returns the merge of an UNDEF value and the chain.
4221 static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op,
4222 StringRef ErrorMsg,
4223 SelectionDAG &DAG) {
4224 DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
4225 return DAG.getMergeValues({DAG.getUNDEF(Op.getValueType()), Op.getOperand(0)},
4226 SDLoc(Op));
4227}
4228
4229SDValue
4230LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
4231 SelectionDAG &DAG) const {
4232 SDLoc DL(Op);
4233 MVT GRLenVT = Subtarget.getGRLenVT();
4234 EVT VT = Op.getValueType();
4235 SDValue Chain = Op.getOperand(0);
4236 const StringRef ErrorMsgOOR = "argument out of range";
4237 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
4238 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
4239
4240 switch (Op.getConstantOperandVal(1)) {
4241 default:
4242 return Op;
4243 case Intrinsic::loongarch_crc_w_b_w:
4244 case Intrinsic::loongarch_crc_w_h_w:
4245 case Intrinsic::loongarch_crc_w_w_w:
4246 case Intrinsic::loongarch_crc_w_d_w:
4247 case Intrinsic::loongarch_crcc_w_b_w:
4248 case Intrinsic::loongarch_crcc_w_h_w:
4249 case Intrinsic::loongarch_crcc_w_w_w:
4250 case Intrinsic::loongarch_crcc_w_d_w:
4251 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqLA64, DAG);
4252 case Intrinsic::loongarch_csrrd_w:
4253 case Intrinsic::loongarch_csrrd_d: {
4254 unsigned Imm = Op.getConstantOperandVal(2);
4255 return !isUInt<14>(Imm)
4256 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4257 : DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
4258 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
4259 }
4260 case Intrinsic::loongarch_csrwr_w:
4261 case Intrinsic::loongarch_csrwr_d: {
4262 unsigned Imm = Op.getConstantOperandVal(3);
4263 return !isUInt<14>(Imm)
4264 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4265 : DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
4266 {Chain, Op.getOperand(2),
4267 DAG.getConstant(Imm, DL, GRLenVT)});
4268 }
4269 case Intrinsic::loongarch_csrxchg_w:
4270 case Intrinsic::loongarch_csrxchg_d: {
4271 unsigned Imm = Op.getConstantOperandVal(4);
4272 return !isUInt<14>(Imm)
4273 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4274 : DAG.getNode(LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
4275 {Chain, Op.getOperand(2), Op.getOperand(3),
4276 DAG.getConstant(Imm, DL, GRLenVT)});
4277 }
4278 case Intrinsic::loongarch_iocsrrd_d: {
4279 return DAG.getNode(
4280 LoongArchISD::IOCSRRD_D, DL, {GRLenVT, MVT::Other},
4281 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2))});
4282 }
4283#define IOCSRRD_CASE(NAME, NODE) \
4284 case Intrinsic::loongarch_##NAME: { \
4285 return DAG.getNode(LoongArchISD::NODE, DL, {GRLenVT, MVT::Other}, \
4286 {Chain, Op.getOperand(2)}); \
4287 }
4288 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
4289 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
4290 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
4291#undef IOCSRRD_CASE
4292 case Intrinsic::loongarch_cpucfg: {
4293 return DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
4294 {Chain, Op.getOperand(2)});
4295 }
4296 case Intrinsic::loongarch_lddir_d: {
4297 unsigned Imm = Op.getConstantOperandVal(3);
4298 return !isUInt<8>(Imm)
4299 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4300 : Op;
4301 }
4302 case Intrinsic::loongarch_movfcsr2gr: {
4303 if (!Subtarget.hasBasicF())
4304 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqF, DAG);
4305 unsigned Imm = Op.getConstantOperandVal(2);
4306 return !isUInt<2>(Imm)
4307 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4308 : DAG.getNode(LoongArchISD::MOVFCSR2GR, DL, {VT, MVT::Other},
4309 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
4310 }
4311 case Intrinsic::loongarch_lsx_vld:
4312 case Intrinsic::loongarch_lsx_vldrepl_b:
4313 case Intrinsic::loongarch_lasx_xvld:
4314 case Intrinsic::loongarch_lasx_xvldrepl_b:
4315 return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
4316 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4317 : SDValue();
4318 case Intrinsic::loongarch_lsx_vldrepl_h:
4319 case Intrinsic::loongarch_lasx_xvldrepl_h:
4320 return !isShiftedInt<11, 1>(
4321 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
4322 ? emitIntrinsicWithChainErrorMessage(
4323 Op, "argument out of range or not a multiple of 2", DAG)
4324 : SDValue();
4325 case Intrinsic::loongarch_lsx_vldrepl_w:
4326 case Intrinsic::loongarch_lasx_xvldrepl_w:
4327 return !isShiftedInt<10, 2>(
4328 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
4329 ? emitIntrinsicWithChainErrorMessage(
4330 Op, "argument out of range or not a multiple of 4", DAG)
4331 : SDValue();
4332 case Intrinsic::loongarch_lsx_vldrepl_d:
4333 case Intrinsic::loongarch_lasx_xvldrepl_d:
4334 return !isShiftedInt<9, 3>(
4335 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
4336 ? emitIntrinsicWithChainErrorMessage(
4337 Op, "argument out of range or not a multiple of 8", DAG)
4338 : SDValue();
4339 }
4340}
4341
4342 // Helper function that emits an error message for intrinsics with a void
4343 // return value and returns the chain.
4344 static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg,
4345 SelectionDAG &DAG) {
4346
4347 DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
4348 return Op.getOperand(0);
4349}
4350
4351SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op,
4352 SelectionDAG &DAG) const {
4353 SDLoc DL(Op);
4354 MVT GRLenVT = Subtarget.getGRLenVT();
4355 SDValue Chain = Op.getOperand(0);
4356 uint64_t IntrinsicEnum = Op.getConstantOperandVal(1);
4357 SDValue Op2 = Op.getOperand(2);
4358 const StringRef ErrorMsgOOR = "argument out of range";
4359 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
4360 const StringRef ErrorMsgReqLA32 = "requires loongarch32";
4361 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
4362
4363 switch (IntrinsicEnum) {
4364 default:
4365 // TODO: Add more Intrinsics.
4366 return SDValue();
4367 case Intrinsic::loongarch_cacop_d:
4368 case Intrinsic::loongarch_cacop_w: {
4369 if (IntrinsicEnum == Intrinsic::loongarch_cacop_d && !Subtarget.is64Bit())
4370 return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG);
4371 if (IntrinsicEnum == Intrinsic::loongarch_cacop_w && Subtarget.is64Bit())
4372 return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA32, DAG);
4373 // call void @llvm.loongarch.cacop.[d/w](uimm5, rj, simm12)
4374 unsigned Imm1 = Op2->getAsZExtVal();
4375 int Imm2 = cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue();
4376 if (!isUInt<5>(Imm1) || !isInt<12>(Imm2))
4377 return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
4378 return Op;
4379 }
4380 case Intrinsic::loongarch_dbar: {
4381 unsigned Imm = Op2->getAsZExtVal();
4382 return !isUInt<15>(Imm)
4383 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4384 : DAG.getNode(LoongArchISD::DBAR, DL, MVT::Other, Chain,
4385 DAG.getConstant(Imm, DL, GRLenVT));
4386 }
4387 case Intrinsic::loongarch_ibar: {
4388 unsigned Imm = Op2->getAsZExtVal();
4389 return !isUInt<15>(Imm)
4390 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4391 : DAG.getNode(LoongArchISD::IBAR, DL, MVT::Other, Chain,
4392 DAG.getConstant(Imm, DL, GRLenVT));
4393 }
4394 case Intrinsic::loongarch_break: {
4395 unsigned Imm = Op2->getAsZExtVal();
4396 return !isUInt<15>(Imm)
4397 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4398 : DAG.getNode(LoongArchISD::BREAK, DL, MVT::Other, Chain,
4399 DAG.getConstant(Imm, DL, GRLenVT));
4400 }
4401 case Intrinsic::loongarch_movgr2fcsr: {
4402 if (!Subtarget.hasBasicF())
4403 return emitIntrinsicErrorMessage(Op, ErrorMsgReqF, DAG);
4404 unsigned Imm = Op2->getAsZExtVal();
4405 return !isUInt<2>(Imm)
4406 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4407 : DAG.getNode(LoongArchISD::MOVGR2FCSR, DL, MVT::Other, Chain,
4408 DAG.getConstant(Imm, DL, GRLenVT),
4409 DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT,
4410 Op.getOperand(3)));
4411 }
4412 case Intrinsic::loongarch_syscall: {
4413 unsigned Imm = Op2->getAsZExtVal();
4414 return !isUInt<15>(Imm)
4415 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4416 : DAG.getNode(LoongArchISD::SYSCALL, DL, MVT::Other, Chain,
4417 DAG.getConstant(Imm, DL, GRLenVT));
4418 }
4419#define IOCSRWR_CASE(NAME, NODE) \
4420 case Intrinsic::loongarch_##NAME: { \
4421 SDValue Op3 = Op.getOperand(3); \
4422 return Subtarget.is64Bit() \
4423 ? DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, \
4424 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
4425 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op3)) \
4426 : DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, Op2, \
4427 Op3); \
4428 }
4429 IOCSRWR_CASE(iocsrwr_b, IOCSRWR_B);
4430 IOCSRWR_CASE(iocsrwr_h, IOCSRWR_H);
4431 IOCSRWR_CASE(iocsrwr_w, IOCSRWR_W);
4432#undef IOCSRWR_CASE
4433 case Intrinsic::loongarch_iocsrwr_d: {
4434 return !Subtarget.is64Bit()
4435 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
4436 : DAG.getNode(LoongArchISD::IOCSRWR_D, DL, MVT::Other, Chain,
4437 Op2,
4438 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
4439 Op.getOperand(3)));
4440 }
4441#define ASRT_LE_GT_CASE(NAME) \
4442 case Intrinsic::loongarch_##NAME: { \
4443 return !Subtarget.is64Bit() \
4444 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG) \
4445 : Op; \
4446 }
4447 ASRT_LE_GT_CASE(asrtle_d)
4448 ASRT_LE_GT_CASE(asrtgt_d)
4449#undef ASRT_LE_GT_CASE
4450 case Intrinsic::loongarch_ldpte_d: {
4451 unsigned Imm = Op.getConstantOperandVal(3);
4452 return !Subtarget.is64Bit()
4453 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
4454 : !isUInt<8>(Imm) ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4455 : Op;
4456 }
4457 case Intrinsic::loongarch_lsx_vst:
4458 case Intrinsic::loongarch_lasx_xvst:
4459 return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue())
4460 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4461 : SDValue();
4462 case Intrinsic::loongarch_lasx_xvstelm_b:
4463 return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4464 !isUInt<5>(Op.getConstantOperandVal(5)))
4465 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4466 : SDValue();
4467 case Intrinsic::loongarch_lsx_vstelm_b:
4468 return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4469 !isUInt<4>(Op.getConstantOperandVal(5)))
4470 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4471 : SDValue();
4472 case Intrinsic::loongarch_lasx_xvstelm_h:
4473 return (!isShiftedInt<8, 1>(
4474 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4475 !isUInt<4>(Op.getConstantOperandVal(5)))
4476 ? emitIntrinsicErrorMessage(
4477 Op, "argument out of range or not a multiple of 2", DAG)
4478 : SDValue();
4479 case Intrinsic::loongarch_lsx_vstelm_h:
4480 return (!isShiftedInt<8, 1>(
4481 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4482 !isUInt<3>(Op.getConstantOperandVal(5)))
4483 ? emitIntrinsicErrorMessage(
4484 Op, "argument out of range or not a multiple of 2", DAG)
4485 : SDValue();
4486 case Intrinsic::loongarch_lasx_xvstelm_w:
4487 return (!isShiftedInt<8, 2>(
4488 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4489 !isUInt<3>(Op.getConstantOperandVal(5)))
4490 ? emitIntrinsicErrorMessage(
4491 Op, "argument out of range or not a multiple of 4", DAG)
4492 : SDValue();
4493 case Intrinsic::loongarch_lsx_vstelm_w:
4494 return (!isShiftedInt<8, 2>(
4495 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4496 !isUInt<2>(Op.getConstantOperandVal(5)))
4497 ? emitIntrinsicErrorMessage(
4498 Op, "argument out of range or not a multiple of 4", DAG)
4499 : SDValue();
4500 case Intrinsic::loongarch_lasx_xvstelm_d:
4501 return (!isShiftedInt<8, 3>(
4502 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4503 !isUInt<2>(Op.getConstantOperandVal(5)))
4504 ? emitIntrinsicErrorMessage(
4505 Op, "argument out of range or not a multiple of 8", DAG)
4506 : SDValue();
4507 case Intrinsic::loongarch_lsx_vstelm_d:
4508 return (!isShiftedInt<8, 3>(
4509 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4510 !isUInt<1>(Op.getConstantOperandVal(5)))
4511 ? emitIntrinsicErrorMessage(
4512 Op, "argument out of range or not a multiple of 8", DAG)
4513 : SDValue();
4514 }
4515}
4516
4517SDValue LoongArchTargetLowering::lowerShiftLeftParts(SDValue Op,
4518 SelectionDAG &DAG) const {
4519 SDLoc DL(Op);
4520 SDValue Lo = Op.getOperand(0);
4521 SDValue Hi = Op.getOperand(1);
4522 SDValue Shamt = Op.getOperand(2);
4523 EVT VT = Lo.getValueType();
4524
4525 // if Shamt-GRLen < 0: // Shamt < GRLen
4526 // Lo = Lo << Shamt
4527 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (GRLen-1 ^ Shamt))
4528 // else:
4529 // Lo = 0
4530 // Hi = Lo << (Shamt-GRLen)
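//
// For example, with GRLen == 32 and Shamt == 8:
// Lo = Lo << 8
// Hi = (Hi << 8) | ((Lo >>u 1) >>u (31 ^ 8)) == (Hi << 8) | (Lo >>u 24)
// whereas Shamt == 40 (>= GRLen) gives Lo = 0 and Hi = Lo << 8.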
4531
4532 SDValue Zero = DAG.getConstant(0, DL, VT);
4533 SDValue One = DAG.getConstant(1, DL, VT);
4534 SDValue MinusGRLen =
4535 DAG.getSignedConstant(-(int)Subtarget.getGRLen(), DL, VT);
4536 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
4537 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
4538 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
4539
4540 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
4541 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
4542 SDValue ShiftRightLo =
4543 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, GRLenMinus1Shamt);
4544 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
4545 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
4546 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusGRLen);
4547
4548 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
4549
4550 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
4551 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
4552
4553 SDValue Parts[2] = {Lo, Hi};
4554 return DAG.getMergeValues(Parts, DL);
4555}
4556
4557SDValue LoongArchTargetLowering::lowerShiftRightParts(SDValue Op,
4558 SelectionDAG &DAG,
4559 bool IsSRA) const {
4560 SDLoc DL(Op);
4561 SDValue Lo = Op.getOperand(0);
4562 SDValue Hi = Op.getOperand(1);
4563 SDValue Shamt = Op.getOperand(2);
4564 EVT VT = Lo.getValueType();
4565
4566 // SRA expansion:
4567 // if Shamt-GRLen < 0: // Shamt < GRLen
4568 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
4569 // Hi = Hi >>s Shamt
4570 // else:
4571 // Lo = Hi >>s (Shamt-GRLen);
4572 // Hi = Hi >>s (GRLen-1)
4573 //
4574 // SRL expansion:
4575 // if Shamt-GRLen < 0: // Shamt < GRLen
4576 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
4577 // Hi = Hi >>u Shamt
4578 // else:
4579 // Lo = Hi >>u (Shamt-GRLen);
4580 // Hi = 0;
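//
// For example, an SRL with GRLen == 32 and Shamt == 8 gives
// Lo = (Lo >>u 8) | ((Hi << 1) << 23) == (Lo >>u 8) | (Hi << 24)
// Hi = Hi >>u 8
// whereas Shamt == 40 gives Lo = Hi >>u 8 and Hi = 0.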
4581
4582 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
4583
4584 SDValue Zero = DAG.getConstant(0, DL, VT);
4585 SDValue One = DAG.getConstant(1, DL, VT);
4586 SDValue MinusGRLen =
4587 DAG.getSignedConstant(-(int)Subtarget.getGRLen(), DL, VT);
4588 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
4589 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
4590 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
4591
4592 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
4593 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
4594 SDValue ShiftLeftHi =
4595 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, GRLenMinus1Shamt);
4596 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
4597 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
4598 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusGRLen);
4599 SDValue HiFalse =
4600 IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, GRLenMinus1) : Zero;
4601
4602 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
4603
4604 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
4605 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
4606
4607 SDValue Parts[2] = {Lo, Hi};
4608 return DAG.getMergeValues(Parts, DL);
4609}
4610
4611// Returns the opcode of the target-specific SDNode that implements the 32-bit
4612// form of the given Opcode.
4613static unsigned getLoongArchWOpcode(unsigned Opcode) {
4614 switch (Opcode) {
4615 default:
4616 llvm_unreachable("Unexpected opcode");
4617 case ISD::SDIV:
4618 return LoongArchISD::DIV_W;
4619 case ISD::UDIV:
4620 return LoongArchISD::DIV_WU;
4621 case ISD::SREM:
4622 return LoongArchISD::MOD_W;
4623 case ISD::UREM:
4624 return LoongArchISD::MOD_WU;
4625 case ISD::SHL:
4626 return LoongArchISD::SLL_W;
4627 case ISD::SRA:
4628 return LoongArchISD::SRA_W;
4629 case ISD::SRL:
4630 return LoongArchISD::SRL_W;
4631 case ISD::ROTL:
4632 case ISD::ROTR:
4633 return LoongArchISD::ROTR_W;
4634 case ISD::CTTZ:
4635 return LoongArchISD::CTZ_W;
4636 case ISD::CTLZ:
4637 return LoongArchISD::CLZ_W;
4638 }
4639}
4640
4641// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
4642 // node. Because i8/i16/i32 aren't legal types for LA64, these operations would
4643 // otherwise be promoted to i64, making it difficult to select the
4644 // SLL_W/.../*W later on, because the fact that the operation was originally of
4645// type i8/i16/i32 is lost.
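// For example, an i32 UDIV is rebuilt as (truncate (DIV_WU (ext x), (ext y))),
// with the extension kind chosen by the caller, so that div.wu can be selected
// instead of a full 64-bit division.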
4646 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp,
4647 unsigned ExtOpc = ISD::ANY_EXTEND) {
4648 SDLoc DL(N);
4649 unsigned WOpcode = getLoongArchWOpcode(N->getOpcode());
4650 SDValue NewOp0, NewRes;
4651
4652 switch (NumOp) {
4653 default:
4654 llvm_unreachable("Unexpected NumOp");
4655 case 1: {
4656 NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
4657 NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0);
4658 break;
4659 }
4660 case 2: {
4661 NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
4662 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
4663 if (N->getOpcode() == ISD::ROTL) {
4664 SDValue TmpOp = DAG.getConstant(32, DL, MVT::i64);
4665 NewOp1 = DAG.getNode(ISD::SUB, DL, MVT::i64, TmpOp, NewOp1);
4666 }
4667 NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
4668 break;
4669 }
4670 // TODO: Handle more NumOp values.
4671 }
4672
4673 // ReplaceNodeResults requires we maintain the same type for the return
4674 // value.
4675 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
4676}
4677
4678 // Converts the given 32-bit operation to an i64 operation with sign-extension
4679 // semantics, so that redundant sign-extension instructions can be avoided.
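// For example, an i32 ADD becomes (truncate (sext_inreg (add (any_extend x),
// (any_extend y)), i32)), which the patterns can match directly as add.w, so
// users that expect a sign-extended i32 value need no extra extension.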
4680 static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
4681 SDLoc DL(N);
4682 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
4683 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
4684 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
4685 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
4686 DAG.getValueType(MVT::i32));
4687 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
4688}
4689
4690 // Helper function that emits an error message for intrinsics with/without chain
4691 // and pushes an UNDEF value and, if WithChain is set, the chain as the results.
4692 static void emitErrorAndReplaceIntrinsicResults(
4693 SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG,
4694 StringRef ErrorMsg, bool WithChain = true) {
4695 DAG.getContext()->emitError(N->getOperationName(0) + ": " + ErrorMsg + ".");
4696 Results.push_back(DAG.getUNDEF(N->getValueType(0)));
4697 if (!WithChain)
4698 return;
4699 Results.push_back(N->getOperand(0));
4700}
4701
4702template <unsigned N>
4703static void
4704 replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl<SDValue> &Results,
4705 SelectionDAG &DAG, const LoongArchSubtarget &Subtarget,
4706 unsigned ResOp) {
4707 const StringRef ErrorMsgOOR = "argument out of range";
4708 unsigned Imm = Node->getConstantOperandVal(2);
4709 if (!isUInt<N>(Imm)) {
4710 emitErrorAndReplaceIntrinsicResults(Node, Results, DAG, ErrorMsgOOR,
4711 /*WithChain=*/false);
4712 return;
4713 }
4714 SDLoc DL(Node);
4715 SDValue Vec = Node->getOperand(1);
4716
4717 SDValue PickElt =
4718 DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec,
4719 DAG.getConstant(Imm, DL, Subtarget.getGRLenVT()),
4720 DAG.getValueType(Vec.getValueType().getVectorElementType()));
4721 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, Node->getValueType(0),
4722 PickElt.getValue(0)));
4723}
4724
4725 static void replaceVecCondBranchResults(SDNode *N,
4726 SmallVectorImpl<SDValue> &Results,
4727 SelectionDAG &DAG,
4728 const LoongArchSubtarget &Subtarget,
4729 unsigned ResOp) {
4730 SDLoc DL(N);
4731 SDValue Vec = N->getOperand(1);
4732
4733 SDValue CB = DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec);
4734 Results.push_back(
4735 DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), CB.getValue(0)));
4736}
4737
4738static void
4739 replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
4740 SelectionDAG &DAG,
4741 const LoongArchSubtarget &Subtarget) {
4742 switch (N->getConstantOperandVal(0)) {
4743 default:
4744 llvm_unreachable("Unexpected Intrinsic.");
4745 case Intrinsic::loongarch_lsx_vpickve2gr_b:
4746 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
4747 LoongArchISD::VPICK_SEXT_ELT);
4748 break;
4749 case Intrinsic::loongarch_lsx_vpickve2gr_h:
4750 case Intrinsic::loongarch_lasx_xvpickve2gr_w:
4751 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
4752 LoongArchISD::VPICK_SEXT_ELT);
4753 break;
4754 case Intrinsic::loongarch_lsx_vpickve2gr_w:
4755 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
4756 LoongArchISD::VPICK_SEXT_ELT);
4757 break;
4758 case Intrinsic::loongarch_lsx_vpickve2gr_bu:
4759 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
4760 LoongArchISD::VPICK_ZEXT_ELT);
4761 break;
4762 case Intrinsic::loongarch_lsx_vpickve2gr_hu:
4763 case Intrinsic::loongarch_lasx_xvpickve2gr_wu:
4764 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
4765 LoongArchISD::VPICK_ZEXT_ELT);
4766 break;
4767 case Intrinsic::loongarch_lsx_vpickve2gr_wu:
4768 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
4769 LoongArchISD::VPICK_ZEXT_ELT);
4770 break;
4771 case Intrinsic::loongarch_lsx_bz_b:
4772 case Intrinsic::loongarch_lsx_bz_h:
4773 case Intrinsic::loongarch_lsx_bz_w:
4774 case Intrinsic::loongarch_lsx_bz_d:
4775 case Intrinsic::loongarch_lasx_xbz_b:
4776 case Intrinsic::loongarch_lasx_xbz_h:
4777 case Intrinsic::loongarch_lasx_xbz_w:
4778 case Intrinsic::loongarch_lasx_xbz_d:
4779 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
4780 LoongArchISD::VALL_ZERO);
4781 break;
4782 case Intrinsic::loongarch_lsx_bz_v:
4783 case Intrinsic::loongarch_lasx_xbz_v:
4784 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
4785 LoongArchISD::VANY_ZERO);
4786 break;
4787 case Intrinsic::loongarch_lsx_bnz_b:
4788 case Intrinsic::loongarch_lsx_bnz_h:
4789 case Intrinsic::loongarch_lsx_bnz_w:
4790 case Intrinsic::loongarch_lsx_bnz_d:
4791 case Intrinsic::loongarch_lasx_xbnz_b:
4792 case Intrinsic::loongarch_lasx_xbnz_h:
4793 case Intrinsic::loongarch_lasx_xbnz_w:
4794 case Intrinsic::loongarch_lasx_xbnz_d:
4795 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
4796 LoongArchISD::VALL_NONZERO);
4797 break;
4798 case Intrinsic::loongarch_lsx_bnz_v:
4799 case Intrinsic::loongarch_lasx_xbnz_v:
4800 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
4801 LoongArchISD::VANY_NONZERO);
4802 break;
4803 }
4804}
4805
4808 SelectionDAG &DAG) {
4809 assert(N->getValueType(0) == MVT::i128 &&
4810 "AtomicCmpSwap on types less than 128 should be legal");
4811 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
4812
4813 unsigned Opcode;
4814 switch (MemOp->getMergedOrdering()) {
4815 case AtomicOrdering::Acquire:
4816 case AtomicOrdering::AcquireRelease:
4817 case AtomicOrdering::SequentiallyConsistent:
4818 Opcode = LoongArch::PseudoCmpXchg128Acquire;
4819 break;
4820 case AtomicOrdering::Monotonic:
4821 case AtomicOrdering::Release:
4822 Opcode = LoongArch::PseudoCmpXchg128;
4823 break;
4824 default:
4825 llvm_unreachable("Unexpected ordering!");
4826 }
4827
4828 SDLoc DL(N);
4829 auto CmpVal = DAG.SplitScalar(N->getOperand(2), DL, MVT::i64, MVT::i64);
4830 auto NewVal = DAG.SplitScalar(N->getOperand(3), DL, MVT::i64, MVT::i64);
4831 SDValue Ops[] = {N->getOperand(1), CmpVal.first, CmpVal.second,
4832 NewVal.first, NewVal.second, N->getOperand(0)};
4833
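// The pseudo yields the low and high 64-bit halves of the loaded value
// (results 0 and 1), a scratch value (result 2, unused here) and the chain
// (result 3); the halves are reassembled into an i128 with BUILD_PAIR below.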
4834 SDNode *CmpSwap = DAG.getMachineNode(
4835 Opcode, SDLoc(N), DAG.getVTList(MVT::i64, MVT::i64, MVT::i64, MVT::Other),
4836 Ops);
4837 DAG.setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
4838 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128,
4839 SDValue(CmpSwap, 0), SDValue(CmpSwap, 1)));
4840 Results.push_back(SDValue(CmpSwap, 3));
4841}
4842
4843 void LoongArchTargetLowering::ReplaceNodeResults(
4844 SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
4845 SDLoc DL(N);
4846 EVT VT = N->getValueType(0);
4847 switch (N->getOpcode()) {
4848 default:
4849 llvm_unreachable("Don't know how to legalize this operation");
4850 case ISD::ADD:
4851 case ISD::SUB:
4852 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
4853 "Unexpected custom legalisation");
4854 Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
4855 break;
4856 case ISD::SDIV:
4857 case ISD::UDIV:
4858 case ISD::SREM:
4859 case ISD::UREM:
4860 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4861 "Unexpected custom legalisation");
4862 Results.push_back(customLegalizeToWOp(N, DAG, 2,
4863 Subtarget.hasDiv32() && VT == MVT::i32
4864 ? ISD::ANY_EXTEND
4865 : ISD::SIGN_EXTEND));
4866 break;
4867 case ISD::SHL:
4868 case ISD::SRA:
4869 case ISD::SRL:
4870 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4871 "Unexpected custom legalisation");
4872 if (N->getOperand(1).getOpcode() != ISD::Constant) {
4873 Results.push_back(customLegalizeToWOp(N, DAG, 2));
4874 break;
4875 }
4876 break;
4877 case ISD::ROTL:
4878 case ISD::ROTR:
4879 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4880 "Unexpected custom legalisation");
4881 Results.push_back(customLegalizeToWOp(N, DAG, 2));
4882 break;
4883 case ISD::FP_TO_SINT: {
4884 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4885 "Unexpected custom legalisation");
4886 SDValue Src = N->getOperand(0);
4887 EVT FVT = EVT::getFloatingPointVT(N->getValueSizeInBits(0));
4888 if (getTypeAction(*DAG.getContext(), Src.getValueType()) !=
4889 TargetLowering::TypeSoftenFloat) {
4890 if (!isTypeLegal(Src.getValueType()))
4891 return;
4892 if (Src.getValueType() == MVT::f16)
4893 Src = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Src);
4894 SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, FVT, Src);
4895 Results.push_back(DAG.getNode(ISD::BITCAST, DL, VT, Dst));
4896 return;
4897 }
4898 // If the FP type needs to be softened, emit a library call using the 'si'
4899 // version. If we left it to default legalization we'd end up with 'di'.
4900 RTLIB::Libcall LC;
4901 LC = RTLIB::getFPTOSINT(Src.getValueType(), VT);
4902 MakeLibCallOptions CallOptions;
4903 EVT OpVT = Src.getValueType();
4904 CallOptions.setTypeListBeforeSoften(OpVT, VT);
4905 SDValue Chain = SDValue();
4906 SDValue Result;
4907 std::tie(Result, Chain) =
4908 makeLibCall(DAG, LC, VT, Src, CallOptions, DL, Chain);
4909 Results.push_back(Result);
4910 break;
4911 }
4912 case ISD::BITCAST: {
4913 SDValue Src = N->getOperand(0);
4914 EVT SrcVT = Src.getValueType();
4915 if (VT == MVT::i32 && SrcVT == MVT::f32 && Subtarget.is64Bit() &&
4916 Subtarget.hasBasicF()) {
4917 SDValue Dst =
4918 DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Src);
4919 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Dst));
4920 } else if (VT == MVT::i64 && SrcVT == MVT::f64 && !Subtarget.is64Bit()) {
4921 SDValue NewReg = DAG.getNode(LoongArchISD::SPLIT_PAIR_F64, DL,
4922 DAG.getVTList(MVT::i32, MVT::i32), Src);
4923 SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,
4924 NewReg.getValue(0), NewReg.getValue(1));
4925 Results.push_back(RetReg);
4926 }
4927 break;
4928 }
4929 case ISD::FP_TO_UINT: {
4930 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4931 "Unexpected custom legalisation");
4932 auto &TLI = DAG.getTargetLoweringInfo();
4933 SDValue Tmp1, Tmp2;
4934 TLI.expandFP_TO_UINT(N, Tmp1, Tmp2, DAG);
4935 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Tmp1));
4936 break;
4937 }
4938 case ISD::BSWAP: {
4939 SDValue Src = N->getOperand(0);
4940 assert((VT == MVT::i16 || VT == MVT::i32) &&
4941 "Unexpected custom legalization");
4942 MVT GRLenVT = Subtarget.getGRLenVT();
4943 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
4944 SDValue Tmp;
4945 switch (VT.getSizeInBits()) {
4946 default:
4947 llvm_unreachable("Unexpected operand width");
4948 case 16:
4949 Tmp = DAG.getNode(LoongArchISD::REVB_2H, DL, GRLenVT, NewSrc);
4950 break;
4951 case 32:
4952 // Only LA64 gets here, due to the size mismatch between VT and GRLenVT;
4953 // the LA32 lowering is defined directly in LoongArchInstrInfo.
4954 Tmp = DAG.getNode(LoongArchISD::REVB_2W, DL, GRLenVT, NewSrc);
4955 break;
4956 }
4957 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
4958 break;
4959 }
4960 case ISD::BITREVERSE: {
4961 SDValue Src = N->getOperand(0);
4962 assert((VT == MVT::i8 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
4963 "Unexpected custom legalization");
4964 MVT GRLenVT = Subtarget.getGRLenVT();
4965 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
4966 SDValue Tmp;
4967 switch (VT.getSizeInBits()) {
4968 default:
4969 llvm_unreachable("Unexpected operand width");
4970 case 8:
4971 Tmp = DAG.getNode(LoongArchISD::BITREV_4B, DL, GRLenVT, NewSrc);
4972 break;
4973 case 32:
4974 Tmp = DAG.getNode(LoongArchISD::BITREV_W, DL, GRLenVT, NewSrc);
4975 break;
4976 }
4977 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
4978 break;
4979 }
4980 case ISD::CTLZ:
4981 case ISD::CTTZ: {
4982 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4983 "Unexpected custom legalisation");
4984 Results.push_back(customLegalizeToWOp(N, DAG, 1));
4985 break;
4986 }
4987 case ISD::INTRINSIC_W_CHAIN: {
4988 SDValue Chain = N->getOperand(0);
4989 SDValue Op2 = N->getOperand(2);
4990 MVT GRLenVT = Subtarget.getGRLenVT();
4991 const StringRef ErrorMsgOOR = "argument out of range";
4992 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
4993 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
4994
4995 switch (N->getConstantOperandVal(1)) {
4996 default:
4997 llvm_unreachable("Unexpected Intrinsic.");
4998 case Intrinsic::loongarch_movfcsr2gr: {
4999 if (!Subtarget.hasBasicF()) {
5000 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqF);
5001 return;
5002 }
5003 unsigned Imm = Op2->getAsZExtVal();
5004 if (!isUInt<2>(Imm)) {
5005 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
5006 return;
5007 }
5008 SDValue MOVFCSR2GRResults = DAG.getNode(
5009 LoongArchISD::MOVFCSR2GR, SDLoc(N), {MVT::i64, MVT::Other},
5010 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
5011 Results.push_back(
5012 DAG.getNode(ISD::TRUNCATE, DL, VT, MOVFCSR2GRResults.getValue(0)));
5013 Results.push_back(MOVFCSR2GRResults.getValue(1));
5014 break;
5015 }
5016#define CRC_CASE_EXT_BINARYOP(NAME, NODE) \
5017 case Intrinsic::loongarch_##NAME: { \
5018 SDValue NODE = DAG.getNode( \
5019 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
5020 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
5021 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
5022 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
5023 Results.push_back(NODE.getValue(1)); \
5024 break; \
5025 }
5026 CRC_CASE_EXT_BINARYOP(crc_w_b_w, CRC_W_B_W)
5027 CRC_CASE_EXT_BINARYOP(crc_w_h_w, CRC_W_H_W)
5028 CRC_CASE_EXT_BINARYOP(crc_w_w_w, CRC_W_W_W)
5029 CRC_CASE_EXT_BINARYOP(crcc_w_b_w, CRCC_W_B_W)
5030 CRC_CASE_EXT_BINARYOP(crcc_w_h_w, CRCC_W_H_W)
5031 CRC_CASE_EXT_BINARYOP(crcc_w_w_w, CRCC_W_W_W)
5032#undef CRC_CASE_EXT_BINARYOP
5033
5034#define CRC_CASE_EXT_UNARYOP(NAME, NODE) \
5035 case Intrinsic::loongarch_##NAME: { \
5036 SDValue NODE = DAG.getNode( \
5037 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
5038 {Chain, Op2, \
5039 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
5040 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
5041 Results.push_back(NODE.getValue(1)); \
5042 break; \
5043 }
5044 CRC_CASE_EXT_UNARYOP(crc_w_d_w, CRC_W_D_W)
5045 CRC_CASE_EXT_UNARYOP(crcc_w_d_w, CRCC_W_D_W)
5046#undef CRC_CASE_EXT_UNARYOP
5047#define CSR_CASE(ID) \
5048 case Intrinsic::loongarch_##ID: { \
5049 if (!Subtarget.is64Bit()) \
5050 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64); \
5051 break; \
5052 }
5053 CSR_CASE(csrrd_d);
5054 CSR_CASE(csrwr_d);
5055 CSR_CASE(csrxchg_d);
5056 CSR_CASE(iocsrrd_d);
5057#undef CSR_CASE
5058 case Intrinsic::loongarch_csrrd_w: {
5059 unsigned Imm = Op2->getAsZExtVal();
5060 if (!isUInt<14>(Imm)) {
5061 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
5062 return;
5063 }
5064 SDValue CSRRDResults =
5065 DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
5066 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
5067 Results.push_back(
5068 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRRDResults.getValue(0)));
5069 Results.push_back(CSRRDResults.getValue(1));
5070 break;
5071 }
5072 case Intrinsic::loongarch_csrwr_w: {
5073 unsigned Imm = N->getConstantOperandVal(3);
5074 if (!isUInt<14>(Imm)) {
5075 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
5076 return;
5077 }
5078 SDValue CSRWRResults =
5079 DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
5080 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
5081 DAG.getConstant(Imm, DL, GRLenVT)});
5082 Results.push_back(
5083 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRWRResults.getValue(0)));
5084 Results.push_back(CSRWRResults.getValue(1));
5085 break;
5086 }
5087 case Intrinsic::loongarch_csrxchg_w: {
5088 unsigned Imm = N->getConstantOperandVal(4);
5089 if (!isUInt<14>(Imm)) {
5090 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
5091 return;
5092 }
5093 SDValue CSRXCHGResults = DAG.getNode(
5094 LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
5095 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
5096 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3)),
5097 DAG.getConstant(Imm, DL, GRLenVT)});
5098 Results.push_back(
5099 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRXCHGResults.getValue(0)));
5100 Results.push_back(CSRXCHGResults.getValue(1));
5101 break;
5102 }
5103#define IOCSRRD_CASE(NAME, NODE) \
5104 case Intrinsic::loongarch_##NAME: { \
5105 SDValue IOCSRRDResults = \
5106 DAG.getNode(LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
5107 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)}); \
5108 Results.push_back( \
5109 DAG.getNode(ISD::TRUNCATE, DL, VT, IOCSRRDResults.getValue(0))); \
5110 Results.push_back(IOCSRRDResults.getValue(1)); \
5111 break; \
5112 }
5113 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
5114 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
5115 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
5116#undef IOCSRRD_CASE
5117 case Intrinsic::loongarch_cpucfg: {
5118 SDValue CPUCFGResults =
5119 DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
5120 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)});
5121 Results.push_back(
5122 DAG.getNode(ISD::TRUNCATE, DL, VT, CPUCFGResults.getValue(0)));
5123 Results.push_back(CPUCFGResults.getValue(1));
5124 break;
5125 }
5126 case Intrinsic::loongarch_lddir_d: {
5127 if (!Subtarget.is64Bit()) {
5128 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64);
5129 return;
5130 }
5131 break;
5132 }
5133 }
5134 break;
5135 }
5136 case ISD::READ_REGISTER: {
5137 if (Subtarget.is64Bit())
5138 DAG.getContext()->emitError(
5139 "On LA64, only 64-bit registers can be read.");
5140 else
5141 DAG.getContext()->emitError(
5142 "On LA32, only 32-bit registers can be read.");
5143 Results.push_back(DAG.getUNDEF(VT));
5144 Results.push_back(N->getOperand(0));
5145 break;
5146 }
5147 case ISD::INTRINSIC_WO_CHAIN: {
5148 replaceINTRINSIC_WO_CHAINResults(N, Results, DAG, Subtarget);
5149 break;
5150 }
5151 case ISD::LROUND: {
5152 SDValue Op0 = N->getOperand(0);
5153 EVT OpVT = Op0.getValueType();
5154 RTLIB::Libcall LC =
5155 OpVT == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
5156 MakeLibCallOptions CallOptions;
5157 CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64);
5158 SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first;
5159 Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
5160 Results.push_back(Result);
5161 break;
5162 }
5163 case ISD::ATOMIC_CMP_SWAP: {
5165 break;
5166 }
5167 case ISD::TRUNCATE: {
5168 MVT VT = N->getSimpleValueType(0);
5169 if (getTypeAction(*DAG.getContext(), VT) != TypeWidenVector)
5170 return;
5171
5172 MVT WidenVT = getTypeToTransformTo(*DAG.getContext(), VT).getSimpleVT();
5173 SDValue In = N->getOperand(0);
5174 EVT InVT = In.getValueType();
5175 EVT InEltVT = InVT.getVectorElementType();
5176 EVT EltVT = VT.getVectorElementType();
5177 unsigned MinElts = VT.getVectorNumElements();
5178 unsigned WidenNumElts = WidenVT.getVectorNumElements();
5179 unsigned InBits = InVT.getSizeInBits();
5180
5181 if ((128 % InBits) == 0 && WidenVT.is128BitVector()) {
5182 if ((InEltVT.getSizeInBits() % EltVT.getSizeInBits()) == 0) {
5183 int Scale = InEltVT.getSizeInBits() / EltVT.getSizeInBits();
5184 SmallVector<int, 16> TruncMask(WidenNumElts, -1);
5185 for (unsigned I = 0; I < MinElts; ++I)
5186 TruncMask[I] = Scale * I;
5187
5188 unsigned WidenNumElts = 128 / In.getScalarValueSizeInBits();
5189 MVT SVT = In.getSimpleValueType().getScalarType();
5190 MVT VT = MVT::getVectorVT(SVT, WidenNumElts);
5191 SDValue WidenIn =
5192 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), In,
5193 DAG.getVectorIdxConstant(0, DL));
5194 assert(isTypeLegal(WidenVT) && isTypeLegal(WidenIn.getValueType()) &&
5195 "Illegal vector type in truncation");
5196 WidenIn = DAG.getBitcast(WidenVT, WidenIn);
5197 Results.push_back(
5198 DAG.getVectorShuffle(WidenVT, DL, WidenIn, WidenIn, TruncMask));
5199 return;
5200 }
5201 }
5202
5203 break;
5204 }
5205 }
5206}
5207
5208/// Try to fold: (and (xor X, -1), Y) -> (vandn X, Y).
5209 static SDValue combineAndNotIntoVANDN(SDNode *N, const SDLoc &DL,
5210 SelectionDAG &DAG) {
5211 assert(N->getOpcode() == ISD::AND && "Unexpected opcode combine into ANDN");
5212
5213 MVT VT = N->getSimpleValueType(0);
5214 if (!VT.is128BitVector() && !VT.is256BitVector())
5215 return SDValue();
5216
5217 SDValue X, Y;
5218 SDValue N0 = N->getOperand(0);
5219 SDValue N1 = N->getOperand(1);
5220
5221 if (SDValue Not = isNOT(N0, DAG)) {
5222 X = Not;
5223 Y = N1;
5224 } else if (SDValue Not = isNOT(N1, DAG)) {
5225 X = Not;
5226 Y = N0;
5227 } else
5228 return SDValue();
5229
5230 X = DAG.getBitcast(VT, X);
5231 Y = DAG.getBitcast(VT, Y);
5232 return DAG.getNode(LoongArchISD::VANDN, DL, VT, X, Y);
5233}
5234
5235 static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
5236 TargetLowering::DAGCombinerInfo &DCI,
5237 const LoongArchSubtarget &Subtarget) {
5238 if (DCI.isBeforeLegalizeOps())
5239 return SDValue();
5240
5241 SDValue FirstOperand = N->getOperand(0);
5242 SDValue SecondOperand = N->getOperand(1);
5243 unsigned FirstOperandOpc = FirstOperand.getOpcode();
5244 EVT ValTy = N->getValueType(0);
5245 SDLoc DL(N);
5246 uint64_t lsb, msb;
5247 unsigned SMIdx, SMLen;
5248 ConstantSDNode *CN;
5249 SDValue NewOperand;
5250 MVT GRLenVT = Subtarget.getGRLenVT();
5251
5252 if (SDValue R = combineAndNotIntoVANDN(N, DL, DAG))
5253 return R;
5254
5255 // BSTRPICK requires the 32S feature.
5256 if (!Subtarget.has32S())
5257 return SDValue();
5258
5259 // Op's second operand must be a shifted mask.
5260 if (!(CN = dyn_cast<ConstantSDNode>(SecondOperand)) ||
5261 !isShiftedMask_64(CN->getZExtValue(), SMIdx, SMLen))
5262 return SDValue();
5263
5264 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL) {
5265 // Pattern match BSTRPICK.
5266 // $dst = and ((sra or srl) $src, lsb), (2**len - 1)
5267 // => BSTRPICK $dst, $src, msb, lsb
5268 // where msb = lsb + len - 1
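// For example (illustrative values): with lsb = 8 and len = 16,
//   $dst = and (srl $src, 8), 0xffff
// becomes BSTRPICK $dst, $src, 23, 8 (msb = 8 + 16 - 1 = 23).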
5269
5270 // The second operand of the shift must be an immediate.
5271 if (!(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))))
5272 return SDValue();
5273
5274 lsb = CN->getZExtValue();
5275
5276 // Return if the shifted mask does not start at bit 0 or the sum of its
5277 // length and lsb exceeds the word's size.
5278 if (SMIdx != 0 || lsb + SMLen > ValTy.getSizeInBits())
5279 return SDValue();
5280
5281 NewOperand = FirstOperand.getOperand(0);
5282 } else {
5283 // Pattern match BSTRPICK.
5284 // $dst = and $src, (2**len - 1), if len > 12
5285 // => BSTRPICK $dst, $src, msb, lsb
5286 // where lsb = 0 and msb = len - 1
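// For example (illustrative values): and $src, 0xffff has len = 16 > 12,
// so it becomes BSTRPICK $dst, $src, 15, 0.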
5287
5288 // If the mask is <= 0xfff, andi can be used instead.
5289 if (CN->getZExtValue() <= 0xfff)
5290 return SDValue();
5291
5292 // Return if the MSB exceeds the word's size.
5293 if (SMIdx + SMLen > ValTy.getSizeInBits())
5294 return SDValue();
5295
5296 if (SMIdx > 0) {
5297 // Omit if the constant has more than 2 uses. This is a conservative
5298 // decision. Whether it is a win depends on the HW microarchitecture.
5299 // However, it should always be better for 1 and 2 uses.
5300 if (CN->use_size() > 2)
5301 return SDValue();
5302 // Return if the constant can be composed by a single LU12I.W.
5303 if ((CN->getZExtValue() & 0xfff) == 0)
5304 return SDValue();
5305 // Return if the constant can be composed by a single ADDI with
5306 // the zero register.
5307 if (CN->getSExtValue() >= -2048 && CN->getSExtValue() < 0)
5308 return SDValue();
5309 }
5310
5311 lsb = SMIdx;
5312 NewOperand = FirstOperand;
5313 }
5314
5315 msb = lsb + SMLen - 1;
5316 SDValue NR0 = DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy, NewOperand,
5317 DAG.getConstant(msb, DL, GRLenVT),
5318 DAG.getConstant(lsb, DL, GRLenVT));
5319 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL || lsb == 0)
5320 return NR0;
5321 // Try to optimize to
5322 // bstrpick $Rd, $Rs, msb, lsb
5323 // slli $Rd, $Rd, lsb
5324 return DAG.getNode(ISD::SHL, DL, ValTy, NR0,
5325 DAG.getConstant(lsb, DL, GRLenVT));
5326}
5327
5328 static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG,
5329 TargetLowering::DAGCombinerInfo &DCI,
5330 const LoongArchSubtarget &Subtarget) {
5331 // BSTRPICK requires the 32S feature.
5332 if (!Subtarget.has32S())
5333 return SDValue();
5334
5335 if (DCI.isBeforeLegalizeOps())
5336 return SDValue();
5337
5338 // $dst = srl (and $src, Mask), Shamt
5339 // =>
5340 // BSTRPICK $dst, $src, MaskIdx+MaskLen-1, Shamt
5341 // when Mask is a shifted mask, and MaskIdx <= Shamt <= MaskIdx+MaskLen-1
5342 //
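// For example (illustrative values): srl (and $src, 0xff00), 8 has
// MaskIdx = 8, MaskLen = 8 and Shamt = 8, so it becomes
// BSTRPICK $dst, $src, 15, 8.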
5343
5344 SDValue FirstOperand = N->getOperand(0);
5345 ConstantSDNode *CN;
5346 EVT ValTy = N->getValueType(0);
5347 SDLoc DL(N);
5348 MVT GRLenVT = Subtarget.getGRLenVT();
5349 unsigned MaskIdx, MaskLen;
5350 uint64_t Shamt;
5351
5352 // The first operand must be an AND and the second operand of the AND must be
5353 // a shifted mask.
5354 if (FirstOperand.getOpcode() != ISD::AND ||
5355 !(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))) ||
5356 !isShiftedMask_64(CN->getZExtValue(), MaskIdx, MaskLen))
5357 return SDValue();
5358
5359 // The second operand (shift amount) must be an immediate.
5360 if (!(CN = dyn_cast<ConstantSDNode>(N->getOperand(1))))
5361 return SDValue();
5362
5363 Shamt = CN->getZExtValue();
5364 if (MaskIdx <= Shamt && Shamt <= MaskIdx + MaskLen - 1)
5365 return DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy,
5366 FirstOperand->getOperand(0),
5367 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
5368 DAG.getConstant(Shamt, DL, GRLenVT));
5369
5370 return SDValue();
5371}
5372
5373// Helper to peek through bitops/trunc/setcc to determine size of source vector.
5374// Allows BITCASTCombine to determine what size vector generated a <X x i1>.
5375static bool checkBitcastSrcVectorSize(SDValue Src, unsigned Size,
5376 unsigned Depth) {
5377 // Limit recursion.
5378 if (Depth >= SelectionDAG::MaxRecursionDepth)
5379 return false;
5380 switch (Src.getOpcode()) {
5381 case ISD::SETCC:
5382 case ISD::TRUNCATE:
5383 return Src.getOperand(0).getValueSizeInBits() == Size;
5384 case ISD::FREEZE:
5385 return checkBitcastSrcVectorSize(Src.getOperand(0), Size, Depth + 1);
5386 case ISD::AND:
5387 case ISD::XOR:
5388 case ISD::OR:
5389 return checkBitcastSrcVectorSize(Src.getOperand(0), Size, Depth + 1) &&
5390 checkBitcastSrcVectorSize(Src.getOperand(1), Size, Depth + 1);
5391 case ISD::SELECT:
5392 case ISD::VSELECT:
5393 return Src.getOperand(0).getScalarValueSizeInBits() == 1 &&
5394 checkBitcastSrcVectorSize(Src.getOperand(1), Size, Depth + 1) &&
5395 checkBitcastSrcVectorSize(Src.getOperand(2), Size, Depth + 1);
5396 case ISD::BUILD_VECTOR:
5397 return ISD::isBuildVectorAllZeros(Src.getNode()) ||
5398 ISD::isBuildVectorAllOnes(Src.getNode());
5399 }
5400 return false;
5401}
5402
5403// Helper to push sign extension of vXi1 SETCC result through bitops.
5404 static SDValue signExtendBitcastSrcVector(SelectionDAG &DAG, EVT SExtVT,
5405 SDValue Src, const SDLoc &DL) {
5406 switch (Src.getOpcode()) {
5407 case ISD::SETCC:
5408 case ISD::FREEZE:
5409 case ISD::TRUNCATE:
5410 case ISD::BUILD_VECTOR:
5411 return DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, Src);
5412 case ISD::AND:
5413 case ISD::XOR:
5414 case ISD::OR:
5415 return DAG.getNode(
5416 Src.getOpcode(), DL, SExtVT,
5417 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(0), DL),
5418 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(1), DL));
5419 case ISD::SELECT:
5420 case ISD::VSELECT:
5421 return DAG.getSelect(
5422 DL, SExtVT, Src.getOperand(0),
5423 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(1), DL),
5424 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(2), DL));
5425 }
5426 llvm_unreachable("Unexpected node type for vXi1 sign extension");
5427}
5428
5429static SDValue
5430 performSETCC_BITCASTCombine(SDNode *N, SelectionDAG &DAG,
5431 TargetLowering::DAGCombinerInfo &DCI,
5432 const LoongArchSubtarget &Subtarget) {
5433 SDLoc DL(N);
5434 EVT VT = N->getValueType(0);
5435 SDValue Src = N->getOperand(0);
5436 EVT SrcVT = Src.getValueType();
5437
5438 if (Src.getOpcode() != ISD::SETCC || !Src.hasOneUse())
5439 return SDValue();
5440
5441 bool UseLASX;
5442 unsigned Opc = ISD::DELETED_NODE;
5443 EVT CmpVT = Src.getOperand(0).getValueType();
5444 EVT EltVT = CmpVT.getVectorElementType();
5445
5446 if (Subtarget.hasExtLSX() && CmpVT.getSizeInBits() == 128)
5447 UseLASX = false;
5448 else if (Subtarget.has32S() && Subtarget.hasExtLASX() &&
5449 CmpVT.getSizeInBits() == 256)
5450 UseLASX = true;
5451 else
5452 return SDValue();
5453
5454 SDValue SrcN1 = Src.getOperand(1);
5455 switch (cast<CondCodeSDNode>(Src.getOperand(2))->get()) {
5456 default:
5457 break;
5458 case ISD::SETEQ:
5459 // x == 0 => not (vmsknez.b x)
5460 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
5461 Opc = UseLASX ? LoongArchISD::XVMSKEQZ : LoongArchISD::VMSKEQZ;
5462 break;
5463 case ISD::SETGT:
5464 // x > -1 => vmskgez.b x
5465 if (ISD::isBuildVectorAllOnes(SrcN1.getNode()) && EltVT == MVT::i8)
5466 Opc = UseLASX ? LoongArchISD::XVMSKGEZ : LoongArchISD::VMSKGEZ;
5467 break;
5468 case ISD::SETGE:
5469 // x >= 0 => vmskgez.b x
5470 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
5471 Opc = UseLASX ? LoongArchISD::XVMSKGEZ : LoongArchISD::VMSKGEZ;
5472 break;
5473 case ISD::SETLT:
5474 // x < 0 => vmskltz.{b,h,w,d} x
5475 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) &&
5476 (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
5477 EltVT == MVT::i64))
5478 Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
5479 break;
5480 case ISD::SETLE:
5481 // x <= -1 => vmskltz.{b,h,w,d} x
5482 if (ISD::isBuildVectorAllOnes(SrcN1.getNode()) &&
5483 (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
5484 EltVT == MVT::i64))
5485 Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
5486 break;
5487 case ISD::SETNE:
5488 // x != 0 => vmsknez.b x
5489 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
5490 Opc = UseLASX ? LoongArchISD::XVMSKNEZ : LoongArchISD::VMSKNEZ;
5491 break;
5492 }
5493
5494 if (Opc == ISD::DELETED_NODE)
5495 return SDValue();
5496
5497 SDValue V = DAG.getNode(Opc, DL, Subtarget.getGRLenVT(), Src.getOperand(0));
5498 EVT T = EVT::getIntegerVT(*DAG.getContext(), SrcVT.getVectorNumElements());
5499 V = DAG.getZExtOrTrunc(V, DL, T);
5500 return DAG.getBitcast(VT, V);
5501}
5502
5503 static SDValue performBITCASTCombine(SDNode *N, SelectionDAG &DAG,
5504 TargetLowering::DAGCombinerInfo &DCI,
5505 const LoongArchSubtarget &Subtarget) {
5506 SDLoc DL(N);
5507 EVT VT = N->getValueType(0);
5508 SDValue Src = N->getOperand(0);
5509 EVT SrcVT = Src.getValueType();
5510 MVT GRLenVT = Subtarget.getGRLenVT();
5511
5512 if (!DCI.isBeforeLegalizeOps())
5513 return SDValue();
5514
5515 if (!SrcVT.isSimple() || SrcVT.getScalarType() != MVT::i1)
5516 return SDValue();
5517
5518 // Combine SETCC and BITCAST into [X]VMSK{LT,GE,NE} when possible
5519 SDValue Res = performSETCC_BITCASTCombine(N, DAG, DCI, Subtarget);
5520 if (Res)
5521 return Res;
5522
5523 // Generate vXi1 using [X]VMSKLTZ
5524 MVT SExtVT;
5525 unsigned Opc;
5526 bool UseLASX = false;
5527 bool PropagateSExt = false;
5528
5529 if (Src.getOpcode() == ISD::SETCC && Src.hasOneUse()) {
5530 EVT CmpVT = Src.getOperand(0).getValueType();
5531 if (CmpVT.getSizeInBits() > 256)
5532 return SDValue();
5533 }
5534
5535 switch (SrcVT.getSimpleVT().SimpleTy) {
5536 default:
5537 return SDValue();
5538 case MVT::v2i1:
5539 SExtVT = MVT::v2i64;
5540 break;
5541 case MVT::v4i1:
5542 SExtVT = MVT::v4i32;
5543 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
5544 SExtVT = MVT::v4i64;
5545 UseLASX = true;
5546 PropagateSExt = true;
5547 }
5548 break;
5549 case MVT::v8i1:
5550 SExtVT = MVT::v8i16;
5551 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
5552 SExtVT = MVT::v8i32;
5553 UseLASX = true;
5554 PropagateSExt = true;
5555 }
5556 break;
5557 case MVT::v16i1:
5558 SExtVT = MVT::v16i8;
5559 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
5560 SExtVT = MVT::v16i16;
5561 UseLASX = true;
5562 PropagateSExt = true;
5563 }
5564 break;
5565 case MVT::v32i1:
5566 SExtVT = MVT::v32i8;
5567 UseLASX = true;
5568 break;
5569 };
5570 Src = PropagateSExt ? signExtendBitcastSrcVector(DAG, SExtVT, Src, DL)
5571 : DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, Src);
5572
5573 SDValue V;
5574 if (!Subtarget.has32S() || !Subtarget.hasExtLASX()) {
5575 if (Src.getSimpleValueType() == MVT::v32i8) {
5576 SDValue Lo, Hi;
5577 std::tie(Lo, Hi) = DAG.SplitVector(Src, DL);
5578 Lo = DAG.getNode(LoongArchISD::VMSKLTZ, DL, GRLenVT, Lo);
5579 Hi = DAG.getNode(LoongArchISD::VMSKLTZ, DL, GRLenVT, Hi);
5580 Hi = DAG.getNode(ISD::SHL, DL, GRLenVT, Hi,
5581 DAG.getShiftAmountConstant(16, GRLenVT, DL));
5582 V = DAG.getNode(ISD::OR, DL, GRLenVT, Lo, Hi);
5583 } else if (UseLASX) {
5584 return SDValue();
5585 }
5586 }
5587
5588 if (!V) {
5589 Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
5590 V = DAG.getNode(Opc, DL, GRLenVT, Src);
5591 }
5592
5593 EVT T = EVT::getIntegerVT(*DAG.getContext(), SrcVT.getVectorNumElements());
5594 V = DAG.getZExtOrTrunc(V, DL, T);
5595 return DAG.getBitcast(VT, V);
5596}
5597
5598 static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
5599 TargetLowering::DAGCombinerInfo &DCI,
5600 const LoongArchSubtarget &Subtarget) {
5601 MVT GRLenVT = Subtarget.getGRLenVT();
5602 EVT ValTy = N->getValueType(0);
5603 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
5604 ConstantSDNode *CN0, *CN1;
5605 SDLoc DL(N);
5606 unsigned ValBits = ValTy.getSizeInBits();
5607 unsigned MaskIdx0, MaskLen0, MaskIdx1, MaskLen1;
5608 unsigned Shamt;
5609 bool SwapAndRetried = false;
5610
5611 // BSTRINS requires the 32S feature.
5612 if (!Subtarget.has32S())
5613 return SDValue();
5614
5615 if (DCI.isBeforeLegalizeOps())
5616 return SDValue();
5617
5618 if (ValBits != 32 && ValBits != 64)
5619 return SDValue();
5620
5621Retry:
5622 // 1st pattern to match BSTRINS:
5623 // R = or (and X, mask0), (and (shl Y, lsb), mask1)
5624 // where mask1 = (2**size - 1) << lsb, mask0 = ~mask1
5625 // =>
5626 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
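// For example (illustrative values): with lsb = 8 and size = 8
// (mask1 = 0xff00, mask0 = ~0xff00),
//   R = or (and X, ~0xff00), (and (shl Y, 8), 0xff00)
// becomes BSTRINS X, Y, 15, 8.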
5627 if (N0.getOpcode() == ISD::AND &&
5628 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5629 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
5630 N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL &&
5631 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5632 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
5633 MaskIdx0 == MaskIdx1 && MaskLen0 == MaskLen1 &&
5634 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
5635 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
5636 (MaskIdx0 + MaskLen0 <= ValBits)) {
5637 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 1\n");
5638 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5639 N1.getOperand(0).getOperand(0),
5640 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
5641 DAG.getConstant(MaskIdx0, DL, GRLenVT));
5642 }
5643
5644 // 2nd pattern to match BSTRINS:
5645 // R = or (and X, mask0), (shl (and Y, mask1), lsb)
5646 // where mask1 = (2**size - 1), mask0 = ~(mask1 << lsb)
5647 // =>
5648 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
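// For example (illustrative values): with lsb = 8 and size = 8
// (mask1 = 0xff, mask0 = ~0xff00),
//   R = or (and X, ~0xff00), (shl (and Y, 0xff), 8)
// becomes BSTRINS X, Y, 15, 8.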
5649 if (N0.getOpcode() == ISD::AND &&
5650 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5651 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
5652 N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
5653 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5654 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
5655 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
5656 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
5657 MaskLen0 == MaskLen1 && MaskIdx1 == 0 &&
5658 (MaskIdx0 + MaskLen0 <= ValBits)) {
5659 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 2\n");
5660 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5661 N1.getOperand(0).getOperand(0),
5662 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
5663 DAG.getConstant(MaskIdx0, DL, GRLenVT));
5664 }
5665
5666 // 3rd pattern to match BSTRINS:
5667 // R = or (and X, mask0), (and Y, mask1)
5668 // where ~mask0 = (2**size - 1) << lsb, mask0 & mask1 = 0
5669 // =>
5670 // R = BSTRINS X, (shr (and Y, mask1), lsb), msb, lsb
5671 // where msb = lsb + size - 1
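// For example (illustrative values): with lsb = 8, size = 8 and mask1 = 0xff00,
//   R = or (and X, ~0xff00), (and Y, 0xff00)
// becomes BSTRINS X, (srl (and Y, 0xff00), 8), 15, 8.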
5672 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
5673 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5674 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
5675 (MaskIdx0 + MaskLen0 <= 64) &&
5676 (CN1 = dyn_cast<ConstantSDNode>(N1->getOperand(1))) &&
5677 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
5678 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 3\n");
5679 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5680 DAG.getNode(ISD::SRL, DL, N1->getValueType(0), N1,
5681 DAG.getConstant(MaskIdx0, DL, GRLenVT)),
5682 DAG.getConstant(ValBits == 32
5683 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
5684 : (MaskIdx0 + MaskLen0 - 1),
5685 DL, GRLenVT),
5686 DAG.getConstant(MaskIdx0, DL, GRLenVT));
5687 }
5688
5689 // 4th pattern to match BSTRINS:
5690 // R = or (and X, mask), (shl Y, shamt)
5691 // where mask = (2**shamt - 1)
5692 // =>
5693 // R = BSTRINS X, Y, ValBits - 1, shamt
5694 // where ValBits = 32 or 64
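// For example (illustrative values, ValBits = 32): with shamt = 8 (mask = 0xff),
//   R = or (and X, 0xff), (shl Y, 8)
// becomes BSTRINS X, Y, 31, 8.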
5695 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::SHL &&
5696 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5697 isShiftedMask_64(CN0->getZExtValue(), MaskIdx0, MaskLen0) &&
5698 MaskIdx0 == 0 && (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5699 (Shamt = CN1->getZExtValue()) == MaskLen0 &&
5700 (MaskIdx0 + MaskLen0 <= ValBits)) {
5701 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 4\n");
5702 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5703 N1.getOperand(0),
5704 DAG.getConstant((ValBits - 1), DL, GRLenVT),
5705 DAG.getConstant(Shamt, DL, GRLenVT));
5706 }
5707
5708 // 5th pattern to match BSTRINS:
5709 // R = or (and X, mask), const
5710 // where ~mask = (2**size - 1) << lsb, mask & const = 0
5711 // =>
5712 // R = BSTRINS X, (const >> lsb), msb, lsb
5713 // where msb = lsb + size - 1
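// For example (illustrative values): with lsb = 8, size = 8 and const = 0x2a00,
//   R = or (and X, ~0xff00), 0x2a00
// becomes BSTRINS X, 0x2a, 15, 8.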
5714 if (N0.getOpcode() == ISD::AND &&
5715 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5716 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
5717 (CN1 = dyn_cast<ConstantSDNode>(N1)) &&
5718 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
5719 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 5\n");
5720 return DAG.getNode(
5721 LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5722 DAG.getSignedConstant(CN1->getSExtValue() >> MaskIdx0, DL, ValTy),
5723 DAG.getConstant(ValBits == 32 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
5724 : (MaskIdx0 + MaskLen0 - 1),
5725 DL, GRLenVT),
5726 DAG.getConstant(MaskIdx0, DL, GRLenVT));
5727 }
5728
5729 // 6th pattern.
5730 // a = b | ((c & mask) << shamt), where all positions in b to be overwritten
5731 // by the incoming bits are known to be zero.
5732 // =>
5733 // a = BSTRINS b, c, shamt + MaskLen - 1, shamt
5734 //
5735 // Note that the 1st pattern is a special case of the 6th, i.e. the 6th
5736 // pattern is more common than the 1st. So we put the 1st before the 6th in
5737 // order to match as many nodes as possible.
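// For example (illustrative values): with mask = 0xff and shamt = 8, and bits
// [8, 15] of b known to be zero,
//   a = b | ((c & 0xff) << 8)
// becomes BSTRINS b, c, 15, 8.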
5738 ConstantSDNode *CNMask, *CNShamt;
5739 unsigned MaskIdx, MaskLen;
5740 if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
5741 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
5742 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
5743 MaskIdx == 0 && (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5744 CNShamt->getZExtValue() + MaskLen <= ValBits) {
5745 Shamt = CNShamt->getZExtValue();
5746 APInt ShMask(ValBits, CNMask->getZExtValue() << Shamt);
5747 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
5748 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 6\n");
5749 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
5750 N1.getOperand(0).getOperand(0),
5751 DAG.getConstant(Shamt + MaskLen - 1, DL, GRLenVT),
5752 DAG.getConstant(Shamt, DL, GRLenVT));
5753 }
5754 }
5755
5756 // 7th pattern.
5757 // a = b | ((c << shamt) & shifted_mask), where all positions in b to be
5758 // overwritten by the incoming bits are known to be zero.
5759 // =>
5760 // a = BSTRINS b, c, MaskIdx + MaskLen - 1, MaskIdx
5761 //
5762 // Similarly, the 7th pattern is more common than the 2nd. So we put the 2nd
5763 // before the 7th in order to match as many nodes as possible.
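// For example (illustrative values): with shifted_mask = 0xff00 and shamt = 8,
// and bits [8, 15] of b known to be zero,
//   a = b | ((c << 8) & 0xff00)
// becomes BSTRINS b, c, 15, 8.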
5764 if (N1.getOpcode() == ISD::AND &&
5765 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5766 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
5767 N1.getOperand(0).getOpcode() == ISD::SHL &&
5768 (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
5769 CNShamt->getZExtValue() == MaskIdx) {
5770 APInt ShMask(ValBits, CNMask->getZExtValue());
5771 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
5772 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 7\n");
5773 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
5774 N1.getOperand(0).getOperand(0),
5775 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
5776 DAG.getConstant(MaskIdx, DL, GRLenVT));
5777 }
5778 }
5779
5780 // (or a, b) and (or b, a) are equivalent, so swap the operands and retry.
5781 if (!SwapAndRetried) {
5782 std::swap(N0, N1);
5783 SwapAndRetried = true;
5784 goto Retry;
5785 }
5786
5787 SwapAndRetried = false;
5788Retry2:
5789 // 8th pattern.
5790 // a = b | (c & shifted_mask), where all positions in b to be overwritten by
5791 // the incoming bits are known to be zero.
5792 // =>
5793 // a = BSTRINS b, c >> MaskIdx, MaskIdx + MaskLen - 1, MaskIdx
5794 //
5795 // Similarly, the 8th pattern is more common than the 4th and 5th patterns. So
5796 // we put it here in order to match as many nodes as possible or generate fewer
5797 // instructions.
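// For example (illustrative values): with shifted_mask = 0xff00 and bits
// [8, 15] of b known to be zero,
//   a = b | (c & 0xff00)
// becomes BSTRINS b, (c >> 8), 15, 8.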
5798 if (N1.getOpcode() == ISD::AND &&
5799 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5800 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen)) {
5801 APInt ShMask(ValBits, CNMask->getZExtValue());
5802 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
5803 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 8\n");
5804 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
5805 DAG.getNode(ISD::SRL, DL, N1->getValueType(0),
5806 N1->getOperand(0),
5807 DAG.getConstant(MaskIdx, DL, GRLenVT)),
5808 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
5809 DAG.getConstant(MaskIdx, DL, GRLenVT));
5810 }
5811 }
5812 // Swap N0/N1 and retry.
5813 if (!SwapAndRetried) {
5814 std::swap(N0, N1);
5815 SwapAndRetried = true;
5816 goto Retry2;
5817 }
5818
5819 return SDValue();
5820}
5821
5822static bool checkValueWidth(SDValue V, ISD::LoadExtType &ExtType) {
5823 ExtType = ISD::NON_EXTLOAD;
5824
5825 switch (V.getNode()->getOpcode()) {
5826 case ISD::LOAD: {
5827 LoadSDNode *LoadNode = cast<LoadSDNode>(V.getNode());
5828 if ((LoadNode->getMemoryVT() == MVT::i8) ||
5829 (LoadNode->getMemoryVT() == MVT::i16)) {
5830 ExtType = LoadNode->getExtensionType();
5831 return true;
5832 }
5833 return false;
5834 }
5835 case ISD::AssertSext: {
5836 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
5837 if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
5838 ExtType = ISD::SEXTLOAD;
5839 return true;
5840 }
5841 return false;
5842 }
5843 case ISD::AssertZext: {
5844 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
5845 if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
5846 ExtType = ISD::ZEXTLOAD;
5847 return true;
5848 }
5849 return false;
5850 }
5851 default:
5852 return false;
5853 }
5854
5855 return false;
5856}
5857
5858// Eliminate redundant truncation and zero-extension nodes.
5859// * Case 1:
5860// +------------+ +------------+ +------------+
5861// | Input1 | | Input2 | | CC |
5862// +------------+ +------------+ +------------+
5863// | | |
5864// V V +----+
5865// +------------+ +------------+ |
5866// | TRUNCATE | | TRUNCATE | |
5867// +------------+ +------------+ |
5868// | | |
5869// V V |
5870// +------------+ +------------+ |
5871// | ZERO_EXT | | ZERO_EXT | |
5872// +------------+ +------------+ |
5873// | | |
5874// | +-------------+ |
5875// V V | |
5876// +----------------+ | |
5877// | AND | | |
5878// +----------------+ | |
5879// | | |
5880// +---------------+ | |
5881// | | |
5882// V V V
5883// +-------------+
5884// | CMP |
5885// +-------------+
5886// * Case 2:
5887// +------------+ +------------+ +-------------+ +------------+ +------------+
5888// | Input1 | | Input2 | | Constant -1 | | Constant 0 | | CC |
5889// +------------+ +------------+ +-------------+ +------------+ +------------+
5890// | | | | |
5891// V | | | |
5892// +------------+ | | | |
5893// | XOR |<---------------------+ | |
5894// +------------+ | | |
5895// | | | |
5896// V V +---------------+ |
5897// +------------+ +------------+ | |
5898// | TRUNCATE | | TRUNCATE | | +-------------------------+
5899// +------------+ +------------+ | |
5900// | | | |
5901// V V | |
5902// +------------+ +------------+ | |
5903// | ZERO_EXT | | ZERO_EXT | | |
5904// +------------+ +------------+ | |
5905// | | | |
5906// V V | |
5907// +----------------+ | |
5908// | AND | | |
5909// +----------------+ | |
5910// | | |
5911// +---------------+ | |
5912// | | |
5913// V V V
5914// +-------------+
5915// | CMP |
5916// +-------------+
5917 static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG,
5918 TargetLowering::DAGCombinerInfo &DCI,
5919 const LoongArchSubtarget &Subtarget) {
5920 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
5921
5922 SDNode *AndNode = N->getOperand(0).getNode();
5923 if (AndNode->getOpcode() != ISD::AND)
5924 return SDValue();
5925
5926 SDValue AndInputValue2 = AndNode->getOperand(1);
5927 if (AndInputValue2.getOpcode() != ISD::ZERO_EXTEND)
5928 return SDValue();
5929
5930 SDValue CmpInputValue = N->getOperand(1);
5931 SDValue AndInputValue1 = AndNode->getOperand(0);
5932 if (AndInputValue1.getOpcode() == ISD::XOR) {
5933 if (CC != ISD::SETEQ && CC != ISD::SETNE)
5934 return SDValue();
5935 ConstantSDNode *CN = dyn_cast<ConstantSDNode>(AndInputValue1.getOperand(1));
5936 if (!CN || !CN->isAllOnes())
5937 return SDValue();
5938 CN = dyn_cast<ConstantSDNode>(CmpInputValue);
5939 if (!CN || !CN->isZero())
5940 return SDValue();
5941 AndInputValue1 = AndInputValue1.getOperand(0);
5942 if (AndInputValue1.getOpcode() != ISD::ZERO_EXTEND)
5943 return SDValue();
5944 } else if (AndInputValue1.getOpcode() == ISD::ZERO_EXTEND) {
5945 if (AndInputValue2 != CmpInputValue)
5946 return SDValue();
5947 } else {
5948 return SDValue();
5949 }
5950
5951 SDValue TruncValue1 = AndInputValue1.getNode()->getOperand(0);
5952 if (TruncValue1.getOpcode() != ISD::TRUNCATE)
5953 return SDValue();
5954
5955 SDValue TruncValue2 = AndInputValue2.getNode()->getOperand(0);
5956 if (TruncValue2.getOpcode() != ISD::TRUNCATE)
5957 return SDValue();
5958
5959 SDValue TruncInputValue1 = TruncValue1.getNode()->getOperand(0);
5960 SDValue TruncInputValue2 = TruncValue2.getNode()->getOperand(0);
5961 ISD::LoadExtType ExtType1;
5962 ISD::LoadExtType ExtType2;
5963
5964 if (!checkValueWidth(TruncInputValue1, ExtType1) ||
5965 !checkValueWidth(TruncInputValue2, ExtType2))
5966 return SDValue();
5967
5968 if (TruncInputValue1->getValueType(0) != TruncInputValue2->getValueType(0) ||
5969 AndNode->getValueType(0) != TruncInputValue1->getValueType(0))
5970 return SDValue();
5971
5972 if ((ExtType2 != ISD::ZEXTLOAD) &&
5973 ((ExtType2 != ISD::SEXTLOAD) && (ExtType1 != ISD::SEXTLOAD)))
5974 return SDValue();
5975
5976 // These truncation and zero-extension nodes are not necessary; remove them.
5977 SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N), AndNode->getValueType(0),
5978 TruncInputValue1, TruncInputValue2);
5979 SDValue NewSetCC =
5980 DAG.getSetCC(SDLoc(N), N->getValueType(0), NewAnd, TruncInputValue2, CC);
5981 DAG.ReplaceAllUsesWith(N, NewSetCC.getNode());
5982 return SDValue(N, 0);
5983}
5984
5985// Combine (loongarch_bitrev_w (loongarch_revb_2w X)) to loongarch_bitrev_4b.
5986 static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG,
5987 TargetLowering::DAGCombinerInfo &DCI,
5988 const LoongArchSubtarget &Subtarget) {
5989 if (DCI.isBeforeLegalizeOps())
5990 return SDValue();
5991
5992 SDValue Src = N->getOperand(0);
5993 if (Src.getOpcode() != LoongArchISD::REVB_2W)
5994 return SDValue();
5995
5996 return DAG.getNode(LoongArchISD::BITREV_4B, SDLoc(N), N->getValueType(0),
5997 Src.getOperand(0));
5998}
5999
6000// Perform common combines for BR_CC and SELECT_CC conditions.
6001static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL,
6002 SelectionDAG &DAG, const LoongArchSubtarget &Subtarget) {
6003 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
6004
6005 // Since an arithmetic right shift always preserves the sign bit,
6006 // the shift can be omitted.
6007 // Fold setlt (sra X, N), 0 -> setlt X, 0 and
6008 // setge (sra X, N), 0 -> setge X, 0
6009 if (isNullConstant(RHS) && (CCVal == ISD::SETGE || CCVal == ISD::SETLT) &&
6010 LHS.getOpcode() == ISD::SRA) {
6011 LHS = LHS.getOperand(0);
6012 return true;
6013 }
6014
6015 if (!ISD::isIntEqualitySetCC(CCVal))
6016 return false;
6017
6018 // Fold ((setlt X, Y), 0, ne) -> (X, Y, lt)
6019 // Sometimes the setcc is introduced after br_cc/select_cc has been formed.
6020 if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
6021 LHS.getOperand(0).getValueType() == Subtarget.getGRLenVT()) {
6022 // If we're looking for eq 0 instead of ne 0, we need to invert the
6023 // condition.
6024 bool Invert = CCVal == ISD::SETEQ;
6025 CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
6026 if (Invert)
6027 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
6028
6029 RHS = LHS.getOperand(1);
6030 LHS = LHS.getOperand(0);
6031 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
6032
6033 CC = DAG.getCondCode(CCVal);
6034 return true;
6035 }
6036
6037 // Fold ((srl (and X, 1<<C), C), 0, eq/ne) -> ((shl X, GRLen-1-C), 0, ge/lt)
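// For example (illustrative values, GRLen = 64): with C = 3,
//   ((srl (and X, 8), 3), 0, eq) becomes ((shl X, 60), 0, ge),
// i.e. bit 3 of X is moved into the sign bit and tested there.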
6038 if (isNullConstant(RHS) && LHS.getOpcode() == ISD::SRL && LHS.hasOneUse() &&
6039 LHS.getOperand(1).getOpcode() == ISD::Constant) {
6040 SDValue LHS0 = LHS.getOperand(0);
6041 if (LHS0.getOpcode() == ISD::AND &&
6042 LHS0.getOperand(1).getOpcode() == ISD::Constant) {
6043 uint64_t Mask = LHS0.getConstantOperandVal(1);
6044 uint64_t ShAmt = LHS.getConstantOperandVal(1);
6045 if (isPowerOf2_64(Mask) && Log2_64(Mask) == ShAmt) {
6046 CCVal = CCVal == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
6047 CC = DAG.getCondCode(CCVal);
6048
6049 ShAmt = LHS.getValueSizeInBits() - 1 - ShAmt;
6050 LHS = LHS0.getOperand(0);
6051 if (ShAmt != 0)
6052 LHS =
6053 DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS0.getOperand(0),
6054 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
6055 return true;
6056 }
6057 }
6058 }
6059
6060 // (X, 1, setne) -> (X, 0, seteq) if we can prove X is 0/1.
6061 // This can occur when legalizing some floating point comparisons.
6062 APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
6063 if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
6064 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
6065 CC = DAG.getCondCode(CCVal);
6066 RHS = DAG.getConstant(0, DL, LHS.getValueType());
6067 return true;
6068 }
6069
6070 return false;
6071}
6072
6073 static SDValue performBR_CCCombine(SDNode *N, SelectionDAG &DAG,
6074 TargetLowering::DAGCombinerInfo &DCI,
6075 const LoongArchSubtarget &Subtarget) {
6076 SDValue LHS = N->getOperand(1);
6077 SDValue RHS = N->getOperand(2);
6078 SDValue CC = N->getOperand(3);
6079 SDLoc DL(N);
6080
6081 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
6082 return DAG.getNode(LoongArchISD::BR_CC, DL, N->getValueType(0),
6083 N->getOperand(0), LHS, RHS, CC, N->getOperand(4));
6084
6085 return SDValue();
6086}
6087
6088 static SDValue performSELECT_CCCombine(SDNode *N, SelectionDAG &DAG,
6089 TargetLowering::DAGCombinerInfo &DCI,
6090 const LoongArchSubtarget &Subtarget) {
6091 // Transform
6092 SDValue LHS = N->getOperand(0);
6093 SDValue RHS = N->getOperand(1);
6094 SDValue CC = N->getOperand(2);
6095 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
6096 SDValue TrueV = N->getOperand(3);
6097 SDValue FalseV = N->getOperand(4);
6098 SDLoc DL(N);
6099 EVT VT = N->getValueType(0);
6100
6101 // If the True and False values are the same, we don't need a select_cc.
6102 if (TrueV == FalseV)
6103 return TrueV;
6104
6105 // (select (x < 0), y, z) -> x >> (GRLEN - 1) & (y - z) + z
6106 // (select (x >= 0), y, z) -> x >> (GRLEN - 1) & (z - y) + y
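// For example (illustrative values): with y = 5 and z = 2, x >> (GRLEN - 1)
// is -1 when x < 0 and 0 otherwise, so the result is (-1 & 3) + 2 = 5 or
// (0 & 3) + 2 = 2.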
6107 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
6108 isNullConstant(RHS) &&
6109 (CCVal == ISD::CondCode::SETLT || CCVal == ISD::CondCode::SETGE)) {
6110 if (CCVal == ISD::CondCode::SETGE)
6111 std::swap(TrueV, FalseV);
6112
6113 int64_t TrueSImm = cast<ConstantSDNode>(TrueV)->getSExtValue();
6114 int64_t FalseSImm = cast<ConstantSDNode>(FalseV)->getSExtValue();
6115 // Only handle simm12; if the value is not in this range, it can be
6116 // considered a register.
6117 if (isInt<12>(TrueSImm) && isInt<12>(FalseSImm) &&
6118 isInt<12>(TrueSImm - FalseSImm)) {
6119 SDValue SRA =
6120 DAG.getNode(ISD::SRA, DL, VT, LHS,
6121 DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT));
6122 SDValue AND =
6123 DAG.getNode(ISD::AND, DL, VT, SRA,
6124 DAG.getSignedConstant(TrueSImm - FalseSImm, DL, VT));
6125 return DAG.getNode(ISD::ADD, DL, VT, AND, FalseV);
6126 }
6127
6128 if (CCVal == ISD::CondCode::SETGE)
6129 std::swap(TrueV, FalseV);
6130 }
6131
6132 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
6133 return DAG.getNode(LoongArchISD::SELECT_CC, DL, N->getValueType(0),
6134 {LHS, RHS, CC, TrueV, FalseV});
6135
6136 return SDValue();
6137}
6138
6139template <unsigned N>
6140 static SDValue legalizeIntrinsicImmArg(SDNode *Node, unsigned ImmOp,
6141 SelectionDAG &DAG,
6142 const LoongArchSubtarget &Subtarget,
6143 bool IsSigned = false) {
6144 SDLoc DL(Node);
6145 auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
6146 // Check the ImmArg.
6147 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
6148 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
6149 DAG.getContext()->emitError(Node->getOperationName(0) +
6150 ": argument out of range.");
6151 return DAG.getNode(ISD::UNDEF, DL, Subtarget.getGRLenVT());
6152 }
6153 return DAG.getConstant(CImm->getZExtValue(), DL, Subtarget.getGRLenVT());
6154}
6155
6156template <unsigned N>
6157static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp,
6158 SelectionDAG &DAG, bool IsSigned = false) {
6159 SDLoc DL(Node);
6160 EVT ResTy = Node->getValueType(0);
6161 auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
6162
6163 // Check the ImmArg.
6164 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
6165 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
6166 DAG.getContext()->emitError(Node->getOperationName(0) +
6167 ": argument out of range.");
6168 return DAG.getNode(ISD::UNDEF, DL, ResTy);
6169 }
6170 return DAG.getConstant(
6171 APInt(ResTy.getScalarType().getSizeInBits(),
6172 IsSigned ? CImm->getSExtValue() : CImm->getZExtValue(), IsSigned),
6173 DL, ResTy);
6174}
6175
6176 static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG) {
6177 SDLoc DL(Node);
6178 EVT ResTy = Node->getValueType(0);
6179 SDValue Vec = Node->getOperand(2);
6180 SDValue Mask = DAG.getConstant(Vec.getScalarValueSizeInBits() - 1, DL, ResTy);
6181 return DAG.getNode(ISD::AND, DL, ResTy, Vec, Mask);
6182}
6183
6184 static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG) {
6185 SDLoc DL(Node);
6186 EVT ResTy = Node->getValueType(0);
6187 SDValue One = DAG.getConstant(1, DL, ResTy);
6188 SDValue Bit =
6189 DAG.getNode(ISD::SHL, DL, ResTy, One, truncateVecElts(Node, DAG));
6190
6191 return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1),
6192 DAG.getNOT(DL, Bit, ResTy));
6193}
6194
6195template <unsigned N>
6196 static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG) {
6197 SDLoc DL(Node);
6198 EVT ResTy = Node->getValueType(0);
6199 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
6200 // Check the unsigned ImmArg.
6201 if (!isUInt<N>(CImm->getZExtValue())) {
6202 DAG.getContext()->emitError(Node->getOperationName(0) +
6203 ": argument out of range.");
6204 return DAG.getNode(ISD::UNDEF, DL, ResTy);
6205 }
6206
6207 APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
6208 SDValue Mask = DAG.getConstant(~BitImm, DL, ResTy);
6209
6210 return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1), Mask);
6211}
6212
6213template <unsigned N>
6214 static SDValue lowerVectorBitSetImm(SDNode *Node, SelectionDAG &DAG) {
6215 SDLoc DL(Node);
6216 EVT ResTy = Node->getValueType(0);
6217 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
6218 // Check the unsigned ImmArg.
6219 if (!isUInt<N>(CImm->getZExtValue())) {
6220 DAG.getContext()->emitError(Node->getOperationName(0) +
6221 ": argument out of range.");
6222 return DAG.getNode(ISD::UNDEF, DL, ResTy);
6223 }
6224
6225 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
6226 SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
6227 return DAG.getNode(ISD::OR, DL, ResTy, Node->getOperand(1), BitImm);
6228}
6229
6230template <unsigned N>
6231 static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG) {
6232 SDLoc DL(Node);
6233 EVT ResTy = Node->getValueType(0);
6234 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
6235 // Check the unsigned ImmArg.
6236 if (!isUInt<N>(CImm->getZExtValue())) {
6237 DAG.getContext()->emitError(Node->getOperationName(0) +
6238 ": argument out of range.");
6239 return DAG.getNode(ISD::UNDEF, DL, ResTy);
6240 }
6241
6242 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
6243 SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
6244 return DAG.getNode(ISD::XOR, DL, ResTy, Node->getOperand(1), BitImm);
6245}
6246
6247template <unsigned W>
6248 static SDValue lowerVectorPickVE2GR(SDNode *N, SelectionDAG &DAG,
6249 unsigned ResOp) {
6250 unsigned Imm = N->getConstantOperandVal(2);
6251 if (!isUInt<W>(Imm)) {
6252 const StringRef ErrorMsg = "argument out of range";
6253 DAG.getContext()->emitError(N->getOperationName(0) + ": " + ErrorMsg + ".");
6254 return DAG.getUNDEF(N->getValueType(0));
6255 }
6256 SDLoc DL(N);
6257 SDValue Vec = N->getOperand(1);
6258 SDValue Idx = DAG.getConstant(Imm, DL, MVT::i32);
6259 SDValue EltVT = DAG.getValueType(Vec.getValueType().getVectorElementType());
6260 return DAG.getNode(ResOp, DL, N->getValueType(0), Vec, Idx, EltVT);
6261}
6262
6263static SDValue
6264 performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG,
6265 TargetLowering::DAGCombinerInfo &DCI,
6266 const LoongArchSubtarget &Subtarget) {
6267 SDLoc DL(N);
6268 switch (N->getConstantOperandVal(0)) {
6269 default:
6270 break;
6271 case Intrinsic::loongarch_lsx_vadd_b:
6272 case Intrinsic::loongarch_lsx_vadd_h:
6273 case Intrinsic::loongarch_lsx_vadd_w:
6274 case Intrinsic::loongarch_lsx_vadd_d:
6275 case Intrinsic::loongarch_lasx_xvadd_b:
6276 case Intrinsic::loongarch_lasx_xvadd_h:
6277 case Intrinsic::loongarch_lasx_xvadd_w:
6278 case Intrinsic::loongarch_lasx_xvadd_d:
6279 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
6280 N->getOperand(2));
6281 case Intrinsic::loongarch_lsx_vaddi_bu:
6282 case Intrinsic::loongarch_lsx_vaddi_hu:
6283 case Intrinsic::loongarch_lsx_vaddi_wu:
6284 case Intrinsic::loongarch_lsx_vaddi_du:
6285 case Intrinsic::loongarch_lasx_xvaddi_bu:
6286 case Intrinsic::loongarch_lasx_xvaddi_hu:
6287 case Intrinsic::loongarch_lasx_xvaddi_wu:
6288 case Intrinsic::loongarch_lasx_xvaddi_du:
6289 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
6290 lowerVectorSplatImm<5>(N, 2, DAG));
6291 case Intrinsic::loongarch_lsx_vsub_b:
6292 case Intrinsic::loongarch_lsx_vsub_h:
6293 case Intrinsic::loongarch_lsx_vsub_w:
6294 case Intrinsic::loongarch_lsx_vsub_d:
6295 case Intrinsic::loongarch_lasx_xvsub_b:
6296 case Intrinsic::loongarch_lasx_xvsub_h:
6297 case Intrinsic::loongarch_lasx_xvsub_w:
6298 case Intrinsic::loongarch_lasx_xvsub_d:
6299 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
6300 N->getOperand(2));
6301 case Intrinsic::loongarch_lsx_vsubi_bu:
6302 case Intrinsic::loongarch_lsx_vsubi_hu:
6303 case Intrinsic::loongarch_lsx_vsubi_wu:
6304 case Intrinsic::loongarch_lsx_vsubi_du:
6305 case Intrinsic::loongarch_lasx_xvsubi_bu:
6306 case Intrinsic::loongarch_lasx_xvsubi_hu:
6307 case Intrinsic::loongarch_lasx_xvsubi_wu:
6308 case Intrinsic::loongarch_lasx_xvsubi_du:
6309 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
6310 lowerVectorSplatImm<5>(N, 2, DAG));
6311 case Intrinsic::loongarch_lsx_vneg_b:
6312 case Intrinsic::loongarch_lsx_vneg_h:
6313 case Intrinsic::loongarch_lsx_vneg_w:
6314 case Intrinsic::loongarch_lsx_vneg_d:
6315 case Intrinsic::loongarch_lasx_xvneg_b:
6316 case Intrinsic::loongarch_lasx_xvneg_h:
6317 case Intrinsic::loongarch_lasx_xvneg_w:
6318 case Intrinsic::loongarch_lasx_xvneg_d:
6319 return DAG.getNode(
6320 ISD::SUB, DL, N->getValueType(0),
6321 DAG.getConstant(
6322 APInt(N->getValueType(0).getScalarType().getSizeInBits(), 0,
6323 /*isSigned=*/true),
6324 SDLoc(N), N->getValueType(0)),
6325 N->getOperand(1));
6326 case Intrinsic::loongarch_lsx_vmax_b:
6327 case Intrinsic::loongarch_lsx_vmax_h:
6328 case Intrinsic::loongarch_lsx_vmax_w:
6329 case Intrinsic::loongarch_lsx_vmax_d:
6330 case Intrinsic::loongarch_lasx_xvmax_b:
6331 case Intrinsic::loongarch_lasx_xvmax_h:
6332 case Intrinsic::loongarch_lasx_xvmax_w:
6333 case Intrinsic::loongarch_lasx_xvmax_d:
6334 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
6335 N->getOperand(2));
6336 case Intrinsic::loongarch_lsx_vmax_bu:
6337 case Intrinsic::loongarch_lsx_vmax_hu:
6338 case Intrinsic::loongarch_lsx_vmax_wu:
6339 case Intrinsic::loongarch_lsx_vmax_du:
6340 case Intrinsic::loongarch_lasx_xvmax_bu:
6341 case Intrinsic::loongarch_lasx_xvmax_hu:
6342 case Intrinsic::loongarch_lasx_xvmax_wu:
6343 case Intrinsic::loongarch_lasx_xvmax_du:
6344 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
6345 N->getOperand(2));
6346 case Intrinsic::loongarch_lsx_vmaxi_b:
6347 case Intrinsic::loongarch_lsx_vmaxi_h:
6348 case Intrinsic::loongarch_lsx_vmaxi_w:
6349 case Intrinsic::loongarch_lsx_vmaxi_d:
6350 case Intrinsic::loongarch_lasx_xvmaxi_b:
6351 case Intrinsic::loongarch_lasx_xvmaxi_h:
6352 case Intrinsic::loongarch_lasx_xvmaxi_w:
6353 case Intrinsic::loongarch_lasx_xvmaxi_d:
6354 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
6355 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
6356 case Intrinsic::loongarch_lsx_vmaxi_bu:
6357 case Intrinsic::loongarch_lsx_vmaxi_hu:
6358 case Intrinsic::loongarch_lsx_vmaxi_wu:
6359 case Intrinsic::loongarch_lsx_vmaxi_du:
6360 case Intrinsic::loongarch_lasx_xvmaxi_bu:
6361 case Intrinsic::loongarch_lasx_xvmaxi_hu:
6362 case Intrinsic::loongarch_lasx_xvmaxi_wu:
6363 case Intrinsic::loongarch_lasx_xvmaxi_du:
6364 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
6365 lowerVectorSplatImm<5>(N, 2, DAG));
6366 case Intrinsic::loongarch_lsx_vmin_b:
6367 case Intrinsic::loongarch_lsx_vmin_h:
6368 case Intrinsic::loongarch_lsx_vmin_w:
6369 case Intrinsic::loongarch_lsx_vmin_d:
6370 case Intrinsic::loongarch_lasx_xvmin_b:
6371 case Intrinsic::loongarch_lasx_xvmin_h:
6372 case Intrinsic::loongarch_lasx_xvmin_w:
6373 case Intrinsic::loongarch_lasx_xvmin_d:
6374 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
6375 N->getOperand(2));
6376 case Intrinsic::loongarch_lsx_vmin_bu:
6377 case Intrinsic::loongarch_lsx_vmin_hu:
6378 case Intrinsic::loongarch_lsx_vmin_wu:
6379 case Intrinsic::loongarch_lsx_vmin_du:
6380 case Intrinsic::loongarch_lasx_xvmin_bu:
6381 case Intrinsic::loongarch_lasx_xvmin_hu:
6382 case Intrinsic::loongarch_lasx_xvmin_wu:
6383 case Intrinsic::loongarch_lasx_xvmin_du:
6384 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
6385 N->getOperand(2));
6386 case Intrinsic::loongarch_lsx_vmini_b:
6387 case Intrinsic::loongarch_lsx_vmini_h:
6388 case Intrinsic::loongarch_lsx_vmini_w:
6389 case Intrinsic::loongarch_lsx_vmini_d:
6390 case Intrinsic::loongarch_lasx_xvmini_b:
6391 case Intrinsic::loongarch_lasx_xvmini_h:
6392 case Intrinsic::loongarch_lasx_xvmini_w:
6393 case Intrinsic::loongarch_lasx_xvmini_d:
6394 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
6395 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
6396 case Intrinsic::loongarch_lsx_vmini_bu:
6397 case Intrinsic::loongarch_lsx_vmini_hu:
6398 case Intrinsic::loongarch_lsx_vmini_wu:
6399 case Intrinsic::loongarch_lsx_vmini_du:
6400 case Intrinsic::loongarch_lasx_xvmini_bu:
6401 case Intrinsic::loongarch_lasx_xvmini_hu:
6402 case Intrinsic::loongarch_lasx_xvmini_wu:
6403 case Intrinsic::loongarch_lasx_xvmini_du:
6404 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
6405 lowerVectorSplatImm<5>(N, 2, DAG));
6406 case Intrinsic::loongarch_lsx_vmul_b:
6407 case Intrinsic::loongarch_lsx_vmul_h:
6408 case Intrinsic::loongarch_lsx_vmul_w:
6409 case Intrinsic::loongarch_lsx_vmul_d:
6410 case Intrinsic::loongarch_lasx_xvmul_b:
6411 case Intrinsic::loongarch_lasx_xvmul_h:
6412 case Intrinsic::loongarch_lasx_xvmul_w:
6413 case Intrinsic::loongarch_lasx_xvmul_d:
6414 return DAG.getNode(ISD::MUL, DL, N->getValueType(0), N->getOperand(1),
6415 N->getOperand(2));
6416 case Intrinsic::loongarch_lsx_vmadd_b:
6417 case Intrinsic::loongarch_lsx_vmadd_h:
6418 case Intrinsic::loongarch_lsx_vmadd_w:
6419 case Intrinsic::loongarch_lsx_vmadd_d:
6420 case Intrinsic::loongarch_lasx_xvmadd_b:
6421 case Intrinsic::loongarch_lasx_xvmadd_h:
6422 case Intrinsic::loongarch_lasx_xvmadd_w:
6423 case Intrinsic::loongarch_lasx_xvmadd_d: {
6424 EVT ResTy = N->getValueType(0);
6425 return DAG.getNode(ISD::ADD, SDLoc(N), ResTy, N->getOperand(1),
6426 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
6427 N->getOperand(3)));
6428 }
6429 case Intrinsic::loongarch_lsx_vmsub_b:
6430 case Intrinsic::loongarch_lsx_vmsub_h:
6431 case Intrinsic::loongarch_lsx_vmsub_w:
6432 case Intrinsic::loongarch_lsx_vmsub_d:
6433 case Intrinsic::loongarch_lasx_xvmsub_b:
6434 case Intrinsic::loongarch_lasx_xvmsub_h:
6435 case Intrinsic::loongarch_lasx_xvmsub_w:
6436 case Intrinsic::loongarch_lasx_xvmsub_d: {
6437 EVT ResTy = N->getValueType(0);
6438 return DAG.getNode(ISD::SUB, SDLoc(N), ResTy, N->getOperand(1),
6439 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
6440 N->getOperand(3)));
6441 }
6442 case Intrinsic::loongarch_lsx_vdiv_b:
6443 case Intrinsic::loongarch_lsx_vdiv_h:
6444 case Intrinsic::loongarch_lsx_vdiv_w:
6445 case Intrinsic::loongarch_lsx_vdiv_d:
6446 case Intrinsic::loongarch_lasx_xvdiv_b:
6447 case Intrinsic::loongarch_lasx_xvdiv_h:
6448 case Intrinsic::loongarch_lasx_xvdiv_w:
6449 case Intrinsic::loongarch_lasx_xvdiv_d:
6450 return DAG.getNode(ISD::SDIV, DL, N->getValueType(0), N->getOperand(1),
6451 N->getOperand(2));
6452 case Intrinsic::loongarch_lsx_vdiv_bu:
6453 case Intrinsic::loongarch_lsx_vdiv_hu:
6454 case Intrinsic::loongarch_lsx_vdiv_wu:
6455 case Intrinsic::loongarch_lsx_vdiv_du:
6456 case Intrinsic::loongarch_lasx_xvdiv_bu:
6457 case Intrinsic::loongarch_lasx_xvdiv_hu:
6458 case Intrinsic::loongarch_lasx_xvdiv_wu:
6459 case Intrinsic::loongarch_lasx_xvdiv_du:
6460 return DAG.getNode(ISD::UDIV, DL, N->getValueType(0), N->getOperand(1),
6461 N->getOperand(2));
6462 case Intrinsic::loongarch_lsx_vmod_b:
6463 case Intrinsic::loongarch_lsx_vmod_h:
6464 case Intrinsic::loongarch_lsx_vmod_w:
6465 case Intrinsic::loongarch_lsx_vmod_d:
6466 case Intrinsic::loongarch_lasx_xvmod_b:
6467 case Intrinsic::loongarch_lasx_xvmod_h:
6468 case Intrinsic::loongarch_lasx_xvmod_w:
6469 case Intrinsic::loongarch_lasx_xvmod_d:
6470 return DAG.getNode(ISD::SREM, DL, N->getValueType(0), N->getOperand(1),
6471 N->getOperand(2));
6472 case Intrinsic::loongarch_lsx_vmod_bu:
6473 case Intrinsic::loongarch_lsx_vmod_hu:
6474 case Intrinsic::loongarch_lsx_vmod_wu:
6475 case Intrinsic::loongarch_lsx_vmod_du:
6476 case Intrinsic::loongarch_lasx_xvmod_bu:
6477 case Intrinsic::loongarch_lasx_xvmod_hu:
6478 case Intrinsic::loongarch_lasx_xvmod_wu:
6479 case Intrinsic::loongarch_lasx_xvmod_du:
6480 return DAG.getNode(ISD::UREM, DL, N->getValueType(0), N->getOperand(1),
6481 N->getOperand(2));
6482 case Intrinsic::loongarch_lsx_vand_v:
6483 case Intrinsic::loongarch_lasx_xvand_v:
6484 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
6485 N->getOperand(2));
6486 case Intrinsic::loongarch_lsx_vor_v:
6487 case Intrinsic::loongarch_lasx_xvor_v:
6488 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
6489 N->getOperand(2));
6490 case Intrinsic::loongarch_lsx_vxor_v:
6491 case Intrinsic::loongarch_lasx_xvxor_v:
6492 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
6493 N->getOperand(2));
6494 case Intrinsic::loongarch_lsx_vnor_v:
6495 case Intrinsic::loongarch_lasx_xvnor_v: {
6496 SDValue Res = DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
6497 N->getOperand(2));
6498 return DAG.getNOT(DL, Res, Res->getValueType(0));
6499 }
6500 case Intrinsic::loongarch_lsx_vandi_b:
6501 case Intrinsic::loongarch_lasx_xvandi_b:
6502 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
6503 lowerVectorSplatImm<8>(N, 2, DAG));
6504 case Intrinsic::loongarch_lsx_vori_b:
6505 case Intrinsic::loongarch_lasx_xvori_b:
6506 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
6507 lowerVectorSplatImm<8>(N, 2, DAG));
6508 case Intrinsic::loongarch_lsx_vxori_b:
6509 case Intrinsic::loongarch_lasx_xvxori_b:
6510 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
6511 lowerVectorSplatImm<8>(N, 2, DAG));
6512 case Intrinsic::loongarch_lsx_vsll_b:
6513 case Intrinsic::loongarch_lsx_vsll_h:
6514 case Intrinsic::loongarch_lsx_vsll_w:
6515 case Intrinsic::loongarch_lsx_vsll_d:
6516 case Intrinsic::loongarch_lasx_xvsll_b:
6517 case Intrinsic::loongarch_lasx_xvsll_h:
6518 case Intrinsic::loongarch_lasx_xvsll_w:
6519 case Intrinsic::loongarch_lasx_xvsll_d:
6520 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
6521 truncateVecElts(N, DAG));
6522 case Intrinsic::loongarch_lsx_vslli_b:
6523 case Intrinsic::loongarch_lasx_xvslli_b:
6524 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
6525 lowerVectorSplatImm<3>(N, 2, DAG));
6526 case Intrinsic::loongarch_lsx_vslli_h:
6527 case Intrinsic::loongarch_lasx_xvslli_h:
6528 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
6529 lowerVectorSplatImm<4>(N, 2, DAG));
6530 case Intrinsic::loongarch_lsx_vslli_w:
6531 case Intrinsic::loongarch_lasx_xvslli_w:
6532 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
6533 lowerVectorSplatImm<5>(N, 2, DAG));
6534 case Intrinsic::loongarch_lsx_vslli_d:
6535 case Intrinsic::loongarch_lasx_xvslli_d:
6536 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
6537 lowerVectorSplatImm<6>(N, 2, DAG));
6538 case Intrinsic::loongarch_lsx_vsrl_b:
6539 case Intrinsic::loongarch_lsx_vsrl_h:
6540 case Intrinsic::loongarch_lsx_vsrl_w:
6541 case Intrinsic::loongarch_lsx_vsrl_d:
6542 case Intrinsic::loongarch_lasx_xvsrl_b:
6543 case Intrinsic::loongarch_lasx_xvsrl_h:
6544 case Intrinsic::loongarch_lasx_xvsrl_w:
6545 case Intrinsic::loongarch_lasx_xvsrl_d:
6546 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
6547 truncateVecElts(N, DAG));
6548 case Intrinsic::loongarch_lsx_vsrli_b:
6549 case Intrinsic::loongarch_lasx_xvsrli_b:
6550 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
6551 lowerVectorSplatImm<3>(N, 2, DAG));
6552 case Intrinsic::loongarch_lsx_vsrli_h:
6553 case Intrinsic::loongarch_lasx_xvsrli_h:
6554 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
6555 lowerVectorSplatImm<4>(N, 2, DAG));
6556 case Intrinsic::loongarch_lsx_vsrli_w:
6557 case Intrinsic::loongarch_lasx_xvsrli_w:
6558 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
6559 lowerVectorSplatImm<5>(N, 2, DAG));
6560 case Intrinsic::loongarch_lsx_vsrli_d:
6561 case Intrinsic::loongarch_lasx_xvsrli_d:
6562 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
6563 lowerVectorSplatImm<6>(N, 2, DAG));
6564 case Intrinsic::loongarch_lsx_vsra_b:
6565 case Intrinsic::loongarch_lsx_vsra_h:
6566 case Intrinsic::loongarch_lsx_vsra_w:
6567 case Intrinsic::loongarch_lsx_vsra_d:
6568 case Intrinsic::loongarch_lasx_xvsra_b:
6569 case Intrinsic::loongarch_lasx_xvsra_h:
6570 case Intrinsic::loongarch_lasx_xvsra_w:
6571 case Intrinsic::loongarch_lasx_xvsra_d:
6572 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6573 truncateVecElts(N, DAG));
6574 case Intrinsic::loongarch_lsx_vsrai_b:
6575 case Intrinsic::loongarch_lasx_xvsrai_b:
6576 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6577 lowerVectorSplatImm<3>(N, 2, DAG));
6578 case Intrinsic::loongarch_lsx_vsrai_h:
6579 case Intrinsic::loongarch_lasx_xvsrai_h:
6580 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6581 lowerVectorSplatImm<4>(N, 2, DAG));
6582 case Intrinsic::loongarch_lsx_vsrai_w:
6583 case Intrinsic::loongarch_lasx_xvsrai_w:
6584 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6585 lowerVectorSplatImm<5>(N, 2, DAG));
6586 case Intrinsic::loongarch_lsx_vsrai_d:
6587 case Intrinsic::loongarch_lasx_xvsrai_d:
6588 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6589 lowerVectorSplatImm<6>(N, 2, DAG));
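// Illustration of the immediate shift intrinsics handled above: each one is
// lowered to the corresponding generic ISD shift with the immediate splatted
// across the vector, and the lowerVectorSplatImm template argument gives the
// expected immediate width (3 bits for .b, 4 for .h, 5 for .w, 6 for .d).
// Roughly, for example:
//   @llvm.loongarch.lsx.vslli.w(<4 x i32> %v, i32 3)  ==>  (shl %v, splat(3))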
6590 case Intrinsic::loongarch_lsx_vclz_b:
6591 case Intrinsic::loongarch_lsx_vclz_h:
6592 case Intrinsic::loongarch_lsx_vclz_w:
6593 case Intrinsic::loongarch_lsx_vclz_d:
6594 case Intrinsic::loongarch_lasx_xvclz_b:
6595 case Intrinsic::loongarch_lasx_xvclz_h:
6596 case Intrinsic::loongarch_lasx_xvclz_w:
6597 case Intrinsic::loongarch_lasx_xvclz_d:
6598 return DAG.getNode(ISD::CTLZ, DL, N->getValueType(0), N->getOperand(1));
6599 case Intrinsic::loongarch_lsx_vpcnt_b:
6600 case Intrinsic::loongarch_lsx_vpcnt_h:
6601 case Intrinsic::loongarch_lsx_vpcnt_w:
6602 case Intrinsic::loongarch_lsx_vpcnt_d:
6603 case Intrinsic::loongarch_lasx_xvpcnt_b:
6604 case Intrinsic::loongarch_lasx_xvpcnt_h:
6605 case Intrinsic::loongarch_lasx_xvpcnt_w:
6606 case Intrinsic::loongarch_lasx_xvpcnt_d:
6607 return DAG.getNode(ISD::CTPOP, DL, N->getValueType(0), N->getOperand(1));
6608 case Intrinsic::loongarch_lsx_vbitclr_b:
6609 case Intrinsic::loongarch_lsx_vbitclr_h:
6610 case Intrinsic::loongarch_lsx_vbitclr_w:
6611 case Intrinsic::loongarch_lsx_vbitclr_d:
6612 case Intrinsic::loongarch_lasx_xvbitclr_b:
6613 case Intrinsic::loongarch_lasx_xvbitclr_h:
6614 case Intrinsic::loongarch_lasx_xvbitclr_w:
6615 case Intrinsic::loongarch_lasx_xvbitclr_d:
6616 return lowerVectorBitClear(N, DAG);
6617 case Intrinsic::loongarch_lsx_vbitclri_b:
6618 case Intrinsic::loongarch_lasx_xvbitclri_b:
6619 return lowerVectorBitClearImm<3>(N, DAG);
6620 case Intrinsic::loongarch_lsx_vbitclri_h:
6621 case Intrinsic::loongarch_lasx_xvbitclri_h:
6622 return lowerVectorBitClearImm<4>(N, DAG);
6623 case Intrinsic::loongarch_lsx_vbitclri_w:
6624 case Intrinsic::loongarch_lasx_xvbitclri_w:
6625 return lowerVectorBitClearImm<5>(N, DAG);
6626 case Intrinsic::loongarch_lsx_vbitclri_d:
6627 case Intrinsic::loongarch_lasx_xvbitclri_d:
6628 return lowerVectorBitClearImm<6>(N, DAG);
6629 case Intrinsic::loongarch_lsx_vbitset_b:
6630 case Intrinsic::loongarch_lsx_vbitset_h:
6631 case Intrinsic::loongarch_lsx_vbitset_w:
6632 case Intrinsic::loongarch_lsx_vbitset_d:
6633 case Intrinsic::loongarch_lasx_xvbitset_b:
6634 case Intrinsic::loongarch_lasx_xvbitset_h:
6635 case Intrinsic::loongarch_lasx_xvbitset_w:
6636 case Intrinsic::loongarch_lasx_xvbitset_d: {
6637 EVT VecTy = N->getValueType(0);
6638 SDValue One = DAG.getConstant(1, DL, VecTy);
6639 return DAG.getNode(
6640 ISD::OR, DL, VecTy, N->getOperand(1),
6641 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
6642 }
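// In effect the bit-set intrinsics above compute v | (1 << n) per element,
// with truncateVecElts reducing each shift amount modulo the element width,
// so, for example, an index of 35 for vbitset.w sets bit 3 of every 32-bit
// element.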
6643 case Intrinsic::loongarch_lsx_vbitseti_b:
6644 case Intrinsic::loongarch_lasx_xvbitseti_b:
6645 return lowerVectorBitSetImm<3>(N, DAG);
6646 case Intrinsic::loongarch_lsx_vbitseti_h:
6647 case Intrinsic::loongarch_lasx_xvbitseti_h:
6648 return lowerVectorBitSetImm<4>(N, DAG);
6649 case Intrinsic::loongarch_lsx_vbitseti_w:
6650 case Intrinsic::loongarch_lasx_xvbitseti_w:
6651 return lowerVectorBitSetImm<5>(N, DAG);
6652 case Intrinsic::loongarch_lsx_vbitseti_d:
6653 case Intrinsic::loongarch_lasx_xvbitseti_d:
6654 return lowerVectorBitSetImm<6>(N, DAG);
6655 case Intrinsic::loongarch_lsx_vbitrev_b:
6656 case Intrinsic::loongarch_lsx_vbitrev_h:
6657 case Intrinsic::loongarch_lsx_vbitrev_w:
6658 case Intrinsic::loongarch_lsx_vbitrev_d:
6659 case Intrinsic::loongarch_lasx_xvbitrev_b:
6660 case Intrinsic::loongarch_lasx_xvbitrev_h:
6661 case Intrinsic::loongarch_lasx_xvbitrev_w:
6662 case Intrinsic::loongarch_lasx_xvbitrev_d: {
6663 EVT VecTy = N->getValueType(0);
6664 SDValue One = DAG.getConstant(1, DL, VecTy);
6665 return DAG.getNode(
6666 ISD::XOR, DL, VecTy, N->getOperand(1),
6667 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
6668 }
6669 case Intrinsic::loongarch_lsx_vbitrevi_b:
6670 case Intrinsic::loongarch_lasx_xvbitrevi_b:
6671 return lowerVectorBitRevImm<3>(N, DAG);
6672 case Intrinsic::loongarch_lsx_vbitrevi_h:
6673 case Intrinsic::loongarch_lasx_xvbitrevi_h:
6674 return lowerVectorBitRevImm<4>(N, DAG);
6675 case Intrinsic::loongarch_lsx_vbitrevi_w:
6676 case Intrinsic::loongarch_lasx_xvbitrevi_w:
6677 return lowerVectorBitRevImm<5>(N, DAG);
6678 case Intrinsic::loongarch_lsx_vbitrevi_d:
6679 case Intrinsic::loongarch_lasx_xvbitrevi_d:
6680 return lowerVectorBitRevImm<6>(N, DAG);
6681 case Intrinsic::loongarch_lsx_vfadd_s:
6682 case Intrinsic::loongarch_lsx_vfadd_d:
6683 case Intrinsic::loongarch_lasx_xvfadd_s:
6684 case Intrinsic::loongarch_lasx_xvfadd_d:
6685 return DAG.getNode(ISD::FADD, DL, N->getValueType(0), N->getOperand(1),
6686 N->getOperand(2));
6687 case Intrinsic::loongarch_lsx_vfsub_s:
6688 case Intrinsic::loongarch_lsx_vfsub_d:
6689 case Intrinsic::loongarch_lasx_xvfsub_s:
6690 case Intrinsic::loongarch_lasx_xvfsub_d:
6691 return DAG.getNode(ISD::FSUB, DL, N->getValueType(0), N->getOperand(1),
6692 N->getOperand(2));
6693 case Intrinsic::loongarch_lsx_vfmul_s:
6694 case Intrinsic::loongarch_lsx_vfmul_d:
6695 case Intrinsic::loongarch_lasx_xvfmul_s:
6696 case Intrinsic::loongarch_lasx_xvfmul_d:
6697 return DAG.getNode(ISD::FMUL, DL, N->getValueType(0), N->getOperand(1),
6698 N->getOperand(2));
6699 case Intrinsic::loongarch_lsx_vfdiv_s:
6700 case Intrinsic::loongarch_lsx_vfdiv_d:
6701 case Intrinsic::loongarch_lasx_xvfdiv_s:
6702 case Intrinsic::loongarch_lasx_xvfdiv_d:
6703 return DAG.getNode(ISD::FDIV, DL, N->getValueType(0), N->getOperand(1),
6704 N->getOperand(2));
6705 case Intrinsic::loongarch_lsx_vfmadd_s:
6706 case Intrinsic::loongarch_lsx_vfmadd_d:
6707 case Intrinsic::loongarch_lasx_xvfmadd_s:
6708 case Intrinsic::loongarch_lasx_xvfmadd_d:
6709 return DAG.getNode(ISD::FMA, DL, N->getValueType(0), N->getOperand(1),
6710 N->getOperand(2), N->getOperand(3));
6711 case Intrinsic::loongarch_lsx_vinsgr2vr_b:
6712 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
6713 N->getOperand(1), N->getOperand(2),
6714 legalizeIntrinsicImmArg<4>(N, 3, DAG, Subtarget));
6715 case Intrinsic::loongarch_lsx_vinsgr2vr_h:
6716 case Intrinsic::loongarch_lasx_xvinsgr2vr_w:
6717 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
6718 N->getOperand(1), N->getOperand(2),
6719 legalizeIntrinsicImmArg<3>(N, 3, DAG, Subtarget));
6720 case Intrinsic::loongarch_lsx_vinsgr2vr_w:
6721 case Intrinsic::loongarch_lasx_xvinsgr2vr_d:
6722 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
6723 N->getOperand(1), N->getOperand(2),
6724 legalizeIntrinsicImmArg<2>(N, 3, DAG, Subtarget));
6725 case Intrinsic::loongarch_lsx_vinsgr2vr_d:
6726 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
6727 N->getOperand(1), N->getOperand(2),
6728 legalizeIntrinsicImmArg<1>(N, 3, DAG, Subtarget));
6729 case Intrinsic::loongarch_lsx_vreplgr2vr_b:
6730 case Intrinsic::loongarch_lsx_vreplgr2vr_h:
6731 case Intrinsic::loongarch_lsx_vreplgr2vr_w:
6732 case Intrinsic::loongarch_lsx_vreplgr2vr_d:
6733 case Intrinsic::loongarch_lasx_xvreplgr2vr_b:
6734 case Intrinsic::loongarch_lasx_xvreplgr2vr_h:
6735 case Intrinsic::loongarch_lasx_xvreplgr2vr_w:
6736 case Intrinsic::loongarch_lasx_xvreplgr2vr_d:
6737 return DAG.getNode(LoongArchISD::VREPLGR2VR, DL, N->getValueType(0),
6738 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
6739 N->getOperand(1)));
6740 case Intrinsic::loongarch_lsx_vreplve_b:
6741 case Intrinsic::loongarch_lsx_vreplve_h:
6742 case Intrinsic::loongarch_lsx_vreplve_w:
6743 case Intrinsic::loongarch_lsx_vreplve_d:
6744 case Intrinsic::loongarch_lasx_xvreplve_b:
6745 case Intrinsic::loongarch_lasx_xvreplve_h:
6746 case Intrinsic::loongarch_lasx_xvreplve_w:
6747 case Intrinsic::loongarch_lasx_xvreplve_d:
6748 return DAG.getNode(LoongArchISD::VREPLVE, DL, N->getValueType(0),
6749 N->getOperand(1),
6750 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
6751 N->getOperand(2)));
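// The replication intrinsics above carry a scalar GPR operand that may be
// narrower than GRLen in the IR; ANY_EXTEND widens it to GRLenVT so that the
// VREPLGR2VR and VREPLVE nodes always receive a native-width scalar operand.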
6752 case Intrinsic::loongarch_lsx_vpickve2gr_b:
6753 if (!Subtarget.is64Bit())
6754 return lowerVectorPickVE2GR<4>(N, DAG, LoongArchISD::VPICK_SEXT_ELT);
6755 break;
6756 case Intrinsic::loongarch_lsx_vpickve2gr_h:
6757 case Intrinsic::loongarch_lasx_xvpickve2gr_w:
6758 if (!Subtarget.is64Bit())
6759 return lowerVectorPickVE2GR<3>(N, DAG, LoongArchISD::VPICK_SEXT_ELT);
6760 break;
6761 case Intrinsic::loongarch_lsx_vpickve2gr_w:
6762 if (!Subtarget.is64Bit())
6763 return lowerVectorPickVE2GR<2>(N, DAG, LoongArchISD::VPICK_SEXT_ELT);
6764 break;
6765 case Intrinsic::loongarch_lsx_vpickve2gr_bu:
6766 if (!Subtarget.is64Bit())
6767 return lowerVectorPickVE2GR<4>(N, DAG, LoongArchISD::VPICK_ZEXT_ELT);
6768 break;
6769 case Intrinsic::loongarch_lsx_vpickve2gr_hu:
6770 case Intrinsic::loongarch_lasx_xvpickve2gr_wu:
6771 if (!Subtarget.is64Bit())
6772 return lowerVectorPickVE2GR<3>(N, DAG, LoongArchISD::VPICK_ZEXT_ELT);
6773 break;
6774 case Intrinsic::loongarch_lsx_vpickve2gr_wu:
6775 if (!Subtarget.is64Bit())
6776 return lowerVectorPickVE2GR<2>(N, DAG, LoongArchISD::VPICK_ZEXT_ELT);
6777 break;
6778 case Intrinsic::loongarch_lsx_bz_b:
6779 case Intrinsic::loongarch_lsx_bz_h:
6780 case Intrinsic::loongarch_lsx_bz_w:
6781 case Intrinsic::loongarch_lsx_bz_d:
6782 case Intrinsic::loongarch_lasx_xbz_b:
6783 case Intrinsic::loongarch_lasx_xbz_h:
6784 case Intrinsic::loongarch_lasx_xbz_w:
6785 case Intrinsic::loongarch_lasx_xbz_d:
6786 if (!Subtarget.is64Bit())
6787 return DAG.getNode(LoongArchISD::VALL_ZERO, DL, N->getValueType(0),
6788 N->getOperand(1));
6789 break;
6790 case Intrinsic::loongarch_lsx_bz_v:
6791 case Intrinsic::loongarch_lasx_xbz_v:
6792 if (!Subtarget.is64Bit())
6793 return DAG.getNode(LoongArchISD::VANY_ZERO, DL, N->getValueType(0),
6794 N->getOperand(1));
6795 break;
6796 case Intrinsic::loongarch_lsx_bnz_b:
6797 case Intrinsic::loongarch_lsx_bnz_h:
6798 case Intrinsic::loongarch_lsx_bnz_w:
6799 case Intrinsic::loongarch_lsx_bnz_d:
6800 case Intrinsic::loongarch_lasx_xbnz_b:
6801 case Intrinsic::loongarch_lasx_xbnz_h:
6802 case Intrinsic::loongarch_lasx_xbnz_w:
6803 case Intrinsic::loongarch_lasx_xbnz_d:
6804 if (!Subtarget.is64Bit())
6805 return DAG.getNode(LoongArchISD::VALL_NONZERO, DL, N->getValueType(0),
6806 N->getOperand(1));
6807 break;
6808 case Intrinsic::loongarch_lsx_bnz_v:
6809 case Intrinsic::loongarch_lasx_xbnz_v:
6810 if (!Subtarget.is64Bit())
6811 return DAG.getNode(LoongArchISD::VANY_NONZERO, DL, N->getValueType(0),
6812 N->getOperand(1));
6813 break;
6814 case Intrinsic::loongarch_lasx_concat_128_s:
6815 case Intrinsic::loongarch_lasx_concat_128_d:
6816 case Intrinsic::loongarch_lasx_concat_128:
6817 return DAG.getNode(ISD::CONCAT_VECTORS, DL, N->getValueType(0),
6818 N->getOperand(1), N->getOperand(2));
6819 }
6820 return SDValue();
6821}
6822
6823static SDValue performMOVGR2FR_WCombine(SDNode *N, SelectionDAG &DAG,
6824 TargetLowering::DAGCombinerInfo &DCI,
6825 const LoongArchSubtarget &Subtarget) {
6826 // If the input to MOVGR2FR_W_LA64 is just MOVFR2GR_S_LA64 then the
6827 // conversion is unnecessary and can be replaced with the
6828 // MOVFR2GR_S_LA64 operand.
6829 SDValue Op0 = N->getOperand(0);
6830 if (Op0.getOpcode() == LoongArchISD::MOVFR2GR_S_LA64)
6831 return Op0.getOperand(0);
6832 return SDValue();
6833}
6834
6835static SDValue performMOVFR2GR_SCombine(SDNode *N, SelectionDAG &DAG,
6836 TargetLowering::DAGCombinerInfo &DCI,
6837 const LoongArchSubtarget &Subtarget) {
6838 // If the input to MOVFR2GR_S_LA64 is just MOVGR2FR_W_LA64 then the
6839 // conversion is unnecessary and can be replaced with the MOVGR2FR_W_LA64
6840 // operand.
6841 SDValue Op0 = N->getOperand(0);
6842 if (Op0->getOpcode() == LoongArchISD::MOVGR2FR_W_LA64) {
6843 assert(Op0.getOperand(0).getValueType() == N->getSimpleValueType(0) &&
6844 "Unexpected value type!");
6845 return Op0.getOperand(0);
6846 }
6847 return SDValue();
6848}
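// Taken together, the two combines above cancel GPR<->FPR round trips such as
// (MOVFR2GR_S_LA64 (MOVGR2FR_W_LA64 x)) -> x; these pairs typically appear
// after lowering i32<->f32 bitcasts on LA64.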
6849
6850static SDValue performVMSKLTZCombine(SDNode *N, SelectionDAG &DAG,
6851 TargetLowering::DAGCombinerInfo &DCI,
6852 const LoongArchSubtarget &Subtarget) {
6853 MVT VT = N->getSimpleValueType(0);
6854 unsigned NumBits = VT.getScalarSizeInBits();
6855
6856 // Simplify the inputs.
6857 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6858 APInt DemandedMask(APInt::getAllOnes(NumBits));
6859 if (TLI.SimplifyDemandedBits(SDValue(N, 0), DemandedMask, DCI))
6860 return SDValue(N, 0);
6861
6862 return SDValue();
6863}
6864
6865static SDValue
6866performSPLIT_PAIR_F64Combine(SDNode *N, SelectionDAG &DAG,
6867 TargetLowering::DAGCombinerInfo &DCI,
6868 const LoongArchSubtarget &Subtarget) {
6869 SDValue Op0 = N->getOperand(0);
6870 SDLoc DL(N);
6871
6872 // If the input to SplitPairF64 is just BuildPairF64 then the operation is
6873 // redundant. Instead, use BuildPairF64's operands directly.
6874 if (Op0->getOpcode() == LoongArchISD::BUILD_PAIR_F64)
6875 return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
6876
6877 if (Op0->isUndef()) {
6878 SDValue Lo = DAG.getUNDEF(MVT::i32);
6879 SDValue Hi = DAG.getUNDEF(MVT::i32);
6880 return DCI.CombineTo(N, Lo, Hi);
6881 }
6882
6883 // It's cheaper to materialise two 32-bit integers than to load a double
6884 // from the constant pool and transfer it to integer registers through the
6885 // stack.
6886 if (auto *C = dyn_cast<ConstantFPSDNode>(Op0)) {
6887 APInt V = C->getValueAPF().bitcastToAPInt();
6888 SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
6889 SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
6890 return DCI.CombineTo(N, Lo, Hi);
6891 }
6892
6893 return SDValue();
6894}
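// For example, splitting the f64 constant 1.0 (bit pattern
// 0x3FF0000000000000) yields Lo = 0x00000000 and Hi = 0x3FF00000 as plain
// i32 constants in GPRs, with no constant-pool load involved.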
6895
6896/// Do target-specific dag combines on LoongArchISD::VANDN nodes.
6897static SDValue performVANDNCombine(SDNode *N, SelectionDAG &DAG,
6898 TargetLowering::DAGCombinerInfo &DCI,
6899 const LoongArchSubtarget &Subtarget) {
6900 SDValue N0 = N->getOperand(0);
6901 SDValue N1 = N->getOperand(1);
6902 MVT VT = N->getSimpleValueType(0);
6903 SDLoc DL(N);
6904
6905 // VANDN(undef, x) -> 0
6906 // VANDN(x, undef) -> 0
6907 if (N0.isUndef() || N1.isUndef())
6908 return DAG.getConstant(0, DL, VT);
6909
6910 // VANDN(0, x) -> x
6911 if (ISD::isBuildVectorAllZeros(N0.getNode()))
6912 return N1;
6913
6914 // VANDN(x, 0) -> 0
6915 if (ISD::isBuildVectorAllZeros(N1.getNode()))
6916 return DAG.getConstant(0, DL, VT);
6917
6918 // VANDN(x, -1) -> NOT(x) -> XOR(x, -1)
6919 if (ISD::isBuildVectorAllOnes(N1.getNode()))
6920 return DAG.getNOT(DL, N0, VT);
6921
6922 // Turn VANDN back to AND if input is inverted.
6923 if (SDValue Not = isNOT(N0, DAG))
6924 return DAG.getNode(ISD::AND, DL, VT, DAG.getBitcast(VT, Not), N1);
6925
6926 // Folds for better commutativity:
6927 if (N1->hasOneUse()) {
6928 // VANDN(x,NOT(y)) -> AND(NOT(x),NOT(y)) -> NOT(OR(X,Y)).
6929 if (SDValue Not = isNOT(N1, DAG))
6930 return DAG.getNOT(
6931 DL, DAG.getNode(ISD::OR, DL, VT, N0, DAG.getBitcast(VT, Not)), VT);
6932
6933 // VANDN(x, SplatVector(Imm)) -> AND(NOT(x), NOT(SplatVector(~Imm)))
6934 // -> NOT(OR(x, SplatVector(~Imm)))
6935 // Combination is performed only when VT is v16i8/v32i8, using `vnori.b` to
6936 // gain benefits.
6937 if (!DCI.isBeforeLegalizeOps() && (VT == MVT::v16i8 || VT == MVT::v32i8) &&
6938 N1.getOpcode() == ISD::BUILD_VECTOR) {
6939 if (SDValue SplatValue =
6940 cast<BuildVectorSDNode>(N1.getNode())->getSplatValue()) {
6941 if (!N1->isOnlyUserOf(SplatValue.getNode()))
6942 return SDValue();
6943
6944 if (auto *C = dyn_cast<ConstantSDNode>(SplatValue)) {
6945 uint8_t NCVal = static_cast<uint8_t>(~(C->getSExtValue()));
6946 SDValue Not =
6947 DAG.getSplat(VT, DL, DAG.getTargetConstant(NCVal, DL, MVT::i8));
6948 return DAG.getNOT(
6949 DL, DAG.getNode(ISD::OR, DL, VT, N0, DAG.getBitcast(VT, Not)),
6950 VT);
6951 }
6952 }
6953 }
6954 }
6955
6956 return SDValue();
6957}
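// A concrete instance of the splat-immediate fold above, with v16i8 operands:
//   VANDN(x, splat(0x0f)) -> NOT(OR(x, splat(0xf0)))
// since ~0x0f = 0xf0, and the NOT(OR(...)) form can then be matched as a
// single vnori.b.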
6958
6959static SDValue performSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG,
6960 TargetLowering::DAGCombinerInfo &DCI,
6961 const LoongArchSubtarget &Subtarget) {
6962 SDLoc DL(N);
6963 EVT VT = N->getValueType(0);
6964
6965 if (VT != MVT::f32 && VT != MVT::f64)
6966 return SDValue();
6967 if (VT == MVT::f32 && !Subtarget.hasBasicF())
6968 return SDValue();
6969 if (VT == MVT::f64 && !Subtarget.hasBasicD())
6970 return SDValue();
6971
6972 // Only optimize when the source and destination types have the same width.
6973 if (VT.getSizeInBits() != N->getOperand(0).getValueSizeInBits())
6974 return SDValue();
6975
6976 SDValue Src = N->getOperand(0);
6977 // If the result of an integer load is only used by an integer-to-float
6978 // conversion, use an fp load instead. This eliminates an integer-to-FPR
6979 // move (movgr2fr) instruction.
6980 if (ISD::isNormalLoad(Src.getNode()) && Src.hasOneUse() &&
6981 // Do not change the width of a volatile load. This condition check is
6982 // inspired by AArch64.
6983 !cast<LoadSDNode>(Src)->isVolatile()) {
6984 LoadSDNode *LN0 = cast<LoadSDNode>(Src);
6985 SDValue Load = DAG.getLoad(VT, DL, LN0->getChain(), LN0->getBasePtr(),
6986 LN0->getPointerInfo(), LN0->getAlign(),
6987 LN0->getMemOperand()->getFlags());
6988
6989 // Make sure successors of the original load stay after it by updating them
6990 // to use the new Chain.
6991 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), Load.getValue(1));
6992 return DAG.getNode(LoongArchISD::SITOF, SDLoc(N), VT, Load);
6993 }
6994
6995 return SDValue();
6996}
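// For example, with basic F on LA64,
//   %i = load i32, ptr %p
//   %f = sitofp i32 %i to float
// becomes an f32 load of the same address feeding LoongArchISD::SITOF
// (roughly an ffint.s.w on the loaded value), avoiding a separate movgr2fr.w
// transfer from the GPR side.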
6997
6998SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N,
6999 DAGCombinerInfo &DCI) const {
7000 SelectionDAG &DAG = DCI.DAG;
7001 switch (N->getOpcode()) {
7002 default:
7003 break;
7004 case ISD::AND:
7005 return performANDCombine(N, DAG, DCI, Subtarget);
7006 case ISD::OR:
7007 return performORCombine(N, DAG, DCI, Subtarget);
7008 case ISD::SETCC:
7009 return performSETCCCombine(N, DAG, DCI, Subtarget);
7010 case ISD::SRL:
7011 return performSRLCombine(N, DAG, DCI, Subtarget);
7012 case ISD::BITCAST:
7013 return performBITCASTCombine(N, DAG, DCI, Subtarget);
7014 case ISD::SINT_TO_FP:
7015 return performSINT_TO_FPCombine(N, DAG, DCI, Subtarget);
7016 case LoongArchISD::BITREV_W:
7017 return performBITREV_WCombine(N, DAG, DCI, Subtarget);
7018 case LoongArchISD::BR_CC:
7019 return performBR_CCCombine(N, DAG, DCI, Subtarget);
7020 case LoongArchISD::SELECT_CC:
7021 return performSELECT_CCCombine(N, DAG, DCI, Subtarget);
7022 case ISD::INTRINSIC_WO_CHAIN:
7023 return performINTRINSIC_WO_CHAINCombine(N, DAG, DCI, Subtarget);
7024 case LoongArchISD::MOVGR2FR_W_LA64:
7025 return performMOVGR2FR_WCombine(N, DAG, DCI, Subtarget);
7026 case LoongArchISD::MOVFR2GR_S_LA64:
7027 return performMOVFR2GR_SCombine(N, DAG, DCI, Subtarget);
7028 case LoongArchISD::VMSKLTZ:
7029 case LoongArchISD::XVMSKLTZ:
7030 return performVMSKLTZCombine(N, DAG, DCI, Subtarget);
7031 case LoongArchISD::SPLIT_PAIR_F64:
7032 return performSPLIT_PAIR_F64Combine(N, DAG, DCI, Subtarget);
7033 case LoongArchISD::VANDN:
7034 return performVANDNCombine(N, DAG, DCI, Subtarget);
7035 }
7036 return SDValue();
7037}
7038
7039static MachineBasicBlock *insertDivByZeroTrap(MachineInstr &MI,
7040 MachineBasicBlock *MBB) {
7041 if (!ZeroDivCheck)
7042 return MBB;
7043
7044 // Build instructions:
7045 // MBB:
7046 // div(or mod) $dst, $dividend, $divisor
7047 // bne $divisor, $zero, SinkMBB
7048 // BreakMBB:
7049 // break 7 // BRK_DIVZERO
7050 // SinkMBB:
7051 // fallthrough
7052 const BasicBlock *LLVM_BB = MBB->getBasicBlock();
7053 MachineFunction::iterator It = ++MBB->getIterator();
7054 MachineFunction *MF = MBB->getParent();
7055 auto BreakMBB = MF->CreateMachineBasicBlock(LLVM_BB);
7056 auto SinkMBB = MF->CreateMachineBasicBlock(LLVM_BB);
7057 MF->insert(It, BreakMBB);
7058 MF->insert(It, SinkMBB);
7059
7060 // Transfer the remainder of MBB and its successor edges to SinkMBB.
7061 SinkMBB->splice(SinkMBB->end(), MBB, std::next(MI.getIterator()), MBB->end());
7062 SinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
7063
7064 const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
7065 DebugLoc DL = MI.getDebugLoc();
7066 MachineOperand &Divisor = MI.getOperand(2);
7067 Register DivisorReg = Divisor.getReg();
7068
7069 // MBB:
7070 BuildMI(MBB, DL, TII.get(LoongArch::BNE))
7071 .addReg(DivisorReg, getKillRegState(Divisor.isKill()))
7072 .addReg(LoongArch::R0)
7073 .addMBB(SinkMBB);
7074 MBB->addSuccessor(BreakMBB);
7075 MBB->addSuccessor(SinkMBB);
7076
7077 // BreakMBB:
7078 // See linux header file arch/loongarch/include/uapi/asm/break.h for the
7079 // definition of BRK_DIVZERO.
7080 BuildMI(BreakMBB, DL, TII.get(LoongArch::BREAK)).addImm(7 /*BRK_DIVZERO*/);
7081 BreakMBB->addSuccessor(SinkMBB);
7082
7083 // Clear Divisor's kill flag.
7084 Divisor.setIsKill(false);
7085
7086 return SinkMBB;
7087}
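// The trap expansion above is gated on the hidden
// -loongarch-check-zero-division option (off by default). A minimal way to
// exercise it, assuming the flag is forwarded via the usual -mllvm mechanism:
//   clang --target=loongarch64-unknown-linux-gnu -O1 \
//         -mllvm -loongarch-check-zero-division div.c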
7088
7089static MachineBasicBlock *
7091 const LoongArchSubtarget &Subtarget) {
7092 unsigned CondOpc;
7093 switch (MI.getOpcode()) {
7094 default:
7095 llvm_unreachable("Unexpected opcode");
7096 case LoongArch::PseudoVBZ:
7097 CondOpc = LoongArch::VSETEQZ_V;
7098 break;
7099 case LoongArch::PseudoVBZ_B:
7100 CondOpc = LoongArch::VSETANYEQZ_B;
7101 break;
7102 case LoongArch::PseudoVBZ_H:
7103 CondOpc = LoongArch::VSETANYEQZ_H;
7104 break;
7105 case LoongArch::PseudoVBZ_W:
7106 CondOpc = LoongArch::VSETANYEQZ_W;
7107 break;
7108 case LoongArch::PseudoVBZ_D:
7109 CondOpc = LoongArch::VSETANYEQZ_D;
7110 break;
7111 case LoongArch::PseudoVBNZ:
7112 CondOpc = LoongArch::VSETNEZ_V;
7113 break;
7114 case LoongArch::PseudoVBNZ_B:
7115 CondOpc = LoongArch::VSETALLNEZ_B;
7116 break;
7117 case LoongArch::PseudoVBNZ_H:
7118 CondOpc = LoongArch::VSETALLNEZ_H;
7119 break;
7120 case LoongArch::PseudoVBNZ_W:
7121 CondOpc = LoongArch::VSETALLNEZ_W;
7122 break;
7123 case LoongArch::PseudoVBNZ_D:
7124 CondOpc = LoongArch::VSETALLNEZ_D;
7125 break;
7126 case LoongArch::PseudoXVBZ:
7127 CondOpc = LoongArch::XVSETEQZ_V;
7128 break;
7129 case LoongArch::PseudoXVBZ_B:
7130 CondOpc = LoongArch::XVSETANYEQZ_B;
7131 break;
7132 case LoongArch::PseudoXVBZ_H:
7133 CondOpc = LoongArch::XVSETANYEQZ_H;
7134 break;
7135 case LoongArch::PseudoXVBZ_W:
7136 CondOpc = LoongArch::XVSETANYEQZ_W;
7137 break;
7138 case LoongArch::PseudoXVBZ_D:
7139 CondOpc = LoongArch::XVSETANYEQZ_D;
7140 break;
7141 case LoongArch::PseudoXVBNZ:
7142 CondOpc = LoongArch::XVSETNEZ_V;
7143 break;
7144 case LoongArch::PseudoXVBNZ_B:
7145 CondOpc = LoongArch::XVSETALLNEZ_B;
7146 break;
7147 case LoongArch::PseudoXVBNZ_H:
7148 CondOpc = LoongArch::XVSETALLNEZ_H;
7149 break;
7150 case LoongArch::PseudoXVBNZ_W:
7151 CondOpc = LoongArch::XVSETALLNEZ_W;
7152 break;
7153 case LoongArch::PseudoXVBNZ_D:
7154 CondOpc = LoongArch::XVSETALLNEZ_D;
7155 break;
7156 }
7157
7158 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
7159 const BasicBlock *LLVM_BB = BB->getBasicBlock();
7160 DebugLoc DL = MI.getDebugLoc();
7163
7164 MachineFunction *F = BB->getParent();
7165 MachineBasicBlock *FalseBB = F->CreateMachineBasicBlock(LLVM_BB);
7166 MachineBasicBlock *TrueBB = F->CreateMachineBasicBlock(LLVM_BB);
7167 MachineBasicBlock *SinkBB = F->CreateMachineBasicBlock(LLVM_BB);
7168
7169 F->insert(It, FalseBB);
7170 F->insert(It, TrueBB);
7171 F->insert(It, SinkBB);
7172
7173 // Transfer the remainder of MBB and its successor edges to Sink.
7174 SinkBB->splice(SinkBB->end(), BB, std::next(MI.getIterator()), BB->end());
7176
7177 // Insert the real instruction to BB.
7178 Register FCC = MRI.createVirtualRegister(&LoongArch::CFRRegClass);
7179 BuildMI(BB, DL, TII->get(CondOpc), FCC).addReg(MI.getOperand(1).getReg());
7180
7181 // Insert branch.
7182 BuildMI(BB, DL, TII->get(LoongArch::BCNEZ)).addReg(FCC).addMBB(TrueBB);
7183 BB->addSuccessor(FalseBB);
7184 BB->addSuccessor(TrueBB);
7185
7186 // FalseBB.
7187 Register RD1 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
7188 BuildMI(FalseBB, DL, TII->get(LoongArch::ADDI_W), RD1)
7189 .addReg(LoongArch::R0)
7190 .addImm(0);
7191 BuildMI(FalseBB, DL, TII->get(LoongArch::PseudoBR)).addMBB(SinkBB);
7192 FalseBB->addSuccessor(SinkBB);
7193
7194 // TrueBB.
7195 Register RD2 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
7196 BuildMI(TrueBB, DL, TII->get(LoongArch::ADDI_W), RD2)
7197 .addReg(LoongArch::R0)
7198 .addImm(1);
7199 TrueBB->addSuccessor(SinkBB);
7200
7201 // SinkBB: merge the results.
7202 BuildMI(*SinkBB, SinkBB->begin(), DL, TII->get(LoongArch::PHI),
7203 MI.getOperand(0).getReg())
7204 .addReg(RD1)
7205 .addMBB(FalseBB)
7206 .addReg(RD2)
7207 .addMBB(TrueBB);
7208
7209 // The pseudo instruction is gone now.
7210 MI.eraseFromParent();
7211 return SinkBB;
7212}
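// Sketch of the expansion above for a PseudoVBZ-style branch, materialising
// the i1 result of the vector test:
//   vseteqz.v  $fcc0, $vr0        # or the vset* variant chosen by CondOpc
//   bcnez      $fcc0, .TrueBB
// FalseBB: addi.w $rN, $zero, 0   # then branch to SinkBB
// TrueBB:  addi.w $rM, $zero, 1
// SinkBB:  result = PHI(FalseBB: $rN, TrueBB: $rM)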
7213
7214static MachineBasicBlock *
7216 const LoongArchSubtarget &Subtarget) {
7217 unsigned InsOp;
7218 unsigned BroadcastOp;
7219 unsigned HalfSize;
7220 switch (MI.getOpcode()) {
7221 default:
7222 llvm_unreachable("Unexpected opcode");
7223 case LoongArch::PseudoXVINSGR2VR_B:
7224 HalfSize = 16;
7225 BroadcastOp = LoongArch::XVREPLGR2VR_B;
7226 InsOp = LoongArch::XVEXTRINS_B;
7227 break;
7228 case LoongArch::PseudoXVINSGR2VR_H:
7229 HalfSize = 8;
7230 BroadcastOp = LoongArch::XVREPLGR2VR_H;
7231 InsOp = LoongArch::XVEXTRINS_H;
7232 break;
7233 }
7234 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
7235 const TargetRegisterClass *RC = &LoongArch::LASX256RegClass;
7236 const TargetRegisterClass *SubRC = &LoongArch::LSX128RegClass;
7237 DebugLoc DL = MI.getDebugLoc();
7239 // XDst = vector_insert XSrc, Elt, Idx
7240 Register XDst = MI.getOperand(0).getReg();
7241 Register XSrc = MI.getOperand(1).getReg();
7242 Register Elt = MI.getOperand(2).getReg();
7243 unsigned Idx = MI.getOperand(3).getImm();
7244
7245 if (XSrc.isVirtual() && MRI.getVRegDef(XSrc)->isImplicitDef() &&
7246 Idx < HalfSize) {
7247 Register ScratchSubReg1 = MRI.createVirtualRegister(SubRC);
7248 Register ScratchSubReg2 = MRI.createVirtualRegister(SubRC);
7249
7250 BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), ScratchSubReg1)
7251 .addReg(XSrc, {}, LoongArch::sub_128);
7252 BuildMI(*BB, MI, DL,
7253 TII->get(HalfSize == 8 ? LoongArch::VINSGR2VR_H
7254 : LoongArch::VINSGR2VR_B),
7255 ScratchSubReg2)
7256 .addReg(ScratchSubReg1)
7257 .addReg(Elt)
7258 .addImm(Idx);
7259
7260 BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), XDst)
7261 .addImm(0)
7262 .addReg(ScratchSubReg2)
7263 .addImm(LoongArch::sub_128);
7264 } else {
7265 Register ScratchReg1 = MRI.createVirtualRegister(RC);
7266 Register ScratchReg2 = MRI.createVirtualRegister(RC);
7267
7268 BuildMI(*BB, MI, DL, TII->get(BroadcastOp), ScratchReg1).addReg(Elt);
7269
7270 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), ScratchReg2)
7271 .addReg(ScratchReg1)
7272 .addReg(XSrc)
7273 .addImm(Idx >= HalfSize ? 48 : 18);
7274
7275 BuildMI(*BB, MI, DL, TII->get(InsOp), XDst)
7276 .addReg(XSrc)
7277 .addReg(ScratchReg2)
7278 .addImm((Idx >= HalfSize ? Idx - HalfSize : Idx) * 17);
7279 }
7280
7281 MI.eraseFromParent();
7282 return BB;
7283}
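// Two strategies are used above to insert a GPR element into a 256-bit
// vector: if the source is IMPLICIT_DEF and the index falls in the low
// 128-bit half, a plain LSX vinsgr2vr on the subregister is enough;
// otherwise the element is broadcast with xvreplgr2vr, the affected half is
// lined up via xvpermi.q, and xvextrins writes the lane (the Idx * 17
// immediate encodes the same lane index in both nibbles).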
7284
7287 const LoongArchSubtarget &Subtarget) {
7288 assert(Subtarget.hasExtLSX());
7289 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
7290 const TargetRegisterClass *RC = &LoongArch::LSX128RegClass;
7291 DebugLoc DL = MI.getDebugLoc();
7293 Register Dst = MI.getOperand(0).getReg();
7294 Register Src = MI.getOperand(1).getReg();
7295 Register ScratchReg1 = MRI.createVirtualRegister(RC);
7296 Register ScratchReg2 = MRI.createVirtualRegister(RC);
7297 Register ScratchReg3 = MRI.createVirtualRegister(RC);
7298
7299 BuildMI(*BB, MI, DL, TII->get(LoongArch::VLDI), ScratchReg1).addImm(0);
7300 BuildMI(*BB, MI, DL,
7301 TII->get(Subtarget.is64Bit() ? LoongArch::VINSGR2VR_D
7302 : LoongArch::VINSGR2VR_W),
7303 ScratchReg2)
7304 .addReg(ScratchReg1)
7305 .addReg(Src)
7306 .addImm(0);
7307 BuildMI(
7308 *BB, MI, DL,
7309 TII->get(Subtarget.is64Bit() ? LoongArch::VPCNT_D : LoongArch::VPCNT_W),
7310 ScratchReg3)
7311 .addReg(ScratchReg2);
7312 BuildMI(*BB, MI, DL,
7313 TII->get(Subtarget.is64Bit() ? LoongArch::VPICKVE2GR_D
7314 : LoongArch::VPICKVE2GR_W),
7315 Dst)
7316 .addReg(ScratchReg3)
7317 .addImm(0);
7318
7319 MI.eraseFromParent();
7320 return BB;
7321}
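// Scalar CTPOP is expanded through the vector unit when LSX is available,
// roughly:
//   vldi          $vr0, 0
//   vinsgr2vr.d   $vr0, $rj, 0     # vinsgr2vr.w on LA32
//   vpcnt.d       $vr1, $vr0       # vpcnt.w on LA32
//   vpickve2gr.d  $rd, $vr1, 0     # vpickve2gr.w on LA32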
7322
7323static MachineBasicBlock *
7325 const LoongArchSubtarget &Subtarget) {
7326 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
7327 const TargetRegisterClass *RC = &LoongArch::LSX128RegClass;
7328 const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
7330 Register Dst = MI.getOperand(0).getReg();
7331 Register Src = MI.getOperand(1).getReg();
7332 DebugLoc DL = MI.getDebugLoc();
7333 unsigned EleBits = 8;
7334 unsigned NotOpc = 0;
7335 unsigned MskOpc;
7336
7337 switch (MI.getOpcode()) {
7338 default:
7339 llvm_unreachable("Unexpected opcode");
7340 case LoongArch::PseudoVMSKLTZ_B:
7341 MskOpc = LoongArch::VMSKLTZ_B;
7342 break;
7343 case LoongArch::PseudoVMSKLTZ_H:
7344 MskOpc = LoongArch::VMSKLTZ_H;
7345 EleBits = 16;
7346 break;
7347 case LoongArch::PseudoVMSKLTZ_W:
7348 MskOpc = LoongArch::VMSKLTZ_W;
7349 EleBits = 32;
7350 break;
7351 case LoongArch::PseudoVMSKLTZ_D:
7352 MskOpc = LoongArch::VMSKLTZ_D;
7353 EleBits = 64;
7354 break;
7355 case LoongArch::PseudoVMSKGEZ_B:
7356 MskOpc = LoongArch::VMSKGEZ_B;
7357 break;
7358 case LoongArch::PseudoVMSKEQZ_B:
7359 MskOpc = LoongArch::VMSKNZ_B;
7360 NotOpc = LoongArch::VNOR_V;
7361 break;
7362 case LoongArch::PseudoVMSKNEZ_B:
7363 MskOpc = LoongArch::VMSKNZ_B;
7364 break;
7365 case LoongArch::PseudoXVMSKLTZ_B:
7366 MskOpc = LoongArch::XVMSKLTZ_B;
7367 RC = &LoongArch::LASX256RegClass;
7368 break;
7369 case LoongArch::PseudoXVMSKLTZ_H:
7370 MskOpc = LoongArch::XVMSKLTZ_H;
7371 RC = &LoongArch::LASX256RegClass;
7372 EleBits = 16;
7373 break;
7374 case LoongArch::PseudoXVMSKLTZ_W:
7375 MskOpc = LoongArch::XVMSKLTZ_W;
7376 RC = &LoongArch::LASX256RegClass;
7377 EleBits = 32;
7378 break;
7379 case LoongArch::PseudoXVMSKLTZ_D:
7380 MskOpc = LoongArch::XVMSKLTZ_D;
7381 RC = &LoongArch::LASX256RegClass;
7382 EleBits = 64;
7383 break;
7384 case LoongArch::PseudoXVMSKGEZ_B:
7385 MskOpc = LoongArch::XVMSKGEZ_B;
7386 RC = &LoongArch::LASX256RegClass;
7387 break;
7388 case LoongArch::PseudoXVMSKEQZ_B:
7389 MskOpc = LoongArch::XVMSKNZ_B;
7390 NotOpc = LoongArch::XVNOR_V;
7391 RC = &LoongArch::LASX256RegClass;
7392 break;
7393 case LoongArch::PseudoXVMSKNEZ_B:
7394 MskOpc = LoongArch::XVMSKNZ_B;
7395 RC = &LoongArch::LASX256RegClass;
7396 break;
7397 }
7398
7399 Register Msk = MRI.createVirtualRegister(RC);
7400 if (NotOpc) {
7401 Register Tmp = MRI.createVirtualRegister(RC);
7402 BuildMI(*BB, MI, DL, TII->get(MskOpc), Tmp).addReg(Src);
7403 BuildMI(*BB, MI, DL, TII->get(NotOpc), Msk)
7404 .addReg(Tmp, RegState::Kill)
7405 .addReg(Tmp, RegState::Kill);
7406 } else {
7407 BuildMI(*BB, MI, DL, TII->get(MskOpc), Msk).addReg(Src);
7408 }
7409
7410 if (TRI->getRegSizeInBits(*RC) > 128) {
7411 Register Lo = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
7412 Register Hi = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
7413 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPICKVE2GR_WU), Lo)
7414 .addReg(Msk)
7415 .addImm(0);
7416 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPICKVE2GR_WU), Hi)
7417 .addReg(Msk, RegState::Kill)
7418 .addImm(4);
7419 BuildMI(*BB, MI, DL,
7420 TII->get(Subtarget.is64Bit() ? LoongArch::BSTRINS_D
7421 : LoongArch::BSTRINS_W),
7422 Dst)
7425 .addImm(256 / EleBits - 1)
7426 .addImm(128 / EleBits);
7427 } else {
7428 BuildMI(*BB, MI, DL, TII->get(LoongArch::VPICKVE2GR_HU), Dst)
7429 .addReg(Msk, RegState::Kill)
7430 .addImm(0);
7431 }
7432
7433 MI.eraseFromParent();
7434 return BB;
7435}
7436
7437static MachineBasicBlock *
7439 const LoongArchSubtarget &Subtarget) {
7440 assert(MI.getOpcode() == LoongArch::SplitPairF64Pseudo &&
7441 "Unexpected instruction");
7442
7443 MachineFunction &MF = *BB->getParent();
7444 DebugLoc DL = MI.getDebugLoc();
7446 Register LoReg = MI.getOperand(0).getReg();
7447 Register HiReg = MI.getOperand(1).getReg();
7448 Register SrcReg = MI.getOperand(2).getReg();
7449
7450 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVFR2GR_S_64), LoReg).addReg(SrcReg);
7451 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVFRH2GR_S), HiReg)
7452 .addReg(SrcReg, getKillRegState(MI.getOperand(2).isKill()));
7453 MI.eraseFromParent(); // The pseudo instruction is gone now.
7454 return BB;
7455}
7456
7457static MachineBasicBlock *
7459 const LoongArchSubtarget &Subtarget) {
7460 assert(MI.getOpcode() == LoongArch::BuildPairF64Pseudo &&
7461 "Unexpected instruction");
7462
7463 MachineFunction &MF = *BB->getParent();
7464 DebugLoc DL = MI.getDebugLoc();
7467 Register TmpReg = MRI.createVirtualRegister(&LoongArch::FPR64RegClass);
7468 Register DstReg = MI.getOperand(0).getReg();
7469 Register LoReg = MI.getOperand(1).getReg();
7470 Register HiReg = MI.getOperand(2).getReg();
7471
7472 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVGR2FR_W_64), TmpReg)
7473 .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()));
7474 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVGR2FRH_W), DstReg)
7475 .addReg(TmpReg, RegState::Kill)
7476 .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()));
7477 MI.eraseFromParent(); // The pseudo instruction is gone now.
7478 return BB;
7479}
7480
7482 switch (MI.getOpcode()) {
7483 default:
7484 return false;
7485 case LoongArch::Select_GPR_Using_CC_GPR:
7486 return true;
7487 }
7488}
7489
7490static MachineBasicBlock *
7492 const LoongArchSubtarget &Subtarget) {
7493 // To "insert" Select_* instructions, we actually have to insert the triangle
7494 // control-flow pattern. The incoming instructions know the destination vreg
7495 // to set, the condition code register to branch on, the true/false values to
7496 // select between, and the condcode to use to select the appropriate branch.
7497 //
7498 // We produce the following control flow:
7499 // HeadMBB
7500 // | \
7501 // | IfFalseMBB
7502 // | /
7503 // TailMBB
7504 //
7505 // When we find a sequence of selects we attempt to optimize their emission
7506 // by sharing the control flow. Currently we only handle cases where we have
7507 // multiple selects with the exact same condition (same LHS, RHS and CC).
7508 // The selects may be interleaved with other instructions if the other
7509 // instructions meet some requirements we deem safe:
7510 // - They are not pseudo instructions.
7511 // - They are debug instructions. Otherwise,
7512 // - They do not have side-effects, do not access memory and their inputs do
7513 // not depend on the results of the select pseudo-instructions.
7514 // The TrueV/FalseV operands of the selects cannot depend on the result of
7515 // previous selects in the sequence.
7516 // These conditions could be further relaxed. See the X86 target for a
7517 // related approach and more information.
7518
7519 Register LHS = MI.getOperand(1).getReg();
7520 Register RHS;
7521 if (MI.getOperand(2).isReg())
7522 RHS = MI.getOperand(2).getReg();
7523 auto CC = static_cast<unsigned>(MI.getOperand(3).getImm());
7524
7525 SmallVector<MachineInstr *, 4> SelectDebugValues;
7526 SmallSet<Register, 4> SelectDests;
7527 SelectDests.insert(MI.getOperand(0).getReg());
7528
7529 MachineInstr *LastSelectPseudo = &MI;
7530 for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
7531 SequenceMBBI != E; ++SequenceMBBI) {
7532 if (SequenceMBBI->isDebugInstr())
7533 continue;
7534 if (isSelectPseudo(*SequenceMBBI)) {
7535 if (SequenceMBBI->getOperand(1).getReg() != LHS ||
7536 !SequenceMBBI->getOperand(2).isReg() ||
7537 SequenceMBBI->getOperand(2).getReg() != RHS ||
7538 SequenceMBBI->getOperand(3).getImm() != CC ||
7539 SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
7540 SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
7541 break;
7542 LastSelectPseudo = &*SequenceMBBI;
7543 SequenceMBBI->collectDebugValues(SelectDebugValues);
7544 SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
7545 continue;
7546 }
7547 if (SequenceMBBI->hasUnmodeledSideEffects() ||
7548 SequenceMBBI->mayLoadOrStore() ||
7549 SequenceMBBI->usesCustomInsertionHook())
7550 break;
7551 if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
7552 return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
7553 }))
7554 break;
7555 }
7556
7557 const LoongArchInstrInfo &TII = *Subtarget.getInstrInfo();
7558 const BasicBlock *LLVM_BB = BB->getBasicBlock();
7559 DebugLoc DL = MI.getDebugLoc();
7561
7562 MachineBasicBlock *HeadMBB = BB;
7563 MachineFunction *F = BB->getParent();
7564 MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
7565 MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
7566
7567 F->insert(I, IfFalseMBB);
7568 F->insert(I, TailMBB);
7569
7570 // Set the call frame size on entry to the new basic blocks.
7571 unsigned CallFrameSize = TII.getCallFrameSizeAt(*LastSelectPseudo);
7572 IfFalseMBB->setCallFrameSize(CallFrameSize);
7573 TailMBB->setCallFrameSize(CallFrameSize);
7574
7575 // Transfer debug instructions associated with the selects to TailMBB.
7576 for (MachineInstr *DebugInstr : SelectDebugValues) {
7577 TailMBB->push_back(DebugInstr->removeFromParent());
7578 }
7579
7580 // Move all instructions after the sequence to TailMBB.
7581 TailMBB->splice(TailMBB->end(), HeadMBB,
7582 std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
7583 // Update machine-CFG edges by transferring all successors of the current
7584 // block to the new block which will contain the Phi nodes for the selects.
7585 TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
7586 // Set the successors for HeadMBB.
7587 HeadMBB->addSuccessor(IfFalseMBB);
7588 HeadMBB->addSuccessor(TailMBB);
7589
7590 // Insert appropriate branch.
7591 if (MI.getOperand(2).isImm())
7592 BuildMI(HeadMBB, DL, TII.get(CC))
7593 .addReg(LHS)
7594 .addImm(MI.getOperand(2).getImm())
7595 .addMBB(TailMBB);
7596 else
7597 BuildMI(HeadMBB, DL, TII.get(CC)).addReg(LHS).addReg(RHS).addMBB(TailMBB);
7598
7599 // IfFalseMBB just falls through to TailMBB.
7600 IfFalseMBB->addSuccessor(TailMBB);
7601
7602 // Create PHIs for all of the select pseudo-instructions.
7603 auto SelectMBBI = MI.getIterator();
7604 auto SelectEnd = std::next(LastSelectPseudo->getIterator());
7605 auto InsertionPoint = TailMBB->begin();
7606 while (SelectMBBI != SelectEnd) {
7607 auto Next = std::next(SelectMBBI);
7608 if (isSelectPseudo(*SelectMBBI)) {
7609 // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
7610 BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
7611 TII.get(LoongArch::PHI), SelectMBBI->getOperand(0).getReg())
7612 .addReg(SelectMBBI->getOperand(4).getReg())
7613 .addMBB(HeadMBB)
7614 .addReg(SelectMBBI->getOperand(5).getReg())
7615 .addMBB(IfFalseMBB);
7616 SelectMBBI->eraseFromParent();
7617 }
7618 SelectMBBI = Next;
7619 }
7620
7621 F->getProperties().resetNoPHIs();
7622 return TailMBB;
7623}
7624
7625MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter(
7626 MachineInstr &MI, MachineBasicBlock *BB) const {
7627 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
7628 DebugLoc DL = MI.getDebugLoc();
7629
7630 switch (MI.getOpcode()) {
7631 default:
7632 llvm_unreachable("Unexpected instr type to insert");
7633 case LoongArch::DIV_W:
7634 case LoongArch::DIV_WU:
7635 case LoongArch::MOD_W:
7636 case LoongArch::MOD_WU:
7637 case LoongArch::DIV_D:
7638 case LoongArch::DIV_DU:
7639 case LoongArch::MOD_D:
7640 case LoongArch::MOD_DU:
7641 return insertDivByZeroTrap(MI, BB);
7642 break;
7643 case LoongArch::WRFCSR: {
7644 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVGR2FCSR),
7645 LoongArch::FCSR0 + MI.getOperand(0).getImm())
7646 .addReg(MI.getOperand(1).getReg());
7647 MI.eraseFromParent();
7648 return BB;
7649 }
7650 case LoongArch::RDFCSR: {
7651 MachineInstr *ReadFCSR =
7652 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVFCSR2GR),
7653 MI.getOperand(0).getReg())
7654 .addReg(LoongArch::FCSR0 + MI.getOperand(1).getImm());
7655 ReadFCSR->getOperand(1).setIsUndef();
7656 MI.eraseFromParent();
7657 return BB;
7658 }
7659 case LoongArch::Select_GPR_Using_CC_GPR:
7660 return emitSelectPseudo(MI, BB, Subtarget);
7661 case LoongArch::BuildPairF64Pseudo:
7662 return emitBuildPairF64Pseudo(MI, BB, Subtarget);
7663 case LoongArch::SplitPairF64Pseudo:
7664 return emitSplitPairF64Pseudo(MI, BB, Subtarget);
7665 case LoongArch::PseudoVBZ:
7666 case LoongArch::PseudoVBZ_B:
7667 case LoongArch::PseudoVBZ_H:
7668 case LoongArch::PseudoVBZ_W:
7669 case LoongArch::PseudoVBZ_D:
7670 case LoongArch::PseudoVBNZ:
7671 case LoongArch::PseudoVBNZ_B:
7672 case LoongArch::PseudoVBNZ_H:
7673 case LoongArch::PseudoVBNZ_W:
7674 case LoongArch::PseudoVBNZ_D:
7675 case LoongArch::PseudoXVBZ:
7676 case LoongArch::PseudoXVBZ_B:
7677 case LoongArch::PseudoXVBZ_H:
7678 case LoongArch::PseudoXVBZ_W:
7679 case LoongArch::PseudoXVBZ_D:
7680 case LoongArch::PseudoXVBNZ:
7681 case LoongArch::PseudoXVBNZ_B:
7682 case LoongArch::PseudoXVBNZ_H:
7683 case LoongArch::PseudoXVBNZ_W:
7684 case LoongArch::PseudoXVBNZ_D:
7685 return emitVecCondBranchPseudo(MI, BB, Subtarget);
7686 case LoongArch::PseudoXVINSGR2VR_B:
7687 case LoongArch::PseudoXVINSGR2VR_H:
7688 return emitPseudoXVINSGR2VR(MI, BB, Subtarget);
7689 case LoongArch::PseudoCTPOP:
7690 return emitPseudoCTPOP(MI, BB, Subtarget);
7691 case LoongArch::PseudoVMSKLTZ_B:
7692 case LoongArch::PseudoVMSKLTZ_H:
7693 case LoongArch::PseudoVMSKLTZ_W:
7694 case LoongArch::PseudoVMSKLTZ_D:
7695 case LoongArch::PseudoVMSKGEZ_B:
7696 case LoongArch::PseudoVMSKEQZ_B:
7697 case LoongArch::PseudoVMSKNEZ_B:
7698 case LoongArch::PseudoXVMSKLTZ_B:
7699 case LoongArch::PseudoXVMSKLTZ_H:
7700 case LoongArch::PseudoXVMSKLTZ_W:
7701 case LoongArch::PseudoXVMSKLTZ_D:
7702 case LoongArch::PseudoXVMSKGEZ_B:
7703 case LoongArch::PseudoXVMSKEQZ_B:
7704 case LoongArch::PseudoXVMSKNEZ_B:
7705 return emitPseudoVMSKCOND(MI, BB, Subtarget);
7706 case TargetOpcode::STATEPOINT:
7707 // STATEPOINT is a pseudo instruction which has no implicit defs/uses
7708 // while the bl call instruction (to which the statepoint is lowered at
7709 // the end) has an implicit def. This def is early-clobber as it will be
7710 // set at the moment of the call, before any use is read.
7711 // Add this implicit dead def here as a workaround.
7712 MI.addOperand(*MI.getMF(),
7713 MachineOperand::CreateReg(
7714 LoongArch::R1, /*isDef*/ true,
7715 /*isImp*/ true, /*isKill*/ false, /*isDead*/ true,
7716 /*isUndef*/ false, /*isEarlyClobber*/ true));
7717 if (!Subtarget.is64Bit())
7718 report_fatal_error("STATEPOINT is only supported on 64-bit targets");
7719 return emitPatchPoint(MI, BB);
7720 }
7721}
7722
7724 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
7725 unsigned *Fast) const {
7726 if (!Subtarget.hasUAL())
7727 return false;
7728
7729 // TODO: set reasonable speed number.
7730 if (Fast)
7731 *Fast = 1;
7732 return true;
7733}
7734
7735//===----------------------------------------------------------------------===//
7736// Calling Convention Implementation
7737//===----------------------------------------------------------------------===//
7738
7739 // Eight general-purpose registers a0-a7 are used for passing integer arguments,
7740// with a0-a1 reused to return values. Generally, the GPRs are used to pass
7741// fixed-point arguments, and floating-point arguments when no FPR is available
7742// or with soft float ABI.
7743const MCPhysReg ArgGPRs[] = {LoongArch::R4, LoongArch::R5, LoongArch::R6,
7744 LoongArch::R7, LoongArch::R8, LoongArch::R9,
7745 LoongArch::R10, LoongArch::R11};
7746
7747// PreserveNone calling convention:
7748// Arguments may be passed in any general-purpose registers except:
7749// - R1 : return address register
7750// - R22 : frame pointer
7751// - R31 : base pointer
7752//
7753// All general-purpose registers are treated as caller-saved,
7754// except R1 (RA) and R22 (FP).
7755//
7756// Non-volatile registers are allocated first so that a function
7757// can call normal functions without having to spill and reload
7758// argument registers.
7760 LoongArch::R23, LoongArch::R24, LoongArch::R25, LoongArch::R26,
7761 LoongArch::R27, LoongArch::R28, LoongArch::R29, LoongArch::R30,
7762 LoongArch::R4, LoongArch::R5, LoongArch::R6, LoongArch::R7,
7763 LoongArch::R8, LoongArch::R9, LoongArch::R10, LoongArch::R11,
7764 LoongArch::R12, LoongArch::R13, LoongArch::R14, LoongArch::R15,
7765 LoongArch::R16, LoongArch::R17, LoongArch::R18, LoongArch::R19,
7766 LoongArch::R20};
7767
7768 // Eight floating-point registers fa0-fa7 are used for passing floating-point
7769// arguments, and fa0-fa1 are also used to return values.
7770const MCPhysReg ArgFPR32s[] = {LoongArch::F0, LoongArch::F1, LoongArch::F2,
7771 LoongArch::F3, LoongArch::F4, LoongArch::F5,
7772 LoongArch::F6, LoongArch::F7};
7773// FPR32 and FPR64 alias each other.
7775 LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, LoongArch::F3_64,
7776 LoongArch::F4_64, LoongArch::F5_64, LoongArch::F6_64, LoongArch::F7_64};
7777
7778const MCPhysReg ArgVRs[] = {LoongArch::VR0, LoongArch::VR1, LoongArch::VR2,
7779 LoongArch::VR3, LoongArch::VR4, LoongArch::VR5,
7780 LoongArch::VR6, LoongArch::VR7};
7781
7782const MCPhysReg ArgXRs[] = {LoongArch::XR0, LoongArch::XR1, LoongArch::XR2,
7783 LoongArch::XR3, LoongArch::XR4, LoongArch::XR5,
7784 LoongArch::XR6, LoongArch::XR7};
7785
7787 switch (State.getCallingConv()) {
7789 if (!State.isVarArg())
7790 return State.AllocateReg(PreserveNoneArgGPRs);
7791 [[fallthrough]];
7792 default:
7793 return State.AllocateReg(ArgGPRs);
7794 }
7795}
7796
7797// Pass a 2*GRLen argument that has been split into two GRLen values through
7798// registers or the stack as necessary.
7799static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State,
7800 CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1,
7801 unsigned ValNo2, MVT ValVT2, MVT LocVT2,
7802 ISD::ArgFlagsTy ArgFlags2) {
7803 unsigned GRLenInBytes = GRLen / 8;
7804 if (Register Reg = allocateArgGPR(State)) {
7805 // At least one half can be passed via register.
7806 State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
7807 VA1.getLocVT(), CCValAssign::Full));
7808 } else {
7809 // Both halves must be passed on the stack, with proper alignment.
7810 Align StackAlign =
7811 std::max(Align(GRLenInBytes), ArgFlags1.getNonZeroOrigAlign());
7812 State.addLoc(
7814 State.AllocateStack(GRLenInBytes, StackAlign),
7815 VA1.getLocVT(), CCValAssign::Full));
7816 State.addLoc(CCValAssign::getMem(
7817 ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
7818 LocVT2, CCValAssign::Full));
7819 return false;
7820 }
7821 if (Register Reg = allocateArgGPR(State)) {
7822 // The second half can also be passed via register.
7823 State.addLoc(
7824 CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
7825 } else {
7826 // The second half is passed via the stack, without additional alignment.
7827 State.addLoc(CCValAssign::getMem(
7828 ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
7829 LocVT2, CCValAssign::Full));
7830 }
7831 return false;
7832}
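// For example, on LA32 an i64 argument split into two i32 halves may end up
// with the low half in the last remaining argument GPR and the high half in
// the first stack slot; when no GPR is left, both halves go to the stack and
// the first slot honours the original alignment of the full value.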
7833
7834// Implements the LoongArch calling convention. Returns true upon failure.
7836 unsigned ValNo, MVT ValVT,
7837 CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
7838 CCState &State, bool IsRet, Type *OrigTy) {
7839 unsigned GRLen = DL.getLargestLegalIntTypeSizeInBits();
7840 assert((GRLen == 32 || GRLen == 64) && "Unsupported GRLen");
7841 MVT GRLenVT = GRLen == 32 ? MVT::i32 : MVT::i64;
7842 MVT LocVT = ValVT;
7843
7844 // Any return value split into more than two values can't be returned
7845 // directly.
7846 if (IsRet && ValNo > 1)
7847 return true;
7848
7849 // Use a GPR for a floating-point value if it is a variadic argument or if no FPR is available.
7850 bool UseGPRForFloat = true;
7851
7852 switch (ABI) {
7853 default:
7854 llvm_unreachable("Unexpected ABI");
7855 break;
7860 UseGPRForFloat = ArgFlags.isVarArg();
7861 break;
7864 break;
7865 }
7866
7867 // If this is a variadic argument, the LoongArch calling convention requires
7868 // that it is assigned an 'even' or 'aligned' register if it has (2*GRLen)/8
7869 // byte alignment. An aligned register should be used regardless of whether
7870 // the original argument was split during legalisation or not. The argument
7871 // will not be passed by registers if the original type is larger than
7872 // 2*GRLen, so the register alignment rule does not apply.
7873 unsigned TwoGRLenInBytes = (2 * GRLen) / 8;
7874 if (ArgFlags.isVarArg() &&
7875 ArgFlags.getNonZeroOrigAlign() == TwoGRLenInBytes &&
7876 DL.getTypeAllocSize(OrigTy) == TwoGRLenInBytes) {
7877 unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
7878 // Skip 'odd' register if necessary.
7879 if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1)
7880 State.AllocateReg(ArgGPRs);
7881 }
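// As an example of the register-alignment rule above: on LA32, a variadic
// double (8-byte aligned, 2*GRLen in size) arriving when the next free GPR
// is a1, a3, a5 or a7 skips that odd register so the value starts on an even
// register boundary.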
7882
7883 SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
7884 SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
7885 State.getPendingArgFlags();
7886
7887 assert(PendingLocs.size() == PendingArgFlags.size() &&
7888 "PendingLocs and PendingArgFlags out of sync");
7889
7890 // FPR32 and FPR64 alias each other.
7891 if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s))
7892 UseGPRForFloat = true;
7893
7894 if (UseGPRForFloat && ValVT == MVT::f32) {
7895 LocVT = GRLenVT;
7896 LocInfo = CCValAssign::BCvt;
7897 } else if (UseGPRForFloat && GRLen == 64 && ValVT == MVT::f64) {
7898 LocVT = MVT::i64;
7899 LocInfo = CCValAssign::BCvt;
7900 } else if (UseGPRForFloat && GRLen == 32 && ValVT == MVT::f64) {
7901 // Handle passing f64 on LA32D with a soft float ABI or when floating point
7902 // registers are exhausted.
7903 assert(PendingLocs.empty() && "Can't lower f64 if it is split");
7904 // Depending on available argument GPRS, f64 may be passed in a pair of
7905 // GPRs, split between a GPR and the stack, or passed completely on the
7906 // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
7907 // cases.
7908 MCRegister Reg = allocateArgGPR(State);
7909 if (!Reg) {
7910 int64_t StackOffset = State.AllocateStack(8, Align(8));
7911 State.addLoc(
7912 CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
7913 return false;
7914 }
7915 LocVT = MVT::i32;
7916 State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
7917 MCRegister HiReg = allocateArgGPR(State);
7918 if (HiReg) {
7919 State.addLoc(
7920 CCValAssign::getCustomReg(ValNo, ValVT, HiReg, LocVT, LocInfo));
7921 } else {
7922 int64_t StackOffset = State.AllocateStack(4, Align(4));
7923 State.addLoc(
7924 CCValAssign::getCustomMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
7925 }
7926 return false;
7927 }
7928
7929 // Split arguments might be passed indirectly, so keep track of the pending
7930 // values.
7931 if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
7932 LocVT = GRLenVT;
7933 LocInfo = CCValAssign::Indirect;
7934 PendingLocs.push_back(
7935 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
7936 PendingArgFlags.push_back(ArgFlags);
7937 if (!ArgFlags.isSplitEnd()) {
7938 return false;
7939 }
7940 }
7941
7942 // If the split argument only had two elements, it should be passed directly
7943 // in registers or on the stack.
7944 if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&
7945 PendingLocs.size() <= 2) {
7946 assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
7947 // Apply the normal calling convention rules to the first half of the
7948 // split argument.
7949 CCValAssign VA = PendingLocs[0];
7950 ISD::ArgFlagsTy AF = PendingArgFlags[0];
7951 PendingLocs.clear();
7952 PendingArgFlags.clear();
7953 return CC_LoongArchAssign2GRLen(GRLen, State, VA, AF, ValNo, ValVT, LocVT,
7954 ArgFlags);
7955 }
7956
7957 // Allocate to a register if possible, or else a stack slot.
7958 Register Reg;
7959 unsigned StoreSizeBytes = GRLen / 8;
7960 Align StackAlign = Align(GRLen / 8);
7961
7962 if (ValVT == MVT::f32 && !UseGPRForFloat) {
7963 Reg = State.AllocateReg(ArgFPR32s);
7964 } else if (ValVT == MVT::f64 && !UseGPRForFloat) {
7965 Reg = State.AllocateReg(ArgFPR64s);
7966 } else if (ValVT.is128BitVector()) {
7967 Reg = State.AllocateReg(ArgVRs);
7968 UseGPRForFloat = false;
7969 StoreSizeBytes = 16;
7970 StackAlign = Align(16);
7971 } else if (ValVT.is256BitVector()) {
7972 Reg = State.AllocateReg(ArgXRs);
7973 UseGPRForFloat = false;
7974 StoreSizeBytes = 32;
7975 StackAlign = Align(32);
7976 } else {
7977 Reg = allocateArgGPR(State);
7978 }
7979
7980 unsigned StackOffset =
7981 Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign);
7982
7983 // If we reach this point and PendingLocs is non-empty, we must be at the
7984 // end of a split argument that must be passed indirectly.
7985 if (!PendingLocs.empty()) {
7986 assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
7987 assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
7988 for (auto &It : PendingLocs) {
7989 if (Reg)
7990 It.convertToReg(Reg);
7991 else
7992 It.convertToMem(StackOffset);
7993 State.addLoc(It);
7994 }
7995 PendingLocs.clear();
7996 PendingArgFlags.clear();
7997 return false;
7998 }
7999 assert((!UseGPRForFloat || LocVT == GRLenVT) &&
8000 "Expected an GRLenVT at this stage");
8001
8002 if (Reg) {
8003 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
8004 return false;
8005 }
8006
8007 // When a floating-point value is passed on the stack, no bit-cast is needed.
8008 if (ValVT.isFloatingPoint()) {
8009 LocVT = ValVT;
8010 LocInfo = CCValAssign::Full;
8011 }
8012
8013 State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
8014 return false;
8015}
8016
8017void LoongArchTargetLowering::analyzeInputArgs(
8018 MachineFunction &MF, CCState &CCInfo,
8019 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
8020 LoongArchCCAssignFn Fn) const {
8021 FunctionType *FType = MF.getFunction().getFunctionType();
8022 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
8023 MVT ArgVT = Ins[i].VT;
8024 Type *ArgTy = nullptr;
8025 if (IsRet)
8026 ArgTy = FType->getReturnType();
8027 else if (Ins[i].isOrigArg())
8028 ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
8030 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
8031 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Ins[i].Flags,
8032 CCInfo, IsRet, ArgTy)) {
8033 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " << ArgVT
8034 << '\n');
8035 llvm_unreachable("");
8036 }
8037 }
8038}
8039
8040void LoongArchTargetLowering::analyzeOutputArgs(
8041 MachineFunction &MF, CCState &CCInfo,
8042 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
8043 CallLoweringInfo *CLI, LoongArchCCAssignFn Fn) const {
8044 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
8045 MVT ArgVT = Outs[i].VT;
8046 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
8048 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
8049 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Outs[i].Flags,
8050 CCInfo, IsRet, OrigTy)) {
8051 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " << ArgVT
8052 << "\n");
8053 llvm_unreachable("");
8054 }
8055 }
8056}
8057
8058// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
8059// values.
8061 const CCValAssign &VA, const SDLoc &DL) {
8062 switch (VA.getLocInfo()) {
8063 default:
8064 llvm_unreachable("Unexpected CCValAssign::LocInfo");
8065 case CCValAssign::Full:
8067 break;
8068 case CCValAssign::BCvt:
8069 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
8070 Val = DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Val);
8071 else
8072 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
8073 break;
8074 }
8075 return Val;
8076}
8077
8079 const CCValAssign &VA, const SDLoc &DL,
8080 const ISD::InputArg &In,
8081 const LoongArchTargetLowering &TLI) {
8084 EVT LocVT = VA.getLocVT();
8085 SDValue Val;
8086 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
8087 Register VReg = RegInfo.createVirtualRegister(RC);
8088 RegInfo.addLiveIn(VA.getLocReg(), VReg);
8089 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
8090
8091 // If input is sign extended from 32 bits, note it for the OptW pass.
8092 if (In.isOrigArg()) {
8093 Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex());
8094 if (OrigArg->getType()->isIntegerTy()) {
8095 unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
8096 // An input zero extended from i31 can also be considered sign extended.
8097 if ((BitWidth <= 32 && In.Flags.isSExt()) ||
8098 (BitWidth < 32 && In.Flags.isZExt())) {
8101 LAFI->addSExt32Register(VReg);
8102 }
8103 }
8104 }
8105
8106 return convertLocVTToValVT(DAG, Val, VA, DL);
8107}
8108
8109// The caller is responsible for loading the full value if the argument is
8110// passed with CCValAssign::Indirect.
8112 const CCValAssign &VA, const SDLoc &DL) {
8114 MachineFrameInfo &MFI = MF.getFrameInfo();
8115 EVT ValVT = VA.getValVT();
8116 int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
8117 /*IsImmutable=*/true);
8118 SDValue FIN = DAG.getFrameIndex(
8120
8121 ISD::LoadExtType ExtType;
8122 switch (VA.getLocInfo()) {
8123 default:
8124 llvm_unreachable("Unexpected CCValAssign::LocInfo");
8125 case CCValAssign::Full:
8127 case CCValAssign::BCvt:
8128 ExtType = ISD::NON_EXTLOAD;
8129 break;
8130 }
8131 return DAG.getExtLoad(
8132 ExtType, DL, VA.getLocVT(), Chain, FIN,
8134}
8135
8137 const CCValAssign &VA,
8138 const CCValAssign &HiVA,
8139 const SDLoc &DL) {
8140 assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
8141 "Unexpected VA");
8143 MachineFrameInfo &MFI = MF.getFrameInfo();
8145
8146 assert(VA.isRegLoc() && "Expected register VA assignment");
8147
8148 Register LoVReg = RegInfo.createVirtualRegister(&LoongArch::GPRRegClass);
8149 RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
8150 SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
8151 SDValue Hi;
8152 if (HiVA.isMemLoc()) {
8153 // Second half of f64 is passed on the stack.
8154 int FI = MFI.CreateFixedObject(4, HiVA.getLocMemOffset(),
8155 /*IsImmutable=*/true);
8156 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
8157 Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
8159 } else {
8160 // Second half of f64 is passed in another GPR.
8161 Register HiVReg = RegInfo.createVirtualRegister(&LoongArch::GPRRegClass);
8162 RegInfo.addLiveIn(HiVA.getLocReg(), HiVReg);
8163 Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
8164 }
8165 return DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64, Lo, Hi);
8166}
8167
8168static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
8169 const CCValAssign &VA, const SDLoc &DL) {
8170 EVT LocVT = VA.getLocVT();
8171
8172 switch (VA.getLocInfo()) {
8173 default:
8174 llvm_unreachable("Unexpected CCValAssign::LocInfo");
8175 case CCValAssign::Full:
8176 break;
8177 case CCValAssign::BCvt:
8178 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
8179 Val = DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Val);
8180 else
8181 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
8182 break;
8183 }
8184 return Val;
8185}
8186
8187static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
8188 CCValAssign::LocInfo LocInfo,
8189 ISD::ArgFlagsTy ArgFlags, Type *OrigTy,
8190 CCState &State) {
8191 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
8192 // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, SpLim
8193 // s0 s1 s2 s3 s4 s5 s6 s7 s8
8194 static const MCPhysReg GPRList[] = {
8195 LoongArch::R23, LoongArch::R24, LoongArch::R25,
8196 LoongArch::R26, LoongArch::R27, LoongArch::R28,
8197 LoongArch::R29, LoongArch::R30, LoongArch::R31};
8198 if (MCRegister Reg = State.AllocateReg(GPRList)) {
8199 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
8200 return false;
8201 }
8202 }
8203
8204 if (LocVT == MVT::f32) {
8205 // Pass in STG registers: F1, F2, F3, F4
8206 // fs0,fs1,fs2,fs3
8207 static const MCPhysReg FPR32List[] = {LoongArch::F24, LoongArch::F25,
8208 LoongArch::F26, LoongArch::F27};
8209 if (MCRegister Reg = State.AllocateReg(FPR32List)) {
8210 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
8211 return false;
8212 }
8213 }
8214
8215 if (LocVT == MVT::f64) {
8216 // Pass in STG registers: D1, D2, D3, D4
8217 // fs4,fs5,fs6,fs7
8218 static const MCPhysReg FPR64List[] = {LoongArch::F28_64, LoongArch::F29_64,
8219 LoongArch::F30_64, LoongArch::F31_64};
8220 if (MCRegister Reg = State.AllocateReg(FPR64List)) {
8221 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
8222 return false;
8223 }
8224 }
8225
8226 report_fatal_error("No registers left in GHC calling convention");
8227 return true;
8228}
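// Illustrative note (not part of the original source): under the GHC
// convention above, integer STG registers are handed out from GPRList in
// order, so Base, Sp and Hp land in $s0/$s1/$s2 (LoongArch::R23-R25), the
// first f32 STG register (F1) lands in $fs0 (LoongArch::F24), and the first
// f64 one (D1) in $fs4 (LoongArch::F28_64). Exhausting any of the three
// lists aborts compilation via the report_fatal_error above.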
8229
8230// Transform physical registers into virtual registers.
8231SDValue LoongArchTargetLowering::LowerFormalArguments(
8232 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
8233 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
8234 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
8235
8237 auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();
8238
8239 switch (CallConv) {
8240 default:
8241 llvm_unreachable("Unsupported calling convention");
8242 case CallingConv::C:
8243 case CallingConv::Fast:
8246 break;
8247 case CallingConv::GHC:
8248 if (!MF.getSubtarget().hasFeature(LoongArch::FeatureBasicF) ||
8249 !MF.getSubtarget().hasFeature(LoongArch::FeatureBasicD))
8251 "GHC calling convention requires the F and D extensions");
8252 }
8253
8254 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8255 MVT GRLenVT = Subtarget.getGRLenVT();
8256 unsigned GRLenInBytes = Subtarget.getGRLen() / 8;
8257 // Used with varargs to accumulate store chains.
8258 std::vector<SDValue> OutChains;
8259
8260 // Assign locations to all of the incoming arguments.
8262 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
8263
8264 if (CallConv == CallingConv::GHC)
8265 CCInfo.AnalyzeFormalArguments(Ins, CC_LoongArch_GHC);
8266 else
8267 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false, CC_LoongArch);
8268
8269 for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) {
8270 CCValAssign &VA = ArgLocs[i];
8271 SDValue ArgValue;
8272 // Passing f64 on LA32D with a soft float ABI must be handled as a special
8273 // case.
8274 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
8275 assert(VA.needsCustom());
8276 ArgValue = unpackF64OnLA32DSoftABI(DAG, Chain, VA, ArgLocs[++i], DL);
8277 } else if (VA.isRegLoc())
8278 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[InsIdx], *this);
8279 else
8280 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
8281 if (VA.getLocInfo() == CCValAssign::Indirect) {
8282 // If the original argument was split and passed by reference, we need to
8283 // load all parts of it here (using the same address).
8284 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
8286 unsigned ArgIndex = Ins[InsIdx].OrigArgIndex;
8287 unsigned ArgPartOffset = Ins[InsIdx].PartOffset;
8288 assert(ArgPartOffset == 0);
8289 while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) {
8290 CCValAssign &PartVA = ArgLocs[i + 1];
8291 unsigned PartOffset = Ins[InsIdx + 1].PartOffset - ArgPartOffset;
8292 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
8293 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
8294 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
8296 ++i;
8297 ++InsIdx;
8298 }
8299 continue;
8300 }
8301 InVals.push_back(ArgValue);
8302 if (Ins[InsIdx].Flags.isByVal())
8303 LoongArchFI->addIncomingByValArgs(ArgValue);
8304 }
8305
8306 if (IsVarArg) {
8308 unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
8309 const TargetRegisterClass *RC = &LoongArch::GPRRegClass;
8310 MachineFrameInfo &MFI = MF.getFrameInfo();
8311 MachineRegisterInfo &RegInfo = MF.getRegInfo();
8312
8313 // Offset of the first variable argument from stack pointer, and size of
8314 // the vararg save area. For now, the varargs save area is either zero or
8315 // large enough to hold a0-a7.
8316 int VaArgOffset, VarArgsSaveSize;
8317
8318 // If all registers are allocated, then all varargs must be passed on the
8319 // stack and we don't need to save any argregs.
8320 if (ArgRegs.size() == Idx) {
8321 VaArgOffset = CCInfo.getStackSize();
8322 VarArgsSaveSize = 0;
8323 } else {
8324 VarArgsSaveSize = GRLenInBytes * (ArgRegs.size() - Idx);
8325 VaArgOffset = -VarArgsSaveSize;
8326 }
8327
8328 // Record the frame index of the first variable argument
8329 // which is a value necessary to VASTART.
8330 int FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
8331 LoongArchFI->setVarArgsFrameIndex(FI);
8332
8333 // If saving an odd number of registers then create an extra stack slot to
8334 // ensure that the frame pointer is 2*GRLen-aligned, which in turn ensures
8335 // offsets to even-numbered registers remain 2*GRLen-aligned.
8336 if (Idx % 2) {
8337 MFI.CreateFixedObject(GRLenInBytes, VaArgOffset - (int)GRLenInBytes,
8338 true);
8339 VarArgsSaveSize += GRLenInBytes;
8340 }
8341
8342 // Copy the integer registers that may have been used for passing varargs
8343 // to the vararg save area.
8344 for (unsigned I = Idx; I < ArgRegs.size();
8345 ++I, VaArgOffset += GRLenInBytes) {
8346 const Register Reg = RegInfo.createVirtualRegister(RC);
8347 RegInfo.addLiveIn(ArgRegs[I], Reg);
8348 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, GRLenVT);
8349 FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
8350 SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
8351 SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
8353 cast<StoreSDNode>(Store.getNode())
8354 ->getMemOperand()
8355 ->setValue((Value *)nullptr);
8356 OutChains.push_back(Store);
8357 }
8358 LoongArchFI->setVarArgsSaveSize(VarArgsSaveSize);
8359 }
8360
8361 LoongArchFI->setArgumentStackSize(CCInfo.getStackSize());
8362
8363 // All stores are grouped in one node to allow the matching between
8364 // the size of Ins and InVals. This only happens for vararg functions.
8365 if (!OutChains.empty()) {
8366 OutChains.push_back(Chain);
8367 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
8368 }
8369
8370 return Chain;
8371}
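// Worked example (illustrative, assuming the vararg GPR list covers $a0-$a7):
// for a variadic LA64 function with one named GPR argument, Idx == 1, so
// VarArgsSaveSize = 8 * (8 - 1) = 56 and VaArgOffset = -56. Because Idx is
// odd, an extra 8-byte slot is created at offset -64 to keep the save area
// 2*GRLen aligned (VarArgsSaveSize becomes 64), and $a1-$a7 are then spilled
// to consecutive 8-byte fixed objects so va_arg can walk them as if they had
// been passed on the stack.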
8372
8373bool LoongArchTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
8374 return CI->isTailCall();
8375}
8376
8377// Check if the return value is used only as a return value, as otherwise
8378// we can't perform a tail-call.
8379bool LoongArchTargetLowering::isUsedByReturnOnly(SDNode *N,
8380 SDValue &Chain) const {
8381 if (N->getNumValues() != 1)
8382 return false;
8383 if (!N->hasNUsesOfValue(1, 0))
8384 return false;
8385
8386 SDNode *Copy = *N->user_begin();
8387 if (Copy->getOpcode() != ISD::CopyToReg)
8388 return false;
8389
8390 // If the ISD::CopyToReg has a glue operand, we conservatively assume it
8391 // isn't safe to perform a tail call.
8392 if (Copy->getGluedNode())
8393 return false;
8394
8395 // The copy must be used by a LoongArchISD::RET, and nothing else.
8396 bool HasRet = false;
8397 for (SDNode *Node : Copy->users()) {
8398 if (Node->getOpcode() != LoongArchISD::RET)
8399 return false;
8400 HasRet = true;
8401 }
8402
8403 if (!HasRet)
8404 return false;
8405
8406 Chain = Copy->getOperand(0);
8407 return true;
8408}
8409
8410// Check whether the call is eligible for tail call optimization.
8411bool LoongArchTargetLowering::isEligibleForTailCallOptimization(
8412 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
8413 const SmallVectorImpl<CCValAssign> &ArgLocs) const {
8414
8415 auto CalleeCC = CLI.CallConv;
8416 auto &Outs = CLI.Outs;
8417 auto &Caller = MF.getFunction();
8418 auto CallerCC = Caller.getCallingConv();
8419 auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();
8420
8421 // If the stack arguments for this call do not fit into our own save area,
8422 // the call cannot be lowered as a tail call.
8423 if (CCInfo.getStackSize() > LoongArchFI->getArgumentStackSize())
8424 return false;
8425
8426 // Do not tail call opt if any parameters need to be passed indirectly.
8427 for (auto &VA : ArgLocs)
8428 if (VA.getLocInfo() == CCValAssign::Indirect)
8429 return false;
8430
8431 // Do not tail call opt if either caller or callee uses struct return
8432 // semantics.
8433 auto IsCallerStructRet = Caller.hasStructRetAttr();
8434 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
8435 if (IsCallerStructRet != IsCalleeStructRet)
8436 return false;
8437
8438 // Do not tail call opt if caller's and callee's byval arguments do not match.
8439 for (unsigned i = 0, j = 0; i < Outs.size(); i++) {
8440 if (!Outs[i].Flags.isByVal())
8441 continue;
8442 if (j++ >= LoongArchFI->getIncomingByValArgsSize())
8443 return false;
8444 if (LoongArchFI->getIncomingByValArgs(i).getValueType() != Outs[i].ArgVT)
8445 return false;
8446 }
8447
8448 // The callee has to preserve all registers the caller needs to preserve.
8449 const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
8450 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
8451 if (CalleeCC != CallerCC) {
8452 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
8453 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
8454 return false;
8455 }
8456
8457 // If the callee takes no arguments then go on to check the results of the
8458 // call.
8459 const MachineRegisterInfo &MRI = MF.getRegInfo();
8460 const SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
8461 if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
8462 return false;
8463
8464 return true;
8465}
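// Illustrative summary (not from the source): a call is rejected as a tail
// call when, for example, it needs more outgoing stack argument space than
// the caller's own incoming argument area, when any argument is assigned
// CCValAssign::Indirect, when exactly one of caller and callee uses an sret
// return, when a byval argument does not match one of the caller's incoming
// byval arguments, or when the callee's convention does not preserve every
// register the caller's convention needs preserved.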
8466
8467static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) {
8468 return DAG.getDataLayout().getPrefTypeAlign(
8469 VT.getTypeForEVT(*DAG.getContext()));
8470}
8471
8472// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
8473// and output parameter nodes.
8474SDValue
8475LoongArchTargetLowering::LowerCall(CallLoweringInfo &CLI,
8476 SmallVectorImpl<SDValue> &InVals) const {
8477 SelectionDAG &DAG = CLI.DAG;
8478 SDLoc &DL = CLI.DL;
8480 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
8482 SDValue Chain = CLI.Chain;
8483 SDValue Callee = CLI.Callee;
8484 CallingConv::ID CallConv = CLI.CallConv;
8485 bool IsVarArg = CLI.IsVarArg;
8486 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8487 MVT GRLenVT = Subtarget.getGRLenVT();
8488 bool &IsTailCall = CLI.IsTailCall;
8489
8491 auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();
8492
8493 // Analyze the operands of the call, assigning locations to each operand.
8495 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
8496
8497 if (CallConv == CallingConv::GHC)
8498 ArgCCInfo.AnalyzeCallOperands(Outs, CC_LoongArch_GHC);
8499 else
8500 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI, CC_LoongArch);
8501
8502 // Check if it's really possible to do a tail call.
8503 if (IsTailCall)
8504 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
8505
8506 if (IsTailCall)
8507 ++NumTailCalls;
8508 else if (CLI.CB && CLI.CB->isMustTailCall())
8509 report_fatal_error("failed to perform tail call elimination on a call "
8510 "site marked musttail");
8511
8512 // Get a count of how many bytes are to be pushed on the stack.
8513 unsigned NumBytes = ArgCCInfo.getStackSize();
8514
8515 // Create local copies for byval args.
8516 SmallVector<SDValue> ByValArgs;
8517 for (unsigned i = 0, j = 0, e = Outs.size(); i != e; ++i) {
8518 ISD::ArgFlagsTy Flags = Outs[i].Flags;
8519 if (!Flags.isByVal())
8520 continue;
8521
8522 SDValue Arg = OutVals[i];
8523 unsigned Size = Flags.getByValSize();
8524 Align Alignment = Flags.getNonZeroByValAlign();
8525 SDValue SizeNode = DAG.getConstant(Size, DL, GRLenVT);
8526 SDValue Dst;
8527
8528 if (IsTailCall) {
8529 SDValue CallerArg = LoongArchFI->getIncomingByValArgs(j++);
8532 Dst = CallerArg;
8533 } else {
8534 int FI =
8535 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
8536 Dst = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
8537 }
8538 if (Dst) {
8539 Chain =
8540 DAG.getMemcpy(Chain, DL, Dst, Arg, SizeNode, Alignment,
8541 /*IsVolatile=*/false,
8542 /*AlwaysInline=*/false, /*CI=*/nullptr, std::nullopt,
8544 ByValArgs.push_back(Dst);
8545 }
8546 }
8547
8548 if (!IsTailCall)
8549 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
8550
8551 // During a tail call, stores to the argument area must happen after all of
8552 // the function's incoming arguments have been loaded because they may alias.
8553 // This is done by folding in a TokenFactor from LowerFormalArguments, but
8554 // there's no point in doing so repeatedly so this tracks whether that's
8555 // happened yet.
8556 bool AfterFormalArgLoads = false;
8557
8558 // Copy argument values to their designated locations.
8560 SmallVector<SDValue> MemOpChains;
8561 SDValue StackPtr;
8562 for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e;
8563 ++i, ++OutIdx) {
8564 CCValAssign &VA = ArgLocs[i];
8565 SDValue ArgValue = OutVals[OutIdx];
8566 ISD::ArgFlagsTy Flags = Outs[OutIdx].Flags;
8567
8568 // Handle passing f64 on LA32D with a soft float ABI as a special case.
8569 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
8570 assert(VA.isRegLoc() && "Expected register VA assignment");
8571 assert(VA.needsCustom());
8572 SDValue SplitF64 =
8573 DAG.getNode(LoongArchISD::SPLIT_PAIR_F64, DL,
8574 DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
8575 SDValue Lo = SplitF64.getValue(0);
8576 SDValue Hi = SplitF64.getValue(1);
8577
8578 Register RegLo = VA.getLocReg();
8579 RegsToPass.push_back(std::make_pair(RegLo, Lo));
8580
8581 // Get the CCValAssign for the Hi part.
8582 CCValAssign &HiVA = ArgLocs[++i];
8583
8584 if (HiVA.isMemLoc()) {
8585 // Second half of f64 is passed on the stack.
8586 if (!StackPtr.getNode())
8587 StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT);
8589 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
8590 DAG.getIntPtrConstant(HiVA.getLocMemOffset(), DL));
8591 // Emit the store.
8592 MemOpChains.push_back(DAG.getStore(
8593 Chain, DL, Hi, Address,
8595 } else {
8596 // Second half of f64 is passed in another GPR.
8597 Register RegHigh = HiVA.getLocReg();
8598 RegsToPass.push_back(std::make_pair(RegHigh, Hi));
8599 }
8600 continue;
8601 }
8602
8603 // Promote the value if needed.
8604 // For now, only handle fully promoted and indirect arguments.
8605 if (VA.getLocInfo() == CCValAssign::Indirect) {
8606 // Store the argument in a stack slot and pass its address.
8607 Align StackAlign =
8608 std::max(getPrefTypeAlign(Outs[OutIdx].ArgVT, DAG),
8609 getPrefTypeAlign(ArgValue.getValueType(), DAG));
8610 TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
8611 // If the original argument was split and passed by reference, we need to
8612 // store the required parts of it here (and pass just one address).
8613 unsigned ArgIndex = Outs[OutIdx].OrigArgIndex;
8614 unsigned ArgPartOffset = Outs[OutIdx].PartOffset;
8615 assert(ArgPartOffset == 0);
8616 // Calculate the total size to store. We don't have access to what we're
8617 // actually storing other than performing the loop and collecting the
8618 // info.
8620 while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) {
8621 SDValue PartValue = OutVals[OutIdx + 1];
8622 unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset;
8623 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
8624 EVT PartVT = PartValue.getValueType();
8625
8626 StoredSize += PartVT.getStoreSize();
8627 StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
8628 Parts.push_back(std::make_pair(PartValue, Offset));
8629 ++i;
8630 ++OutIdx;
8631 }
8632 SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
8633 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
8634 MemOpChains.push_back(
8635 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
8637 for (const auto &Part : Parts) {
8638 SDValue PartValue = Part.first;
8639 SDValue PartOffset = Part.second;
8641 DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
8642 MemOpChains.push_back(
8643 DAG.getStore(Chain, DL, PartValue, Address,
8645 }
8646 ArgValue = SpillSlot;
8647 } else {
8648 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL);
8649 }
8650
8651 // Use local copy if it is a byval arg.
8652 if (Flags.isByVal()) {
8653 if (!IsTailCall || (isa<GlobalAddressSDNode>(ArgValue) ||
8654 isa<ExternalSymbolSDNode>(ArgValue) ||
8655 isa<FrameIndexSDNode>(ArgValue)))
8656 ArgValue = ByValArgs[j++];
8657 }
8658
8659 if (VA.isRegLoc()) {
8660 // Queue up the argument copies and emit them at the end.
8661 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
8662 } else {
8663 assert(VA.isMemLoc() && "Argument not register or memory");
8664 SDValue DstAddr;
8665 MachinePointerInfo DstInfo;
8666 int32_t Offset = VA.getLocMemOffset();
8667
8668 // Work out the address of the stack slot.
8669 if (!StackPtr.getNode())
8670 StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT);
8671
8672 if (IsTailCall) {
8673 unsigned OpSize = divideCeil(VA.getValVT().getSizeInBits(), 8);
8674 int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
8675 DstAddr = DAG.getFrameIndex(FI, PtrVT);
8676 DstInfo = MachinePointerInfo::getFixedStack(MF, FI);
8677 if (!AfterFormalArgLoads) {
8678 Chain = DAG.getStackArgumentTokenFactor(Chain);
8679 AfterFormalArgLoads = true;
8680 }
8681 } else {
8682 SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL);
8683 DstAddr = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
8684 DstInfo = MachinePointerInfo::getStack(MF, Offset);
8685 }
8686
8687 // Emit the store.
8688 MemOpChains.push_back(
8689 DAG.getStore(Chain, DL, ArgValue, DstAddr, DstInfo));
8690 }
8691 }
8692
8693 // Join the stores, which are independent of one another.
8694 if (!MemOpChains.empty())
8695 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
8696
8697 SDValue Glue;
8698
8699 // Build a sequence of copy-to-reg nodes, chained and glued together.
8700 for (auto &Reg : RegsToPass) {
8701 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
8702 Glue = Chain.getValue(1);
8703 }
8704
8705 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
8706 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
8707// split it, and the direct call can then be matched by PseudoCALL_SMALL.
8708 if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
8709 const GlobalValue *GV = S->getGlobal();
8710 unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(GV)
8713 Callee = DAG.getTargetGlobalAddress(S->getGlobal(), DL, PtrVT, 0, OpFlags);
8714 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
8715 unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(nullptr)
8718 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);
8719 }
8720
8721 // The first call operand is the chain and the second is the target address.
8723 Ops.push_back(Chain);
8724 Ops.push_back(Callee);
8725
8726 // Add argument registers to the end of the list so that they are
8727 // known live into the call.
8728 for (auto &Reg : RegsToPass)
8729 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
8730
8731 if (!IsTailCall) {
8732 // Add a register mask operand representing the call-preserved registers.
8733 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
8734 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
8735 assert(Mask && "Missing call preserved mask for calling convention");
8736 Ops.push_back(DAG.getRegisterMask(Mask));
8737 }
8738
8739 // Glue the call to the argument copies, if any.
8740 if (Glue.getNode())
8741 Ops.push_back(Glue);
8742
8743 // Emit the call.
8744 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
8745 unsigned Op;
8746 switch (DAG.getTarget().getCodeModel()) {
8747 default:
8748 report_fatal_error("Unsupported code model");
8749 case CodeModel::Small:
8750 Op = IsTailCall ? LoongArchISD::TAIL : LoongArchISD::CALL;
8751 break;
8752 case CodeModel::Medium:
8753 Op = IsTailCall ? LoongArchISD::TAIL_MEDIUM : LoongArchISD::CALL_MEDIUM;
8754 break;
8755 case CodeModel::Large:
8756 assert(Subtarget.is64Bit() && "Large code model requires LA64");
8757 Op = IsTailCall ? LoongArchISD::TAIL_LARGE : LoongArchISD::CALL_LARGE;
8758 break;
8759 }
8760
8761 if (IsTailCall) {
8763 SDValue Ret = DAG.getNode(Op, DL, NodeTys, Ops);
8764 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
8765 return Ret;
8766 }
8767
8768 Chain = DAG.getNode(Op, DL, NodeTys, Ops);
8769 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
8770 Glue = Chain.getValue(1);
8771
8772 // Mark the end of the call, which is glued to the call itself.
8773 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
8774 Glue = Chain.getValue(1);
8775
8776 // Assign locations to each value returned by this call.
8778 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
8779 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_LoongArch);
8780
8781 // Copy all of the result registers out of their specified physreg.
8782 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
8783 auto &VA = RVLocs[i];
8784 // Copy the value out.
8785 SDValue RetValue =
8786 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
8787 // Glue the RetValue to the end of the call sequence.
8788 Chain = RetValue.getValue(1);
8789 Glue = RetValue.getValue(2);
8790
8791 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
8792 assert(VA.needsCustom());
8793 SDValue RetValue2 = DAG.getCopyFromReg(Chain, DL, RVLocs[++i].getLocReg(),
8794 MVT::i32, Glue);
8795 Chain = RetValue2.getValue(1);
8796 Glue = RetValue2.getValue(2);
8797 RetValue = DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64,
8798 RetValue, RetValue2);
8799 } else
8800 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL);
8801
8802 InVals.push_back(RetValue);
8803 }
8804
8805 return Chain;
8806}
8807
8808bool LoongArchTargetLowering::CanLowerReturn(
8809 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
8810 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context,
8811 const Type *RetTy) const {
8813 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
8814
8815 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
8816 LoongArchABI::ABI ABI =
8817 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
8818 if (CC_LoongArch(MF.getDataLayout(), ABI, i, Outs[i].VT, CCValAssign::Full,
8819 Outs[i].Flags, CCInfo, /*IsRet=*/true, nullptr))
8820 return false;
8821 }
8822 return true;
8823}
8824
8825SDValue LoongArchTargetLowering::LowerReturn(
8826 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
8828 const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
8829 SelectionDAG &DAG) const {
8830 // Stores the assignment of the return value to a location.
8832
8833 // Info about the registers and stack slot.
8834 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
8835 *DAG.getContext());
8836
8837 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
8838 nullptr, CC_LoongArch);
8839 if (CallConv == CallingConv::GHC && !RVLocs.empty())
8840 report_fatal_error("GHC functions return void only");
8841 SDValue Glue;
8842 SmallVector<SDValue, 4> RetOps(1, Chain);
8843
8844 // Copy the result values into the output registers.
8845 for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) {
8846 SDValue Val = OutVals[OutIdx];
8847 CCValAssign &VA = RVLocs[i];
8848 assert(VA.isRegLoc() && "Can only return in registers!");
8849
8850 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
8851 // Handle returning f64 on LA32D with a soft float ABI.
8852 assert(VA.isRegLoc() && "Expected return via registers");
8853 assert(VA.needsCustom());
8854 SDValue SplitF64 = DAG.getNode(LoongArchISD::SPLIT_PAIR_F64, DL,
8855 DAG.getVTList(MVT::i32, MVT::i32), Val);
8856 SDValue Lo = SplitF64.getValue(0);
8857 SDValue Hi = SplitF64.getValue(1);
8858 Register RegLo = VA.getLocReg();
8859 Register RegHi = RVLocs[++i].getLocReg();
8860
8861 Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
8862 Glue = Chain.getValue(1);
8863 RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
8864 Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
8865 Glue = Chain.getValue(1);
8866 RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
8867 } else {
8868 // Handle a 'normal' return.
8869 Val = convertValVTToLocVT(DAG, Val, VA, DL);
8870 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
8871
8872 // Guarantee that all emitted copies are stuck together.
8873 Glue = Chain.getValue(1);
8874 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
8875 }
8876 }
8877
8878 RetOps[0] = Chain; // Update chain.
8879
8880 // Add the glue node if we have it.
8881 if (Glue.getNode())
8882 RetOps.push_back(Glue);
8883
8884 return DAG.getNode(LoongArchISD::RET, DL, MVT::Other, RetOps);
8885}
8886
8887// Check if a constant splat can be generated using [x]vldi, where imm[12] == 1.
8888// Note: The following prefixes are excluded:
8889// imm[11:8] == 4'b0000, 4'b0100, 4'b1000
8890// as they can be represented using [x]vrepli.[whb]
8892 const APInt &SplatValue, const unsigned SplatBitSize) const {
8893 uint64_t RequiredImm = 0;
8894 uint64_t V = SplatValue.getZExtValue();
8895 if (SplatBitSize == 16 && !(V & 0x00FF)) {
8896 // 4'b0101
8897 RequiredImm = (0b10101 << 8) | (V >> 8);
8898 return {true, RequiredImm};
8899 } else if (SplatBitSize == 32) {
8900 // 4'b0001
8901 if (!(V & 0xFFFF00FF)) {
8902 RequiredImm = (0b10001 << 8) | (V >> 8);
8903 return {true, RequiredImm};
8904 }
8905 // 4'b0010
8906 if (!(V & 0xFF00FFFF)) {
8907 RequiredImm = (0b10010 << 8) | (V >> 16);
8908 return {true, RequiredImm};
8909 }
8910 // 4'b0011
8911 if (!(V & 0x00FFFFFF)) {
8912 RequiredImm = (0b10011 << 8) | (V >> 24);
8913 return {true, RequiredImm};
8914 }
8915 // 4'b0110
8916 if ((V & 0xFFFF00FF) == 0xFF) {
8917 RequiredImm = (0b10110 << 8) | (V >> 8);
8918 return {true, RequiredImm};
8919 }
8920 // 4'b0111
8921 if ((V & 0xFF00FFFF) == 0xFFFF) {
8922 RequiredImm = (0b10111 << 8) | (V >> 16);
8923 return {true, RequiredImm};
8924 }
8925 // 4'b1010
8926 if ((V & 0x7E07FFFF) == 0x3E000000 || (V & 0x7E07FFFF) == 0x40000000) {
8927 RequiredImm =
8928 (0b11010 << 8) | (((V >> 24) & 0xC0) ^ 0x40) | ((V >> 19) & 0x3F);
8929 return {true, RequiredImm};
8930 }
8931 } else if (SplatBitSize == 64) {
8932 // 4'b1011
8933 if ((V & 0xFFFFFFFF7E07FFFFULL) == 0x3E000000ULL ||
8934 (V & 0xFFFFFFFF7E07FFFFULL) == 0x40000000ULL) {
8935 RequiredImm =
8936 (0b11011 << 8) | (((V >> 24) & 0xC0) ^ 0x40) | ((V >> 19) & 0x3F);
8937 return {true, RequiredImm};
8938 }
8939 // 4'b1100
8940 if ((V & 0x7FC0FFFFFFFFFFFFULL) == 0x4000000000000000ULL ||
8941 (V & 0x7FC0FFFFFFFFFFFFULL) == 0x3FC0000000000000ULL) {
8942 RequiredImm =
8943 (0b11100 << 8) | (((V >> 56) & 0xC0) ^ 0x40) | ((V >> 48) & 0x3F);
8944 return {true, RequiredImm};
8945 }
8946 // 4'b1001
8947 auto sameBitsPreByte = [](uint64_t x) -> std::pair<bool, uint8_t> {
8948 uint8_t res = 0;
8949 for (int i = 0; i < 8; ++i) {
8950 uint8_t byte = x & 0xFF;
8951 if (byte == 0 || byte == 0xFF)
8952 res |= ((byte & 1) << i);
8953 else
8954 return {false, 0};
8955 x >>= 8;
8956 }
8957 return {true, res};
8958 };
8959 auto [IsSame, Suffix] = sameBitsPreByte(V);
8960 if (IsSame) {
8961 RequiredImm = (0b11001 << 8) | Suffix;
8962 return {true, RequiredImm};
8963 }
8964 }
8965 return {false, RequiredImm};
8966}
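// Worked example (illustrative): a 16-bit splat of 0x1200 takes the
// SplatBitSize == 16 branch above because its low byte is zero, and yields
// RequiredImm = (0b10101 << 8) | 0x12 = 0x1512, a 13-bit immediate with
// imm[12] == 1 as required by the [x]vldi form described in the comment.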
8967
8968bool LoongArchTargetLowering::isFPImmVLDILegal(const APFloat &Imm,
8969 EVT VT) const {
8970 if (!Subtarget.hasExtLSX())
8971 return false;
8972
8973 if (VT == MVT::f32) {
8974 uint64_t masked = Imm.bitcastToAPInt().getZExtValue() & 0x7e07ffff;
8975 return (masked == 0x3e000000 || masked == 0x40000000);
8976 }
8977
8978 if (VT == MVT::f64) {
8979 uint64_t masked = Imm.bitcastToAPInt().getZExtValue() & 0x7fc0ffffffffffff;
8980 return (masked == 0x3fc0000000000000 || masked == 0x4000000000000000);
8981 }
8982
8983 return false;
8984}
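// Worked examples (illustrative): 1.0f has the bit pattern 0x3f800000, which
// masks to 0x3e000000 and is accepted; 2.0f (0x40000000) masks to 0x40000000
// and is also accepted; 3.14f (0x4048f5c3) keeps low mantissa bits after
// masking (0x4000f5c3) and is rejected, so it would be materialized some
// other way, e.g. via the constant pool.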
8985
8986bool LoongArchTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
8987 bool ForCodeSize) const {
8988 // TODO: Maybe need more checks here after vector extension is supported.
8989 if (VT == MVT::f32 && !Subtarget.hasBasicF())
8990 return false;
8991 if (VT == MVT::f64 && !Subtarget.hasBasicD())
8992 return false;
8993 return (Imm.isZero() || Imm.isExactlyValue(1.0) || isFPImmVLDILegal(Imm, VT));
8994}
8995
8997 return true;
8998}
8999
9001 return true;
9002}
9003
9004bool LoongArchTargetLowering::shouldInsertFencesForAtomic(
9005 const Instruction *I) const {
9006 if (!Subtarget.is64Bit())
9007 return isa<LoadInst>(I) || isa<StoreInst>(I);
9008
9009 if (isa<LoadInst>(I))
9010 return true;
9011
9012 // On LA64, atomic store operations with IntegerBitWidth of 32 and 64 do not
9013 // require fences because we can use amswap_db.[w/d].
9014 Type *Ty = I->getOperand(0)->getType();
9015 if (isa<StoreInst>(I) && Ty->isIntegerTy()) {
9016 unsigned Size = Ty->getIntegerBitWidth();
9017 return (Size == 8 || Size == 16);
9018 }
9019
9020 return false;
9021}
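// Illustrative consequence of the logic above (not from the source): on LA64
// a 32- or 64-bit integer atomic store can be lowered to amswap_db.[w/d] and
// thus needs no surrounding fences, while i8/i16 atomic stores and all atomic
// loads still get fences; on LA32 both atomic loads and stores are fenced.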
9022
9023EVT LoongArchTargetLowering::getSetCCResultType(const DataLayout &DL,
9024 LLVMContext &Context,
9025 EVT VT) const {
9026 if (!VT.isVector())
9027 return getPointerTy(DL);
9029}
9030
9032 EVT VT = Y.getValueType();
9033
9034 if (VT.isVector())
9035 return Subtarget.hasExtLSX() && VT.isInteger();
9036
9037 return VT.isScalarInteger() && !isa<ConstantSDNode>(Y);
9038}
9039
9040bool LoongArchTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
9041 const CallBase &I,
9042 MachineFunction &MF,
9043 unsigned Intrinsic) const {
9044 switch (Intrinsic) {
9045 default:
9046 return false;
9047 case Intrinsic::loongarch_masked_atomicrmw_xchg_i32:
9048 case Intrinsic::loongarch_masked_atomicrmw_add_i32:
9049 case Intrinsic::loongarch_masked_atomicrmw_sub_i32:
9050 case Intrinsic::loongarch_masked_atomicrmw_nand_i32:
9051 Info.opc = ISD::INTRINSIC_W_CHAIN;
9052 Info.memVT = MVT::i32;
9053 Info.ptrVal = I.getArgOperand(0);
9054 Info.offset = 0;
9055 Info.align = Align(4);
9058 return true;
9059 // TODO: Add more Intrinsics later.
9060 }
9061}
9062
9063// When -mlamcas is enabled, MinCmpXchgSizeInBits is set to 8, so atomicrmw
9064// and/or/xor operations with operands narrower than 32 bits can no longer be
9065// expanded to am{and/or/xor}[_db].w through AtomicExpandPass. To prevent a
9066// regression, we implement the expansion manually here.
9069
9071 Op == AtomicRMWInst::And) &&
9072 "Unable to expand");
9073 unsigned MinWordSize = 4;
9074
9075 IRBuilder<> Builder(AI);
9076 LLVMContext &Ctx = Builder.getContext();
9077 const DataLayout &DL = AI->getDataLayout();
9078 Type *ValueType = AI->getType();
9079 Type *WordType = Type::getIntNTy(Ctx, MinWordSize * 8);
9080
9081 Value *Addr = AI->getPointerOperand();
9082 PointerType *PtrTy = cast<PointerType>(Addr->getType());
9083 IntegerType *IntTy = DL.getIndexType(Ctx, PtrTy->getAddressSpace());
9084
9085 Value *AlignedAddr = Builder.CreateIntrinsic(
9086 Intrinsic::ptrmask, {PtrTy, IntTy},
9087 {Addr, ConstantInt::get(IntTy, ~(uint64_t)(MinWordSize - 1))}, nullptr,
9088 "AlignedAddr");
9089
9090 Value *AddrInt = Builder.CreatePtrToInt(Addr, IntTy);
9091 Value *PtrLSB = Builder.CreateAnd(AddrInt, MinWordSize - 1, "PtrLSB");
9092 Value *ShiftAmt = Builder.CreateShl(PtrLSB, 3);
9093 ShiftAmt = Builder.CreateTrunc(ShiftAmt, WordType, "ShiftAmt");
9094 Value *Mask = Builder.CreateShl(
9095 ConstantInt::get(WordType,
9096 (1 << (DL.getTypeStoreSize(ValueType) * 8)) - 1),
9097 ShiftAmt, "Mask");
9098 Value *Inv_Mask = Builder.CreateNot(Mask, "Inv_Mask");
9099 Value *ValOperand_Shifted =
9100 Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), WordType),
9101 ShiftAmt, "ValOperand_Shifted");
9102 Value *NewOperand;
9103 if (Op == AtomicRMWInst::And)
9104 NewOperand = Builder.CreateOr(ValOperand_Shifted, Inv_Mask, "AndOperand");
9105 else
9106 NewOperand = ValOperand_Shifted;
9107
9108 AtomicRMWInst *NewAI =
9109 Builder.CreateAtomicRMW(Op, AlignedAddr, NewOperand, Align(MinWordSize),
9110 AI->getOrdering(), AI->getSyncScopeID());
9111
9112 Value *Shift = Builder.CreateLShr(NewAI, ShiftAmt, "shifted");
9113 Value *Trunc = Builder.CreateTrunc(Shift, ValueType, "extracted");
9114 Value *FinalOldResult = Builder.CreateBitCast(Trunc, ValueType);
9115 AI->replaceAllUsesWith(FinalOldResult);
9116 AI->eraseFromParent();
9117}
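// Worked example (illustrative): an `atomicrmw or i8` whose pointer has its
// low two bits equal to 1 is rewritten as follows: AlignedAddr clears the low
// 2 bits, PtrLSB == 1 gives ShiftAmt == 8, and Mask == 0xFF << 8 == 0xFF00.
// The i8 operand is zero-extended to i32 and shifted left by 8, a 32-bit
// atomicrmw is emitted on the aligned word, and the old byte is recovered by
// shifting the result right by 8 and truncating back to i8. For `and`, the
// shifted operand is additionally or'ed with ~Mask so the untouched bytes of
// the word are preserved.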
9118
9119TargetLowering::AtomicExpansionKind
9120LoongArchTargetLowering::shouldExpandAtomicRMWInIR(
9121 const AtomicRMWInst *AI) const {
9122 // TODO: Add more AtomicRMWInst that needs to be extended.
9123
9124 // Since floating-point operation requires a non-trivial set of data
9125 // operations, use CmpXChg to expand.
9126 if (AI->isFloatingPointOperation() ||
9132
9133 if (Subtarget.hasLAM_BH() && Subtarget.is64Bit() &&
9136 AI->getOperation() == AtomicRMWInst::Sub)) {
9138 }
9139
9140 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
9141 if (Subtarget.hasLAMCAS()) {
9142 if (Size < 32 && (AI->getOperation() == AtomicRMWInst::And ||
9146 if (AI->getOperation() == AtomicRMWInst::Nand || Size < 32)
9148 }
9149
9150 if (Size == 8 || Size == 16)
9153}
9154
9155static Intrinsic::ID
9157 AtomicRMWInst::BinOp BinOp) {
9158 if (GRLen == 64) {
9159 switch (BinOp) {
9160 default:
9161 llvm_unreachable("Unexpected AtomicRMW BinOp");
9163 return Intrinsic::loongarch_masked_atomicrmw_xchg_i64;
9164 case AtomicRMWInst::Add:
9165 return Intrinsic::loongarch_masked_atomicrmw_add_i64;
9166 case AtomicRMWInst::Sub:
9167 return Intrinsic::loongarch_masked_atomicrmw_sub_i64;
9169 return Intrinsic::loongarch_masked_atomicrmw_nand_i64;
9171 return Intrinsic::loongarch_masked_atomicrmw_umax_i64;
9173 return Intrinsic::loongarch_masked_atomicrmw_umin_i64;
9174 case AtomicRMWInst::Max:
9175 return Intrinsic::loongarch_masked_atomicrmw_max_i64;
9176 case AtomicRMWInst::Min:
9177 return Intrinsic::loongarch_masked_atomicrmw_min_i64;
9178 // TODO: support other AtomicRMWInst.
9179 }
9180 }
9181
9182 if (GRLen == 32) {
9183 switch (BinOp) {
9184 default:
9185 llvm_unreachable("Unexpected AtomicRMW BinOp");
9187 return Intrinsic::loongarch_masked_atomicrmw_xchg_i32;
9188 case AtomicRMWInst::Add:
9189 return Intrinsic::loongarch_masked_atomicrmw_add_i32;
9190 case AtomicRMWInst::Sub:
9191 return Intrinsic::loongarch_masked_atomicrmw_sub_i32;
9193 return Intrinsic::loongarch_masked_atomicrmw_nand_i32;
9195 return Intrinsic::loongarch_masked_atomicrmw_umax_i32;
9197 return Intrinsic::loongarch_masked_atomicrmw_umin_i32;
9198 case AtomicRMWInst::Max:
9199 return Intrinsic::loongarch_masked_atomicrmw_max_i32;
9200 case AtomicRMWInst::Min:
9201 return Intrinsic::loongarch_masked_atomicrmw_min_i32;
9202 // TODO: support other AtomicRMWInst.
9203 }
9204 }
9205
9206 llvm_unreachable("Unexpected GRLen\n");
9207}
9208
9209TargetLowering::AtomicExpansionKind
9210LoongArchTargetLowering::shouldExpandAtomicCmpXchgInIR(
9211 const AtomicCmpXchgInst *CI) const {
9212
9213 if (Subtarget.hasLAMCAS())
9215
9217 if (Size == 8 || Size == 16)
9220}
9221
9222Value *LoongArchTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
9223 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
9224 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
9225 unsigned GRLen = Subtarget.getGRLen();
9226 AtomicOrdering FailOrd = CI->getFailureOrdering();
9227 Value *FailureOrdering =
9228 Builder.getIntN(Subtarget.getGRLen(), static_cast<uint64_t>(FailOrd));
9229 Intrinsic::ID CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i32;
9230 if (GRLen == 64) {
9231 CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i64;
9232 CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
9233 NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
9234 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
9235 }
9236 Type *Tys[] = {AlignedAddr->getType()};
9237 Value *Result = Builder.CreateIntrinsic(
9238 CmpXchgIntrID, Tys, {AlignedAddr, CmpVal, NewVal, Mask, FailureOrdering});
9239 if (GRLen == 64)
9240 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
9241 return Result;
9242}
9243
9244Value *LoongArchTargetLowering::emitMaskedAtomicRMWIntrinsic(
9245 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
9246 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
9247 // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
9248 // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
9249 // mask, as this produces better code than the LL/SC loop emitted by
9250 // int_loongarch_masked_atomicrmw_xchg.
9251 if (AI->getOperation() == AtomicRMWInst::Xchg &&
9254 if (CVal->isZero())
9255 return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,
9256 Builder.CreateNot(Mask, "Inv_Mask"),
9257 AI->getAlign(), Ord);
9258 if (CVal->isMinusOne())
9259 return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,
9260 AI->getAlign(), Ord);
9261 }
9262
9263 unsigned GRLen = Subtarget.getGRLen();
9264 Value *Ordering =
9265 Builder.getIntN(GRLen, static_cast<uint64_t>(AI->getOrdering()));
9266 Type *Tys[] = {AlignedAddr->getType()};
9268 AI->getModule(),
9270
9271 if (GRLen == 64) {
9272 Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
9273 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
9274 ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
9275 }
9276
9277 Value *Result;
9278
9279 // Must pass the shift amount needed to sign extend the loaded value prior
9280 // to performing a signed comparison for min/max. ShiftAmt is the number of
9281 // bits to shift the value into position. Pass GRLen-ShiftAmt-ValWidth, which
9282 // is the number of bits to left+right shift the value in order to
9283 // sign-extend.
9284 if (AI->getOperation() == AtomicRMWInst::Min ||
9286 const DataLayout &DL = AI->getDataLayout();
9287 unsigned ValWidth =
9288 DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
9289 Value *SextShamt =
9290 Builder.CreateSub(Builder.getIntN(GRLen, GRLen - ValWidth), ShiftAmt);
9291 Result = Builder.CreateCall(LlwOpScwLoop,
9292 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
9293 } else {
9294 Result =
9295 Builder.CreateCall(LlwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
9296 }
9297
9298 if (GRLen == 64)
9299 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
9300 return Result;
9301}
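// Worked example (illustrative): for a masked `atomicrmw min i8` on LA64 with
// ShiftAmt == 8, the extra operand passed to the intrinsic is
// SextShamt = (GRLen - ValWidth) - ShiftAmt = (64 - 8) - 8 = 48, i.e. the
// LL/SC loop shifts the loaded word left by 48 and then arithmetically right
// by 48 so the byte is sign-extended before the signed comparison.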
9302
9303bool LoongArchTargetLowering::isFMAFasterThanFMulAndFAdd(
9304 const MachineFunction &MF, EVT VT) const {
9305 VT = VT.getScalarType();
9306
9307 if (!VT.isSimple())
9308 return false;
9309
9310 switch (VT.getSimpleVT().SimpleTy) {
9311 case MVT::f32:
9312 case MVT::f64:
9313 return true;
9314 default:
9315 break;
9316 }
9317
9318 return false;
9319}
9320
9321Register LoongArchTargetLowering::getExceptionPointerRegister(
9322 const Constant *PersonalityFn) const {
9323 return LoongArch::R4;
9324}
9325
9326Register LoongArchTargetLowering::getExceptionSelectorRegister(
9327 const Constant *PersonalityFn) const {
9328 return LoongArch::R5;
9329}
9330
9331//===----------------------------------------------------------------------===//
9332// Target Optimization Hooks
9333//===----------------------------------------------------------------------===//
9334
9335static int getEstimateRefinementSteps(EVT VT,
9336 const LoongArchSubtarget &Subtarget) {
9337 // The FRECIPE feature's instructions have a relative accuracy of 2^-14.
9338 // IEEE single precision has 23 mantissa bits and double precision has 52.
9339 int RefinementSteps = VT.getScalarType() == MVT::f64 ? 2 : 1;
9340 return RefinementSteps;
9341}
9342
9343SDValue LoongArchTargetLowering::getSqrtEstimate(SDValue Operand,
9344 SelectionDAG &DAG, int Enabled,
9345 int &RefinementSteps,
9346 bool &UseOneConstNR,
9347 bool Reciprocal) const {
9348 if (Subtarget.hasFrecipe()) {
9349 SDLoc DL(Operand);
9350 EVT VT = Operand.getValueType();
9351
9352 if (VT == MVT::f32 || (VT == MVT::f64 && Subtarget.hasBasicD()) ||
9353 (VT == MVT::v4f32 && Subtarget.hasExtLSX()) ||
9354 (VT == MVT::v2f64 && Subtarget.hasExtLSX()) ||
9355 (VT == MVT::v8f32 && Subtarget.hasExtLASX()) ||
9356 (VT == MVT::v4f64 && Subtarget.hasExtLASX())) {
9357
9358 if (RefinementSteps == ReciprocalEstimate::Unspecified)
9359 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
9360
9361 SDValue Estimate = DAG.getNode(LoongArchISD::FRSQRTE, DL, VT, Operand);
9362 if (Reciprocal)
9363 Estimate = DAG.getNode(ISD::FMUL, DL, VT, Operand, Estimate);
9364
9365 return Estimate;
9366 }
9367 }
9368
9369 return SDValue();
9370}
9371
9372SDValue LoongArchTargetLowering::getRecipEstimate(SDValue Operand,
9373 SelectionDAG &DAG,
9374 int Enabled,
9375 int &RefinementSteps) const {
9376 if (Subtarget.hasFrecipe()) {
9377 SDLoc DL(Operand);
9378 EVT VT = Operand.getValueType();
9379
9380 if (VT == MVT::f32 || (VT == MVT::f64 && Subtarget.hasBasicD()) ||
9381 (VT == MVT::v4f32 && Subtarget.hasExtLSX()) ||
9382 (VT == MVT::v2f64 && Subtarget.hasExtLSX()) ||
9383 (VT == MVT::v8f32 && Subtarget.hasExtLASX()) ||
9384 (VT == MVT::v4f64 && Subtarget.hasExtLASX())) {
9385
9386 if (RefinementSteps == ReciprocalEstimate::Unspecified)
9387 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
9388
9389 return DAG.getNode(LoongArchISD::FRECIPE, DL, VT, Operand);
9390 }
9391 }
9392
9393 return SDValue();
9394}
9395
9396//===----------------------------------------------------------------------===//
9397// LoongArch Inline Assembly Support
9398//===----------------------------------------------------------------------===//
9399
9401LoongArchTargetLowering::getConstraintType(StringRef Constraint) const {
9402 // LoongArch specific constraints in GCC: config/loongarch/constraints.md
9403 //
9404 // 'f': A floating-point register (if available).
9405 // 'k': A memory operand whose address is formed by a base register and
9406 // (optionally scaled) index register.
9407 // 'l': A signed 16-bit constant.
9408 // 'm': A memory operand whose address is formed by a base register and
9409 // offset that is suitable for use in instructions with the same
9410 // addressing mode as st.w and ld.w.
9411 // 'q': A general-purpose register except for $r0 and $r1 (for the csrxchg
9412 // instruction)
9413 // 'I': A signed 12-bit constant (for arithmetic instructions).
9414 // 'J': Integer zero.
9415 // 'K': An unsigned 12-bit constant (for logic instructions).
9416 // "ZB": An address that is held in a general-purpose register. The offset is
9417 // zero.
9418 // "ZC": A memory operand whose address is formed by a base register and
9419 // offset that is suitable for use in instructions with the same
9420 // addressing mode as ll.w and sc.w.
9421 if (Constraint.size() == 1) {
9422 switch (Constraint[0]) {
9423 default:
9424 break;
9425 case 'f':
9426 case 'q':
9427 return C_RegisterClass;
9428 case 'l':
9429 case 'I':
9430 case 'J':
9431 case 'K':
9432 return C_Immediate;
9433 case 'k':
9434 return C_Memory;
9435 }
9436 }
9437
9438 if (Constraint == "ZC" || Constraint == "ZB")
9439 return C_Memory;
9440
9441 // 'm' is handled here.
9442 return TargetLowering::getConstraintType(Constraint);
9443}
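// Illustrative use of these constraints from C (an assumption, not taken from
// this file):
//   int dst, src;
//   asm("addi.w %0, %1, %2" : "=r"(dst) : "r"(src), "I"(-12));
// Here 'I' requires a signed 12-bit constant; an out-of-range literal is
// rejected by LowerAsmOperandForConstraint below rather than silently
// truncated.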
9444
9445InlineAsm::ConstraintCode LoongArchTargetLowering::getInlineAsmMemConstraint(
9446 StringRef ConstraintCode) const {
9447 return StringSwitch<InlineAsm::ConstraintCode>(ConstraintCode)
9451 .Default(TargetLowering::getInlineAsmMemConstraint(ConstraintCode));
9452}
9453
9454std::pair<unsigned, const TargetRegisterClass *>
9455LoongArchTargetLowering::getRegForInlineAsmConstraint(
9456 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
9457 // First, see if this is a constraint that directly corresponds to a LoongArch
9458 // register class.
9459 if (Constraint.size() == 1) {
9460 switch (Constraint[0]) {
9461 case 'r':
9462 // TODO: Support fixed vectors up to GRLen?
9463 if (VT.isVector())
9464 break;
9465 return std::make_pair(0U, &LoongArch::GPRRegClass);
9466 case 'q':
9467 return std::make_pair(0U, &LoongArch::GPRNoR0R1RegClass);
9468 case 'f':
9469 if (Subtarget.hasBasicF() && VT == MVT::f32)
9470 return std::make_pair(0U, &LoongArch::FPR32RegClass);
9471 if (Subtarget.hasBasicD() && VT == MVT::f64)
9472 return std::make_pair(0U, &LoongArch::FPR64RegClass);
9473 if (Subtarget.hasExtLSX() &&
9474 TRI->isTypeLegalForClass(LoongArch::LSX128RegClass, VT))
9475 return std::make_pair(0U, &LoongArch::LSX128RegClass);
9476 if (Subtarget.hasExtLASX() &&
9477 TRI->isTypeLegalForClass(LoongArch::LASX256RegClass, VT))
9478 return std::make_pair(0U, &LoongArch::LASX256RegClass);
9479 break;
9480 default:
9481 break;
9482 }
9483 }
9484
9485 // TargetLowering::getRegForInlineAsmConstraint uses the name of the TableGen
9486 // record (e.g. the "R0" in `def R0`) to choose registers for InlineAsm
9487 // constraints while the official register name is prefixed with a '$'. So we
9488 // clip the '$' from the original constraint string (e.g. {$r0} to {r0}.)
9489// before it is parsed. And TargetLowering::getRegForInlineAsmConstraint is
9490 // case insensitive, so no need to convert the constraint to upper case here.
9491 //
9492 // For now, no need to support ABI names (e.g. `$a0`) as clang will correctly
9493 // decode the usage of register name aliases into their official names. And
9494 // AFAIK, the not yet upstreamed `rustc` for LoongArch will always use
9495 // official register names.
9496 if (Constraint.starts_with("{$r") || Constraint.starts_with("{$f") ||
9497 Constraint.starts_with("{$vr") || Constraint.starts_with("{$xr")) {
9498 bool IsFP = Constraint[2] == 'f';
9499 std::pair<StringRef, StringRef> Temp = Constraint.split('$');
9500 std::pair<unsigned, const TargetRegisterClass *> R;
9502 TRI, join_items("", Temp.first, Temp.second), VT);
9503 // Match those names to the widest floating point register type available.
9504 if (IsFP) {
9505 unsigned RegNo = R.first;
9506 if (LoongArch::F0 <= RegNo && RegNo <= LoongArch::F31) {
9507 if (Subtarget.hasBasicD() && (VT == MVT::f64 || VT == MVT::Other)) {
9508 unsigned DReg = RegNo - LoongArch::F0 + LoongArch::F0_64;
9509 return std::make_pair(DReg, &LoongArch::FPR64RegClass);
9510 }
9511 }
9512 }
9513 return R;
9514 }
9515
9516 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
9517}
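// Illustrative example (not from the source): with the D extension available,
// an explicit register constraint such as "{$f0}" on an f64 operand first
// resolves to the FPR32 record F0 through the generic lookup and is then
// widened by the code above to F0_64 in FPR64RegClass, i.e. the widest
// floating-point register type available.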
9518
9519void LoongArchTargetLowering::LowerAsmOperandForConstraint(
9520 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
9521 SelectionDAG &DAG) const {
9522 // Currently only support length 1 constraints.
9523 if (Constraint.size() == 1) {
9524 switch (Constraint[0]) {
9525 case 'l':
9526 // Validate & create a 16-bit signed immediate operand.
9527 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
9528 uint64_t CVal = C->getSExtValue();
9529 if (isInt<16>(CVal))
9530 Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op),
9531 Subtarget.getGRLenVT()));
9532 }
9533 return;
9534 case 'I':
9535 // Validate & create a 12-bit signed immediate operand.
9536 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
9537 uint64_t CVal = C->getSExtValue();
9538 if (isInt<12>(CVal))
9539 Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op),
9540 Subtarget.getGRLenVT()));
9541 }
9542 return;
9543 case 'J':
9544 // Validate & create an integer zero operand.
9545 if (auto *C = dyn_cast<ConstantSDNode>(Op))
9546 if (C->getZExtValue() == 0)
9547 Ops.push_back(
9548 DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getGRLenVT()));
9549 return;
9550 case 'K':
9551 // Validate & create a 12-bit unsigned immediate operand.
9552 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
9553 uint64_t CVal = C->getZExtValue();
9554 if (isUInt<12>(CVal))
9555 Ops.push_back(
9556 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT()));
9557 }
9558 return;
9559 default:
9560 break;
9561 }
9562 }
9564}
9565
9566#define GET_REGISTER_MATCHER
9567#include "LoongArchGenAsmMatcher.inc"
9568
9571 const MachineFunction &MF) const {
9572 std::pair<StringRef, StringRef> Name = StringRef(RegName).split('$');
9573 std::string NewRegName = Name.second.str();
9574 Register Reg = MatchRegisterAltName(NewRegName);
9575 if (!Reg)
9576 Reg = MatchRegisterName(NewRegName);
9577 if (!Reg)
9578 return Reg;
9579 BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
9580 if (!ReservedRegs.test(Reg))
9581 report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
9582 StringRef(RegName) + "\"."));
9583 return Reg;
9584}
9585
9586bool LoongArchTargetLowering::decomposeMulByConstant(LLVMContext &Context,
9587 EVT VT, SDValue C) const {
9588 // TODO: Support vectors.
9589 if (!VT.isScalarInteger())
9590 return false;
9591
9592 // Omit the optimization if the data size exceeds GRLen.
9593 if (VT.getSizeInBits() > Subtarget.getGRLen())
9594 return false;
9595
9596 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
9597 const APInt &Imm = ConstNode->getAPIntValue();
9598 // Break MUL into (SLLI + ADD/SUB) or ALSL.
9599 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
9600 (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
9601 return true;
9602 // Break MUL into (ALSL x, (SLLI x, imm0), imm1).
9603 if (ConstNode->hasOneUse() &&
9604 ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
9605 (Imm - 8).isPowerOf2() || (Imm - 16).isPowerOf2()))
9606 return true;
9607 // Break (MUL x, imm) into (ADD (SLLI x, s0), (SLLI x, s1)),
9608 // in which the immediate has two set bits. Or Break (MUL x, imm)
9609 // into (SUB (SLLI x, s0), (SLLI x, s1)), in which the immediate
9610 // equals to (1 << s0) - (1 << s1).
9611 if (ConstNode->hasOneUse() && !(Imm.sge(-2048) && Imm.sle(4095))) {
9612 unsigned Shifts = Imm.countr_zero();
9613 // Reject immediates which can be composed via a single LUI.
9614 if (Shifts >= 12)
9615 return false;
9616 // Reject multiplications that can be optimized to
9617 // (SLLI (ALSL x, x, 1/2/3/4), s).
9618 APInt ImmPop = Imm.ashr(Shifts);
9619 if (ImmPop == 3 || ImmPop == 5 || ImmPop == 9 || ImmPop == 17)
9620 return false;
9621 // We do not consider the case `(-Imm - ImmSmall).isPowerOf2()`,
9622 // since it needs one more instruction than the other 3 cases.
9623 APInt ImmSmall = APInt(Imm.getBitWidth(), 1ULL << Shifts, true);
9624 if ((Imm - ImmSmall).isPowerOf2() || (Imm + ImmSmall).isPowerOf2() ||
9625 (ImmSmall - Imm).isPowerOf2())
9626 return true;
9627 }
9628 }
9629
9630 return false;
9631}
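// Worked examples (illustrative): a multiply by 17 is accepted because
// (17 - 1) is a power of two, so it can become a shift plus an add/ALSL; a
// multiply by 1024 is rejected here, since none of the +/-1 or ALSL patterns
// match and 1024 lies inside the [-2048, 4095] range that the two-shift
// decomposition deliberately skips.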
9632
9633bool LoongArchTargetLowering::isLegalAddressingMode(const DataLayout &DL,
9634 const AddrMode &AM,
9635 Type *Ty, unsigned AS,
9636 Instruction *I) const {
9637 // LoongArch has four basic addressing modes:
9638 // 1. reg
9639 // 2. reg + 12-bit signed offset
9640 // 3. reg + 14-bit signed offset left-shifted by 2
9641 // 4. reg1 + reg2
9642 // TODO: Add more checks after the vector extensions are supported.
9643
9644 // No global is ever allowed as a base.
9645 if (AM.BaseGV)
9646 return false;
9647
9648 // Require a 12-bit signed offset or 14-bit signed offset left-shifted by 2
9649 // with `UAL` feature.
9650 if (!isInt<12>(AM.BaseOffs) &&
9651 !(isShiftedInt<14, 2>(AM.BaseOffs) && Subtarget.hasUAL()))
9652 return false;
9653
9654 switch (AM.Scale) {
9655 case 0:
9656 // "r+i" or just "i", depending on HasBaseReg.
9657 break;
9658 case 1:
9659 // "r+r+i" is not allowed.
9660 if (AM.HasBaseReg && AM.BaseOffs)
9661 return false;
9662 // Otherwise we have "r+r" or "r+i".
9663 break;
9664 case 2:
9665 // "2*r+r" or "2*r+i" is not allowed.
9666 if (AM.HasBaseReg || AM.BaseOffs)
9667 return false;
9668 // Allow "2*r" as "r+r".
9669 break;
9670 default:
9671 return false;
9672 }
9673
9674 return true;
9675}
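// Worked examples (illustrative): base + 2047 is always accepted (it fits the
// signed 12-bit offset); base + 2048 is accepted only when the UAL feature is
// present, because 2048 is a multiple of 4 fitting the shifted 14-bit form;
// and base1 + base2 + 4 is rejected, since the "r+r+i" shape has no matching
// addressing mode.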
9676
9678 return isInt<12>(Imm);
9679}
9680
9682 return isInt<12>(Imm);
9683}
9684
9685bool LoongArchTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
9686 // Zexts are free if they can be combined with a load.
9687 // Don't advertise i32->i64 zextload as being free for LA64. It interacts
9688 // poorly with type legalization of compares preferring sext.
9689 if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
9690 EVT MemVT = LD->getMemoryVT();
9691 if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
9692 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
9693 LD->getExtensionType() == ISD::ZEXTLOAD))
9694 return true;
9695 }
9696
9697 return TargetLowering::isZExtFree(Val, VT2);
9698}
9699
9700bool LoongArchTargetLowering::isSExtCheaperThanZExt(EVT SrcVT,
9701 EVT DstVT) const {
9702 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
9703}
9704
9705bool LoongArchTargetLowering::signExtendConstant(const ConstantInt *CI) const {
9706 return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
9707}
9708
9710 // TODO: Support vectors.
9711 if (Y.getValueType().isVector())
9712 return false;
9713
9714 return !isa<ConstantSDNode>(Y);
9715}
9716
9718 // LAMCAS will use amcas[_DB].{b/h/w/d} which does not require extension.
9719 return Subtarget.hasLAMCAS() ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND;
9720}
9721
9723 Type *Ty, bool IsSigned) const {
9724 if (Subtarget.is64Bit() && Ty->isIntegerTy(32))
9725 return true;
9726
9727 return IsSigned;
9728}
9729
9730bool LoongArchTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
9731 // Return false to suppress the unnecessary extensions if the LibCall
9732 // arguments or return value is a float narrower than GRLEN on a soft FP ABI.
9733 if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
9734 Type.getSizeInBits() < Subtarget.getGRLen()))
9735 return false;
9736 return true;
9737}
9738
9739// memcpy and other memory intrinsics typically try to use a wider load/store
9740// if the source/dest is aligned and the copy size is large enough. We therefore
9741// want to align such objects passed to memory intrinsics.
9742bool LoongArchTargetLowering::shouldAlignPointerArgs(CallInst *CI,
9743 unsigned &MinSize,
9744 Align &PrefAlign) const {
9745 if (!isa<MemIntrinsic>(CI))
9746 return false;
9747
9748 if (Subtarget.is64Bit()) {
9749 MinSize = 8;
9750 PrefAlign = Align(8);
9751 } else {
9752 MinSize = 4;
9753 PrefAlign = Align(4);
9754 }
9755
9756 return true;
9757}
9758
9767
9768bool LoongArchTargetLowering::splitValueIntoRegisterParts(
9769 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
9770 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
9771 bool IsABIRegCopy = CC.has_value();
9772 EVT ValueVT = Val.getValueType();
9773
9774 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
9775 PartVT == MVT::f32) {
9776 // Cast the [b]f16 to i16, extend to i32, pad the upper bits with ones to
9777 // form a float NaN, and cast to f32.
9778 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);
9779 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);
9780 Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val,
9781 DAG.getConstant(0xFFFF0000, DL, MVT::i32));
9782 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
9783 Parts[0] = Val;
9784 return true;
9785 }
9786
9787 return false;
9788}
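// Worked example (illustrative): an f16 value of 1.0 has the bit pattern
// 0x3c00; the path above widens it to i32 and ors in 0xffff0000, so the f32
// register ends up holding the NaN-boxed pattern 0xffff3c00, which
// joinRegisterPartsIntoValue below simply truncates back to 0x3c00.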
9789
9790SDValue LoongArchTargetLowering::joinRegisterPartsIntoValue(
9791 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
9792 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
9793 bool IsABIRegCopy = CC.has_value();
9794
9795 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
9796 PartVT == MVT::f32) {
9797 SDValue Val = Parts[0];
9798
9799 // Cast the f32 to i32, truncate to i16, and cast back to [b]f16.
9800 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
9801 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);
9802 Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
9803 return Val;
9804 }
9805
9806 return SDValue();
9807}
9808
9809MVT LoongArchTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
9810 CallingConv::ID CC,
9811 EVT VT) const {
9812 // Use f32 to pass f16.
9813 if (VT == MVT::f16 && Subtarget.hasBasicF())
9814 return MVT::f32;
9815
9816 return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
9817}
9818
9819unsigned LoongArchTargetLowering::getNumRegistersForCallingConv(
9820 LLVMContext &Context, CallingConv::ID CC, EVT VT) const {
9821 // Use f32 to pass f16.
9822 if (VT == MVT::f16 && Subtarget.hasBasicF())
9823 return 1;
9824
9825 return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
9826}
9827
9828bool LoongArchTargetLowering::SimplifyDemandedBitsForTargetNode(
9829 SDValue Op, const APInt &OriginalDemandedBits,
9830 const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
9831 unsigned Depth) const {
9832 EVT VT = Op.getValueType();
9833 unsigned BitWidth = OriginalDemandedBits.getBitWidth();
9834 unsigned Opc = Op.getOpcode();
9835 switch (Opc) {
9836 default:
9837 break;
9838 case LoongArchISD::VMSKLTZ:
9839 case LoongArchISD::XVMSKLTZ: {
9840 SDValue Src = Op.getOperand(0);
9841 MVT SrcVT = Src.getSimpleValueType();
9842 unsigned SrcBits = SrcVT.getScalarSizeInBits();
9843 unsigned NumElts = SrcVT.getVectorNumElements();
9844
9845    // If we don't need the sign bits at all, just return zero.
9846 if (OriginalDemandedBits.countr_zero() >= NumElts)
9847 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
9848
9849 // Only demand the vector elements of the sign bits we need.
9850 APInt KnownUndef, KnownZero;
9851 APInt DemandedElts = OriginalDemandedBits.zextOrTrunc(NumElts);
9852 if (SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef, KnownZero,
9853 TLO, Depth + 1))
9854 return true;
9855
9856 Known.Zero = KnownZero.zext(BitWidth);
9857 Known.Zero.setHighBits(BitWidth - NumElts);
9858
9859 // [X]VMSKLTZ only uses the MSB from each vector element.
9860 KnownBits KnownSrc;
9861 APInt DemandedSrcBits = APInt::getSignMask(SrcBits);
9862 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, KnownSrc, TLO,
9863 Depth + 1))
9864 return true;
9865
9866 if (KnownSrc.One[SrcBits - 1])
9867 Known.One.setLowBits(NumElts);
9868 else if (KnownSrc.Zero[SrcBits - 1])
9869 Known.Zero.setLowBits(NumElts);
9870
9871 // Attempt to avoid multi-use ops if we don't need anything from it.
9872    if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
9873            Src, DemandedSrcBits, DemandedElts, TLO.DAG, Depth + 1))
9874 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewSrc));
9875 return false;
9876 }
9877 }
9878
9879  return TargetLowering::SimplifyDemandedBitsForTargetNode(
9880      Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth);
9881}
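// Worked example (illustrative): [x]vmskltz.b over a v16i8 source yields a
// 16-bit mask in the low bits of the result, bit i being the sign (MSB) of
// element i. If every demanded bit lies at position >= 16, the node folds to
// zero; otherwise only the sign bit of each demanded element must stay live in
// the source, which is what the demanded-bits narrowing above exploits.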
9882
9883bool LoongArchTargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
9884  unsigned Opc = VecOp.getOpcode();
9885
9886 // Assume target opcodes can't be scalarized.
9887 // TODO - do we have any exceptions?
9888 if (Opc >= ISD::BUILTIN_OP_END || !isBinOp(Opc))
9889 return false;
9890
9891 // If the vector op is not supported, try to convert to scalar.
9892 EVT VecVT = VecOp.getValueType();
9893  if (!isOperationLegalOrCustom(Opc, VecVT))
9894    return true;
9895
9896 // If the vector op is supported, but the scalar op is not, the transform may
9897 // not be worthwhile.
9898 EVT ScalarVT = VecVT.getScalarType();
9899 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT);
9900}
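// Illustrative sketch of the transform this hook gates: DAGCombiner may rewrite
//   (extract_vector_elt (add v1, v2), idx)
// into
//   (add (extract_vector_elt v1, idx), (extract_vector_elt v2, idx)).
// The hook answers true either when the vector op would have to be expanded
// anyway, or when the scalar op is at least legal/custom/promoted, so the
// scalarized form cannot be worse than the vector one.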
9901
9902bool LoongArchTargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
9903                                                      unsigned Index) const {
9904  if (!Subtarget.hasExtLASX() || !ResVT.is128BitVector())
9905    return false;
9906
9907  // Extracting a 128-bit subvector from index 0 of a 256-bit vector is free.
9908 return Index == 0;
9909}
9910
9911bool LoongArchTargetLowering::isExtractVecEltCheap(EVT VT,
9912                                                   unsigned Index) const {
9913 EVT EltVT = VT.getScalarType();
9914
9915  // Extracting a scalar FP value from index 0 of a vector is free.
9916 return (EltVT == MVT::f32 || EltVT == MVT::f64) && Index == 0;
9917}
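// Illustrative note (based on the register file layout, where the scalar FPRs
// alias the low lane of the LSX/LASX vector registers): extracting element 0 of
// a v4f32/v2f64 value is just a subregister read, so no instruction is needed,
// which is why only Index == 0 is reported as cheap here.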
unsigned const MachineRegisterInfo * MRI
static MCRegister MatchRegisterName(StringRef Name)
static bool checkValueWidth(SDValue V, unsigned width, ISD::LoadExtType &ExtType)
static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget, const AArch64TargetLowering &TLI)
return SDValue()
static SDValue performANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue performSETCCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static MCRegister MatchRegisterAltName(StringRef Name)
Maps from the set of all alternative registernames to a register number.
Function Alias Analysis Results
static uint64_t getConstant(const Value *IndexValue)
static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG, unsigned Flags)
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static MachineBasicBlock * emitSelectPseudo(MachineInstr &MI, MachineBasicBlock *BB, unsigned Opcode)
static SDValue unpackFromRegLoc(const CSKYSubtarget &Subtarget, SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define RegName(no)
static SDValue performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
const MCPhysReg ArgFPR32s[]
static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Dispatching routine to lower various 128-bit LoongArch vector shuffles.
static SDValue lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVSHUF4I (if possible).
const MCPhysReg ArgVRs[]
static SDValue lowerVECTOR_SHUFFLE_VPICKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPICKEV (if possible).
static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_XVPICKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPICKOD (if possible).
static SDValue unpackF64OnLA32DSoftABI(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const CCValAssign &HiVA, const SDLoc &DL)
static bool fitsRegularPattern(typename SmallVectorImpl< ValType >::const_iterator Begin, unsigned CheckStride, typename SmallVectorImpl< ValType >::const_iterator End, ValType ExpectedIndex, unsigned ExpectedIndexStride)
Determine whether a range fits a regular pattern of values.
static SDValue lowerVECTOR_SHUFFLE_IsReverse(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE whose result is the reversed source vector.
static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg, SelectionDAG &DAG)
static cl::opt< bool > ZeroDivCheck("loongarch-check-zero-division", cl::Hidden, cl::desc("Trap on integer division by zero."), cl::init(false))
static SDValue lowerVECTOR_SHUFFLE_VSHUF(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into VSHUF.
static int getEstimateRefinementSteps(EVT VT, const LoongArchSubtarget &Subtarget)
static void emitErrorAndReplaceIntrinsicResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, StringRef ErrorMsg, bool WithChain=true)
static SDValue lowerVECTOR_SHUFFLEAsByteRotate(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE as byte rotate (if possible).
static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp, SelectionDAG &DAG, bool IsSigned=false)
static SDValue lowerVECTOR_SHUFFLE_XVINSVE0(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVINSVE0 (if possible).
static SDValue performMOVFR2GR_SCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_VILVH(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VILVH (if possible).
static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI, unsigned ValNo, MVT ValVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsRet, Type *OrigTy)
static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG)
static SDValue performSPLIT_PAIR_F64Combine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue performBITCASTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static MachineBasicBlock * emitSplitPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue lowerVectorBitSetImm(SDNode *Node, SelectionDAG &DAG)
static SDValue performSETCC_BITCASTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
@ NoMaterializeFPImm
@ MaterializeFPImm2Ins
@ MaterializeFPImm5Ins
@ MaterializeFPImm6Ins
@ MaterializeFPImm3Ins
@ MaterializeFPImm4Ins
static SDValue lowerVECTOR_SHUFFLE_XVPACKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPACKOD (if possible).
static std::optional< bool > matchSetCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue Val)
static SDValue combineAndNotIntoVANDN(SDNode *N, const SDLoc &DL, SelectionDAG &DAG)
Try to fold: (and (xor X, -1), Y) -> (vandn X, Y).
static SDValue lowerBUILD_VECTORAsBroadCastLoad(BuildVectorSDNode *BVOp, const SDLoc &DL, SelectionDAG &DAG)
#define CRC_CASE_EXT_BINARYOP(NAME, NODE)
static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG)
static bool checkBitcastSrcVectorSize(SDValue Src, unsigned Size, unsigned Depth)
static SDValue lowerVECTOR_SHUFFLEAsShift(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, const APInt &Zeroable)
Lower VECTOR_SHUFFLE as shift (if possible).
static SDValue lowerVECTOR_SHUFFLE_VSHUF4I(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into VSHUF4I (if possible).
static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG)
static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
static MachineBasicBlock * insertDivByZeroTrap(MachineInstr &MI, MachineBasicBlock *MBB)
static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG)
static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG)
static SDValue lowerVECTOR_SHUFFLE_VPACKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPACKEV (if possible).
static MachineBasicBlock * emitPseudoVMSKCOND(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue performSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue performSELECT_CCCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue performVANDNCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
Do target-specific dag combines on LoongArchISD::VANDN nodes.
static void replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, unsigned ResOp)
static SDValue lowerVECTOR_SHUFFLEAsZeroOrAnyExtend(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const APInt &Zeroable)
Lower VECTOR_SHUFFLE as ZERO_EXTEND Or ANY_EXTEND (if possible).
static SDValue legalizeIntrinsicImmArg(SDNode *Node, unsigned ImmOp, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, bool IsSigned=false)
static cl::opt< MaterializeFPImm > MaterializeFPImmInsNum("loongarch-materialize-float-imm", cl::Hidden, cl::desc("Maximum number of instructions used (including code sequence " "to generate the value and moving the value to FPR) when " "materializing floating-point immediates (default = 3)"), cl::init(MaterializeFPImm3Ins), cl::values(clEnumValN(NoMaterializeFPImm, "0", "Use constant pool"), clEnumValN(MaterializeFPImm2Ins, "2", "Materialize FP immediate within 2 instructions"), clEnumValN(MaterializeFPImm3Ins, "3", "Materialize FP immediate within 3 instructions"), clEnumValN(MaterializeFPImm4Ins, "4", "Materialize FP immediate within 4 instructions"), clEnumValN(MaterializeFPImm5Ins, "5", "Materialize FP immediate within 5 instructions"), clEnumValN(MaterializeFPImm6Ins, "6", "Materialize FP immediate within 6 instructions " "(behaves same as 5 on loongarch64)")))
static SDValue lowerVECTOR_SHUFFLE_XVPERMI(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVPERMI (if possible).
static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op, StringRef ErrorMsg, SelectionDAG &DAG)
const MCPhysReg ArgXRs[]
static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State, CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2, MVT ValVT2, MVT LocVT2, ISD::ArgFlagsTy ArgFlags2)
static unsigned getLoongArchWOpcode(unsigned Opcode)
const MCPhysReg ArgFPR64s[]
static MachineBasicBlock * emitPseudoCTPOP(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue performMOVGR2FR_WCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
#define IOCSRWR_CASE(NAME, NODE)
#define CRC_CASE_EXT_UNARYOP(NAME, NODE)
static SDValue lowerVECTOR_SHUFFLE_VPACKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPACKOD (if possible).
static SDValue signExtendBitcastSrcVector(SelectionDAG &DAG, EVT SExtVT, SDValue Src, const SDLoc &DL)
static SDValue isNOT(SDValue V, SelectionDAG &DAG)
static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Dispatching routine to lower various 256-bit LoongArch vector shuffles.
static SDValue lowerVECTOR_SHUFFLE_VREPLVEI(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into VREPLVEI (if possible).
static MachineBasicBlock * emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
const MCPhysReg PreserveNoneArgGPRs[]
static void fillVector(ArrayRef< SDValue > Ops, SelectionDAG &DAG, SDLoc DL, const LoongArchSubtarget &Subtarget, SDValue &Vector, EVT ResTy)
static SDValue fillSubVectorFromBuildVector(BuildVectorSDNode *Node, SelectionDAG &DAG, SDLoc DL, const LoongArchSubtarget &Subtarget, EVT ResTy, unsigned first)
static bool isSelectPseudo(MachineInstr &MI)
static SDValue foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp, SelectionDAG &DAG, bool IsSigned=false)
const MCPhysReg ArgGPRs[]
static SDValue lowerVECTOR_SHUFFLE_XVPERM(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVPERM (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVILVL(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVILVL (if possible).
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp, unsigned ExtOpc=ISD::ANY_EXTEND)
static void replaceVecCondBranchResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, unsigned ResOp)
#define ASRT_LE_GT_CASE(NAME)
static SDValue lowerVECTOR_SHUFFLE_XVPACKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPACKEV (if possible).
static SDValue performBR_CCCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static void computeZeroableShuffleElements(ArrayRef< int > Mask, SDValue V1, SDValue V2, APInt &KnownUndef, APInt &KnownZero)
Compute whether each element of a shuffle is zeroable.
static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue widenShuffleMask(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
static MachineBasicBlock * emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static bool canonicalizeShuffleVectorByLane(const SDLoc &DL, MutableArrayRef< int > Mask, MVT VT, SDValue &V1, SDValue &V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Shuffle vectors by lane to generate more optimized instructions.
static SDValue lowerVECTOR_SHUFFLE_XVILVH(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVILVH (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVSHUF (if possible).
static void replaceCMP_XCHG_128Results(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG)
static SDValue lowerVectorPickVE2GR(SDNode *N, SelectionDAG &DAG, unsigned ResOp)
static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
#define IOCSRRD_CASE(NAME, NODE)
static int matchShuffleAsByteRotate(MVT VT, SDValue &V1, SDValue &V2, ArrayRef< int > Mask)
Attempts to match vector shuffle as byte rotation.
static SDValue lowerVECTOR_SHUFFLE_XVPICKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPICKEV (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVREPLVEI (if possible).
static int matchShuffleAsShift(MVT &ShiftVT, unsigned &Opcode, unsigned ScalarSizeInBits, ArrayRef< int > Mask, int MaskOffset, const APInt &Zeroable)
Attempts to match a shuffle mask against the VBSLL, VBSRL, VSLLI and VSRLI instruction.
static SDValue lowerVECTOR_SHUFFLE_VILVL(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VILVL (if possible).
static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG)
static MachineBasicBlock * emitBuildPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE as lane permute and then shuffle (if possible).
static SDValue performVMSKLTZCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static void replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
#define CSR_CASE(ID)
static SDValue lowerVECTOR_SHUFFLE_VPICKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPICKOD (if possible).
static Intrinsic::ID getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen, AtomicRMWInst::BinOp BinOp)
static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS, ISD::CondCode &CC, SelectionDAG &DAG)
static Register allocateArgGPR(CCState &State)
static bool isRepeatedShuffleMask(unsigned LaneSizeInBits, MVT VT, ArrayRef< int > Mask, SmallVectorImpl< int > &RepeatedMask)
Test whether a shuffle mask is equivalent within each sub-lane.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define T
static CodeModel::Model getCodeModel(const PPCSubtarget &S, const TargetMachine &TM, const MachineOperand &MO)
This file defines the SmallSet class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
This file contains some functions that are useful when dealing with strings.
#define LLVM_DEBUG(...)
Definition Debug.h:114
static bool inRange(const MCExpr *Expr, int64_t MinValue, int64_t MaxValue, bool AllowSymbol=false)
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static bool isSequentialOrUndefInRange(ArrayRef< int > Mask, unsigned Pos, unsigned Size, int Low, int Step=1)
Return true if every element in Mask, beginning from position Pos and ending in Pos + Size,...
Value * RHS
Value * LHS
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
Definition APFloat.h:1495
bool isZero() const
Definition APFloat.h:1508
APInt bitcastToAPInt() const
Definition APFloat.h:1416
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1023
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:230
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1549
void setHighBits(unsigned hiBits)
Set the top hiBits bits.
Definition APInt.h:1400
LLVM_ABI APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition APInt.cpp:1044
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:936
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition APInt.h:1339
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:372
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition APInt.cpp:1677
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1497
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1648
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition APInt.h:436
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1258
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:201
void setLowBits(unsigned loBits)
Set the bottom loBits bits.
Definition APInt.h:1397
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition APInt.h:287
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1571
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:852
This class represents an incoming formal argument to a Function.
Definition Argument.h:32
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
An instruction that atomically checks whether a specified value is in a memory location,...
AtomicOrdering getFailureOrdering() const
Returns the failure ordering constraint of this cmpxchg instruction.
an instruction that atomically reads a memory location, combines it with another value,...
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ Add
*p = old + v
@ USubCond
Subtract only if no unsigned overflow.
@ Min
*p = old <signed v ? old : v
@ Sub
*p = old - v
@ And
*p = old & v
@ Xor
*p = old ^ v
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ UIncWrap
Increment one up to a maximum value.
@ Max
*p = old >signed v ? old : v
@ UMin
*p = old <unsigned v ? old : v
@ UMax
*p = old >unsigned v ? old : v
@ UDecWrap
Decrement one until a minimum value or zero.
@ Nand
*p = ~(old & v)
Value * getPointerOperand()
bool isFloatingPointOperation() const
BinOp getOperation() const
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this rmw instruction.
AtomicOrdering getOrdering() const
Returns the ordering constraint of this rmw instruction.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
bool test(unsigned Idx) const
Definition BitVector.h:480
size_type count() const
count - Returns the number of bits which are set.
Definition BitVector.h:181
A "pseudo-class" with methods for operating on BUILD_VECTORs.
CCState - This class holds information needed while lowering arguments and return values.
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
LLVM_ABI void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeCallOperands - Analyze the outgoing arguments to a call, incorporating info about the passed v...
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
LLVM_ABI void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
CCValAssign - Represent assignment of one arg/retval to a location.
static CCValAssign getPending(unsigned ValNo, MVT ValVT, MVT LocVT, LocInfo HTP, unsigned ExtraInfo=0)
Register getLocReg() const
LocInfo getLocInfo() const
static CCValAssign getReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP, bool IsCustom=false)
static CCValAssign getCustomReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP)
static CCValAssign getMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP, bool IsCustom=false)
bool needsCustom() const
int64_t getLocMemOffset() const
unsigned getValNo() const
static CCValAssign getCustomMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP)
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
LLVM_ABI bool isMustTailCall() const
Tests if this call site must be tail call optimized.
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
const APFloat & getValueAPF() const
This is the shared class of boolean and integer constants.
Definition Constants.h:87
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition Constants.h:231
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition Constants.h:219
uint64_t getZExtValue() const
int64_t getSExtValue() const
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
unsigned getPointerSizeInBits(unsigned AS=0) const
The size in bits of the pointer representation in a given address space.
Definition DataLayout.h:490
LLVM_ABI Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
A debug info location.
Definition DebugLoc.h:123
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition Function.h:209
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition Function.h:270
Argument * getArg(unsigned i) const
Definition Function.h:890
bool isDSOLocal() const
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2775
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Class to represent integer types.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI void emitError(const Instruction *I, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location inf...
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
LoongArchMachineFunctionInfo - This class is derived from MachineFunctionInfo and contains private Lo...
const LoongArchRegisterInfo * getRegisterInfo() const override
const LoongArchInstrInfo * getInstrInfo() const override
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallBase &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override
Return true if result of the specified node is used by a return node only.
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps, bool &UseOneConstNR, bool Reciprocal) const override
Hooks for building estimates in place of slower divisions and square roots.
bool isLegalICmpImmediate(int64_t Imm) const override
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(const AtomicCmpXchgInst *CI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
Return the ValueType of the result of SETCC operations.
std::pair< bool, uint64_t > isImmVLDILegalForMode1(const APInt &SplatValue, const unsigned SplatBitSize) const
Check if a constant splat can be generated using [x]vldi, where imm[12] is 1.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
bool isExtractVecEltCheap(EVT VT, unsigned Index) const override
Return true if extraction of a scalar element from the given vector type at the given index is cheap.
LegalizeTypeAction getPreferredVectorAction(MVT VT) const override
Return the preferred vector type legalization action.
bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Determine if the target supports unaligned memory accesses.
bool isCheapToSpeculateCtlz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic ctlz.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
bool shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize, Align &PrefAlign) const override
Return true if the pointer arguments to CI should be aligned by aligning the object whose address is ...
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const override
Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type from this source type with ...
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
bool signExtendConstant(const ConstantInt *CI) const override
Return true if this constant should be sign extended when promoting to a larger type.
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(const AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool isLegalAddImmediate(int64_t Imm) const override
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
bool isCheapToSpeculateCttz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic cttz.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const override
Returns true if arguments should be sign-extended in lib calls.
bool shouldScalarizeBinop(SDValue VecOp) const override
Try to convert an extract element of a vector binary operation into an extract element followed by a ...
bool isFPImmVLDILegal(const APFloat &Imm, EVT VT) const
bool shouldExtendTypeInLibCall(EVT Type) const override
Returns true if arguments should be extended in lib calls.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
bool hasAndNot(SDValue Y) const override
Return true if the target has a bitwise and-not operation: X = ~A & B This can be used to simplify se...
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const override
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
void emitExpandAtomicRMW(AtomicRMWInst *AI) const override
Perform a atomicrmw expansion using a target-specific way.
ISD::NodeType getExtendForAtomicCmpSwapArg() const override
Returns how the platform's atomic compare and swap expects its comparison value to be extended (ZERO_...
LoongArchTargetLowering(const TargetMachine &TM, const LoongArchSubtarget &STI)
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
bool hasAndNotCompare(SDValue Y) const override
Return true if the target should transform: (X & Y) == Y ---> (~X & Y) == 0 (X & Y) !...
SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps) const override
Return a reciprocal estimate value for the input operand.
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context, const Type *RetTy) const override
This hook should be implemented to check whether the return values described by the Outs array can fi...
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array,...
bool mayBeEmittedAsTailCall(const CallInst *CI) const override
Return true if the target may be able emit the call instruction as a tail call.
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:41
bool hasFeature(unsigned Feature) const
Machine Value Type.
static MVT getFloatingPointVT(unsigned BitWidth)
bool is128BitVector() const
Return true if this is a 128-bit vector type.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isScalableVector() const
Return true if this is a vector value type where the runtime length is machine dependent.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
bool is256BitVector() const
Return true if this is a 256-bit vector type.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
MVT getHalfNumVectorElementsVT() const
Return a VT for a vector type with the same element type but half the number of elements.
MVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
void push_back(MachineInstr *MI)
void setCallFrameSize(unsigned N)
Set the call frame size on entry to this basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineInstr - Allocate a new MachineInstr.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
Representation of each machine instruction.
LLVM_ABI void collectDebugValues(SmallVectorImpl< MachineInstr * > &DbgValues)
Scan instructions immediately following MI and collect any matching DBG_VALUEs.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
A description of a memory reference used in the backend.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
Flags getFlags() const
Return the raw flags of the source value,.
MachineOperand class - Representation of each machine instruction operand.
void setIsKill(bool Val=true)
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Align getAlign() const
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition ArrayRef.h:298
Class to represent pointers.
unsigned getAddressSpace() const
Return the address space of the Pointer type.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:79
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
LLVM_ABI bool isOnlyUserOf(const SDNode *N) const
Return true if this node is the only use of N.
size_t use_size() const
Return the number of uses of this node.
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumOperands() const
Return the number of values used by this operation.
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
bool isUndef() const
Returns true if the node type is UNDEF or POISON.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
uint64_t getScalarValueSizeInBits() const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
LLVM_ABI SDValue getStackArgumentTokenFactor(SDValue Chain)
Compute a TokenFactor to force all the incoming stack arguments to be loaded from the stack.
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
LLVM_ABI SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
bool isSafeToSpeculativelyExecute(unsigned Opcode) const
Some opcodes may create immediate undefined behavior when used with some values (integer division-by-...
SDValue getExtractSubvector(const SDLoc &DL, EVT VT, SDValue Vec, unsigned Idx)
Return the VT typed sub-vector of Vec at Idx.
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getInsertSubvector(const SDLoc &DL, SDValue Vec, SDValue SubVec, unsigned Idx)
Insert SubVec at the Idx element of Vec.
LLVM_ABI SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), BatchAAResults *BatchAA=nullptr)
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
static constexpr unsigned MaxRecursionDepth
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
LLVM_ABI SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
LLVM_ABI void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
const DataLayout & getDataLayout() const
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
LLVM_ABI SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
LLVM_ABI std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
LLVM_ABI SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
LLVM_ABI SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
LLVM_ABI SDValue WidenVector(const SDValue &N, const SDLoc &DL)
Widen the vector up to the next power of two using INSERT_SUBVECTOR.
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
LLVM_ABI SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
LLVM_ABI SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getRegisterMask(const uint32_t *RegMask)
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVM_ABI SDValue getCondCode(ISD::CondCode Cond)
LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
LLVMContext * getContext() const
LLVM_ABI SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
LLVM_ABI SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
LLVM_ABI std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
static LLVM_ABI bool isReverseMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask swaps the order of elements from exactly one source vector.
ArrayRef< int > getMask() const
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:133
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition SmallSet.h:175
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:183
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void assign(size_type NumElts, ValueParamT Elt)
typename SuperClass::const_iterator const_iterator
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StackOffset holds a fixed and a scalable offset in bytes.
Definition TypeSize.h:30
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition StringRef.h:712
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:261
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:146
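An illustrative StringRef sketch combining split, starts_with and size; the "abi=<name>" option syntax parsed here is made up.
#include "llvm/ADT/StringRef.h"
#include <utility>
// Splits "abi=<name>" into its two halves around the first '='.
static bool parseABIOption(llvm::StringRef Opt, llvm::StringRef &ABIName) {
  if (!Opt.starts_with("abi="))
    return false;
  std::pair<llvm::StringRef, llvm::StringRef> Parts = Opt.split('=');
  ABIName = Parts.second;
  return ABIName.size() != 0;
}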
TargetInstrInfo - Interface to description of machine instruction set.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
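A minimal sketch of how such calls conventionally appear inside a target's TargetLowering constructor; the opcode/type/action triples are illustrative and do not describe this target's actual configuration.
// Inside a hypothetical MyTargetTargetLowering constructor:
setOperationAction(ISD::ADD, MVT::i64, Legal);       // natively supported
setOperationAction(ISD::BSWAP, MVT::i32, Expand);    // legalizer rewrites it
setOperationAction(ISD::BR_JT, MVT::Other, Custom);  // handled in LowerOperation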
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
const TargetMachine & getTargetMachine() const
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
LegalizeTypeAction
This enum indicates whether types are legal for a target, and if not, what action should be used to...
void setMaxBytesForAlignment(unsigned MaxBytes)
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
virtual TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT) const
Return the preferred vector type legalization action.
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
void setMinCmpXchgSizeInBits(unsigned SizeInBits)
Sets the minimum cmpxchg or ll/sc size supported by the backend.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.stacksave/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
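Continuing the constructor-time sketch, these hooks can be configured alongside setOperationAction; again, the specific types and actions are assumptions for illustration.
// Still inside the hypothetical constructor:
setLoadExtAction(ISD::SEXTLOAD, MVT::i32, MVT::i16, Legal); // i16->i32 sext load
setTruncStoreAction(MVT::f64, MVT::f32, Expand);            // no truncating FP store
setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);           // rewrite this predicate
setTargetDAGCombine(ISD::AND);                              // request PerformDAGCombine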
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
std::vector< ArgListEntry > ArgListTy
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Vector Op.
virtual InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const
SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
More limited version of SimplifyDemandedBits that can be used to "look through" ops that don't contrib...
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
bool parametersInCSRMatch(const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask, const SmallVectorImpl< CCValAssign > &ArgLocs, const SmallVectorImpl< SDValue > &OutVals) const
Check whether parameters to a call that are passed in callee saved registers are the same as from the...
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
TargetLowering(const TargetLowering &)=delete
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::LibcallImpl LibcallImpl, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
Primary interface to the complete machine description for the target machine.
bool useTLSDESC() const
Returns true if this target uses TLS Descriptors.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
bool shouldAssumeDSOLocal(const GlobalValue *GV) const
CodeModel::Model getCodeModel() const
Returns the code model.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetInstrInfo * getInstrInfo() const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
LLVM_ABI unsigned getIntegerBitWidth() const
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:197
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:300
This class is used to represent EVT's, which are used to parameterize some operations.
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:553
self_iterator getIterator()
Definition ilist_node.h:123
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition CallingConv.h:24
@ PreserveMost
Used for runtime calls that preserve most registers.
Definition CallingConv.h:63
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition CallingConv.h:50
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ PreserveNone
Used for runtime calls that preserve no general registers.
Definition CallingConv.h:90
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition ISDOpcodes.h:41
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:819
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to; it returns an output chain.
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition ISDOpcodes.h:511
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition ISDOpcodes.h:45
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:275
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:600
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:779
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:264
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:853
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:518
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:220
@ GlobalAddress
Definition ISDOpcodes.h:88
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:880
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:584
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:417
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:747
@ MEMBARRIER
MEMBARRIER - Compiler barrier only; generate a no-op.
@ ATOMIC_FENCE
OUTCHAIN = ATOMIC_FENCE(INCHAIN, ordering, scope) This corresponds to the fence instruction.
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition ISDOpcodes.h:993
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:254
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
@ GlobalTLSAddress
Definition ISDOpcodes.h:89
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:844
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:665
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ BR_CC
BR_CC - Conditional branch.
@ BR_JT
BR_JT - Jumptable branch.
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition ISDOpcodes.h:541
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition ISDOpcodes.h:548
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:374
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:796
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:233
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:704
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:765
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:649
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:614
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimumNumber or maximumNumber on two values,...
@ READ_REGISTER
READ_REGISTER, WRITE_REGISTER - This node represents llvm.register on the DAG, which implements the n...
Definition ISDOpcodes.h:139
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:576
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition ISDOpcodes.h:224
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:850
@ DEBUGTRAP
DEBUGTRAP - Trap intended to get the attention of a debugger.
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:811
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values, following IEEE-754 definition...
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:888
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:727
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:978
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:805
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition ISDOpcodes.h:150
@ BF16_TO_FP
BF16_TO_FP, FP_TO_BF16 - These operators are used to perform promotions and truncation for bfloat16.
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition ISDOpcodes.h:110
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:926
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:739
@ TRAP
TRAP - Trapping instruction.
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:205
@ FREEZE
FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or is evaluated to UNDEF),...
Definition ISDOpcodes.h:241
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:565
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:856
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
@ BRCOND
BRCOND - Conditional branch.
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition ISDOpcodes.h:833
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition ISDOpcodes.h:62
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:365
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition ISDOpcodes.h:722
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:213
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:556
LLVM_ABI CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
LLVM_ABI bool isFreezeUndef(const SDNode *N)
Return true if the specified node is FREEZE(UNDEF).
LLVM_ABI CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
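A small sketch of the condition-code helpers, assuming VT is the integer type being compared; the surrounding function is hypothetical.
static void condCodeExamples(EVT VT) {
  ISD::CondCode CC = ISD::SETLT;
  ISD::CondCode Swapped = ISD::getSetCCSwappedOperands(CC); // SETGT: (Y op X)
  ISD::CondCode Inverted = ISD::getSetCCInverse(CC, VT);    // SETGE: !(X op Y)
  (void)Swapped;
  (void)Inverted;
}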
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LLVM_ABI bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
LLVM_ABI NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode)
Get underlying scalar opcode for VECREDUCE opcode.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
ABI getTargetABI(StringRef ABIName)
InstSeq generateInstSeq(int64_t Val)
LLVM_ABI Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition LLVMContext.h:55
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
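A hedged sketch of cl::init and cl::values in a typical cl::opt declaration; the enum, option name and descriptions are invented for illustration.
#include "llvm/Support/CommandLine.h"
using namespace llvm;
enum class ExampleMode { Off, Fast, Precise };
static cl::opt<ExampleMode> ExampleModeOpt(
    "example-mode", cl::Hidden, cl::desc("Illustrative mode selector"),
    cl::init(ExampleMode::Fast),
    cl::values(clEnumValN(ExampleMode::Off, "off", "Disable the feature"),
               clEnumValN(ExampleMode::Fast, "fast", "Prefer speed"),
               clEnumValN(ExampleMode::Precise, "precise", "Prefer accuracy")));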
Sequence
A sequence of states that a pointer may go through in which an objc_retain and objc_release are actua...
Definition PtrState.h:41
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
Definition Threading.h:280
@ Offset
Definition DWP.cpp:532
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1737
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
@ Kill
The last use of a register.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
constexpr RegState getKillRegState(bool B)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
bool isIntOrFPConstant(SDValue V)
Return true if V is either an integer or FP constant.
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
Definition bit.h:303
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
LLVM_ABI bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:337
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
Definition MathExtras.h:273
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1744
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition MathExtras.h:261
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
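A standalone sketch exercising the MathExtras predicates listed in this index; the constants and thresholds are arbitrary examples.
#include "llvm/Support/MathExtras.h"
#include <cstdint>
static void classifyImmediate(uint64_t V) {
  bool SImm12 = llvm::isInt<12>((int64_t)V);    // fits a signed 12-bit field?
  bool UImm12 = llvm::isUInt<12>(V);            // fits an unsigned 12-bit field?
  bool ShiftedMask = llvm::isShiftedMask_64(V); // e.g. 0x0ff0
  bool LowMask = llvm::isMask_64(V);            // e.g. 0x00ff
  unsigned FloorLog2 = llvm::Log2_64(V);        // floor(log2(V))
  (void)SImm12; (void)UImm12; (void)ShiftedMask; (void)LowMask; (void)FloorLog2;
}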
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
AtomicOrdering
Atomic ordering for LLVM's memory model.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition MathExtras.h:394
@ Other
Any other memory.
Definition ModRef.h:68
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr bool isShiftedInt(int64_t x)
Checks if a signed integer is an N bit number shifted left by S.
Definition MathExtras.h:182
constexpr unsigned BitWidth
std::string join_items(Sep Separator, Args &&... Items)
Joins the strings in the parameter pack Items, adding Separator between the elements....
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
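Illustrative use of the casting utilities on an SDValue; treating the value as a ConstantSDNode and the 12-bit threshold are assumptions, not behavior documented here.
static bool isSmallImmediate(SDValue V) {
  // dyn_cast returns null when V is not defined by a ConstantSDNode.
  if (const auto *C = dyn_cast<ConstantSDNode>(V))
    return isInt<12>(C->getSExtValue());
  return false;
}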
LLVM_ABI bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
PointerUnion< const Value *, const PseudoSourceValue * > ValueType
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:177
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Extended Value Type.
Definition ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition ValueTypes.h:94
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:395
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:137
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition ValueTypes.h:284
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition ValueTypes.h:300
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:147
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:373
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:385
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:316
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition ValueTypes.h:207
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:65
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:381
static EVT getFloatingPointVT(unsigned BitWidth)
Returns the EVT that represents a floating-point type with the given number of bits.
Definition ValueTypes.h:59
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:168
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:323
bool is256BitVector() const
Return true if this is a 256-bit vector type.
Definition ValueTypes.h:212
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:328
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:157
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:336
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:453
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:152
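A short sketch combining several of the EVT queries above into a single predicate; the 128-bit and 32-bit thresholds are arbitrary choices for illustration.
static bool isSmallIntVector(EVT VT) {
  return VT.isVector() && VT.isInteger() && VT.is128BitVector() &&
         VT.getScalarSizeInBits() <= 32 && VT.getVectorNumElements() >= 4;
}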
Align getNonZeroOrigAlign() const
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
static LLVM_ABI MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
SmallVector< ISD::OutputArg, 32 > Outs
LLVM_ABI SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
This structure is used to pass arguments to makeLibCall function.
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...