//=- LoongArchISelLowering.cpp - LoongArch DAG Lowering Implementation ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that LoongArch uses to lower LLVM code into
// a selection DAG.
//
//===----------------------------------------------------------------------===//

#include "LoongArchISelLowering.h"
#include "LoongArch.h"
#include "LoongArchSubtarget.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicsLoongArch.h"
#include "llvm/Support/Debug.h"

using namespace llvm;

#define DEBUG_TYPE "loongarch-isel-lowering"

STATISTIC(NumTailCalls, "Number of tail calls");

    "loongarch-materialize-float-imm", cl::Hidden,
    cl::desc("Maximum number of instructions used (including code sequence "
             "to generate the value and moving the value to FPR) when "
             "materializing floating-point immediates (default = 3)"),
    cl::values(clEnumValN(NoMaterializeFPImm, "0", "Use constant pool"),
               "Materialize FP immediate within 2 instructions"),
               "Materialize FP immediate within 3 instructions"),
               "Materialize FP immediate within 4 instructions"),
               "Materialize FP immediate within 5 instructions"),
               "Materialize FP immediate within 6 instructions "
               "(behaves same as 5 on loongarch64)")));

static cl::opt<bool> ZeroDivCheck("loongarch-check-zero-division", cl::Hidden,
                                  cl::desc("Trap on integer division by zero."),
                                  cl::init(false));

LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
                                                 const LoongArchSubtarget &STI)
    : TargetLowering(TM, STI), Subtarget(STI) {

  MVT GRLenVT = Subtarget.getGRLenVT();

  // Set up the register classes.

  addRegisterClass(GRLenVT, &LoongArch::GPRRegClass);
  if (Subtarget.hasBasicF())
    addRegisterClass(MVT::f32, &LoongArch::FPR32RegClass);
  if (Subtarget.hasBasicD())
    addRegisterClass(MVT::f64, &LoongArch::FPR64RegClass);

  static const MVT::SimpleValueType LSXVTs[] = {
      MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32, MVT::v2f64};
  static const MVT::SimpleValueType LASXVTs[] = {
      MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, MVT::v8f32, MVT::v4f64};

  if (Subtarget.hasExtLSX())
    for (MVT VT : LSXVTs)
      addRegisterClass(VT, &LoongArch::LSX128RegClass);

  if (Subtarget.hasExtLASX())
    for (MVT VT : LASXVTs)
      addRegisterClass(VT, &LoongArch::LASX256RegClass);

  // Set operations for LA32 and LA64.

                     MVT::i1, Promote);

                     GRLenVT, Custom);


  setOperationAction(ISD::TRAP, MVT::Other, Legal);


  // BITREV/REVB requires the 32S feature.
  if (STI.has32S()) {
    // Expand bitreverse.i16 with native-width bitrev and shift for now, until
    // we know which of sll and revb.2h is faster.

    // LA32 does not have REVB.2W and REVB.D due to the 64-bit operands, and
    // the narrower REVB.W does not exist. But LA32 does have REVB.2H, so i16
    // and i32 can still be byte-swapped relatively cheaply.
  } else {
  }


  // Set operations for LA64 only.

  if (Subtarget.is64Bit()) {

                     Custom);
  }

  // Set operations for LA32 only.

  if (!Subtarget.is64Bit()) {
    if (Subtarget.hasBasicD())
  }

  static const ISD::CondCode FPCCToExpand[] = {

  // Set operations for 'F' feature.

  if (Subtarget.hasBasicF()) {
    setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
    setTruncStoreAction(MVT::f32, MVT::f16, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
    setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
    setCondCodeAction(FPCCToExpand, MVT::f32, Expand);

                       Subtarget.isSoftFPABI() ? LibCall : Custom);
                       Subtarget.isSoftFPABI() ? LibCall : Custom);
                       Subtarget.isSoftFPABI() ? LibCall : Custom);

    if (Subtarget.is64Bit())

    if (!Subtarget.hasBasicD()) {
      if (Subtarget.is64Bit()) {
      }
    }
  }

  // Set operations for 'D' feature.

  if (Subtarget.hasBasicD()) {
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
    setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
    setTruncStoreAction(MVT::f64, MVT::f16, Expand);
    setTruncStoreAction(MVT::f64, MVT::f32, Expand);
    setCondCodeAction(FPCCToExpand, MVT::f64, Expand);

                       Subtarget.isSoftFPABI() ? LibCall : Custom);
                       Subtarget.isSoftFPABI() ? LibCall : Custom);

    if (Subtarget.is64Bit())
  }

  // Set operations for 'LSX' feature.

  if (Subtarget.hasExtLSX()) {
    for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
      // Expand all truncating stores and extending loads.
      for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
        setTruncStoreAction(VT, InnerVT, Expand);
        setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
      }
      // By default everything must be expanded. Then we will selectively turn
      // on ones that can be effectively codegen'd.
      for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
        setOperationAction(Op, VT, Expand);
    }

    for (MVT VT : LSXVTs) {


    }
    for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
                       Legal);
                       VT, Legal);
                       Expand);
    }
    for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
    for (MVT VT : {MVT::v8i16, MVT::v4i32, MVT::v2i64})
    for (MVT VT : {MVT::v4i32, MVT::v2i64}) {
    }
    for (MVT VT : {MVT::v4f32, MVT::v2f64}) {
                       VT, Expand);
    }

    setOperationAction(ISD::FCEIL, {MVT::f32, MVT::f64}, Legal);
    setOperationAction(ISD::FFLOOR, {MVT::f32, MVT::f64}, Legal);
    setOperationAction(ISD::FTRUNC, {MVT::f32, MVT::f64}, Legal);
    setOperationAction(ISD::FROUNDEVEN, {MVT::f32, MVT::f64}, Legal);

    for (MVT VT :
         {MVT::v16i8, MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v8i16, MVT::v4i16,
          MVT::v2i16, MVT::v4i32, MVT::v2i32, MVT::v2i64}) {
    }
  }

  // Set operations for 'LASX' feature.

  if (Subtarget.hasExtLASX()) {
    for (MVT VT : LASXVTs) {


    }
    for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) {
                       Legal);
                       VT, Legal);
                       Expand);
    }
    for (MVT VT : {MVT::v32i8, MVT::v16i16, MVT::v8i32})
    for (MVT VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64})
    for (MVT VT : {MVT::v8i32, MVT::v4i32, MVT::v4i64}) {
    }
    for (MVT VT : {MVT::v8f32, MVT::v4f64}) {
                       VT, Expand);
    }
  }

  // Set DAG combine for LA32 and LA64.
  if (Subtarget.hasBasicF()) {
  }


  // Set DAG combine for 'LSX' feature.

  if (Subtarget.hasExtLSX()) {
  }

  // Set DAG combine for 'LASX' feature.
  if (Subtarget.hasExtLASX()) {
  }

  // Compute derived properties from the register classes.
  computeRegisterProperties(Subtarget.getRegisterInfo());



  setMaxAtomicSizeInBitsSupported(Subtarget.getGRLen());


  // Function alignments.

  // Set preferred alignments.
  setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment());
  setPrefLoopAlignment(Subtarget.getPrefLoopAlignment());
  setMaxBytesForAlignment(Subtarget.getMaxBytesForAlignment());

  // cmpxchg sizes down to 8 bits become legal if LAMCAS is available.
  if (Subtarget.hasLAMCAS())
    setMinCmpXchgSizeInBits(8);

  if (Subtarget.hasSCQ()) {
  }

  // Disable strict node mutation.
  IsStrictFPEnabled = true;
}

bool LoongArchTargetLowering::isOffsetFoldingLegal(
    const GlobalAddressSDNode *GA) const {
  // In order to maximise the opportunity for common subexpression elimination,
  // keep a separate ADD node for the global address offset instead of folding
  // it in the global address node. Later peephole optimisations may choose to
  // fold it back in when profitable.
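  // For example (illustrative): accesses to @g+8 and @g+24 then share a single
  // address node for @g feeding two ADD nodes, which CSE can merge, instead of
  // two distinct global address nodes with the offsets folded in.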
  return false;
}

SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
                                                SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  case ISD::ATOMIC_FENCE:
    return lowerATOMIC_FENCE(Op, DAG);
  case ISD::EH_DWARF_CFA:
    return lowerEH_DWARF_CFA(Op, DAG);
  case ISD::GlobalAddress:
    return lowerGlobalAddress(Op, DAG);
  case ISD::GlobalTLSAddress:
    return lowerGlobalTLSAddress(Op, DAG);
  case ISD::INTRINSIC_WO_CHAIN:
    return lowerINTRINSIC_WO_CHAIN(Op, DAG);
  case ISD::INTRINSIC_W_CHAIN:
    return lowerINTRINSIC_W_CHAIN(Op, DAG);
  case ISD::INTRINSIC_VOID:
    return lowerINTRINSIC_VOID(Op, DAG);
  case ISD::BlockAddress:
    return lowerBlockAddress(Op, DAG);
  case ISD::JumpTable:
    return lowerJumpTable(Op, DAG);
  case ISD::SHL_PARTS:
    return lowerShiftLeftParts(Op, DAG);
  case ISD::SRA_PARTS:
    return lowerShiftRightParts(Op, DAG, true);
  case ISD::SRL_PARTS:
    return lowerShiftRightParts(Op, DAG, false);
  case ISD::ConstantPool:
    return lowerConstantPool(Op, DAG);
  case ISD::FP_TO_SINT:
    return lowerFP_TO_SINT(Op, DAG);
  case ISD::BITCAST:
    return lowerBITCAST(Op, DAG);
  case ISD::UINT_TO_FP:
    return lowerUINT_TO_FP(Op, DAG);
  case ISD::SINT_TO_FP:
    return lowerSINT_TO_FP(Op, DAG);
  case ISD::VASTART:
    return lowerVASTART(Op, DAG);
  case ISD::FRAMEADDR:
    return lowerFRAMEADDR(Op, DAG);
  case ISD::RETURNADDR:
    return lowerRETURNADDR(Op, DAG);
  case ISD::WRITE_REGISTER:
    return lowerWRITE_REGISTER(Op, DAG);
  case ISD::INSERT_VECTOR_ELT:
    return lowerINSERT_VECTOR_ELT(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT:
    return lowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::BUILD_VECTOR:
    return lowerBUILD_VECTOR(Op, DAG);
  case ISD::CONCAT_VECTORS:
    return lowerCONCAT_VECTORS(Op, DAG);
  case ISD::VECTOR_SHUFFLE:
    return lowerVECTOR_SHUFFLE(Op, DAG);
  case ISD::BITREVERSE:
    return lowerBITREVERSE(Op, DAG);
  case ISD::SCALAR_TO_VECTOR:
    return lowerSCALAR_TO_VECTOR(Op, DAG);
  case ISD::PREFETCH:
    return lowerPREFETCH(Op, DAG);
  case ISD::SELECT:
    return lowerSELECT(Op, DAG);
  case ISD::BRCOND:
    return lowerBRCOND(Op, DAG);
  case ISD::FP_TO_FP16:
    return lowerFP_TO_FP16(Op, DAG);
  case ISD::FP16_TO_FP:
    return lowerFP16_TO_FP(Op, DAG);
  case ISD::FP_TO_BF16:
    return lowerFP_TO_BF16(Op, DAG);
  case ISD::BF16_TO_FP:
    return lowerBF16_TO_FP(Op, DAG);
  case ISD::VECREDUCE_ADD:
    return lowerVECREDUCE_ADD(Op, DAG);
  case ISD::ROTL:
  case ISD::ROTR:
    return lowerRotate(Op, DAG);
  case ISD::VECREDUCE_AND:
  case ISD::VECREDUCE_OR:
  case ISD::VECREDUCE_XOR:
  case ISD::VECREDUCE_SMAX:
  case ISD::VECREDUCE_SMIN:
  case ISD::VECREDUCE_UMAX:
  case ISD::VECREDUCE_UMIN:
    return lowerVECREDUCE(Op, DAG);
  case ISD::ConstantFP:
    return lowerConstantFP(Op, DAG);
  case ISD::SETCC:
    return lowerSETCC(Op, DAG);
  }
  return SDValue();
}

629// Helper to attempt to return a cheaper, bit-inverted version of \p V.
631 // TODO: don't always ignore oneuse constraints.
632 V = peekThroughBitcasts(V);
633 EVT VT = V.getValueType();
634
635 // Match not(xor X, -1) -> X.
636 if (V.getOpcode() == ISD::XOR &&
637 (ISD::isBuildVectorAllOnes(V.getOperand(1).getNode()) ||
638 isAllOnesConstant(V.getOperand(1))))
639 return V.getOperand(0);
640
641 // Match not(extract_subvector(not(X)) -> extract_subvector(X).
642 if (V.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
643 (isNullConstant(V.getOperand(1)) || V.getOperand(0).hasOneUse())) {
644 if (SDValue Not = isNOT(V.getOperand(0), DAG)) {
645 Not = DAG.getBitcast(V.getOperand(0).getValueType(), Not);
646 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(Not), VT, Not,
647 V.getOperand(1));
648 }
649 }
650
651 // Match not(SplatVector(not(X)) -> SplatVector(X).
652 if (V.getOpcode() == ISD::BUILD_VECTOR) {
653 if (SDValue SplatValue =
654 cast<BuildVectorSDNode>(V.getNode())->getSplatValue()) {
655 if (!V->isOnlyUserOf(SplatValue.getNode()))
656 return SDValue();
657
658 if (SDValue Not = isNOT(SplatValue, DAG)) {
659 Not = DAG.getBitcast(V.getOperand(0).getValueType(), Not);
660 return DAG.getSplat(VT, SDLoc(Not), Not);
661 }
662 }
663 }
664
665 // Match not(or(not(X),not(Y))) -> and(X, Y).
666 if (V.getOpcode() == ISD::OR && DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
667 V.getOperand(0).hasOneUse() && V.getOperand(1).hasOneUse()) {
668 // TODO: Handle cases with single NOT operand -> VANDN
669 if (SDValue Op1 = isNOT(V.getOperand(1), DAG))
670 if (SDValue Op0 = isNOT(V.getOperand(0), DAG))
671 return DAG.getNode(ISD::AND, SDLoc(V), VT, DAG.getBitcast(VT, Op0),
672 DAG.getBitcast(VT, Op1));
673 }
674
675 // TODO: Add more matching patterns. Such as,
676 // not(concat_vectors(not(X), not(Y))) -> concat_vectors(X, Y).
677 // not(slt(C, X)) -> slt(X - 1, C)
678
679 return SDValue();
680}
681
SDValue LoongArchTargetLowering::lowerConstantFP(SDValue Op,
                                                 SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op);
  const APFloat &FPVal = CFP->getValueAPF();
  SDLoc DL(CFP);

  assert((VT == MVT::f32 && Subtarget.hasBasicF()) ||
         (VT == MVT::f64 && Subtarget.hasBasicD()));

  // If the value is 0.0 or -0.0, just ignore it.
  if (FPVal.isZero())
    return SDValue();

  // If LSX is enabled, use the cheaper 'vldi' instruction if possible.
  if (isFPImmVLDILegal(FPVal, VT))
    return SDValue();

  // Construct the value as an integer, then move it to a float register.
  APInt INTVal = FPVal.bitcastToAPInt();

  // If more than MaterializeFPImmInsNum instructions would be used to
  // generate INTVal and move it to a float register, fall back to a
  // floating-point load from the constant pool.
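  // For example (illustrative): f32 2.5 has the bit pattern 0x40200000; a
  // single LU12I.W can build that in a GPR, and with the MOVGR2FR.W move the
  // total is 2 instructions, within the default budget of 3.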
  LoongArchMatInt::InstSeq Seq =
      LoongArchMatInt::generateInstSeq(INTVal.getSExtValue());
  int InsNum = Seq.size() + ((VT == MVT::f64 && !Subtarget.is64Bit()) ? 2 : 1);
  if (InsNum > MaterializeFPImmInsNum && !FPVal.isExactlyValue(+1.0))
    return SDValue();

  switch (VT.getSimpleVT().SimpleTy) {
  default:
    llvm_unreachable("Unexpected floating point type!");
    break;
  case MVT::f32: {
    SDValue NewVal = DAG.getConstant(INTVal, DL, MVT::i32);
    if (Subtarget.is64Bit())
      NewVal = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, NewVal);
    return DAG.getNode(Subtarget.is64Bit() ? LoongArchISD::MOVGR2FR_W_LA64
                                           : LoongArchISD::MOVGR2FR_W,
                       DL, VT, NewVal);
  }
  case MVT::f64: {
    if (Subtarget.is64Bit()) {
      SDValue NewVal = DAG.getConstant(INTVal, DL, MVT::i64);
      return DAG.getNode(LoongArchISD::MOVGR2FR_D, DL, VT, NewVal);
    }
    SDValue Lo = DAG.getConstant(INTVal.trunc(32), DL, MVT::i32);
    SDValue Hi = DAG.getConstant(INTVal.lshr(32).trunc(32), DL, MVT::i32);
    return DAG.getNode(LoongArchISD::MOVGR2FR_D_LO_HI, DL, VT, Lo, Hi);
  }
  }

  return SDValue();
}

737// Ensure SETCC result and operand have the same bit width; isel does not
738// support mismatched widths.
739SDValue LoongArchTargetLowering::lowerSETCC(SDValue Op,
740 SelectionDAG &DAG) const {
741 SDLoc DL(Op);
742 EVT ResultVT = Op.getValueType();
743 EVT OperandVT = Op.getOperand(0).getValueType();
744
745 EVT SetCCResultVT =
746 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), OperandVT);
747
748 if (ResultVT == SetCCResultVT)
749 return Op;
750
751 assert(Op.getOperand(0).getValueType() == Op.getOperand(1).getValueType() &&
752 "SETCC operands must have the same type!");
753
754 SDValue SetCCNode =
755 DAG.getNode(ISD::SETCC, DL, SetCCResultVT, Op.getOperand(0),
756 Op.getOperand(1), Op.getOperand(2));
757
758 if (ResultVT.bitsGT(SetCCResultVT))
759 SetCCNode = DAG.getNode(ISD::SIGN_EXTEND, DL, ResultVT, SetCCNode);
760 else if (ResultVT.bitsLT(SetCCResultVT))
761 SetCCNode = DAG.getNode(ISD::TRUNCATE, DL, ResultVT, SetCCNode);
762
763 return SetCCNode;
764}
765
766// Lower vecreduce_add using vhaddw instructions.
767// For Example:
768// call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a)
769// can be lowered to:
770// VHADDW_D_W vr0, vr0, vr0
771// VHADDW_Q_D vr0, vr0, vr0
772// VPICKVE2GR_D a0, vr0, 0
773// ADDI_W a0, a0, 0
774SDValue LoongArchTargetLowering::lowerVECREDUCE_ADD(SDValue Op,
775 SelectionDAG &DAG) const {
776
777 SDLoc DL(Op);
778 MVT OpVT = Op.getSimpleValueType();
779 SDValue Val = Op.getOperand(0);
780
781 unsigned NumEles = Val.getSimpleValueType().getVectorNumElements();
782 unsigned EleBits = Val.getSimpleValueType().getScalarSizeInBits();
783 unsigned ResBits = OpVT.getScalarSizeInBits();
784
785 unsigned LegalVecSize = 128;
786 bool isLASX256Vector =
787 Subtarget.hasExtLASX() && Val.getValueSizeInBits() == 256;
788
789 // Ensure operand type legal or enable it legal.
790 while (!isTypeLegal(Val.getSimpleValueType())) {
791 Val = DAG.WidenVector(Val, DL);
792 }
793
794 // NumEles is designed for iterations count, v4i32 for LSX
795 // and v8i32 for LASX should have the same count.
796 if (isLASX256Vector) {
797 NumEles /= 2;
798 LegalVecSize = 256;
799 }
800
801 for (unsigned i = 1; i < NumEles; i *= 2, EleBits *= 2) {
802 MVT IntTy = MVT::getIntegerVT(EleBits);
803 MVT VecTy = MVT::getVectorVT(IntTy, LegalVecSize / EleBits);
804 Val = DAG.getNode(LoongArchISD::VHADDW, DL, VecTy, Val, Val);
805 }
806
807 if (isLASX256Vector) {
808 SDValue Tmp = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, Val,
809 DAG.getConstant(2, DL, Subtarget.getGRLenVT()));
810 Val = DAG.getNode(ISD::ADD, DL, MVT::v4i64, Tmp, Val);
811 }
812
813 Val = DAG.getBitcast(MVT::getVectorVT(OpVT, LegalVecSize / ResBits), Val);
814 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Val,
815 DAG.getConstant(0, DL, Subtarget.getGRLenVT()));
816}
817
818// Lower vecreduce_and/or/xor/[s/u]max/[s/u]min.
819// For Example:
820// call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %a)
821// can be lowered to:
822// VBSRL_V vr1, vr0, 8
823// VMAX_W vr0, vr1, vr0
824// VBSRL_V vr1, vr0, 4
825// VMAX_W vr0, vr1, vr0
826// VPICKVE2GR_W a0, vr0, 0
827// For 256 bit vector, it is illegal and will be spilt into
828// two 128 bit vector by default then processed by this.
829SDValue LoongArchTargetLowering::lowerVECREDUCE(SDValue Op,
830 SelectionDAG &DAG) const {
831 SDLoc DL(Op);
832
833 MVT OpVT = Op.getSimpleValueType();
834 SDValue Val = Op.getOperand(0);
835
836 unsigned NumEles = Val.getSimpleValueType().getVectorNumElements();
837 unsigned EleBits = Val.getSimpleValueType().getScalarSizeInBits();
838
839 // Ensure operand type legal or enable it legal.
840 while (!isTypeLegal(Val.getSimpleValueType())) {
841 Val = DAG.WidenVector(Val, DL);
842 }
843
844 unsigned Opcode = ISD::getVecReduceBaseOpcode(Op.getOpcode());
845 MVT VecTy = Val.getSimpleValueType();
846 MVT GRLenVT = Subtarget.getGRLenVT();
847
848 for (int i = NumEles; i > 1; i /= 2) {
849 SDValue ShiftAmt = DAG.getConstant(i * EleBits / 16, DL, GRLenVT);
850 SDValue Tmp = DAG.getNode(LoongArchISD::VBSRL, DL, VecTy, Val, ShiftAmt);
851 Val = DAG.getNode(Opcode, DL, VecTy, Tmp, Val);
852 }
853
854 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Val,
855 DAG.getConstant(0, DL, GRLenVT));
856}
857
SDValue LoongArchTargetLowering::lowerPREFETCH(SDValue Op,
                                               SelectionDAG &DAG) const {
  unsigned IsData = Op.getConstantOperandVal(4);

  // We don't support non-data prefetch.
  // Just preserve the chain.
  if (!IsData)
    return Op.getOperand(0);

  return Op;
}

SDValue LoongArchTargetLowering::lowerRotate(SDValue Op,
                                             SelectionDAG &DAG) const {
  MVT VT = Op.getSimpleValueType();
  assert(VT.isVector() && "Unexpected type");

  SDLoc DL(Op);
  SDValue R = Op.getOperand(0);
  SDValue Amt = Op.getOperand(1);
  unsigned Opcode = Op.getOpcode();
  unsigned EltSizeInBits = VT.getScalarSizeInBits();

  auto checkCstSplat = [](SDValue V, APInt &CstSplatValue) {
    if (V.getOpcode() != ISD::BUILD_VECTOR)
      return false;
    if (SDValue SplatValue =
            cast<BuildVectorSDNode>(V.getNode())->getSplatValue()) {
      if (auto *C = dyn_cast<ConstantSDNode>(SplatValue)) {
        CstSplatValue = C->getAPIntValue();
        return true;
      }
    }
    return false;
  };

  // Check for a constant splat rotation amount.
  APInt CstSplatValue;
  bool IsCstSplat = checkCstSplat(Amt, CstSplatValue);
  bool isROTL = Opcode == ISD::ROTL;

  // Check for splat rotate by zero.
  if (IsCstSplat && CstSplatValue.urem(EltSizeInBits) == 0)
    return R;

  // LoongArch targets always prefer ISD::ROTR.
  if (isROTL) {
    SDValue Zero = DAG.getConstant(0, DL, VT);
    return DAG.getNode(ISD::ROTR, DL, VT, R,
                       DAG.getNode(ISD::SUB, DL, VT, Zero, Amt));
  }

  // Rotate by an immediate.
  if (IsCstSplat) {
    // ISD::ROTR: Attempt to rotate by a positive immediate.
    SDValue Bits = DAG.getConstant(EltSizeInBits, DL, VT);
    if (SDValue Urem =
            DAG.FoldConstantArithmetic(ISD::UREM, DL, VT, {Amt, Bits}))
      return DAG.getNode(Opcode, DL, VT, R, Urem);
  }

  return Op;
}

922// Return true if Val is equal to (setcc LHS, RHS, CC).
923// Return false if Val is the inverse of (setcc LHS, RHS, CC).
924// Otherwise, return std::nullopt.
static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS,
                                      ISD::CondCode CC, SDValue Val) {
  assert(Val->getOpcode() == ISD::SETCC);
  SDValue LHS2 = Val.getOperand(0);
  SDValue RHS2 = Val.getOperand(1);
  ISD::CondCode CC2 = cast<CondCodeSDNode>(Val.getOperand(2))->get();

  if (LHS == LHS2 && RHS == RHS2) {
    if (CC == CC2)
      return true;
    if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
      return false;
  } else if (LHS == RHS2 && RHS == LHS2) {
    CC2 = ISD::getSetCCSwappedOperands(CC2);
    if (CC == CC2)
      return true;
    if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
      return false;
  }

  return std::nullopt;
}

static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG,
                                    const LoongArchSubtarget &Subtarget) {
  SDValue CondV = N->getOperand(0);
  SDValue TrueV = N->getOperand(1);
  SDValue FalseV = N->getOperand(2);
  MVT VT = N->getSimpleValueType(0);
  SDLoc DL(N);

  // (select c, -1, y) -> -c | y
  if (isAllOnesConstant(TrueV)) {
    SDValue Neg = DAG.getNegative(CondV, DL, VT);
    return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(FalseV));
  }
  // (select c, y, -1) -> (c-1) | y
  if (isAllOnesConstant(FalseV)) {
    SDValue Neg =
        DAG.getNode(ISD::ADD, DL, VT, CondV, DAG.getAllOnesConstant(DL, VT));
    return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(TrueV));
  }

  // (select c, 0, y) -> (c-1) & y
  if (isNullConstant(TrueV)) {
    SDValue Neg =
        DAG.getNode(ISD::ADD, DL, VT, CondV, DAG.getAllOnesConstant(DL, VT));
    return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(FalseV));
  }
  // (select c, y, 0) -> -c & y
  if (isNullConstant(FalseV)) {
    SDValue Neg = DAG.getNegative(CondV, DL, VT);
    return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(TrueV));
  }

  // (select c, ~x, x) -> (xor -c, x)
  if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
    const APInt &TrueVal = TrueV->getAsAPIntVal();
    const APInt &FalseVal = FalseV->getAsAPIntVal();
    if (~TrueVal == FalseVal) {
      SDValue Neg = DAG.getNegative(CondV, DL, VT);
      return DAG.getNode(ISD::XOR, DL, VT, Neg, FalseV);
    }
  }

  // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops
  // when both truev and falsev are also setcc.
  if (CondV.getOpcode() == ISD::SETCC && TrueV.getOpcode() == ISD::SETCC &&
      FalseV.getOpcode() == ISD::SETCC) {
    SDValue LHS = CondV.getOperand(0);
    SDValue RHS = CondV.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(CondV.getOperand(2))->get();

    // (select x, x, y) -> x | y
    // (select !x, x, y) -> x & y
    if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, TrueV)) {
      return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, TrueV,
                         DAG.getFreeze(FalseV));
    }
    // (select x, y, x) -> x & y
    // (select !x, y, x) -> x | y
    if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, FalseV)) {
      return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT,
                         DAG.getFreeze(TrueV), FalseV);
    }
  }

  return SDValue();
}

1015// Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants
1016// into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable.
1017// For now we only consider transformation profitable if `binOp(c0, c1)` ends up
1018// being `0` or `-1`. In such cases we can replace `select` with `and`.
1019// TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize
1020// than `c0`?
static SDValue
foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG,
                                const LoongArchSubtarget &Subtarget) {
  unsigned SelOpNo = 0;
  SDValue Sel = BO->getOperand(0);
  if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
    SelOpNo = 1;
    Sel = BO->getOperand(1);
  }

  if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
    return SDValue();

  unsigned ConstSelOpNo = 1;
  unsigned OtherSelOpNo = 2;
  if (!isa<ConstantSDNode>(Sel->getOperand(ConstSelOpNo))) {
    ConstSelOpNo = 2;
    OtherSelOpNo = 1;
  }
  SDValue ConstSelOp = Sel->getOperand(ConstSelOpNo);
  ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(ConstSelOp);
  if (!ConstSelOpNode || ConstSelOpNode->isOpaque())
    return SDValue();

  SDValue ConstBinOp = BO->getOperand(SelOpNo ^ 1);
  ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(ConstBinOp);
  if (!ConstBinOpNode || ConstBinOpNode->isOpaque())
    return SDValue();

  SDLoc DL(Sel);
  EVT VT = BO->getValueType(0);

  SDValue NewConstOps[2] = {ConstSelOp, ConstBinOp};
  if (SelOpNo == 1)
    std::swap(NewConstOps[0], NewConstOps[1]);

  SDValue NewConstOp =
      DAG.FoldConstantArithmetic(BO->getOpcode(), DL, VT, NewConstOps);
  if (!NewConstOp)
    return SDValue();

  const APInt &NewConstAPInt = NewConstOp->getAsAPIntVal();
  if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes())
    return SDValue();

  SDValue OtherSelOp = Sel->getOperand(OtherSelOpNo);
  SDValue NewNonConstOps[2] = {OtherSelOp, ConstBinOp};
  if (SelOpNo == 1)
    std::swap(NewNonConstOps[0], NewNonConstOps[1]);
  SDValue NewNonConstOp = DAG.getNode(BO->getOpcode(), DL, VT, NewNonConstOps);

  SDValue NewT = (ConstSelOpNo == 1) ? NewConstOp : NewNonConstOp;
  SDValue NewF = (ConstSelOpNo == 1) ? NewNonConstOp : NewConstOp;
  return DAG.getSelect(DL, VT, Sel.getOperand(0), NewT, NewF);
}


// Changes the condition code and swaps operands if necessary, so the SetCC
// operation matches one of the comparisons supported directly by branches
// in the LoongArch ISA. May adjust compares to favor compare with 0 over
// compare with 1/-1.
static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
                                    ISD::CondCode &CC, SelectionDAG &DAG) {
  // If this is a single bit test that can't be handled by ANDI, shift the
  // bit to be tested to the MSB and perform a signed compare with 0.
  if (isIntEqualitySetCC(CC) && isNullConstant(RHS) &&
      LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
      isa<ConstantSDNode>(LHS.getOperand(1))) {
    uint64_t Mask = LHS.getConstantOperandVal(1);
    if ((isPowerOf2_64(Mask) || isMask_64(Mask)) && !isInt<12>(Mask)) {
      unsigned ShAmt = 0;
      if (isPowerOf2_64(Mask)) {
        CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
        ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);
      } else {
        ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Mask);
      }

      LHS = LHS.getOperand(0);
      if (ShAmt != 0)
        LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS,
                          DAG.getConstant(ShAmt, DL, LHS.getValueType()));
      return;
    }
  }

  if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
    int64_t C = RHSC->getSExtValue();
    switch (CC) {
    default:
      break;
    case ISD::SETGT:
      // Convert X > -1 to X >= 0.
      if (C == -1) {
        RHS = DAG.getConstant(0, DL, RHS.getValueType());
        CC = ISD::SETGE;
        return;
      }
      break;
    case ISD::SETLT:
      // Convert X < 1 to 0 >= X.
      if (C == 1) {
        RHS = LHS;
        LHS = DAG.getConstant(0, DL, RHS.getValueType());
        CC = ISD::SETGE;
        return;
      }
      break;
    }
  }

  switch (CC) {
  default:
    break;
  case ISD::SETGT:
  case ISD::SETLE:
  case ISD::SETUGT:
  case ISD::SETULE:
    CC = ISD::getSetCCSwappedOperands(CC);
    std::swap(LHS, RHS);
    break;
  }
}

SDValue LoongArchTargetLowering::lowerSELECT(SDValue Op,
                                             SelectionDAG &DAG) const {
  SDValue CondV = Op.getOperand(0);
  SDValue TrueV = Op.getOperand(1);
  SDValue FalseV = Op.getOperand(2);
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();
  MVT GRLenVT = Subtarget.getGRLenVT();

  if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
    return V;

  if (Op.hasOneUse()) {
    unsigned UseOpc = Op->user_begin()->getOpcode();
    if (isBinOp(UseOpc) && DAG.isSafeToSpeculativelyExecute(UseOpc)) {
      SDNode *BinOp = *Op->user_begin();
      if (SDValue NewSel = foldBinOpIntoSelectIfProfitable(*Op->user_begin(),
                                                           DAG, Subtarget)) {
        DAG.ReplaceAllUsesWith(BinOp, &NewSel);
        // The opcode check is necessary because foldBinOpIntoSelectIfProfitable
        // may return a constant node and cause a crash in lowerSELECT.
        if (NewSel.getOpcode() == ISD::SELECT)
          return lowerSELECT(NewSel, DAG);
        return NewSel;
      }
    }
  }

  // If the condition is not an integer SETCC which operates on GRLenVT, we need
  // to emit a LoongArchISD::SELECT_CC comparing the condition to zero. i.e.:
  // (select condv, truev, falsev)
  // -> (loongarchisd::select_cc condv, zero, setne, truev, falsev)
  if (CondV.getOpcode() != ISD::SETCC ||
      CondV.getOperand(0).getSimpleValueType() != GRLenVT) {
    SDValue Zero = DAG.getConstant(0, DL, GRLenVT);
    SDValue SetNE = DAG.getCondCode(ISD::SETNE);

    SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};

    return DAG.getNode(LoongArchISD::SELECT_CC, DL, VT, Ops);
  }

  // If the CondV is the output of a SETCC node which operates on GRLenVT
  // inputs, then merge the SETCC node into the lowered LoongArchISD::SELECT_CC
  // to take advantage of the integer compare+branch instructions. i.e.:
  // (select (setcc lhs, rhs, cc), truev, falsev)
  // -> (loongarchisd::select_cc lhs, rhs, cc, truev, falsev)
  SDValue LHS = CondV.getOperand(0);
  SDValue RHS = CondV.getOperand(1);
  ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();

  // Special case for a select of 2 constants that have a difference of 1.
  // Normally this is done by DAGCombine, but if the select is introduced by
  // type legalization or op legalization, we miss it. Restricting to SETLT
  // case for now because that is what signed saturating add/sub need.
  // FIXME: We don't need the condition to be SETLT or even a SETCC,
  // but we would probably want to swap the true/false values if the condition
  // is SETGE/SETLE to avoid an XORI.
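  // For example (illustrative): (select (setlt a, b), 4, 3) becomes
  // (add (setlt a, b), 3), because the setcc produces 0 or 1 and 4 - 1 == 3.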
  if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
      CCVal == ISD::SETLT) {
    const APInt &TrueVal = TrueV->getAsAPIntVal();
    const APInt &FalseVal = FalseV->getAsAPIntVal();
    if (TrueVal - 1 == FalseVal)
      return DAG.getNode(ISD::ADD, DL, VT, CondV, FalseV);
    if (TrueVal + 1 == FalseVal)
      return DAG.getNode(ISD::SUB, DL, VT, FalseV, CondV);
  }

  translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
  // 1 < x ? x : 1 -> 0 < x ? x : 1
  if (isOneConstant(LHS) && (CCVal == ISD::SETLT || CCVal == ISD::SETULT) &&
      RHS == TrueV && LHS == FalseV) {
    LHS = DAG.getConstant(0, DL, VT);
    // 0 <u x is the same as x != 0.
    if (CCVal == ISD::SETULT) {
      std::swap(LHS, RHS);
      CCVal = ISD::SETNE;
    }
  }

  // x <s -1 ? x : -1 -> x <s 0 ? x : -1
  if (isAllOnesConstant(RHS) && CCVal == ISD::SETLT && LHS == TrueV &&
      RHS == FalseV) {
    RHS = DAG.getConstant(0, DL, VT);
  }

  SDValue TargetCC = DAG.getCondCode(CCVal);

  if (isa<ConstantSDNode>(TrueV) && !isa<ConstantSDNode>(FalseV)) {
    // (select (setcc lhs, rhs, CC), constant, falsev)
    // -> (select (setcc lhs, rhs, InverseCC), falsev, constant)
    std::swap(TrueV, FalseV);
    TargetCC = DAG.getCondCode(ISD::getSetCCInverse(CCVal, LHS.getValueType()));
  }

  SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
  return DAG.getNode(LoongArchISD::SELECT_CC, DL, VT, Ops);
}

SDValue LoongArchTargetLowering::lowerBRCOND(SDValue Op,
                                             SelectionDAG &DAG) const {
  SDValue CondV = Op.getOperand(1);
  SDLoc DL(Op);
  MVT GRLenVT = Subtarget.getGRLenVT();

  if (CondV.getOpcode() == ISD::SETCC) {
    if (CondV.getOperand(0).getValueType() == GRLenVT) {
      SDValue LHS = CondV.getOperand(0);
      SDValue RHS = CondV.getOperand(1);
      ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();

      translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);

      SDValue TargetCC = DAG.getCondCode(CCVal);
      return DAG.getNode(LoongArchISD::BR_CC, DL, Op.getValueType(),
                         Op.getOperand(0), LHS, RHS, TargetCC,
                         Op.getOperand(2));
    } else if (CondV.getOperand(0).getValueType().isFloatingPoint()) {
      return DAG.getNode(LoongArchISD::BRCOND, DL, Op.getValueType(),
                         Op.getOperand(0), CondV, Op.getOperand(2));
    }
  }

  return DAG.getNode(LoongArchISD::BR_CC, DL, Op.getValueType(),
                     Op.getOperand(0), CondV, DAG.getConstant(0, DL, GRLenVT),
                     DAG.getCondCode(ISD::SETNE), Op.getOperand(2));
}

SDValue
LoongArchTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
                                               SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT OpVT = Op.getSimpleValueType();

  SDValue Vector = DAG.getUNDEF(OpVT);
  SDValue Val = Op.getOperand(0);
  SDValue Idx = DAG.getConstant(0, DL, Subtarget.getGRLenVT());

  return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, OpVT, Vector, Val, Idx);
}

SDValue LoongArchTargetLowering::lowerBITREVERSE(SDValue Op,
                                                 SelectionDAG &DAG) const {
  EVT ResTy = Op->getValueType(0);
  SDValue Src = Op->getOperand(0);
  SDLoc DL(Op);

  // LoongArchISD::BITREV_8B is not supported on LA32.
  if (!Subtarget.is64Bit() && (ResTy == MVT::v16i8 || ResTy == MVT::v32i8))
    return SDValue();

  EVT NewVT = ResTy.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
  unsigned int OrigEltNum = ResTy.getVectorNumElements();
  unsigned int NewEltNum = NewVT.getVectorNumElements();

  SDValue NewSrc = DAG.getNode(ISD::BITCAST, DL, NewVT, Src);

  SmallVector<SDValue, 8> Ops;
  for (unsigned int i = 0; i < NewEltNum; i++) {
    SDValue Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, NewSrc,
                             DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
    unsigned RevOp = (ResTy == MVT::v16i8 || ResTy == MVT::v32i8)
                         ? (unsigned)LoongArchISD::BITREV_8B
                         : (unsigned)ISD::BITREVERSE;
    Ops.push_back(DAG.getNode(RevOp, DL, MVT::i64, Op));
  }
  SDValue Res =
      DAG.getNode(ISD::BITCAST, DL, ResTy, DAG.getBuildVector(NewVT, DL, Ops));

  switch (ResTy.getSimpleVT().SimpleTy) {
  default:
    return SDValue();
  case MVT::v16i8:
  case MVT::v32i8:
    return Res;
  case MVT::v8i16:
  case MVT::v16i16:
  case MVT::v4i32:
  case MVT::v8i32: {
    SmallVector<int, 32> Mask;
    for (unsigned int i = 0; i < NewEltNum; i++)
      for (int j = OrigEltNum / NewEltNum - 1; j >= 0; j--)
        Mask.push_back(j + (OrigEltNum / NewEltNum) * i);
    return DAG.getVectorShuffle(ResTy, DL, Res, DAG.getUNDEF(ResTy), Mask);
  }
  }
}

// Widen element type to get a new mask value (if possible).
// For example:
// shufflevector <4 x i32> %a, <4 x i32> %b,
// <4 x i32> <i32 6, i32 7, i32 2, i32 3>
// is equivalent to:
// shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
// and can be lowered to:
// VPACKOD_D vr0, vr0, vr1
static SDValue widenShuffleMask(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
                                SDValue V1, SDValue V2, SelectionDAG &DAG) {
  unsigned EltBits = VT.getScalarSizeInBits();

  if (EltBits > 32 || EltBits == 1)
    return SDValue();

  SmallVector<int, 8> NewMask;
  if (widenShuffleMaskElts(Mask, NewMask)) {
    MVT NewEltVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(EltBits * 2)
                                        : MVT::getIntegerVT(EltBits * 2);
    MVT NewVT = MVT::getVectorVT(NewEltVT, VT.getVectorNumElements() / 2);
    if (DAG.getTargetLoweringInfo().isTypeLegal(NewVT)) {
      SDValue NewV1 = DAG.getBitcast(NewVT, V1);
      SDValue NewV2 = DAG.getBitcast(NewVT, V2);
      return DAG.getBitcast(
          VT, DAG.getVectorShuffle(NewVT, DL, NewV1, NewV2, NewMask));
    }
  }

  return SDValue();
}

/// Attempts to match a shuffle mask against the VBSLL, VBSRL, VSLLI and VSRLI
/// instructions.
// The function matches elements from one of the input vectors shuffled to the
// left or right with zeroable elements 'shifted in'. It handles both the
// strictly bit-wise element shifts and the byte shift across an entire 128-bit
// lane.
// Mostly copied from X86.
static int matchShuffleAsShift(MVT &ShiftVT, unsigned &Opcode,
                               unsigned ScalarSizeInBits, ArrayRef<int> Mask,
                               int MaskOffset, const APInt &Zeroable) {
  int Size = Mask.size();
  unsigned SizeInBits = Size * ScalarSizeInBits;

  auto CheckZeros = [&](int Shift, int Scale, bool Left) {
    for (int i = 0; i < Size; i += Scale)
      for (int j = 0; j < Shift; ++j)
        if (!Zeroable[i + j + (Left ? 0 : (Scale - Shift))])
          return false;

    return true;
  };

  auto isSequentialOrUndefInRange = [&](unsigned Pos, unsigned Size, int Low,
                                        int Step = 1) {
    for (unsigned i = Pos, e = Pos + Size; i != e; ++i, Low += Step)
      if (!(Mask[i] == -1 || Mask[i] == Low))
        return false;
    return true;
  };

  auto MatchShift = [&](int Shift, int Scale, bool Left) {
    for (int i = 0; i != Size; i += Scale) {
      unsigned Pos = Left ? i + Shift : i;
      unsigned Low = Left ? i : i + Shift;
      unsigned Len = Scale - Shift;
      if (!isSequentialOrUndefInRange(Pos, Len, Low + MaskOffset))
        return -1;
    }

    int ShiftEltBits = ScalarSizeInBits * Scale;
    bool ByteShift = ShiftEltBits > 64;
    Opcode = Left ? (ByteShift ? LoongArchISD::VBSLL : LoongArchISD::VSLLI)
                  : (ByteShift ? LoongArchISD::VBSRL : LoongArchISD::VSRLI);
    int ShiftAmt = Shift * ScalarSizeInBits / (ByteShift ? 8 : 1);

    // Normalize the scale for byte shifts to still produce an i64 element
    // type.
    Scale = ByteShift ? Scale / 2 : Scale;

    // We need to round trip through the appropriate type for the shift.
    MVT ShiftSVT = MVT::getIntegerVT(ScalarSizeInBits * Scale);
    ShiftVT = ByteShift ? MVT::getVectorVT(MVT::i8, SizeInBits / 8)
                        : MVT::getVectorVT(ShiftSVT, Size / Scale);
    return (int)ShiftAmt;
  };

  unsigned MaxWidth = 128;
  for (int Scale = 2; Scale * ScalarSizeInBits <= MaxWidth; Scale *= 2)
    for (int Shift = 1; Shift != Scale; ++Shift)
      for (bool Left : {true, false})
        if (CheckZeros(Shift, Scale, Left)) {
          int ShiftAmt = MatchShift(Shift, Scale, Left);
          if (0 < ShiftAmt)
            return ShiftAmt;
        }

  // no match
  return -1;
}

/// Lower VECTOR_SHUFFLE as shift (if possible).
///
/// For example:
/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
/// <4 x i32> <i32 4, i32 0, i32 1, i32 2>
/// is lowered to:
/// (VBSLL_V $v0, $v0, 4)
///
/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
/// <4 x i32> <i32 4, i32 0, i32 4, i32 2>
/// is lowered to:
/// (VSLLI_D $v0, $v0, 32)
static SDValue lowerVECTOR_SHUFFLEAsShift(const SDLoc &DL, ArrayRef<int> Mask,
                                          MVT VT, SDValue V1, SDValue V2,
                                          SelectionDAG &DAG,
                                          const LoongArchSubtarget &Subtarget,
                                          const APInt &Zeroable) {
  int Size = Mask.size();
  assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");

  MVT ShiftVT;
  SDValue V = V1;
  unsigned Opcode;

  // Try to match the shuffle against a shift of V1.
  int ShiftAmt = matchShuffleAsShift(ShiftVT, Opcode, VT.getScalarSizeInBits(),
                                     Mask, 0, Zeroable);

  // If V1 failed, try to match the shuffle against a shift of V2.
  if (ShiftAmt < 0) {
    ShiftAmt = matchShuffleAsShift(ShiftVT, Opcode, VT.getScalarSizeInBits(),
                                   Mask, Size, Zeroable);
    V = V2;
  }

  if (ShiftAmt < 0)
    return SDValue();

  assert(DAG.getTargetLoweringInfo().isTypeLegal(ShiftVT) &&
         "Illegal integer vector type");
  V = DAG.getBitcast(ShiftVT, V);
  V = DAG.getNode(Opcode, DL, ShiftVT, V,
                  DAG.getConstant(ShiftAmt, DL, Subtarget.getGRLenVT()));
  return DAG.getBitcast(VT, V);
}

/// Determine whether a range fits a regular pattern of values.
/// This function accounts for the possibility of jumping over the End iterator.
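/// For example (illustrative): with Mask = <0, 4, 2, 6>, the call
/// fitsRegularPattern<int>(Begin, 2, End, 0, 2) visits elements 0 and 2 and
/// checks them against the expected values 0 and 2, so the "even elements of
/// the first vector" pattern matches.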
template <typename ValType>
static bool
fitsRegularPattern(typename SmallVectorImpl<ValType>::const_iterator Begin,
                   unsigned CheckStride,
                   typename SmallVectorImpl<ValType>::const_iterator End,
                   ValType ExpectedIndex, unsigned ExpectedIndexStride) {
  auto &I = Begin;

  while (I != End) {
    if (*I != -1 && *I != ExpectedIndex)
      return false;
    ExpectedIndex += ExpectedIndexStride;

    // Incrementing past End is undefined behaviour so we must increment one
    // step at a time and check for End at each step.
    for (unsigned n = 0; n < CheckStride && I != End; ++n, ++I)
      ; // Empty loop body.
  }
  return true;
}

/// Compute whether each element of a shuffle is zeroable.
///
/// A "zeroable" vector shuffle element is one which can be lowered to zero.
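/// For example (illustrative): for (shuffle V1, zeroinitializer,
/// <0, 5, -1, 7>), elements 1 and 3 select from the all-zeros vector, so
/// KnownZero has bits 1 and 3 set, and element 2 is undef, so KnownUndef has
/// bit 2 set.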
static void computeZeroableShuffleElements(ArrayRef<int> Mask, SDValue V1,
                                           SDValue V2, APInt &KnownUndef,
                                           APInt &KnownZero) {
  int Size = Mask.size();
  KnownUndef = KnownZero = APInt::getZero(Size);

  V1 = peekThroughBitcasts(V1);
  V2 = peekThroughBitcasts(V2);

  bool V1IsZero = ISD::isBuildVectorAllZeros(V1.getNode());
  bool V2IsZero = ISD::isBuildVectorAllZeros(V2.getNode());

  int VectorSizeInBits = V1.getValueSizeInBits();
  int ScalarSizeInBits = VectorSizeInBits / Size;
  assert(!(VectorSizeInBits % ScalarSizeInBits) && "Illegal shuffle mask size");
  (void)ScalarSizeInBits;

  for (int i = 0; i < Size; ++i) {
    int M = Mask[i];
    if (M < 0) {
      KnownUndef.setBit(i);
      continue;
    }
    if ((M >= 0 && M < Size && V1IsZero) || (M >= Size && V2IsZero)) {
      KnownZero.setBit(i);
      continue;
    }
  }
}

/// Test whether a shuffle mask is equivalent within each sub-lane.
///
/// The specific repeated shuffle mask is populated in \p RepeatedMask, as it is
/// non-trivial to compute in the face of undef lanes. The representation is
/// suitable for use with existing 128-bit shuffles as entries from the second
/// vector have been remapped to [LaneSize, 2*LaneSize).
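/// For example (illustrative): the 256-bit v8i32 mask
/// <0, 9, 2, 11, 4, 13, 6, 15> repeats per 128-bit lane and produces
/// RepeatedMask = <0, 5, 2, 7>, with second-vector entries remapped into
/// [4, 8).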
static bool isRepeatedShuffleMask(unsigned LaneSizeInBits, MVT VT,
                                  ArrayRef<int> Mask,
                                  SmallVectorImpl<int> &RepeatedMask) {
  auto LaneSize = LaneSizeInBits / VT.getScalarSizeInBits();
  RepeatedMask.assign(LaneSize, -1);
  int Size = Mask.size();
  for (int i = 0; i < Size; ++i) {
    assert(Mask[i] == -1 || Mask[i] >= 0);
    if (Mask[i] < 0)
      continue;
    if ((Mask[i] % Size) / LaneSize != i / LaneSize)
      // This entry crosses lanes, so there is no way to model this shuffle.
      return false;

    // Ok, handle the in-lane shuffles by detecting if and when they repeat.
    // Adjust second vector indices to start at LaneSize instead of Size.
    int LocalM =
        Mask[i] < Size ? Mask[i] % LaneSize : Mask[i] % LaneSize + LaneSize;
    if (RepeatedMask[i % LaneSize] < 0)
      // This is the first non-undef entry in this slot of a 128-bit lane.
      RepeatedMask[i % LaneSize] = LocalM;
    else if (RepeatedMask[i % LaneSize] != LocalM)
      // Found a mismatch with the repeated mask.
      return false;
  }
  return true;
}

/// Attempts to match a vector shuffle as a byte rotation.
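/// For example (illustrative): the v4i32 mask <2, 3, 4, 5> is a rotation of
/// the concatenated inputs by two 32-bit elements; with Scale = 16 / 4 the
/// returned byte amount is 2 * 4 = 8.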
static int matchShuffleAsByteRotate(MVT VT, SDValue &V1, SDValue &V2,
                                    ArrayRef<int> Mask) {

  SDValue Lo, Hi;
  SmallVector<int, 16> RepeatedMask;

  if (!isRepeatedShuffleMask(128, VT, Mask, RepeatedMask))
    return -1;

  int NumElts = RepeatedMask.size();
  int Rotation = 0;
  int Scale = 16 / NumElts;

  for (int i = 0; i < NumElts; ++i) {
    int M = RepeatedMask[i];
    assert((M == -1 || (0 <= M && M < (2 * NumElts))) &&
           "Unexpected mask index.");
    if (M < 0)
      continue;

    // Determine where a rotated vector would have started.
    int StartIdx = i - (M % NumElts);
    if (StartIdx == 0)
      return -1;

    // If we found the tail of a vector the rotation must be the missing
    // front. If we found the head of a vector, it must be how much of the
    // head.
    int CandidateRotation = StartIdx < 0 ? -StartIdx : NumElts - StartIdx;

    if (Rotation == 0)
      Rotation = CandidateRotation;
    else if (Rotation != CandidateRotation)
      return -1;

    // Compute which value this mask is pointing at.
    SDValue MaskV = M < NumElts ? V1 : V2;

    // Compute which of the two target values this index should be assigned
    // to. This reflects whether the high elements are remaining or the low
    // elements are remaining.
    SDValue &TargetV = StartIdx < 0 ? Hi : Lo;

    // Either set up this value if we've not encountered it before, or check
    // that it remains consistent.
    if (!TargetV)
      TargetV = MaskV;
    else if (TargetV != MaskV)
      return -1;
  }

  // Check that we successfully analyzed the mask, and normalize the results.
  assert(Rotation != 0 && "Failed to locate a viable rotation!");
  assert((Lo || Hi) && "Failed to find a rotated input vector!");
  if (!Lo)
    Lo = Hi;
  else if (!Hi)
    Hi = Lo;

  V1 = Lo;
  V2 = Hi;

  return Rotation * Scale;
}

/// Lower VECTOR_SHUFFLE as byte rotate (if possible).
///
/// For example:
/// %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b,
/// <2 x i32> <i32 3, i32 0>
/// is lowered to:
/// (VBSRL_V $v1, $v1, 8)
/// (VBSLL_V $v0, $v0, 8)
/// (VOR_V $v0, $v0, $v1)
static SDValue
lowerVECTOR_SHUFFLEAsByteRotate(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
                                SDValue V1, SDValue V2, SelectionDAG &DAG,
                                const LoongArchSubtarget &Subtarget) {

  SDValue Lo = V1, Hi = V2;
  int ByteRotation = matchShuffleAsByteRotate(VT, Lo, Hi, Mask);
  if (ByteRotation <= 0)
    return SDValue();

  MVT ByteVT = MVT::getVectorVT(MVT::i8, VT.getSizeInBits() / 8);
  Lo = DAG.getBitcast(ByteVT, Lo);
  Hi = DAG.getBitcast(ByteVT, Hi);

  int LoByteShift = 16 - ByteRotation;
  int HiByteShift = ByteRotation;
  MVT GRLenVT = Subtarget.getGRLenVT();

  SDValue LoShift = DAG.getNode(LoongArchISD::VBSLL, DL, ByteVT, Lo,
                                DAG.getConstant(LoByteShift, DL, GRLenVT));
  SDValue HiShift = DAG.getNode(LoongArchISD::VBSRL, DL, ByteVT, Hi,
                                DAG.getConstant(HiByteShift, DL, GRLenVT));
  return DAG.getBitcast(VT, DAG.getNode(ISD::OR, DL, ByteVT, LoShift, HiShift));
}

/// Lower VECTOR_SHUFFLE as ZERO_EXTEND or ANY_EXTEND (if possible).
///
/// For example:
/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
/// <4 x i32> <i32 0, i32 4, i32 1, i32 4>
/// %3 = bitcast <4 x i32> %2 to <2 x i64>
/// is lowered to:
/// (VREPLI $v1, 0)
/// (VILVL $v0, $v1, $v0)
static SDValue lowerVECTOR_SHUFFLEAsZeroOrAnyExtend(const SDLoc &DL,
                                                    ArrayRef<int> Mask, MVT VT,
                                                    SDValue V1, SDValue V2,
                                                    SelectionDAG &DAG,
                                                    const APInt &Zeroable) {
  int Bits = VT.getSizeInBits();
  int EltBits = VT.getScalarSizeInBits();
  int NumElements = VT.getVectorNumElements();

  if (Zeroable.isAllOnes())
    return DAG.getConstant(0, DL, VT);

  // Define a helper function to check a particular ext-scale and lower to it
  // if valid.
  auto Lower = [&](int Scale) -> SDValue {
    SDValue InputV;
    bool AnyExt = true;
    int Offset = 0;
    for (int i = 0; i < NumElements; i++) {
      int M = Mask[i];
      if (M < 0)
        continue;
      if (i % Scale != 0) {
        // Each of the extended elements needs to be zeroable.
        if (!Zeroable[i])
          return SDValue();

        AnyExt = false;
        continue;
      }

      // Each of the base elements needs to be consecutive indices into the
      // same input vector.
      SDValue V = M < NumElements ? V1 : V2;
      M = M % NumElements;
      if (!InputV) {
        InputV = V;
        Offset = M - (i / Scale);

        // This offset can't be handled.
        if (Offset % (NumElements / Scale))
          return SDValue();
      } else if (InputV != V)
        return SDValue();

      if (M != (Offset + (i / Scale)))
        return SDValue(); // Non-consecutive strided elements.
    }

    // If we fail to find an input, we have a zero-shuffle which should always
    // have already been handled.
    if (!InputV)
      return SDValue();

    do {
      unsigned VilVLoHi = LoongArchISD::VILVL;
      if (Offset >= (NumElements / 2)) {
        VilVLoHi = LoongArchISD::VILVH;
        Offset -= (NumElements / 2);
      }

      MVT InputVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits), NumElements);
      SDValue Ext =
          AnyExt ? DAG.getFreeze(InputV) : DAG.getConstant(0, DL, InputVT);
      InputV = DAG.getBitcast(InputVT, InputV);
      InputV = DAG.getNode(VilVLoHi, DL, InputVT, Ext, InputV);
      Scale /= 2;
      EltBits *= 2;
      NumElements /= 2;
    } while (Scale > 1);
    return DAG.getBitcast(VT, InputV);
  };

  // Each iteration, try extending the elements half as much, but into twice as
  // many elements.
  for (int NumExtElements = Bits / 64; NumExtElements < NumElements;
       NumExtElements *= 2) {
    if (SDValue V = Lower(NumElements / NumExtElements))
      return V;
  }
  return SDValue();
}

/// Lower VECTOR_SHUFFLE into VREPLVEI (if possible).
///
/// VREPLVEI performs vector broadcast based on an element specified by an
/// integer immediate, with its mask being similar to:
/// <x, x, x, ...>
/// where x is any valid index.
///
/// When undef's appear in the mask they are treated as if they were whatever
/// value is necessary in order to fit the above form.
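/// For example (illustrative):
/// %2 = shufflevector <4 x i32> %0, <4 x i32> undef,
/// <4 x i32> <i32 1, i32 1, i32 1, i32 1>
/// is lowered to:
/// (VREPLVEI_W $v0, $v0, 1)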
static SDValue
lowerVECTOR_SHUFFLE_VREPLVEI(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
                             SDValue V1, SelectionDAG &DAG,
                             const LoongArchSubtarget &Subtarget) {
  int SplatIndex = -1;
  for (const auto &M : Mask) {
    if (M != -1) {
      SplatIndex = M;
      break;
    }
  }

  if (SplatIndex == -1)
    return DAG.getUNDEF(VT);

  assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
  if (fitsRegularPattern<int>(Mask.begin(), 1, Mask.end(), SplatIndex, 0)) {
    return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
                       DAG.getConstant(SplatIndex, DL, Subtarget.getGRLenVT()));
  }

  return SDValue();
}

/// Lower VECTOR_SHUFFLE into VSHUF4I (if possible).
///
/// VSHUF4I splits the vector into blocks of four elements, then shuffles these
/// elements according to a <4 x i2> constant (encoded as an integer immediate).
///
/// It is therefore possible to lower into VSHUF4I when the mask takes the form:
/// <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...>
/// When undef's appear they are treated as if they were whatever value is
/// necessary in order to fit the above forms.
///
/// For example:
/// %2 = shufflevector <8 x i16> %0, <8 x i16> undef,
/// <8 x i32> <i32 3, i32 2, i32 1, i32 0,
/// i32 7, i32 6, i32 5, i32 4>
/// is lowered to:
/// (VSHUF4I_H $v0, $v1, 27)
/// where the 27 comes from:
/// 3 + (2 << 2) + (1 << 4) + (0 << 6)
static SDValue
lowerVECTOR_SHUFFLE_VSHUF4I(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
                            SDValue V1, SDValue V2, SelectionDAG &DAG,
                            const LoongArchSubtarget &Subtarget) {

  unsigned SubVecSize = 4;
  if (VT == MVT::v2f64 || VT == MVT::v2i64)
    SubVecSize = 2;

  int SubMask[4] = {-1, -1, -1, -1};
  for (unsigned i = 0; i < SubVecSize; ++i) {
    for (unsigned j = i; j < Mask.size(); j += SubVecSize) {
      int M = Mask[j];

      // Convert from vector index to 4-element subvector index.
      // If an index refers to an element outside of the subvector then give up.
      if (M != -1) {
        M -= 4 * (j / SubVecSize);
        if (M < 0 || M >= 4)
          return SDValue();
      }

      // If the mask has an undef, replace it with the current index.
      // Note that it might still be undef if the current index is also undef.
      if (SubMask[i] == -1)
        SubMask[i] = M;
      // Check that non-undef values are the same as in the mask. If they
      // aren't then give up.
      else if (M != -1 && M != SubMask[i])
        return SDValue();
    }
  }

  // Calculate the immediate. Replace any remaining undefs with zero.
  int Imm = 0;
  for (int i = SubVecSize - 1; i >= 0; --i) {
    int M = SubMask[i];

    if (M == -1)
      M = 0;

    Imm <<= 2;
    Imm |= M & 0x3;
  }

  MVT GRLenVT = Subtarget.getGRLenVT();

  // Return vshuf4i.d.
  if (VT == MVT::v2f64 || VT == MVT::v2i64)
    return DAG.getNode(LoongArchISD::VSHUF4I_D, DL, VT, V1, V2,
                       DAG.getConstant(Imm, DL, GRLenVT));

  return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT, V1,
                     DAG.getConstant(Imm, DL, GRLenVT));
}

/// Lower VECTOR_SHUFFLE whose result is the reversed source vector.
///
/// It is possible to optimize a VECTOR_SHUFFLE performing a vector reverse
/// whose mask looks like:
/// <7, 6, 5, 4, 3, 2, 1, 0>
///
/// When undef's appear in the mask they are treated as if they were whatever
/// value is necessary in order to fit the above forms.
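/// For example (illustrative), for v16i8 the mask <15, 14, ..., 1, 0> is
/// handled as a widened v4i32 <3, 2, 1, 0> shuffle that reverses the i32
/// lanes, followed by a VSHUF4I with immediate 27 that reverses the four
/// bytes inside each lane.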
static SDValue
lowerVECTOR_SHUFFLEAsReverse(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
                             SDValue V1, SelectionDAG &DAG,
                             const LoongArchSubtarget &Subtarget) {
  // Only vectors with i8/i16 elements which cannot match other patterns
  // directly need to do this.
  if (VT != MVT::v16i8 && VT != MVT::v8i16 && VT != MVT::v32i8 &&
      VT != MVT::v16i16)
    return SDValue();

  if (!ShuffleVectorInst::isReverseMask(Mask, Mask.size()))
    return SDValue();

  int WidenNumElts = VT.getVectorNumElements() / 4;
  SmallVector<int, 16> WidenMask(WidenNumElts, -1);
  for (int i = 0; i < WidenNumElts; ++i)
    WidenMask[i] = WidenNumElts - 1 - i;

  MVT WidenVT = MVT::getVectorVT(
      VT.getVectorElementType() == MVT::i8 ? MVT::i32 : MVT::i64, WidenNumElts);
  SDValue NewV1 = DAG.getBitcast(WidenVT, V1);
  SDValue WidenRev = DAG.getVectorShuffle(WidenVT, DL, NewV1,
                                          DAG.getUNDEF(WidenVT), WidenMask);

  return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT,
                     DAG.getBitcast(VT, WidenRev),
                     DAG.getConstant(27, DL, Subtarget.getGRLenVT()));
}

/// Lower VECTOR_SHUFFLE into VPACKEV (if possible).
///
/// VPACKEV interleaves the even elements from each vector.
///
/// It is possible to lower into VPACKEV when the mask consists of two of the
/// following forms interleaved:
/// <0, 2, 4, ...>
/// <n, n+2, n+4, ...>
/// where n is the number of elements in the vector.
/// For example:
/// <0, 0, 2, 2, 4, 4, ...>
/// <0, n, 2, n+2, 4, n+4, ...>
///
/// When undef's appear in the mask they are treated as if they were whatever
/// value is necessary in order to fit the above forms.
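/// For example (illustrative):
/// %2 = shufflevector <4 x i32> %a, <4 x i32> %b,
/// <4 x i32> <i32 0, i32 4, i32 2, i32 6>
/// is lowered to:
/// (VPACKEV_W $v0, $v1, $v0)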
static SDValue lowerVECTOR_SHUFFLE_VPACKEV(const SDLoc &DL, ArrayRef<int> Mask,
                                           MVT VT, SDValue V1, SDValue V2,
                                           SelectionDAG &DAG) {

  const auto &Begin = Mask.begin();
  const auto &End = Mask.end();
  SDValue OriV1 = V1, OriV2 = V2;

  if (fitsRegularPattern<int>(Begin, 2, End, 0, 2))
    V1 = OriV1;
  else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 2))
    V1 = OriV2;
  else
    return SDValue();

  if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 2))
    V2 = OriV1;
  else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 2))
    V2 = OriV2;
  else
    return SDValue();

  return DAG.getNode(LoongArchISD::VPACKEV, DL, VT, V2, V1);
}

/// Lower VECTOR_SHUFFLE into VPACKOD (if possible).
///
/// VPACKOD interleaves the odd elements from each vector.
///
/// It is possible to lower into VPACKOD when the mask consists of two of the
/// following forms interleaved:
/// <1, 3, 5, ...>
/// <n+1, n+3, n+5, ...>
/// where n is the number of elements in the vector.
/// For example:
/// <1, 1, 3, 3, 5, 5, ...>
/// <1, n+1, 3, n+3, 5, n+5, ...>
///
/// When undef's appear in the mask they are treated as if they were whatever
/// value is necessary in order to fit the above forms.
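/// For example (illustrative):
/// %2 = shufflevector <4 x i32> %a, <4 x i32> %b,
/// <4 x i32> <i32 1, i32 5, i32 3, i32 7>
/// is lowered to:
/// (VPACKOD_W $v0, $v1, $v0)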
static SDValue lowerVECTOR_SHUFFLE_VPACKOD(const SDLoc &DL, ArrayRef<int> Mask,
                                           MVT VT, SDValue V1, SDValue V2,
                                           SelectionDAG &DAG) {

  const auto &Begin = Mask.begin();
  const auto &End = Mask.end();
  SDValue OriV1 = V1, OriV2 = V2;

  if (fitsRegularPattern<int>(Begin, 2, End, 1, 2))
    V1 = OriV1;
  else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + 1, 2))
    V1 = OriV2;
  else
    return SDValue();

  if (fitsRegularPattern<int>(Begin + 1, 2, End, 1, 2))
    V2 = OriV1;
  else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + 1, 2))
    V2 = OriV2;
  else
    return SDValue();

  return DAG.getNode(LoongArchISD::VPACKOD, DL, VT, V2, V1);
}

1985/// Lower VECTOR_SHUFFLE into VILVH (if possible).
1986///
1987/// VILVH interleaves consecutive elements from the left (highest-indexed) half
1988/// of each vector.
1989///
1990/// It is possible to lower into VILVH when the mask consists of two of the
1991/// following forms interleaved:
1992/// <x, x+1, x+2, ...>
1993/// <n+x, n+x+1, n+x+2, ...>
1994/// where n is the number of elements in the vector and x is half n.
1995/// For example:
1996/// <x, x, x+1, x+1, x+2, x+2, ...>
1997/// <x, n+x, x+1, n+x+1, x+2, n+x+2, ...>
1998///
1999/// When undef's appear in the mask they are treated as if they were whatever
2000/// value is necessary in order to fit the above forms.
2002 MVT VT, SDValue V1, SDValue V2,
2003 SelectionDAG &DAG) {
2004
2005 const auto &Begin = Mask.begin();
2006 const auto &End = Mask.end();
2007 unsigned HalfSize = Mask.size() / 2;
2008 SDValue OriV1 = V1, OriV2 = V2;
2009
2010 if (fitsRegularPattern<int>(Begin, 2, End, HalfSize, 1))
2011 V1 = OriV1;
2012 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + HalfSize, 1))
2013 V1 = OriV2;
2014 else
2015 return SDValue();
2016
2017 if (fitsRegularPattern<int>(Begin + 1, 2, End, HalfSize, 1))
2018 V2 = OriV1;
2019 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + HalfSize,
2020 1))
2021 V2 = OriV2;
2022 else
2023 return SDValue();
2024
2025 return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
2026}
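// A minimal compile-time sketch (illustrative only; hypothetical helper
// names, plain arrays instead of ArrayRef) of one concrete VILVH case: for
// v8i16 (n = 8, x = 4) the two interleaved forms are <4, 5, 6, 7> from V1 and
// <12, 13, 14, 15> from V2, so mask <4, 12, 5, 13, 6, 14, 7, 15> matches.
namespace vilvh_example {
constexpr bool stepsBy1From(const int (&M)[8], int First, int Start) {
  // Check M[First], M[First+2], ... against Start, Start+1, ...
  for (int i = First, Expect = Start; i < 8; i += 2, ++Expect)
    if (M[i] != -1 && M[i] != Expect)
      return false;
  return true;
}
constexpr int HighHalves[8] = {4, 12, 5, 13, 6, 14, 7, 15};
static_assert(stepsBy1From(HighHalves, 0, 4) && // V1's high half
                  stepsBy1From(HighHalves, 1, 8 + 4), // V2's high half
              "mask matches the VILVH pattern");
} // namespace vilvh_example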
2027
2028/// Lower VECTOR_SHUFFLE into VILVL (if possible).
2029///
2030/// VILVL interleaves consecutive elements from the right (lowest-indexed) half
2031/// of each vector.
2032///
2033/// It is possible to lower into VILVL when the mask consists of two of the
2034/// following forms interleaved:
2035/// <0, 1, 2, ...>
2036/// <n, n+1, n+2, ...>
2037/// where n is the number of elements in the vector.
2038/// For example:
2039/// <0, 0, 1, 1, 2, 2, ...>
2040/// <0, n, 1, n+1, 2, n+2, ...>
2041///
2042/// When undef's appear in the mask they are treated as if they were whatever
2043/// value is necessary in order to fit the above forms.
2045 MVT VT, SDValue V1, SDValue V2,
2046 SelectionDAG &DAG) {
2047
2048 const auto &Begin = Mask.begin();
2049 const auto &End = Mask.end();
2050 SDValue OriV1 = V1, OriV2 = V2;
2051
2052 if (fitsRegularPattern<int>(Begin, 2, End, 0, 1))
2053 V1 = OriV1;
2054 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 1))
2055 V1 = OriV2;
2056 else
2057 return SDValue();
2058
2059 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 1))
2060 V2 = OriV1;
2061 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 1))
2062 V2 = OriV2;
2063 else
2064 return SDValue();
2065
2066 return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
2067}
2068
2069/// Lower VECTOR_SHUFFLE into VPICKEV (if possible).
2070///
2071/// VPICKEV copies the even elements of each vector into the result vector.
2072///
2073/// It is possible to lower into VPICKEV when the mask consists of two of the
2074/// following forms concatenated:
2075/// <0, 2, 4, ...>
2076/// <n, n+2, n+4, ...>
2077/// where n is the number of elements in the vector.
2078/// For example:
2079/// <0, 2, 4, ..., 0, 2, 4, ...>
2080/// <0, 2, 4, ..., n, n+2, n+4, ...>
2081///
2082/// When undef's appear in the mask they are treated as if they were whatever
2083/// value is necessary in order to fit the above forms.
2085 MVT VT, SDValue V1, SDValue V2,
2086 SelectionDAG &DAG) {
2087
2088 const auto &Begin = Mask.begin();
2089 const auto &Mid = Mask.begin() + Mask.size() / 2;
2090 const auto &End = Mask.end();
2091 SDValue OriV1 = V1, OriV2 = V2;
2092
2093 if (fitsRegularPattern<int>(Begin, 1, Mid, 0, 2))
2094 V1 = OriV1;
2095 else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size(), 2))
2096 V1 = OriV2;
2097 else
2098 return SDValue();
2099
2100 if (fitsRegularPattern<int>(Mid, 1, End, 0, 2))
2101 V2 = OriV1;
2102 else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size(), 2))
2103 V2 = OriV2;
2104
2105 else
2106 return SDValue();
2107
2108 return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
2109}
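// A minimal compile-time sketch (illustrative only; hypothetical helper
// names). Unlike VPACKEV, VPICKEV matches two stride-2 runs concatenated
// rather than interleaved: for v8i16 the mask <0, 2, 4, 6, 8, 10, 12, 14>
// takes the even elements of V1 in its first half and the even elements of
// V2 in its second half.
namespace vpickev_example {
constexpr bool runOfStride2(const int (&M)[8], int Begin, int End, int Start) {
  for (int i = Begin, Expect = Start; i < End; ++i, Expect += 2)
    if (M[i] != -1 && M[i] != Expect)
      return false;
  return true;
}
constexpr int EvenConcat[8] = {0, 2, 4, 6, 8, 10, 12, 14};
static_assert(runOfStride2(EvenConcat, 0, 4, 0) && // first half <- V1
                  runOfStride2(EvenConcat, 4, 8, 8), // second half <- V2
              "mask matches the VPICKEV pattern");
} // namespace vpickev_example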
2110
2111/// Lower VECTOR_SHUFFLE into VPICKOD (if possible).
2112///
2113/// VPICKOD copies the odd elements of each vector into the result vector.
2114///
2115/// It is possible to lower into VPICKOD when the mask consists of two of the
2116/// following forms concatenated:
2117/// <1, 3, 5, ...>
2118/// <n+1, n+3, n+5, ...>
2119/// where n is the number of elements in the vector.
2120/// For example:
2121/// <1, 3, 5, ..., 1, 3, 5, ...>
2122/// <1, 3, 5, ..., n+1, n+3, n+5, ...>
2123///
2124/// When undef's appear in the mask they are treated as if they were whatever
2125/// value is necessary in order to fit the above forms.
2127 MVT VT, SDValue V1, SDValue V2,
2128 SelectionDAG &DAG) {
2129
2130 const auto &Begin = Mask.begin();
2131 const auto &Mid = Mask.begin() + Mask.size() / 2;
2132 const auto &End = Mask.end();
2133 SDValue OriV1 = V1, OriV2 = V2;
2134
2135 if (fitsRegularPattern<int>(Begin, 1, Mid, 1, 2))
2136 V1 = OriV1;
2137 else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size() + 1, 2))
2138 V1 = OriV2;
2139 else
2140 return SDValue();
2141
2142 if (fitsRegularPattern<int>(Mid, 1, End, 1, 2))
2143 V2 = OriV1;
2144 else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size() + 1, 2))
2145 V2 = OriV2;
2146 else
2147 return SDValue();
2148
2149 return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
2150}
2151
2152/// Lower VECTOR_SHUFFLE into VEXTRINS (if possible).
2153///
2154/// VEXTRINS copies one element of a vector into any place of the result
2155/// vector and makes no change to the rest elements of the result vector.
2156///
2157/// It is possible to lower into VEXTRINS when the mask takes the form:
2158/// <0, 1, 2, ..., n+i, ..., n-1> or <n, n+1, n+2, ..., i, ..., 2n-1> or
2159/// <0, 1, 2, ..., i, ..., n-1> or <n, n+1, n+2, ..., n+i, ..., 2n-1>
2160/// where n is the number of elements in the vector and i is in [0, n).
2161/// For example:
2162/// <0, 1, 2, 3, 4, 5, 6, 8> , <2, 9, 10, 11, 12, 13, 14, 15> ,
2163/// <0, 1, 2, 6, 4, 5, 6, 7> , <8, 9, 10, 11, 12, 9, 14, 15>
2164///
2165/// When undef's appear in the mask they are treated as if they were whatever
2166/// value is necessary in order to fit the above forms.
2167static SDValue
2168 lowerVECTOR_SHUFFLE_VEXTRINS(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2169 SDValue V1, SDValue V2, SelectionDAG &DAG,
2170 const LoongArchSubtarget &Subtarget) {
2171 unsigned NumElts = VT.getVectorNumElements();
2172 MVT EltVT = VT.getVectorElementType();
2173 MVT GRLenVT = Subtarget.getGRLenVT();
2174
2175 if (Mask.size() != NumElts)
2176 return SDValue();
2177
2178 auto tryLowerToExtrAndIns = [&](unsigned Base) -> SDValue {
2179 int DiffCount = 0;
2180 int DiffPos = -1;
2181 for (unsigned i = 0; i < NumElts; ++i) {
2182 if (Mask[i] == -1)
2183 continue;
2184 if (Mask[i] != int(Base + i)) {
2185 ++DiffCount;
2186 DiffPos = int(i);
2187 if (DiffCount > 1)
2188 return SDValue();
2189 }
2190 }
2191
2192 // Need exactly one differing element to lower into VEXTRINS.
2193 if (DiffCount != 1)
2194 return SDValue();
2195
2196 // DiffMask must be in [0, 2N).
2197 int DiffMask = Mask[DiffPos];
2198 if (DiffMask < 0 || DiffMask >= int(2 * NumElts))
2199 return SDValue();
2200
2201 // Determine source vector and source index.
2202 SDValue SrcVec;
2203 unsigned SrcIdx;
2204 if (unsigned(DiffMask) < NumElts) {
2205 SrcVec = V1;
2206 SrcIdx = unsigned(DiffMask);
2207 } else {
2208 SrcVec = V2;
2209 SrcIdx = unsigned(DiffMask) - NumElts;
2210 }
2211
2212 // Replace with EXTRACT_VECTOR_ELT + INSERT_VECTOR_ELT; this will match the
2213 // VEXTRINS patterns in tablegen.
2214 SDValue Extracted = DAG.getNode(
2215 ISD::EXTRACT_VECTOR_ELT, DL, EltVT.isFloatingPoint() ? EltVT : GRLenVT,
2216 SrcVec, DAG.getConstant(SrcIdx, DL, GRLenVT));
2217 SDValue Result =
2218 DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, (Base == 0) ? V1 : V2,
2219 Extracted, DAG.getConstant(DiffPos, DL, GRLenVT));
2220
2221 return Result;
2222 };
2223
2224 // Try the V1-identity form (Base = 0) first, then the V2-identity form (Base = n).
2225 if (SDValue Result = tryLowerToExtrAndIns(0))
2226 return Result;
2227 return tryLowerToExtrAndIns(NumElts);
2228}
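// A minimal compile-time sketch (illustrative only; findSingleDiff is a
// hypothetical condensation of the scan above). With Base = 0 every mask
// element must equal its own index (or be undef) except exactly one: for
// v8i16 the mask <0, 1, 2, 6, 4, 5, 6, 7> differs only at position 3, so
// element 6 of V1 is extracted and inserted at position 3.
namespace vextrins_example {
constexpr int findSingleDiff(const int (&M)[8], int Base) {
  int DiffPos = -1;
  for (int i = 0; i < 8; ++i) {
    if (M[i] == -1 || M[i] == Base + i)
      continue;
    if (DiffPos != -1)
      return -1; // more than one differing element
    DiffPos = i;
  }
  return DiffPos;
}
constexpr int OneOff[8] = {0, 1, 2, 6, 4, 5, 6, 7};
static_assert(findSingleDiff(OneOff, 0) == 3,
              "exactly one element differs, at position 3");
} // namespace vextrins_example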
2229
2230/// Lower VECTOR_SHUFFLE into VSHUF.
2231///
2232/// This mostly consists of converting the shuffle mask into a BUILD_VECTOR and
2233/// adding it as an operand to the resulting VSHUF.
2235 MVT VT, SDValue V1, SDValue V2,
2236 SelectionDAG &DAG,
2237 const LoongArchSubtarget &Subtarget) {
2238
2239 SmallVector<SDValue, 32> Ops;
2240 for (auto M : Mask)
2241 Ops.push_back(DAG.getSignedConstant(M, DL, Subtarget.getGRLenVT()));
2242
2243 EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
2244 SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, Ops);
2245
2246 // VECTOR_SHUFFLE concatenates the vectors in a vectorwise fashion.
2247 // <0b00, 0b01> + <0b10, 0b11> -> <0b00, 0b01, 0b10, 0b11>
2248 // VSHUF concatenates the vectors in a bitwise fashion:
2249 // <0b00, 0b01> + <0b10, 0b11> ->
2250 // 0b0100 + 0b1110 -> 0b01001110
2251 // <0b10, 0b11, 0b00, 0b01>
2252 // We must therefore swap the operands to get the correct result.
2253 return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
2254}
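// A toy scalar model (illustrative only; names are hypothetical) of the
// operand swap above: VSHUF treats mask indices [0, n) as selecting from its
// *last* operand and [n, 2n) from the middle one, so passing (V2, V1) makes
// the hardware semantics line up with VECTOR_SHUFFLE's (V1, V2) convention.
namespace vshuf_swap_example {
constexpr int pick(int M, const int (&A)[2], const int (&B)[2]) {
  // VSHUF-style selection with operands already swapped: indices [0, 2)
  // read B (originally V1) and indices [2, 4) read A (originally V2).
  return M < 2 ? B[M] : A[M - 2];
}
constexpr int V1[2] = {10, 11}, V2[2] = {20, 21};
// VECTOR_SHUFFLE mask <1, 2> means {V1[1], V2[0]}.
static_assert(pick(1, V2, V1) == 11 && pick(2, V2, V1) == 20,
              "swapped operands reproduce VECTOR_SHUFFLE semantics");
} // namespace vshuf_swap_example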
2255
2256/// Dispatching routine to lower various 128-bit LoongArch vector shuffles.
2257///
2258/// This routine breaks down the specific type of 128-bit shuffle and
2259/// dispatches to the lowering routines accordingly.
2261 SDValue V1, SDValue V2, SelectionDAG &DAG,
2262 const LoongArchSubtarget &Subtarget) {
2263 assert((VT.SimpleTy == MVT::v16i8 || VT.SimpleTy == MVT::v8i16 ||
2264 VT.SimpleTy == MVT::v4i32 || VT.SimpleTy == MVT::v2i64 ||
2265 VT.SimpleTy == MVT::v4f32 || VT.SimpleTy == MVT::v2f64) &&
2266 "Vector type is unsupported for lsx!");
2267 assert(V1.getSimpleValueType() == V2.getSimpleValueType() &&
2268 "Two operands have different types!");
2269 assert(VT.getVectorNumElements() == Mask.size() &&
2270 "Unexpected mask size for shuffle!");
2271 assert(Mask.size() % 2 == 0 && "Expected even mask size.");
2272
2273 APInt KnownUndef, KnownZero;
2274 computeZeroableShuffleElements(Mask, V1, V2, KnownUndef, KnownZero);
2275 APInt Zeroable = KnownUndef | KnownZero;
2276
2277 SDValue Result;
2278 // TODO: Add more comparison patterns.
2279 if (V2.isUndef()) {
2280 if ((Result =
2281 lowerVECTOR_SHUFFLE_VREPLVEI(DL, Mask, VT, V1, DAG, Subtarget)))
2282 return Result;
2283 if ((Result =
2284 lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2285 return Result;
2286 if ((Result =
2287 lowerVECTOR_SHUFFLE_IsReverse(DL, Mask, VT, V1, DAG, Subtarget)))
2288 return Result;
2289
2290 // TODO: This comment may be enabled in the future to better match the
2291 // pattern for instruction selection.
2292 /* V2 = V1; */
2293 }
2294
2295 // It is recommended not to change the pattern comparison order for better
2296 // performance.
2297 if ((Result = lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG)))
2298 return Result;
2299 if ((Result = lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG)))
2300 return Result;
2301 if ((Result = lowerVECTOR_SHUFFLE_VILVH(DL, Mask, VT, V1, V2, DAG)))
2302 return Result;
2303 if ((Result = lowerVECTOR_SHUFFLE_VILVL(DL, Mask, VT, V1, V2, DAG)))
2304 return Result;
2305 if ((Result = lowerVECTOR_SHUFFLE_VPICKEV(DL, Mask, VT, V1, V2, DAG)))
2306 return Result;
2307 if ((Result = lowerVECTOR_SHUFFLE_VPICKOD(DL, Mask, VT, V1, V2, DAG)))
2308 return Result;
2309 if ((VT.SimpleTy == MVT::v2i64 || VT.SimpleTy == MVT::v2f64) &&
2310 (Result =
2311 lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2312 return Result;
2313 if ((Result =
2314 lowerVECTOR_SHUFFLE_VEXTRINS(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2315 return Result;
2316 if ((Result = lowerVECTOR_SHUFFLEAsZeroOrAnyExtend(DL, Mask, VT, V1, V2, DAG,
2317 Zeroable)))
2318 return Result;
2319 if ((Result = lowerVECTOR_SHUFFLEAsShift(DL, Mask, VT, V1, V2, DAG, Subtarget,
2320 Zeroable)))
2321 return Result;
2322 if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, Mask, VT, V1, V2, DAG,
2323 Subtarget)))
2324 return Result;
2325 if (SDValue NewShuffle = widenShuffleMask(DL, Mask, VT, V1, V2, DAG))
2326 return NewShuffle;
2327 if ((Result =
2328 lowerVECTOR_SHUFFLE_VSHUF(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2329 return Result;
2330 return SDValue();
2331}
2332
2333/// Lower VECTOR_SHUFFLE into XVREPLVEI (if possible).
2334///
2335 /// It is an XVREPLVEI when the mask is:
2336 /// <x, x, x, ..., x+n, x+n, x+n, ...>
2337 /// where the number of x is equal to n and n is half the length of the vector.
2338///
2339/// When undef's appear in the mask they are treated as if they were whatever
2340/// value is necessary in order to fit the above form.
2341static SDValue
2342 lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2343 SDValue V1, SelectionDAG &DAG,
2344 const LoongArchSubtarget &Subtarget) {
2345 int SplatIndex = -1;
2346 for (const auto &M : Mask) {
2347 if (M != -1) {
2348 SplatIndex = M;
2349 break;
2350 }
2351 }
2352
2353 if (SplatIndex == -1)
2354 return DAG.getUNDEF(VT);
2355
2356 const auto &Begin = Mask.begin();
2357 const auto &End = Mask.end();
2358 int HalfSize = Mask.size() / 2;
2359
2360 if (SplatIndex >= HalfSize)
2361 return SDValue();
2362
2363 assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
2364 if (fitsRegularPattern<int>(Begin, 1, End - HalfSize, SplatIndex, 0) &&
2365 fitsRegularPattern<int>(Begin + HalfSize, 1, End, SplatIndex + HalfSize,
2366 0)) {
2367 return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
2368 DAG.getConstant(SplatIndex, DL, Subtarget.getGRLenVT()));
2369 }
2370
2371 return SDValue();
2372}
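// A minimal compile-time sketch (illustrative only; hypothetical helper
// names). For LASX the splat must repeat per 128-bit lane: with v8i32
// (n = 8, half = 4) the mask <1, 1, 1, 1, 5, 5, 5, 5> replicates element 1 in
// the low lane and element 1 + 4 in the high lane, so it maps to XVREPLVEI
// with immediate 1.
namespace xvreplvei_example {
constexpr bool lanewiseSplat(const int (&M)[8], int Splat) {
  for (int i = 0; i < 8; ++i) {
    int Expect = i < 4 ? Splat : Splat + 4;
    if (M[i] != -1 && M[i] != Expect)
      return false;
  }
  return true;
}
constexpr int LaneSplat[8] = {1, 1, 1, 1, 5, 5, 5, 5};
static_assert(lanewiseSplat(LaneSplat, 1), "per-lane splat of element 1");
} // namespace xvreplvei_example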
2373
2374/// Lower VECTOR_SHUFFLE into XVSHUF4I (if possible).
2375static SDValue
2376 lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2377 SDValue V1, SDValue V2, SelectionDAG &DAG,
2378 const LoongArchSubtarget &Subtarget) {
2379 // When the mask size is less than or equal to 4, lower-cost instructions
2380 // may be used.
2381 if (Mask.size() <= 4)
2382 return SDValue();
2383 return lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget);
2384}
2385
2386/// Lower VECTOR_SHUFFLE into XVPERMI (if possible).
2387static SDValue
2388 lowerVECTOR_SHUFFLE_XVPERMI(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2389 SDValue V1, SelectionDAG &DAG,
2390 const LoongArchSubtarget &Subtarget) {
2391 // Only consider XVPERMI_D.
2392 if (Mask.size() != 4 || (VT != MVT::v4i64 && VT != MVT::v4f64))
2393 return SDValue();
2394
2395 unsigned MaskImm = 0;
2396 for (unsigned i = 0; i < Mask.size(); ++i) {
2397 if (Mask[i] == -1)
2398 continue;
2399 MaskImm |= Mask[i] << (i * 2);
2400 }
2401
2402 return DAG.getNode(LoongArchISD::XVPERMI, DL, VT, V1,
2403 DAG.getConstant(MaskImm, DL, Subtarget.getGRLenVT()));
2404}
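// A worked example of the immediate encoding above (compile-time checked;
// the helper name is hypothetical): each XVPERMI_D mask element occupies two
// bits, so mask <0, 3, 2, 0> encodes as
// 0 | (3 << 2) | (2 << 4) | (0 << 6) = 0b00101100 = 44.
namespace xvpermi_example {
constexpr unsigned encodeImm(const int (&M)[4]) {
  unsigned Imm = 0;
  for (int i = 0; i < 4; ++i)
    if (M[i] != -1)
      Imm |= unsigned(M[i]) << (i * 2);
  return Imm;
}
constexpr int Perm[4] = {0, 3, 2, 0};
static_assert(encodeImm(Perm) == 0b00101100, "imm 44 selects <0,3,2,0>");
} // namespace xvpermi_example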
2405
2406/// Lower VECTOR_SHUFFLE into XVPERM (if possible).
2408 MVT VT, SDValue V1, SelectionDAG &DAG,
2409 const LoongArchSubtarget &Subtarget) {
2410 // LoongArch LASX only has XVPERM_W.
2411 if (Mask.size() != 8 || (VT != MVT::v8i32 && VT != MVT::v8f32))
2412 return SDValue();
2413
2414 unsigned NumElts = VT.getVectorNumElements();
2415 unsigned HalfSize = NumElts / 2;
2416 bool FrontLo = true, FrontHi = true;
2417 bool BackLo = true, BackHi = true;
2418
2419 auto inRange = [](int val, int low, int high) {
2420 return (val == -1) || (val >= low && val < high);
2421 };
2422
2423 for (unsigned i = 0; i < HalfSize; ++i) {
2424 int Fronti = Mask[i];
2425 int Backi = Mask[i + HalfSize];
2426
2427 FrontLo &= inRange(Fronti, 0, HalfSize);
2428 FrontHi &= inRange(Fronti, HalfSize, NumElts);
2429 BackLo &= inRange(Backi, 0, HalfSize);
2430 BackHi &= inRange(Backi, HalfSize, NumElts);
2431 }
2432
2433 // If both the lower and upper 128-bit parts access only one half of the
2434 // vector (either lower or upper), avoid using xvperm.w. The latency of
2435 // xvperm.w (3) is higher than that of xvshuf (1) plus xvori (1).
2436 if ((FrontLo || FrontHi) && (BackLo || BackHi))
2437 return SDValue();
2438
2439 SmallVector<SDValue, 8> Masks;
2440 MVT GRLenVT = Subtarget.getGRLenVT();
2441 for (unsigned i = 0; i < NumElts; ++i)
2442 Masks.push_back(Mask[i] == -1 ? DAG.getUNDEF(GRLenVT)
2443 : DAG.getConstant(Mask[i], DL, GRLenVT));
2444 SDValue MaskVec = DAG.getBuildVector(MVT::v8i32, DL, Masks);
2445
2446 return DAG.getNode(LoongArchISD::XVPERM, DL, VT, V1, MaskVec);
2447}
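// A minimal compile-time sketch (illustrative only; hypothetical helper
// names) of the bail-out above in one concrete case: the v8i32 mask
// <0, 1, 2, 3, 0, 1, 2, 3> reads only the low half in both 128-bit parts
// (FrontLo and BackLo both hold), so the cheaper xvshuf + xvori sequence is
// preferred over xvperm.w.
namespace xvperm_example {
constexpr bool halfOnly(const int (&M)[8], int Lo, int Hi) {
  for (int V : M)
    if (V != -1 && (V < Lo || V >= Hi))
      return false;
  return true;
}
constexpr int LowOnly[8] = {0, 1, 2, 3, 0, 1, 2, 3};
static_assert(halfOnly(LowOnly, 0, 4), "both parts read only the low half");
} // namespace xvperm_example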
2448
2449/// Lower VECTOR_SHUFFLE into XVPACKEV (if possible).
2451 MVT VT, SDValue V1, SDValue V2,
2452 SelectionDAG &DAG) {
2453 return lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG);
2454}
2455
2456/// Lower VECTOR_SHUFFLE into XVPACKOD (if possible).
2458 MVT VT, SDValue V1, SDValue V2,
2459 SelectionDAG &DAG) {
2460 return lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG);
2461}
2462
2463/// Lower VECTOR_SHUFFLE into XVILVH (if possible).
2465 MVT VT, SDValue V1, SDValue V2,
2466 SelectionDAG &DAG) {
2467
2468 const auto &Begin = Mask.begin();
2469 const auto &End = Mask.end();
2470 unsigned HalfSize = Mask.size() / 2;
2471 unsigned LeftSize = HalfSize / 2;
2472 SDValue OriV1 = V1, OriV2 = V2;
2473
2474 if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, HalfSize - LeftSize,
2475 1) &&
2476 fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize + LeftSize, 1))
2477 V1 = OriV1;
2478 else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize,
2479 Mask.size() + HalfSize - LeftSize, 1) &&
2480 fitsRegularPattern<int>(Begin + HalfSize, 2, End,
2481 Mask.size() + HalfSize + LeftSize, 1))
2482 V1 = OriV2;
2483 else
2484 return SDValue();
2485
2486 if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, HalfSize - LeftSize,
2487 1) &&
2488 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize + LeftSize,
2489 1))
2490 V2 = OriV1;
2491 else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize,
2492 Mask.size() + HalfSize - LeftSize, 1) &&
2493 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
2494 Mask.size() + HalfSize + LeftSize, 1))
2495 V2 = OriV2;
2496 else
2497 return SDValue();
2498
2499 return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
2500}
2501
2502/// Lower VECTOR_SHUFFLE into XVILVL (if possible).
2504 MVT VT, SDValue V1, SDValue V2,
2505 SelectionDAG &DAG) {
2506
2507 const auto &Begin = Mask.begin();
2508 const auto &End = Mask.end();
2509 unsigned HalfSize = Mask.size() / 2;
2510 SDValue OriV1 = V1, OriV2 = V2;
2511
2512 if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, 0, 1) &&
2513 fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize, 1))
2514 V1 = OriV1;
2515 else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, Mask.size(), 1) &&
2516 fitsRegularPattern<int>(Begin + HalfSize, 2, End,
2517 Mask.size() + HalfSize, 1))
2518 V1 = OriV2;
2519 else
2520 return SDValue();
2521
2522 if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, 0, 1) &&
2523 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize, 1))
2524 V2 = OriV1;
2525 else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, Mask.size(),
2526 1) &&
2527 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
2528 Mask.size() + HalfSize, 1))
2529 V2 = OriV2;
2530 else
2531 return SDValue();
2532
2533 return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
2534}
2535
2536/// Lower VECTOR_SHUFFLE into XVPICKEV (if possible).
2538 MVT VT, SDValue V1, SDValue V2,
2539 SelectionDAG &DAG) {
2540
2541 const auto &Begin = Mask.begin();
2542 const auto &LeftMid = Mask.begin() + Mask.size() / 4;
2543 const auto &Mid = Mask.begin() + Mask.size() / 2;
2544 const auto &RightMid = Mask.end() - Mask.size() / 4;
2545 const auto &End = Mask.end();
2546 unsigned HalfSize = Mask.size() / 2;
2547 SDValue OriV1 = V1, OriV2 = V2;
2548
2549 if (fitsRegularPattern<int>(Begin, 1, LeftMid, 0, 2) &&
2550 fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize, 2))
2551 V1 = OriV1;
2552 else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size(), 2) &&
2553 fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize, 2))
2554 V1 = OriV2;
2555 else
2556 return SDValue();
2557
2558 if (fitsRegularPattern<int>(LeftMid, 1, Mid, 0, 2) &&
2559 fitsRegularPattern<int>(RightMid, 1, End, HalfSize, 2))
2560 V2 = OriV1;
2561 else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size(), 2) &&
2562 fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize, 2))
2563 V2 = OriV2;
2564
2565 else
2566 return SDValue();
2567
2568 return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
2569}
2570
2571/// Lower VECTOR_SHUFFLE into XVPICKOD (if possible).
2573 MVT VT, SDValue V1, SDValue V2,
2574 SelectionDAG &DAG) {
2575
2576 const auto &Begin = Mask.begin();
2577 const auto &LeftMid = Mask.begin() + Mask.size() / 4;
2578 const auto &Mid = Mask.begin() + Mask.size() / 2;
2579 const auto &RightMid = Mask.end() - Mask.size() / 4;
2580 const auto &End = Mask.end();
2581 unsigned HalfSize = Mask.size() / 2;
2582 SDValue OriV1 = V1, OriV2 = V2;
2583
2584 if (fitsRegularPattern<int>(Begin, 1, LeftMid, 1, 2) &&
2585 fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize + 1, 2))
2586 V1 = OriV1;
2587 else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size() + 1, 2) &&
2588 fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize + 1,
2589 2))
2590 V1 = OriV2;
2591 else
2592 return SDValue();
2593
2594 if (fitsRegularPattern<int>(LeftMid, 1, Mid, 1, 2) &&
2595 fitsRegularPattern<int>(RightMid, 1, End, HalfSize + 1, 2))
2596 V2 = OriV1;
2597 else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size() + 1, 2) &&
2598 fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize + 1,
2599 2))
2600 V2 = OriV2;
2601 else
2602 return SDValue();
2603
2604 return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
2605}
2606
2607/// Lower VECTOR_SHUFFLE into XVINSVE0 (if possible).
2608static SDValue
2609 lowerVECTOR_SHUFFLE_XVINSVE0(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2610 SDValue V1, SDValue V2, SelectionDAG &DAG,
2611 const LoongArchSubtarget &Subtarget) {
2612 // LoongArch LASX only supports xvinsve0.{w/d}.
2613 if (VT != MVT::v8i32 && VT != MVT::v8f32 && VT != MVT::v4i64 &&
2614 VT != MVT::v4f64)
2615 return SDValue();
2616
2617 MVT GRLenVT = Subtarget.getGRLenVT();
2618 int MaskSize = Mask.size();
2619 assert(MaskSize == (int)VT.getVectorNumElements() && "Unexpected mask size");
2620
2621 // Check if exactly one element of the Mask is replaced by 'Replaced', while
2622 // all other elements are either 'Base + i' or undef (-1). On success, return
2623 // the index of the replaced element. Otherwise, just return -1.
2624 auto checkReplaceOne = [&](int Base, int Replaced) -> int {
2625 int Idx = -1;
2626 for (int i = 0; i < MaskSize; ++i) {
2627 if (Mask[i] == Base + i || Mask[i] == -1)
2628 continue;
2629 if (Mask[i] != Replaced)
2630 return -1;
2631 if (Idx == -1)
2632 Idx = i;
2633 else
2634 return -1;
2635 }
2636 return Idx;
2637 };
2638
2639 // Case 1: the lowest element of V2 replaces one element in V1.
2640 int Idx = checkReplaceOne(0, MaskSize);
2641 if (Idx != -1)
2642 return DAG.getNode(LoongArchISD::XVINSVE0, DL, VT, V1, V2,
2643 DAG.getConstant(Idx, DL, GRLenVT));
2644
2645 // Case 2: the lowest element of V1 replaces one element in V2.
2646 Idx = checkReplaceOne(MaskSize, 0);
2647 if (Idx != -1)
2648 return DAG.getNode(LoongArchISD::XVINSVE0, DL, VT, V2, V1,
2649 DAG.getConstant(Idx, DL, GRLenVT));
2650
2651 return SDValue();
2652}
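// A minimal compile-time sketch (illustrative only; replacedIndex is a
// hypothetical condensation of checkReplaceOne). For v8i32 the mask
// <0, 1, 8, 3, 4, 5, 6, 7> is the identity on V1 except that position 2
// takes element 8, i.e. the lowest element of V2, so it lowers to
// xvinsve0.w with immediate 2.
namespace xvinsve0_example {
constexpr int replacedIndex(const int (&M)[8], int Base, int Replaced) {
  int Idx = -1;
  for (int i = 0; i < 8; ++i) {
    if (M[i] == Base + i || M[i] == -1)
      continue;
    if (M[i] != Replaced || Idx != -1)
      return -1;
    Idx = i;
  }
  return Idx;
}
constexpr int InsertV2Low[8] = {0, 1, 8, 3, 4, 5, 6, 7};
static_assert(replacedIndex(InsertV2Low, 0, 8) == 2,
              "V2's lowest element is inserted at position 2");
} // namespace xvinsve0_example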
2653
2654/// Lower VECTOR_SHUFFLE into XVSHUF (if possible).
2656 MVT VT, SDValue V1, SDValue V2,
2657 SelectionDAG &DAG) {
2658
2659 int MaskSize = Mask.size();
2660 int HalfSize = Mask.size() / 2;
2661 const auto &Begin = Mask.begin();
2662 const auto &Mid = Mask.begin() + HalfSize;
2663 const auto &End = Mask.end();
2664
2665 // VECTOR_SHUFFLE concatenates the vectors:
2666 // <0, 1, 2, 3, 4, 5, 6, 7> + <8, 9, 10, 11, 12, 13, 14, 15>
2667 // shuffling ->
2668 // <0, 1, 2, 3, 8, 9, 10, 11> <4, 5, 6, 7, 12, 13, 14, 15>
2669 //
2670 // XVSHUF concatenates the vectors:
2671 // <a0, a1, a2, a3, b0, b1, b2, b3> + <a4, a5, a6, a7, b4, b5, b6, b7>
2672 // shuffling ->
2673 // <a0, a1, a2, a3, a4, a5, a6, a7> + <b0, b1, b2, b3, b4, b5, b6, b7>
2674 SmallVector<SDValue, 8> MaskAlloc;
2675 for (auto it = Begin; it < Mid; it++) {
2676 if (*it < 0) // UNDEF
2677 MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
2678 else if ((*it >= 0 && *it < HalfSize) ||
2679 (*it >= MaskSize && *it < MaskSize + HalfSize)) {
2680 int M = *it < HalfSize ? *it : *it - HalfSize;
2681 MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
2682 } else
2683 return SDValue();
2684 }
2685 assert((int)MaskAlloc.size() == HalfSize && "xvshuf convert failed!");
2686
2687 for (auto it = Mid; it < End; it++) {
2688 if (*it < 0) // UNDEF
2689 MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
2690 else if ((*it >= HalfSize && *it < MaskSize) ||
2691 (*it >= MaskSize + HalfSize && *it < MaskSize * 2)) {
2692 int M = *it < MaskSize ? *it - HalfSize : *it - MaskSize;
2693 MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
2694 } else
2695 return SDValue();
2696 }
2697 assert((int)MaskAlloc.size() == MaskSize && "xvshuf convert failed!");
2698
2699 EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
2700 SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, MaskAlloc);
2701 return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
2702}
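// A minimal compile-time sketch (illustrative only; hypothetical helper name)
// of the first-half remap above for v8i32 (MaskSize = 8, HalfSize = 4): a
// mask value must come from the low half of V1 ([0, 4)) or of V2 ([8, 12)),
// and V2 values are shifted down by HalfSize so that, after XVSHUF's lanewise
// concatenation, they index the same data. E.g. mask value 9 becomes 5.
namespace xvshuf_remap_example {
constexpr int remapFirstHalf(int M) {
  if (M < 0)
    return 0; // undef -> arbitrary
  if (M < 4)
    return M; // low half of V1
  if (M >= 8 && M < 12)
    return M - 4; // low half of V2
  return -1; // not representable in the first half
}
static_assert(remapFirstHalf(9) == 5 && remapFirstHalf(2) == 2 &&
                  remapFirstHalf(6) == -1,
              "only low-half sources are representable in the first half");
} // namespace xvshuf_remap_example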
2703
2704/// Shuffle vectors by lane to generate more optimized instructions.
2705/// 256-bit shuffles are always considered as 2-lane 128-bit shuffles.
2706///
2707/// Therefore, except for the following four cases, other cases are regarded
2708/// as cross-lane shuffles, where optimization is relatively limited.
2709///
2710 /// - Shuffle high, low lanes of two input vectors
2711 /// <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 3, 6>
2712 /// - Shuffle low, high lanes of two input vectors
2713 /// <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 0, 5>
2714 /// - Shuffle low, low lanes of two input vectors
2715 /// <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 3, 6>
2716 /// - Shuffle high, high lanes of two input vectors
2717/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 0, 5>
2718///
2719/// The first case is the closest to LoongArch instructions and the other
2720/// cases need to be converted to it for processing.
2721///
2722/// This function will return true for the last three cases above and will
2723/// modify V1, V2 and Mask. Otherwise, return false for the first case and
2724/// cross-lane shuffle cases.
2726 const SDLoc &DL, MutableArrayRef<int> Mask, MVT VT, SDValue &V1,
2727 SDValue &V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget) {
2728
2729 enum HalfMaskType { HighLaneTy, LowLaneTy, None };
2730
2731 int MaskSize = Mask.size();
2732 int HalfSize = Mask.size() / 2;
2733 MVT GRLenVT = Subtarget.getGRLenVT();
2734
2735 HalfMaskType preMask = None, postMask = None;
2736
2737 if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
2738 return M < 0 || (M >= 0 && M < HalfSize) ||
2739 (M >= MaskSize && M < MaskSize + HalfSize);
2740 }))
2741 preMask = HighLaneTy;
2742 else if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
2743 return M < 0 || (M >= HalfSize && M < MaskSize) ||
2744 (M >= MaskSize + HalfSize && M < MaskSize * 2);
2745 }))
2746 preMask = LowLaneTy;
2747
2748 if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
2749 return M < 0 || (M >= HalfSize && M < MaskSize) ||
2750 (M >= MaskSize + HalfSize && M < MaskSize * 2);
2751 }))
2752 postMask = LowLaneTy;
2753 else if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
2754 return M < 0 || (M >= 0 && M < HalfSize) ||
2755 (M >= MaskSize && M < MaskSize + HalfSize);
2756 }))
2757 postMask = HighLaneTy;
2758
2759 // The pre-half of the mask is high lane type and the post-half of the mask
2760 // is low lane type, which is closest to the LoongArch instructions.
2761 //
2762 // Note: In the LoongArch architecture, the high lane of the mask corresponds
2763 // to the lower 128 bits of the vector register, and the low lane of the mask
2764 // corresponds to the higher 128 bits of the vector register.
2765 if (preMask == HighLaneTy && postMask == LowLaneTy) {
2766 return false;
2767 }
2768 if (preMask == LowLaneTy && postMask == HighLaneTy) {
2769 V1 = DAG.getBitcast(MVT::v4i64, V1);
2770 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
2771 DAG.getConstant(0b01001110, DL, GRLenVT));
2772 V1 = DAG.getBitcast(VT, V1);
2773
2774 if (!V2.isUndef()) {
2775 V2 = DAG.getBitcast(MVT::v4i64, V2);
2776 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
2777 DAG.getConstant(0b01001110, DL, GRLenVT));
2778 V2 = DAG.getBitcast(VT, V2);
2779 }
2780
2781 for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
2782 *it = *it < 0 ? *it : *it - HalfSize;
2783 }
2784 for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
2785 *it = *it < 0 ? *it : *it + HalfSize;
2786 }
2787 } else if (preMask == LowLaneTy && postMask == LowLaneTy) {
2788 V1 = DAG.getBitcast(MVT::v4i64, V1);
2789 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
2790 DAG.getConstant(0b11101110, DL, GRLenVT));
2791 V1 = DAG.getBitcast(VT, V1);
2792
2793 if (!V2.isUndef()) {
2794 V2 = DAG.getBitcast(MVT::v4i64, V2);
2795 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
2796 DAG.getConstant(0b11101110, DL, GRLenVT));
2797 V2 = DAG.getBitcast(VT, V2);
2798 }
2799
2800 for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
2801 *it = *it < 0 ? *it : *it - HalfSize;
2802 }
2803 } else if (preMask == HighLaneTy && postMask == HighLaneTy) {
2804 V1 = DAG.getBitcast(MVT::v4i64, V1);
2805 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
2806 DAG.getConstant(0b01000100, DL, GRLenVT));
2807 V1 = DAG.getBitcast(VT, V1);
2808
2809 if (!V2.isUndef()) {
2810 V2 = DAG.getBitcast(MVT::v4i64, V2);
2811 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
2812 DAG.getConstant(0b01000100, DL, GRLenVT));
2813 V2 = DAG.getBitcast(VT, V2);
2814 }
2815
2816 for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
2817 *it = *it < 0 ? *it : *it + HalfSize;
2818 }
2819 } else { // cross-lane
2820 return false;
2821 }
2822
2823 return true;
2824}
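// The XVPERMI_D immediates used above, decoded two bits per destination
// double-word (a compile-time worked example; the helper name is
// hypothetical): 0b01001110 selects <2, 3, 0, 1> (swap the 128-bit halves),
// 0b11101110 selects <2, 3, 2, 3> (repeat the high half), and 0b01000100
// selects <0, 1, 0, 1> (repeat the low half).
namespace lane_canon_example {
constexpr int select(unsigned Imm, int i) { return (Imm >> (2 * i)) & 3; }
static_assert(select(0b01001110, 0) == 2 && select(0b01001110, 3) == 1,
              "0b01001110 swaps the two 128-bit halves");
static_assert(select(0b11101110, 0) == 2 && select(0b11101110, 2) == 2,
              "0b11101110 repeats the high half");
static_assert(select(0b01000100, 1) == 1 && select(0b01000100, 3) == 1,
              "0b01000100 repeats the low half");
} // namespace lane_canon_example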
2825
2826/// Lower VECTOR_SHUFFLE as lane permute and then shuffle (if possible).
2827 /// Only for 256-bit vectors.
2828///
2829/// For example:
2830 /// %2 = shufflevector <4 x i64> %0, <4 x i64> poison,
2831 /// <4 x i32> <i32 0, i32 3, i32 2, i32 0>
2832 /// is lowered to:
2833/// (XVPERMI $xr2, $xr0, 78)
2834/// (XVSHUF $xr1, $xr2, $xr0)
2835/// (XVORI $xr0, $xr1, 0)
2837 ArrayRef<int> Mask,
2838 MVT VT, SDValue V1,
2839 SDValue V2,
2840 SelectionDAG &DAG) {
2841 assert(VT.is256BitVector() && "Only for 256-bit vector shuffles!");
2842 int Size = Mask.size();
2843 int LaneSize = Size / 2;
2844
2845 bool LaneCrossing[2] = {false, false};
2846 for (int i = 0; i < Size; ++i)
2847 if (Mask[i] >= 0 && ((Mask[i] % Size) / LaneSize) != (i / LaneSize))
2848 LaneCrossing[(Mask[i] % Size) / LaneSize] = true;
2849
2850 // Ensure that at least one lane is crossing; otherwise there is nothing to do.
2851 if (!LaneCrossing[0] && !LaneCrossing[1])
2852 return SDValue();
2853
2854 SmallVector<int> InLaneMask;
2855 InLaneMask.assign(Mask.begin(), Mask.end());
2856 for (int i = 0; i < Size; ++i) {
2857 int &M = InLaneMask[i];
2858 if (M < 0)
2859 continue;
2860 if (((M % Size) / LaneSize) != (i / LaneSize))
2861 M = (M % LaneSize) + ((i / LaneSize) * LaneSize) + Size;
2862 }
2863
2864 SDValue Flipped = DAG.getBitcast(MVT::v4i64, V1);
2865 Flipped = DAG.getVectorShuffle(MVT::v4i64, DL, Flipped,
2866 DAG.getUNDEF(MVT::v4i64), {2, 3, 0, 1});
2867 Flipped = DAG.getBitcast(VT, Flipped);
2868 return DAG.getVectorShuffle(VT, DL, V1, Flipped, InLaneMask);
2869}
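// A minimal compile-time sketch (illustrative only; hypothetical helper name)
// of the in-lane remap above for v4i64 (Size = 4, LaneSize = 2): a cross-lane
// element is redirected to the matching slot of the flipped copy, i.e. offset
// by Size so it reads the second shuffle operand. E.g. for Mask = <0, 3, 2, 0>,
// position 1 wants element 3 (the other lane), which becomes
// (3 % 2) + 0 + 4 = 5 and reads the flipped vector instead.
namespace lane_permute_example {
constexpr int remap(int M, int i, int Size, int LaneSize) {
  if (M < 0 || ((M % Size) / LaneSize) == (i / LaneSize))
    return M; // undef or already in-lane
  return (M % LaneSize) + ((i / LaneSize) * LaneSize) + Size;
}
static_assert(remap(3, 1, 4, 2) == 5, "cross-lane index reads the flip");
static_assert(remap(0, 0, 4, 2) == 0, "in-lane index is unchanged");
} // namespace lane_permute_example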
2870
2871/// Dispatching routine to lower various 256-bit LoongArch vector shuffles.
2872///
2873/// This routine breaks down the specific type of 256-bit shuffle and
2874/// dispatches to the lowering routines accordingly.
2876 SDValue V1, SDValue V2, SelectionDAG &DAG,
2877 const LoongArchSubtarget &Subtarget) {
2878 assert((VT.SimpleTy == MVT::v32i8 || VT.SimpleTy == MVT::v16i16 ||
2879 VT.SimpleTy == MVT::v8i32 || VT.SimpleTy == MVT::v4i64 ||
2880 VT.SimpleTy == MVT::v8f32 || VT.SimpleTy == MVT::v4f64) &&
2881 "Vector type is unsupported for lasx!");
2882 assert(V1.getSimpleValueType() == V2.getSimpleValueType() &&
2883 "Two operands have different types!");
2884 assert(VT.getVectorNumElements() == Mask.size() &&
2885 "Unexpected mask size for shuffle!");
2886 assert(Mask.size() % 2 == 0 && "Expected even mask size.");
2887 assert(Mask.size() >= 4 && "Mask size is less than 4.");
2888
2889 APInt KnownUndef, KnownZero;
2890 computeZeroableShuffleElements(Mask, V1, V2, KnownUndef, KnownZero);
2891 APInt Zeroable = KnownUndef | KnownZero;
2892
2893 SDValue Result;
2894 // TODO: Add more comparison patterns.
2895 if (V2.isUndef()) {
2896 if ((Result =
2897 lowerVECTOR_SHUFFLE_XVREPLVEI(DL, Mask, VT, V1, DAG, Subtarget)))
2898 return Result;
2899 if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, Mask, VT, V1, V2, DAG,
2900 Subtarget)))
2901 return Result;
2902 // Try to widen vectors to gain more optimization opportunities.
2903 if (SDValue NewShuffle = widenShuffleMask(DL, Mask, VT, V1, V2, DAG))
2904 return NewShuffle;
2905 if ((Result =
2906 lowerVECTOR_SHUFFLE_XVPERMI(DL, Mask, VT, V1, DAG, Subtarget)))
2907 return Result;
2908 if ((Result = lowerVECTOR_SHUFFLE_XVPERM(DL, Mask, VT, V1, DAG, Subtarget)))
2909 return Result;
2910 if ((Result =
2911 lowerVECTOR_SHUFFLE_IsReverse(DL, Mask, VT, V1, DAG, Subtarget)))
2912 return Result;
2913
2914 // TODO: This comment may be enabled in the future to better match the
2915 // pattern for instruction selection.
2916 /* V2 = V1; */
2917 }
2918
2919 // It is recommended not to change the pattern comparison order for better
2920 // performance.
2921 if ((Result = lowerVECTOR_SHUFFLE_XVPACKEV(DL, Mask, VT, V1, V2, DAG)))
2922 return Result;
2923 if ((Result = lowerVECTOR_SHUFFLE_XVPACKOD(DL, Mask, VT, V1, V2, DAG)))
2924 return Result;
2925 if ((Result = lowerVECTOR_SHUFFLE_XVILVH(DL, Mask, VT, V1, V2, DAG)))
2926 return Result;
2927 if ((Result = lowerVECTOR_SHUFFLE_XVILVL(DL, Mask, VT, V1, V2, DAG)))
2928 return Result;
2929 if ((Result = lowerVECTOR_SHUFFLE_XVPICKEV(DL, Mask, VT, V1, V2, DAG)))
2930 return Result;
2931 if ((Result = lowerVECTOR_SHUFFLE_XVPICKOD(DL, Mask, VT, V1, V2, DAG)))
2932 return Result;
2933 if ((Result = lowerVECTOR_SHUFFLEAsShift(DL, Mask, VT, V1, V2, DAG, Subtarget,
2934 Zeroable)))
2935 return Result;
2936 if ((Result =
2937 lowerVECTOR_SHUFFLE_XVINSVE0(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2938 return Result;
2939 if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, Mask, VT, V1, V2, DAG,
2940 Subtarget)))
2941 return Result;
2942
2943 // Canonicalize non-cross-lane shuffle vectors.
2944 SmallVector<int> NewMask(Mask);
2945 if (canonicalizeShuffleVectorByLane(DL, NewMask, VT, V1, V2, DAG, Subtarget))
2946 return lower256BitShuffle(DL, NewMask, VT, V1, V2, DAG, Subtarget);
2947
2948 // FIXME: Handling the remaining cases earlier can degrade performance
2949 // in some situations. Further analysis is required to enable more
2950 // effective optimizations.
2951 if (V2.isUndef()) {
2952 if ((Result = lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(DL, NewMask, VT,
2953 V1, V2, DAG)))
2954 return Result;
2955 }
2956
2957 if (SDValue NewShuffle = widenShuffleMask(DL, NewMask, VT, V1, V2, DAG))
2958 return NewShuffle;
2959 if ((Result = lowerVECTOR_SHUFFLE_XVSHUF(DL, NewMask, VT, V1, V2, DAG)))
2960 return Result;
2961
2962 return SDValue();
2963}
2964
2965SDValue LoongArchTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
2966 SelectionDAG &DAG) const {
2967 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
2968 ArrayRef<int> OrigMask = SVOp->getMask();
2969 SDValue V1 = Op.getOperand(0);
2970 SDValue V2 = Op.getOperand(1);
2971 MVT VT = Op.getSimpleValueType();
2972 int NumElements = VT.getVectorNumElements();
2973 SDLoc DL(Op);
2974
2975 bool V1IsUndef = V1.isUndef();
2976 bool V2IsUndef = V2.isUndef();
2977 if (V1IsUndef && V2IsUndef)
2978 return DAG.getUNDEF(VT);
2979
2980 // When we create a shuffle node we put the UNDEF node in the second operand,
2981 // but in some cases the first operand may be transformed to UNDEF.
2982 // In this case we should just commute the node.
2983 if (V1IsUndef)
2984 return DAG.getCommutedVectorShuffle(*SVOp);
2985
2986 // Check for non-undef masks pointing at an undef vector and make the masks
2987 // undef as well. This makes it easier to match the shuffle based solely on
2988 // the mask.
2989 if (V2IsUndef &&
2990 any_of(OrigMask, [NumElements](int M) { return M >= NumElements; })) {
2991 SmallVector<int, 8> NewMask(OrigMask);
2992 for (int &M : NewMask)
2993 if (M >= NumElements)
2994 M = -1;
2995 return DAG.getVectorShuffle(VT, DL, V1, V2, NewMask);
2996 }
2997
2998 // Check for illegal shuffle mask element index values.
2999 int MaskUpperLimit = OrigMask.size() * (V2IsUndef ? 1 : 2);
3000 (void)MaskUpperLimit;
3001 assert(llvm::all_of(OrigMask,
3002 [&](int M) { return -1 <= M && M < MaskUpperLimit; }) &&
3003 "Out of bounds shuffle index");
3004
3005 // For each vector width, delegate to a specialized lowering routine.
3006 if (VT.is128BitVector())
3007 return lower128BitShuffle(DL, OrigMask, VT, V1, V2, DAG, Subtarget);
3008
3009 if (VT.is256BitVector())
3010 return lower256BitShuffle(DL, OrigMask, VT, V1, V2, DAG, Subtarget);
3011
3012 return SDValue();
3013}
3014
3015SDValue LoongArchTargetLowering::lowerFP_TO_FP16(SDValue Op,
3016 SelectionDAG &DAG) const {
3017 // Custom lower to ensure the libcall return is passed in an FPR on hard
3018 // float ABIs.
3019 SDLoc DL(Op);
3020 MakeLibCallOptions CallOptions;
3021 SDValue Op0 = Op.getOperand(0);
3022 SDValue Chain = SDValue();
3023 RTLIB::Libcall LC = RTLIB::getFPROUND(Op0.getValueType(), MVT::f16);
3024 SDValue Res;
3025 std::tie(Res, Chain) =
3026 makeLibCall(DAG, LC, MVT::f32, Op0, CallOptions, DL, Chain);
3027 if (Subtarget.is64Bit())
3028 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Res);
3029 return DAG.getBitcast(MVT::i32, Res);
3030}
3031
3032SDValue LoongArchTargetLowering::lowerFP16_TO_FP(SDValue Op,
3033 SelectionDAG &DAG) const {
3034 // Custom lower to ensure the libcall argument is passed in an FPR on hard
3035 // float ABIs.
3036 SDLoc DL(Op);
3037 MakeLibCallOptions CallOptions;
3038 SDValue Op0 = Op.getOperand(0);
3039 SDValue Chain = SDValue();
3040 SDValue Arg = Subtarget.is64Bit() ? DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64,
3041 DL, MVT::f32, Op0)
3042 : DAG.getBitcast(MVT::f32, Op0);
3043 SDValue Res;
3044 std::tie(Res, Chain) = makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Arg,
3045 CallOptions, DL, Chain);
3046 return Res;
3047}
3048
3049SDValue LoongArchTargetLowering::lowerFP_TO_BF16(SDValue Op,
3050 SelectionDAG &DAG) const {
3051 assert(Subtarget.hasBasicF() && "Unexpected custom legalization");
3052 SDLoc DL(Op);
3053 MakeLibCallOptions CallOptions;
3054 RTLIB::Libcall LC =
3055 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16);
3056 SDValue Res =
3057 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
3058 if (Subtarget.is64Bit())
3059 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Res);
3060 return DAG.getBitcast(MVT::i32, Res);
3061}
3062
3063SDValue LoongArchTargetLowering::lowerBF16_TO_FP(SDValue Op,
3064 SelectionDAG &DAG) const {
3065 assert(Subtarget.hasBasicF() && "Unexpected custom legalization");
3066 MVT VT = Op.getSimpleValueType();
3067 SDLoc DL(Op);
3068 Op = DAG.getNode(
3069 ISD::SHL, DL, Op.getOperand(0).getValueType(), Op.getOperand(0),
3070 DAG.getShiftAmountConstant(16, Op.getOperand(0).getValueType(), DL));
3071 SDValue Res = Subtarget.is64Bit() ? DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64,
3072 DL, MVT::f32, Op)
3073 : DAG.getBitcast(MVT::f32, Op);
3074 if (VT != MVT::f32)
3075 return DAG.getNode(ISD::FP_EXTEND, DL, VT, Res);
3076 return Res;
3077}
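// A compile-time worked example (illustrative only; the helper name is
// hypothetical) of why the 16-bit shift above is a correct bf16 -> f32
// extension: bf16 is exactly the top half of an IEEE f32, so placing the 16
// payload bits in the high half of an i32 and bitcasting yields the extended
// value. E.g. bf16 0x3F80 (1.0) becomes 0x3F800000, the f32 encoding of 1.0.
namespace bf16_example {
constexpr unsigned extend(unsigned short B) { return unsigned(B) << 16; }
static_assert(extend(0x3F80) == 0x3F800000u, "bf16 1.0 -> f32 1.0");
} // namespace bf16_example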
3078
3079// Lower BUILD_VECTOR as broadcast load (if possible).
3080// For example:
3081// %a = load i8, ptr %ptr
3082// %b = build_vector %a, %a, %a, %a
3083 // is lowered to:
3084// (VLDREPL_B $a0, 0)
3086 const SDLoc &DL,
3087 SelectionDAG &DAG) {
3088 MVT VT = BVOp->getSimpleValueType(0);
3089 int NumOps = BVOp->getNumOperands();
3090
3091 assert((VT.is128BitVector() || VT.is256BitVector()) &&
3092 "Unsupported vector type for broadcast.");
3093
3094 SDValue IdentitySrc;
3095 bool IsIdentity = true;
3096
3097 for (int i = 0; i != NumOps; i++) {
3098 SDValue Op = BVOp->getOperand(i);
3099 if (Op.getOpcode() != ISD::LOAD || (IdentitySrc && Op != IdentitySrc)) {
3100 IsIdentity = false;
3101 break;
3102 }
3103 IdentitySrc = BVOp->getOperand(0);
3104 }
3105
3106 // Make sure that this load is valid and only has one user.
3107 if (!IsIdentity || !IdentitySrc || !BVOp->isOnlyUserOf(IdentitySrc.getNode()))
3108 return SDValue();
3109
3110 auto *LN = cast<LoadSDNode>(IdentitySrc);
3111 auto ExtType = LN->getExtensionType();
3112
3113 if ((ExtType == ISD::EXTLOAD || ExtType == ISD::NON_EXTLOAD) &&
3114 VT.getScalarSizeInBits() == LN->getMemoryVT().getScalarSizeInBits()) {
3115 // Indexed loads and stores are not supported on LoongArch.
3116 assert(LN->isUnindexed() && "Unexpected indexed load.");
3117
3118 SDVTList Tys = DAG.getVTList(VT, MVT::Other);
3119 // The offset operand of unindexed load is always undefined, so there is
3120 // no need to pass it to VLDREPL.
3121 SDValue Ops[] = {LN->getChain(), LN->getBasePtr()};
3122 SDValue BCast = DAG.getNode(LoongArchISD::VLDREPL, DL, Tys, Ops);
3123 DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BCast.getValue(1));
3124 return BCast;
3125 }
3126 return SDValue();
3127}
3128
3129// Sequentially insert elements from Ops into Vector, from low to high indices.
3130// Note: Ops can have fewer elements than Vector.
3132 const LoongArchSubtarget &Subtarget, SDValue &Vector,
3133 EVT ResTy) {
3134 assert(Ops.size() <= ResTy.getVectorNumElements());
3135
3136 SDValue Op0 = Ops[0];
3137 if (!Op0.isUndef())
3138 Vector = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, ResTy, Op0);
3139 for (unsigned i = 1; i < Ops.size(); ++i) {
3140 SDValue Opi = Ops[i];
3141 if (Opi.isUndef())
3142 continue;
3143 Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector, Opi,
3144 DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
3145 }
3146}
3147
3148// Build a ResTy subvector from Node, taking NumElts elements starting at index
3149// 'first'.
3151 SelectionDAG &DAG, SDLoc DL,
3152 const LoongArchSubtarget &Subtarget,
3153 EVT ResTy, unsigned first) {
3154 unsigned NumElts = ResTy.getVectorNumElements();
3155
3156 assert(first + NumElts <= Node->getSimpleValueType(0).getVectorNumElements());
3157
3158 SmallVector<SDValue, 16> Ops(Node->op_begin() + first,
3159 Node->op_begin() + first + NumElts);
3160 SDValue Vector = DAG.getUNDEF(ResTy);
3161 fillVector(Ops, DAG, DL, Subtarget, Vector, ResTy);
3162 return Vector;
3163}
3164
3165SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
3166 SelectionDAG &DAG) const {
3167 BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op);
3168 MVT VT = Node->getSimpleValueType(0);
3169 EVT ResTy = Op->getValueType(0);
3170 unsigned NumElts = ResTy.getVectorNumElements();
3171 SDLoc DL(Op);
3172 APInt SplatValue, SplatUndef;
3173 unsigned SplatBitSize;
3174 bool HasAnyUndefs;
3175 bool IsConstant = false;
3176 bool UseSameConstant = true;
3177 SDValue ConstantValue;
3178 bool Is128Vec = ResTy.is128BitVector();
3179 bool Is256Vec = ResTy.is256BitVector();
3180
3181 if ((!Subtarget.hasExtLSX() || !Is128Vec) &&
3182 (!Subtarget.hasExtLASX() || !Is256Vec))
3183 return SDValue();
3184
3185 if (SDValue Result = lowerBUILD_VECTORAsBroadCastLoad(Node, DL, DAG))
3186 return Result;
3187
3188 if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
3189 /*MinSplatBits=*/8) &&
3190 SplatBitSize <= 64) {
3191 // We can only cope with 8, 16, 32, or 64-bit elements.
3192 if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 &&
3193 SplatBitSize != 64)
3194 return SDValue();
3195
3196 if (SplatBitSize == 64 && !Subtarget.is64Bit()) {
3197 // We can only handle 64-bit elements that are within
3198 // the signed 10-bit range or match vldi patterns on 32-bit targets.
3199 // See the BUILD_VECTOR case in LoongArchDAGToDAGISel::Select().
3200 if (!SplatValue.isSignedIntN(10) &&
3201 !isImmVLDILegalForMode1(SplatValue, SplatBitSize).first)
3202 return SDValue();
3203 if ((Is128Vec && ResTy == MVT::v4i32) ||
3204 (Is256Vec && ResTy == MVT::v8i32))
3205 return Op;
3206 }
3207
3208 EVT ViaVecTy;
3209
3210 switch (SplatBitSize) {
3211 default:
3212 return SDValue();
3213 case 8:
3214 ViaVecTy = Is128Vec ? MVT::v16i8 : MVT::v32i8;
3215 break;
3216 case 16:
3217 ViaVecTy = Is128Vec ? MVT::v8i16 : MVT::v16i16;
3218 break;
3219 case 32:
3220 ViaVecTy = Is128Vec ? MVT::v4i32 : MVT::v8i32;
3221 break;
3222 case 64:
3223 ViaVecTy = Is128Vec ? MVT::v2i64 : MVT::v4i64;
3224 break;
3225 }
3226
3227 // SelectionDAG::getConstant will promote SplatValue appropriately.
3228 SDValue Result = DAG.getConstant(SplatValue, DL, ViaVecTy);
3229
3230 // Bitcast to the type we originally wanted.
3231 if (ViaVecTy != ResTy)
3232 Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result);
3233
3234 return Result;
3235 }
3236
3237 if (DAG.isSplatValue(Op, /*AllowUndefs=*/false))
3238 return Op;
3239
3240 for (unsigned i = 0; i < NumElts; ++i) {
3241 SDValue Opi = Node->getOperand(i);
3242 if (isIntOrFPConstant(Opi)) {
3243 IsConstant = true;
3244 if (!ConstantValue.getNode())
3245 ConstantValue = Opi;
3246 else if (ConstantValue != Opi)
3247 UseSameConstant = false;
3248 }
3249 }
3250
3251 // If the type of BUILD_VECTOR is v2f64, custom legalizing it has no benefits.
3252 if (IsConstant && UseSameConstant && ResTy != MVT::v2f64) {
3253 SDValue Result = DAG.getSplatBuildVector(ResTy, DL, ConstantValue);
3254 for (unsigned i = 0; i < NumElts; ++i) {
3255 SDValue Opi = Node->getOperand(i);
3256 if (!isIntOrFPConstant(Opi))
3257 Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Result, Opi,
3258 DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
3259 }
3260 return Result;
3261 }
3262
3263 if (!IsConstant) {
3264 // If the BUILD_VECTOR has a repeated pattern, use INSERT_VECTOR_ELT to fill
3265 // the sub-sequence of the vector and then broadcast the sub-sequence.
3266 //
3267 // TODO: If the BUILD_VECTOR contains undef elements, consider falling
3268 // back to use INSERT_VECTOR_ELT to materialize the vector, because it
3269 // generates worse code in some cases. This could be further optimized
3270 // with more consideration.
3271 SmallVector<SDValue> Sequence;
3272 BitVector UndefElements;
3273 if (Node->getRepeatedSequence(Sequence, &UndefElements) &&
3274 UndefElements.count() == 0) {
3275 // Use LSX instructions to fill the sub-sequence of a 256-bit vector,
3276 // because the high part can simply be treated as undef.
3277 SDValue Vector = DAG.getUNDEF(ResTy);
3278 EVT FillTy = Is256Vec
3279 ? ResTy.getHalfNumVectorElementsVT(*DAG.getContext())
3280 : ResTy;
3281 SDValue FillVec =
3282 Is256Vec ? DAG.getExtractSubvector(DL, FillTy, Vector, 0) : Vector;
3283
3284 fillVector(Sequence, DAG, DL, Subtarget, FillVec, FillTy);
3285
3286 unsigned SeqLen = Sequence.size();
3287 unsigned SplatLen = NumElts / SeqLen;
3288 MVT SplatEltTy = MVT::getIntegerVT(VT.getScalarSizeInBits() * SeqLen);
3289 MVT SplatTy = MVT::getVectorVT(SplatEltTy, SplatLen);
3290
3291 // If the size of the sub-sequence is half of a 256-bit vector, bitcast the
3292 // vector to v4i64 in order to match the pattern of XVREPLVE0Q.
3293 if (SplatEltTy == MVT::i128)
3294 SplatTy = MVT::v4i64;
3295
3296 SDValue SplatVec;
3297 SDValue SrcVec = DAG.getBitcast(
3298 SplatTy,
3299 Is256Vec ? DAG.getInsertSubvector(DL, Vector, FillVec, 0) : FillVec);
3300 if (Is256Vec) {
3301 SplatVec =
3302 DAG.getNode((SplatEltTy == MVT::i128) ? LoongArchISD::XVREPLVE0Q
3303 : LoongArchISD::XVREPLVE0,
3304 DL, SplatTy, SrcVec);
3305 } else {
3306 SplatVec = DAG.getNode(LoongArchISD::VREPLVEI, DL, SplatTy, SrcVec,
3307 DAG.getConstant(0, DL, Subtarget.getGRLenVT()));
3308 }
3309
3310 return DAG.getBitcast(ResTy, SplatVec);
3311 }
3312
3313 // Use INSERT_VECTOR_ELT operations rather than expanding to stores, because
3314 // using memory operations is much slower.
3315 //
3316 // For 256-bit vectors, normally split into two halves and concatenate.
3317 // Special case: for v8i32/v8f32/v4i64/v4f64, if the upper half has only
3318 // one non-undef element, skip splitting to avoid a worse result.
3319 if (ResTy == MVT::v8i32 || ResTy == MVT::v8f32 || ResTy == MVT::v4i64 ||
3320 ResTy == MVT::v4f64) {
3321 unsigned NonUndefCount = 0;
3322 for (unsigned i = NumElts / 2; i < NumElts; ++i) {
3323 if (!Node->getOperand(i).isUndef()) {
3324 ++NonUndefCount;
3325 if (NonUndefCount > 1)
3326 break;
3327 }
3328 }
3329 if (NonUndefCount == 1)
3330 return fillSubVectorFromBuildVector(Node, DAG, DL, Subtarget, ResTy, 0);
3331 }
3332
3333 EVT VecTy =
3334 Is256Vec ? ResTy.getHalfNumVectorElementsVT(*DAG.getContext()) : ResTy;
3335 SDValue Vector =
3336 fillSubVectorFromBuildVector(Node, DAG, DL, Subtarget, VecTy, 0);
3337
3338 if (Is128Vec)
3339 return Vector;
3340
3341 SDValue VectorHi = fillSubVectorFromBuildVector(Node, DAG, DL, Subtarget,
3342 VecTy, NumElts / 2);
3343
3344 return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResTy, Vector, VectorHi);
3345 }
3346
3347 return SDValue();
3348}
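// The repeated-sequence path above, worked for v8i32 <a, b, a, b, a, b, a, b>
// (a compile-time sketch; names are illustrative): SeqLen = 2 gives
// SplatLen = 4 and a 64-bit splat element, so the vector is viewed as v4i64
// and XVREPLVE0 broadcasts its element 0, i.e. the (a, b) pair.
namespace build_vector_example {
constexpr int NumElts = 8, SeqLen = 2;
constexpr int SplatLen = NumElts / SeqLen; // 4 x i64 elements
constexpr int SplatEltBits = 32 * SeqLen;  // i64
static_assert(SplatLen == 4 && SplatEltBits == 64,
              "an (a, b) pair broadcast as one 64-bit element");
} // namespace build_vector_example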
3349
3350SDValue LoongArchTargetLowering::lowerCONCAT_VECTORS(SDValue Op,
3351 SelectionDAG &DAG) const {
3352 SDLoc DL(Op);
3353 MVT ResVT = Op.getSimpleValueType();
3354 assert(ResVT.is256BitVector() && Op.getNumOperands() == 2);
3355
3356 unsigned NumOperands = Op.getNumOperands();
3357 unsigned NumFreezeUndef = 0;
3358 unsigned NumZero = 0;
3359 unsigned NumNonZero = 0;
3360 unsigned NonZeros = 0;
3361 SmallSet<SDValue, 4> Undefs;
3362 for (unsigned i = 0; i != NumOperands; ++i) {
3363 SDValue SubVec = Op.getOperand(i);
3364 if (SubVec.isUndef())
3365 continue;
3366 if (ISD::isFreezeUndef(SubVec.getNode())) {
3367 // If the freeze(undef) has multiple uses then we must fold to zero.
3368 if (SubVec.hasOneUse()) {
3369 ++NumFreezeUndef;
3370 } else {
3371 ++NumZero;
3372 Undefs.insert(SubVec);
3373 }
3374 } else if (ISD::isBuildVectorAllZeros(SubVec.getNode()))
3375 ++NumZero;
3376 else {
3377 assert(i < sizeof(NonZeros) * CHAR_BIT); // Ensure the shift is in range.
3378 NonZeros |= 1 << i;
3379 ++NumNonZero;
3380 }
3381 }
3382
3383 // If we have more than 2 non-zeros, build each half separately.
3384 if (NumNonZero > 2) {
3385 MVT HalfVT = ResVT.getHalfNumVectorElementsVT();
3386 ArrayRef<SDUse> Ops = Op->ops();
3387 SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
3388 Ops.slice(0, NumOperands / 2));
3389 SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
3390 Ops.slice(NumOperands / 2));
3391 return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi);
3392 }
3393
3394 // Otherwise, build it up through insert_subvectors.
3395 SDValue Vec = NumZero ? DAG.getConstant(0, DL, ResVT)
3396 : (NumFreezeUndef ? DAG.getFreeze(DAG.getUNDEF(ResVT))
3397 : DAG.getUNDEF(ResVT));
3398
3399 // Replace Undef operands with ZeroVector.
3400 for (SDValue U : Undefs)
3401 DAG.ReplaceAllUsesWith(U, DAG.getConstant(0, DL, U.getSimpleValueType()));
3402
3403 MVT SubVT = Op.getOperand(0).getSimpleValueType();
3404 unsigned NumSubElems = SubVT.getVectorNumElements();
3405 for (unsigned i = 0; i != NumOperands; ++i) {
3406 if ((NonZeros & (1 << i)) == 0)
3407 continue;
3408
3409 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ResVT, Vec, Op.getOperand(i),
3410 DAG.getVectorIdxConstant(i * NumSubElems, DL));
3411 }
3412
3413 return Vec;
3414}
3415
3416SDValue
3417LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
3418 SelectionDAG &DAG) const {
3419 MVT EltVT = Op.getSimpleValueType();
3420 SDValue Vec = Op->getOperand(0);
3421 EVT VecTy = Vec->getValueType(0);
3422 SDValue Idx = Op->getOperand(1);
3423 SDLoc DL(Op);
3424 MVT GRLenVT = Subtarget.getGRLenVT();
3425
3426 assert(VecTy.is256BitVector() && "Unexpected EXTRACT_VECTOR_ELT vector type");
3427
3428 if (isa<ConstantSDNode>(Idx))
3429 return Op;
3430
3431 switch (VecTy.getSimpleVT().SimpleTy) {
3432 default:
3433 llvm_unreachable("Unexpected type");
3434 case MVT::v32i8:
3435 case MVT::v16i16:
3436 case MVT::v4i64:
3437 case MVT::v4f64: {
3438 // Extract the high half subvector and place it in the low half of a new
3439 // vector. It doesn't matter what the high half of the new vector is.
3440 EVT HalfTy = VecTy.getHalfNumVectorElementsVT(*DAG.getContext());
3441 SDValue VecHi =
3442 DAG.getExtractSubvector(DL, HalfTy, Vec, HalfTy.getVectorNumElements());
3443 SDValue TmpVec =
3444 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecTy, DAG.getUNDEF(VecTy),
3445 VecHi, DAG.getConstant(0, DL, GRLenVT));
3446
3447 // Shuffle the original Vec and TmpVec using MaskVec; the lowest element
3448 // of MaskVec is Idx and the rest do not matter. ResVec[0] will hold the
3449 // desired element.
3450 SDValue IdxCp =
3451 Subtarget.is64Bit()
3452 ? DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Idx)
3453 : DAG.getBitcast(MVT::f32, Idx);
3454 SDValue IdxVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v8f32, IdxCp);
3455 SDValue MaskVec =
3456 DAG.getBitcast((VecTy == MVT::v4f64) ? MVT::v4i64 : VecTy, IdxVec);
3457 SDValue ResVec =
3458 DAG.getNode(LoongArchISD::VSHUF, DL, VecTy, MaskVec, TmpVec, Vec);
3459
3460 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, ResVec,
3461 DAG.getConstant(0, DL, GRLenVT));
3462 }
3463 case MVT::v8i32:
3464 case MVT::v8f32: {
3465 SDValue SplatIdx = DAG.getSplatBuildVector(MVT::v8i32, DL, Idx);
3466 SDValue SplatValue =
3467 DAG.getNode(LoongArchISD::XVPERM, DL, VecTy, Vec, SplatIdx);
3468
3469 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, SplatValue,
3470 DAG.getConstant(0, DL, GRLenVT));
3471 }
3472 }
3473}
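// A toy scalar model (illustrative only; hypothetical names) of the
// v8i32/v8f32 path above: splatting Idx and applying XVPERM moves Vec[Idx]
// into every lane, so reading lane 0 afterwards yields the variably-indexed
// element without any cross-lane special cases.
namespace extract_elt_example {
constexpr int extractViaPerm(const int (&Vec)[8], int Idx) {
  // XVPERM result lane i = Vec[SplatIdx[i]]; with a splatted index every
  // lane holds Vec[Idx], so lane 0 is the answer.
  return Vec[Idx];
}
constexpr int V[8] = {5, 6, 7, 8, 9, 10, 11, 12};
static_assert(extractViaPerm(V, 6) == 11, "variable-index extract");
} // namespace extract_elt_example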
3474
3475SDValue
3476LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
3477 SelectionDAG &DAG) const {
3478 MVT VT = Op.getSimpleValueType();
3479 MVT EltVT = VT.getVectorElementType();
3480 unsigned NumElts = VT.getVectorNumElements();
3481 unsigned EltSizeInBits = EltVT.getScalarSizeInBits();
3482 SDLoc DL(Op);
3483 SDValue Op0 = Op.getOperand(0);
3484 SDValue Op1 = Op.getOperand(1);
3485 SDValue Op2 = Op.getOperand(2);
3486
3487 if (isa<ConstantSDNode>(Op2))
3488 return Op;
3489
3490 MVT IdxTy = MVT::getIntegerVT(EltSizeInBits);
3491 MVT IdxVTy = MVT::getVectorVT(IdxTy, NumElts);
3492
3493 if (!isTypeLegal(VT) || !isTypeLegal(IdxVTy))
3494 return SDValue();
3495
3496 SDValue SplatElt = DAG.getSplatBuildVector(VT, DL, Op1);
3497 SmallVector<SDValue, 32> RawIndices;
3498 SDValue SplatIdx;
3499 SDValue Indices;
3500
3501 if (!Subtarget.is64Bit() && IdxTy == MVT::i64) {
3502 MVT PairVTy = MVT::getVectorVT(MVT::i32, NumElts * 2);
3503 for (unsigned i = 0; i < NumElts; ++i) {
3504 RawIndices.push_back(Op2);
3505 RawIndices.push_back(DAG.getConstant(0, DL, MVT::i32));
3506 }
3507 SplatIdx = DAG.getBuildVector(PairVTy, DL, RawIndices);
3508 SplatIdx = DAG.getBitcast(IdxVTy, SplatIdx);
3509
3510 RawIndices.clear();
3511 for (unsigned i = 0; i < NumElts; ++i) {
3512 RawIndices.push_back(DAG.getConstant(i, DL, MVT::i32));
3513 RawIndices.push_back(DAG.getConstant(0, DL, MVT::i32));
3514 }
3515 Indices = DAG.getBuildVector(PairVTy, DL, RawIndices);
3516 Indices = DAG.getBitcast(IdxVTy, Indices);
3517 } else {
3518 SplatIdx = DAG.getSplatBuildVector(IdxVTy, DL, Op2);
3519
3520 for (unsigned i = 0; i < NumElts; ++i)
3521 RawIndices.push_back(DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
3522 Indices = DAG.getBuildVector(IdxVTy, DL, RawIndices);
3523 }
3524
3525 // insert vec, elt, idx
3526 // =>
3527 // select (splatidx == {0,1,2...}) ? splatelt : vec
3528 SDValue SelectCC =
3529 DAG.getSetCC(DL, IdxVTy, SplatIdx, Indices, ISD::CondCode::SETEQ);
3530 return DAG.getNode(ISD::VSELECT, DL, VT, SelectCC, SplatElt, Op0);
3531}
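// A minimal compile-time sketch (illustrative only; hypothetical names) of
// the vselect-based insert above: splat the index, compare it with the iota
// vector {0, 1, 2, ...} and pick the splatted element where they match.
namespace insert_elt_example {
constexpr bool insertBySelect() {
  int Vec[4] = {10, 11, 12, 13};
  const int Elt = 99, Idx = 2;
  // Models VSELECT(SplatIdx == {0,1,2,3}, SplatElt, Vec) lane by lane.
  for (int i = 0; i < 4; ++i)
    Vec[i] = (Idx == i) ? Elt : Vec[i];
  return Vec[0] == 10 && Vec[2] == 99 && Vec[3] == 13;
}
static_assert(insertBySelect(), "element 2 replaced, others untouched");
} // namespace insert_elt_example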
3532
3533SDValue LoongArchTargetLowering::lowerATOMIC_FENCE(SDValue Op,
3534 SelectionDAG &DAG) const {
3535 SDLoc DL(Op);
3536 SyncScope::ID FenceSSID =
3537 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
3538
3539 // singlethread fences only synchronize with signal handlers on the same
3540 // thread and thus only need to preserve instruction order, not actually
3541 // enforce memory ordering.
3542 if (FenceSSID == SyncScope::SingleThread)
3543 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
3544 return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
3545
3546 return Op;
3547}
3548
3549SDValue LoongArchTargetLowering::lowerWRITE_REGISTER(SDValue Op,
3550 SelectionDAG &DAG) const {
3551
3552 if (Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i32) {
3553 DAG.getContext()->emitError(
3554 "On LA64, only 64-bit registers can be written.");
3555 return Op.getOperand(0);
3556 }
3557
3558 if (!Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i64) {
3559 DAG.getContext()->emitError(
3560 "On LA32, only 32-bit registers can be written.");
3561 return Op.getOperand(0);
3562 }
3563
3564 return Op;
3565}
3566
3567SDValue LoongArchTargetLowering::lowerFRAMEADDR(SDValue Op,
3568 SelectionDAG &DAG) const {
3569 if (!isa<ConstantSDNode>(Op.getOperand(0))) {
3570 DAG.getContext()->emitError("argument to '__builtin_frame_address' must "
3571 "be a constant integer");
3572 return SDValue();
3573 }
3574
3575 MachineFunction &MF = DAG.getMachineFunction();
3576 MF.getFrameInfo().setFrameAddressIsTaken(true);
3577 Register FrameReg = Subtarget.getRegisterInfo()->getFrameRegister(MF);
3578 EVT VT = Op.getValueType();
3579 SDLoc DL(Op);
3580 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
3581 unsigned Depth = Op.getConstantOperandVal(0);
3582 int GRLenInBytes = Subtarget.getGRLen() / 8;
3583
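// Walk up the frame-record chain: the caller's frame pointer is assumed to be
// saved two GRLen-sized slots below the current frame pointer, next to the
// saved return address.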
3584 while (Depth--) {
3585 int Offset = -(GRLenInBytes * 2);
3586 SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
3587 DAG.getSignedConstant(Offset, DL, VT));
3588 FrameAddr =
3589 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
3590 }
3591 return FrameAddr;
3592}
3593
3594SDValue LoongArchTargetLowering::lowerRETURNADDR(SDValue Op,
3595 SelectionDAG &DAG) const {
3596 // Currently we only support lowering the return address for the current frame.
3597 if (Op.getConstantOperandVal(0) != 0) {
3598 DAG.getContext()->emitError(
3599 "return address can only be determined for the current frame");
3600 return SDValue();
3601 }
3602
3603 MachineFunction &MF = DAG.getMachineFunction();
3604 MF.getFrameInfo().setReturnAddressIsTaken(true);
3605 MVT GRLenVT = Subtarget.getGRLenVT();
3606
3607 // Return the value of the return address register, marking it an implicit
3608 // live-in.
3609 Register Reg = MF.addLiveIn(Subtarget.getRegisterInfo()->getRARegister(),
3610 getRegClassFor(GRLenVT));
3611 return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), Reg, GRLenVT);
3612}
3613
3614SDValue LoongArchTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
3615 SelectionDAG &DAG) const {
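// The DWARF canonical frame address here is the stack pointer value on entry
// to the function, modeled as a fixed frame object at offset 0.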
3616 MachineFunction &MF = DAG.getMachineFunction();
3617 auto Size = Subtarget.getGRLen() / 8;
3618 auto FI = MF.getFrameInfo().CreateFixedObject(Size, 0, false);
3619 return DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
3620}
3621
3622SDValue LoongArchTargetLowering::lowerVASTART(SDValue Op,
3623 SelectionDAG &DAG) const {
3624 MachineFunction &MF = DAG.getMachineFunction();
3625 auto *FuncInfo = MF.getInfo<LoongArchMachineFunctionInfo>();
3626
3627 SDLoc DL(Op);
3628 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
3629 getPointerTy(MF.getDataLayout()));
3630
3631 // vastart just stores the address of the VarArgsFrameIndex slot into the
3632 // memory location argument.
3633 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3634 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
3635 MachinePointerInfo(SV));
3636}
3637
3638SDValue LoongArchTargetLowering::lowerUINT_TO_FP(SDValue Op,
3639 SelectionDAG &DAG) const {
3640 assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
3641 !Subtarget.hasBasicD() && "unexpected target features");
3642
3643 SDLoc DL(Op);
3644 SDValue Op0 = Op.getOperand(0);
3645 if (Op0->getOpcode() == ISD::AND) {
3646 auto *C = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
3647 if (C && C->getZExtValue() < UINT64_C(0xFFFFFFFF))
3648 return Op;
3649 }
3650
3651 if (Op0->getOpcode() == LoongArchISD::BSTRPICK &&
3652 Op0.getConstantOperandVal(1) < UINT64_C(0x1F) &&
3653 Op0.getConstantOperandVal(2) == UINT64_C(0))
3654 return Op;
3655
3656 if (Op0.getOpcode() == ISD::AssertZext &&
3657 dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLT(MVT::i32))
3658 return Op;
3659
3660 EVT OpVT = Op0.getValueType();
3661 EVT RetVT = Op.getValueType();
3662 RTLIB::Libcall LC = RTLIB::getUINTTOFP(OpVT, RetVT);
3663 MakeLibCallOptions CallOptions;
3664 CallOptions.setTypeListBeforeSoften(OpVT, RetVT);
3665 SDValue Chain = SDValue();
3666 SDValue Result;
3667 std::tie(Result, Chain) =
3668 makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
3669 return Result;
3670}
3671
3672SDValue LoongArchTargetLowering::lowerSINT_TO_FP(SDValue Op,
3673 SelectionDAG &DAG) const {
3674 assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
3675 !Subtarget.hasBasicD() && "unexpected target features");
3676
3677 SDLoc DL(Op);
3678 SDValue Op0 = Op.getOperand(0);
3679
3680 if ((Op0.getOpcode() == ISD::AssertSext ||
3681 Op0.getOpcode() == ISD::AssertZext) &&
3682 dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLE(MVT::i32))
3683 return Op;
3684
3685 EVT OpVT = Op0.getValueType();
3686 EVT RetVT = Op.getValueType();
3687 RTLIB::Libcall LC = RTLIB::getSINTTOFP(OpVT, RetVT);
3688 MakeLibCallOptions CallOptions;
3689 CallOptions.setTypeListBeforeSoften(OpVT, RetVT);
3690 SDValue Chain = SDValue();
3691 SDValue Result;
3692 std::tie(Result, Chain) =
3693 makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
3694 return Result;
3695}
3696
3697SDValue LoongArchTargetLowering::lowerBITCAST(SDValue Op,
3698 SelectionDAG &DAG) const {
3699
3700 SDLoc DL(Op);
3701 EVT VT = Op.getValueType();
3702 SDValue Op0 = Op.getOperand(0);
3703 EVT Op0VT = Op0.getValueType();
3704
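// Two bitcasts need custom handling: i32 -> f32 on LA64 (move the value
// through a 64-bit GPR into an FPR) and i64 -> f64 on LA32 (split the scalar
// and pair the two i32 halves into an f64).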
3705 if (Op.getValueType() == MVT::f32 && Op0VT == MVT::i32 &&
3706 Subtarget.is64Bit() && Subtarget.hasBasicF()) {
3707 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
3708 return DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, NewOp0);
3709 }
3710 if (VT == MVT::f64 && Op0VT == MVT::i64 && !Subtarget.is64Bit()) {
3711 SDValue Lo, Hi;
3712 std::tie(Lo, Hi) = DAG.SplitScalar(Op0, DL, MVT::i32, MVT::i32);
3713 return DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64, Lo, Hi);
3714 }
3715 return Op;
3716}
3717
3718SDValue LoongArchTargetLowering::lowerFP_TO_SINT(SDValue Op,
3719 SelectionDAG &DAG) const {
3720
3721 SDLoc DL(Op);
3722 SDValue Op0 = Op.getOperand(0);
3723
3724 if (Op0.getValueType() == MVT::f16)
3725 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
3726
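// With basic F but no basic D on LA64, results wider than 32 bits are
// computed by converting in an f32 register and then moving the 32-bit
// pattern into a GPR (a sign-extending move on LA64).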
3727 if (Op.getValueSizeInBits() > 32 && Subtarget.hasBasicF() &&
3728 !Subtarget.hasBasicD()) {
3729 SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, MVT::f32, Op0);
3730 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Dst);
3731 }
3732
3733 EVT FPTy = EVT::getFloatingPointVT(Op.getValueSizeInBits());
3734 SDValue Trunc = DAG.getNode(LoongArchISD::FTINT, DL, FPTy, Op0);
3735 return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Trunc);
3736}
3737
3738 static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty,
3739 SelectionDAG &DAG, unsigned Flags) {
3740 return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
3741}
3742
3743 static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty,
3744 SelectionDAG &DAG, unsigned Flags) {
3745 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
3746 Flags);
3747}
3748
3749 static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty,
3750 SelectionDAG &DAG, unsigned Flags) {
3751 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
3752 N->getOffset(), Flags);
3753}
3754
3755 static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty,
3756 SelectionDAG &DAG, unsigned Flags) {
3757 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
3758}
3759
3760template <class NodeTy>
3761SDValue LoongArchTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
3762 CodeModel::Model M,
3763 bool IsLocal) const {
3764 SDLoc DL(N);
3765 EVT Ty = getPointerTy(DAG.getDataLayout());
3766 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
3767 SDValue Load;
3768
3769 switch (M) {
3770 default:
3771 report_fatal_error("Unsupported code model");
3772
3773 case CodeModel::Large: {
3774 assert(Subtarget.is64Bit() && "Large code model requires LA64");
3775
3776 // This is not actually used, but is necessary for successfully matching
3777 // the PseudoLA_*_LARGE nodes.
3778 SDValue Tmp = DAG.getConstant(0, DL, Ty);
3779 if (IsLocal) {
3780 // This generates the pattern (PseudoLA_PCREL_LARGE tmp sym), that
3781 // eventually becomes the desired 5-insn code sequence.
3782 Load = SDValue(DAG.getMachineNode(LoongArch::PseudoLA_PCREL_LARGE, DL, Ty,
3783 Tmp, Addr),
3784 0);
3785 } else {
3786 // This generates the pattern (PseudoLA_GOT_LARGE tmp sym), that
3787 // eventually becomes the desired 5-insn code sequence.
3788 Load = SDValue(
3789 DAG.getMachineNode(LoongArch::PseudoLA_GOT_LARGE, DL, Ty, Tmp, Addr),
3790 0);
3791 }
3792 break;
3793 }
3794
3795 case CodeModel::Small:
3796 case CodeModel::Medium:
3797 if (IsLocal) {
3798 // This generates the pattern (PseudoLA_PCREL sym), which
3799 //
3800 // for la32r expands to:
3801 // (addi.w (pcaddu12i %pcadd_hi20(sym)) %pcadd_lo12(.Lpcadd_hi)).
3802 //
3803 // for la32s and la64 expands to:
3804 // (addi.w/d (pcalau12i %pc_hi20(sym)) %pc_lo12(sym)).
3805 Load = SDValue(
3806 DAG.getMachineNode(LoongArch::PseudoLA_PCREL, DL, Ty, Addr), 0);
3807 } else {
3808 // This generates the pattern (PseudoLA_GOT sym), which
3809 //
3810 // for la32r expands to:
3811 // (ld.w (pcaddu12i %got_pcadd_hi20(sym)) %pcadd_lo12(.Lpcadd_hi)).
3812 //
3813 // for la32s and la64 expands to:
3814 // (ld.w/d (pcalau12i %got_pc_hi20(sym)) %got_pc_lo12(sym)).
3815 Load =
3816 SDValue(DAG.getMachineNode(LoongArch::PseudoLA_GOT, DL, Ty, Addr), 0);
3817 }
3818 }
3819
3820 if (!IsLocal) {
3821 // Mark the load instruction as invariant to enable hoisting in MachineLICM.
3822 MachineFunction &MF = DAG.getMachineFunction();
3823 MachineMemOperand *MemOp = MF.getMachineMemOperand(
3824 MachinePointerInfo::getGOT(MF),
3825 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
3826 MachineMemOperand::MOInvariant,
3827 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
3828 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
3829 }
3830
3831 return Load;
3832}
3833
3834SDValue LoongArchTargetLowering::lowerBlockAddress(SDValue Op,
3835 SelectionDAG &DAG) const {
3836 return getAddr(cast<BlockAddressSDNode>(Op), DAG,
3837 DAG.getTarget().getCodeModel());
3838}
3839
3840SDValue LoongArchTargetLowering::lowerJumpTable(SDValue Op,
3841 SelectionDAG &DAG) const {
3842 return getAddr(cast<JumpTableSDNode>(Op), DAG,
3843 DAG.getTarget().getCodeModel());
3844}
3845
3846SDValue LoongArchTargetLowering::lowerConstantPool(SDValue Op,
3847 SelectionDAG &DAG) const {
3848 return getAddr(cast<ConstantPoolSDNode>(Op), DAG,
3849 DAG.getTarget().getCodeModel());
3850}
3851
3852SDValue LoongArchTargetLowering::lowerGlobalAddress(SDValue Op,
3853 SelectionDAG &DAG) const {
3854 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
3855 assert(N->getOffset() == 0 && "unexpected offset in global node");
3856 auto CM = DAG.getTarget().getCodeModel();
3857 const GlobalValue *GV = N->getGlobal();
3858
3859 if (GV->isDSOLocal() && isa<GlobalVariable>(GV)) {
3860 if (auto GCM = dyn_cast<GlobalVariable>(GV)->getCodeModel())
3861 CM = *GCM;
3862 }
3863
3864 return getAddr(N, DAG, CM, GV->isDSOLocal());
3865}
3866
3867SDValue LoongArchTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
3868 SelectionDAG &DAG,
3869 unsigned Opc, bool UseGOT,
3870 bool Large) const {
3871 SDLoc DL(N);
3872 EVT Ty = getPointerTy(DAG.getDataLayout());
3873 MVT GRLenVT = Subtarget.getGRLenVT();
3874
3875 // This is not actually used, but is necessary for successfully matching the
3876 // PseudoLA_*_LARGE nodes.
3877 SDValue Tmp = DAG.getConstant(0, DL, Ty);
3878 SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
3879
3880 // Only IE needs an extra argument for large code model.
3881 SDValue Offset = Opc == LoongArch::PseudoLA_TLS_IE_LARGE
3882 ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
3883 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
3884
3885 // If it is LE for normal/medium code model, the add tp operation will occur
3886 // during the pseudo-instruction expansion.
3887 if (Opc == LoongArch::PseudoLA_TLS_LE && !Large)
3888 return Offset;
3889
3890 if (UseGOT) {
3891 // Mark the load instruction as invariant to enable hoisting in MachineLICM.
3892 MachineFunction &MF = DAG.getMachineFunction();
3893 MachineMemOperand *MemOp = MF.getMachineMemOperand(
3894 MachinePointerInfo::getGOT(MF),
3895 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
3896 MachineMemOperand::MOInvariant,
3897 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
3898 DAG.setNodeMemRefs(cast<MachineSDNode>(Offset.getNode()), {MemOp});
3899 }
3900
3901 // Add the thread pointer.
3902 return DAG.getNode(ISD::ADD, DL, Ty, Offset,
3903 DAG.getRegister(LoongArch::R2, GRLenVT));
3904}
3905
3906SDValue LoongArchTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
3907 SelectionDAG &DAG,
3908 unsigned Opc,
3909 bool Large) const {
3910 SDLoc DL(N);
3911 EVT Ty = getPointerTy(DAG.getDataLayout());
3912 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
3913
3914 // This is not actually used, but is necessary for successfully matching the
3915 // PseudoLA_*_LARGE nodes.
3916 SDValue Tmp = DAG.getConstant(0, DL, Ty);
3917
3918 // Use a PC-relative addressing mode to access the dynamic GOT address.
3919 SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
3920 SDValue Load = Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
3921 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
3922
3923 // Prepare argument list to generate call.
3924 ArgListTy Args;
3925 Args.emplace_back(Load, CallTy);
3926
3927 // Setup call to __tls_get_addr.
3928 TargetLowering::CallLoweringInfo CLI(DAG);
3929 CLI.setDebugLoc(DL)
3930 .setChain(DAG.getEntryNode())
3931 .setLibCallee(CallingConv::C, CallTy,
3932 DAG.getExternalSymbol("__tls_get_addr", Ty),
3933 std::move(Args));
3934
3935 return LowerCallTo(CLI).first;
3936}
3937
3938SDValue LoongArchTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
3939 SelectionDAG &DAG, unsigned Opc,
3940 bool Large) const {
3941 SDLoc DL(N);
3942 EVT Ty = getPointerTy(DAG.getDataLayout());
3943 const GlobalValue *GV = N->getGlobal();
3944
3945 // This is not actually used, but is necessary for successfully matching the
3946 // PseudoLA_*_LARGE nodes.
3947 SDValue Tmp = DAG.getConstant(0, DL, Ty);
3948
3949 // Use a PC-relative addressing mode to access the global dynamic GOT address.
3950 // This generates the pattern (PseudoLA_TLS_DESC_PC{,LARGE} sym).
3951 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
3952 return Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
3953 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
3954}
3955
3956SDValue
3957LoongArchTargetLowering::lowerGlobalTLSAddress(SDValue Op,
3958 SelectionDAG &DAG) const {
3959 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
3960 CallingConv::GHC)
3961 report_fatal_error("In GHC calling convention TLS is not supported");
3962
3963 bool Large = DAG.getTarget().getCodeModel() == CodeModel::Large;
3964 assert((!Large || Subtarget.is64Bit()) && "Large code model requires LA64");
3965
3966 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
3967 assert(N->getOffset() == 0 && "unexpected offset in global node");
3968
3969 if (DAG.getTarget().useEmulatedTLS())
3970 reportFatalUsageError("the emulated TLS is prohibited");
3971
3972 bool IsDesc = DAG.getTarget().useTLSDESC();
3973
3974 switch (getTargetMachine().getTLSModel(N->getGlobal())) {
3975 case TLSModel::GeneralDynamic:
3976 // In this model, application code calls the dynamic linker function
3977 // __tls_get_addr to locate TLS offsets into the dynamic thread vector at
3978 // runtime.
3979 if (!IsDesc)
3980 return getDynamicTLSAddr(N, DAG,
3981 Large ? LoongArch::PseudoLA_TLS_GD_LARGE
3982 : LoongArch::PseudoLA_TLS_GD,
3983 Large);
3984 break;
3985 case TLSModel::LocalDynamic:
3986 // Same as GeneralDynamic, except for assembly modifiers and relocation
3987 // records.
3988 if (!IsDesc)
3989 return getDynamicTLSAddr(N, DAG,
3990 Large ? LoongArch::PseudoLA_TLS_LD_LARGE
3991 : LoongArch::PseudoLA_TLS_LD,
3992 Large);
3993 break;
3994 case TLSModel::InitialExec:
3995 // This model uses the GOT to resolve TLS offsets.
3996 return getStaticTLSAddr(N, DAG,
3997 Large ? LoongArch::PseudoLA_TLS_IE_LARGE
3998 : LoongArch::PseudoLA_TLS_IE,
3999 /*UseGOT=*/true, Large);
4000 case TLSModel::LocalExec:
4001 // This model is used when static linking as the TLS offsets are resolved
4002 // during program linking.
4003 //
4004 // This node doesn't need an extra argument for the large code model.
4005 return getStaticTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_LE,
4006 /*UseGOT=*/false, Large);
4007 }
4008
4009 return getTLSDescAddr(N, DAG,
4010 Large ? LoongArch::PseudoLA_TLS_DESC_LARGE
4011 : LoongArch::PseudoLA_TLS_DESC,
4012 Large);
4013}
4014
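// Checks that the immediate argument at operand index ImmOp of the intrinsic
// fits in an N-bit (signed or unsigned) immediate field; if it does not, a
// diagnostic is emitted and UNDEF is returned in its place.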
4015template <unsigned N>
4016 static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp,
4017 SelectionDAG &DAG, bool IsSigned = false) {
4018 auto *CImm = cast<ConstantSDNode>(Op->getOperand(ImmOp));
4019 // Check the ImmArg.
4020 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
4021 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
4022 DAG.getContext()->emitError(Op->getOperationName(0) +
4023 ": argument out of range.");
4024 return DAG.getNode(ISD::UNDEF, SDLoc(Op), Op.getValueType());
4025 }
4026 return SDValue();
4027}
4028
4029SDValue
4030LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
4031 SelectionDAG &DAG) const {
4032 switch (Op.getConstantOperandVal(0)) {
4033 default:
4034 return SDValue(); // Don't custom lower most intrinsics.
4035 case Intrinsic::thread_pointer: {
4036 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4037 return DAG.getRegister(LoongArch::R2, PtrVT);
4038 }
4039 case Intrinsic::loongarch_lsx_vpickve2gr_d:
4040 case Intrinsic::loongarch_lsx_vpickve2gr_du:
4041 case Intrinsic::loongarch_lsx_vreplvei_d:
4042 case Intrinsic::loongarch_lasx_xvrepl128vei_d:
4043 return checkIntrinsicImmArg<1>(Op, 2, DAG);
4044 case Intrinsic::loongarch_lsx_vreplvei_w:
4045 case Intrinsic::loongarch_lasx_xvrepl128vei_w:
4046 case Intrinsic::loongarch_lasx_xvpickve2gr_d:
4047 case Intrinsic::loongarch_lasx_xvpickve2gr_du:
4048 case Intrinsic::loongarch_lasx_xvpickve_d:
4049 case Intrinsic::loongarch_lasx_xvpickve_d_f:
4050 return checkIntrinsicImmArg<2>(Op, 2, DAG);
4051 case Intrinsic::loongarch_lasx_xvinsve0_d:
4052 return checkIntrinsicImmArg<2>(Op, 3, DAG);
4053 case Intrinsic::loongarch_lsx_vsat_b:
4054 case Intrinsic::loongarch_lsx_vsat_bu:
4055 case Intrinsic::loongarch_lsx_vrotri_b:
4056 case Intrinsic::loongarch_lsx_vsllwil_h_b:
4057 case Intrinsic::loongarch_lsx_vsllwil_hu_bu:
4058 case Intrinsic::loongarch_lsx_vsrlri_b:
4059 case Intrinsic::loongarch_lsx_vsrari_b:
4060 case Intrinsic::loongarch_lsx_vreplvei_h:
4061 case Intrinsic::loongarch_lasx_xvsat_b:
4062 case Intrinsic::loongarch_lasx_xvsat_bu:
4063 case Intrinsic::loongarch_lasx_xvrotri_b:
4064 case Intrinsic::loongarch_lasx_xvsllwil_h_b:
4065 case Intrinsic::loongarch_lasx_xvsllwil_hu_bu:
4066 case Intrinsic::loongarch_lasx_xvsrlri_b:
4067 case Intrinsic::loongarch_lasx_xvsrari_b:
4068 case Intrinsic::loongarch_lasx_xvrepl128vei_h:
4069 case Intrinsic::loongarch_lasx_xvpickve_w:
4070 case Intrinsic::loongarch_lasx_xvpickve_w_f:
4071 return checkIntrinsicImmArg<3>(Op, 2, DAG);
4072 case Intrinsic::loongarch_lasx_xvinsve0_w:
4073 return checkIntrinsicImmArg<3>(Op, 3, DAG);
4074 case Intrinsic::loongarch_lsx_vsat_h:
4075 case Intrinsic::loongarch_lsx_vsat_hu:
4076 case Intrinsic::loongarch_lsx_vrotri_h:
4077 case Intrinsic::loongarch_lsx_vsllwil_w_h:
4078 case Intrinsic::loongarch_lsx_vsllwil_wu_hu:
4079 case Intrinsic::loongarch_lsx_vsrlri_h:
4080 case Intrinsic::loongarch_lsx_vsrari_h:
4081 case Intrinsic::loongarch_lsx_vreplvei_b:
4082 case Intrinsic::loongarch_lasx_xvsat_h:
4083 case Intrinsic::loongarch_lasx_xvsat_hu:
4084 case Intrinsic::loongarch_lasx_xvrotri_h:
4085 case Intrinsic::loongarch_lasx_xvsllwil_w_h:
4086 case Intrinsic::loongarch_lasx_xvsllwil_wu_hu:
4087 case Intrinsic::loongarch_lasx_xvsrlri_h:
4088 case Intrinsic::loongarch_lasx_xvsrari_h:
4089 case Intrinsic::loongarch_lasx_xvrepl128vei_b:
4090 return checkIntrinsicImmArg<4>(Op, 2, DAG);
4091 case Intrinsic::loongarch_lsx_vsrlni_b_h:
4092 case Intrinsic::loongarch_lsx_vsrani_b_h:
4093 case Intrinsic::loongarch_lsx_vsrlrni_b_h:
4094 case Intrinsic::loongarch_lsx_vsrarni_b_h:
4095 case Intrinsic::loongarch_lsx_vssrlni_b_h:
4096 case Intrinsic::loongarch_lsx_vssrani_b_h:
4097 case Intrinsic::loongarch_lsx_vssrlni_bu_h:
4098 case Intrinsic::loongarch_lsx_vssrani_bu_h:
4099 case Intrinsic::loongarch_lsx_vssrlrni_b_h:
4100 case Intrinsic::loongarch_lsx_vssrarni_b_h:
4101 case Intrinsic::loongarch_lsx_vssrlrni_bu_h:
4102 case Intrinsic::loongarch_lsx_vssrarni_bu_h:
4103 case Intrinsic::loongarch_lasx_xvsrlni_b_h:
4104 case Intrinsic::loongarch_lasx_xvsrani_b_h:
4105 case Intrinsic::loongarch_lasx_xvsrlrni_b_h:
4106 case Intrinsic::loongarch_lasx_xvsrarni_b_h:
4107 case Intrinsic::loongarch_lasx_xvssrlni_b_h:
4108 case Intrinsic::loongarch_lasx_xvssrani_b_h:
4109 case Intrinsic::loongarch_lasx_xvssrlni_bu_h:
4110 case Intrinsic::loongarch_lasx_xvssrani_bu_h:
4111 case Intrinsic::loongarch_lasx_xvssrlrni_b_h:
4112 case Intrinsic::loongarch_lasx_xvssrarni_b_h:
4113 case Intrinsic::loongarch_lasx_xvssrlrni_bu_h:
4114 case Intrinsic::loongarch_lasx_xvssrarni_bu_h:
4115 return checkIntrinsicImmArg<4>(Op, 3, DAG);
4116 case Intrinsic::loongarch_lsx_vsat_w:
4117 case Intrinsic::loongarch_lsx_vsat_wu:
4118 case Intrinsic::loongarch_lsx_vrotri_w:
4119 case Intrinsic::loongarch_lsx_vsllwil_d_w:
4120 case Intrinsic::loongarch_lsx_vsllwil_du_wu:
4121 case Intrinsic::loongarch_lsx_vsrlri_w:
4122 case Intrinsic::loongarch_lsx_vsrari_w:
4123 case Intrinsic::loongarch_lsx_vslei_bu:
4124 case Intrinsic::loongarch_lsx_vslei_hu:
4125 case Intrinsic::loongarch_lsx_vslei_wu:
4126 case Intrinsic::loongarch_lsx_vslei_du:
4127 case Intrinsic::loongarch_lsx_vslti_bu:
4128 case Intrinsic::loongarch_lsx_vslti_hu:
4129 case Intrinsic::loongarch_lsx_vslti_wu:
4130 case Intrinsic::loongarch_lsx_vslti_du:
4131 case Intrinsic::loongarch_lsx_vbsll_v:
4132 case Intrinsic::loongarch_lsx_vbsrl_v:
4133 case Intrinsic::loongarch_lasx_xvsat_w:
4134 case Intrinsic::loongarch_lasx_xvsat_wu:
4135 case Intrinsic::loongarch_lasx_xvrotri_w:
4136 case Intrinsic::loongarch_lasx_xvsllwil_d_w:
4137 case Intrinsic::loongarch_lasx_xvsllwil_du_wu:
4138 case Intrinsic::loongarch_lasx_xvsrlri_w:
4139 case Intrinsic::loongarch_lasx_xvsrari_w:
4140 case Intrinsic::loongarch_lasx_xvslei_bu:
4141 case Intrinsic::loongarch_lasx_xvslei_hu:
4142 case Intrinsic::loongarch_lasx_xvslei_wu:
4143 case Intrinsic::loongarch_lasx_xvslei_du:
4144 case Intrinsic::loongarch_lasx_xvslti_bu:
4145 case Intrinsic::loongarch_lasx_xvslti_hu:
4146 case Intrinsic::loongarch_lasx_xvslti_wu:
4147 case Intrinsic::loongarch_lasx_xvslti_du:
4148 case Intrinsic::loongarch_lasx_xvbsll_v:
4149 case Intrinsic::loongarch_lasx_xvbsrl_v:
4150 return checkIntrinsicImmArg<5>(Op, 2, DAG);
4151 case Intrinsic::loongarch_lsx_vseqi_b:
4152 case Intrinsic::loongarch_lsx_vseqi_h:
4153 case Intrinsic::loongarch_lsx_vseqi_w:
4154 case Intrinsic::loongarch_lsx_vseqi_d:
4155 case Intrinsic::loongarch_lsx_vslei_b:
4156 case Intrinsic::loongarch_lsx_vslei_h:
4157 case Intrinsic::loongarch_lsx_vslei_w:
4158 case Intrinsic::loongarch_lsx_vslei_d:
4159 case Intrinsic::loongarch_lsx_vslti_b:
4160 case Intrinsic::loongarch_lsx_vslti_h:
4161 case Intrinsic::loongarch_lsx_vslti_w:
4162 case Intrinsic::loongarch_lsx_vslti_d:
4163 case Intrinsic::loongarch_lasx_xvseqi_b:
4164 case Intrinsic::loongarch_lasx_xvseqi_h:
4165 case Intrinsic::loongarch_lasx_xvseqi_w:
4166 case Intrinsic::loongarch_lasx_xvseqi_d:
4167 case Intrinsic::loongarch_lasx_xvslei_b:
4168 case Intrinsic::loongarch_lasx_xvslei_h:
4169 case Intrinsic::loongarch_lasx_xvslei_w:
4170 case Intrinsic::loongarch_lasx_xvslei_d:
4171 case Intrinsic::loongarch_lasx_xvslti_b:
4172 case Intrinsic::loongarch_lasx_xvslti_h:
4173 case Intrinsic::loongarch_lasx_xvslti_w:
4174 case Intrinsic::loongarch_lasx_xvslti_d:
4175 return checkIntrinsicImmArg<5>(Op, 2, DAG, /*IsSigned=*/true);
4176 case Intrinsic::loongarch_lsx_vsrlni_h_w:
4177 case Intrinsic::loongarch_lsx_vsrani_h_w:
4178 case Intrinsic::loongarch_lsx_vsrlrni_h_w:
4179 case Intrinsic::loongarch_lsx_vsrarni_h_w:
4180 case Intrinsic::loongarch_lsx_vssrlni_h_w:
4181 case Intrinsic::loongarch_lsx_vssrani_h_w:
4182 case Intrinsic::loongarch_lsx_vssrlni_hu_w:
4183 case Intrinsic::loongarch_lsx_vssrani_hu_w:
4184 case Intrinsic::loongarch_lsx_vssrlrni_h_w:
4185 case Intrinsic::loongarch_lsx_vssrarni_h_w:
4186 case Intrinsic::loongarch_lsx_vssrlrni_hu_w:
4187 case Intrinsic::loongarch_lsx_vssrarni_hu_w:
4188 case Intrinsic::loongarch_lsx_vfrstpi_b:
4189 case Intrinsic::loongarch_lsx_vfrstpi_h:
4190 case Intrinsic::loongarch_lasx_xvsrlni_h_w:
4191 case Intrinsic::loongarch_lasx_xvsrani_h_w:
4192 case Intrinsic::loongarch_lasx_xvsrlrni_h_w:
4193 case Intrinsic::loongarch_lasx_xvsrarni_h_w:
4194 case Intrinsic::loongarch_lasx_xvssrlni_h_w:
4195 case Intrinsic::loongarch_lasx_xvssrani_h_w:
4196 case Intrinsic::loongarch_lasx_xvssrlni_hu_w:
4197 case Intrinsic::loongarch_lasx_xvssrani_hu_w:
4198 case Intrinsic::loongarch_lasx_xvssrlrni_h_w:
4199 case Intrinsic::loongarch_lasx_xvssrarni_h_w:
4200 case Intrinsic::loongarch_lasx_xvssrlrni_hu_w:
4201 case Intrinsic::loongarch_lasx_xvssrarni_hu_w:
4202 case Intrinsic::loongarch_lasx_xvfrstpi_b:
4203 case Intrinsic::loongarch_lasx_xvfrstpi_h:
4204 return checkIntrinsicImmArg<5>(Op, 3, DAG);
4205 case Intrinsic::loongarch_lsx_vsat_d:
4206 case Intrinsic::loongarch_lsx_vsat_du:
4207 case Intrinsic::loongarch_lsx_vrotri_d:
4208 case Intrinsic::loongarch_lsx_vsrlri_d:
4209 case Intrinsic::loongarch_lsx_vsrari_d:
4210 case Intrinsic::loongarch_lasx_xvsat_d:
4211 case Intrinsic::loongarch_lasx_xvsat_du:
4212 case Intrinsic::loongarch_lasx_xvrotri_d:
4213 case Intrinsic::loongarch_lasx_xvsrlri_d:
4214 case Intrinsic::loongarch_lasx_xvsrari_d:
4215 return checkIntrinsicImmArg<6>(Op, 2, DAG);
4216 case Intrinsic::loongarch_lsx_vsrlni_w_d:
4217 case Intrinsic::loongarch_lsx_vsrani_w_d:
4218 case Intrinsic::loongarch_lsx_vsrlrni_w_d:
4219 case Intrinsic::loongarch_lsx_vsrarni_w_d:
4220 case Intrinsic::loongarch_lsx_vssrlni_w_d:
4221 case Intrinsic::loongarch_lsx_vssrani_w_d:
4222 case Intrinsic::loongarch_lsx_vssrlni_wu_d:
4223 case Intrinsic::loongarch_lsx_vssrani_wu_d:
4224 case Intrinsic::loongarch_lsx_vssrlrni_w_d:
4225 case Intrinsic::loongarch_lsx_vssrarni_w_d:
4226 case Intrinsic::loongarch_lsx_vssrlrni_wu_d:
4227 case Intrinsic::loongarch_lsx_vssrarni_wu_d:
4228 case Intrinsic::loongarch_lasx_xvsrlni_w_d:
4229 case Intrinsic::loongarch_lasx_xvsrani_w_d:
4230 case Intrinsic::loongarch_lasx_xvsrlrni_w_d:
4231 case Intrinsic::loongarch_lasx_xvsrarni_w_d:
4232 case Intrinsic::loongarch_lasx_xvssrlni_w_d:
4233 case Intrinsic::loongarch_lasx_xvssrani_w_d:
4234 case Intrinsic::loongarch_lasx_xvssrlni_wu_d:
4235 case Intrinsic::loongarch_lasx_xvssrani_wu_d:
4236 case Intrinsic::loongarch_lasx_xvssrlrni_w_d:
4237 case Intrinsic::loongarch_lasx_xvssrarni_w_d:
4238 case Intrinsic::loongarch_lasx_xvssrlrni_wu_d:
4239 case Intrinsic::loongarch_lasx_xvssrarni_wu_d:
4240 return checkIntrinsicImmArg<6>(Op, 3, DAG);
4241 case Intrinsic::loongarch_lsx_vsrlni_d_q:
4242 case Intrinsic::loongarch_lsx_vsrani_d_q:
4243 case Intrinsic::loongarch_lsx_vsrlrni_d_q:
4244 case Intrinsic::loongarch_lsx_vsrarni_d_q:
4245 case Intrinsic::loongarch_lsx_vssrlni_d_q:
4246 case Intrinsic::loongarch_lsx_vssrani_d_q:
4247 case Intrinsic::loongarch_lsx_vssrlni_du_q:
4248 case Intrinsic::loongarch_lsx_vssrani_du_q:
4249 case Intrinsic::loongarch_lsx_vssrlrni_d_q:
4250 case Intrinsic::loongarch_lsx_vssrarni_d_q:
4251 case Intrinsic::loongarch_lsx_vssrlrni_du_q:
4252 case Intrinsic::loongarch_lsx_vssrarni_du_q:
4253 case Intrinsic::loongarch_lasx_xvsrlni_d_q:
4254 case Intrinsic::loongarch_lasx_xvsrani_d_q:
4255 case Intrinsic::loongarch_lasx_xvsrlrni_d_q:
4256 case Intrinsic::loongarch_lasx_xvsrarni_d_q:
4257 case Intrinsic::loongarch_lasx_xvssrlni_d_q:
4258 case Intrinsic::loongarch_lasx_xvssrani_d_q:
4259 case Intrinsic::loongarch_lasx_xvssrlni_du_q:
4260 case Intrinsic::loongarch_lasx_xvssrani_du_q:
4261 case Intrinsic::loongarch_lasx_xvssrlrni_d_q:
4262 case Intrinsic::loongarch_lasx_xvssrarni_d_q:
4263 case Intrinsic::loongarch_lasx_xvssrlrni_du_q:
4264 case Intrinsic::loongarch_lasx_xvssrarni_du_q:
4265 return checkIntrinsicImmArg<7>(Op, 3, DAG);
4266 case Intrinsic::loongarch_lsx_vnori_b:
4267 case Intrinsic::loongarch_lsx_vshuf4i_b:
4268 case Intrinsic::loongarch_lsx_vshuf4i_h:
4269 case Intrinsic::loongarch_lsx_vshuf4i_w:
4270 case Intrinsic::loongarch_lasx_xvnori_b:
4271 case Intrinsic::loongarch_lasx_xvshuf4i_b:
4272 case Intrinsic::loongarch_lasx_xvshuf4i_h:
4273 case Intrinsic::loongarch_lasx_xvshuf4i_w:
4274 case Intrinsic::loongarch_lasx_xvpermi_d:
4275 return checkIntrinsicImmArg<8>(Op, 2, DAG);
4276 case Intrinsic::loongarch_lsx_vshuf4i_d:
4277 case Intrinsic::loongarch_lsx_vpermi_w:
4278 case Intrinsic::loongarch_lsx_vbitseli_b:
4279 case Intrinsic::loongarch_lsx_vextrins_b:
4280 case Intrinsic::loongarch_lsx_vextrins_h:
4281 case Intrinsic::loongarch_lsx_vextrins_w:
4282 case Intrinsic::loongarch_lsx_vextrins_d:
4283 case Intrinsic::loongarch_lasx_xvshuf4i_d:
4284 case Intrinsic::loongarch_lasx_xvpermi_w:
4285 case Intrinsic::loongarch_lasx_xvpermi_q:
4286 case Intrinsic::loongarch_lasx_xvbitseli_b:
4287 case Intrinsic::loongarch_lasx_xvextrins_b:
4288 case Intrinsic::loongarch_lasx_xvextrins_h:
4289 case Intrinsic::loongarch_lasx_xvextrins_w:
4290 case Intrinsic::loongarch_lasx_xvextrins_d:
4291 return checkIntrinsicImmArg<8>(Op, 3, DAG);
4292 case Intrinsic::loongarch_lsx_vrepli_b:
4293 case Intrinsic::loongarch_lsx_vrepli_h:
4294 case Intrinsic::loongarch_lsx_vrepli_w:
4295 case Intrinsic::loongarch_lsx_vrepli_d:
4296 case Intrinsic::loongarch_lasx_xvrepli_b:
4297 case Intrinsic::loongarch_lasx_xvrepli_h:
4298 case Intrinsic::loongarch_lasx_xvrepli_w:
4299 case Intrinsic::loongarch_lasx_xvrepli_d:
4300 return checkIntrinsicImmArg<10>(Op, 1, DAG, /*IsSigned=*/true);
4301 case Intrinsic::loongarch_lsx_vldi:
4302 case Intrinsic::loongarch_lasx_xvldi:
4303 return checkIntrinsicImmArg<13>(Op, 1, DAG, /*IsSigned=*/true);
4304 }
4305}
4306
4307 // Helper function that emits an error message for intrinsics with a chain
4308 // and returns the merge values of a UNDEF and the chain.
4309 static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op,
4310 StringRef ErrorMsg,
4311 SelectionDAG &DAG) {
4312 DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
4313 return DAG.getMergeValues({DAG.getUNDEF(Op.getValueType()), Op.getOperand(0)},
4314 SDLoc(Op));
4315}
4316
4317SDValue
4318LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
4319 SelectionDAG &DAG) const {
4320 SDLoc DL(Op);
4321 MVT GRLenVT = Subtarget.getGRLenVT();
4322 EVT VT = Op.getValueType();
4323 SDValue Chain = Op.getOperand(0);
4324 const StringRef ErrorMsgOOR = "argument out of range";
4325 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
4326 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
4327
4328 switch (Op.getConstantOperandVal(1)) {
4329 default:
4330 return Op;
4331 case Intrinsic::loongarch_crc_w_b_w:
4332 case Intrinsic::loongarch_crc_w_h_w:
4333 case Intrinsic::loongarch_crc_w_w_w:
4334 case Intrinsic::loongarch_crc_w_d_w:
4335 case Intrinsic::loongarch_crcc_w_b_w:
4336 case Intrinsic::loongarch_crcc_w_h_w:
4337 case Intrinsic::loongarch_crcc_w_w_w:
4338 case Intrinsic::loongarch_crcc_w_d_w:
4339 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqLA64, DAG);
4340 case Intrinsic::loongarch_csrrd_w:
4341 case Intrinsic::loongarch_csrrd_d: {
4342 unsigned Imm = Op.getConstantOperandVal(2);
4343 return !isUInt<14>(Imm)
4344 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4345 : DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
4346 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
4347 }
4348 case Intrinsic::loongarch_csrwr_w:
4349 case Intrinsic::loongarch_csrwr_d: {
4350 unsigned Imm = Op.getConstantOperandVal(3);
4351 return !isUInt<14>(Imm)
4352 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4353 : DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
4354 {Chain, Op.getOperand(2),
4355 DAG.getConstant(Imm, DL, GRLenVT)});
4356 }
4357 case Intrinsic::loongarch_csrxchg_w:
4358 case Intrinsic::loongarch_csrxchg_d: {
4359 unsigned Imm = Op.getConstantOperandVal(4);
4360 return !isUInt<14>(Imm)
4361 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4362 : DAG.getNode(LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
4363 {Chain, Op.getOperand(2), Op.getOperand(3),
4364 DAG.getConstant(Imm, DL, GRLenVT)});
4365 }
4366 case Intrinsic::loongarch_iocsrrd_d: {
4367 return DAG.getNode(
4368 LoongArchISD::IOCSRRD_D, DL, {GRLenVT, MVT::Other},
4369 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2))});
4370 }
4371#define IOCSRRD_CASE(NAME, NODE) \
4372 case Intrinsic::loongarch_##NAME: { \
4373 return DAG.getNode(LoongArchISD::NODE, DL, {GRLenVT, MVT::Other}, \
4374 {Chain, Op.getOperand(2)}); \
4375 }
4376 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
4377 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
4378 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
4379#undef IOCSRRD_CASE
4380 case Intrinsic::loongarch_cpucfg: {
4381 return DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
4382 {Chain, Op.getOperand(2)});
4383 }
4384 case Intrinsic::loongarch_lddir_d: {
4385 unsigned Imm = Op.getConstantOperandVal(3);
4386 return !isUInt<8>(Imm)
4387 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4388 : Op;
4389 }
4390 case Intrinsic::loongarch_movfcsr2gr: {
4391 if (!Subtarget.hasBasicF())
4392 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqF, DAG);
4393 unsigned Imm = Op.getConstantOperandVal(2);
4394 return !isUInt<2>(Imm)
4395 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4396 : DAG.getNode(LoongArchISD::MOVFCSR2GR, DL, {VT, MVT::Other},
4397 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
4398 }
4399 case Intrinsic::loongarch_lsx_vld:
4400 case Intrinsic::loongarch_lsx_vldrepl_b:
4401 case Intrinsic::loongarch_lasx_xvld:
4402 case Intrinsic::loongarch_lasx_xvldrepl_b:
4403 return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
4404 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4405 : SDValue();
4406 case Intrinsic::loongarch_lsx_vldrepl_h:
4407 case Intrinsic::loongarch_lasx_xvldrepl_h:
4408 return !isShiftedInt<11, 1>(
4409 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
4410 ? emitIntrinsicWithChainErrorMessage(
4411 Op, "argument out of range or not a multiple of 2", DAG)
4412 : SDValue();
4413 case Intrinsic::loongarch_lsx_vldrepl_w:
4414 case Intrinsic::loongarch_lasx_xvldrepl_w:
4415 return !isShiftedInt<10, 2>(
4416 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
4417 ? emitIntrinsicWithChainErrorMessage(
4418 Op, "argument out of range or not a multiple of 4", DAG)
4419 : SDValue();
4420 case Intrinsic::loongarch_lsx_vldrepl_d:
4421 case Intrinsic::loongarch_lasx_xvldrepl_d:
4422 return !isShiftedInt<9, 3>(
4423 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
4424 ? emitIntrinsicWithChainErrorMessage(
4425 Op, "argument out of range or not a multiple of 8", DAG)
4426 : SDValue();
4427 }
4428}
4429
4430 // Helper function that emits an error message for intrinsics with a void
4431 // return value and returns the chain.
4432 static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg,
4433 SelectionDAG &DAG) {
4434
4435 DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
4436 return Op.getOperand(0);
4437}
4438
4439SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op,
4440 SelectionDAG &DAG) const {
4441 SDLoc DL(Op);
4442 MVT GRLenVT = Subtarget.getGRLenVT();
4443 SDValue Chain = Op.getOperand(0);
4444 uint64_t IntrinsicEnum = Op.getConstantOperandVal(1);
4445 SDValue Op2 = Op.getOperand(2);
4446 const StringRef ErrorMsgOOR = "argument out of range";
4447 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
4448 const StringRef ErrorMsgReqLA32 = "requires loongarch32";
4449 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
4450
4451 switch (IntrinsicEnum) {
4452 default:
4453 // TODO: Add more Intrinsics.
4454 return SDValue();
4455 case Intrinsic::loongarch_cacop_d:
4456 case Intrinsic::loongarch_cacop_w: {
4457 if (IntrinsicEnum == Intrinsic::loongarch_cacop_d && !Subtarget.is64Bit())
4458 return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG);
4459 if (IntrinsicEnum == Intrinsic::loongarch_cacop_w && Subtarget.is64Bit())
4460 return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA32, DAG);
4461 // call void @llvm.loongarch.cacop.[d/w](uimm5, rj, simm12)
4462 unsigned Imm1 = Op2->getAsZExtVal();
4463 int Imm2 = cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue();
4464 if (!isUInt<5>(Imm1) || !isInt<12>(Imm2))
4465 return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
4466 return Op;
4467 }
4468 case Intrinsic::loongarch_dbar: {
4469 unsigned Imm = Op2->getAsZExtVal();
4470 return !isUInt<15>(Imm)
4471 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4472 : DAG.getNode(LoongArchISD::DBAR, DL, MVT::Other, Chain,
4473 DAG.getConstant(Imm, DL, GRLenVT));
4474 }
4475 case Intrinsic::loongarch_ibar: {
4476 unsigned Imm = Op2->getAsZExtVal();
4477 return !isUInt<15>(Imm)
4478 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4479 : DAG.getNode(LoongArchISD::IBAR, DL, MVT::Other, Chain,
4480 DAG.getConstant(Imm, DL, GRLenVT));
4481 }
4482 case Intrinsic::loongarch_break: {
4483 unsigned Imm = Op2->getAsZExtVal();
4484 return !isUInt<15>(Imm)
4485 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4486 : DAG.getNode(LoongArchISD::BREAK, DL, MVT::Other, Chain,
4487 DAG.getConstant(Imm, DL, GRLenVT));
4488 }
4489 case Intrinsic::loongarch_movgr2fcsr: {
4490 if (!Subtarget.hasBasicF())
4491 return emitIntrinsicErrorMessage(Op, ErrorMsgReqF, DAG);
4492 unsigned Imm = Op2->getAsZExtVal();
4493 return !isUInt<2>(Imm)
4494 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4495 : DAG.getNode(LoongArchISD::MOVGR2FCSR, DL, MVT::Other, Chain,
4496 DAG.getConstant(Imm, DL, GRLenVT),
4497 DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT,
4498 Op.getOperand(3)));
4499 }
4500 case Intrinsic::loongarch_syscall: {
4501 unsigned Imm = Op2->getAsZExtVal();
4502 return !isUInt<15>(Imm)
4503 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4504 : DAG.getNode(LoongArchISD::SYSCALL, DL, MVT::Other, Chain,
4505 DAG.getConstant(Imm, DL, GRLenVT));
4506 }
4507#define IOCSRWR_CASE(NAME, NODE) \
4508 case Intrinsic::loongarch_##NAME: { \
4509 SDValue Op3 = Op.getOperand(3); \
4510 return Subtarget.is64Bit() \
4511 ? DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, \
4512 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
4513 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op3)) \
4514 : DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, Op2, \
4515 Op3); \
4516 }
4517 IOCSRWR_CASE(iocsrwr_b, IOCSRWR_B);
4518 IOCSRWR_CASE(iocsrwr_h, IOCSRWR_H);
4519 IOCSRWR_CASE(iocsrwr_w, IOCSRWR_W);
4520#undef IOCSRWR_CASE
4521 case Intrinsic::loongarch_iocsrwr_d: {
4522 return !Subtarget.is64Bit()
4523 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
4524 : DAG.getNode(LoongArchISD::IOCSRWR_D, DL, MVT::Other, Chain,
4525 Op2,
4526 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
4527 Op.getOperand(3)));
4528 }
4529#define ASRT_LE_GT_CASE(NAME) \
4530 case Intrinsic::loongarch_##NAME: { \
4531 return !Subtarget.is64Bit() \
4532 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG) \
4533 : Op; \
4534 }
4535 ASRT_LE_GT_CASE(asrtle_d)
4536 ASRT_LE_GT_CASE(asrtgt_d)
4537#undef ASRT_LE_GT_CASE
4538 case Intrinsic::loongarch_ldpte_d: {
4539 unsigned Imm = Op.getConstantOperandVal(3);
4540 return !Subtarget.is64Bit()
4541 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
4542 : !isUInt<8>(Imm) ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4543 : Op;
4544 }
4545 case Intrinsic::loongarch_lsx_vst:
4546 case Intrinsic::loongarch_lasx_xvst:
4547 return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue())
4548 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4549 : SDValue();
4550 case Intrinsic::loongarch_lasx_xvstelm_b:
4551 return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4552 !isUInt<5>(Op.getConstantOperandVal(5)))
4553 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4554 : SDValue();
4555 case Intrinsic::loongarch_lsx_vstelm_b:
4556 return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4557 !isUInt<4>(Op.getConstantOperandVal(5)))
4558 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4559 : SDValue();
4560 case Intrinsic::loongarch_lasx_xvstelm_h:
4561 return (!isShiftedInt<8, 1>(
4562 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4563 !isUInt<4>(Op.getConstantOperandVal(5)))
4564 ? emitIntrinsicErrorMessage(
4565 Op, "argument out of range or not a multiple of 2", DAG)
4566 : SDValue();
4567 case Intrinsic::loongarch_lsx_vstelm_h:
4568 return (!isShiftedInt<8, 1>(
4569 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4570 !isUInt<3>(Op.getConstantOperandVal(5)))
4571 ? emitIntrinsicErrorMessage(
4572 Op, "argument out of range or not a multiple of 2", DAG)
4573 : SDValue();
4574 case Intrinsic::loongarch_lasx_xvstelm_w:
4575 return (!isShiftedInt<8, 2>(
4576 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4577 !isUInt<3>(Op.getConstantOperandVal(5)))
4578 ? emitIntrinsicErrorMessage(
4579 Op, "argument out of range or not a multiple of 4", DAG)
4580 : SDValue();
4581 case Intrinsic::loongarch_lsx_vstelm_w:
4582 return (!isShiftedInt<8, 2>(
4583 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4584 !isUInt<2>(Op.getConstantOperandVal(5)))
4585 ? emitIntrinsicErrorMessage(
4586 Op, "argument out of range or not a multiple of 4", DAG)
4587 : SDValue();
4588 case Intrinsic::loongarch_lasx_xvstelm_d:
4589 return (!isShiftedInt<8, 3>(
4590 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4591 !isUInt<2>(Op.getConstantOperandVal(5)))
4592 ? emitIntrinsicErrorMessage(
4593 Op, "argument out of range or not a multiple of 8", DAG)
4594 : SDValue();
4595 case Intrinsic::loongarch_lsx_vstelm_d:
4596 return (!isShiftedInt<8, 3>(
4597 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4598 !isUInt<1>(Op.getConstantOperandVal(5)))
4599 ? emitIntrinsicErrorMessage(
4600 Op, "argument out of range or not a multiple of 8", DAG)
4601 : SDValue();
4602 }
4603}
4604
4605SDValue LoongArchTargetLowering::lowerShiftLeftParts(SDValue Op,
4606 SelectionDAG &DAG) const {
4607 SDLoc DL(Op);
4608 SDValue Lo = Op.getOperand(0);
4609 SDValue Hi = Op.getOperand(1);
4610 SDValue Shamt = Op.getOperand(2);
4611 EVT VT = Lo.getValueType();
4612
4613 // if Shamt-GRLen < 0: // Shamt < GRLen
4614 // Lo = Lo << Shamt
4615 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u ((GRLen-1) ^ Shamt))
4616 // else:
4617 // Lo = 0
4618 // Hi = Lo << (Shamt-GRLen)
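//
// For example, with GRLen == 32 and Shamt == 40, Shamt-GRLen == 8, so the
// result is Lo = 0 and Hi = Lo << 8.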
4619
4620 SDValue Zero = DAG.getConstant(0, DL, VT);
4621 SDValue One = DAG.getConstant(1, DL, VT);
4622 SDValue MinusGRLen =
4623 DAG.getSignedConstant(-(int)Subtarget.getGRLen(), DL, VT);
4624 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
4625 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
4626 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
4627
4628 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
4629 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
4630 SDValue ShiftRightLo =
4631 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, GRLenMinus1Shamt);
4632 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
4633 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
4634 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusGRLen);
4635
4636 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
4637
4638 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
4639 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
4640
4641 SDValue Parts[2] = {Lo, Hi};
4642 return DAG.getMergeValues(Parts, DL);
4643}
4644
4645SDValue LoongArchTargetLowering::lowerShiftRightParts(SDValue Op,
4646 SelectionDAG &DAG,
4647 bool IsSRA) const {
4648 SDLoc DL(Op);
4649 SDValue Lo = Op.getOperand(0);
4650 SDValue Hi = Op.getOperand(1);
4651 SDValue Shamt = Op.getOperand(2);
4652 EVT VT = Lo.getValueType();
4653
4654 // SRA expansion:
4655 // if Shamt-GRLen < 0: // Shamt < GRLen
4656 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (Shamt ^ (GRLen-1)))
4657 // Hi = Hi >>s Shamt
4658 // else:
4659 // Lo = Hi >>s (Shamt-GRLen);
4660 // Hi = Hi >>s (GRLen-1)
4661 //
4662 // SRL expansion:
4663 // if Shamt-GRLen < 0: // Shamt < GRLen
4664 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (Shamt ^ (GRLen-1)))
4665 // Hi = Hi >>u Shamt
4666 // else:
4667 // Lo = Hi >>u (Shamt-GRLen);
4668 // Hi = 0;
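//
// For example, an SRL with GRLen == 32 and Shamt == 8 gives
// Lo = (Lo >>u 8) | (Hi << 24) and Hi = Hi >>u 8, since 8 ^ 31 == 23 and the
// extra << 1 completes the shift by 24.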
4669
4670 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
4671
4672 SDValue Zero = DAG.getConstant(0, DL, VT);
4673 SDValue One = DAG.getConstant(1, DL, VT);
4674 SDValue MinusGRLen =
4675 DAG.getSignedConstant(-(int)Subtarget.getGRLen(), DL, VT);
4676 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
4677 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
4678 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
4679
4680 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
4681 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
4682 SDValue ShiftLeftHi =
4683 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, GRLenMinus1Shamt);
4684 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
4685 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
4686 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusGRLen);
4687 SDValue HiFalse =
4688 IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, GRLenMinus1) : Zero;
4689
4690 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
4691
4692 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
4693 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
4694
4695 SDValue Parts[2] = {Lo, Hi};
4696 return DAG.getMergeValues(Parts, DL);
4697}
4698
4699// Returns the opcode of the target-specific SDNode that implements the 32-bit
4700// form of the given Opcode.
4701static unsigned getLoongArchWOpcode(unsigned Opcode) {
4702 switch (Opcode) {
4703 default:
4704 llvm_unreachable("Unexpected opcode");
4705 case ISD::SDIV:
4706 return LoongArchISD::DIV_W;
4707 case ISD::UDIV:
4708 return LoongArchISD::DIV_WU;
4709 case ISD::SREM:
4710 return LoongArchISD::MOD_W;
4711 case ISD::UREM:
4712 return LoongArchISD::MOD_WU;
4713 case ISD::SHL:
4714 return LoongArchISD::SLL_W;
4715 case ISD::SRA:
4716 return LoongArchISD::SRA_W;
4717 case ISD::SRL:
4718 return LoongArchISD::SRL_W;
4719 case ISD::ROTL:
4720 case ISD::ROTR:
4721 return LoongArchISD::ROTR_W;
4722 case ISD::CTTZ:
4723 return LoongArchISD::CTZ_W;
4724 case ISD::CTLZ:
4725 return LoongArchISD::CLZ_W;
4726 }
4727}
4728
4729 // Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
4730 // node. Because i8/i16/i32 aren't legal types for LA64, these operations would
4731 // otherwise be promoted to i64, making it difficult to select the
4732 // SLL_W/.../*W instructions later, because the fact that the operation was
4733 // originally of type i8/i16/i32 is lost.
4734 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp,
4735 unsigned ExtOpc = ISD::ANY_EXTEND) {
4736 SDLoc DL(N);
4737 unsigned WOpcode = getLoongArchWOpcode(N->getOpcode());
4738 SDValue NewOp0, NewRes;
4739
4740 switch (NumOp) {
4741 default:
4742 llvm_unreachable("Unexpected NumOp");
4743 case 1: {
4744 NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
4745 NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0);
4746 break;
4747 }
4748 case 2: {
4749 NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
4750 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
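// There is no 32-bit rotate-left operation; lower ROTL through ROTR_W with
// the complemented amount, since rotl x, n == rotr x, 32 - n.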
4751 if (N->getOpcode() == ISD::ROTL) {
4752 SDValue TmpOp = DAG.getConstant(32, DL, MVT::i64);
4753 NewOp1 = DAG.getNode(ISD::SUB, DL, MVT::i64, TmpOp, NewOp1);
4754 }
4755 NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
4756 break;
4757 }
4758 // TODO: Handle more NumOp values.
4759 }
4760
4761 // ReplaceNodeResults requires we maintain the same type for the return
4762 // value.
4763 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
4764}
4765
4766 // Converts the given 32-bit operation to an i64 operation with sign-extension
4767 // semantics, reducing the number of sign-extension instructions emitted.
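// For example, an i32 ADD on LA64 becomes (sext_inreg (add x, y), i32) on
// i64 values, which instruction selection can then match as a single add.w.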
4768 static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
4769 SDLoc DL(N);
4770 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
4771 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
4772 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
4773 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
4774 DAG.getValueType(MVT::i32));
4775 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
4776}
4777
4778 // Helper function that emits an error message for intrinsics with or without
4779 // a chain, and returns a UNDEF and (when WithChain) the chain as the results.
4780 static void emitErrorAndReplaceIntrinsicResults(
4781 SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG,
4782 StringRef ErrorMsg, bool WithChain = true) {
4783 DAG.getContext()->emitError(N->getOperationName(0) + ": " + ErrorMsg + ".");
4784 Results.push_back(DAG.getUNDEF(N->getValueType(0)));
4785 if (!WithChain)
4786 return;
4787 Results.push_back(N->getOperand(0));
4788}
4789
4790template <unsigned N>
4791static void
4792 replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl<SDValue> &Results,
4793 SelectionDAG &DAG, const LoongArchSubtarget &Subtarget,
4794 unsigned ResOp) {
4795 const StringRef ErrorMsgOOR = "argument out of range";
4796 unsigned Imm = Node->getConstantOperandVal(2);
4797 if (!isUInt<N>(Imm)) {
4798 emitErrorAndReplaceIntrinsicResults(Node, Results, DAG, ErrorMsgOOR,
4799 /*WithChain=*/false);
4800 return;
4801 }
4802 SDLoc DL(Node);
4803 SDValue Vec = Node->getOperand(1);
4804
4805 SDValue PickElt =
4806 DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec,
4807 DAG.getConstant(Imm, DL, Subtarget.getGRLenVT()),
4808 DAG.getValueType(Vec.getValueType().getVectorElementType()));
4809 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, Node->getValueType(0),
4810 PickElt.getValue(0)));
4811}
4812
4813 static void replaceVecCondBranchResults(SDNode *N,
4814 SmallVectorImpl<SDValue> &Results,
4815 SelectionDAG &DAG,
4816 const LoongArchSubtarget &Subtarget,
4817 unsigned ResOp) {
4818 SDLoc DL(N);
4819 SDValue Vec = N->getOperand(1);
4820
4821 SDValue CB = DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec);
4822 Results.push_back(
4823 DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), CB.getValue(0)));
4824}
4825
4826static void
4827 replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
4828 SelectionDAG &DAG,
4829 const LoongArchSubtarget &Subtarget) {
4830 switch (N->getConstantOperandVal(0)) {
4831 default:
4832 llvm_unreachable("Unexpected Intrinsic.");
4833 case Intrinsic::loongarch_lsx_vpickve2gr_b:
4834 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
4835 LoongArchISD::VPICK_SEXT_ELT);
4836 break;
4837 case Intrinsic::loongarch_lsx_vpickve2gr_h:
4838 case Intrinsic::loongarch_lasx_xvpickve2gr_w:
4839 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
4840 LoongArchISD::VPICK_SEXT_ELT);
4841 break;
4842 case Intrinsic::loongarch_lsx_vpickve2gr_w:
4843 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
4844 LoongArchISD::VPICK_SEXT_ELT);
4845 break;
4846 case Intrinsic::loongarch_lsx_vpickve2gr_bu:
4847 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
4848 LoongArchISD::VPICK_ZEXT_ELT);
4849 break;
4850 case Intrinsic::loongarch_lsx_vpickve2gr_hu:
4851 case Intrinsic::loongarch_lasx_xvpickve2gr_wu:
4852 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
4853 LoongArchISD::VPICK_ZEXT_ELT);
4854 break;
4855 case Intrinsic::loongarch_lsx_vpickve2gr_wu:
4856 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
4857 LoongArchISD::VPICK_ZEXT_ELT);
4858 break;
4859 case Intrinsic::loongarch_lsx_bz_b:
4860 case Intrinsic::loongarch_lsx_bz_h:
4861 case Intrinsic::loongarch_lsx_bz_w:
4862 case Intrinsic::loongarch_lsx_bz_d:
4863 case Intrinsic::loongarch_lasx_xbz_b:
4864 case Intrinsic::loongarch_lasx_xbz_h:
4865 case Intrinsic::loongarch_lasx_xbz_w:
4866 case Intrinsic::loongarch_lasx_xbz_d:
4867 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
4868 LoongArchISD::VALL_ZERO);
4869 break;
4870 case Intrinsic::loongarch_lsx_bz_v:
4871 case Intrinsic::loongarch_lasx_xbz_v:
4872 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
4873 LoongArchISD::VANY_ZERO);
4874 break;
4875 case Intrinsic::loongarch_lsx_bnz_b:
4876 case Intrinsic::loongarch_lsx_bnz_h:
4877 case Intrinsic::loongarch_lsx_bnz_w:
4878 case Intrinsic::loongarch_lsx_bnz_d:
4879 case Intrinsic::loongarch_lasx_xbnz_b:
4880 case Intrinsic::loongarch_lasx_xbnz_h:
4881 case Intrinsic::loongarch_lasx_xbnz_w:
4882 case Intrinsic::loongarch_lasx_xbnz_d:
4883 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
4884 LoongArchISD::VALL_NONZERO);
4885 break;
4886 case Intrinsic::loongarch_lsx_bnz_v:
4887 case Intrinsic::loongarch_lasx_xbnz_v:
4888 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
4889 LoongArchISD::VANY_NONZERO);
4890 break;
4891 }
4892}
4893
4894 static void replaceCMP_XCHG_128Results(SDNode *N,
4895 SmallVectorImpl<SDValue> &Results,
4896 SelectionDAG &DAG) {
4897 assert(N->getValueType(0) == MVT::i128 &&
4898 "AtomicCmpSwap on types less than 128 should be legal");
4899 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
4900
4901 unsigned Opcode;
4902 switch (MemOp->getMergedOrdering()) {
4903 case AtomicOrdering::Acquire:
4904 case AtomicOrdering::AcquireRelease:
4905 case AtomicOrdering::SequentiallyConsistent:
4906 Opcode = LoongArch::PseudoCmpXchg128Acquire;
4907 break;
4908 case AtomicOrdering::Monotonic:
4909 case AtomicOrdering::Release:
4910 Opcode = LoongArch::PseudoCmpXchg128;
4911 break;
4912 default:
4913 llvm_unreachable("Unexpected ordering!");
4914 }
4915
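// The pseudo operates on i64 register pairs, so split the i128 expected and
// new values into low/high halves here and rebuild the i128 result from the
// two result registers with BUILD_PAIR below.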
4916 SDLoc DL(N);
4917 auto CmpVal = DAG.SplitScalar(N->getOperand(2), DL, MVT::i64, MVT::i64);
4918 auto NewVal = DAG.SplitScalar(N->getOperand(3), DL, MVT::i64, MVT::i64);
4919 SDValue Ops[] = {N->getOperand(1), CmpVal.first, CmpVal.second,
4920 NewVal.first, NewVal.second, N->getOperand(0)};
4921
4922 SDNode *CmpSwap = DAG.getMachineNode(
4923 Opcode, SDLoc(N), DAG.getVTList(MVT::i64, MVT::i64, MVT::i64, MVT::Other),
4924 Ops);
4925 DAG.setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
4926 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128,
4927 SDValue(CmpSwap, 0), SDValue(CmpSwap, 1)));
4928 Results.push_back(SDValue(CmpSwap, 3));
4929}
4930
4931 void LoongArchTargetLowering::ReplaceNodeResults(
4932 SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
4933 SDLoc DL(N);
4934 EVT VT = N->getValueType(0);
4935 switch (N->getOpcode()) {
4936 default:
4937 llvm_unreachable("Don't know how to legalize this operation");
4938 case ISD::ADD:
4939 case ISD::SUB:
4940 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
4941 "Unexpected custom legalisation");
4942 Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
4943 break;
4944 case ISD::SDIV:
4945 case ISD::UDIV:
4946 case ISD::SREM:
4947 case ISD::UREM:
4948 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4949 "Unexpected custom legalisation");
4950 Results.push_back(customLegalizeToWOp(N, DAG, 2,
4951 Subtarget.hasDiv32() && VT == MVT::i32
4952 ? ISD::ANY_EXTEND
4953 : ISD::SIGN_EXTEND));
4954 break;
4955 case ISD::SHL:
4956 case ISD::SRA:
4957 case ISD::SRL:
4958 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4959 "Unexpected custom legalisation");
4960 if (N->getOperand(1).getOpcode() != ISD::Constant) {
4961 Results.push_back(customLegalizeToWOp(N, DAG, 2));
4962 break;
4963 }
4964 break;
4965 case ISD::ROTL:
4966 case ISD::ROTR:
4967 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4968 "Unexpected custom legalisation");
4969 Results.push_back(customLegalizeToWOp(N, DAG, 2));
4970 break;
4971 case ISD::FP_TO_SINT: {
4972 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4973 "Unexpected custom legalisation");
4974 SDValue Src = N->getOperand(0);
4975 EVT FVT = EVT::getFloatingPointVT(N->getValueSizeInBits(0));
4976 if (getTypeAction(*DAG.getContext(), Src.getValueType()) !=
4978 TargetLowering::TypeSoftenFloat) {
4979 if (!isTypeLegal(Src.getValueType()))
4979 return;
4980 if (Src.getValueType() == MVT::f16)
4981 Src = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Src);
4982 SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, FVT, Src);
4983 Results.push_back(DAG.getNode(ISD::BITCAST, DL, VT, Dst));
4984 return;
4985 }
4986 // If the FP type needs to be softened, emit a library call using the 'si'
4987 // version. If we left it to default legalization we'd end up with 'di'.
4988 RTLIB::Libcall LC;
4989 LC = RTLIB::getFPTOSINT(Src.getValueType(), VT);
4990 MakeLibCallOptions CallOptions;
4991 EVT OpVT = Src.getValueType();
4992 CallOptions.setTypeListBeforeSoften(OpVT, VT);
4993 SDValue Chain = SDValue();
4994 SDValue Result;
4995 std::tie(Result, Chain) =
4996 makeLibCall(DAG, LC, VT, Src, CallOptions, DL, Chain);
4997 Results.push_back(Result);
4998 break;
4999 }
5000 case ISD::BITCAST: {
5001 SDValue Src = N->getOperand(0);
5002 EVT SrcVT = Src.getValueType();
5003 if (VT == MVT::i32 && SrcVT == MVT::f32 && Subtarget.is64Bit() &&
5004 Subtarget.hasBasicF()) {
5005 SDValue Dst =
5006 DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Src);
5007 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Dst));
5008 } else if (VT == MVT::i64 && SrcVT == MVT::f64 && !Subtarget.is64Bit()) {
5009 SDValue NewReg = DAG.getNode(LoongArchISD::SPLIT_PAIR_F64, DL,
5010 DAG.getVTList(MVT::i32, MVT::i32), Src);
5011 SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,
5012 NewReg.getValue(0), NewReg.getValue(1));
5013 Results.push_back(RetReg);
5014 }
5015 break;
5016 }
5017 case ISD::FP_TO_UINT: {
5018 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
5019 "Unexpected custom legalisation");
5020 auto &TLI = DAG.getTargetLoweringInfo();
5021 SDValue Tmp1, Tmp2;
5022 TLI.expandFP_TO_UINT(N, Tmp1, Tmp2, DAG);
5023 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Tmp1));
5024 break;
5025 }
5026 case ISD::BSWAP: {
5027 SDValue Src = N->getOperand(0);
5028 assert((VT == MVT::i16 || VT == MVT::i32) &&
5029 "Unexpected custom legalization");
5030 MVT GRLenVT = Subtarget.getGRLenVT();
5031 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
5032 SDValue Tmp;
5033 switch (VT.getSizeInBits()) {
5034 default:
5035 llvm_unreachable("Unexpected operand width");
5036 case 16:
5037 Tmp = DAG.getNode(LoongArchISD::REVB_2H, DL, GRLenVT, NewSrc);
5038 break;
5039 case 32:
5040 // Only LA64 will get to here due to the size mismatch between VT and
5041 // GRLenVT, LA32 lowering is directly defined in LoongArchInstrInfo.
5042 Tmp = DAG.getNode(LoongArchISD::REVB_2W, DL, GRLenVT, NewSrc);
5043 break;
5044 }
5045 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
5046 break;
5047 }
5048 case ISD::BITREVERSE: {
5049 SDValue Src = N->getOperand(0);
5050 assert((VT == MVT::i8 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
5051 "Unexpected custom legalization");
5052 MVT GRLenVT = Subtarget.getGRLenVT();
5053 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
5054 SDValue Tmp;
5055 switch (VT.getSizeInBits()) {
5056 default:
5057 llvm_unreachable("Unexpected operand width");
5058 case 8:
5059 Tmp = DAG.getNode(LoongArchISD::BITREV_4B, DL, GRLenVT, NewSrc);
5060 break;
5061 case 32:
5062 Tmp = DAG.getNode(LoongArchISD::BITREV_W, DL, GRLenVT, NewSrc);
5063 break;
5064 }
5065 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
5066 break;
5067 }
5068 case ISD::CTLZ:
5069 case ISD::CTTZ: {
5070 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
5071 "Unexpected custom legalisation");
5072 Results.push_back(customLegalizeToWOp(N, DAG, 1));
5073 break;
5074 }
5075   case ISD::INTRINSIC_W_CHAIN: {
5076     SDValue Chain = N->getOperand(0);
5077 SDValue Op2 = N->getOperand(2);
5078 MVT GRLenVT = Subtarget.getGRLenVT();
5079 const StringRef ErrorMsgOOR = "argument out of range";
5080 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
5081 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
5082
5083 switch (N->getConstantOperandVal(1)) {
5084 default:
5085 llvm_unreachable("Unexpected Intrinsic.");
5086 case Intrinsic::loongarch_movfcsr2gr: {
5087 if (!Subtarget.hasBasicF()) {
5088 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqF);
5089 return;
5090 }
5091 unsigned Imm = Op2->getAsZExtVal();
5092 if (!isUInt<2>(Imm)) {
5093 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
5094 return;
5095 }
5096 SDValue MOVFCSR2GRResults = DAG.getNode(
5097 LoongArchISD::MOVFCSR2GR, SDLoc(N), {MVT::i64, MVT::Other},
5098 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
5099 Results.push_back(
5100 DAG.getNode(ISD::TRUNCATE, DL, VT, MOVFCSR2GRResults.getValue(0)));
5101 Results.push_back(MOVFCSR2GRResults.getValue(1));
5102 break;
5103 }
5104#define CRC_CASE_EXT_BINARYOP(NAME, NODE) \
5105 case Intrinsic::loongarch_##NAME: { \
5106 SDValue NODE = DAG.getNode( \
5107 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
5108 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
5109 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
5110 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
5111 Results.push_back(NODE.getValue(1)); \
5112 break; \
5113 }
5114 CRC_CASE_EXT_BINARYOP(crc_w_b_w, CRC_W_B_W)
5115 CRC_CASE_EXT_BINARYOP(crc_w_h_w, CRC_W_H_W)
5116 CRC_CASE_EXT_BINARYOP(crc_w_w_w, CRC_W_W_W)
5117 CRC_CASE_EXT_BINARYOP(crcc_w_b_w, CRCC_W_B_W)
5118 CRC_CASE_EXT_BINARYOP(crcc_w_h_w, CRCC_W_H_W)
5119 CRC_CASE_EXT_BINARYOP(crcc_w_w_w, CRCC_W_W_W)
5120#undef CRC_CASE_EXT_BINARYOP
5121
5122#define CRC_CASE_EXT_UNARYOP(NAME, NODE) \
5123 case Intrinsic::loongarch_##NAME: { \
5124 SDValue NODE = DAG.getNode( \
5125 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
5126 {Chain, Op2, \
5127 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
5128 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
5129 Results.push_back(NODE.getValue(1)); \
5130 break; \
5131 }
5132 CRC_CASE_EXT_UNARYOP(crc_w_d_w, CRC_W_D_W)
5133 CRC_CASE_EXT_UNARYOP(crcc_w_d_w, CRCC_W_D_W)
5134#undef CRC_CASE_EXT_UNARYOP
5135#define CSR_CASE(ID) \
5136 case Intrinsic::loongarch_##ID: { \
5137 if (!Subtarget.is64Bit()) \
5138 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64); \
5139 break; \
5140 }
5141 CSR_CASE(csrrd_d);
5142 CSR_CASE(csrwr_d);
5143 CSR_CASE(csrxchg_d);
5144 CSR_CASE(iocsrrd_d);
5145#undef CSR_CASE
5146 case Intrinsic::loongarch_csrrd_w: {
5147 unsigned Imm = Op2->getAsZExtVal();
5148 if (!isUInt<14>(Imm)) {
5149 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
5150 return;
5151 }
5152 SDValue CSRRDResults =
5153 DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
5154 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
5155 Results.push_back(
5156 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRRDResults.getValue(0)));
5157 Results.push_back(CSRRDResults.getValue(1));
5158 break;
5159 }
5160 case Intrinsic::loongarch_csrwr_w: {
5161 unsigned Imm = N->getConstantOperandVal(3);
5162 if (!isUInt<14>(Imm)) {
5163 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
5164 return;
5165 }
5166 SDValue CSRWRResults =
5167 DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
5168 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
5169 DAG.getConstant(Imm, DL, GRLenVT)});
5170 Results.push_back(
5171 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRWRResults.getValue(0)));
5172 Results.push_back(CSRWRResults.getValue(1));
5173 break;
5174 }
5175 case Intrinsic::loongarch_csrxchg_w: {
5176 unsigned Imm = N->getConstantOperandVal(4);
5177 if (!isUInt<14>(Imm)) {
5178 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
5179 return;
5180 }
5181 SDValue CSRXCHGResults = DAG.getNode(
5182 LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
5183 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
5184 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3)),
5185 DAG.getConstant(Imm, DL, GRLenVT)});
5186 Results.push_back(
5187 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRXCHGResults.getValue(0)));
5188 Results.push_back(CSRXCHGResults.getValue(1));
5189 break;
5190 }
5191#define IOCSRRD_CASE(NAME, NODE) \
5192 case Intrinsic::loongarch_##NAME: { \
5193 SDValue IOCSRRDResults = \
5194 DAG.getNode(LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
5195 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)}); \
5196 Results.push_back( \
5197 DAG.getNode(ISD::TRUNCATE, DL, VT, IOCSRRDResults.getValue(0))); \
5198 Results.push_back(IOCSRRDResults.getValue(1)); \
5199 break; \
5200 }
5201 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
5202 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
5203 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
5204#undef IOCSRRD_CASE
5205 case Intrinsic::loongarch_cpucfg: {
5206 SDValue CPUCFGResults =
5207 DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
5208 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)});
5209 Results.push_back(
5210 DAG.getNode(ISD::TRUNCATE, DL, VT, CPUCFGResults.getValue(0)));
5211 Results.push_back(CPUCFGResults.getValue(1));
5212 break;
5213 }
5214 case Intrinsic::loongarch_lddir_d: {
5215 if (!Subtarget.is64Bit()) {
5216 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64);
5217 return;
5218 }
5219 break;
5220 }
5221 }
5222 break;
5223 }
5224 case ISD::READ_REGISTER: {
5225 if (Subtarget.is64Bit())
5226 DAG.getContext()->emitError(
5227 "On LA64, only 64-bit registers can be read.");
5228 else
5229 DAG.getContext()->emitError(
5230 "On LA32, only 32-bit registers can be read.");
5231 Results.push_back(DAG.getUNDEF(VT));
5232 Results.push_back(N->getOperand(0));
5233 break;
5234 }
5235   case ISD::INTRINSIC_WO_CHAIN: {
5236     replaceINTRINSIC_WO_CHAINResults(N, Results, DAG, Subtarget);
5237 break;
5238 }
5239 case ISD::LROUND: {
5240 SDValue Op0 = N->getOperand(0);
5241 EVT OpVT = Op0.getValueType();
5242 RTLIB::Libcall LC =
5243 OpVT == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
5244 MakeLibCallOptions CallOptions;
5245 CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64);
5246 SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first;
5247 Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
5248 Results.push_back(Result);
5249 break;
5250 }
5251 case ISD::ATOMIC_CMP_SWAP: {
5253 break;
5254 }
5255 case ISD::TRUNCATE: {
5256 MVT VT = N->getSimpleValueType(0);
5257 if (getTypeAction(*DAG.getContext(), VT) != TypeWidenVector)
5258 return;
5259
5260 MVT WidenVT = getTypeToTransformTo(*DAG.getContext(), VT).getSimpleVT();
5261 SDValue In = N->getOperand(0);
5262 EVT InVT = In.getValueType();
5263 EVT InEltVT = InVT.getVectorElementType();
5264 EVT EltVT = VT.getVectorElementType();
5265 unsigned MinElts = VT.getVectorNumElements();
5266 unsigned WidenNumElts = WidenVT.getVectorNumElements();
5267 unsigned InBits = InVT.getSizeInBits();
5268
5269 if ((128 % InBits) == 0 && WidenVT.is128BitVector()) {
5270 if ((InEltVT.getSizeInBits() % EltVT.getSizeInBits()) == 0) {
5271 int Scale = InEltVT.getSizeInBits() / EltVT.getSizeInBits();
5272 SmallVector<int, 16> TruncMask(WidenNumElts, -1);
5273 for (unsigned I = 0; I < MinElts; ++I)
5274 TruncMask[I] = Scale * I;
5275
5276 unsigned WidenNumElts = 128 / In.getScalarValueSizeInBits();
5277 MVT SVT = In.getSimpleValueType().getScalarType();
5278 MVT VT = MVT::getVectorVT(SVT, WidenNumElts);
5279 SDValue WidenIn =
5280 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), In,
5281 DAG.getVectorIdxConstant(0, DL));
5282 assert(isTypeLegal(WidenVT) && isTypeLegal(WidenIn.getValueType()) &&
5283 "Illegal vector type in truncation");
5284 WidenIn = DAG.getBitcast(WidenVT, WidenIn);
5285 Results.push_back(
5286 DAG.getVectorShuffle(WidenVT, DL, WidenIn, WidenIn, TruncMask));
5287 return;
5288 }
5289 }
5290
5291 break;
5292 }
5293 }
5294}
5295
5296/// Try to fold: (and (xor X, -1), Y) -> (vandn X, Y).
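/// (Illustrative note: for LSX/LASX this maps onto [x]vandn.v, which computes
/// ~vj & vk, so X above supplies the inverted operand.)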
5297 static SDValue combineAndNotIntoVANDN(SDNode *N, const SDLoc &DL,
5298                                       SelectionDAG &DAG) {
5299 assert(N->getOpcode() == ISD::AND && "Unexpected opcode combine into ANDN");
5300
5301 MVT VT = N->getSimpleValueType(0);
5302 if (!VT.is128BitVector() && !VT.is256BitVector())
5303 return SDValue();
5304
5305 SDValue X, Y;
5306 SDValue N0 = N->getOperand(0);
5307 SDValue N1 = N->getOperand(1);
5308
5309 if (SDValue Not = isNOT(N0, DAG)) {
5310 X = Not;
5311 Y = N1;
5312 } else if (SDValue Not = isNOT(N1, DAG)) {
5313 X = Not;
5314 Y = N0;
5315 } else
5316 return SDValue();
5317
5318 X = DAG.getBitcast(VT, X);
5319 Y = DAG.getBitcast(VT, Y);
5320 return DAG.getNode(LoongArchISD::VANDN, DL, VT, X, Y);
5321}
5322
5323 static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
5324                                  TargetLowering::DAGCombinerInfo &DCI,
5325                                  const LoongArchSubtarget &Subtarget) {
5326 if (DCI.isBeforeLegalizeOps())
5327 return SDValue();
5328
5329 SDValue FirstOperand = N->getOperand(0);
5330 SDValue SecondOperand = N->getOperand(1);
5331 unsigned FirstOperandOpc = FirstOperand.getOpcode();
5332 EVT ValTy = N->getValueType(0);
5333 SDLoc DL(N);
5334 uint64_t lsb, msb;
5335 unsigned SMIdx, SMLen;
5336 ConstantSDNode *CN;
5337 SDValue NewOperand;
5338 MVT GRLenVT = Subtarget.getGRLenVT();
5339
5340 if (SDValue R = combineAndNotIntoVANDN(N, DL, DAG))
5341 return R;
5342
5343 // BSTRPICK requires the 32S feature.
5344 if (!Subtarget.has32S())
5345 return SDValue();
5346
5347 // Op's second operand must be a shifted mask.
5348 if (!(CN = dyn_cast<ConstantSDNode>(SecondOperand)) ||
5349 !isShiftedMask_64(CN->getZExtValue(), SMIdx, SMLen))
5350 return SDValue();
5351
5352 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL) {
5353 // Pattern match BSTRPICK.
5354 // $dst = and ((sra or srl) $src , lsb), (2**len - 1)
5355 // => BSTRPICK $dst, $src, msb, lsb
5356 // where msb = lsb + len - 1
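    // For example (illustrative):
    //   $dst = and (srl $src, 8), (2**16 - 1)
    //   => BSTRPICK $dst, $src, 23, 8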
5357
5358 // The second operand of the shift must be an immediate.
5359 if (!(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))))
5360 return SDValue();
5361
5362 lsb = CN->getZExtValue();
5363
5364 // Return if the shifted mask does not start at bit 0 or the sum of its
5365 // length and lsb exceeds the word's size.
5366 if (SMIdx != 0 || lsb + SMLen > ValTy.getSizeInBits())
5367 return SDValue();
5368
5369 NewOperand = FirstOperand.getOperand(0);
5370 } else {
5371 // Pattern match BSTRPICK.
5372     // $dst = and $src, (2**len - 1), if len > 12
5373 // => BSTRPICK $dst, $src, msb, lsb
5374 // where lsb = 0 and msb = len - 1
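    // For example (illustrative):
    //   $dst = and $src, 0xffff (len = 16 > 12)
    //   => BSTRPICK $dst, $src, 15, 0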
5375
5376 // If the mask is <= 0xfff, andi can be used instead.
5377 if (CN->getZExtValue() <= 0xfff)
5378 return SDValue();
5379
5380     // Return if the MSB would exceed the width of the value type.
5381 if (SMIdx + SMLen > ValTy.getSizeInBits())
5382 return SDValue();
5383
5384 if (SMIdx > 0) {
5386       // Omit if the constant has more than 2 uses. This is a conservative
5387       // decision; whether it is a win depends on the HW microarchitecture.
5388       // However, it should always be better for 1 and 2 uses.
5388 if (CN->use_size() > 2)
5389 return SDValue();
5390       // Return if the constant can be materialized with a single LU12I.W.
5391       if ((CN->getZExtValue() & 0xfff) == 0)
5392         return SDValue();
5393       // Return if the constant can be materialized with a single ADDI with
5394       // the zero register.
5395       if (CN->getSExtValue() >= -2048 && CN->getSExtValue() < 0)
5396 return SDValue();
5397 }
5398
5399 lsb = SMIdx;
5400 NewOperand = FirstOperand;
5401 }
5402
5403 msb = lsb + SMLen - 1;
5404 SDValue NR0 = DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy, NewOperand,
5405 DAG.getConstant(msb, DL, GRLenVT),
5406 DAG.getConstant(lsb, DL, GRLenVT));
5407 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL || lsb == 0)
5408 return NR0;
5409 // Try to optimize to
5410 // bstrpick $Rd, $Rs, msb, lsb
5411 // slli $Rd, $Rd, lsb
5412 return DAG.getNode(ISD::SHL, DL, ValTy, NR0,
5413 DAG.getConstant(lsb, DL, GRLenVT));
5414}
5415
5416 static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG,
5417                                  TargetLowering::DAGCombinerInfo &DCI,
5418                                  const LoongArchSubtarget &Subtarget) {
5419 // BSTRPICK requires the 32S feature.
5420 if (!Subtarget.has32S())
5421 return SDValue();
5422
5423 if (DCI.isBeforeLegalizeOps())
5424 return SDValue();
5425
5426 // $dst = srl (and $src, Mask), Shamt
5427 // =>
5428 // BSTRPICK $dst, $src, MaskIdx+MaskLen-1, Shamt
5429 // when Mask is a shifted mask, and MaskIdx <= Shamt <= MaskIdx+MaskLen-1
5430 //
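  // For example (illustrative): srl (and $src, 0xff00), 8 has MaskIdx = 8,
  // MaskLen = 8 and Shamt = 8, giving BSTRPICK $dst, $src, 15, 8.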
5431
5432 SDValue FirstOperand = N->getOperand(0);
5433 ConstantSDNode *CN;
5434 EVT ValTy = N->getValueType(0);
5435 SDLoc DL(N);
5436 MVT GRLenVT = Subtarget.getGRLenVT();
5437 unsigned MaskIdx, MaskLen;
5438 uint64_t Shamt;
5439
5440 // The first operand must be an AND and the second operand of the AND must be
5441 // a shifted mask.
5442 if (FirstOperand.getOpcode() != ISD::AND ||
5443 !(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))) ||
5444 !isShiftedMask_64(CN->getZExtValue(), MaskIdx, MaskLen))
5445 return SDValue();
5446
5447 // The second operand (shift amount) must be an immediate.
5448 if (!(CN = dyn_cast<ConstantSDNode>(N->getOperand(1))))
5449 return SDValue();
5450
5451 Shamt = CN->getZExtValue();
5452 if (MaskIdx <= Shamt && Shamt <= MaskIdx + MaskLen - 1)
5453 return DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy,
5454 FirstOperand->getOperand(0),
5455 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
5456 DAG.getConstant(Shamt, DL, GRLenVT));
5457
5458 return SDValue();
5459}
5460
5461// Helper to peek through bitops/trunc/setcc to determine size of source vector.
5462// Allows BITCASTCombine to determine what size vector generated a <X x i1>.
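// For example (illustrative), a <4 x i1> produced by a setcc whose operands
// are 256-bit vectors reports a source size of 256.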
5463static bool checkBitcastSrcVectorSize(SDValue Src, unsigned Size,
5464 unsigned Depth) {
5465 // Limit recursion.
5466   if (Depth >= SelectionDAG::MaxRecursionDepth)
5467     return false;
5468 switch (Src.getOpcode()) {
5469 case ISD::SETCC:
5470 case ISD::TRUNCATE:
5471 return Src.getOperand(0).getValueSizeInBits() == Size;
5472 case ISD::FREEZE:
5473 return checkBitcastSrcVectorSize(Src.getOperand(0), Size, Depth + 1);
5474 case ISD::AND:
5475 case ISD::XOR:
5476 case ISD::OR:
5477 return checkBitcastSrcVectorSize(Src.getOperand(0), Size, Depth + 1) &&
5478 checkBitcastSrcVectorSize(Src.getOperand(1), Size, Depth + 1);
5479 case ISD::SELECT:
5480 case ISD::VSELECT:
5481 return Src.getOperand(0).getScalarValueSizeInBits() == 1 &&
5482 checkBitcastSrcVectorSize(Src.getOperand(1), Size, Depth + 1) &&
5483 checkBitcastSrcVectorSize(Src.getOperand(2), Size, Depth + 1);
5484 case ISD::BUILD_VECTOR:
5485 return ISD::isBuildVectorAllZeros(Src.getNode()) ||
5486 ISD::isBuildVectorAllOnes(Src.getNode());
5487 }
5488 return false;
5489}
5490
5491// Helper to push sign extension of vXi1 SETCC result through bitops.
5492 static SDValue signExtendBitcastSrcVector(SelectionDAG &DAG, EVT SExtVT,
5493                                           SDValue Src, const SDLoc &DL) {
5494 switch (Src.getOpcode()) {
5495 case ISD::SETCC:
5496 case ISD::FREEZE:
5497 case ISD::TRUNCATE:
5498 case ISD::BUILD_VECTOR:
5499 return DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, Src);
5500 case ISD::AND:
5501 case ISD::XOR:
5502 case ISD::OR:
5503 return DAG.getNode(
5504 Src.getOpcode(), DL, SExtVT,
5505 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(0), DL),
5506 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(1), DL));
5507 case ISD::SELECT:
5508 case ISD::VSELECT:
5509 return DAG.getSelect(
5510 DL, SExtVT, Src.getOperand(0),
5511 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(1), DL),
5512 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(2), DL));
5513 }
5514 llvm_unreachable("Unexpected node type for vXi1 sign extension");
5515}
5516
5517static SDValue
5518 performSETCC_BITCASTCombine(SDNode *N, SelectionDAG &DAG,
5519                             TargetLowering::DAGCombinerInfo &DCI,
5520                             const LoongArchSubtarget &Subtarget) {
5521 SDLoc DL(N);
5522 EVT VT = N->getValueType(0);
5523 SDValue Src = N->getOperand(0);
5524 EVT SrcVT = Src.getValueType();
5525
5526 if (Src.getOpcode() != ISD::SETCC || !Src.hasOneUse())
5527 return SDValue();
5528
5529 bool UseLASX;
5530 unsigned Opc = ISD::DELETED_NODE;
5531 EVT CmpVT = Src.getOperand(0).getValueType();
5532 EVT EltVT = CmpVT.getVectorElementType();
5533
5534 if (Subtarget.hasExtLSX() && CmpVT.getSizeInBits() == 128)
5535 UseLASX = false;
5536 else if (Subtarget.has32S() && Subtarget.hasExtLASX() &&
5537 CmpVT.getSizeInBits() == 256)
5538 UseLASX = true;
5539 else
5540 return SDValue();
5541
5542 SDValue SrcN1 = Src.getOperand(1);
5543 switch (cast<CondCodeSDNode>(Src.getOperand(2))->get()) {
5544 default:
5545 break;
5546 case ISD::SETEQ:
5547 // x == 0 => not (vmsknez.b x)
5548 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
5549 Opc = UseLASX ? LoongArchISD::XVMSKEQZ : LoongArchISD::VMSKEQZ;
5550 break;
5551 case ISD::SETGT:
5552 // x > -1 => vmskgez.b x
5553 if (ISD::isBuildVectorAllOnes(SrcN1.getNode()) && EltVT == MVT::i8)
5554 Opc = UseLASX ? LoongArchISD::XVMSKGEZ : LoongArchISD::VMSKGEZ;
5555 break;
5556 case ISD::SETGE:
5557 // x >= 0 => vmskgez.b x
5558 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
5559 Opc = UseLASX ? LoongArchISD::XVMSKGEZ : LoongArchISD::VMSKGEZ;
5560 break;
5561 case ISD::SETLT:
5562 // x < 0 => vmskltz.{b,h,w,d} x
5563 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) &&
5564 (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
5565 EltVT == MVT::i64))
5566 Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
5567 break;
5568 case ISD::SETLE:
5569 // x <= -1 => vmskltz.{b,h,w,d} x
5570 if (ISD::isBuildVectorAllOnes(SrcN1.getNode()) &&
5571 (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
5572 EltVT == MVT::i64))
5573 Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
5574 break;
5575 case ISD::SETNE:
5576 // x != 0 => vmsknez.b x
5577 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
5578 Opc = UseLASX ? LoongArchISD::XVMSKNEZ : LoongArchISD::VMSKNEZ;
5579 break;
5580 }
5581
5582 if (Opc == ISD::DELETED_NODE)
5583 return SDValue();
5584
5585 SDValue V = DAG.getNode(Opc, DL, Subtarget.getGRLenVT(), Src.getOperand(0));
5586   EVT T = EVT::getIntegerVT(*DAG.getContext(), SrcVT.getVectorNumElements());
5587   V = DAG.getZExtOrTrunc(V, DL, T);
5588 return DAG.getBitcast(VT, V);
5589}
5590
5591 static SDValue performBITCASTCombine(SDNode *N, SelectionDAG &DAG,
5592                                      TargetLowering::DAGCombinerInfo &DCI,
5593                                      const LoongArchSubtarget &Subtarget) {
5594 SDLoc DL(N);
5595 EVT VT = N->getValueType(0);
5596 SDValue Src = N->getOperand(0);
5597 EVT SrcVT = Src.getValueType();
5598 MVT GRLenVT = Subtarget.getGRLenVT();
5599
5600 if (!DCI.isBeforeLegalizeOps())
5601 return SDValue();
5602
5603 if (!SrcVT.isSimple() || SrcVT.getScalarType() != MVT::i1)
5604 return SDValue();
5605
5606 // Combine SETCC and BITCAST into [X]VMSK{LT,GE,NE} when possible
5607 SDValue Res = performSETCC_BITCASTCombine(N, DAG, DCI, Subtarget);
5608 if (Res)
5609 return Res;
5610
5611   // Otherwise, generate the bitmask via sign-extension and [X]VMSKLTZ.
5612 MVT SExtVT;
5613 unsigned Opc;
5614 bool UseLASX = false;
5615 bool PropagateSExt = false;
5616
5617 if (Src.getOpcode() == ISD::SETCC && Src.hasOneUse()) {
5618 EVT CmpVT = Src.getOperand(0).getValueType();
5619 if (CmpVT.getSizeInBits() > 256)
5620 return SDValue();
5621 }
5622
5623 switch (SrcVT.getSimpleVT().SimpleTy) {
5624 default:
5625 return SDValue();
5626 case MVT::v2i1:
5627 SExtVT = MVT::v2i64;
5628 break;
5629 case MVT::v4i1:
5630 SExtVT = MVT::v4i32;
5631 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
5632 SExtVT = MVT::v4i64;
5633 UseLASX = true;
5634 PropagateSExt = true;
5635 }
5636 break;
5637 case MVT::v8i1:
5638 SExtVT = MVT::v8i16;
5639 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
5640 SExtVT = MVT::v8i32;
5641 UseLASX = true;
5642 PropagateSExt = true;
5643 }
5644 break;
5645 case MVT::v16i1:
5646 SExtVT = MVT::v16i8;
5647 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
5648 SExtVT = MVT::v16i16;
5649 UseLASX = true;
5650 PropagateSExt = true;
5651 }
5652 break;
5653 case MVT::v32i1:
5654 SExtVT = MVT::v32i8;
5655 UseLASX = true;
5656 break;
5657 };
5658 Src = PropagateSExt ? signExtendBitcastSrcVector(DAG, SExtVT, Src, DL)
5659 : DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, Src);
5660
5661 SDValue V;
5662 if (!Subtarget.has32S() || !Subtarget.hasExtLASX()) {
5663 if (Src.getSimpleValueType() == MVT::v32i8) {
5664 SDValue Lo, Hi;
5665 std::tie(Lo, Hi) = DAG.SplitVector(Src, DL);
5666 Lo = DAG.getNode(LoongArchISD::VMSKLTZ, DL, GRLenVT, Lo);
5667 Hi = DAG.getNode(LoongArchISD::VMSKLTZ, DL, GRLenVT, Hi);
5668 Hi = DAG.getNode(ISD::SHL, DL, GRLenVT, Hi,
5669 DAG.getShiftAmountConstant(16, GRLenVT, DL));
5670 V = DAG.getNode(ISD::OR, DL, GRLenVT, Lo, Hi);
5671 } else if (UseLASX) {
5672 return SDValue();
5673 }
5674 }
5675
5676 if (!V) {
5677 Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
5678 V = DAG.getNode(Opc, DL, GRLenVT, Src);
5679 }
5680
5681   EVT T = EVT::getIntegerVT(*DAG.getContext(), SrcVT.getVectorNumElements());
5682   V = DAG.getZExtOrTrunc(V, DL, T);
5683 return DAG.getBitcast(VT, V);
5684}
5685
5686 static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
5687                                 TargetLowering::DAGCombinerInfo &DCI,
5688                                 const LoongArchSubtarget &Subtarget) {
5689 MVT GRLenVT = Subtarget.getGRLenVT();
5690 EVT ValTy = N->getValueType(0);
5691 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
5692 ConstantSDNode *CN0, *CN1;
5693 SDLoc DL(N);
5694 unsigned ValBits = ValTy.getSizeInBits();
5695 unsigned MaskIdx0, MaskLen0, MaskIdx1, MaskLen1;
5696 unsigned Shamt;
5697 bool SwapAndRetried = false;
5698
5699 // BSTRPICK requires the 32S feature.
5700 if (!Subtarget.has32S())
5701 return SDValue();
5702
5703 if (DCI.isBeforeLegalizeOps())
5704 return SDValue();
5705
5706 if (ValBits != 32 && ValBits != 64)
5707 return SDValue();
5708
5709Retry:
5710 // 1st pattern to match BSTRINS:
5711 // R = or (and X, mask0), (and (shl Y, lsb), mask1)
5712 // where mask1 = (2**size - 1) << lsb, mask0 = ~mask1
5713 // =>
5714 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
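  // For example (illustrative), with size = 8 and lsb = 8:
  //   R = or (and X, ~0xff00), (and (shl Y, 8), 0xff00)
  //   => R = BSTRINS X, Y, 15, 8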
5715 if (N0.getOpcode() == ISD::AND &&
5716 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5717 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
5718 N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL &&
5719 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5720 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
5721 MaskIdx0 == MaskIdx1 && MaskLen0 == MaskLen1 &&
5722 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
5723 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
5724 (MaskIdx0 + MaskLen0 <= ValBits)) {
5725 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 1\n");
5726 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5727 N1.getOperand(0).getOperand(0),
5728 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
5729 DAG.getConstant(MaskIdx0, DL, GRLenVT));
5730 }
5731
5732 // 2nd pattern to match BSTRINS:
5733 // R = or (and X, mask0), (shl (and Y, mask1), lsb)
5734 // where mask1 = (2**size - 1), mask0 = ~(mask1 << lsb)
5735 // =>
5736 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
5737 if (N0.getOpcode() == ISD::AND &&
5738 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5739 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
5740 N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
5741 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5742 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
5743 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
5744 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
5745 MaskLen0 == MaskLen1 && MaskIdx1 == 0 &&
5746 (MaskIdx0 + MaskLen0 <= ValBits)) {
5747 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 2\n");
5748 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5749 N1.getOperand(0).getOperand(0),
5750 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
5751 DAG.getConstant(MaskIdx0, DL, GRLenVT));
5752 }
5753
5754 // 3rd pattern to match BSTRINS:
5755 // R = or (and X, mask0), (and Y, mask1)
5756 // where ~mask0 = (2**size - 1) << lsb, mask0 & mask1 = 0
5757 // =>
5758 // R = BSTRINS X, (shr (and Y, mask1), lsb), msb, lsb
5759 // where msb = lsb + size - 1
5760 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
5761 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5762 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
5763 (MaskIdx0 + MaskLen0 <= 64) &&
5764 (CN1 = dyn_cast<ConstantSDNode>(N1->getOperand(1))) &&
5765 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
5766 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 3\n");
5767 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5768 DAG.getNode(ISD::SRL, DL, N1->getValueType(0), N1,
5769 DAG.getConstant(MaskIdx0, DL, GRLenVT)),
5770 DAG.getConstant(ValBits == 32
5771 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
5772 : (MaskIdx0 + MaskLen0 - 1),
5773 DL, GRLenVT),
5774 DAG.getConstant(MaskIdx0, DL, GRLenVT));
5775 }
5776
5777 // 4th pattern to match BSTRINS:
5778 // R = or (and X, mask), (shl Y, shamt)
5779 // where mask = (2**shamt - 1)
5780 // =>
5781 // R = BSTRINS X, Y, ValBits - 1, shamt
5782 // where ValBits = 32 or 64
5783 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::SHL &&
5784 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5785 isShiftedMask_64(CN0->getZExtValue(), MaskIdx0, MaskLen0) &&
5786 MaskIdx0 == 0 && (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5787 (Shamt = CN1->getZExtValue()) == MaskLen0 &&
5788 (MaskIdx0 + MaskLen0 <= ValBits)) {
5789 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 4\n");
5790 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5791 N1.getOperand(0),
5792 DAG.getConstant((ValBits - 1), DL, GRLenVT),
5793 DAG.getConstant(Shamt, DL, GRLenVT));
5794 }
5795
5796 // 5th pattern to match BSTRINS:
5797 // R = or (and X, mask), const
5798 // where ~mask = (2**size - 1) << lsb, mask & const = 0
5799 // =>
5800 // R = BSTRINS X, (const >> lsb), msb, lsb
5801 // where msb = lsb + size - 1
5802 if (N0.getOpcode() == ISD::AND &&
5803 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5804 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
5805 (CN1 = dyn_cast<ConstantSDNode>(N1)) &&
5806 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
5807 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 5\n");
5808 return DAG.getNode(
5809 LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5810 DAG.getSignedConstant(CN1->getSExtValue() >> MaskIdx0, DL, ValTy),
5811 DAG.getConstant(ValBits == 32 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
5812 : (MaskIdx0 + MaskLen0 - 1),
5813 DL, GRLenVT),
5814 DAG.getConstant(MaskIdx0, DL, GRLenVT));
5815 }
5816
5817 // 6th pattern.
5818 // a = b | ((c & mask) << shamt), where all positions in b to be overwritten
5819 // by the incoming bits are known to be zero.
5820 // =>
5821 // a = BSTRINS b, c, shamt + MaskLen - 1, shamt
5822 //
5823   // Note that the 1st pattern is a special case of the 6th, i.e. the 6th
5824   // pattern is more common than the 1st. So we put the 1st before the 6th in
5825   // order to match as many nodes as possible.
5826 ConstantSDNode *CNMask, *CNShamt;
5827 unsigned MaskIdx, MaskLen;
5828 if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
5829 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
5830 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
5831 MaskIdx == 0 && (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5832 CNShamt->getZExtValue() + MaskLen <= ValBits) {
5833 Shamt = CNShamt->getZExtValue();
5834 APInt ShMask(ValBits, CNMask->getZExtValue() << Shamt);
5835 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
5836 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 6\n");
5837 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
5838 N1.getOperand(0).getOperand(0),
5839 DAG.getConstant(Shamt + MaskLen - 1, DL, GRLenVT),
5840 DAG.getConstant(Shamt, DL, GRLenVT));
5841 }
5842 }
5843
5844 // 7th pattern.
5845 // a = b | ((c << shamt) & shifted_mask), where all positions in b to be
5846 // overwritten by the incoming bits are known to be zero.
5847 // =>
5848 // a = BSTRINS b, c, MaskIdx + MaskLen - 1, MaskIdx
5849 //
5850 // Similarly, the 7th pattern is more common than the 2nd. So we put the 2nd
5851 // before the 7th in order to match as many nodes as possible.
5852 if (N1.getOpcode() == ISD::AND &&
5853 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5854 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
5855 N1.getOperand(0).getOpcode() == ISD::SHL &&
5856 (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
5857 CNShamt->getZExtValue() == MaskIdx) {
5858 APInt ShMask(ValBits, CNMask->getZExtValue());
5859 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
5860 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 7\n");
5861 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
5862 N1.getOperand(0).getOperand(0),
5863 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
5864 DAG.getConstant(MaskIdx, DL, GRLenVT));
5865 }
5866 }
5867
5868 // (or a, b) and (or b, a) are equivalent, so swap the operands and retry.
5869 if (!SwapAndRetried) {
5870 std::swap(N0, N1);
5871 SwapAndRetried = true;
5872 goto Retry;
5873 }
5874
5875 SwapAndRetried = false;
5876Retry2:
5877 // 8th pattern.
5878 // a = b | (c & shifted_mask), where all positions in b to be overwritten by
5879 // the incoming bits are known to be zero.
5880 // =>
5881 // a = BSTRINS b, c >> MaskIdx, MaskIdx + MaskLen - 1, MaskIdx
5882 //
5883   // Similarly, the 8th pattern is more common than the 4th and 5th patterns.
5884   // So we put it here in order to match as many nodes as possible or generate
5885   // fewer instructions.
5886 if (N1.getOpcode() == ISD::AND &&
5887 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5888 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen)) {
5889 APInt ShMask(ValBits, CNMask->getZExtValue());
5890 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
5891 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 8\n");
5892 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
5893 DAG.getNode(ISD::SRL, DL, N1->getValueType(0),
5894 N1->getOperand(0),
5895 DAG.getConstant(MaskIdx, DL, GRLenVT)),
5896 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
5897 DAG.getConstant(MaskIdx, DL, GRLenVT));
5898 }
5899 }
5900 // Swap N0/N1 and retry.
5901 if (!SwapAndRetried) {
5902 std::swap(N0, N1);
5903 SwapAndRetried = true;
5904 goto Retry2;
5905 }
5906
5907 return SDValue();
5908}
5909
5910static bool checkValueWidth(SDValue V, ISD::LoadExtType &ExtType) {
5911 ExtType = ISD::NON_EXTLOAD;
5912
5913 switch (V.getNode()->getOpcode()) {
5914 case ISD::LOAD: {
5915 LoadSDNode *LoadNode = cast<LoadSDNode>(V.getNode());
5916 if ((LoadNode->getMemoryVT() == MVT::i8) ||
5917 (LoadNode->getMemoryVT() == MVT::i16)) {
5918 ExtType = LoadNode->getExtensionType();
5919 return true;
5920 }
5921 return false;
5922 }
5923 case ISD::AssertSext: {
5924 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
5925 if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
5926 ExtType = ISD::SEXTLOAD;
5927 return true;
5928 }
5929 return false;
5930 }
5931 case ISD::AssertZext: {
5932 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
5933 if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
5934 ExtType = ISD::ZEXTLOAD;
5935 return true;
5936 }
5937 return false;
5938 }
5939 default:
5940 return false;
5941 }
5942
5943 return false;
5944}
5945
5946// Eliminate redundant truncation and zero-extension nodes.
5947// * Case 1:
5948// +------------+ +------------+ +------------+
5949// | Input1 | | Input2 | | CC |
5950// +------------+ +------------+ +------------+
5951// | | |
5952// V V +----+
5953// +------------+ +------------+ |
5954// | TRUNCATE | | TRUNCATE | |
5955// +------------+ +------------+ |
5956// | | |
5957// V V |
5958// +------------+ +------------+ |
5959// | ZERO_EXT | | ZERO_EXT | |
5960// +------------+ +------------+ |
5961// | | |
5962// | +-------------+ |
5963// V V | |
5964// +----------------+ | |
5965// | AND | | |
5966// +----------------+ | |
5967// | | |
5968// +---------------+ | |
5969// | | |
5970// V V V
5971// +-------------+
5972// | CMP |
5973// +-------------+
5974// * Case 2:
5975// +------------+ +------------+ +-------------+ +------------+ +------------+
5976// | Input1 | | Input2 | | Constant -1 | | Constant 0 | | CC |
5977// +------------+ +------------+ +-------------+ +------------+ +------------+
5978// | | | | |
5979// V | | | |
5980// +------------+ | | | |
5981// | XOR |<---------------------+ | |
5982// +------------+ | | |
5983// | | | |
5984// V V +---------------+ |
5985// +------------+ +------------+ | |
5986// | TRUNCATE | | TRUNCATE | | +-------------------------+
5987// +------------+ +------------+ | |
5988// | | | |
5989// V V | |
5990// +------------+ +------------+ | |
5991// | ZERO_EXT | | ZERO_EXT | | |
5992// +------------+ +------------+ | |
5993// | | | |
5994// V V | |
5995// +----------------+ | |
5996// | AND | | |
5997// +----------------+ | |
5998// | | |
5999// +---------------+ | |
6000// | | |
6001// V V V
6002// +-------------+
6003// | CMP |
6004// +-------------+
6005 static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG,
6006                                    TargetLowering::DAGCombinerInfo &DCI,
6007                                    const LoongArchSubtarget &Subtarget) {
6008 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
6009
6010 SDNode *AndNode = N->getOperand(0).getNode();
6011 if (AndNode->getOpcode() != ISD::AND)
6012 return SDValue();
6013
6014 SDValue AndInputValue2 = AndNode->getOperand(1);
6015 if (AndInputValue2.getOpcode() != ISD::ZERO_EXTEND)
6016 return SDValue();
6017
6018 SDValue CmpInputValue = N->getOperand(1);
6019 SDValue AndInputValue1 = AndNode->getOperand(0);
6020 if (AndInputValue1.getOpcode() == ISD::XOR) {
6021 if (CC != ISD::SETEQ && CC != ISD::SETNE)
6022 return SDValue();
6023 ConstantSDNode *CN = dyn_cast<ConstantSDNode>(AndInputValue1.getOperand(1));
6024 if (!CN || !CN->isAllOnes())
6025 return SDValue();
6026 CN = dyn_cast<ConstantSDNode>(CmpInputValue);
6027 if (!CN || !CN->isZero())
6028 return SDValue();
6029 AndInputValue1 = AndInputValue1.getOperand(0);
6030 if (AndInputValue1.getOpcode() != ISD::ZERO_EXTEND)
6031 return SDValue();
6032 } else if (AndInputValue1.getOpcode() == ISD::ZERO_EXTEND) {
6033 if (AndInputValue2 != CmpInputValue)
6034 return SDValue();
6035 } else {
6036 return SDValue();
6037 }
6038
6039 SDValue TruncValue1 = AndInputValue1.getNode()->getOperand(0);
6040 if (TruncValue1.getOpcode() != ISD::TRUNCATE)
6041 return SDValue();
6042
6043 SDValue TruncValue2 = AndInputValue2.getNode()->getOperand(0);
6044 if (TruncValue2.getOpcode() != ISD::TRUNCATE)
6045 return SDValue();
6046
6047 SDValue TruncInputValue1 = TruncValue1.getNode()->getOperand(0);
6048 SDValue TruncInputValue2 = TruncValue2.getNode()->getOperand(0);
6049 ISD::LoadExtType ExtType1;
6050 ISD::LoadExtType ExtType2;
6051
6052 if (!checkValueWidth(TruncInputValue1, ExtType1) ||
6053 !checkValueWidth(TruncInputValue2, ExtType2))
6054 return SDValue();
6055
6056 if (TruncInputValue1->getValueType(0) != TruncInputValue2->getValueType(0) ||
6057 AndNode->getValueType(0) != TruncInputValue1->getValueType(0))
6058 return SDValue();
6059
6060 if ((ExtType2 != ISD::ZEXTLOAD) &&
6061 ((ExtType2 != ISD::SEXTLOAD) && (ExtType1 != ISD::SEXTLOAD)))
6062 return SDValue();
6063
6064 // These truncation and zero-extension nodes are not necessary, remove them.
6065 SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N), AndNode->getValueType(0),
6066 TruncInputValue1, TruncInputValue2);
6067 SDValue NewSetCC =
6068 DAG.getSetCC(SDLoc(N), N->getValueType(0), NewAnd, TruncInputValue2, CC);
6069 DAG.ReplaceAllUsesWith(N, NewSetCC.getNode());
6070 return SDValue(N, 0);
6071}
6072
6073// Combine (loongarch_bitrev_w (loongarch_revb_2w X)) to loongarch_bitrev_4b.
6074 static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG,
6075                                       TargetLowering::DAGCombinerInfo &DCI,
6076                                       const LoongArchSubtarget &Subtarget) {
6077 if (DCI.isBeforeLegalizeOps())
6078 return SDValue();
6079
6080 SDValue Src = N->getOperand(0);
6081 if (Src.getOpcode() != LoongArchISD::REVB_2W)
6082 return SDValue();
6083
6084 return DAG.getNode(LoongArchISD::BITREV_4B, SDLoc(N), N->getValueType(0),
6085 Src.getOperand(0));
6086}
6087
6088// Perform common combines for BR_CC and SELECT_CC conditions.
6089static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL,
6090 SelectionDAG &DAG, const LoongArchSubtarget &Subtarget) {
6091 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
6092
6094   // Since an arithmetic right shift always preserves the sign bit, the
6095   // shift can be omitted.
6095 // Fold setlt (sra X, N), 0 -> setlt X, 0 and
6096 // setge (sra X, N), 0 -> setge X, 0
6097 if (isNullConstant(RHS) && (CCVal == ISD::SETGE || CCVal == ISD::SETLT) &&
6098 LHS.getOpcode() == ISD::SRA) {
6099 LHS = LHS.getOperand(0);
6100 return true;
6101 }
6102
6103 if (!ISD::isIntEqualitySetCC(CCVal))
6104 return false;
6105
6106 // Fold ((setlt X, Y), 0, ne) -> (X, Y, lt)
6107 // Sometimes the setcc is introduced after br_cc/select_cc has been formed.
6108 if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
6109 LHS.getOperand(0).getValueType() == Subtarget.getGRLenVT()) {
6110 // If we're looking for eq 0 instead of ne 0, we need to invert the
6111 // condition.
6112 bool Invert = CCVal == ISD::SETEQ;
6113 CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
6114 if (Invert)
6115 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
6116
6117 RHS = LHS.getOperand(1);
6118 LHS = LHS.getOperand(0);
6119 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
6120
6121 CC = DAG.getCondCode(CCVal);
6122 return true;
6123 }
6124
6125 // Fold ((srl (and X, 1<<C), C), 0, eq/ne) -> ((shl X, GRLen-1-C), 0, ge/lt)
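  // For example (illustrative), on LA64 with C = 3 the bit test becomes
  // (shl X, 60) followed by a ge/lt-zero check on the sign bit.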
6126 if (isNullConstant(RHS) && LHS.getOpcode() == ISD::SRL && LHS.hasOneUse() &&
6127 LHS.getOperand(1).getOpcode() == ISD::Constant) {
6128 SDValue LHS0 = LHS.getOperand(0);
6129 if (LHS0.getOpcode() == ISD::AND &&
6130 LHS0.getOperand(1).getOpcode() == ISD::Constant) {
6131 uint64_t Mask = LHS0.getConstantOperandVal(1);
6132 uint64_t ShAmt = LHS.getConstantOperandVal(1);
6133 if (isPowerOf2_64(Mask) && Log2_64(Mask) == ShAmt) {
6134 CCVal = CCVal == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
6135 CC = DAG.getCondCode(CCVal);
6136
6137 ShAmt = LHS.getValueSizeInBits() - 1 - ShAmt;
6138 LHS = LHS0.getOperand(0);
6139 if (ShAmt != 0)
6140 LHS =
6141 DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS0.getOperand(0),
6142 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
6143 return true;
6144 }
6145 }
6146 }
6147
6148 // (X, 1, setne) -> (X, 0, seteq) if we can prove X is 0/1.
6149 // This can occur when legalizing some floating point comparisons.
6150 APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
6151 if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
6152 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
6153 CC = DAG.getCondCode(CCVal);
6154 RHS = DAG.getConstant(0, DL, LHS.getValueType());
6155 return true;
6156 }
6157
6158 return false;
6159}
6160
6161 static SDValue performBR_CCCombine(SDNode *N, SelectionDAG &DAG,
6162                                    TargetLowering::DAGCombinerInfo &DCI,
6163                                    const LoongArchSubtarget &Subtarget) {
6164 SDValue LHS = N->getOperand(1);
6165 SDValue RHS = N->getOperand(2);
6166 SDValue CC = N->getOperand(3);
6167 SDLoc DL(N);
6168
6169 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
6170 return DAG.getNode(LoongArchISD::BR_CC, DL, N->getValueType(0),
6171 N->getOperand(0), LHS, RHS, CC, N->getOperand(4));
6172
6173 return SDValue();
6174}
6175
6176 static SDValue performSELECT_CCCombine(SDNode *N, SelectionDAG &DAG,
6177                                        TargetLowering::DAGCombinerInfo &DCI,
6178                                        const LoongArchSubtarget &Subtarget) {
6179   // Transform SELECT_CC into more profitable forms where possible.
6180 SDValue LHS = N->getOperand(0);
6181 SDValue RHS = N->getOperand(1);
6182 SDValue CC = N->getOperand(2);
6183 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
6184 SDValue TrueV = N->getOperand(3);
6185 SDValue FalseV = N->getOperand(4);
6186 SDLoc DL(N);
6187 EVT VT = N->getValueType(0);
6188
6189 // If the True and False values are the same, we don't need a select_cc.
6190 if (TrueV == FalseV)
6191 return TrueV;
6192
6193 // (select (x < 0), y, z) -> x >> (GRLEN - 1) & (y - z) + z
6194 // (select (x >= 0), y, z) -> x >> (GRLEN - 1) & (z - y) + y
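  // For example (illustrative): (select (x < 0), 5, 3)
  // => (x >> (GRLEN - 1)) & (5 - 3) + 3, which yields 5 when x < 0 and 3
  // otherwise.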
6195 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
6197 (CCVal == ISD::CondCode::SETLT || CCVal == ISD::CondCode::SETGE)) {
6198 if (CCVal == ISD::CondCode::SETGE)
6199 std::swap(TrueV, FalseV);
6200
6201 int64_t TrueSImm = cast<ConstantSDNode>(TrueV)->getSExtValue();
6202 int64_t FalseSImm = cast<ConstantSDNode>(FalseV)->getSExtValue();
6203     // Only handle simm12; if a value is out of this range, it can be
6204     // treated as a register operand instead.
6205 if (isInt<12>(TrueSImm) && isInt<12>(FalseSImm) &&
6206 isInt<12>(TrueSImm - FalseSImm)) {
6207 SDValue SRA =
6208 DAG.getNode(ISD::SRA, DL, VT, LHS,
6209 DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT));
6210 SDValue AND =
6211 DAG.getNode(ISD::AND, DL, VT, SRA,
6212 DAG.getSignedConstant(TrueSImm - FalseSImm, DL, VT));
6213 return DAG.getNode(ISD::ADD, DL, VT, AND, FalseV);
6214 }
6215
6216 if (CCVal == ISD::CondCode::SETGE)
6217 std::swap(TrueV, FalseV);
6218 }
6219
6220 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
6221 return DAG.getNode(LoongArchISD::SELECT_CC, DL, N->getValueType(0),
6222 {LHS, RHS, CC, TrueV, FalseV});
6223
6224 return SDValue();
6225}
6226
6227template <unsigned N>
6228 static SDValue legalizeIntrinsicImmArg(SDNode *Node, unsigned ImmOp,
6229                                        SelectionDAG &DAG,
6230 const LoongArchSubtarget &Subtarget,
6231 bool IsSigned = false) {
6232 SDLoc DL(Node);
6233 auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
6234 // Check the ImmArg.
6235 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
6236 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
6237 DAG.getContext()->emitError(Node->getOperationName(0) +
6238 ": argument out of range.");
6239 return DAG.getNode(ISD::UNDEF, DL, Subtarget.getGRLenVT());
6240 }
6241 return DAG.getConstant(CImm->getZExtValue(), DL, Subtarget.getGRLenVT());
6242}
6243
6244template <unsigned N>
6245static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp,
6246 SelectionDAG &DAG, bool IsSigned = false) {
6247 SDLoc DL(Node);
6248 EVT ResTy = Node->getValueType(0);
6249 auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
6250
6251 // Check the ImmArg.
6252 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
6253 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
6254 DAG.getContext()->emitError(Node->getOperationName(0) +
6255 ": argument out of range.");
6256 return DAG.getNode(ISD::UNDEF, DL, ResTy);
6257 }
6258   return DAG.getConstant(
6259       APInt(ResTy.getScalarType().getSizeInBits(),
6260             IsSigned ? CImm->getSExtValue() : CImm->getZExtValue(), IsSigned),
6261 DL, ResTy);
6262}
6263
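// Mask the elements of the shift-amount vector (operand 2) so shift amounts
// are taken modulo the element bit width.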
6264 static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG) {
6265   SDLoc DL(Node);
6266 EVT ResTy = Node->getValueType(0);
6267 SDValue Vec = Node->getOperand(2);
6268 SDValue Mask = DAG.getConstant(Vec.getScalarValueSizeInBits() - 1, DL, ResTy);
6269 return DAG.getNode(ISD::AND, DL, ResTy, Vec, Mask);
6270}
6271
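// Lower a vector bit-clear operation ([x]vbitclr.*): clear, in each element
// of operand 1, the bit indexed by the corresponding element of operand 2.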
6272 static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG) {
6273   SDLoc DL(Node);
6274 EVT ResTy = Node->getValueType(0);
6275 SDValue One = DAG.getConstant(1, DL, ResTy);
6276 SDValue Bit =
6277 DAG.getNode(ISD::SHL, DL, ResTy, One, truncateVecElts(Node, DAG));
6278
6279 return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1),
6280 DAG.getNOT(DL, Bit, ResTy));
6281}
6282
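// Lower a vector bit-clear-immediate ([x]vbitclri.*): clear bit CImm in every
// element of operand 1, after range-checking the immediate.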
6283template <unsigned N>
6284 static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG) {
6285   SDLoc DL(Node);
6286 EVT ResTy = Node->getValueType(0);
6287 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
6288 // Check the unsigned ImmArg.
6289 if (!isUInt<N>(CImm->getZExtValue())) {
6290 DAG.getContext()->emitError(Node->getOperationName(0) +
6291 ": argument out of range.");
6292 return DAG.getNode(ISD::UNDEF, DL, ResTy);
6293 }
6294
6295 APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
6296 SDValue Mask = DAG.getConstant(~BitImm, DL, ResTy);
6297
6298 return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1), Mask);
6299}
6300
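// Lower a vector bit-set-immediate ([x]vbitseti.*): set bit CImm in every
// element of operand 1, after range-checking the immediate.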
6301template <unsigned N>
6302 static SDValue lowerVectorBitSetImm(SDNode *Node, SelectionDAG &DAG) {
6303   SDLoc DL(Node);
6304 EVT ResTy = Node->getValueType(0);
6305 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
6306 // Check the unsigned ImmArg.
6307 if (!isUInt<N>(CImm->getZExtValue())) {
6308 DAG.getContext()->emitError(Node->getOperationName(0) +
6309 ": argument out of range.");
6310 return DAG.getNode(ISD::UNDEF, DL, ResTy);
6311 }
6312
6313 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
6314 SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
6315 return DAG.getNode(ISD::OR, DL, ResTy, Node->getOperand(1), BitImm);
6316}
6317
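// Lower a vector bit-reverse-immediate ([x]vbitrevi.*): flip bit CImm in
// every element of operand 1, after range-checking the immediate.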
6318template <unsigned N>
6319 static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG) {
6320   SDLoc DL(Node);
6321 EVT ResTy = Node->getValueType(0);
6322 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
6323 // Check the unsigned ImmArg.
6324 if (!isUInt<N>(CImm->getZExtValue())) {
6325 DAG.getContext()->emitError(Node->getOperationName(0) +
6326 ": argument out of range.");
6327 return DAG.getNode(ISD::UNDEF, DL, ResTy);
6328 }
6329
6330 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
6331 SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
6332 return DAG.getNode(ISD::XOR, DL, ResTy, Node->getOperand(1), BitImm);
6333}
6334
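// Lower a [x]vpickve2gr-style extraction: range-check the W-bit element index
// and extract that element of the vector operand through the ResOp node.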
6335template <unsigned W>
6336 static SDValue lowerVectorPickVE2GR(SDNode *N, SelectionDAG &DAG,
6337                                     unsigned ResOp) {
6338 unsigned Imm = N->getConstantOperandVal(2);
6339 if (!isUInt<W>(Imm)) {
6340 const StringRef ErrorMsg = "argument out of range";
6341 DAG.getContext()->emitError(N->getOperationName(0) + ": " + ErrorMsg + ".");
6342 return DAG.getUNDEF(N->getValueType(0));
6343 }
6344 SDLoc DL(N);
6345 SDValue Vec = N->getOperand(1);
6346 SDValue Idx = DAG.getConstant(Imm, DL, MVT::i32);
6347   SDValue EltVT = DAG.getValueType(Vec.getValueType().getVectorElementType());
6348   return DAG.getNode(ResOp, DL, N->getValueType(0), Vec, Idx, EltVT);
6349}
6350
6351static SDValue
6352 performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG,
6353                                  TargetLowering::DAGCombinerInfo &DCI,
6354                                  const LoongArchSubtarget &Subtarget) {
6355 SDLoc DL(N);
6356 switch (N->getConstantOperandVal(0)) {
6357 default:
6358 break;
6359 case Intrinsic::loongarch_lsx_vadd_b:
6360 case Intrinsic::loongarch_lsx_vadd_h:
6361 case Intrinsic::loongarch_lsx_vadd_w:
6362 case Intrinsic::loongarch_lsx_vadd_d:
6363 case Intrinsic::loongarch_lasx_xvadd_b:
6364 case Intrinsic::loongarch_lasx_xvadd_h:
6365 case Intrinsic::loongarch_lasx_xvadd_w:
6366 case Intrinsic::loongarch_lasx_xvadd_d:
6367 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
6368 N->getOperand(2));
6369 case Intrinsic::loongarch_lsx_vaddi_bu:
6370 case Intrinsic::loongarch_lsx_vaddi_hu:
6371 case Intrinsic::loongarch_lsx_vaddi_wu:
6372 case Intrinsic::loongarch_lsx_vaddi_du:
6373 case Intrinsic::loongarch_lasx_xvaddi_bu:
6374 case Intrinsic::loongarch_lasx_xvaddi_hu:
6375 case Intrinsic::loongarch_lasx_xvaddi_wu:
6376 case Intrinsic::loongarch_lasx_xvaddi_du:
6377 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
6378 lowerVectorSplatImm<5>(N, 2, DAG));
6379 case Intrinsic::loongarch_lsx_vsub_b:
6380 case Intrinsic::loongarch_lsx_vsub_h:
6381 case Intrinsic::loongarch_lsx_vsub_w:
6382 case Intrinsic::loongarch_lsx_vsub_d:
6383 case Intrinsic::loongarch_lasx_xvsub_b:
6384 case Intrinsic::loongarch_lasx_xvsub_h:
6385 case Intrinsic::loongarch_lasx_xvsub_w:
6386 case Intrinsic::loongarch_lasx_xvsub_d:
6387 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
6388 N->getOperand(2));
6389 case Intrinsic::loongarch_lsx_vsubi_bu:
6390 case Intrinsic::loongarch_lsx_vsubi_hu:
6391 case Intrinsic::loongarch_lsx_vsubi_wu:
6392 case Intrinsic::loongarch_lsx_vsubi_du:
6393 case Intrinsic::loongarch_lasx_xvsubi_bu:
6394 case Intrinsic::loongarch_lasx_xvsubi_hu:
6395 case Intrinsic::loongarch_lasx_xvsubi_wu:
6396 case Intrinsic::loongarch_lasx_xvsubi_du:
6397 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
6398 lowerVectorSplatImm<5>(N, 2, DAG));
6399 case Intrinsic::loongarch_lsx_vneg_b:
6400 case Intrinsic::loongarch_lsx_vneg_h:
6401 case Intrinsic::loongarch_lsx_vneg_w:
6402 case Intrinsic::loongarch_lsx_vneg_d:
6403 case Intrinsic::loongarch_lasx_xvneg_b:
6404 case Intrinsic::loongarch_lasx_xvneg_h:
6405 case Intrinsic::loongarch_lasx_xvneg_w:
6406 case Intrinsic::loongarch_lasx_xvneg_d:
6407 return DAG.getNode(
6408 ISD::SUB, DL, N->getValueType(0),
6409 DAG.getConstant(
6410 APInt(N->getValueType(0).getScalarType().getSizeInBits(), 0,
6411 /*isSigned=*/true),
6412 SDLoc(N), N->getValueType(0)),
6413 N->getOperand(1));
6414 case Intrinsic::loongarch_lsx_vmax_b:
6415 case Intrinsic::loongarch_lsx_vmax_h:
6416 case Intrinsic::loongarch_lsx_vmax_w:
6417 case Intrinsic::loongarch_lsx_vmax_d:
6418 case Intrinsic::loongarch_lasx_xvmax_b:
6419 case Intrinsic::loongarch_lasx_xvmax_h:
6420 case Intrinsic::loongarch_lasx_xvmax_w:
6421 case Intrinsic::loongarch_lasx_xvmax_d:
6422 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
6423 N->getOperand(2));
6424 case Intrinsic::loongarch_lsx_vmax_bu:
6425 case Intrinsic::loongarch_lsx_vmax_hu:
6426 case Intrinsic::loongarch_lsx_vmax_wu:
6427 case Intrinsic::loongarch_lsx_vmax_du:
6428 case Intrinsic::loongarch_lasx_xvmax_bu:
6429 case Intrinsic::loongarch_lasx_xvmax_hu:
6430 case Intrinsic::loongarch_lasx_xvmax_wu:
6431 case Intrinsic::loongarch_lasx_xvmax_du:
6432 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
6433 N->getOperand(2));
6434 case Intrinsic::loongarch_lsx_vmaxi_b:
6435 case Intrinsic::loongarch_lsx_vmaxi_h:
6436 case Intrinsic::loongarch_lsx_vmaxi_w:
6437 case Intrinsic::loongarch_lsx_vmaxi_d:
6438 case Intrinsic::loongarch_lasx_xvmaxi_b:
6439 case Intrinsic::loongarch_lasx_xvmaxi_h:
6440 case Intrinsic::loongarch_lasx_xvmaxi_w:
6441 case Intrinsic::loongarch_lasx_xvmaxi_d:
6442 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
6443 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
6444 case Intrinsic::loongarch_lsx_vmaxi_bu:
6445 case Intrinsic::loongarch_lsx_vmaxi_hu:
6446 case Intrinsic::loongarch_lsx_vmaxi_wu:
6447 case Intrinsic::loongarch_lsx_vmaxi_du:
6448 case Intrinsic::loongarch_lasx_xvmaxi_bu:
6449 case Intrinsic::loongarch_lasx_xvmaxi_hu:
6450 case Intrinsic::loongarch_lasx_xvmaxi_wu:
6451 case Intrinsic::loongarch_lasx_xvmaxi_du:
6452 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
6453 lowerVectorSplatImm<5>(N, 2, DAG));
6454 case Intrinsic::loongarch_lsx_vmin_b:
6455 case Intrinsic::loongarch_lsx_vmin_h:
6456 case Intrinsic::loongarch_lsx_vmin_w:
6457 case Intrinsic::loongarch_lsx_vmin_d:
6458 case Intrinsic::loongarch_lasx_xvmin_b:
6459 case Intrinsic::loongarch_lasx_xvmin_h:
6460 case Intrinsic::loongarch_lasx_xvmin_w:
6461 case Intrinsic::loongarch_lasx_xvmin_d:
6462 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
6463 N->getOperand(2));
6464 case Intrinsic::loongarch_lsx_vmin_bu:
6465 case Intrinsic::loongarch_lsx_vmin_hu:
6466 case Intrinsic::loongarch_lsx_vmin_wu:
6467 case Intrinsic::loongarch_lsx_vmin_du:
6468 case Intrinsic::loongarch_lasx_xvmin_bu:
6469 case Intrinsic::loongarch_lasx_xvmin_hu:
6470 case Intrinsic::loongarch_lasx_xvmin_wu:
6471 case Intrinsic::loongarch_lasx_xvmin_du:
6472 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
6473 N->getOperand(2));
6474 case Intrinsic::loongarch_lsx_vmini_b:
6475 case Intrinsic::loongarch_lsx_vmini_h:
6476 case Intrinsic::loongarch_lsx_vmini_w:
6477 case Intrinsic::loongarch_lsx_vmini_d:
6478 case Intrinsic::loongarch_lasx_xvmini_b:
6479 case Intrinsic::loongarch_lasx_xvmini_h:
6480 case Intrinsic::loongarch_lasx_xvmini_w:
6481 case Intrinsic::loongarch_lasx_xvmini_d:
6482 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
6483 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
6484 case Intrinsic::loongarch_lsx_vmini_bu:
6485 case Intrinsic::loongarch_lsx_vmini_hu:
6486 case Intrinsic::loongarch_lsx_vmini_wu:
6487 case Intrinsic::loongarch_lsx_vmini_du:
6488 case Intrinsic::loongarch_lasx_xvmini_bu:
6489 case Intrinsic::loongarch_lasx_xvmini_hu:
6490 case Intrinsic::loongarch_lasx_xvmini_wu:
6491 case Intrinsic::loongarch_lasx_xvmini_du:
6492 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
6493 lowerVectorSplatImm<5>(N, 2, DAG));
6494 case Intrinsic::loongarch_lsx_vmul_b:
6495 case Intrinsic::loongarch_lsx_vmul_h:
6496 case Intrinsic::loongarch_lsx_vmul_w:
6497 case Intrinsic::loongarch_lsx_vmul_d:
6498 case Intrinsic::loongarch_lasx_xvmul_b:
6499 case Intrinsic::loongarch_lasx_xvmul_h:
6500 case Intrinsic::loongarch_lasx_xvmul_w:
6501 case Intrinsic::loongarch_lasx_xvmul_d:
6502 return DAG.getNode(ISD::MUL, DL, N->getValueType(0), N->getOperand(1),
6503 N->getOperand(2));
6504 case Intrinsic::loongarch_lsx_vmadd_b:
6505 case Intrinsic::loongarch_lsx_vmadd_h:
6506 case Intrinsic::loongarch_lsx_vmadd_w:
6507 case Intrinsic::loongarch_lsx_vmadd_d:
6508 case Intrinsic::loongarch_lasx_xvmadd_b:
6509 case Intrinsic::loongarch_lasx_xvmadd_h:
6510 case Intrinsic::loongarch_lasx_xvmadd_w:
6511 case Intrinsic::loongarch_lasx_xvmadd_d: {
6512 EVT ResTy = N->getValueType(0);
6513 return DAG.getNode(ISD::ADD, SDLoc(N), ResTy, N->getOperand(1),
6514 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
6515 N->getOperand(3)));
6516 }
6517 case Intrinsic::loongarch_lsx_vmsub_b:
6518 case Intrinsic::loongarch_lsx_vmsub_h:
6519 case Intrinsic::loongarch_lsx_vmsub_w:
6520 case Intrinsic::loongarch_lsx_vmsub_d:
6521 case Intrinsic::loongarch_lasx_xvmsub_b:
6522 case Intrinsic::loongarch_lasx_xvmsub_h:
6523 case Intrinsic::loongarch_lasx_xvmsub_w:
6524 case Intrinsic::loongarch_lasx_xvmsub_d: {
6525 EVT ResTy = N->getValueType(0);
6526 return DAG.getNode(ISD::SUB, SDLoc(N), ResTy, N->getOperand(1),
6527 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
6528 N->getOperand(3)));
6529 }
6530 case Intrinsic::loongarch_lsx_vdiv_b:
6531 case Intrinsic::loongarch_lsx_vdiv_h:
6532 case Intrinsic::loongarch_lsx_vdiv_w:
6533 case Intrinsic::loongarch_lsx_vdiv_d:
6534 case Intrinsic::loongarch_lasx_xvdiv_b:
6535 case Intrinsic::loongarch_lasx_xvdiv_h:
6536 case Intrinsic::loongarch_lasx_xvdiv_w:
6537 case Intrinsic::loongarch_lasx_xvdiv_d:
6538 return DAG.getNode(ISD::SDIV, DL, N->getValueType(0), N->getOperand(1),
6539 N->getOperand(2));
6540 case Intrinsic::loongarch_lsx_vdiv_bu:
6541 case Intrinsic::loongarch_lsx_vdiv_hu:
6542 case Intrinsic::loongarch_lsx_vdiv_wu:
6543 case Intrinsic::loongarch_lsx_vdiv_du:
6544 case Intrinsic::loongarch_lasx_xvdiv_bu:
6545 case Intrinsic::loongarch_lasx_xvdiv_hu:
6546 case Intrinsic::loongarch_lasx_xvdiv_wu:
6547 case Intrinsic::loongarch_lasx_xvdiv_du:
6548 return DAG.getNode(ISD::UDIV, DL, N->getValueType(0), N->getOperand(1),
6549 N->getOperand(2));
6550 case Intrinsic::loongarch_lsx_vmod_b:
6551 case Intrinsic::loongarch_lsx_vmod_h:
6552 case Intrinsic::loongarch_lsx_vmod_w:
6553 case Intrinsic::loongarch_lsx_vmod_d:
6554 case Intrinsic::loongarch_lasx_xvmod_b:
6555 case Intrinsic::loongarch_lasx_xvmod_h:
6556 case Intrinsic::loongarch_lasx_xvmod_w:
6557 case Intrinsic::loongarch_lasx_xvmod_d:
6558 return DAG.getNode(ISD::SREM, DL, N->getValueType(0), N->getOperand(1),
6559 N->getOperand(2));
6560 case Intrinsic::loongarch_lsx_vmod_bu:
6561 case Intrinsic::loongarch_lsx_vmod_hu:
6562 case Intrinsic::loongarch_lsx_vmod_wu:
6563 case Intrinsic::loongarch_lsx_vmod_du:
6564 case Intrinsic::loongarch_lasx_xvmod_bu:
6565 case Intrinsic::loongarch_lasx_xvmod_hu:
6566 case Intrinsic::loongarch_lasx_xvmod_wu:
6567 case Intrinsic::loongarch_lasx_xvmod_du:
6568 return DAG.getNode(ISD::UREM, DL, N->getValueType(0), N->getOperand(1),
6569 N->getOperand(2));
6570 case Intrinsic::loongarch_lsx_vand_v:
6571 case Intrinsic::loongarch_lasx_xvand_v:
6572 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
6573 N->getOperand(2));
6574 case Intrinsic::loongarch_lsx_vor_v:
6575 case Intrinsic::loongarch_lasx_xvor_v:
6576 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
6577 N->getOperand(2));
6578 case Intrinsic::loongarch_lsx_vxor_v:
6579 case Intrinsic::loongarch_lasx_xvxor_v:
6580 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
6581 N->getOperand(2));
6582 case Intrinsic::loongarch_lsx_vnor_v:
6583 case Intrinsic::loongarch_lasx_xvnor_v: {
6584 SDValue Res = DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
6585 N->getOperand(2));
6586 return DAG.getNOT(DL, Res, Res->getValueType(0));
6587 }
6588 case Intrinsic::loongarch_lsx_vandi_b:
6589 case Intrinsic::loongarch_lasx_xvandi_b:
6590 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
6591 lowerVectorSplatImm<8>(N, 2, DAG));
6592 case Intrinsic::loongarch_lsx_vori_b:
6593 case Intrinsic::loongarch_lasx_xvori_b:
6594 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
6595 lowerVectorSplatImm<8>(N, 2, DAG));
6596 case Intrinsic::loongarch_lsx_vxori_b:
6597 case Intrinsic::loongarch_lasx_xvxori_b:
6598 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
6599 lowerVectorSplatImm<8>(N, 2, DAG));
6600 case Intrinsic::loongarch_lsx_vsll_b:
6601 case Intrinsic::loongarch_lsx_vsll_h:
6602 case Intrinsic::loongarch_lsx_vsll_w:
6603 case Intrinsic::loongarch_lsx_vsll_d:
6604 case Intrinsic::loongarch_lasx_xvsll_b:
6605 case Intrinsic::loongarch_lasx_xvsll_h:
6606 case Intrinsic::loongarch_lasx_xvsll_w:
6607 case Intrinsic::loongarch_lasx_xvsll_d:
6608 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
6609 truncateVecElts(N, DAG));
6610 case Intrinsic::loongarch_lsx_vslli_b:
6611 case Intrinsic::loongarch_lasx_xvslli_b:
6612 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
6613 lowerVectorSplatImm<3>(N, 2, DAG));
6614 case Intrinsic::loongarch_lsx_vslli_h:
6615 case Intrinsic::loongarch_lasx_xvslli_h:
6616 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
6617 lowerVectorSplatImm<4>(N, 2, DAG));
6618 case Intrinsic::loongarch_lsx_vslli_w:
6619 case Intrinsic::loongarch_lasx_xvslli_w:
6620 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
6621 lowerVectorSplatImm<5>(N, 2, DAG));
6622 case Intrinsic::loongarch_lsx_vslli_d:
6623 case Intrinsic::loongarch_lasx_xvslli_d:
6624 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
6625 lowerVectorSplatImm<6>(N, 2, DAG));
6626 case Intrinsic::loongarch_lsx_vsrl_b:
6627 case Intrinsic::loongarch_lsx_vsrl_h:
6628 case Intrinsic::loongarch_lsx_vsrl_w:
6629 case Intrinsic::loongarch_lsx_vsrl_d:
6630 case Intrinsic::loongarch_lasx_xvsrl_b:
6631 case Intrinsic::loongarch_lasx_xvsrl_h:
6632 case Intrinsic::loongarch_lasx_xvsrl_w:
6633 case Intrinsic::loongarch_lasx_xvsrl_d:
6634 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
6635 truncateVecElts(N, DAG));
6636 case Intrinsic::loongarch_lsx_vsrli_b:
6637 case Intrinsic::loongarch_lasx_xvsrli_b:
6638 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
6639 lowerVectorSplatImm<3>(N, 2, DAG));
6640 case Intrinsic::loongarch_lsx_vsrli_h:
6641 case Intrinsic::loongarch_lasx_xvsrli_h:
6642 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
6643 lowerVectorSplatImm<4>(N, 2, DAG));
6644 case Intrinsic::loongarch_lsx_vsrli_w:
6645 case Intrinsic::loongarch_lasx_xvsrli_w:
6646 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
6647 lowerVectorSplatImm<5>(N, 2, DAG));
6648 case Intrinsic::loongarch_lsx_vsrli_d:
6649 case Intrinsic::loongarch_lasx_xvsrli_d:
6650 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
6651 lowerVectorSplatImm<6>(N, 2, DAG));
6652 case Intrinsic::loongarch_lsx_vsra_b:
6653 case Intrinsic::loongarch_lsx_vsra_h:
6654 case Intrinsic::loongarch_lsx_vsra_w:
6655 case Intrinsic::loongarch_lsx_vsra_d:
6656 case Intrinsic::loongarch_lasx_xvsra_b:
6657 case Intrinsic::loongarch_lasx_xvsra_h:
6658 case Intrinsic::loongarch_lasx_xvsra_w:
6659 case Intrinsic::loongarch_lasx_xvsra_d:
6660 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6661 truncateVecElts(N, DAG));
6662 case Intrinsic::loongarch_lsx_vsrai_b:
6663 case Intrinsic::loongarch_lasx_xvsrai_b:
6664 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6665 lowerVectorSplatImm<3>(N, 2, DAG));
6666 case Intrinsic::loongarch_lsx_vsrai_h:
6667 case Intrinsic::loongarch_lasx_xvsrai_h:
6668 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6669 lowerVectorSplatImm<4>(N, 2, DAG));
6670 case Intrinsic::loongarch_lsx_vsrai_w:
6671 case Intrinsic::loongarch_lasx_xvsrai_w:
6672 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6673 lowerVectorSplatImm<5>(N, 2, DAG));
6674 case Intrinsic::loongarch_lsx_vsrai_d:
6675 case Intrinsic::loongarch_lasx_xvsrai_d:
6676 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6677 lowerVectorSplatImm<6>(N, 2, DAG));
6678 case Intrinsic::loongarch_lsx_vclz_b:
6679 case Intrinsic::loongarch_lsx_vclz_h:
6680 case Intrinsic::loongarch_lsx_vclz_w:
6681 case Intrinsic::loongarch_lsx_vclz_d:
6682 case Intrinsic::loongarch_lasx_xvclz_b:
6683 case Intrinsic::loongarch_lasx_xvclz_h:
6684 case Intrinsic::loongarch_lasx_xvclz_w:
6685 case Intrinsic::loongarch_lasx_xvclz_d:
6686 return DAG.getNode(ISD::CTLZ, DL, N->getValueType(0), N->getOperand(1));
6687 case Intrinsic::loongarch_lsx_vpcnt_b:
6688 case Intrinsic::loongarch_lsx_vpcnt_h:
6689 case Intrinsic::loongarch_lsx_vpcnt_w:
6690 case Intrinsic::loongarch_lsx_vpcnt_d:
6691 case Intrinsic::loongarch_lasx_xvpcnt_b:
6692 case Intrinsic::loongarch_lasx_xvpcnt_h:
6693 case Intrinsic::loongarch_lasx_xvpcnt_w:
6694 case Intrinsic::loongarch_lasx_xvpcnt_d:
6695 return DAG.getNode(ISD::CTPOP, DL, N->getValueType(0), N->getOperand(1));
6696 case Intrinsic::loongarch_lsx_vbitclr_b:
6697 case Intrinsic::loongarch_lsx_vbitclr_h:
6698 case Intrinsic::loongarch_lsx_vbitclr_w:
6699 case Intrinsic::loongarch_lsx_vbitclr_d:
6700 case Intrinsic::loongarch_lasx_xvbitclr_b:
6701 case Intrinsic::loongarch_lasx_xvbitclr_h:
6702 case Intrinsic::loongarch_lasx_xvbitclr_w:
6703 case Intrinsic::loongarch_lasx_xvbitclr_d:
6704 return lowerVectorBitClear(N, DAG);
6705 case Intrinsic::loongarch_lsx_vbitclri_b:
6706 case Intrinsic::loongarch_lasx_xvbitclri_b:
6707 return lowerVectorBitClearImm<3>(N, DAG);
6708 case Intrinsic::loongarch_lsx_vbitclri_h:
6709 case Intrinsic::loongarch_lasx_xvbitclri_h:
6710 return lowerVectorBitClearImm<4>(N, DAG);
6711 case Intrinsic::loongarch_lsx_vbitclri_w:
6712 case Intrinsic::loongarch_lasx_xvbitclri_w:
6713 return lowerVectorBitClearImm<5>(N, DAG);
6714 case Intrinsic::loongarch_lsx_vbitclri_d:
6715 case Intrinsic::loongarch_lasx_xvbitclri_d:
6716 return lowerVectorBitClearImm<6>(N, DAG);
6717 case Intrinsic::loongarch_lsx_vbitset_b:
6718 case Intrinsic::loongarch_lsx_vbitset_h:
6719 case Intrinsic::loongarch_lsx_vbitset_w:
6720 case Intrinsic::loongarch_lsx_vbitset_d:
6721 case Intrinsic::loongarch_lasx_xvbitset_b:
6722 case Intrinsic::loongarch_lasx_xvbitset_h:
6723 case Intrinsic::loongarch_lasx_xvbitset_w:
6724 case Intrinsic::loongarch_lasx_xvbitset_d: {
6725 EVT VecTy = N->getValueType(0);
6726 SDValue One = DAG.getConstant(1, DL, VecTy);
6727 return DAG.getNode(
6728 ISD::OR, DL, VecTy, N->getOperand(1),
6729 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
6730 }
6731 case Intrinsic::loongarch_lsx_vbitseti_b:
6732 case Intrinsic::loongarch_lasx_xvbitseti_b:
6733 return lowerVectorBitSetImm<3>(N, DAG);
6734 case Intrinsic::loongarch_lsx_vbitseti_h:
6735 case Intrinsic::loongarch_lasx_xvbitseti_h:
6736 return lowerVectorBitSetImm<4>(N, DAG);
6737 case Intrinsic::loongarch_lsx_vbitseti_w:
6738 case Intrinsic::loongarch_lasx_xvbitseti_w:
6739 return lowerVectorBitSetImm<5>(N, DAG);
6740 case Intrinsic::loongarch_lsx_vbitseti_d:
6741 case Intrinsic::loongarch_lasx_xvbitseti_d:
6742 return lowerVectorBitSetImm<6>(N, DAG);
6743 case Intrinsic::loongarch_lsx_vbitrev_b:
6744 case Intrinsic::loongarch_lsx_vbitrev_h:
6745 case Intrinsic::loongarch_lsx_vbitrev_w:
6746 case Intrinsic::loongarch_lsx_vbitrev_d:
6747 case Intrinsic::loongarch_lasx_xvbitrev_b:
6748 case Intrinsic::loongarch_lasx_xvbitrev_h:
6749 case Intrinsic::loongarch_lasx_xvbitrev_w:
6750 case Intrinsic::loongarch_lasx_xvbitrev_d: {
6751 EVT VecTy = N->getValueType(0);
6752 SDValue One = DAG.getConstant(1, DL, VecTy);
6753 return DAG.getNode(
6754 ISD::XOR, DL, VecTy, N->getOperand(1),
6755 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
6756 }
6757 case Intrinsic::loongarch_lsx_vbitrevi_b:
6758 case Intrinsic::loongarch_lasx_xvbitrevi_b:
6759 return lowerVectorBitRevImm<3>(N, DAG);
6760 case Intrinsic::loongarch_lsx_vbitrevi_h:
6761 case Intrinsic::loongarch_lasx_xvbitrevi_h:
6762 return lowerVectorBitRevImm<4>(N, DAG);
6763 case Intrinsic::loongarch_lsx_vbitrevi_w:
6764 case Intrinsic::loongarch_lasx_xvbitrevi_w:
6765 return lowerVectorBitRevImm<5>(N, DAG);
6766 case Intrinsic::loongarch_lsx_vbitrevi_d:
6767 case Intrinsic::loongarch_lasx_xvbitrevi_d:
6768 return lowerVectorBitRevImm<6>(N, DAG);
6769 case Intrinsic::loongarch_lsx_vfadd_s:
6770 case Intrinsic::loongarch_lsx_vfadd_d:
6771 case Intrinsic::loongarch_lasx_xvfadd_s:
6772 case Intrinsic::loongarch_lasx_xvfadd_d:
6773 return DAG.getNode(ISD::FADD, DL, N->getValueType(0), N->getOperand(1),
6774 N->getOperand(2));
6775 case Intrinsic::loongarch_lsx_vfsub_s:
6776 case Intrinsic::loongarch_lsx_vfsub_d:
6777 case Intrinsic::loongarch_lasx_xvfsub_s:
6778 case Intrinsic::loongarch_lasx_xvfsub_d:
6779 return DAG.getNode(ISD::FSUB, DL, N->getValueType(0), N->getOperand(1),
6780 N->getOperand(2));
6781 case Intrinsic::loongarch_lsx_vfmul_s:
6782 case Intrinsic::loongarch_lsx_vfmul_d:
6783 case Intrinsic::loongarch_lasx_xvfmul_s:
6784 case Intrinsic::loongarch_lasx_xvfmul_d:
6785 return DAG.getNode(ISD::FMUL, DL, N->getValueType(0), N->getOperand(1),
6786 N->getOperand(2));
6787 case Intrinsic::loongarch_lsx_vfdiv_s:
6788 case Intrinsic::loongarch_lsx_vfdiv_d:
6789 case Intrinsic::loongarch_lasx_xvfdiv_s:
6790 case Intrinsic::loongarch_lasx_xvfdiv_d:
6791 return DAG.getNode(ISD::FDIV, DL, N->getValueType(0), N->getOperand(1),
6792 N->getOperand(2));
6793 case Intrinsic::loongarch_lsx_vfmadd_s:
6794 case Intrinsic::loongarch_lsx_vfmadd_d:
6795 case Intrinsic::loongarch_lasx_xvfmadd_s:
6796 case Intrinsic::loongarch_lasx_xvfmadd_d:
6797 return DAG.getNode(ISD::FMA, DL, N->getValueType(0), N->getOperand(1),
6798 N->getOperand(2), N->getOperand(3));
6799 case Intrinsic::loongarch_lsx_vinsgr2vr_b:
6800 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
6801 N->getOperand(1), N->getOperand(2),
6802 legalizeIntrinsicImmArg<4>(N, 3, DAG, Subtarget));
6803 case Intrinsic::loongarch_lsx_vinsgr2vr_h:
6804 case Intrinsic::loongarch_lasx_xvinsgr2vr_w:
6805 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
6806 N->getOperand(1), N->getOperand(2),
6807 legalizeIntrinsicImmArg<3>(N, 3, DAG, Subtarget));
6808 case Intrinsic::loongarch_lsx_vinsgr2vr_w:
6809 case Intrinsic::loongarch_lasx_xvinsgr2vr_d:
6810 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
6811 N->getOperand(1), N->getOperand(2),
6812 legalizeIntrinsicImmArg<2>(N, 3, DAG, Subtarget));
6813 case Intrinsic::loongarch_lsx_vinsgr2vr_d:
6814 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
6815 N->getOperand(1), N->getOperand(2),
6816 legalizeIntrinsicImmArg<1>(N, 3, DAG, Subtarget));
6817 case Intrinsic::loongarch_lsx_vreplgr2vr_b:
6818 case Intrinsic::loongarch_lsx_vreplgr2vr_h:
6819 case Intrinsic::loongarch_lsx_vreplgr2vr_w:
6820 case Intrinsic::loongarch_lsx_vreplgr2vr_d:
6821 case Intrinsic::loongarch_lasx_xvreplgr2vr_b:
6822 case Intrinsic::loongarch_lasx_xvreplgr2vr_h:
6823 case Intrinsic::loongarch_lasx_xvreplgr2vr_w:
6824 case Intrinsic::loongarch_lasx_xvreplgr2vr_d:
6825 return DAG.getNode(LoongArchISD::VREPLGR2VR, DL, N->getValueType(0),
6826 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
6827 N->getOperand(1)));
6828 case Intrinsic::loongarch_lsx_vreplve_b:
6829 case Intrinsic::loongarch_lsx_vreplve_h:
6830 case Intrinsic::loongarch_lsx_vreplve_w:
6831 case Intrinsic::loongarch_lsx_vreplve_d:
6832 case Intrinsic::loongarch_lasx_xvreplve_b:
6833 case Intrinsic::loongarch_lasx_xvreplve_h:
6834 case Intrinsic::loongarch_lasx_xvreplve_w:
6835 case Intrinsic::loongarch_lasx_xvreplve_d:
6836 return DAG.getNode(LoongArchISD::VREPLVE, DL, N->getValueType(0),
6837 N->getOperand(1),
6838 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
6839 N->getOperand(2)));
6840 case Intrinsic::loongarch_lsx_vpickve2gr_b:
6841 if (!Subtarget.is64Bit())
6842 return lowerVectorPickVE2GR<4>(N, DAG, LoongArchISD::VPICK_SEXT_ELT);
6843 break;
6844 case Intrinsic::loongarch_lsx_vpickve2gr_h:
6845 case Intrinsic::loongarch_lasx_xvpickve2gr_w:
6846 if (!Subtarget.is64Bit())
6847 return lowerVectorPickVE2GR<3>(N, DAG, LoongArchISD::VPICK_SEXT_ELT);
6848 break;
6849 case Intrinsic::loongarch_lsx_vpickve2gr_w:
6850 if (!Subtarget.is64Bit())
6851 return lowerVectorPickVE2GR<2>(N, DAG, LoongArchISD::VPICK_SEXT_ELT);
6852 break;
6853 case Intrinsic::loongarch_lsx_vpickve2gr_bu:
6854 if (!Subtarget.is64Bit())
6855 return lowerVectorPickVE2GR<4>(N, DAG, LoongArchISD::VPICK_ZEXT_ELT);
6856 break;
6857 case Intrinsic::loongarch_lsx_vpickve2gr_hu:
6858 case Intrinsic::loongarch_lasx_xvpickve2gr_wu:
6859 if (!Subtarget.is64Bit())
6860 return lowerVectorPickVE2GR<3>(N, DAG, LoongArchISD::VPICK_ZEXT_ELT);
6861 break;
6862 case Intrinsic::loongarch_lsx_vpickve2gr_wu:
6863 if (!Subtarget.is64Bit())
6864 return lowerVectorPickVE2GR<2>(N, DAG, LoongArchISD::VPICK_ZEXT_ELT);
6865 break;
6866 case Intrinsic::loongarch_lsx_bz_b:
6867 case Intrinsic::loongarch_lsx_bz_h:
6868 case Intrinsic::loongarch_lsx_bz_w:
6869 case Intrinsic::loongarch_lsx_bz_d:
6870 case Intrinsic::loongarch_lasx_xbz_b:
6871 case Intrinsic::loongarch_lasx_xbz_h:
6872 case Intrinsic::loongarch_lasx_xbz_w:
6873 case Intrinsic::loongarch_lasx_xbz_d:
6874 if (!Subtarget.is64Bit())
6875 return DAG.getNode(LoongArchISD::VALL_ZERO, DL, N->getValueType(0),
6876 N->getOperand(1));
6877 break;
6878 case Intrinsic::loongarch_lsx_bz_v:
6879 case Intrinsic::loongarch_lasx_xbz_v:
6880 if (!Subtarget.is64Bit())
6881 return DAG.getNode(LoongArchISD::VANY_ZERO, DL, N->getValueType(0),
6882 N->getOperand(1));
6883 break;
6884 case Intrinsic::loongarch_lsx_bnz_b:
6885 case Intrinsic::loongarch_lsx_bnz_h:
6886 case Intrinsic::loongarch_lsx_bnz_w:
6887 case Intrinsic::loongarch_lsx_bnz_d:
6888 case Intrinsic::loongarch_lasx_xbnz_b:
6889 case Intrinsic::loongarch_lasx_xbnz_h:
6890 case Intrinsic::loongarch_lasx_xbnz_w:
6891 case Intrinsic::loongarch_lasx_xbnz_d:
6892 if (!Subtarget.is64Bit())
6893 return DAG.getNode(LoongArchISD::VALL_NONZERO, DL, N->getValueType(0),
6894 N->getOperand(1));
6895 break;
6896 case Intrinsic::loongarch_lsx_bnz_v:
6897 case Intrinsic::loongarch_lasx_xbnz_v:
6898 if (!Subtarget.is64Bit())
6899 return DAG.getNode(LoongArchISD::VANY_NONZERO, DL, N->getValueType(0),
6900 N->getOperand(1));
6901 break;
6902 case Intrinsic::loongarch_lasx_concat_128_s:
6903 case Intrinsic::loongarch_lasx_concat_128_d:
6904 case Intrinsic::loongarch_lasx_concat_128:
6905 return DAG.getNode(ISD::CONCAT_VECTORS, DL, N->getValueType(0),
6906 N->getOperand(1), N->getOperand(2));
6907 }
6908 return SDValue();
6909}
6910
6911static SDValue performMOVGR2FR_WCombine(SDNode *N, SelectionDAG &DAG,
6912 TargetLowering::DAGCombinerInfo &DCI,
6913 const LoongArchSubtarget &Subtarget) {
6914 // If the input to MOVGR2FR_W_LA64 is just MOVFR2GR_S_LA64 then the
6915 // conversion is unnecessary and can be replaced with the
6916 // MOVFR2GR_S_LA64 operand.
6917 SDValue Op0 = N->getOperand(0);
6918 if (Op0.getOpcode() == LoongArchISD::MOVFR2GR_S_LA64)
6919 return Op0.getOperand(0);
6920 return SDValue();
6921}
6922
6923static SDValue performMOVFR2GR_SCombine(SDNode *N, SelectionDAG &DAG,
6924 TargetLowering::DAGCombinerInfo &DCI,
6925 const LoongArchSubtarget &Subtarget) {
6926 // If the input to MOVFR2GR_S_LA64 is just MOVGR2FR_W_LA64 then the
6927 // conversion is unnecessary and can be replaced with the MOVGR2FR_W_LA64
6928 // operand.
6929 SDValue Op0 = N->getOperand(0);
6930 if (Op0->getOpcode() == LoongArchISD::MOVGR2FR_W_LA64) {
6931 assert(Op0.getOperand(0).getValueType() == N->getSimpleValueType(0) &&
6932 "Unexpected value type!");
6933 return Op0.getOperand(0);
6934 }
6935 return SDValue();
6936}
6937
6938static SDValue performVMSKLTZCombine(SDNode *N, SelectionDAG &DAG,
6939 TargetLowering::DAGCombinerInfo &DCI,
6940 const LoongArchSubtarget &Subtarget) {
6941 MVT VT = N->getSimpleValueType(0);
6942 unsigned NumBits = VT.getScalarSizeInBits();
6943
6944 // Simplify the inputs.
6945 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6946 APInt DemandedMask(APInt::getAllOnes(NumBits));
6947 if (TLI.SimplifyDemandedBits(SDValue(N, 0), DemandedMask, DCI))
6948 return SDValue(N, 0);
6949
6950 return SDValue();
6951}
6952
6953static SDValue
6954performSPLIT_PAIR_F64Combine(SDNode *N, SelectionDAG &DAG,
6955 TargetLowering::DAGCombinerInfo &DCI,
6956 const LoongArchSubtarget &Subtarget) {
6957 SDValue Op0 = N->getOperand(0);
6958 SDLoc DL(N);
6959
6960 // If the input to SplitPairF64 is just BuildPairF64 then the operation is
6961 // redundant. Instead, use BuildPairF64's operands directly.
6962 if (Op0->getOpcode() == LoongArchISD::BUILD_PAIR_F64)
6963 return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
6964
6965 if (Op0->isUndef()) {
6966 SDValue Lo = DAG.getUNDEF(MVT::i32);
6967 SDValue Hi = DAG.getUNDEF(MVT::i32);
6968 return DCI.CombineTo(N, Lo, Hi);
6969 }
6970
6971 // It's cheaper to materialise two 32-bit integers than to load a double
6972 // from the constant pool and transfer it to integer registers through the
6973 // stack.
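  // (For instance, f64 1.0 has bit pattern 0x3FF0000000000000, so the split
  // below yields Lo = 0x00000000 and Hi = 0x3FF00000, both cheap to build in
  // GPRs; the values here are purely illustrative.)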
6974 if (auto *C = dyn_cast<ConstantFPSDNode>(Op0)) {
6975 APInt V = C->getValueAPF().bitcastToAPInt();
6976 SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
6977 SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
6978 return DCI.CombineTo(N, Lo, Hi);
6979 }
6980
6981 return SDValue();
6982}
6983
6984/// Do target-specific dag combines on LoongArchISD::VANDN nodes.
6985static SDValue performVANDNCombine(SDNode *N, SelectionDAG &DAG,
6986 TargetLowering::DAGCombinerInfo &DCI,
6987 const LoongArchSubtarget &Subtarget) {
6988 SDValue N0 = N->getOperand(0);
6989 SDValue N1 = N->getOperand(1);
6990 MVT VT = N->getSimpleValueType(0);
6991 SDLoc DL(N);
6992
6993 // VANDN(undef, x) -> 0
6994 // VANDN(x, undef) -> 0
6995 if (N0.isUndef() || N1.isUndef())
6996 return DAG.getConstant(0, DL, VT);
6997
6998 // VANDN(0, x) -> x
6999 if (ISD::isBuildVectorAllZeros(N0.getNode()))
7000 return N1;
7001
7002 // VANDN(x, 0) -> 0
7003 if (ISD::isBuildVectorAllZeros(N1.getNode()))
7004 return DAG.getConstant(0, DL, VT);
7005
7006 // VANDN(x, -1) -> NOT(x) -> XOR(x, -1)
7007 if (ISD::isBuildVectorAllOnes(N1.getNode()))
7008 return DAG.getNOT(DL, N0, VT);
7009
7010 // Turn VANDN back to AND if input is inverted.
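  // (VANDN(x, y) computes AND(NOT(x), y), so VANDN(NOT(z), y) folds to
  // AND(z, y); the bitcast below only reconciles types.)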
7011 if (SDValue Not = isNOT(N0, DAG))
7012 return DAG.getNode(ISD::AND, DL, VT, DAG.getBitcast(VT, Not), N1);
7013
7014 // Folds for better commutativity:
7015 if (N1->hasOneUse()) {
7016 // VANDN(x,NOT(y)) -> AND(NOT(x),NOT(y)) -> NOT(OR(X,Y)).
7017 if (SDValue Not = isNOT(N1, DAG))
7018 return DAG.getNOT(
7019 DL, DAG.getNode(ISD::OR, DL, VT, N0, DAG.getBitcast(VT, Not)), VT);
7020
7021 // VANDN(x, SplatVector(Imm)) -> AND(NOT(x), NOT(SplatVector(~Imm)))
7022 // -> NOT(OR(x, SplatVector(~Imm)))
7023 // Combination is performed only when VT is v16i8/v32i8, where a single
7024 // `vnori.b` makes the transform profitable.
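    // (Illustrative: VANDN(x, splat(0x0F)) == NOT(x | splat(0xF0)), which a
    // single vnori.b can produce.)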
7025 if (!DCI.isBeforeLegalizeOps() && (VT == MVT::v16i8 || VT == MVT::v32i8) &&
7026 N1.getOpcode() == ISD::BUILD_VECTOR) {
7027 if (SDValue SplatValue =
7028 cast<BuildVectorSDNode>(N1.getNode())->getSplatValue()) {
7029 if (!N1->isOnlyUserOf(SplatValue.getNode()))
7030 return SDValue();
7031
7032 if (auto *C = dyn_cast<ConstantSDNode>(SplatValue)) {
7033 uint8_t NCVal = static_cast<uint8_t>(~(C->getSExtValue()));
7034 SDValue Not =
7035 DAG.getSplat(VT, DL, DAG.getTargetConstant(NCVal, DL, MVT::i8));
7036 return DAG.getNOT(
7037 DL, DAG.getNode(ISD::OR, DL, VT, N0, DAG.getBitcast(VT, Not)),
7038 VT);
7039 }
7040 }
7041 }
7042 }
7043
7044 return SDValue();
7045}
7046
7047static SDValue performSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG,
7048 TargetLowering::DAGCombinerInfo &DCI,
7049 const LoongArchSubtarget &Subtarget) {
7050 SDLoc DL(N);
7051 EVT VT = N->getValueType(0);
7052
7053 if (VT != MVT::f32 && VT != MVT::f64)
7054 return SDValue();
7055 if (VT == MVT::f32 && !Subtarget.hasBasicF())
7056 return SDValue();
7057 if (VT == MVT::f64 && !Subtarget.hasBasicD())
7058 return SDValue();
7059
7060 // Only optimize when the source and destination types have the same width.
7061 if (VT.getSizeInBits() != N->getOperand(0).getValueSizeInBits())
7062 return SDValue();
7063
7064 SDValue Src = N->getOperand(0);
7065 // If the result of an integer load is only used by an integer-to-float
7066 // conversion, use a floating-point load instead. This eliminates an
7067 // integer-to-float move (movgr2fr) instruction.
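  // (Roughly: (f32 (sitofp (i32 (load p)))) is rebuilt as an f32-typed load
  // of the same address feeding SITOF, so the bits reach the FPR directly.)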
7068 if (ISD::isNormalLoad(Src.getNode()) && Src.hasOneUse() &&
7069 // Do not change the width of a volatile load. This condition check is
7070 // inspired by AArch64.
7071 !cast<LoadSDNode>(Src)->isVolatile()) {
7072 LoadSDNode *LN0 = cast<LoadSDNode>(Src);
7073 SDValue Load = DAG.getLoad(VT, DL, LN0->getChain(), LN0->getBasePtr(),
7074 LN0->getPointerInfo(), LN0->getAlign(),
7075 LN0->getMemOperand()->getFlags());
7076
7077 // Make sure successors of the original load stay after it by updating them
7078 // to use the new Chain.
7079 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), Load.getValue(1));
7080 return DAG.getNode(LoongArchISD::SITOF, SDLoc(N), VT, Load);
7081 }
7082
7083 return SDValue();
7084}
7085
7086// Try to widen AND, OR and XOR nodes to VT in order to remove casts around
7087// logical operations, like in the example below.
7088// or (and (truncate x, truncate y)),
7089// (xor (truncate z, build_vector (constants)))
7090// Given a target type \p VT, we generate
7091// or (and x, y), (xor z, zext(build_vector (constants)))
7092// given x, y and z are of type \p VT. We can do so if each operand is either
7093// a truncate from VT or recursively promotable; the second operand may also
7094// be a foldable vector of constants or an extension we can extend further.
7095static SDValue PromoteMaskArithmetic(SDValue N, const SDLoc &DL, EVT VT,
7096 SelectionDAG &DAG,
7097 const LoongArchSubtarget &Subtarget,
7098 unsigned Depth) {
7099 // Limit recursion to avoid excessive compile times.
7100 if (Depth >= SelectionDAG::MaxRecursionDepth)
7101 return SDValue();
7102
7103 if (!ISD::isBitwiseLogicOp(N.getOpcode()))
7104 return SDValue();
7105
7106 SDValue N0 = N.getOperand(0);
7107 SDValue N1 = N.getOperand(1);
7108
7109 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
7110 if (!TLI.isOperationLegalOrPromote(N.getOpcode(), VT))
7111 return SDValue();
7112
7113 if (SDValue NN0 =
7114 PromoteMaskArithmetic(N0, DL, VT, DAG, Subtarget, Depth + 1))
7115 N0 = NN0;
7116 else {
7117 // The left side has to be a 'trunc'.
7118 bool LHSTrunc = N0.getOpcode() == ISD::TRUNCATE &&
7119 N0.getOperand(0).getValueType() == VT;
7120 if (LHSTrunc)
7121 N0 = N0.getOperand(0);
7122 else
7123 return SDValue();
7124 }
7125
7126 if (SDValue NN1 =
7127 PromoteMaskArithmetic(N1, DL, VT, DAG, Subtarget, Depth + 1))
7128 N1 = NN1;
7129 else {
7130 // The right side has to be a 'trunc', a (foldable) constant or an
7131 // existing extension we can extend further.
7132 bool RHSTrunc = N1.getOpcode() == ISD::TRUNCATE &&
7133 N1.getOperand(0).getValueType() == VT;
7134 if (RHSTrunc)
7135 N1 = N1.getOperand(0);
7136 else if (ISD::isExtVecInRegOpcode(N1.getOpcode()) && VT.is256BitVector() &&
7137 Subtarget.hasExtLASX() && N1.hasOneUse())
7138 N1 = DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0));
7139 // On 32-bit platform, i64 is an illegal integer scalar type, and
7140 // FoldConstantArithmetic will fail for v4i64. This may be optimized in the
7141 // future.
7142 else if (SDValue Cst =
7143 DAG.FoldConstantArithmetic(ISD::ZERO_EXTEND, DL, VT, {N1}))
7144 N1 = Cst;
7145 else
7146 return SDValue();
7147 }
7148
7149 return DAG.getNode(N.getOpcode(), DL, VT, N0, N1);
7150}
7151
7152// On LASX the types v4i1/v8i1/v16i1 may be legalized to v4i32/v8i16/v16i8,
7153// which are LSX-sized registers. In most cases we actually compare or select
7154// registers and mixing the two types creates horrible code. This method
7155// optimizes some of the transition sequences.
7156static SDValue PromoteMaskArithmetic(SDValue N, const SDLoc &DL,
7157 SelectionDAG &DAG,
7158 const LoongArchSubtarget &Subtarget) {
7159 EVT VT = N.getValueType();
7160 assert(VT.isVector() && "Expected vector type");
7161 assert((N.getOpcode() == ISD::ANY_EXTEND ||
7162 N.getOpcode() == ISD::ZERO_EXTEND ||
7163 N.getOpcode() == ISD::SIGN_EXTEND) &&
7164 "Invalid Node");
7165
7166 if (!Subtarget.hasExtLASX() || !VT.is256BitVector())
7167 return SDValue();
7168
7169 SDValue Narrow = N.getOperand(0);
7170 EVT NarrowVT = Narrow.getValueType();
7171
7172 // Generate the wide operation.
7173 SDValue Op = PromoteMaskArithmetic(Narrow, DL, VT, DAG, Subtarget, 0);
7174 if (!Op)
7175 return SDValue();
7176 switch (N.getOpcode()) {
7177 default:
7178 llvm_unreachable("Unexpected opcode");
7179 case ISD::ANY_EXTEND:
7180 return Op;
7181 case ISD::ZERO_EXTEND:
7182 return DAG.getZeroExtendInReg(Op, DL, NarrowVT);
7183 case ISD::SIGN_EXTEND:
7184 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
7185 DAG.getValueType(NarrowVT));
7186 }
7187}
7188
7189static SDValue performEXTENDCombine(SDNode *N, SelectionDAG &DAG,
7190 TargetLowering::DAGCombinerInfo &DCI,
7191 const LoongArchSubtarget &Subtarget) {
7192 EVT VT = N->getValueType(0);
7193 SDLoc DL(N);
7194
7195 if (VT.isVector())
7196 if (SDValue R = PromoteMaskArithmetic(SDValue(N, 0), DL, DAG, Subtarget))
7197 return R;
7198
7199 return SDValue();
7200}
7201
7202SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N,
7203 DAGCombinerInfo &DCI) const {
7204 SelectionDAG &DAG = DCI.DAG;
7205 switch (N->getOpcode()) {
7206 default:
7207 break;
7208 case ISD::AND:
7209 return performANDCombine(N, DAG, DCI, Subtarget);
7210 case ISD::OR:
7211 return performORCombine(N, DAG, DCI, Subtarget);
7212 case ISD::SETCC:
7213 return performSETCCCombine(N, DAG, DCI, Subtarget);
7214 case ISD::SRL:
7215 return performSRLCombine(N, DAG, DCI, Subtarget);
7216 case ISD::BITCAST:
7217 return performBITCASTCombine(N, DAG, DCI, Subtarget);
7218 case ISD::ANY_EXTEND:
7219 case ISD::ZERO_EXTEND:
7220 case ISD::SIGN_EXTEND:
7221 return performEXTENDCombine(N, DAG, DCI, Subtarget);
7222 case ISD::SINT_TO_FP:
7223 return performSINT_TO_FPCombine(N, DAG, DCI, Subtarget);
7224 case LoongArchISD::BITREV_W:
7225 return performBITREV_WCombine(N, DAG, DCI, Subtarget);
7226 case LoongArchISD::BR_CC:
7227 return performBR_CCCombine(N, DAG, DCI, Subtarget);
7228 case LoongArchISD::SELECT_CC:
7229 return performSELECT_CCCombine(N, DAG, DCI, Subtarget);
7230 case ISD::INTRINSIC_WO_CHAIN:
7231 return performINTRINSIC_WO_CHAINCombine(N, DAG, DCI, Subtarget);
7232 case LoongArchISD::MOVGR2FR_W_LA64:
7233 return performMOVGR2FR_WCombine(N, DAG, DCI, Subtarget);
7234 case LoongArchISD::MOVFR2GR_S_LA64:
7235 return performMOVFR2GR_SCombine(N, DAG, DCI, Subtarget);
7236 case LoongArchISD::VMSKLTZ:
7237 case LoongArchISD::XVMSKLTZ:
7238 return performVMSKLTZCombine(N, DAG, DCI, Subtarget);
7239 case LoongArchISD::SPLIT_PAIR_F64:
7240 return performSPLIT_PAIR_F64Combine(N, DAG, DCI, Subtarget);
7241 case LoongArchISD::VANDN:
7242 return performVANDNCombine(N, DAG, DCI, Subtarget);
7243 }
7244 return SDValue();
7245}
7246
7247static MachineBasicBlock *insertDivByZeroTrap(MachineInstr &MI,
7248 MachineBasicBlock *MBB) {
7249 if (!ZeroDivCheck)
7250 return MBB;
7251
7252 // Build instructions:
7253 // MBB:
7254 // div(or mod) $dst, $dividend, $divisor
7255 // bne $divisor, $zero, SinkMBB
7256 // BreakMBB:
7257 // break 7 // BRK_DIVZERO
7258 // SinkMBB:
7259 // fallthrough
7260 const BasicBlock *LLVM_BB = MBB->getBasicBlock();
7261 MachineFunction::iterator It = ++MBB->getIterator();
7262 MachineFunction *MF = MBB->getParent();
7263 auto BreakMBB = MF->CreateMachineBasicBlock(LLVM_BB);
7264 auto SinkMBB = MF->CreateMachineBasicBlock(LLVM_BB);
7265 MF->insert(It, BreakMBB);
7266 MF->insert(It, SinkMBB);
7267
7268 // Transfer the remainder of MBB and its successor edges to SinkMBB.
7269 SinkMBB->splice(SinkMBB->end(), MBB, std::next(MI.getIterator()), MBB->end());
7270 SinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
7271
7272 const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
7273 DebugLoc DL = MI.getDebugLoc();
7274 MachineOperand &Divisor = MI.getOperand(2);
7275 Register DivisorReg = Divisor.getReg();
7276
7277 // MBB:
7278 BuildMI(MBB, DL, TII.get(LoongArch::BNE))
7279 .addReg(DivisorReg, getKillRegState(Divisor.isKill()))
7280 .addReg(LoongArch::R0)
7281 .addMBB(SinkMBB);
7282 MBB->addSuccessor(BreakMBB);
7283 MBB->addSuccessor(SinkMBB);
7284
7285 // BreakMBB:
7286 // See linux header file arch/loongarch/include/uapi/asm/break.h for the
7287 // definition of BRK_DIVZERO.
7288 BuildMI(BreakMBB, DL, TII.get(LoongArch::BREAK)).addImm(7 /*BRK_DIVZERO*/);
7289 BreakMBB->addSuccessor(SinkMBB);
7290
7291 // Clear Divisor's kill flag.
7292 Divisor.setIsKill(false);
7293
7294 return SinkMBB;
7295}
7296
7297static MachineBasicBlock *
7298emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB,
7299 const LoongArchSubtarget &Subtarget) {
7300 unsigned CondOpc;
7301 switch (MI.getOpcode()) {
7302 default:
7303 llvm_unreachable("Unexpected opcode");
7304 case LoongArch::PseudoVBZ:
7305 CondOpc = LoongArch::VSETEQZ_V;
7306 break;
7307 case LoongArch::PseudoVBZ_B:
7308 CondOpc = LoongArch::VSETANYEQZ_B;
7309 break;
7310 case LoongArch::PseudoVBZ_H:
7311 CondOpc = LoongArch::VSETANYEQZ_H;
7312 break;
7313 case LoongArch::PseudoVBZ_W:
7314 CondOpc = LoongArch::VSETANYEQZ_W;
7315 break;
7316 case LoongArch::PseudoVBZ_D:
7317 CondOpc = LoongArch::VSETANYEQZ_D;
7318 break;
7319 case LoongArch::PseudoVBNZ:
7320 CondOpc = LoongArch::VSETNEZ_V;
7321 break;
7322 case LoongArch::PseudoVBNZ_B:
7323 CondOpc = LoongArch::VSETALLNEZ_B;
7324 break;
7325 case LoongArch::PseudoVBNZ_H:
7326 CondOpc = LoongArch::VSETALLNEZ_H;
7327 break;
7328 case LoongArch::PseudoVBNZ_W:
7329 CondOpc = LoongArch::VSETALLNEZ_W;
7330 break;
7331 case LoongArch::PseudoVBNZ_D:
7332 CondOpc = LoongArch::VSETALLNEZ_D;
7333 break;
7334 case LoongArch::PseudoXVBZ:
7335 CondOpc = LoongArch::XVSETEQZ_V;
7336 break;
7337 case LoongArch::PseudoXVBZ_B:
7338 CondOpc = LoongArch::XVSETANYEQZ_B;
7339 break;
7340 case LoongArch::PseudoXVBZ_H:
7341 CondOpc = LoongArch::XVSETANYEQZ_H;
7342 break;
7343 case LoongArch::PseudoXVBZ_W:
7344 CondOpc = LoongArch::XVSETANYEQZ_W;
7345 break;
7346 case LoongArch::PseudoXVBZ_D:
7347 CondOpc = LoongArch::XVSETANYEQZ_D;
7348 break;
7349 case LoongArch::PseudoXVBNZ:
7350 CondOpc = LoongArch::XVSETNEZ_V;
7351 break;
7352 case LoongArch::PseudoXVBNZ_B:
7353 CondOpc = LoongArch::XVSETALLNEZ_B;
7354 break;
7355 case LoongArch::PseudoXVBNZ_H:
7356 CondOpc = LoongArch::XVSETALLNEZ_H;
7357 break;
7358 case LoongArch::PseudoXVBNZ_W:
7359 CondOpc = LoongArch::XVSETALLNEZ_W;
7360 break;
7361 case LoongArch::PseudoXVBNZ_D:
7362 CondOpc = LoongArch::XVSETALLNEZ_D;
7363 break;
7364 }
7365
7366 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
7367 const BasicBlock *LLVM_BB = BB->getBasicBlock();
7368 DebugLoc DL = MI.getDebugLoc();
7369 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
7370 MachineFunction::iterator It = ++BB->getIterator();
7371
7372 MachineFunction *F = BB->getParent();
7373 MachineBasicBlock *FalseBB = F->CreateMachineBasicBlock(LLVM_BB);
7374 MachineBasicBlock *TrueBB = F->CreateMachineBasicBlock(LLVM_BB);
7375 MachineBasicBlock *SinkBB = F->CreateMachineBasicBlock(LLVM_BB);
7376
7377 F->insert(It, FalseBB);
7378 F->insert(It, TrueBB);
7379 F->insert(It, SinkBB);
7380
7381 // Transfer the remainder of MBB and its successor edges to Sink.
7382 SinkBB->splice(SinkBB->end(), BB, std::next(MI.getIterator()), BB->end());
7383 SinkBB->transferSuccessorsAndUpdatePHIs(BB);
7384
7385 // Insert the real instruction to BB.
7386 Register FCC = MRI.createVirtualRegister(&LoongArch::CFRRegClass);
7387 BuildMI(BB, DL, TII->get(CondOpc), FCC).addReg(MI.getOperand(1).getReg());
7388
7389 // Insert branch.
7390 BuildMI(BB, DL, TII->get(LoongArch::BCNEZ)).addReg(FCC).addMBB(TrueBB);
7391 BB->addSuccessor(FalseBB);
7392 BB->addSuccessor(TrueBB);
7393
7394 // FalseBB.
7395 Register RD1 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
7396 BuildMI(FalseBB, DL, TII->get(LoongArch::ADDI_W), RD1)
7397 .addReg(LoongArch::R0)
7398 .addImm(0);
7399 BuildMI(FalseBB, DL, TII->get(LoongArch::PseudoBR)).addMBB(SinkBB);
7400 FalseBB->addSuccessor(SinkBB);
7401
7402 // TrueBB.
7403 Register RD2 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
7404 BuildMI(TrueBB, DL, TII->get(LoongArch::ADDI_W), RD2)
7405 .addReg(LoongArch::R0)
7406 .addImm(1);
7407 TrueBB->addSuccessor(SinkBB);
7408
7409 // SinkBB: merge the results.
7410 BuildMI(*SinkBB, SinkBB->begin(), DL, TII->get(LoongArch::PHI),
7411 MI.getOperand(0).getReg())
7412 .addReg(RD1)
7413 .addMBB(FalseBB)
7414 .addReg(RD2)
7415 .addMBB(TrueBB);
7416
7417 // The pseudo instruction is gone now.
7418 MI.eraseFromParent();
7419 return SinkBB;
7420}
7421
7422static MachineBasicBlock *
7423emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB,
7424 const LoongArchSubtarget &Subtarget) {
7425 unsigned InsOp;
7426 unsigned BroadcastOp;
7427 unsigned HalfSize;
7428 switch (MI.getOpcode()) {
7429 default:
7430 llvm_unreachable("Unexpected opcode");
7431 case LoongArch::PseudoXVINSGR2VR_B:
7432 HalfSize = 16;
7433 BroadcastOp = LoongArch::XVREPLGR2VR_B;
7434 InsOp = LoongArch::XVEXTRINS_B;
7435 break;
7436 case LoongArch::PseudoXVINSGR2VR_H:
7437 HalfSize = 8;
7438 BroadcastOp = LoongArch::XVREPLGR2VR_H;
7439 InsOp = LoongArch::XVEXTRINS_H;
7440 break;
7441 }
7442 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
7443 const TargetRegisterClass *RC = &LoongArch::LASX256RegClass;
7444 const TargetRegisterClass *SubRC = &LoongArch::LSX128RegClass;
7445 DebugLoc DL = MI.getDebugLoc();
7446 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
7447 // XDst = vector_insert XSrc, Elt, Idx
7448 Register XDst = MI.getOperand(0).getReg();
7449 Register XSrc = MI.getOperand(1).getReg();
7450 Register Elt = MI.getOperand(2).getReg();
7451 unsigned Idx = MI.getOperand(3).getImm();
7452
7453 if (XSrc.isVirtual() && MRI.getVRegDef(XSrc)->isImplicitDef() &&
7454 Idx < HalfSize) {
7455 Register ScratchSubReg1 = MRI.createVirtualRegister(SubRC);
7456 Register ScratchSubReg2 = MRI.createVirtualRegister(SubRC);
7457
7458 BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), ScratchSubReg1)
7459 .addReg(XSrc, {}, LoongArch::sub_128);
7460 BuildMI(*BB, MI, DL,
7461 TII->get(HalfSize == 8 ? LoongArch::VINSGR2VR_H
7462 : LoongArch::VINSGR2VR_B),
7463 ScratchSubReg2)
7464 .addReg(ScratchSubReg1)
7465 .addReg(Elt)
7466 .addImm(Idx);
7467
7468 BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), XDst)
7469 .addReg(ScratchSubReg2)
7470 .addImm(LoongArch::sub_128);
7471 } else {
7472 Register ScratchReg1 = MRI.createVirtualRegister(RC);
7473 Register ScratchReg2 = MRI.createVirtualRegister(RC);
7474
7475 BuildMI(*BB, MI, DL, TII->get(BroadcastOp), ScratchReg1).addReg(Elt);
7476
7477 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), ScratchReg2)
7478 .addReg(ScratchReg1)
7479 .addReg(XSrc)
7480 .addImm(Idx >= HalfSize ? 48 : 18);
7481
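    // (Informal reading: the XVPERMI_Q immediate (48 or 18) picks which
    // 128-bit halves of its two sources form ScratchReg2, so the broadcast
    // element sits in the half holding Idx; the XVEXTRINS immediate below is
    // (Idx % HalfSize) * 17 == k * 0x11, writing the in-half index k into
    // both the destination and source nibbles.)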
7482 BuildMI(*BB, MI, DL, TII->get(InsOp), XDst)
7483 .addReg(XSrc)
7484 .addReg(ScratchReg2)
7485 .addImm((Idx >= HalfSize ? Idx - HalfSize : Idx) * 17);
7486 }
7487
7488 MI.eraseFromParent();
7489 return BB;
7490}
7491
7492static MachineBasicBlock *emitPseudoCTPOP(MachineInstr &MI,
7493 MachineBasicBlock *BB,
7494 const LoongArchSubtarget &Subtarget) {
7495 assert(Subtarget.hasExtLSX());
7496 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
7497 const TargetRegisterClass *RC = &LoongArch::LSX128RegClass;
7498 DebugLoc DL = MI.getDebugLoc();
7499 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
7500 Register Dst = MI.getOperand(0).getReg();
7501 Register Src = MI.getOperand(1).getReg();
7502 Register ScratchReg1 = MRI.createVirtualRegister(RC);
7503 Register ScratchReg2 = MRI.createVirtualRegister(RC);
7504 Register ScratchReg3 = MRI.createVirtualRegister(RC);
7505
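  // Strategy: zero a vector register with VLDI, insert Src into element 0,
  // run the vector popcount, then extract element 0 back into Dst.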
7506 BuildMI(*BB, MI, DL, TII->get(LoongArch::VLDI), ScratchReg1).addImm(0);
7507 BuildMI(*BB, MI, DL,
7508 TII->get(Subtarget.is64Bit() ? LoongArch::VINSGR2VR_D
7509 : LoongArch::VINSGR2VR_W),
7510 ScratchReg2)
7511 .addReg(ScratchReg1)
7512 .addReg(Src)
7513 .addImm(0);
7514 BuildMI(
7515 *BB, MI, DL,
7516 TII->get(Subtarget.is64Bit() ? LoongArch::VPCNT_D : LoongArch::VPCNT_W),
7517 ScratchReg3)
7518 .addReg(ScratchReg2);
7519 BuildMI(*BB, MI, DL,
7520 TII->get(Subtarget.is64Bit() ? LoongArch::VPICKVE2GR_D
7521 : LoongArch::VPICKVE2GR_W),
7522 Dst)
7523 .addReg(ScratchReg3)
7524 .addImm(0);
7525
7526 MI.eraseFromParent();
7527 return BB;
7528}
7529
7530static MachineBasicBlock *
7531emitPseudoVMSKCOND(MachineInstr &MI, MachineBasicBlock *BB,
7532 const LoongArchSubtarget &Subtarget) {
7533 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
7534 const TargetRegisterClass *RC = &LoongArch::LSX128RegClass;
7535 const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
7536 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
7537 Register Dst = MI.getOperand(0).getReg();
7538 Register Src = MI.getOperand(1).getReg();
7539 DebugLoc DL = MI.getDebugLoc();
7540 unsigned EleBits = 8;
7541 unsigned NotOpc = 0;
7542 unsigned MskOpc;
7543
7544 switch (MI.getOpcode()) {
7545 default:
7546 llvm_unreachable("Unexpected opcode");
7547 case LoongArch::PseudoVMSKLTZ_B:
7548 MskOpc = LoongArch::VMSKLTZ_B;
7549 break;
7550 case LoongArch::PseudoVMSKLTZ_H:
7551 MskOpc = LoongArch::VMSKLTZ_H;
7552 EleBits = 16;
7553 break;
7554 case LoongArch::PseudoVMSKLTZ_W:
7555 MskOpc = LoongArch::VMSKLTZ_W;
7556 EleBits = 32;
7557 break;
7558 case LoongArch::PseudoVMSKLTZ_D:
7559 MskOpc = LoongArch::VMSKLTZ_D;
7560 EleBits = 64;
7561 break;
7562 case LoongArch::PseudoVMSKGEZ_B:
7563 MskOpc = LoongArch::VMSKGEZ_B;
7564 break;
7565 case LoongArch::PseudoVMSKEQZ_B:
7566 MskOpc = LoongArch::VMSKNZ_B;
7567 NotOpc = LoongArch::VNOR_V;
7568 break;
7569 case LoongArch::PseudoVMSKNEZ_B:
7570 MskOpc = LoongArch::VMSKNZ_B;
7571 break;
7572 case LoongArch::PseudoXVMSKLTZ_B:
7573 MskOpc = LoongArch::XVMSKLTZ_B;
7574 RC = &LoongArch::LASX256RegClass;
7575 break;
7576 case LoongArch::PseudoXVMSKLTZ_H:
7577 MskOpc = LoongArch::XVMSKLTZ_H;
7578 RC = &LoongArch::LASX256RegClass;
7579 EleBits = 16;
7580 break;
7581 case LoongArch::PseudoXVMSKLTZ_W:
7582 MskOpc = LoongArch::XVMSKLTZ_W;
7583 RC = &LoongArch::LASX256RegClass;
7584 EleBits = 32;
7585 break;
7586 case LoongArch::PseudoXVMSKLTZ_D:
7587 MskOpc = LoongArch::XVMSKLTZ_D;
7588 RC = &LoongArch::LASX256RegClass;
7589 EleBits = 64;
7590 break;
7591 case LoongArch::PseudoXVMSKGEZ_B:
7592 MskOpc = LoongArch::XVMSKGEZ_B;
7593 RC = &LoongArch::LASX256RegClass;
7594 break;
7595 case LoongArch::PseudoXVMSKEQZ_B:
7596 MskOpc = LoongArch::XVMSKNZ_B;
7597 NotOpc = LoongArch::XVNOR_V;
7598 RC = &LoongArch::LASX256RegClass;
7599 break;
7600 case LoongArch::PseudoXVMSKNEZ_B:
7601 MskOpc = LoongArch::XVMSKNZ_B;
7602 RC = &LoongArch::LASX256RegClass;
7603 break;
7604 }
7605
7606 Register Msk = MRI.createVirtualRegister(RC);
7607 if (NotOpc) {
7608 Register Tmp = MRI.createVirtualRegister(RC);
7609 BuildMI(*BB, MI, DL, TII->get(MskOpc), Tmp).addReg(Src);
7610 BuildMI(*BB, MI, DL, TII->get(NotOpc), Msk)
7611 .addReg(Tmp, RegState::Kill)
7612 .addReg(Tmp, RegState::Kill);
7613 } else {
7614 BuildMI(*BB, MI, DL, TII->get(MskOpc), Msk).addReg(Src);
7615 }
7616
7617 if (TRI->getRegSizeInBits(*RC) > 128) {
7618 Register Lo = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
7619 Register Hi = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
7620 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPICKVE2GR_WU), Lo)
7621 .addReg(Msk)
7622 .addImm(0);
7623 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPICKVE2GR_WU), Hi)
7624 .addReg(Msk, RegState::Kill)
7625 .addImm(4);
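    // Combine the two 128-bit halves: Dst starts as Lo and BSTRINS writes Hi
    // into bits [256/EleBits - 1, 128/EleBits], i.e. the upper half of the
    // mask. (Operand roles per the usual BSTRINS rd, rj, msb, lsb form.)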
7626 BuildMI(*BB, MI, DL,
7627 TII->get(Subtarget.is64Bit() ? LoongArch::BSTRINS_D
7628 : LoongArch::BSTRINS_W),
7629 Dst)
7630 .addReg(Lo, RegState::Kill)
7631 .addReg(Hi, RegState::Kill)
7632 .addImm(256 / EleBits - 1)
7633 .addImm(128 / EleBits);
7634 } else {
7635 BuildMI(*BB, MI, DL, TII->get(LoongArch::VPICKVE2GR_HU), Dst)
7636 .addReg(Msk, RegState::Kill)
7637 .addImm(0);
7638 }
7639
7640 MI.eraseFromParent();
7641 return BB;
7642}
7643
7644static MachineBasicBlock *
7645emitSplitPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB,
7646 const LoongArchSubtarget &Subtarget) {
7647 assert(MI.getOpcode() == LoongArch::SplitPairF64Pseudo &&
7648 "Unexpected instruction");
7649
7650 MachineFunction &MF = *BB->getParent();
7651 DebugLoc DL = MI.getDebugLoc();
7652 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
7653 Register LoReg = MI.getOperand(0).getReg();
7654 Register HiReg = MI.getOperand(1).getReg();
7655 Register SrcReg = MI.getOperand(2).getReg();
7656
7657 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVFR2GR_S_64), LoReg).addReg(SrcReg);
7658 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVFRH2GR_S), HiReg)
7659 .addReg(SrcReg, getKillRegState(MI.getOperand(2).isKill()));
7660 MI.eraseFromParent(); // The pseudo instruction is gone now.
7661 return BB;
7662}
7663
7664static MachineBasicBlock *
7665emitBuildPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB,
7666 const LoongArchSubtarget &Subtarget) {
7667 assert(MI.getOpcode() == LoongArch::BuildPairF64Pseudo &&
7668 "Unexpected instruction");
7669
7670 MachineFunction &MF = *BB->getParent();
7671 DebugLoc DL = MI.getDebugLoc();
7672 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
7673 MachineRegisterInfo &MRI = MF.getRegInfo();
7674 Register TmpReg = MRI.createVirtualRegister(&LoongArch::FPR64RegClass);
7675 Register DstReg = MI.getOperand(0).getReg();
7676 Register LoReg = MI.getOperand(1).getReg();
7677 Register HiReg = MI.getOperand(2).getReg();
7678
7679 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVGR2FR_W_64), TmpReg)
7680 .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()));
7681 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVGR2FRH_W), DstReg)
7682 .addReg(TmpReg, RegState::Kill)
7683 .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()));
7684 MI.eraseFromParent(); // The pseudo instruction is gone now.
7685 return BB;
7686}
7687
7688static bool isSelectPseudo(MachineInstr &MI) {
7689 switch (MI.getOpcode()) {
7690 default:
7691 return false;
7692 case LoongArch::Select_GPR_Using_CC_GPR:
7693 return true;
7694 }
7695}
7696
7697static MachineBasicBlock *
7698emitSelectPseudo(MachineInstr &MI, MachineBasicBlock *BB,
7699 const LoongArchSubtarget &Subtarget) {
7700 // To "insert" Select_* instructions, we actually have to insert the triangle
7701 // control-flow pattern. The incoming instructions know the destination vreg
7702 // to set, the condition code register to branch on, the true/false values to
7703 // select between, and the condcode to use to select the appropriate branch.
7704 //
7705 // We produce the following control flow:
7706 // HeadMBB
7707 // | \
7708 // | IfFalseMBB
7709 // | /
7710 // TailMBB
7711 //
7712 // When we find a sequence of selects we attempt to optimize their emission
7713 // by sharing the control flow. Currently we only handle cases where we have
7714 // multiple selects with the exact same condition (same LHS, RHS and CC).
7715 // The selects may be interleaved with other instructions if the other
7716 // instructions meet some requirements we deem safe:
7717 // - They are not pseudo instructions.
7718 // - They are debug instructions. Otherwise,
7719 // - They do not have side-effects, do not access memory and their inputs do
7720 // not depend on the results of the select pseudo-instructions.
7721 // The TrueV/FalseV operands of the selects cannot depend on the result of
7722 // previous selects in the sequence.
7723 // These conditions could be further relaxed. See the X86 target for a
7724 // related approach and more information.
7725
7726 Register LHS = MI.getOperand(1).getReg();
7727 Register RHS;
7728 if (MI.getOperand(2).isReg())
7729 RHS = MI.getOperand(2).getReg();
7730 auto CC = static_cast<unsigned>(MI.getOperand(3).getImm());
7731
7732 SmallVector<MachineInstr *, 4> SelectDebugValues;
7733 SmallSet<Register, 4> SelectDests;
7734 SelectDests.insert(MI.getOperand(0).getReg());
7735
7736 MachineInstr *LastSelectPseudo = &MI;
7737 for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
7738 SequenceMBBI != E; ++SequenceMBBI) {
7739 if (SequenceMBBI->isDebugInstr())
7740 continue;
7741 if (isSelectPseudo(*SequenceMBBI)) {
7742 if (SequenceMBBI->getOperand(1).getReg() != LHS ||
7743 !SequenceMBBI->getOperand(2).isReg() ||
7744 SequenceMBBI->getOperand(2).getReg() != RHS ||
7745 SequenceMBBI->getOperand(3).getImm() != CC ||
7746 SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
7747 SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
7748 break;
7749 LastSelectPseudo = &*SequenceMBBI;
7750 SequenceMBBI->collectDebugValues(SelectDebugValues);
7751 SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
7752 continue;
7753 }
7754 if (SequenceMBBI->hasUnmodeledSideEffects() ||
7755 SequenceMBBI->mayLoadOrStore() ||
7756 SequenceMBBI->usesCustomInsertionHook())
7757 break;
7758 if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
7759 return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
7760 }))
7761 break;
7762 }
7763
7764 const LoongArchInstrInfo &TII = *Subtarget.getInstrInfo();
7765 const BasicBlock *LLVM_BB = BB->getBasicBlock();
7766 DebugLoc DL = MI.getDebugLoc();
7767 MachineFunction::iterator I = ++BB->getIterator();
7768
7769 MachineBasicBlock *HeadMBB = BB;
7770 MachineFunction *F = BB->getParent();
7771 MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
7772 MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
7773
7774 F->insert(I, IfFalseMBB);
7775 F->insert(I, TailMBB);
7776
7777 // Set the call frame size on entry to the new basic blocks.
7778 unsigned CallFrameSize = TII.getCallFrameSizeAt(*LastSelectPseudo);
7779 IfFalseMBB->setCallFrameSize(CallFrameSize);
7780 TailMBB->setCallFrameSize(CallFrameSize);
7781
7782 // Transfer debug instructions associated with the selects to TailMBB.
7783 for (MachineInstr *DebugInstr : SelectDebugValues) {
7784 TailMBB->push_back(DebugInstr->removeFromParent());
7785 }
7786
7787 // Move all instructions after the sequence to TailMBB.
7788 TailMBB->splice(TailMBB->end(), HeadMBB,
7789 std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
7790 // Update machine-CFG edges by transferring all successors of the current
7791 // block to the new block which will contain the Phi nodes for the selects.
7792 TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
7793 // Set the successors for HeadMBB.
7794 HeadMBB->addSuccessor(IfFalseMBB);
7795 HeadMBB->addSuccessor(TailMBB);
7796
7797 // Insert appropriate branch.
7798 if (MI.getOperand(2).isImm())
7799 BuildMI(HeadMBB, DL, TII.get(CC))
7800 .addReg(LHS)
7801 .addImm(MI.getOperand(2).getImm())
7802 .addMBB(TailMBB);
7803 else
7804 BuildMI(HeadMBB, DL, TII.get(CC)).addReg(LHS).addReg(RHS).addMBB(TailMBB);
7805
7806 // IfFalseMBB just falls through to TailMBB.
7807 IfFalseMBB->addSuccessor(TailMBB);
7808
7809 // Create PHIs for all of the select pseudo-instructions.
7810 auto SelectMBBI = MI.getIterator();
7811 auto SelectEnd = std::next(LastSelectPseudo->getIterator());
7812 auto InsertionPoint = TailMBB->begin();
7813 while (SelectMBBI != SelectEnd) {
7814 auto Next = std::next(SelectMBBI);
7815 if (isSelectPseudo(*SelectMBBI)) {
7816 // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
7817 BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
7818 TII.get(LoongArch::PHI), SelectMBBI->getOperand(0).getReg())
7819 .addReg(SelectMBBI->getOperand(4).getReg())
7820 .addMBB(HeadMBB)
7821 .addReg(SelectMBBI->getOperand(5).getReg())
7822 .addMBB(IfFalseMBB);
7823 SelectMBBI->eraseFromParent();
7824 }
7825 SelectMBBI = Next;
7826 }
7827
7828 F->getProperties().resetNoPHIs();
7829 return TailMBB;
7830}
7831
7832MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter(
7833 MachineInstr &MI, MachineBasicBlock *BB) const {
7834 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
7835 DebugLoc DL = MI.getDebugLoc();
7836
7837 switch (MI.getOpcode()) {
7838 default:
7839 llvm_unreachable("Unexpected instr type to insert");
7840 case LoongArch::DIV_W:
7841 case LoongArch::DIV_WU:
7842 case LoongArch::MOD_W:
7843 case LoongArch::MOD_WU:
7844 case LoongArch::DIV_D:
7845 case LoongArch::DIV_DU:
7846 case LoongArch::MOD_D:
7847 case LoongArch::MOD_DU:
7848 return insertDivByZeroTrap(MI, BB);
7849 break;
7850 case LoongArch::WRFCSR: {
7851 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVGR2FCSR),
7852 LoongArch::FCSR0 + MI.getOperand(0).getImm())
7853 .addReg(MI.getOperand(1).getReg());
7854 MI.eraseFromParent();
7855 return BB;
7856 }
7857 case LoongArch::RDFCSR: {
7858 MachineInstr *ReadFCSR =
7859 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVFCSR2GR),
7860 MI.getOperand(0).getReg())
7861 .addReg(LoongArch::FCSR0 + MI.getOperand(1).getImm());
7862 ReadFCSR->getOperand(1).setIsUndef();
7863 MI.eraseFromParent();
7864 return BB;
7865 }
7866 case LoongArch::Select_GPR_Using_CC_GPR:
7867 return emitSelectPseudo(MI, BB, Subtarget);
7868 case LoongArch::BuildPairF64Pseudo:
7869 return emitBuildPairF64Pseudo(MI, BB, Subtarget);
7870 case LoongArch::SplitPairF64Pseudo:
7871 return emitSplitPairF64Pseudo(MI, BB, Subtarget);
7872 case LoongArch::PseudoVBZ:
7873 case LoongArch::PseudoVBZ_B:
7874 case LoongArch::PseudoVBZ_H:
7875 case LoongArch::PseudoVBZ_W:
7876 case LoongArch::PseudoVBZ_D:
7877 case LoongArch::PseudoVBNZ:
7878 case LoongArch::PseudoVBNZ_B:
7879 case LoongArch::PseudoVBNZ_H:
7880 case LoongArch::PseudoVBNZ_W:
7881 case LoongArch::PseudoVBNZ_D:
7882 case LoongArch::PseudoXVBZ:
7883 case LoongArch::PseudoXVBZ_B:
7884 case LoongArch::PseudoXVBZ_H:
7885 case LoongArch::PseudoXVBZ_W:
7886 case LoongArch::PseudoXVBZ_D:
7887 case LoongArch::PseudoXVBNZ:
7888 case LoongArch::PseudoXVBNZ_B:
7889 case LoongArch::PseudoXVBNZ_H:
7890 case LoongArch::PseudoXVBNZ_W:
7891 case LoongArch::PseudoXVBNZ_D:
7892 return emitVecCondBranchPseudo(MI, BB, Subtarget);
7893 case LoongArch::PseudoXVINSGR2VR_B:
7894 case LoongArch::PseudoXVINSGR2VR_H:
7895 return emitPseudoXVINSGR2VR(MI, BB, Subtarget);
7896 case LoongArch::PseudoCTPOP:
7897 return emitPseudoCTPOP(MI, BB, Subtarget);
7898 case LoongArch::PseudoVMSKLTZ_B:
7899 case LoongArch::PseudoVMSKLTZ_H:
7900 case LoongArch::PseudoVMSKLTZ_W:
7901 case LoongArch::PseudoVMSKLTZ_D:
7902 case LoongArch::PseudoVMSKGEZ_B:
7903 case LoongArch::PseudoVMSKEQZ_B:
7904 case LoongArch::PseudoVMSKNEZ_B:
7905 case LoongArch::PseudoXVMSKLTZ_B:
7906 case LoongArch::PseudoXVMSKLTZ_H:
7907 case LoongArch::PseudoXVMSKLTZ_W:
7908 case LoongArch::PseudoXVMSKLTZ_D:
7909 case LoongArch::PseudoXVMSKGEZ_B:
7910 case LoongArch::PseudoXVMSKEQZ_B:
7911 case LoongArch::PseudoXVMSKNEZ_B:
7912 return emitPseudoVMSKCOND(MI, BB, Subtarget);
7913 case TargetOpcode::STATEPOINT:
7914 // STATEPOINT is a pseudo instruction which has no implicit defs/uses
7915 // while the bl call instruction (to which the statepoint is lowered at
7916 // the end) has an implicit def. This def is early-clobber as it will be
7917 // set at the moment of the call and earlier than any use is read.
7918 // Add this implicit dead def here as a workaround.
7919 MI.addOperand(*MI.getMF(),
7920 MachineOperand::CreateReg(
7921 LoongArch::R1, /*isDef*/ true,
7922 /*isImp*/ true, /*isKill*/ false, /*isDead*/ true,
7923 /*isUndef*/ false, /*isEarlyClobber*/ true));
7924 if (!Subtarget.is64Bit())
7925 report_fatal_error("STATEPOINT is only supported on 64-bit targets");
7926 return emitPatchPoint(MI, BB);
7927 }
7928}
7929
7930bool LoongArchTargetLowering::allowsMisalignedMemoryAccesses(
7931 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
7932 unsigned *Fast) const {
7933 if (!Subtarget.hasUAL())
7934 return false;
7935
7936 // TODO: set reasonable speed number.
7937 if (Fast)
7938 *Fast = 1;
7939 return true;
7940}
7941
7942//===----------------------------------------------------------------------===//
7943// Calling Convention Implementation
7944//===----------------------------------------------------------------------===//
7945
7946// Eight general-purpose registers a0-a7 are used for passing integer arguments,
7947// with a0-a1 reused to return values. Generally, the GPRs are used to pass
7948// fixed-point arguments, and floating-point arguments when no FPR is available
7949// or with soft float ABI.
7950const MCPhysReg ArgGPRs[] = {LoongArch::R4, LoongArch::R5, LoongArch::R6,
7951 LoongArch::R7, LoongArch::R8, LoongArch::R9,
7952 LoongArch::R10, LoongArch::R11};
7953
7954// PreserveNone calling convention:
7955// Arguments may be passed in any general-purpose registers except:
7956// - R1 : return address register
7957// - R22 : frame pointer
7958// - R31 : base pointer
7959//
7960// All general-purpose registers are treated as caller-saved,
7961// except R1 (RA) and R22 (FP).
7962//
7963// Non-volatile registers are allocated first so that a function
7964// can call normal functions without having to spill and reload
7965// argument registers.
7966const MCPhysReg PreserveNoneArgGPRs[] = {
7967 LoongArch::R23, LoongArch::R24, LoongArch::R25, LoongArch::R26,
7968 LoongArch::R27, LoongArch::R28, LoongArch::R29, LoongArch::R30,
7969 LoongArch::R4, LoongArch::R5, LoongArch::R6, LoongArch::R7,
7970 LoongArch::R8, LoongArch::R9, LoongArch::R10, LoongArch::R11,
7971 LoongArch::R12, LoongArch::R13, LoongArch::R14, LoongArch::R15,
7972 LoongArch::R16, LoongArch::R17, LoongArch::R18, LoongArch::R19,
7973 LoongArch::R20};
7974
7975// Eight floating-point registers fa0-fa7 are used for passing floating-point
7976// arguments, and fa0-fa1 are also used to return values.
7977const MCPhysReg ArgFPR32s[] = {LoongArch::F0, LoongArch::F1, LoongArch::F2,
7978 LoongArch::F3, LoongArch::F4, LoongArch::F5,
7979 LoongArch::F6, LoongArch::F7};
7980// FPR32 and FPR64 alias each other.
7981const MCPhysReg ArgFPR64s[] = {
7982 LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, LoongArch::F3_64,
7983 LoongArch::F4_64, LoongArch::F5_64, LoongArch::F6_64, LoongArch::F7_64};
7984
7985const MCPhysReg ArgVRs[] = {LoongArch::VR0, LoongArch::VR1, LoongArch::VR2,
7986 LoongArch::VR3, LoongArch::VR4, LoongArch::VR5,
7987 LoongArch::VR6, LoongArch::VR7};
7988
7989const MCPhysReg ArgXRs[] = {LoongArch::XR0, LoongArch::XR1, LoongArch::XR2,
7990 LoongArch::XR3, LoongArch::XR4, LoongArch::XR5,
7991 LoongArch::XR6, LoongArch::XR7};
7992
7993static MCRegister allocateArgGPR(CCState &State) {
7994 switch (State.getCallingConv()) {
7995 case CallingConv::PreserveNone:
7996 if (!State.isVarArg())
7997 return State.AllocateReg(PreserveNoneArgGPRs);
7998 [[fallthrough]];
7999 default:
8000 return State.AllocateReg(ArgGPRs);
8001 }
8002}
8003
8004// Pass a 2*GRLen argument that has been split into two GRLen values through
8005// registers or the stack as necessary.
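// (Illustrative: on LA32 an i64 splits into two i32 halves, giving three
// layouts: both halves in GPRs, the low half in the last free GPR and the
// high half on the stack, or both halves on the stack.)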
8006static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State,
8007 CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1,
8008 unsigned ValNo2, MVT ValVT2, MVT LocVT2,
8009 ISD::ArgFlagsTy ArgFlags2) {
8010 unsigned GRLenInBytes = GRLen / 8;
8011 if (Register Reg = allocateArgGPR(State)) {
8012 // At least one half can be passed via register.
8013 State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
8014 VA1.getLocVT(), CCValAssign::Full));
8015 } else {
8016 // Both halves must be passed on the stack, with proper alignment.
8017 Align StackAlign =
8018 std::max(Align(GRLenInBytes), ArgFlags1.getNonZeroOrigAlign());
8019 State.addLoc(
8020 CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),
8021 State.AllocateStack(GRLenInBytes, StackAlign),
8022 VA1.getLocVT(), CCValAssign::Full));
8023 State.addLoc(CCValAssign::getMem(
8024 ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
8025 LocVT2, CCValAssign::Full));
8026 return false;
8027 }
8028 if (Register Reg = allocateArgGPR(State)) {
8029 // The second half can also be passed via register.
8030 State.addLoc(
8031 CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
8032 } else {
8033 // The second half is passed via the stack, without additional alignment.
8034 State.addLoc(CCValAssign::getMem(
8035 ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
8036 LocVT2, CCValAssign::Full));
8037 }
8038 return false;
8039}
8040
8041// Implements the LoongArch calling convention. Returns true upon failure.
8042static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI,
8043 unsigned ValNo, MVT ValVT,
8044 CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
8045 CCState &State, bool IsRet, Type *OrigTy) {
8046 unsigned GRLen = DL.getLargestLegalIntTypeSizeInBits();
8047 assert((GRLen == 32 || GRLen == 64) && "Unsupported GRLen");
8048 MVT GRLenVT = GRLen == 32 ? MVT::i32 : MVT::i64;
8049 MVT LocVT = ValVT;
8050
8051 // Any return value split into more than two values can't be returned
8052 // directly.
8053 if (IsRet && ValNo > 1)
8054 return true;
8055
8056 // Use GPRs for floats if passing a variadic argument or no FPR is available.
8057 bool UseGPRForFloat = true;
8058
8059 switch (ABI) {
8060 default:
8061 llvm_unreachable("Unexpected ABI");
8062 break;
8063 case LoongArchABI::ABI_ILP32F:
8064 case LoongArchABI::ABI_LP64F:
8065 case LoongArchABI::ABI_ILP32D:
8066 case LoongArchABI::ABI_LP64D:
8067 UseGPRForFloat = ArgFlags.isVarArg();
8068 break;
8069 case LoongArchABI::ABI_ILP32S:
8070 case LoongArchABI::ABI_LP64S:
8071 break;
8072 }
8073
8074 // If this is a variadic argument, the LoongArch calling convention requires
8075 // that it is assigned an 'even' or 'aligned' register if it has (2*GRLen)/8
8076 // byte alignment. An aligned register should be used regardless of whether
8077 // the original argument was split during legalisation or not. The argument
8078 // will not be passed by registers if the original type is larger than
8079 // 2*GRLen, so the register alignment rule does not apply.
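  // (Illustrative: on LA32 a variadic double has 8-byte alignment, which is
  // (2*GRLen)/8, so if the next free register has an odd index (e.g. a5) it
  // is skipped and the two halves land in the aligned pair a6/a7.)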
8080 unsigned TwoGRLenInBytes = (2 * GRLen) / 8;
8081 if (ArgFlags.isVarArg() &&
8082 ArgFlags.getNonZeroOrigAlign() == TwoGRLenInBytes &&
8083 DL.getTypeAllocSize(OrigTy) == TwoGRLenInBytes) {
8084 unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
8085 // Skip 'odd' register if necessary.
8086 if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1)
8087 State.AllocateReg(ArgGPRs);
8088 }
8089
8090 SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
8091 SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
8092 State.getPendingArgFlags();
8093
8094 assert(PendingLocs.size() == PendingArgFlags.size() &&
8095 "PendingLocs and PendingArgFlags out of sync");
8096
8097 // FPR32 and FPR64 alias each other.
8098 if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s))
8099 UseGPRForFloat = true;
8100
8101 if (UseGPRForFloat && ValVT == MVT::f32) {
8102 LocVT = GRLenVT;
8103 LocInfo = CCValAssign::BCvt;
8104 } else if (UseGPRForFloat && GRLen == 64 && ValVT == MVT::f64) {
8105 LocVT = MVT::i64;
8106 LocInfo = CCValAssign::BCvt;
8107 } else if (UseGPRForFloat && GRLen == 32 && ValVT == MVT::f64) {
8108 // Handle passing f64 on LA32D with a soft float ABI or when floating point
8109 // registers are exhausted.
8110 assert(PendingLocs.empty() && "Can't lower f64 if it is split");
8111 // Depending on available argument GPRs, f64 may be passed in a pair of
8112 // GPRs, split between a GPR and the stack, or passed completely on the
8113 // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
8114 // cases.
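    // (Illustrative: if only a7 remains free, the low half is assigned to a7
    // and the high half to a 4-byte stack slot by the code below.)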
8115 MCRegister Reg = allocateArgGPR(State);
8116 if (!Reg) {
8117 int64_t StackOffset = State.AllocateStack(8, Align(8));
8118 State.addLoc(
8119 CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
8120 return false;
8121 }
8122 LocVT = MVT::i32;
8123 State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
8124 MCRegister HiReg = allocateArgGPR(State);
8125 if (HiReg) {
8126 State.addLoc(
8127 CCValAssign::getCustomReg(ValNo, ValVT, HiReg, LocVT, LocInfo));
8128 } else {
8129 int64_t StackOffset = State.AllocateStack(4, Align(4));
8130 State.addLoc(
8131 CCValAssign::getCustomMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
8132 }
8133 return false;
8134 }
8135
8136 // Split arguments might be passed indirectly, so keep track of the pending
8137 // values.
8138 if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
8139 LocVT = GRLenVT;
8140 LocInfo = CCValAssign::Indirect;
8141 PendingLocs.push_back(
8142 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
8143 PendingArgFlags.push_back(ArgFlags);
8144 if (!ArgFlags.isSplitEnd()) {
8145 return false;
8146 }
8147 }
8148
8149 // If the split argument only had two elements, it should be passed directly
8150 // in registers or on the stack.
8151 if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&
8152 PendingLocs.size() <= 2) {
8153 assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
8154 // Apply the normal calling convention rules to the first half of the
8155 // split argument.
8156 CCValAssign VA = PendingLocs[0];
8157 ISD::ArgFlagsTy AF = PendingArgFlags[0];
8158 PendingLocs.clear();
8159 PendingArgFlags.clear();
8160 return CC_LoongArchAssign2GRLen(GRLen, State, VA, AF, ValNo, ValVT, LocVT,
8161 ArgFlags);
8162 }
8163
8164 // Allocate to a register if possible, or else a stack slot.
8165 Register Reg;
8166 unsigned StoreSizeBytes = GRLen / 8;
8167 Align StackAlign = Align(GRLen / 8);
8168
8169 if (ValVT == MVT::f32 && !UseGPRForFloat) {
8170 Reg = State.AllocateReg(ArgFPR32s);
8171 } else if (ValVT == MVT::f64 && !UseGPRForFloat) {
8172 Reg = State.AllocateReg(ArgFPR64s);
8173 } else if (ValVT.is128BitVector()) {
8174 Reg = State.AllocateReg(ArgVRs);
8175 UseGPRForFloat = false;
8176 StoreSizeBytes = 16;
8177 StackAlign = Align(16);
8178 } else if (ValVT.is256BitVector()) {
8179 Reg = State.AllocateReg(ArgXRs);
8180 UseGPRForFloat = false;
8181 StoreSizeBytes = 32;
8182 StackAlign = Align(32);
8183 } else {
8184 Reg = allocateArgGPR(State);
8185 }
8186
8187 unsigned StackOffset =
8188 Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign);
8189
8190 // If we reach this point and PendingLocs is non-empty, we must be at the
8191 // end of a split argument that must be passed indirectly.
8192 if (!PendingLocs.empty()) {
8193 assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
8194 assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
8195 for (auto &It : PendingLocs) {
8196 if (Reg)
8197 It.convertToReg(Reg);
8198 else
8199 It.convertToMem(StackOffset);
8200 State.addLoc(It);
8201 }
8202 PendingLocs.clear();
8203 PendingArgFlags.clear();
8204 return false;
8205 }
8206 assert((!UseGPRForFloat || LocVT == GRLenVT) &&
8207 "Expected an GRLenVT at this stage");
8208
8209 if (Reg) {
8210 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
8211 return false;
8212 }
8213
8214 // When a floating-point value is passed on the stack, no bit-cast is needed.
8215 if (ValVT.isFloatingPoint()) {
8216 LocVT = ValVT;
8217 LocInfo = CCValAssign::Full;
8218 }
8219
8220 State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
8221 return false;
8222}
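// Worked sketch of the split-argument paths above (assumed LP64D): an i128
// argument is legalised into two i64 halves. A split that produced exactly
// two pieces is passed directly via CC_LoongArchAssign2GRLen (registers or
// the stack), while anything split into more pieces is spilled by the
// caller and passed as a single address with CCValAssign::Indirect.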
8223
8224void LoongArchTargetLowering::analyzeInputArgs(
8225 MachineFunction &MF, CCState &CCInfo,
8226 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
8227 LoongArchCCAssignFn Fn) const {
8228 FunctionType *FType = MF.getFunction().getFunctionType();
8229 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
8230 MVT ArgVT = Ins[i].VT;
8231 Type *ArgTy = nullptr;
8232 if (IsRet)
8233 ArgTy = FType->getReturnType();
8234 else if (Ins[i].isOrigArg())
8235 ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
8236 LoongArchABI::ABI ABI =
8237 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
8238 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Ins[i].Flags,
8239 CCInfo, IsRet, ArgTy)) {
8240 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " << ArgVT
8241 << '\n');
8242 llvm_unreachable("");
8243 }
8244 }
8245}
8246
8247void LoongArchTargetLowering::analyzeOutputArgs(
8248 MachineFunction &MF, CCState &CCInfo,
8249 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
8250 CallLoweringInfo *CLI, LoongArchCCAssignFn Fn) const {
8251 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
8252 MVT ArgVT = Outs[i].VT;
8253 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
8254 LoongArchABI::ABI ABI =
8255 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
8256 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Outs[i].Flags,
8257 CCInfo, IsRet, OrigTy)) {
8258 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " << ArgVT
8259 << "\n");
8260 llvm_unreachable("");
8261 }
8262 }
8263}
8264
8265// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
8266// values.
8267 static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
8268 const CCValAssign &VA, const SDLoc &DL) {
8269 switch (VA.getLocInfo()) {
8270 default:
8271 llvm_unreachable("Unexpected CCValAssign::LocInfo");
8272 case CCValAssign::Full:
8274 break;
8275 case CCValAssign::BCvt:
8276 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
8277 Val = DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Val);
8278 else
8279 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
8280 break;
8281 }
8282 return Val;
8283}
8284
8285 static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
8286 const CCValAssign &VA, const SDLoc &DL,
8287 const ISD::InputArg &In,
8288 const LoongArchTargetLowering &TLI) {
8289 MachineFunction &MF = DAG.getMachineFunction();
8290 MachineRegisterInfo &RegInfo = MF.getRegInfo();
8291 EVT LocVT = VA.getLocVT();
8292 SDValue Val;
8293 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
8294 Register VReg = RegInfo.createVirtualRegister(RC);
8295 RegInfo.addLiveIn(VA.getLocReg(), VReg);
8296 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
8297
8298 // If input is sign extended from 32 bits, note it for the OptW pass.
8299 if (In.isOrigArg()) {
8300 Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex());
8301 if (OrigArg->getType()->isIntegerTy()) {
8302 unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
8303 // An input zero extended from i31 can also be considered sign extended.
8304 if ((BitWidth <= 32 && In.Flags.isSExt()) ||
8305 (BitWidth < 32 && In.Flags.isZExt())) {
8306 LoongArchMachineFunctionInfo *LAFI =
8307 MF.getInfo<LoongArchMachineFunctionInfo>();
8308 LAFI->addSExt32Register(VReg);
8309 }
8310 }
8311 }
8312
8313 return convertLocVTToValVT(DAG, Val, VA, DL);
8314}
8315
8316// The caller is responsible for loading the full value if the argument is
8317// passed with CCValAssign::Indirect.
8318 static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
8319 const CCValAssign &VA, const SDLoc &DL) {
8320 MachineFunction &MF = DAG.getMachineFunction();
8321 MachineFrameInfo &MFI = MF.getFrameInfo();
8322 EVT ValVT = VA.getValVT();
8323 int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
8324 /*IsImmutable=*/true);
8325 SDValue FIN = DAG.getFrameIndex(
8326 FI, MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0)));
8327
8328 ISD::LoadExtType ExtType;
8329 switch (VA.getLocInfo()) {
8330 default:
8331 llvm_unreachable("Unexpected CCValAssign::LocInfo");
8332 case CCValAssign::Full:
8333 case CCValAssign::Indirect:
8334 case CCValAssign::BCvt:
8335 ExtType = ISD::NON_EXTLOAD;
8336 break;
8337 }
8338 return DAG.getExtLoad(
8339 ExtType, DL, VA.getLocVT(), Chain, FIN,
8341}
8342
8343 static SDValue unpackF64OnLA32DSoftABI(SelectionDAG &DAG, SDValue Chain,
8344 const CCValAssign &VA,
8345 const CCValAssign &HiVA,
8346 const SDLoc &DL) {
8347 assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
8348 "Unexpected VA");
8349 MachineFunction &MF = DAG.getMachineFunction();
8350 MachineFrameInfo &MFI = MF.getFrameInfo();
8351 MachineRegisterInfo &RegInfo = MF.getRegInfo();
8352
8353 assert(VA.isRegLoc() && "Expected register VA assignment");
8354
8355 Register LoVReg = RegInfo.createVirtualRegister(&LoongArch::GPRRegClass);
8356 RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
8357 SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
8358 SDValue Hi;
8359 if (HiVA.isMemLoc()) {
8360 // Second half of f64 is passed on the stack.
8361 int FI = MFI.CreateFixedObject(4, HiVA.getLocMemOffset(),
8362 /*IsImmutable=*/true);
8363 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
8364 Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
8365 MachinePointerInfo::getFixedStack(MF, FI));
8366 } else {
8367 // Second half of f64 is passed in another GPR.
8368 Register HiVReg = RegInfo.createVirtualRegister(&LoongArch::GPRRegClass);
8369 RegInfo.addLiveIn(HiVA.getLocReg(), HiVReg);
8370 Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
8371 }
8372 return DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64, Lo, Hi);
8373}
8374
8375 static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
8376 const CCValAssign &VA, const SDLoc &DL) {
8377 EVT LocVT = VA.getLocVT();
8378
8379 switch (VA.getLocInfo()) {
8380 default:
8381 llvm_unreachable("Unexpected CCValAssign::LocInfo");
8382 case CCValAssign::Full:
8383 break;
8384 case CCValAssign::BCvt:
8385 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
8386 Val = DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Val);
8387 else
8388 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
8389 break;
8390 }
8391 return Val;
8392}
8393
8394static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
8395 CCValAssign::LocInfo LocInfo,
8396 ISD::ArgFlagsTy ArgFlags, Type *OrigTy,
8397 CCState &State) {
8398 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
8399 // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, SpLim
8400 // s0 s1 s2 s3 s4 s5 s6 s7 s8
8401 static const MCPhysReg GPRList[] = {
8402 LoongArch::R23, LoongArch::R24, LoongArch::R25,
8403 LoongArch::R26, LoongArch::R27, LoongArch::R28,
8404 LoongArch::R29, LoongArch::R30, LoongArch::R31};
8405 if (MCRegister Reg = State.AllocateReg(GPRList)) {
8406 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
8407 return false;
8408 }
8409 }
8410
8411 if (LocVT == MVT::f32) {
8412 // Pass in STG registers: F1, F2, F3, F4
8413 // fs0,fs1,fs2,fs3
8414 static const MCPhysReg FPR32List[] = {LoongArch::F24, LoongArch::F25,
8415 LoongArch::F26, LoongArch::F27};
8416 if (MCRegister Reg = State.AllocateReg(FPR32List)) {
8417 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
8418 return false;
8419 }
8420 }
8421
8422 if (LocVT == MVT::f64) {
8423 // Pass in STG registers: D1, D2, D3, D4
8424 // fs4,fs5,fs6,fs7
8425 static const MCPhysReg FPR64List[] = {LoongArch::F28_64, LoongArch::F29_64,
8426 LoongArch::F30_64, LoongArch::F31_64};
8427 if (MCRegister Reg = State.AllocateReg(FPR64List)) {
8428 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
8429 return false;
8430 }
8431 }
8432
8433 report_fatal_error("No registers left in GHC calling convention");
8434 return true;
8435}
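// Usage sketch (illustrative): an IR function such as
//   define ghccc void @stg_fun(i64 %Base, i64 %Sp, i64 %Hp, i64 %R1)
// gets Base/Sp/Hp/R1 pinned to $s0-$s3 per the tables above. The convention
// deliberately has no stack slots, so exhausting the lists is a fatal error.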
8436
8437// Transform physical registers into virtual registers.
8438 SDValue LoongArchTargetLowering::LowerFormalArguments(
8439 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
8440 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
8441 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
8442
8443 MachineFunction &MF = DAG.getMachineFunction();
8444
8445 switch (CallConv) {
8446 default:
8447 llvm_unreachable("Unsupported calling convention");
8448 case CallingConv::C:
8449 case CallingConv::Fast:
8450 case CallingConv::SPIR_KERNEL:
8451 case CallingConv::GRAAL:
8452 break;
8453 case CallingConv::GHC:
8454 if (!MF.getSubtarget().hasFeature(LoongArch::FeatureBasicF) ||
8455 !MF.getSubtarget().hasFeature(LoongArch::FeatureBasicD))
8457 "GHC calling convention requires the F and D extensions");
8458 }
8459
8460 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8461 MVT GRLenVT = Subtarget.getGRLenVT();
8462 unsigned GRLenInBytes = Subtarget.getGRLen() / 8;
8463 // Used with varargs to accumulate store chains.
8464 std::vector<SDValue> OutChains;
8465
8466 // Assign locations to all of the incoming arguments.
8467 SmallVector<CCValAssign> ArgLocs;
8468 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
8469
8470 if (CallConv == CallingConv::GHC)
8471 CCInfo.AnalyzeFormalArguments(Ins, CC_LoongArch_GHC);
8472 else
8473 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false, CC_LoongArch);
8474
8475 for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) {
8476 CCValAssign &VA = ArgLocs[i];
8477 SDValue ArgValue;
8478 // Passing f64 on LA32D with a soft float ABI must be handled as a special
8479 // case.
8480 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
8481 assert(VA.needsCustom());
8482 ArgValue = unpackF64OnLA32DSoftABI(DAG, Chain, VA, ArgLocs[++i], DL);
8483 } else if (VA.isRegLoc())
8484 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[InsIdx], *this);
8485 else
8486 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
8487 if (VA.getLocInfo() == CCValAssign::Indirect) {
8488 // If the original argument was split and passed by reference, we need to
8489 // load all parts of it here (using the same address).
8490 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
8491 MachinePointerInfo()));
8492 unsigned ArgIndex = Ins[InsIdx].OrigArgIndex;
8493 unsigned ArgPartOffset = Ins[InsIdx].PartOffset;
8494 assert(ArgPartOffset == 0);
8495 while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) {
8496 CCValAssign &PartVA = ArgLocs[i + 1];
8497 unsigned PartOffset = Ins[InsIdx + 1].PartOffset - ArgPartOffset;
8498 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
8499 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
8500 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
8501 MachinePointerInfo()));
8502 ++i;
8503 ++InsIdx;
8504 }
8505 continue;
8506 }
8507 InVals.push_back(ArgValue);
8508 }
8509
8510 if (IsVarArg) {
8511 ArrayRef<MCPhysReg> ArgRegs = ArrayRef(ArgGPRs);
8512 unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
8513 const TargetRegisterClass *RC = &LoongArch::GPRRegClass;
8514 MachineFrameInfo &MFI = MF.getFrameInfo();
8515 MachineRegisterInfo &RegInfo = MF.getRegInfo();
8516 auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();
8517
8518 // Offset of the first variable argument from stack pointer, and size of
8519 // the vararg save area. For now, the varargs save area is either zero or
8520 // large enough to hold a0-a7.
8521 int VaArgOffset, VarArgsSaveSize;
8522
8523 // If all registers are allocated, then all varargs must be passed on the
8524 // stack and we don't need to save any argregs.
8525 if (ArgRegs.size() == Idx) {
8526 VaArgOffset = CCInfo.getStackSize();
8527 VarArgsSaveSize = 0;
8528 } else {
8529 VarArgsSaveSize = GRLenInBytes * (ArgRegs.size() - Idx);
8530 VaArgOffset = -VarArgsSaveSize;
8531 }
8532
8533 // Record the frame index of the first variable argument
8534 // which is a value necessary to VASTART.
8535 int FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
8536 LoongArchFI->setVarArgsFrameIndex(FI);
8537
8538 // If saving an odd number of registers then create an extra stack slot to
8539 // ensure that the frame pointer is 2*GRLen-aligned, which in turn ensures
8540 // offsets to even-numbered registers remain 2*GRLen-aligned.
8541 if (Idx % 2) {
8542 MFI.CreateFixedObject(GRLenInBytes, VaArgOffset - (int)GRLenInBytes,
8543 true);
8544 VarArgsSaveSize += GRLenInBytes;
8545 }
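// Worked example (illustrative): on LA64 with three named arguments, Idx is
// 3 and $a3-$a7 are saved, so VarArgsSaveSize starts at 5 * 8 = 40 bytes;
// since Idx is odd, the extra 8-byte slot created above grows the area to
// 48 bytes so the frame pointer stays 2*GRLen (16-byte) aligned.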
8546
8547 // Copy the integer registers that may have been used for passing varargs
8548 // to the vararg save area.
8549 for (unsigned I = Idx; I < ArgRegs.size();
8550 ++I, VaArgOffset += GRLenInBytes) {
8551 const Register Reg = RegInfo.createVirtualRegister(RC);
8552 RegInfo.addLiveIn(ArgRegs[I], Reg);
8553 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, GRLenVT);
8554 FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
8555 SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
8556 SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
8557 MachinePointerInfo::getFixedStack(MF, FI));
8558 cast<StoreSDNode>(Store.getNode())
8559 ->getMemOperand()
8560 ->setValue((Value *)nullptr);
8561 OutChains.push_back(Store);
8562 }
8563 LoongArchFI->setVarArgsSaveSize(VarArgsSaveSize);
8564 }
8565
8566 // All stores are grouped in one node to allow the matching between
8567 // the size of Ins and InVals. This only happens for vararg functions.
8568 if (!OutChains.empty()) {
8569 OutChains.push_back(Chain);
8570 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
8571 }
8572
8573 return Chain;
8574}
8575
8576 bool LoongArchTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
8577 return CI->isTailCall();
8578}
8579
8580 // Check if the return value is used only as a return value, as otherwise
8581 // we can't perform a tail-call.
8582 bool LoongArchTargetLowering::isUsedByReturnOnly(SDNode *N,
8583 SDValue &Chain) const {
8584 if (N->getNumValues() != 1)
8585 return false;
8586 if (!N->hasNUsesOfValue(1, 0))
8587 return false;
8588
8589 SDNode *Copy = *N->user_begin();
8590 if (Copy->getOpcode() != ISD::CopyToReg)
8591 return false;
8592
8593 // If the ISD::CopyToReg has a glue operand, we conservatively assume it
8594 // isn't safe to perform a tail call.
8595 if (Copy->getGluedNode())
8596 return false;
8597
8598 // The copy must be used by a LoongArchISD::RET, and nothing else.
8599 bool HasRet = false;
8600 for (SDNode *Node : Copy->users()) {
8601 if (Node->getOpcode() != LoongArchISD::RET)
8602 return false;
8603 HasRet = true;
8604 }
8605
8606 if (!HasRet)
8607 return false;
8608
8609 Chain = Copy->getOperand(0);
8610 return true;
8611}
8612
8613// Check whether the call is eligible for tail call optimization.
8614bool LoongArchTargetLowering::isEligibleForTailCallOptimization(
8615 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
8616 const SmallVectorImpl<CCValAssign> &ArgLocs) const {
8617
8618 auto CalleeCC = CLI.CallConv;
8619 auto &Outs = CLI.Outs;
8620 auto &Caller = MF.getFunction();
8621 auto CallerCC = Caller.getCallingConv();
8622
8623 // Do not tail call opt if the stack is used to pass parameters.
8624 if (CCInfo.getStackSize() != 0)
8625 return false;
8626
8627 // Do not tail call opt if any parameters need to be passed indirectly.
8628 for (auto &VA : ArgLocs)
8629 if (VA.getLocInfo() == CCValAssign::Indirect)
8630 return false;
8631
8632 // Do not tail call opt if either caller or callee uses struct return
8633 // semantics.
8634 auto IsCallerStructRet = Caller.hasStructRetAttr();
8635 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
8636 if (IsCallerStructRet || IsCalleeStructRet)
8637 return false;
8638
8639 // Do not tail call opt if either the callee or caller has a byval argument.
8640 for (auto &Arg : Outs)
8641 if (Arg.Flags.isByVal())
8642 return false;
8643
8644 // The callee has to preserve all registers the caller needs to preserve.
8645 const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
8646 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
8647 if (CalleeCC != CallerCC) {
8648 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
8649 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
8650 return false;
8651 }
8652 return true;
8653}
8654
8655 static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) {
8656 return DAG.getDataLayout().getPrefTypeAlign(
8657 VT.getTypeForEVT(*DAG.getContext()));
8658}
8659
8660// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
8661// and output parameter nodes.
8662SDValue
8664 SmallVectorImpl<SDValue> &InVals) const {
8665 SelectionDAG &DAG = CLI.DAG;
8666 SDLoc &DL = CLI.DL;
8667 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
8668 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
8669 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
8670 SDValue Chain = CLI.Chain;
8671 SDValue Callee = CLI.Callee;
8672 CallingConv::ID CallConv = CLI.CallConv;
8673 bool IsVarArg = CLI.IsVarArg;
8674 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8675 MVT GRLenVT = Subtarget.getGRLenVT();
8676 bool &IsTailCall = CLI.IsTailCall;
8677
8678 MachineFunction &MF = DAG.getMachineFunction();
8679
8680 // Analyze the operands of the call, assigning locations to each operand.
8681 SmallVector<CCValAssign> ArgLocs;
8682 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
8683
8684 if (CallConv == CallingConv::GHC)
8685 ArgCCInfo.AnalyzeCallOperands(Outs, CC_LoongArch_GHC);
8686 else
8687 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI, CC_LoongArch);
8688
8689 // Check if it's really possible to do a tail call.
8690 if (IsTailCall)
8691 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
8692
8693 if (IsTailCall)
8694 ++NumTailCalls;
8695 else if (CLI.CB && CLI.CB->isMustTailCall())
8696 report_fatal_error("failed to perform tail call elimination on a call "
8697 "site marked musttail");
8698
8699 // Get a count of how many bytes are to be pushed on the stack.
8700 unsigned NumBytes = ArgCCInfo.getStackSize();
8701
8702 // Create local copies for byval args.
8703 SmallVector<SDValue> ByValArgs;
8704 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
8705 ISD::ArgFlagsTy Flags = Outs[i].Flags;
8706 if (!Flags.isByVal())
8707 continue;
8708
8709 SDValue Arg = OutVals[i];
8710 unsigned Size = Flags.getByValSize();
8711 Align Alignment = Flags.getNonZeroByValAlign();
8712
8713 int FI =
8714 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
8715 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
8716 SDValue SizeNode = DAG.getConstant(Size, DL, GRLenVT);
8717
8718 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
8719 /*IsVolatile=*/false,
8720 /*AlwaysInline=*/false, /*CI=*/nullptr, std::nullopt,
8721 MachinePointerInfo(), MachinePointerInfo());
8722 ByValArgs.push_back(FIPtr);
8723 }
8724
8725 if (!IsTailCall)
8726 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
8727
8728 // Copy argument values to their designated locations.
8729 SmallVector<std::pair<Register, SDValue>> RegsToPass;
8730 SmallVector<SDValue> MemOpChains;
8731 SDValue StackPtr;
8732 for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e;
8733 ++i, ++OutIdx) {
8734 CCValAssign &VA = ArgLocs[i];
8735 SDValue ArgValue = OutVals[OutIdx];
8736 ISD::ArgFlagsTy Flags = Outs[OutIdx].Flags;
8737
8738 // Handle passing f64 on LA32D with a soft float ABI as a special case.
8739 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
8740 assert(VA.isRegLoc() && "Expected register VA assignment");
8741 assert(VA.needsCustom());
8742 SDValue SplitF64 =
8743 DAG.getNode(LoongArchISD::SPLIT_PAIR_F64, DL,
8744 DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
8745 SDValue Lo = SplitF64.getValue(0);
8746 SDValue Hi = SplitF64.getValue(1);
8747
8748 Register RegLo = VA.getLocReg();
8749 RegsToPass.push_back(std::make_pair(RegLo, Lo));
8750
8751 // Get the CCValAssign for the Hi part.
8752 CCValAssign &HiVA = ArgLocs[++i];
8753
8754 if (HiVA.isMemLoc()) {
8755 // Second half of f64 is passed on the stack.
8756 if (!StackPtr.getNode())
8757 StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT);
8758 SDValue Address =
8759 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
8760 DAG.getIntPtrConstant(HiVA.getLocMemOffset(), DL));
8761 // Emit the store.
8762 MemOpChains.push_back(DAG.getStore(
8763 Chain, DL, Hi, Address,
8764 MachinePointerInfo::getStack(MF, HiVA.getLocMemOffset())));
8765 } else {
8766 // Second half of f64 is passed in another GPR.
8767 Register RegHigh = HiVA.getLocReg();
8768 RegsToPass.push_back(std::make_pair(RegHigh, Hi));
8769 }
8770 continue;
8771 }
8772
8773 // Promote the value if needed.
8774 // For now, only handle fully promoted and indirect arguments.
8775 if (VA.getLocInfo() == CCValAssign::Indirect) {
8776 // Store the argument in a stack slot and pass its address.
8777 Align StackAlign =
8778 std::max(getPrefTypeAlign(Outs[OutIdx].ArgVT, DAG),
8779 getPrefTypeAlign(ArgValue.getValueType(), DAG));
8780 TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
8781 // If the original argument was split and passed by reference, we need to
8782 // store the required parts of it here (and pass just one address).
8783 unsigned ArgIndex = Outs[OutIdx].OrigArgIndex;
8784 unsigned ArgPartOffset = Outs[OutIdx].PartOffset;
8785 assert(ArgPartOffset == 0);
8786 // Calculate the total size to store. We don't have access to what we're
8787 // actually storing other than performing the loop and collecting the
8788 // info.
8789 SmallVector<std::pair<SDValue, SDValue>> Parts;
8790 while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) {
8791 SDValue PartValue = OutVals[OutIdx + 1];
8792 unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset;
8793 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
8794 EVT PartVT = PartValue.getValueType();
8795
8796 StoredSize += PartVT.getStoreSize();
8797 StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
8798 Parts.push_back(std::make_pair(PartValue, Offset));
8799 ++i;
8800 ++OutIdx;
8801 }
8802 SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
8803 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
8804 MemOpChains.push_back(
8805 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
8806 MachinePointerInfo::getFixedStack(MF, FI)));
8807 for (const auto &Part : Parts) {
8808 SDValue PartValue = Part.first;
8809 SDValue PartOffset = Part.second;
8810 SDValue Address =
8811 DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
8812 MemOpChains.push_back(
8813 DAG.getStore(Chain, DL, PartValue, Address,
8814 MachinePointerInfo::getFixedStack(MF, FI)));
8815 }
8816 ArgValue = SpillSlot;
8817 } else {
8818 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL);
8819 }
8820
8821 // Use local copy if it is a byval arg.
8822 if (Flags.isByVal())
8823 ArgValue = ByValArgs[j++];
8824
8825 if (VA.isRegLoc()) {
8826 // Queue up the argument copies and emit them at the end.
8827 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
8828 } else {
8829 assert(VA.isMemLoc() && "Argument not register or memory");
8830 assert(!IsTailCall && "Tail call not allowed if stack is used "
8831 "for passing parameters");
8832
8833 // Work out the address of the stack slot.
8834 if (!StackPtr.getNode())
8835 StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT);
8836 SDValue Address =
8837 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
8838 DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));
8839
8840 // Emit the store.
8841 MemOpChains.push_back(
8842 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
8843 }
8844 }
8845
8846 // Join the stores, which are independent of one another.
8847 if (!MemOpChains.empty())
8848 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
8849
8850 SDValue Glue;
8851
8852 // Build a sequence of copy-to-reg nodes, chained and glued together.
8853 for (auto &Reg : RegsToPass) {
8854 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
8855 Glue = Chain.getValue(1);
8856 }
8857
8858 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
8859 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
8860 // split it, and the direct call can then be matched by PseudoCALL_SMALL.
8861 if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
8862 const GlobalValue *GV = S->getGlobal();
8863 unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(GV)
8864 ? LoongArchII::MO_CALL
8865 : LoongArchII::MO_CALL_PLT;
8866 Callee = DAG.getTargetGlobalAddress(S->getGlobal(), DL, PtrVT, 0, OpFlags);
8867 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
8868 unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(nullptr)
8869 ? LoongArchII::MO_CALL
8870 : LoongArchII::MO_CALL_PLT;
8871 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);
8872 }
8873
8874 // The first call operand is the chain and the second is the target address.
8875 SmallVector<SDValue> Ops;
8876 Ops.push_back(Chain);
8877 Ops.push_back(Callee);
8878
8879 // Add argument registers to the end of the list so that they are
8880 // known live into the call.
8881 for (auto &Reg : RegsToPass)
8882 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
8883
8884 if (!IsTailCall) {
8885 // Add a register mask operand representing the call-preserved registers.
8886 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
8887 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
8888 assert(Mask && "Missing call preserved mask for calling convention");
8889 Ops.push_back(DAG.getRegisterMask(Mask));
8890 }
8891
8892 // Glue the call to the argument copies, if any.
8893 if (Glue.getNode())
8894 Ops.push_back(Glue);
8895
8896 // Emit the call.
8897 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
8898 unsigned Op;
8899 switch (DAG.getTarget().getCodeModel()) {
8900 default:
8901 report_fatal_error("Unsupported code model");
8902 case CodeModel::Small:
8903 Op = IsTailCall ? LoongArchISD::TAIL : LoongArchISD::CALL;
8904 break;
8905 case CodeModel::Medium:
8906 Op = IsTailCall ? LoongArchISD::TAIL_MEDIUM : LoongArchISD::CALL_MEDIUM;
8907 break;
8908 case CodeModel::Large:
8909 assert(Subtarget.is64Bit() && "Large code model requires LA64");
8910 Op = IsTailCall ? LoongArchISD::TAIL_LARGE : LoongArchISD::CALL_LARGE;
8911 break;
8912 }
8913
8914 if (IsTailCall) {
8915 MF.getFrameInfo().setHasTailCall();
8916 SDValue Ret = DAG.getNode(Op, DL, NodeTys, Ops);
8917 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
8918 return Ret;
8919 }
8920
8921 Chain = DAG.getNode(Op, DL, NodeTys, Ops);
8922 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
8923 Glue = Chain.getValue(1);
8924
8925 // Mark the end of the call, which is glued to the call itself.
8926 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
8927 Glue = Chain.getValue(1);
8928
8929 // Assign locations to each value returned by this call.
8930 SmallVector<CCValAssign> RVLocs;
8931 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
8932 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_LoongArch);
8933
8934 // Copy all of the result registers out of their specified physreg.
8935 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
8936 auto &VA = RVLocs[i];
8937 // Copy the value out.
8938 SDValue RetValue =
8939 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
8940 // Glue the RetValue to the end of the call sequence.
8941 Chain = RetValue.getValue(1);
8942 Glue = RetValue.getValue(2);
8943
8944 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
8945 assert(VA.needsCustom());
8946 SDValue RetValue2 = DAG.getCopyFromReg(Chain, DL, RVLocs[++i].getLocReg(),
8947 MVT::i32, Glue);
8948 Chain = RetValue2.getValue(1);
8949 Glue = RetValue2.getValue(2);
8950 RetValue = DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64,
8951 RetValue, RetValue2);
8952 } else
8953 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL);
8954
8955 InVals.push_back(RetValue);
8956 }
8957
8958 return Chain;
8959}
8960
8961 bool LoongArchTargetLowering::CanLowerReturn(
8962 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
8963 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context,
8964 const Type *RetTy) const {
8965 SmallVector<CCValAssign> RVLocs;
8966 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
8967
8968 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
8969 LoongArchABI::ABI ABI =
8970 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
8971 if (CC_LoongArch(MF.getDataLayout(), ABI, i, Outs[i].VT, CCValAssign::Full,
8972 Outs[i].Flags, CCInfo, /*IsRet=*/true, nullptr))
8973 return false;
8974 }
8975 return true;
8976}
8977
8978 SDValue LoongArchTargetLowering::LowerReturn(
8979 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
8980 const SmallVectorImpl<ISD::OutputArg> &Outs,
8981 const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
8982 SelectionDAG &DAG) const {
8983 // Stores the assignment of the return value to a location.
8984 SmallVector<CCValAssign> RVLocs;
8985
8986 // Info about the registers and stack slot.
8987 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
8988 *DAG.getContext());
8989
8990 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
8991 nullptr, CC_LoongArch);
8992 if (CallConv == CallingConv::GHC && !RVLocs.empty())
8993 report_fatal_error("GHC functions return void only");
8994 SDValue Glue;
8995 SmallVector<SDValue, 4> RetOps(1, Chain);
8996
8997 // Copy the result values into the output registers.
8998 for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) {
8999 SDValue Val = OutVals[OutIdx];
9000 CCValAssign &VA = RVLocs[i];
9001 assert(VA.isRegLoc() && "Can only return in registers!");
9002
9003 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
9004 // Handle returning f64 on LA32D with a soft float ABI.
9005 assert(VA.isRegLoc() && "Expected return via registers");
9006 assert(VA.needsCustom());
9007 SDValue SplitF64 = DAG.getNode(LoongArchISD::SPLIT_PAIR_F64, DL,
9008 DAG.getVTList(MVT::i32, MVT::i32), Val);
9009 SDValue Lo = SplitF64.getValue(0);
9010 SDValue Hi = SplitF64.getValue(1);
9011 Register RegLo = VA.getLocReg();
9012 Register RegHi = RVLocs[++i].getLocReg();
9013
9014 Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
9015 Glue = Chain.getValue(1);
9016 RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
9017 Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
9018 Glue = Chain.getValue(1);
9019 RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
9020 } else {
9021 // Handle a 'normal' return.
9022 Val = convertValVTToLocVT(DAG, Val, VA, DL);
9023 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
9024
9025 // Guarantee that all emitted copies are stuck together.
9026 Glue = Chain.getValue(1);
9027 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
9028 }
9029 }
9030
9031 RetOps[0] = Chain; // Update chain.
9032
9033 // Add the glue node if we have it.
9034 if (Glue.getNode())
9035 RetOps.push_back(Glue);
9036
9037 return DAG.getNode(LoongArchISD::RET, DL, MVT::Other, RetOps);
9038}
9039
9040// Check if a constant splat can be generated using [x]vldi, where imm[12] == 1.
9041// Note: The following prefixes are excluded:
9042// imm[11:8] == 4'b0000, 4'b0100, 4'b1000
9043// as they can be represented using [x]vrepli.[whb]
9044 std::pair<bool, uint64_t> LoongArchTargetLowering::isImmVLDILegal(
9045 const APInt &SplatValue, const unsigned SplatBitSize) const {
9046 uint64_t RequiredImm = 0;
9047 uint64_t V = SplatValue.getZExtValue();
9048 if (SplatBitSize == 16 && !(V & 0x00FF)) {
9049 // 4'b0101
9050 RequiredImm = (0b10101 << 8) | (V >> 8);
9051 return {true, RequiredImm};
9052 } else if (SplatBitSize == 32) {
9053 // 4'b0001
9054 if (!(V & 0xFFFF00FF)) {
9055 RequiredImm = (0b10001 << 8) | (V >> 8);
9056 return {true, RequiredImm};
9057 }
9058 // 4'b0010
9059 if (!(V & 0xFF00FFFF)) {
9060 RequiredImm = (0b10010 << 8) | (V >> 16);
9061 return {true, RequiredImm};
9062 }
9063 // 4'b0011
9064 if (!(V & 0x00FFFFFF)) {
9065 RequiredImm = (0b10011 << 8) | (V >> 24);
9066 return {true, RequiredImm};
9067 }
9068 // 4'b0110
9069 if ((V & 0xFFFF00FF) == 0xFF) {
9070 RequiredImm = (0b10110 << 8) | (V >> 8);
9071 return {true, RequiredImm};
9072 }
9073 // 4'b0111
9074 if ((V & 0xFF00FFFF) == 0xFFFF) {
9075 RequiredImm = (0b10111 << 8) | (V >> 16);
9076 return {true, RequiredImm};
9077 }
9078 // 4'b1010
9079 if ((V & 0x7E07FFFF) == 0x3E000000 || (V & 0x7E07FFFF) == 0x40000000) {
9080 RequiredImm =
9081 (0b11010 << 8) | (((V >> 24) & 0xC0) ^ 0x40) | ((V >> 19) & 0x3F);
9082 return {true, RequiredImm};
9083 }
9084 } else if (SplatBitSize == 64) {
9085 // 4'b1011
9086 if ((V & 0xFFFFFFFF7E07FFFFULL) == 0x3E000000ULL ||
9087 (V & 0xFFFFFFFF7E07FFFFULL) == 0x40000000ULL) {
9088 RequiredImm =
9089 (0b11011 << 8) | (((V >> 24) & 0xC0) ^ 0x40) | ((V >> 19) & 0x3F);
9090 return {true, RequiredImm};
9091 }
9092 // 4'b1100
9093 if ((V & 0x7FC0FFFFFFFFFFFFULL) == 0x4000000000000000ULL ||
9094 (V & 0x7FC0FFFFFFFFFFFFULL) == 0x3FC0000000000000ULL) {
9095 RequiredImm =
9096 (0b11100 << 8) | (((V >> 56) & 0xC0) ^ 0x40) | ((V >> 48) & 0x3F);
9097 return {true, RequiredImm};
9098 }
9099 // 4'b1001
9100 auto sameBitsPreByte = [](uint64_t x) -> std::pair<bool, uint8_t> {
9101 uint8_t res = 0;
9102 for (int i = 0; i < 8; ++i) {
9103 uint8_t byte = x & 0xFF;
9104 if (byte == 0 || byte == 0xFF)
9105 res |= ((byte & 1) << i);
9106 else
9107 return {false, 0};
9108 x >>= 8;
9109 }
9110 return {true, res};
9111 };
9112 auto [IsSame, Suffix] = sameBitsPreByte(V);
9113 if (IsSame) {
9114 RequiredImm = (0b11001 << 8) | Suffix;
9115 return {true, RequiredImm};
9116 }
9117 }
9118 return {false, RequiredImm};
9119}
9120
9121 bool LoongArchTargetLowering::isFPImmVLDILegal(const APFloat &Imm,
9122 EVT VT) const {
9123 if (!Subtarget.hasExtLSX())
9124 return false;
9125
9126 if (VT == MVT::f32) {
9127 uint64_t masked = Imm.bitcastToAPInt().getZExtValue() & 0x7e07ffff;
9128 return (masked == 0x3e000000 || masked == 0x40000000);
9129 }
9130
9131 if (VT == MVT::f64) {
9132 uint64_t masked = Imm.bitcastToAPInt().getZExtValue() & 0x7fc0ffffffffffff;
9133 return (masked == 0x3fc0000000000000 || masked == 0x4000000000000000);
9134 }
9135
9136 return false;
9137}
9138
9139bool LoongArchTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
9140 bool ForCodeSize) const {
9141 // TODO: Maybe need more checks here after vector extension is supported.
9142 if (VT == MVT::f32 && !Subtarget.hasBasicF())
9143 return false;
9144 if (VT == MVT::f64 && !Subtarget.hasBasicD())
9145 return false;
9146 return (Imm.isZero() || Imm.isExactlyValue(1.0) || isFPImmVLDILegal(Imm, VT));
9147}
9148
9149 bool LoongArchTargetLowering::isCheapToSpeculateCttz(Type *Ty) const {
9150 return true;
9151}
9152
9153 bool LoongArchTargetLowering::isCheapToSpeculateCtlz(Type *Ty) const {
9154 return true;
9155}
9156
9157bool LoongArchTargetLowering::shouldInsertFencesForAtomic(
9158 const Instruction *I) const {
9159 if (!Subtarget.is64Bit())
9160 return isa<LoadInst>(I) || isa<StoreInst>(I);
9161
9162 if (isa<LoadInst>(I))
9163 return true;
9164
9165 // On LA64, atomic store operations with an IntegerBitWidth of 32 or 64 do
9166 // not require fences because we can use amswap_db.[w/d].
9167 Type *Ty = I->getOperand(0)->getType();
9168 if (isa<StoreInst>(I) && Ty->isIntegerTy()) {
9169 unsigned Size = Ty->getIntegerBitWidth();
9170 return (Size == 8 || Size == 16);
9171 }
9172
9173 return false;
9174}
9175
9176 EVT LoongArchTargetLowering::getSetCCResultType(const DataLayout &DL,
9177 LLVMContext &Context,
9178 EVT VT) const {
9179 if (!VT.isVector())
9180 return getPointerTy(DL);
9182}
9183
9184 bool LoongArchTargetLowering::canMergeStoresTo(
9185 unsigned AddressSpace, EVT MemVT, const MachineFunction &MF) const {
9186 // Do not merge stores to float-value sizes (128 or 256 bits) if the
9187 // NoImplicitFloat function attribute is set.
9188 bool NoFloat = MF.getFunction().hasFnAttribute(Attribute::NoImplicitFloat);
9189 unsigned MaxIntSize = Subtarget.is64Bit() ? 64 : 32;
9190 if (NoFloat)
9191 return MemVT.getSizeInBits() <= MaxIntSize;
9192
9193 // Make sure we don't merge greater than our maximum supported vector width.
9194 if (Subtarget.hasExtLASX())
9195 MaxIntSize = 256;
9196 else if (Subtarget.hasExtLSX())
9197 MaxIntSize = 128;
9198
9199 return MemVT.getSizeInBits() <= MaxIntSize;
9200}
9201
9202 bool LoongArchTargetLowering::hasAndNot(SDValue Y) const {
9203 EVT VT = Y.getValueType();
9204
9205 if (VT.isVector())
9206 return Subtarget.hasExtLSX() && VT.isInteger();
9207
9208 return VT.isScalarInteger() && !isa<ConstantSDNode>(Y);
9209}
9210
9211 void LoongArchTargetLowering::getTgtMemIntrinsics(
9212 SmallVectorImpl<IntrinsicInfo> &Infos, const CallInst &I,
9213 MachineFunction &MF, unsigned Intrinsic) const {
9214 switch (Intrinsic) {
9215 default:
9216 return;
9217 case Intrinsic::loongarch_masked_atomicrmw_xchg_i32:
9218 case Intrinsic::loongarch_masked_atomicrmw_add_i32:
9219 case Intrinsic::loongarch_masked_atomicrmw_sub_i32:
9220 case Intrinsic::loongarch_masked_atomicrmw_nand_i32: {
9221 IntrinsicInfo Info;
9222 Info.opc = ISD::INTRINSIC_W_CHAIN;
9223 Info.memVT = MVT::i32;
9224 Info.ptrVal = I.getArgOperand(0);
9225 Info.offset = 0;
9226 Info.align = Align(4);
9227 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
9228 MachineMemOperand::MOVolatile;
9229 Infos.push_back(Info);
9230 return;
9231 // TODO: Add more Intrinsics later.
9232 }
9233 }
9234}
9235
9236 // When -mlamcas is enabled, MinCmpXchgSizeInBits will be set to 8, so
9237 // atomicrmw and/or/xor operations with operands narrower than 32 bits
9238 // cannot be expanded to am{and/or/xor}[_db].w through AtomicExpandPass. To
9239 // prevent a regression, we implement the expansion manually here.
9240 void LoongArchTargetLowering::emitExpandAtomicRMW(AtomicRMWInst *AI) const {
9241 AtomicRMWInst::BinOp Op = AI->getOperation();
9242
9243 assert((Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
9244 Op == AtomicRMWInst::And) &&
9245 "Unable to expand");
9246 unsigned MinWordSize = 4;
9247
9248 IRBuilder<> Builder(AI);
9249 LLVMContext &Ctx = Builder.getContext();
9250 const DataLayout &DL = AI->getDataLayout();
9251 Type *ValueType = AI->getType();
9252 Type *WordType = Type::getIntNTy(Ctx, MinWordSize * 8);
9253
9254 Value *Addr = AI->getPointerOperand();
9255 PointerType *PtrTy = cast<PointerType>(Addr->getType());
9256 IntegerType *IntTy = DL.getIndexType(Ctx, PtrTy->getAddressSpace());
9257
9258 Value *AlignedAddr = Builder.CreateIntrinsic(
9259 Intrinsic::ptrmask, {PtrTy, IntTy},
9260 {Addr, ConstantInt::get(IntTy, ~(uint64_t)(MinWordSize - 1))}, nullptr,
9261 "AlignedAddr");
9262
9263 Value *AddrInt = Builder.CreatePtrToInt(Addr, IntTy);
9264 Value *PtrLSB = Builder.CreateAnd(AddrInt, MinWordSize - 1, "PtrLSB");
9265 Value *ShiftAmt = Builder.CreateShl(PtrLSB, 3);
9266 ShiftAmt = Builder.CreateTrunc(ShiftAmt, WordType, "ShiftAmt");
9267 Value *Mask = Builder.CreateShl(
9268 ConstantInt::get(WordType,
9269 (1 << (DL.getTypeStoreSize(ValueType) * 8)) - 1),
9270 ShiftAmt, "Mask");
9271 Value *Inv_Mask = Builder.CreateNot(Mask, "Inv_Mask");
9272 Value *ValOperand_Shifted =
9273 Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), WordType),
9274 ShiftAmt, "ValOperand_Shifted");
9275 Value *NewOperand;
9276 if (Op == AtomicRMWInst::And)
9277 NewOperand = Builder.CreateOr(ValOperand_Shifted, Inv_Mask, "AndOperand");
9278 else
9279 NewOperand = ValOperand_Shifted;
9280
9281 AtomicRMWInst *NewAI =
9282 Builder.CreateAtomicRMW(Op, AlignedAddr, NewOperand, Align(MinWordSize),
9283 AI->getOrdering(), AI->getSyncScopeID());
9284
9285 Value *Shift = Builder.CreateLShr(NewAI, ShiftAmt, "shifted");
9286 Value *Trunc = Builder.CreateTrunc(Shift, ValueType, "extracted");
9287 Value *FinalOldResult = Builder.CreateBitCast(Trunc, ValueType);
9288 AI->replaceAllUsesWith(FinalOldResult);
9289 AI->eraseFromParent();
9290}
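// Sketch of the rewrite (illustrative): `atomicrmw or ptr %p, i8 1 monotonic`
// becomes an i32 `atomicrmw or` on the containing aligned word, with the
// operand zero-extended and shifted into its byte lane; for `and`, bits
// outside the lane are set to one first so neighbouring bytes survive.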
9291
9292 TargetLowering::AtomicExpansionKind
9293 LoongArchTargetLowering::shouldExpandAtomicRMWInIR(
9294 const AtomicRMWInst *AI) const {
9295 // TODO: Add more AtomicRMWInst that needs to be extended.
9296
9297 // Since floating-point operation requires a non-trivial set of data
9298 // operations, use CmpXChg to expand.
9299 if (AI->isFloatingPointOperation() ||
9300 AI->getOperation() == AtomicRMWInst::UIncWrap ||
9301 AI->getOperation() == AtomicRMWInst::UDecWrap ||
9302 AI->getOperation() == AtomicRMWInst::USubCond ||
9303 AI->getOperation() == AtomicRMWInst::USubSat)
9304 return AtomicExpansionKind::CmpXChg;
9305
9306 if (Subtarget.hasLAM_BH() && Subtarget.is64Bit() &&
9307 (AI->getType()->isIntegerTy(8) || AI->getType()->isIntegerTy(16)) &&
9308 (AI->getOperation() == AtomicRMWInst::Add ||
9309 AI->getOperation() == AtomicRMWInst::Sub)) {
9310 return AtomicExpansionKind::None;
9311 }
9312
9313 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
9314 if (Subtarget.hasLAMCAS()) {
9315 if (Size < 32 && (AI->getOperation() == AtomicRMWInst::And ||
9316 AI->getOperation() == AtomicRMWInst::Or ||
9317 AI->getOperation() == AtomicRMWInst::Xor))
9318 return AtomicExpansionKind::Expand;
9319 if (AI->getOperation() == AtomicRMWInst::Nand || Size < 32)
9320 return AtomicExpansionKind::CmpXChg;
9321 }
9322
9323 if (Size == 8 || Size == 16)
9326}
9327
9328static Intrinsic::ID
9329 getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen,
9330 AtomicRMWInst::BinOp BinOp) {
9331 if (GRLen == 64) {
9332 switch (BinOp) {
9333 default:
9334 llvm_unreachable("Unexpected AtomicRMW BinOp");
9335 case AtomicRMWInst::Xchg:
9336 return Intrinsic::loongarch_masked_atomicrmw_xchg_i64;
9337 case AtomicRMWInst::Add:
9338 return Intrinsic::loongarch_masked_atomicrmw_add_i64;
9339 case AtomicRMWInst::Sub:
9340 return Intrinsic::loongarch_masked_atomicrmw_sub_i64;
9341 case AtomicRMWInst::Nand:
9342 return Intrinsic::loongarch_masked_atomicrmw_nand_i64;
9343 case AtomicRMWInst::UMax:
9344 return Intrinsic::loongarch_masked_atomicrmw_umax_i64;
9345 case AtomicRMWInst::UMin:
9346 return Intrinsic::loongarch_masked_atomicrmw_umin_i64;
9347 case AtomicRMWInst::Max:
9348 return Intrinsic::loongarch_masked_atomicrmw_max_i64;
9349 case AtomicRMWInst::Min:
9350 return Intrinsic::loongarch_masked_atomicrmw_min_i64;
9351 // TODO: support other AtomicRMWInst.
9352 }
9353 }
9354
9355 if (GRLen == 32) {
9356 switch (BinOp) {
9357 default:
9358 llvm_unreachable("Unexpected AtomicRMW BinOp");
9359 case AtomicRMWInst::Xchg:
9360 return Intrinsic::loongarch_masked_atomicrmw_xchg_i32;
9361 case AtomicRMWInst::Add:
9362 return Intrinsic::loongarch_masked_atomicrmw_add_i32;
9363 case AtomicRMWInst::Sub:
9364 return Intrinsic::loongarch_masked_atomicrmw_sub_i32;
9365 case AtomicRMWInst::Nand:
9366 return Intrinsic::loongarch_masked_atomicrmw_nand_i32;
9367 case AtomicRMWInst::UMax:
9368 return Intrinsic::loongarch_masked_atomicrmw_umax_i32;
9369 case AtomicRMWInst::UMin:
9370 return Intrinsic::loongarch_masked_atomicrmw_umin_i32;
9371 case AtomicRMWInst::Max:
9372 return Intrinsic::loongarch_masked_atomicrmw_max_i32;
9373 case AtomicRMWInst::Min:
9374 return Intrinsic::loongarch_masked_atomicrmw_min_i32;
9375 // TODO: support other AtomicRMWInst.
9376 }
9377 }
9378
9379 llvm_unreachable("Unexpected GRLen\n");
9380}
9381
9382 TargetLowering::AtomicExpansionKind
9383 LoongArchTargetLowering::shouldExpandAtomicCmpXchgInIR(
9384 const AtomicCmpXchgInst *CI) const {
9385
9386 if (Subtarget.hasLAMCAS())
9387 return AtomicExpansionKind::None;
9388
9389 unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
9390 if (Size == 8 || Size == 16)
9391 return AtomicExpansionKind::MaskedIntrinsic;
9392 return AtomicExpansionKind::None;
9393 }
9394
9395 Value *LoongArchTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
9396 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
9397 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
9398 unsigned GRLen = Subtarget.getGRLen();
9399 AtomicOrdering FailOrd = CI->getFailureOrdering();
9400 Value *FailureOrdering =
9401 Builder.getIntN(Subtarget.getGRLen(), static_cast<uint64_t>(FailOrd));
9402 Intrinsic::ID CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i32;
9403 if (GRLen == 64) {
9404 CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i64;
9405 CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
9406 NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
9407 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
9408 }
9409 Type *Tys[] = {AlignedAddr->getType()};
9410 Value *Result = Builder.CreateIntrinsic(
9411 CmpXchgIntrID, Tys, {AlignedAddr, CmpVal, NewVal, Mask, FailureOrdering});
9412 if (GRLen == 64)
9413 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
9414 return Result;
9415}
9416
9417 Value *LoongArchTargetLowering::emitMaskedAtomicRMWIntrinsic(
9418 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
9419 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
9420 // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
9421 // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
9422 // mask, as this produces better code than the LL/SC loop emitted by
9423 // int_loongarch_masked_atomicrmw_xchg.
9424 if (AI->getOperation() == AtomicRMWInst::Xchg &&
9425 isa<ConstantInt>(AI->getValOperand())) {
9426 ConstantInt *CVal = cast<ConstantInt>(AI->getValOperand());
9427 if (CVal->isZero())
9428 return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,
9429 Builder.CreateNot(Mask, "Inv_Mask"),
9430 AI->getAlign(), Ord);
9431 if (CVal->isMinusOne())
9432 return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,
9433 AI->getAlign(), Ord);
9434 }
9435
9436 unsigned GRLen = Subtarget.getGRLen();
9437 Value *Ordering =
9438 Builder.getIntN(GRLen, static_cast<uint64_t>(AI->getOrdering()));
9439 Type *Tys[] = {AlignedAddr->getType()};
9440 Function *LlwOpScwLoop = Intrinsic::getOrInsertDeclaration(
9441 AI->getModule(),
9442 getIntrinsicForMaskedAtomicRMWBinOp(GRLen, AI->getOperation()), Tys);
9443
9444 if (GRLen == 64) {
9445 Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
9446 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
9447 ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
9448 }
9449
9450 Value *Result;
9451
9452 // Must pass the shift amount needed to sign extend the loaded value prior
9453 // to performing a signed comparison for min/max. ShiftAmt is the number of
9454 // bits to shift the value into position. Pass GRLen-ShiftAmt-ValWidth, which
9455 // is the number of bits to left+right shift the value in order to
9456 // sign-extend.
9457 if (AI->getOperation() == AtomicRMWInst::Min ||
9459 const DataLayout &DL = AI->getDataLayout();
9460 unsigned ValWidth =
9461 DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
9462 Value *SextShamt =
9463 Builder.CreateSub(Builder.getIntN(GRLen, GRLen - ValWidth), ShiftAmt);
9464 Result = Builder.CreateCall(LlwOpScwLoop,
9465 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
9466 } else {
9467 Result =
9468 Builder.CreateCall(LlwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
9469 }
9470
9471 if (GRLen == 64)
9472 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
9473 return Result;
9474}
9475
9476 bool LoongArchTargetLowering::isFMAFasterThanFMulAndFAdd(
9477 const MachineFunction &MF, EVT VT) const {
9478 VT = VT.getScalarType();
9479
9480 if (!VT.isSimple())
9481 return false;
9482
9483 switch (VT.getSimpleVT().SimpleTy) {
9484 case MVT::f32:
9485 case MVT::f64:
9486 return true;
9487 default:
9488 break;
9489 }
9490
9491 return false;
9492}
9493
9494 Register LoongArchTargetLowering::getExceptionPointerRegister(
9495 const Constant *PersonalityFn) const {
9496 return LoongArch::R4;
9497}
9498
9499 Register LoongArchTargetLowering::getExceptionSelectorRegister(
9500 const Constant *PersonalityFn) const {
9501 return LoongArch::R5;
9502}
9503
9504//===----------------------------------------------------------------------===//
9505// Target Optimization Hooks
9506//===----------------------------------------------------------------------===//
9507
9508 static int getEstimateRefinementSteps(EVT VT,
9509 const LoongArchSubtarget &Subtarget) {
9510 // The FRECIPE feature's estimate instructions have a relative accuracy of
9511 // 2^-14; IEEE single precision has 23 significand bits and double has 52.
9512 int RefinementSteps = VT.getScalarType() == MVT::f64 ? 2 : 1;
9513 return RefinementSteps;
9514}
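// Each Newton-Raphson step roughly squares the error: starting from 2^-14,
// one step reaches about 2^-28 (enough for f32's 24-bit significand) and
// two steps reach about 2^-56 (enough for f64's 53 bits), matching the
// counts returned above.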
9515
9516static bool
9517 isSupportedReciprocalEstimateType(EVT VT, const LoongArchSubtarget &Subtarget) {
9518 assert(Subtarget.hasFrecipe() &&
9519 "Reciprocal estimate queried on unsupported target");
9520
9521 if (!VT.isSimple())
9522 return false;
9523
9524 switch (VT.getSimpleVT().SimpleTy) {
9525 case MVT::f32:
9526 // f32 is the base type for reciprocal estimate instructions.
9527 return true;
9528
9529 case MVT::f64:
9530 return Subtarget.hasBasicD();
9531
9532 case MVT::v4f32:
9533 case MVT::v2f64:
9534 return Subtarget.hasExtLSX();
9535
9536 case MVT::v8f32:
9537 case MVT::v4f64:
9538 return Subtarget.hasExtLASX();
9539
9540 default:
9541 return false;
9542 }
9543}
9544
9545 SDValue LoongArchTargetLowering::getSqrtEstimate(SDValue Operand,
9546 SelectionDAG &DAG, int Enabled,
9547 int &RefinementSteps,
9548 bool &UseOneConstNR,
9549 bool Reciprocal) const {
9551 "Enabled should never be Disabled here");
9552
9553 if (!Subtarget.hasFrecipe())
9554 return SDValue();
9555
9556 SDLoc DL(Operand);
9557 EVT VT = Operand.getValueType();
9558
9559 // Check supported types.
9560 if (!isSupportedReciprocalEstimateType(VT, Subtarget))
9561 return SDValue();
9562
9563 // Handle refinement steps.
9564 if (RefinementSteps == ReciprocalEstimate::Unspecified)
9565 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
9566
9567 // LoongArch only has FRSQRTE which is 1.0 / sqrt(x).
9568 UseOneConstNR = false;
9569 SDValue Rsqrt = DAG.getNode(LoongArchISD::FRSQRTE, DL, VT, Operand);
9570
9571 // If the caller wants 1.0 / sqrt(x), or if further refinement steps
9572 // are needed (which rely on the reciprocal form), return the raw reciprocal
9573 // estimate.
9574 if (Reciprocal || RefinementSteps > 0)
9575 return Rsqrt;
9576
9577 // Otherwise, return sqrt(x) by multiplying with the operand.
9578 return DAG.getNode(ISD::FMUL, DL, VT, Operand, Rsqrt);
9579}
9580
9581 SDValue LoongArchTargetLowering::getRecipEstimate(SDValue Operand,
9582 SelectionDAG &DAG,
9583 int Enabled,
9584 int &RefinementSteps) const {
9586 "Enabled should never be Disabled here");
9587
9588 if (!Subtarget.hasFrecipe())
9589 return SDValue();
9590
9591 SDLoc DL(Operand);
9592 EVT VT = Operand.getValueType();
9593
9594 // Check supported types.
9595 if (!isSupportedReciprocalEstimateType(VT, Subtarget))
9596 return SDValue();
9597
9598 if (RefinementSteps == ReciprocalEstimate::Unspecified)
9599 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
9600
9601 // FRECIPE computes 1.0 / x.
9602 return DAG.getNode(LoongArchISD::FRECIPE, DL, VT, Operand);
9603}
9604
9605//===----------------------------------------------------------------------===//
9606// LoongArch Inline Assembly Support
9607//===----------------------------------------------------------------------===//
9608
9609 LoongArchTargetLowering::ConstraintType
9610 LoongArchTargetLowering::getConstraintType(StringRef Constraint) const {
9611 // LoongArch specific constraints in GCC: config/loongarch/constraints.md
9612 //
9613 // 'f': A floating-point register (if available).
9614 // 'k': A memory operand whose address is formed by a base register and
9615 // (optionally scaled) index register.
9616 // 'l': A signed 16-bit constant.
9617 // 'm': A memory operand whose address is formed by a base register and
9618 // offset that is suitable for use in instructions with the same
9619 // addressing mode as st.w and ld.w.
9620 // 'q': A general-purpose register except for $r0 and $r1 (for the csrxchg
9621 // instruction)
9622 // 'I': A signed 12-bit constant (for arithmetic instructions).
9623 // 'J': Integer zero.
9624 // 'K': An unsigned 12-bit constant (for logic instructions).
9625 // "ZB": An address that is held in a general-purpose register. The offset is
9626 // zero.
9627 // "ZC": A memory operand whose address is formed by a base register and
9628 // offset that is suitable for use in instructions with the same
9629 // addressing mode as ll.w and sc.w.
9630 if (Constraint.size() == 1) {
9631 switch (Constraint[0]) {
9632 default:
9633 break;
9634 case 'f':
9635 case 'q':
9636 return C_RegisterClass;
9637 case 'l':
9638 case 'I':
9639 case 'J':
9640 case 'K':
9641 return C_Immediate;
9642 case 'k':
9643 return C_Memory;
9644 }
9645 }
9646
9647 if (Constraint == "ZC" || Constraint == "ZB")
9648 return C_Memory;
9649
9650 // 'm' is handled here.
9651 return TargetLowering::getConstraintType(Constraint);
9652}
9653
9654InlineAsm::ConstraintCode LoongArchTargetLowering::getInlineAsmMemConstraint(
9655 StringRef ConstraintCode) const {
9656 return StringSwitch<InlineAsm::ConstraintCode>(ConstraintCode)
9657 .Case("k", InlineAsm::ConstraintCode::k)
9658 .Case("ZB", InlineAsm::ConstraintCode::ZB)
9659 .Case("ZC", InlineAsm::ConstraintCode::ZC)
9660 .Default(TargetLowering::getInlineAsmMemConstraint(ConstraintCode));
9661}
9662
9663std::pair<unsigned, const TargetRegisterClass *>
9664LoongArchTargetLowering::getRegForInlineAsmConstraint(
9665 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
9666 // First, see if this is a constraint that directly corresponds to a LoongArch
9667 // register class.
9668 if (Constraint.size() == 1) {
9669 switch (Constraint[0]) {
9670 case 'r':
9671 // TODO: Support fixed vectors up to GRLen?
9672 if (VT.isVector())
9673 break;
9674 return std::make_pair(0U, &LoongArch::GPRRegClass);
9675 case 'q':
9676 return std::make_pair(0U, &LoongArch::GPRNoR0R1RegClass);
9677 case 'f':
9678 if (Subtarget.hasBasicF() && VT == MVT::f32)
9679 return std::make_pair(0U, &LoongArch::FPR32RegClass);
9680 if (Subtarget.hasBasicD() && VT == MVT::f64)
9681 return std::make_pair(0U, &LoongArch::FPR64RegClass);
9682 if (Subtarget.hasExtLSX() &&
9683 TRI->isTypeLegalForClass(LoongArch::LSX128RegClass, VT))
9684 return std::make_pair(0U, &LoongArch::LSX128RegClass);
9685 if (Subtarget.hasExtLASX() &&
9686 TRI->isTypeLegalForClass(LoongArch::LASX256RegClass, VT))
9687 return std::make_pair(0U, &LoongArch::LASX256RegClass);
9688 break;
9689 default:
9690 break;
9691 }
9692 }
9693
9694 // TargetLowering::getRegForInlineAsmConstraint uses the name of the TableGen
9695 // record (e.g. the "R0" in `def R0`) to choose registers for InlineAsm
9696 // constraints while the official register name is prefixed with a '$'. So we
9697 // clip the '$' from the original constraint string (e.g. {$r0} to {r0}.)
9698 // before it is parsed. And TargetLowering::getRegForInlineAsmConstraint is
9699 // case insensitive, so no need to convert the constraint to upper case here.
9700 //
9701 // For now, no need to support ABI names (e.g. `$a0`) as clang will correctly
9702 // decode the usage of register name aliases into their official names. And
9703 // AFAIK, the not yet upstreamed `rustc` for LoongArch will always use
9704 // official register names.
9705 if (Constraint.starts_with("{$r") || Constraint.starts_with("{$f") ||
9706 Constraint.starts_with("{$vr") || Constraint.starts_with("{$xr")) {
9707 bool IsFP = Constraint[2] == 'f';
9708 std::pair<StringRef, StringRef> Temp = Constraint.split('$');
9709 std::pair<unsigned, const TargetRegisterClass *> R;
9710 R = TargetLowering::getRegForInlineAsmConstraint(
9711 TRI, join_items("", Temp.first, Temp.second), VT);
9712 // Match those names to the widest floating point register type available.
9713 if (IsFP) {
9714 unsigned RegNo = R.first;
9715 if (LoongArch::F0 <= RegNo && RegNo <= LoongArch::F31) {
9716 if (Subtarget.hasBasicD() && (VT == MVT::f64 || VT == MVT::Other)) {
9717 unsigned DReg = RegNo - LoongArch::F0 + LoongArch::F0_64;
9718 return std::make_pair(DReg, &LoongArch::FPR64RegClass);
9719 }
9720 }
9721 }
9722 return R;
9723 }
9724
9725 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
9726}
9727
9728void LoongArchTargetLowering::LowerAsmOperandForConstraint(
9729 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
9730 SelectionDAG &DAG) const {
9731 // Currently only support length 1 constraints.
9732 if (Constraint.size() == 1) {
9733 switch (Constraint[0]) {
9734 case 'l':
9735 // Validate & create a 16-bit signed immediate operand.
9736 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
9737 uint64_t CVal = C->getSExtValue();
9738 if (isInt<16>(CVal))
9739 Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op),
9740 Subtarget.getGRLenVT()));
9741 }
9742 return;
9743 case 'I':
9744 // Validate & create a 12-bit signed immediate operand.
9745 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
9746 uint64_t CVal = C->getSExtValue();
9747 if (isInt<12>(CVal))
9748 Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op),
9749 Subtarget.getGRLenVT()));
9750 }
9751 return;
9752 case 'J':
9753 // Validate & create an integer zero operand.
9754 if (auto *C = dyn_cast<ConstantSDNode>(Op))
9755 if (C->getZExtValue() == 0)
9756 Ops.push_back(
9757 DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getGRLenVT()));
9758 return;
9759 case 'K':
9760 // Validate & create a 12-bit unsigned immediate operand.
9761 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
9762 uint64_t CVal = C->getZExtValue();
9763 if (isUInt<12>(CVal))
9764 Ops.push_back(
9765 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT()));
9766 }
9767 return;
9768 default:
9769 break;
9770 }
9771 }
9773}
9774
9775#define GET_REGISTER_MATCHER
9776#include "LoongArchGenAsmMatcher.inc"
9777
9778 Register
9779 LoongArchTargetLowering::getRegisterByName(const char *RegName, LLT VT,
9780 const MachineFunction &MF) const {
9781 std::pair<StringRef, StringRef> Name = StringRef(RegName).split('$');
9782 std::string NewRegName = Name.second.str();
9783 Register Reg = MatchRegisterAltName(NewRegName);
9784 if (!Reg)
9785 Reg = MatchRegisterName(NewRegName);
9786 if (!Reg)
9787 return Reg;
9788 BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
9789 if (!ReservedRegs.test(Reg))
9790 report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
9791 StringRef(RegName) + "\"."));
9792 return Reg;
9793}
9794
9795 bool LoongArchTargetLowering::decomposeMulByConstant(LLVMContext &Context,
9796 EVT VT, SDValue C) const {
9797 // TODO: Support vectors.
9798 if (!VT.isScalarInteger())
9799 return false;
9800
9801 // Omit the optimization if the data size exceeds GRLen.
9802 if (VT.getSizeInBits() > Subtarget.getGRLen())
9803 return false;
9804
9805 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
9806 const APInt &Imm = ConstNode->getAPIntValue();
9807 // Break MUL into (SLLI + ADD/SUB) or ALSL.
9808 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
9809 (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
9810 return true;
9811 // Break MUL into (ALSL x, (SLLI x, imm0), imm1).
9812 if (ConstNode->hasOneUse() &&
9813 ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
9814 (Imm - 8).isPowerOf2() || (Imm - 16).isPowerOf2()))
9815 return true;
9816 // Break (MUL x, imm) into (ADD (SLLI x, s0), (SLLI x, s1)),
9817 // in which the immediate has two set bits. Or Break (MUL x, imm)
9818 // into (SUB (SLLI x, s0), (SLLI x, s1)), in which the immediate
9819 // equals to (1 << s0) - (1 << s1).
9820 if (ConstNode->hasOneUse() && !(Imm.sge(-2048) && Imm.sle(4095))) {
9821 unsigned Shifts = Imm.countr_zero();
9822 // Reject immediates which can be composed via a single LUI.
9823 if (Shifts >= 12)
9824 return false;
9825 // Reject multiplications that can be optimized to
9826 // (SLLI (ALSL x, x, 1/2/3/4), s).
9827 APInt ImmPop = Imm.ashr(Shifts);
9828 if (ImmPop == 3 || ImmPop == 5 || ImmPop == 9 || ImmPop == 17)
9829 return false;
9830 // We do not consider the case `(-Imm - ImmSmall).isPowerOf2()`,
9831 // since it needs one more instruction than the other 3 cases.
9832 APInt ImmSmall = APInt(Imm.getBitWidth(), 1ULL << Shifts, true);
9833 if ((Imm - ImmSmall).isPowerOf2() || (Imm + ImmSmall).isPowerOf2() ||
9834 (ImmSmall - Imm).isPowerOf2())
9835 return true;
9836 }
9837 }
9838
9839 return false;
9840}
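// Illustrative only, not part of this file: the decompositions the hook above
// reports as profitable, spelled out with plain shifts and adds. The helper
// names are assumptions.
#include <cstdint>
uint64_t mulBy9(uint64_t X) { return (X << 3) + X; }         // Imm-1 pow2: one ALSL
uint64_t mulBy7(uint64_t X) { return (X << 3) - X; }         // Imm+1 pow2: SLLI+SUB
uint64_t mulBy10(uint64_t X) { return (X << 3) + (X << 1); } // Imm-2 pow2: SLLI+ALSL
uint64_t mulBy0x1010(uint64_t X) {    // two set bits, outside [-2048, 4095]
  return (X << 12) + (X << 4);        // ADD of two SLLIs
}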
9841
9842bool LoongArchTargetLowering::isLegalAddressingMode(const DataLayout &DL,
9843 const AddrMode &AM,
9844 Type *Ty, unsigned AS,
9845 Instruction *I) const {
9846 // LoongArch has four basic addressing modes:
9847 // 1. reg
9848 // 2. reg + 12-bit signed offset
9849 // 3. reg + 14-bit signed offset left-shifted by 2
9850 // 4. reg1 + reg2
9851 // TODO: Add more checks after supporting the vector extension.
9852
9853 // No global is ever allowed as a base.
9854 if (AM.BaseGV)
9855 return false;
9856
9857 // Require a 12-bit signed offset or 14-bit signed offset left-shifted by 2
9858 // with `UAL` feature.
9859 if (!isInt<12>(AM.BaseOffs) &&
9860 !(isShiftedInt<14, 2>(AM.BaseOffs) && Subtarget.hasUAL()))
9861 return false;
9862
9863 switch (AM.Scale) {
9864 case 0:
9865 // "r+i" or just "i", depending on HasBaseReg.
9866 break;
9867 case 1:
9868 // "r+r+i" is not allowed.
9869 if (AM.HasBaseReg && AM.BaseOffs)
9870 return false;
9871 // Otherwise we have "r+r" or "r+i".
9872 break;
9873 case 2:
9874 // "2*r+r" or "2*r+i" is not allowed.
9875 if (AM.HasBaseReg || AM.BaseOffs)
9876 return false;
9877 // Allow "2*r" as "r+r".
9878 break;
9879 default:
9880 return false;
9881 }
9882
9883 return true;
9884}
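// Illustrative sketch (not part of this file): the same offset legality rules
// as a standalone predicate over plain integers. The helper name is an
// assumption.
#include <cstdint>
bool isLegalLoadStoreOffset(int64_t Offs, bool HasUAL) {
  // Mode 2: reg + si12, e.g. ld.d $rd, $rj, si12.
  bool FitsSi12 = Offs >= -2048 && Offs <= 2047;
  // Mode 3: reg + (si14 << 2), e.g. ldptr.d; only accepted with UAL above.
  bool FitsSi14Lsl2 = (Offs % 4) == 0 && Offs >= -32768 && Offs <= 32764;
  return FitsSi12 || (FitsSi14Lsl2 && HasUAL);
}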
9885
9886bool LoongArchTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
9887 return isInt<12>(Imm);
9888}
9889
9890bool LoongArchTargetLowering::isLegalAddImmediate(int64_t Imm) const {
9891 return isInt<12>(Imm);
9892}
9893
9894bool LoongArchTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
9895 // Zexts are free if they can be combined with a load.
9896 // Don't advertise i32->i64 zextload as being free for LA64. It interacts
9897 // poorly with type legalization of compares preferring sext.
9898 if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
9899 EVT MemVT = LD->getMemoryVT();
9900 if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
9901 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
9902 LD->getExtensionType() == ISD::ZEXTLOAD))
9903 return true;
9904 }
9905
9906 return TargetLowering::isZExtFree(Val, VT2);
9907}
9908
9909bool LoongArchTargetLowering::isSExtCheaperThanZExt(EVT SrcVT,
9910 EVT DstVT) const {
9911 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
9912}
9913
9914bool LoongArchTargetLowering::signExtendConstant(const ConstantInt *CI) const {
9915 return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
9916}
9917
9918bool LoongArchTargetLowering::hasAndNot(SDValue Y) const {
9919 // TODO: Support vectors.
9920 if (Y.getValueType().isVector())
9921 return false;
9922
9923 return !isa<ConstantSDNode>(Y);
9924}
9925
9926ISD::NodeType LoongArchTargetLowering::getExtendForAtomicCmpSwapArg() const {
9927 // LAMCAS will use amcas[_DB].{b/h/w/d} which does not require extension.
9928 return Subtarget.hasLAMCAS() ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND;
9929}
9930
9931bool LoongArchTargetLowering::shouldSignExtendTypeInLibCall(
9932 Type *Ty, bool IsSigned) const {
9933 if (Subtarget.is64Bit() && Ty->isIntegerTy(32))
9934 return true;
9935
9936 return IsSigned;
9937}
9938
9939bool LoongArchTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
9940 // Return false to suppress unnecessary extensions when a LibCall
9941 // argument or return value is a float narrower than GRLEN on a soft FP ABI.
9942 if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
9943 Type.getSizeInBits() < Subtarget.getGRLen()))
9944 return false;
9945 return true;
9946}
9947
9948// memcpy and other memory intrinsics typically try to use wider loads/stores
9949// if the source/dest is aligned and the copy size is large enough. We therefore
9950// want to align such objects passed to memory intrinsics.
9951bool LoongArchTargetLowering::shouldAlignPointerArgs(CallInst *CI,
9952 unsigned &MinSize,
9953 Align &PrefAlign) const {
9954 if (!isa<MemIntrinsic>(CI))
9955 return false;
9956
9957 if (Subtarget.is64Bit()) {
9958 MinSize = 8;
9959 PrefAlign = Align(8);
9960 } else {
9961 MinSize = 4;
9962 PrefAlign = Align(4);
9963 }
9964
9965 return true;
9966}
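// Illustrative only, not part of this file: the effect the hook above asks
// for. Raising an object's alignment to GRLen/8 lets a memcpy over it be
// lowered with full-width loads/stores (ld.d/st.d on LA64) instead of byte
// operations. The type and function names are assumptions.
#include <cstring>
struct alignas(8) Message { char Payload[64]; }; // matches PrefAlign on LA64

void copyMessage(Message *Dst, const Message *Src) {
  std::memcpy(Dst, Src, sizeof(Message)); // eligible for 8-byte-wide copies
}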
9967
9976
9977bool LoongArchTargetLowering::splitValueIntoRegisterParts(
9978 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
9979 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
9980 bool IsABIRegCopy = CC.has_value();
9981 EVT ValueVT = Val.getValueType();
9982
9983 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
9984 PartVT == MVT::f32) {
9985 // Cast the [b]f16 to i16, extend to i32, pad the high bits with ones to
9986 // make a float NaN, and cast to f32.
9987 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);
9988 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);
9989 Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val,
9990 DAG.getConstant(0xFFFF0000, DL, MVT::i32));
9991 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
9992 Parts[0] = Val;
9993 return true;
9994 }
9995
9996 return false;
9997}
9998
9999SDValue LoongArchTargetLowering::joinRegisterPartsIntoValue(
10000 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
10001 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
10002 bool IsABIRegCopy = CC.has_value();
10003
10004 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
10005 PartVT == MVT::f32) {
10006 SDValue Val = Parts[0];
10007
10008 // Cast the f32 to i32, truncate to i16, and cast back to [b]f16.
10009 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
10010 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);
10011 Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
10012 return Val;
10013 }
10014
10015 return SDValue();
10016}
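// Illustrative sketch (not part of this file): the [b]f16 <-> f32 NaN-boxing
// round trip performed by the two hooks above, written with plain bit
// operations. The helper names are assumptions.
#include <cstdint>
#include <cstring>
float boxHalfBits(uint16_t HalfBits) {      // split: pad the high half with ones
  uint32_t Bits = 0xFFFF0000u | HalfBits;   // exponent all-ones -> an f32 NaN
  float F;
  std::memcpy(&F, &Bits, sizeof(F));
  return F;
}
uint16_t unboxHalfBits(float F) {           // join: truncate back to 16 bits
  uint32_t Bits;
  std::memcpy(&Bits, &F, sizeof(Bits));
  return static_cast<uint16_t>(Bits);       // low half carries the [b]f16 value
}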
10017
10018MVT LoongArchTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
10019 CallingConv::ID CC,
10020 EVT VT) const {
10021 // Use f32 to pass f16.
10022 if (VT == MVT::f16 && Subtarget.hasBasicF())
10023 return MVT::f32;
10024
10025 return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
10026}
10027
10028unsigned LoongArchTargetLowering::getNumRegistersForCallingConv(
10029 LLVMContext &Context, CallingConv::ID CC, EVT VT) const {
10030 // Use f32 to pass f16.
10031 if (VT == MVT::f16 && Subtarget.hasBasicF())
10032 return 1;
10033
10034 return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
10035}
10036
10037void LoongArchTargetLowering::computeKnownBitsForTargetNode(
10038 const SDValue Op, KnownBits &Known, const APInt &DemandedElts,
10039 const SelectionDAG &DAG, unsigned Depth) const {
10040 unsigned Opc = Op.getOpcode();
10041 Known.resetAll();
10042 switch (Opc) {
10043 default:
10044 break;
10045 case LoongArchISD::VPICK_ZEXT_ELT: {
10046 assert(isa<VTSDNode>(Op->getOperand(2)) && "Unexpected operand!");
10047 EVT VT = cast<VTSDNode>(Op->getOperand(2))->getVT();
10048 unsigned VTBits = VT.getScalarSizeInBits();
10049 assert(Known.getBitWidth() >= VTBits && "Unexpected width!");
10050 Known.Zero.setBitsFrom(VTBits);
10051 break;
10052 }
10053 }
10054}
10055
10056bool LoongArchTargetLowering::SimplifyDemandedBitsForTargetNode(
10057 SDValue Op, const APInt &OriginalDemandedBits,
10058 const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
10059 unsigned Depth) const {
10060 EVT VT = Op.getValueType();
10061 unsigned BitWidth = OriginalDemandedBits.getBitWidth();
10062 unsigned Opc = Op.getOpcode();
10063 switch (Opc) {
10064 default:
10065 break;
10066 case LoongArchISD::VMSKLTZ:
10067 case LoongArchISD::XVMSKLTZ: {
10068 SDValue Src = Op.getOperand(0);
10069 MVT SrcVT = Src.getSimpleValueType();
10070 unsigned SrcBits = SrcVT.getScalarSizeInBits();
10071 unsigned NumElts = SrcVT.getVectorNumElements();
10072
10073 // If we don't need the sign bits at all just return zero.
10074 if (OriginalDemandedBits.countr_zero() >= NumElts)
10075 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
10076
10077 // Only demand the vector elements of the sign bits we need.
10078 APInt KnownUndef, KnownZero;
10079 APInt DemandedElts = OriginalDemandedBits.zextOrTrunc(NumElts);
10080 if (SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef, KnownZero,
10081 TLO, Depth + 1))
10082 return true;
10083
10084 Known.Zero = KnownZero.zext(BitWidth);
10085 Known.Zero.setHighBits(BitWidth - NumElts);
10086
10087 // [X]VMSKLTZ only uses the MSB from each vector element.
10088 KnownBits KnownSrc;
10089 APInt DemandedSrcBits = APInt::getSignMask(SrcBits);
10090 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, KnownSrc, TLO,
10091 Depth + 1))
10092 return true;
10093
10094 if (KnownSrc.One[SrcBits - 1])
10095 Known.One.setLowBits(NumElts);
10096 else if (KnownSrc.Zero[SrcBits - 1])
10097 Known.Zero.setLowBits(NumElts);
10098
10099 // Attempt to avoid multi-use ops if we don't need anything from it.
10100 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
10101 Src, DemandedSrcBits, DemandedElts, TLO.DAG, Depth + 1))
10102 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewSrc));
10103 return false;
10104 }
10105 }
10106
10107 return TargetLowering::SimplifyDemandedBitsForTargetNode(
10108 Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth);
10109}
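// Illustrative only, not part of this file: a scalar model of vmskltz.b, the
// fact the demanded-bits code above exploits. Each result bit is just the
// sign (MSB) of one source lane, so only the lane MSBs are demanded from the
// source and only the low NumElts result bits can be nonzero. The function
// name is an assumption.
#include <cstdint>
uint16_t vmskltzByteModel(const int8_t Lanes[16]) {
  uint16_t Mask = 0;
  for (int I = 0; I < 16; ++I)
    if (Lanes[I] < 0)                  // tests the most significant bit only
      Mask |= static_cast<uint16_t>(1u << I);
  return Mask; // in the DAG the GRLen-wide result has bits >= 16 known zero
}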
10110
10111bool LoongArchTargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
10112 unsigned Opc = VecOp.getOpcode();
10113
10114 // Assume target opcodes can't be scalarized.
10115 // TODO - do we have any exceptions?
10116 if (Opc >= ISD::BUILTIN_OP_END || !isBinOp(Opc))
10117 return false;
10118
10119 // If the vector op is not supported, try to convert to scalar.
10120 EVT VecVT = VecOp.getValueType();
10121 if (!isOperationLegalOrCustom(Opc, VecVT))
10122 return true;
10123
10124 // If the vector op is supported, but the scalar op is not, the transform may
10125 // not be worthwhile.
10126 EVT ScalarVT = VecVT.getScalarType();
10127 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT);
10128}
10129
10130bool LoongArchTargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
10131 unsigned Index) const {
10132 if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
10133 return false;
10134
10135 // Extracting a 128-bit subvector from index 0 of a 256-bit vector is free.
10136 return Index == 0;
10137}
10138
10139bool LoongArchTargetLowering::isExtractVecEltCheap(EVT VT,
10140 unsigned Index) const {
10141 EVT EltVT = VT.getScalarType();
10142
10143 // Extracting a scalar FP value from index 0 of a vector is free.
10144 return (EltVT == MVT::f32 || EltVT == MVT::f64) && Index == 0;
10145}
static MCRegister MatchRegisterName(StringRef Name)
static bool checkValueWidth(SDValue V, unsigned width, ISD::LoadExtType &ExtType)
static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
return SDValue()
static SDValue performANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue performSETCCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static MCRegister MatchRegisterAltName(StringRef Name)
Maps from the set of all alternative registernames to a register number.
Function Alias Analysis Results
static uint64_t getConstant(const Value *IndexValue)
static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG, unsigned Flags)
#define X(NUM, ENUM, NAME)
Definition ELF.h:851
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static MachineBasicBlock * emitSelectPseudo(MachineInstr &MI, MachineBasicBlock *BB, unsigned Opcode)
static SDValue unpackFromRegLoc(const CSKYSubtarget &Subtarget, SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define RegName(no)
static SDValue performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
const MCPhysReg ArgFPR32s[]
static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Dispatching routine to lower various 128-bit LoongArch vector shuffles.
static SDValue lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVSHUF4I (if possible).
const MCPhysReg ArgVRs[]
static SDValue lowerVECTOR_SHUFFLE_VPICKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPICKEV (if possible).
static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_XVPICKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPICKOD (if possible).
static SDValue unpackF64OnLA32DSoftABI(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const CCValAssign &HiVA, const SDLoc &DL)
static bool fitsRegularPattern(typename SmallVectorImpl< ValType >::const_iterator Begin, unsigned CheckStride, typename SmallVectorImpl< ValType >::const_iterator End, ValType ExpectedIndex, unsigned ExpectedIndexStride)
Determine whether a range fits a regular pattern of values.
static SDValue lowerVECTOR_SHUFFLE_IsReverse(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE whose result is the reversed source vector.
static SDValue PromoteMaskArithmetic(SDValue N, const SDLoc &DL, EVT VT, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, unsigned Depth)
static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg, SelectionDAG &DAG)
static cl::opt< bool > ZeroDivCheck("loongarch-check-zero-division", cl::Hidden, cl::desc("Trap on integer division by zero."), cl::init(false))
static SDValue lowerVECTOR_SHUFFLE_VSHUF(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into VSHUF.
static int getEstimateRefinementSteps(EVT VT, const LoongArchSubtarget &Subtarget)
static bool isSupportedReciprocalEstimateType(EVT VT, const LoongArchSubtarget &Subtarget)
static void emitErrorAndReplaceIntrinsicResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, StringRef ErrorMsg, bool WithChain=true)
static SDValue lowerVECTOR_SHUFFLEAsByteRotate(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE as byte rotate (if possible).
static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp, SelectionDAG &DAG, bool IsSigned=false)
static SDValue lowerVECTOR_SHUFFLE_XVINSVE0(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVINSVE0 (if possible).
static SDValue performMOVFR2GR_SCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_VILVH(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VILVH (if possible).
static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI, unsigned ValNo, MVT ValVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsRet, Type *OrigTy)
static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG)
static SDValue performSPLIT_PAIR_F64Combine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue performBITCASTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static MachineBasicBlock * emitSplitPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue lowerVectorBitSetImm(SDNode *Node, SelectionDAG &DAG)
static SDValue performSETCC_BITCASTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
@ NoMaterializeFPImm
@ MaterializeFPImm2Ins
@ MaterializeFPImm5Ins
@ MaterializeFPImm6Ins
@ MaterializeFPImm3Ins
@ MaterializeFPImm4Ins
static SDValue performEXTENDCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_XVPACKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPACKOD (if possible).
static std::optional< bool > matchSetCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue Val)
static SDValue combineAndNotIntoVANDN(SDNode *N, const SDLoc &DL, SelectionDAG &DAG)
Try to fold: (and (xor X, -1), Y) -> (vandn X, Y).
static SDValue lowerBUILD_VECTORAsBroadCastLoad(BuildVectorSDNode *BVOp, const SDLoc &DL, SelectionDAG &DAG)
#define CRC_CASE_EXT_BINARYOP(NAME, NODE)
static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG)
static bool checkBitcastSrcVectorSize(SDValue Src, unsigned Size, unsigned Depth)
static SDValue lowerVECTOR_SHUFFLEAsShift(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, const APInt &Zeroable)
Lower VECTOR_SHUFFLE as shift (if possible).
static SDValue lowerVECTOR_SHUFFLE_VSHUF4I(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into VSHUF4I (if possible).
static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG)
static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
static MachineBasicBlock * insertDivByZeroTrap(MachineInstr &MI, MachineBasicBlock *MBB)
static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG)
static SDValue lowerVECTOR_SHUFFLE_VEXTRINS(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into VEXTRINS (if possible).
static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG)
static SDValue lowerVECTOR_SHUFFLE_VPACKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPACKEV (if possible).
static MachineBasicBlock * emitPseudoVMSKCOND(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue performSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue performSELECT_CCCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue performVANDNCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
Do target-specific dag combines on LoongArchISD::VANDN nodes.
static void replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, unsigned ResOp)
static SDValue lowerVECTOR_SHUFFLEAsZeroOrAnyExtend(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const APInt &Zeroable)
Lower VECTOR_SHUFFLE as ZERO_EXTEND Or ANY_EXTEND (if possible).
static SDValue legalizeIntrinsicImmArg(SDNode *Node, unsigned ImmOp, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, bool IsSigned=false)
static cl::opt< MaterializeFPImm > MaterializeFPImmInsNum("loongarch-materialize-float-imm", cl::Hidden, cl::desc("Maximum number of instructions used (including code sequence " "to generate the value and moving the value to FPR) when " "materializing floating-point immediates (default = 3)"), cl::init(MaterializeFPImm3Ins), cl::values(clEnumValN(NoMaterializeFPImm, "0", "Use constant pool"), clEnumValN(MaterializeFPImm2Ins, "2", "Materialize FP immediate within 2 instructions"), clEnumValN(MaterializeFPImm3Ins, "3", "Materialize FP immediate within 3 instructions"), clEnumValN(MaterializeFPImm4Ins, "4", "Materialize FP immediate within 4 instructions"), clEnumValN(MaterializeFPImm5Ins, "5", "Materialize FP immediate within 5 instructions"), clEnumValN(MaterializeFPImm6Ins, "6", "Materialize FP immediate within 6 instructions " "(behaves same as 5 on loongarch64)")))
static SDValue lowerVECTOR_SHUFFLE_XVPERMI(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVPERMI (if possible).
static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op, StringRef ErrorMsg, SelectionDAG &DAG)
const MCPhysReg ArgXRs[]
static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State, CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2, MVT ValVT2, MVT LocVT2, ISD::ArgFlagsTy ArgFlags2)
static unsigned getLoongArchWOpcode(unsigned Opcode)
const MCPhysReg ArgFPR64s[]
static MachineBasicBlock * emitPseudoCTPOP(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue performMOVGR2FR_WCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
#define IOCSRWR_CASE(NAME, NODE)
#define CRC_CASE_EXT_UNARYOP(NAME, NODE)
static SDValue lowerVECTOR_SHUFFLE_VPACKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPACKOD (if possible).
static SDValue signExtendBitcastSrcVector(SelectionDAG &DAG, EVT SExtVT, SDValue Src, const SDLoc &DL)
static SDValue isNOT(SDValue V, SelectionDAG &DAG)
static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Dispatching routine to lower various 256-bit LoongArch vector shuffles.
static SDValue lowerVECTOR_SHUFFLE_VREPLVEI(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into VREPLVEI (if possible).
static MachineBasicBlock * emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
const MCPhysReg PreserveNoneArgGPRs[]
static void fillVector(ArrayRef< SDValue > Ops, SelectionDAG &DAG, SDLoc DL, const LoongArchSubtarget &Subtarget, SDValue &Vector, EVT ResTy)
static SDValue fillSubVectorFromBuildVector(BuildVectorSDNode *Node, SelectionDAG &DAG, SDLoc DL, const LoongArchSubtarget &Subtarget, EVT ResTy, unsigned first)
static bool isSelectPseudo(MachineInstr &MI)
static SDValue foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp, SelectionDAG &DAG, bool IsSigned=false)
const MCPhysReg ArgGPRs[]
static SDValue lowerVECTOR_SHUFFLE_XVPERM(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVPERM (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVILVL(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVILVL (if possible).
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp, unsigned ExtOpc=ISD::ANY_EXTEND)
static void replaceVecCondBranchResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, unsigned ResOp)
#define ASRT_LE_GT_CASE(NAME)
static SDValue lowerVECTOR_SHUFFLE_XVPACKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPACKEV (if possible).
static SDValue performBR_CCCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static void computeZeroableShuffleElements(ArrayRef< int > Mask, SDValue V1, SDValue V2, APInt &KnownUndef, APInt &KnownZero)
Compute whether each element of a shuffle is zeroable.
static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue widenShuffleMask(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
static MachineBasicBlock * emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static bool canonicalizeShuffleVectorByLane(const SDLoc &DL, MutableArrayRef< int > Mask, MVT VT, SDValue &V1, SDValue &V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Shuffle vectors by lane to generate more optimized instructions.
static SDValue lowerVECTOR_SHUFFLE_XVILVH(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVILVH (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVSHUF (if possible).
static void replaceCMP_XCHG_128Results(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG)
static SDValue lowerVectorPickVE2GR(SDNode *N, SelectionDAG &DAG, unsigned ResOp)
static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
#define IOCSRRD_CASE(NAME, NODE)
static int matchShuffleAsByteRotate(MVT VT, SDValue &V1, SDValue &V2, ArrayRef< int > Mask)
Attempts to match vector shuffle as byte rotation.
static SDValue lowerVECTOR_SHUFFLE_XVPICKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPICKEV (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVREPLVEI (if possible).
static int matchShuffleAsShift(MVT &ShiftVT, unsigned &Opcode, unsigned ScalarSizeInBits, ArrayRef< int > Mask, int MaskOffset, const APInt &Zeroable)
Attempts to match a shuffle mask against the VBSLL, VBSRL, VSLLI and VSRLI instruction.
static SDValue lowerVECTOR_SHUFFLE_VILVL(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VILVL (if possible).
static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG)
static MachineBasicBlock * emitBuildPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE as lane permute and then shuffle (if possible).
static SDValue performVMSKLTZCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static void replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
#define CSR_CASE(ID)
static SDValue lowerVECTOR_SHUFFLE_VPICKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPICKOD (if possible).
static Intrinsic::ID getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen, AtomicRMWInst::BinOp BinOp)
static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS, ISD::CondCode &CC, SelectionDAG &DAG)
static Register allocateArgGPR(CCState &State)
static bool isRepeatedShuffleMask(unsigned LaneSizeInBits, MVT VT, ArrayRef< int > Mask, SmallVectorImpl< int > &RepeatedMask)
Test whether a shuffle mask is equivalent within each sub-lane.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define T
static CodeModel::Model getCodeModel(const PPCSubtarget &S, const TargetMachine &TM, const MachineOperand &MO)
This file defines the SmallSet class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
This file contains some functions that are useful when dealing with strings.
#define LLVM_DEBUG(...)
Definition Debug.h:114
static bool inRange(const MCExpr *Expr, int64_t MinValue, int64_t MaxValue, bool AllowSymbol=false)
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static bool isSequentialOrUndefInRange(ArrayRef< int > Mask, unsigned Pos, unsigned Size, int Low, int Step=1)
Return true if every element in Mask, beginning from position Pos and ending in Pos + Size,...
Value * RHS
Value * LHS
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
Definition APFloat.h:1499
bool isZero() const
Definition APFloat.h:1512
APInt bitcastToAPInt() const
Definition APFloat.h:1408
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1043
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:230
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1555
void setHighBits(unsigned hiBits)
Set the top hiBits bits.
Definition APInt.h:1406
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition APInt.h:1400
LLVM_ABI APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition APInt.cpp:1064
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:956
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition APInt.h:1345
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:372
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition APInt.cpp:1697
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1503
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1654
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition APInt.h:436
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1264
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:201
void setLowBits(unsigned loBits)
Set the bottom loBits bits.
Definition APInt.h:1403
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition APInt.h:287
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1577
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:858
This class represents an incoming formal argument to a Function.
Definition Argument.h:32
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
An instruction that atomically checks whether a specified value is in a memory location,...
AtomicOrdering getFailureOrdering() const
Returns the failure ordering constraint of this cmpxchg instruction.
an instruction that atomically reads a memory location, combines it with another value,...
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ Add
*p = old + v
@ USubCond
Subtract only if no unsigned overflow.
@ Min
*p = old <signed v ? old : v
@ Sub
*p = old - v
@ And
*p = old & v
@ Xor
*p = old ^ v
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ UIncWrap
Increment one up to a maximum value.
@ Max
*p = old >signed v ? old : v
@ UMin
*p = old <unsigned v ? old : v
@ UMax
*p = old >unsigned v ? old : v
@ UDecWrap
Decrement one until a minimum value or zero.
@ Nand
*p = ~(old & v)
Value * getPointerOperand()
bool isFloatingPointOperation() const
BinOp getOperation() const
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this rmw instruction.
AtomicOrdering getOrdering() const
Returns the ordering constraint of this rmw instruction.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
bool test(unsigned Idx) const
Definition BitVector.h:480
size_type count() const
count - Returns the number of bits which are set.
Definition BitVector.h:181
A "pseudo-class" with methods for operating on BUILD_VECTORs.
CCState - This class holds information needed while lowering arguments and return values.
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
LLVM_ABI void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeCallOperands - Analyze the outgoing arguments to a call, incorporating info about the passed v...
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
LLVM_ABI void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
CCValAssign - Represent assignment of one arg/retval to a location.
static CCValAssign getPending(unsigned ValNo, MVT ValVT, MVT LocVT, LocInfo HTP, unsigned ExtraInfo=0)
Register getLocReg() const
LocInfo getLocInfo() const
static CCValAssign getReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP, bool IsCustom=false)
static CCValAssign getCustomReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP)
static CCValAssign getMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP, bool IsCustom=false)
bool needsCustom() const
int64_t getLocMemOffset() const
unsigned getValNo() const
static CCValAssign getCustomMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP)
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
LLVM_ABI bool isMustTailCall() const
Tests if this call site must be tail call optimized.
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
const APFloat & getValueAPF() const
This is the shared class of boolean and integer constants.
Definition Constants.h:87
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition Constants.h:231
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition Constants.h:219
uint64_t getZExtValue() const
int64_t getSExtValue() const
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
unsigned getPointerSizeInBits(unsigned AS=0) const
The size in bits of the pointer representation in a given address space.
Definition DataLayout.h:494
LLVM_ABI Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
A debug info location.
Definition DebugLoc.h:123
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition Function.h:211
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition Function.h:272
Argument * getArg(unsigned i) const
Definition Function.h:886
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:728
bool isDSOLocal() const
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2812
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Class to represent integer types.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI void emitError(const Instruction *I, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location inf...
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
LoongArchMachineFunctionInfo - This class is derived from MachineFunctionInfo and contains private Lo...
const LoongArchRegisterInfo * getRegisterInfo() const override
const LoongArchInstrInfo * getInstrInfo() const override
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override
Return true if result of the specified node is used by a return node only.
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps, bool &UseOneConstNR, bool Reciprocal) const override
Hooks for building estimates in place of slower divisions and square roots.
bool isLegalICmpImmediate(int64_t Imm) const override
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(const AtomicCmpXchgInst *CI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
Return the ValueType of the result of SETCC operations.
std::pair< bool, uint64_t > isImmVLDILegalForMode1(const APInt &SplatValue, const unsigned SplatBitSize) const
Check if a constant splat can be generated using [x]vldi, where imm[12] is 1.
void getTgtMemIntrinsic(SmallVectorImpl< IntrinsicInfo > &Infos, const CallBase &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
bool isExtractVecEltCheap(EVT VT, unsigned Index) const override
Return true if extraction of a scalar element from the given vector type at the given index is cheap.
LegalizeTypeAction getPreferredVectorAction(MVT VT) const override
Return the preferred vector type legalization action.
bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Determine if the target supports unaligned memory accesses.
bool isCheapToSpeculateCtlz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic ctlz.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
bool shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize, Align &PrefAlign) const override
Return true if the pointer arguments to CI should be aligned by aligning the object whose address is ...
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const override
Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type from this source type with ...
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
bool signExtendConstant(const ConstantInt *CI) const override
Return true if this constant should be sign extended when promoting to a larger type.
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(const AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool isLegalAddImmediate(int64_t Imm) const override
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
bool isCheapToSpeculateCttz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic cttz.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const override
Returns true if arguments should be sign-extended in lib calls.
bool shouldScalarizeBinop(SDValue VecOp) const override
Try to convert an extract element of a vector binary operation into an extract element followed by a ...
bool isFPImmVLDILegal(const APFloat &Imm, EVT VT) const
bool shouldExtendTypeInLibCall(EVT Type) const override
Returns true if arguments should be extended in lib calls.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
bool hasAndNot(SDValue Y) const override
Return true if the target has a bitwise and-not operation: X = ~A & B This can be used to simplify se...
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const override
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
void emitExpandAtomicRMW(AtomicRMWInst *AI) const override
Perform a atomicrmw expansion using a target-specific way.
ISD::NodeType getExtendForAtomicCmpSwapArg() const override
Returns how the platform's atomic compare and swap expects its comparison value to be extended (ZERO_...
LoongArchTargetLowering(const TargetMachine &TM, const LoongArchSubtarget &STI)
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
bool hasAndNotCompare(SDValue Y) const override
Return true if the target should transform: (X & Y) == Y ---> (~X & Y) == 0 (X & Y) !...
SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps) const override
Return a reciprocal estimate value for the input operand.
bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT, const MachineFunction &MF) const override
Returns if it's reasonable to merge stores to MemVT size.
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context, const Type *RetTy) const override
This hook should be implemented to check whether the return values described by the Outs array can fi...
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array,...
bool mayBeEmittedAsTailCall(const CallInst *CI) const override
Return true if the target may be able emit the call instruction as a tail call.
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:41
bool hasFeature(unsigned Feature) const
Machine Value Type.
static MVT getFloatingPointVT(unsigned BitWidth)
bool is128BitVector() const
Return true if this is a 128-bit vector type.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isScalableVector() const
Return true if this is a vector value type where the runtime length is machine dependent.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
bool is256BitVector() const
Return true if this is a 256-bit vector type.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
MVT getHalfNumVectorElementsVT() const
Return a VT for a vector type with the same element type but half the number of elements.
MVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
void push_back(MachineInstr *MI)
void setCallFrameSize(unsigned N)
Set the call frame size on entry to this basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineInstr - Allocate a new MachineInstr.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
Representation of each machine instruction.
bool isImplicitDef() const
LLVM_ABI void collectDebugValues(SmallVectorImpl< MachineInstr * > &DbgValues)
Scan instructions immediately following MI and collect any matching DBG_VALUEs.
const MachineOperand & getOperand(unsigned i) const
LLVM_ABI MachineInstrBundleIterator< MachineInstr > eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
A description of a memory reference used in the backend.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
Flags getFlags() const
Return the raw flags of the source value,.
MachineOperand class - Representation of each machine instruction operand.
void setIsKill(bool Val=true)
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
Align getAlign() const
MachineMemOperand * getMemOperand() const
Return the unique MachineMemOperand object describing the memory reference performed by operation.
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition ArrayRef.h:298
Class to represent pointers.
unsigned getAddressSpace() const
Return the address space of the Pointer type.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:79
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
LLVM_ABI bool isOnlyUserOf(const SDNode *N) const
Return true if this node is the only use of N.
size_t use_size() const
Return the number of uses of this node.
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumOperands() const
Return the number of values used by this operation.
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
bool isUndef() const
Returns true if the node type is UNDEF or POISON.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
uint64_t getScalarValueSizeInBits() const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
LLVM_ABI SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
bool isSafeToSpeculativelyExecute(unsigned Opcode) const
Some opcodes may create immediate undefined behavior when used with some values (integer division-by-...
SDValue getExtractSubvector(const SDLoc &DL, EVT VT, SDValue Vec, unsigned Idx)
Return the VT typed sub-vector of Vec at Idx.
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getInsertSubvector(const SDLoc &DL, SDValue Vec, SDValue SubVec, unsigned Idx)
Insert SubVec at the Idx element of Vec.
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false, SDNodeFlags Flags={})
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
LLVM_ABI SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), BatchAAResults *BatchAA=nullptr)
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
static constexpr unsigned MaxRecursionDepth
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
LLVM_ABI SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
LLVM_ABI void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
LLVM_ABI SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
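To illustrate the constant and node builders above, a hedged sketch assuming a SelectionDAG &DAG, SDLoc DL, EVT VT, and SDValue X are in scope: building NOT(X) by hand and via the getNOT convenience wrapper produces the same (xor X, -1) node.
SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);              // -1 of type VT
SDValue NotByHand = DAG.getNode(ISD::XOR, DL, VT, X, AllOnes); // explicit form
SDValue NotHelper = DAG.getNOT(DL, X, VT);                     // equivalent convenience form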
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
LLVM_ABI SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
LLVM_ABI std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
LLVM_ABI SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
LLVM_ABI SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
LLVM_ABI SDValue WidenVector(const SDValue &N, const SDLoc &DL)
Widen the vector up to the next power of two using INSERT_SUBVECTOR.
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
LLVM_ABI SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
LLVM_ABI SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getRegisterMask(const uint32_t *RegMask)
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVM_ABI SDValue getCondCode(ISD::CondCode Cond)
LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
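computeKnownBits and MaskedValueIsZero are the usual way to prove a mask redundant before removing it. A hedged sketch, assuming Op is an i32-typed SDValue inside a combine:
// If the top 24 bits of Op are already known zero, (and Op, 0xFF)
// would be a no-op and can simply be folded to Op.
APInt HighBits = APInt::getHighBitsSet(/*numBits=*/32, /*hiBitsSet=*/24);
if (DAG.MaskedValueIsZero(Op, HighBits))
  return Op;
KnownBits Known = DAG.computeKnownBits(Op); // full bitwise analysis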
LLVMContext * getContext() const
LLVM_ABI SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
LLVM_ABI SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
LLVM_ABI std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
static LLVM_ABI bool isReverseMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask swaps the order of elements from exactly one source vector.
ArrayRef< int > getMask() const
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:134
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition SmallSet.h:176
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:184
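A short self-contained use of SmallSet as documented above; the function name is illustrative:
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallSet.h"
using namespace llvm;

// Returns true if any value in Vals occurs more than once; insert()
// reports via its bool whether the element was newly added.
static bool hasDuplicates(ArrayRef<int> Vals) {
  SmallSet<int, 8> Seen;
  for (int V : Vals)
    if (!Seen.insert(V).second)
      return true;
  return false;
}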
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void assign(size_type NumElts, ValueParamT Elt)
typename SuperClass::const_iterator const_iterator
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StackOffset holds a fixed and a scalable offset in bytes.
Definition TypeSize.h:30
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition StringRef.h:730
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:258
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:143
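The StringRef operations above compose without copying; a small hedged sketch (the option name is made up for illustration):
#include "llvm/ADT/StringRef.h"
using namespace llvm;

// Split "key=value" at the first '=' and test a prefix, all as views.
static bool isSimdOption(StringRef Opt) {
  auto [Key, Value] = Opt.split('=');
  return Key.starts_with("simd") && Value.size() != 0;
}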
TargetInstrInfo - Interface to description of machine instruction set.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
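setOperationAction is how a target constructor tells legalization what to do per (opcode, type) pair; a generic hedged sketch of the idiom (not the actual LoongArch configuration):
// Inside a hypothetical TargetLowering subclass constructor:
setOperationAction(ISD::ROTL, MVT::i32, Custom);  // lowered in LowerOperation
setOperationAction(ISD::CTPOP, MVT::i32, Expand); // open-coded by the legalizer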
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
const TargetMachine & getTargetMachine() const
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
LegalizeTypeAction
This enum indicates whether types are legal for a target, and if not, what action should be used to...
void setMaxBytesForAlignment(unsigned MaxBytes)
bool isOperationLegalOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal using promotion.
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
virtual TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT) const
Return the preferred vector type legalization action.
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
void setMinCmpXchgSizeInBits(unsigned SizeInBits)
Sets the minimum cmpxchg or ll/sc size supported by the backend.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
std::vector< ArgListEntry > ArgListTy
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
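These legality predicates are the standard guard before a combine fabricates a new node; a hedged sketch, assuming DAG and VT are in scope:
// Only form a VSELECT if the target can handle it for VT, either
// natively or via custom lowering; otherwise abandon the combine.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (!TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
  return SDValue();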
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Vector Op.
virtual InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const
SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
More limited version of SimplifyDemandedBits that can be used to "lookthrough" ops that don't contrib...
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
TargetLowering(const TargetLowering &)=delete
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::LibcallImpl LibcallImpl, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
Primary interface to the complete machine description for the target machine.
bool useTLSDESC() const
Returns true if this target uses TLS Descriptors.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
bool shouldAssumeDSOLocal(const GlobalValue *GV) const
CodeModel::Model getCodeModel() const
Returns the code model.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetInstrInfo * getInstrInfo() const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
LLVM_ABI unsigned getIntegerBitWidth() const
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:201
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:257
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:317
This class is used to represent EVT's, which are used to parameterize some operations.
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:549
self_iterator getIterator()
Definition ilist_node.h:123
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ PreserveMost
Used for runtime calls that preserve most registers.
Definition CallingConv.h:63
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition CallingConv.h:50
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ PreserveNone
Used for runtime calls that preserve no general registers.
Definition CallingConv.h:90
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition ISDOpcodes.h:41
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:819
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain.
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition ISDOpcodes.h:511
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition ISDOpcodes.h:45
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:275
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:600
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:779
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:264
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:853
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:518
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:220
@ GlobalAddress
Definition ISDOpcodes.h:88
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:880
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:584
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:417
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:747
@ MEMBARRIER
MEMBARRIER - Compiler barrier only; generate a no-op.
@ ATOMIC_FENCE
OUTCHAIN = ATOMIC_FENCE(INCHAIN, ordering, scope) This corresponds to the fence instruction.
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition ISDOpcodes.h:993
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:254
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
@ GlobalTLSAddress
Definition ISDOpcodes.h:89
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:844
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:665
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ BR_CC
BR_CC - Conditional branch.
@ BR_JT
BR_JT - Jumptable branch.
@ FCANONICALIZE
Returns the platform-specific canonical encoding of a floating-point number.
Definition ISDOpcodes.h:541
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition ISDOpcodes.h:548
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:374
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:796
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:233
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:704
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:765
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:649
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:614
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimumNumber or maximumNumber on two values,...
@ READ_REGISTER
READ_REGISTER, WRITE_REGISTER - This node represents llvm.register on the DAG, which implements the n...
Definition ISDOpcodes.h:139
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:576
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition ISDOpcodes.h:224
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:850
@ DEBUGTRAP
DEBUGTRAP - Trap intended to get the attention of a debugger.
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:811
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values, following the IEEE-754 definition...
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:888
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:727
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:978
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:805
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition ISDOpcodes.h:150
@ BF16_TO_FP
BF16_TO_FP, FP_TO_BF16 - These operators are used to perform promotions and truncation for bfloat16.
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition ISDOpcodes.h:110
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:926
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:739
@ TRAP
TRAP - Trapping instruction.
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:205
@ FREEZE
FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or is evaluated to UNDEF),...
Definition ISDOpcodes.h:241
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:565
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:856
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
@ BRCOND
BRCOND - Conditional branch.
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition ISDOpcodes.h:833
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition ISDOpcodes.h:62
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:365
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition ISDOpcodes.h:722
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:213
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:556
bool isExtVecInRegOpcode(unsigned Opcode)
LLVM_ABI CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
bool isBitwiseLogicOp(unsigned Opcode)
Whether this is bitwise logic opcode.
LLVM_ABI bool isFreezeUndef(const SDNode *N)
Return true if the specified node is FREEZE(UNDEF).
LLVM_ABI CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
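The two CondCode helpers above let a combine canonicalize comparisons without a case table; a hedged sketch, assuming N is known to be an ISD::SETCC node and VT is its operand type:
ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
ISD::CondCode Inv = ISD::getSetCCInverse(CC, VT);      // !(X CC Y)
ISD::CondCode Swap = ISD::getSetCCSwappedOperands(CC); // (Y CC' X)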
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LLVM_ABI bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
LLVM_ABI NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode)
Get underlying scalar opcode for VECREDUCE opcode.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > OverloadTys={})
Look up the Function declaration of the intrinsic id in the Module M.
ABI getTargetABI(StringRef ABIName)
InstSeq generateInstSeq(int64_t Val)
LLVM_ABI Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition LLVMContext.h:55
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
Sequence
A sequence of states that a pointer may go through in which an objc_retain and objc_release are actua...
Definition PtrState.h:41
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
This is an optimization pass for GlobalISel generic memory operations.
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
Definition Threading.h:280
@ Offset
Definition DWP.cpp:532
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1739
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
@ Kill
The last use of a register.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
constexpr RegState getKillRegState(bool B)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
bool isIntOrFPConstant(SDValue V)
Return true if V is either an integer or FP constant.
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
Definition bit.h:325
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
LLVM_ABI bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:337
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
Definition MathExtras.h:273
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1746
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition MathExtras.h:261
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
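The MathExtras predicates scattered through this list (isInt, isUInt, isMask_64, isShiftedMask_64, isPowerOf2_64, Log2_64) are the usual immediate classifiers in backends; a self-contained sketch:
#include "llvm/Support/MathExtras.h"
#include <cassert>
using namespace llvm;

int main() {
  assert(isInt<12>(-2048) && !isInt<12>(2048));  // signed 12-bit range
  assert(isUInt<12>(4095) && !isUInt<12>(4096)); // unsigned 12-bit range
  assert(isMask_64(0xFF) && isShiftedMask_64(0xFF0));
  assert(isPowerOf2_64(64) && Log2_64(64) == 6);
  return 0;
}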
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Other
Any other memory.
Definition ModRef.h:68
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr bool isShiftedInt(int64_t x)
Checks if a signed integer is an N bit number shifted left by S.
Definition MathExtras.h:182
constexpr unsigned BitWidth
std::string join_items(Sep Separator, Args &&... Items)
Joins the strings in the parameter pack Items, adding Separator between the elements...
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
PointerUnion< const Value *, const PseudoSourceValue * > ValueType
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:177
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Extended Value Type.
Definition ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition ValueTypes.h:90
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:403
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:145
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition ValueTypes.h:292
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition ValueTypes.h:308
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:155
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:381
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:393
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:324
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition ValueTypes.h:215
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:61
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:389
static EVT getFloatingPointVT(unsigned BitWidth)
Returns the EVT that represents a floating-point type with the given number of bits.
Definition ValueTypes.h:55
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:176
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:331
bool is256BitVector() const
Return true if this is a 256-bit vector type.
Definition ValueTypes.h:220
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:336
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:165
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:344
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:469
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:160
Align getNonZeroOrigAlign() const
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
void resetAll()
Resets the known state of all bits.
Definition KnownBits.h:74
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
static LLVM_ABI MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
SmallVector< ISD::OutputArg, 32 > Outs
LLVM_ABI SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
This structure is used to pass arguments to makeLibCall function.
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...