1//=- LoongArchISelLowering.cpp - LoongArch DAG Lowering Implementation ---===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that LoongArch uses to lower LLVM code into
10// a selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
15#include "LoongArch.h"
18#include "LoongArchSubtarget.h"
21#include "llvm/ADT/Statistic.h"
26#include "llvm/IR/IRBuilder.h"
28#include "llvm/IR/IntrinsicsLoongArch.h"
30#include "llvm/Support/Debug.h"
34
35using namespace llvm;
36
37#define DEBUG_TYPE "loongarch-isel-lowering"
38
39STATISTIC(NumTailCalls, "Number of tail calls");
40
41static cl::opt<bool> ZeroDivCheck("loongarch-check-zero-division", cl::Hidden,
42 cl::desc("Trap on integer division by zero."),
43 cl::init(false));
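// For illustration: being a hidden cl::opt, this is expected to be reachable as
// a codegen flag, e.g. `llc -mtriple=loongarch64 -loongarch-check-zero-division`
// (the exact driver spelling is an assumption, not verified here).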
44
46 const LoongArchSubtarget &STI)
47 : TargetLowering(TM), Subtarget(STI) {
48
49 MVT GRLenVT = Subtarget.getGRLenVT();
50
51 // Set up the register classes.
52
53 addRegisterClass(GRLenVT, &LoongArch::GPRRegClass);
54 if (Subtarget.hasBasicF())
55 addRegisterClass(MVT::f32, &LoongArch::FPR32RegClass);
56 if (Subtarget.hasBasicD())
57 addRegisterClass(MVT::f64, &LoongArch::FPR64RegClass);
58
59 static const MVT::SimpleValueType LSXVTs[] = {
60 MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32, MVT::v2f64};
61 static const MVT::SimpleValueType LASXVTs[] = {
62 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, MVT::v8f32, MVT::v4f64};
63
64 if (Subtarget.hasExtLSX())
65 for (MVT VT : LSXVTs)
66 addRegisterClass(VT, &LoongArch::LSX128RegClass);
67
68 if (Subtarget.hasExtLASX())
69 for (MVT VT : LASXVTs)
70 addRegisterClass(VT, &LoongArch::LASX256RegClass);
71
72 // Set operations for LA32 and LA64.
73
75 MVT::i1, Promote);
76
83
86 GRLenVT, Custom);
87
89
94
97
101
102  // Expand bitreverse.i16 with a native-width bitrev and shift for now, until
103  // we know whether sll or revb.2h is faster.
106
107 // LA32 does not have REVB.2W and REVB.D due to the 64-bit operands, and
108 // the narrower REVB.W does not exist. But LA32 does have REVB.2H, so i16
109 // and i32 could still be byte-swapped relatively cheaply.
111
117
120
121 // Set operations for LA64 only.
122
123 if (Subtarget.is64Bit()) {
141
145 Custom);
147 }
148
149 // Set operations for LA32 only.
150
151 if (!Subtarget.is64Bit()) {
157 }
158
160
161 static const ISD::CondCode FPCCToExpand[] = {
164
165 // Set operations for 'F' feature.
166
167 if (Subtarget.hasBasicF()) {
168 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
169 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
170 setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
171
187
188 if (Subtarget.is64Bit())
190
191 if (!Subtarget.hasBasicD()) {
193 if (Subtarget.is64Bit()) {
196 }
197 }
198 }
199
200 // Set operations for 'D' feature.
201
202 if (Subtarget.hasBasicD()) {
203 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
204 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
205 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
206 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
207 setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
208
224
225 if (Subtarget.is64Bit())
227 }
228
229 // Set operations for 'LSX' feature.
230
231 if (Subtarget.hasExtLSX()) {
233 // Expand all truncating stores and extending loads.
234 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
235 setTruncStoreAction(VT, InnerVT, Expand);
238 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
239 }
240 // By default everything must be expanded. Then we will selectively turn
241 // on ones that can be effectively codegen'd.
242 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
244 }
245
246 for (MVT VT : LSXVTs) {
250
254
258 }
259 for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
262 Legal);
264 VT, Legal);
271 Expand);
272 }
273 for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
275 for (MVT VT : {MVT::v8i16, MVT::v4i32, MVT::v2i64})
277 for (MVT VT : {MVT::v4i32, MVT::v2i64}) {
280 }
281 for (MVT VT : {MVT::v4f32, MVT::v2f64}) {
289 VT, Expand);
290 }
292 setOperationAction(ISD::FCEIL, {MVT::f32, MVT::f64}, Legal);
293 setOperationAction(ISD::FFLOOR, {MVT::f32, MVT::f64}, Legal);
294 setOperationAction(ISD::FTRUNC, {MVT::f32, MVT::f64}, Legal);
295 setOperationAction(ISD::FROUNDEVEN, {MVT::f32, MVT::f64}, Legal);
296 }
297
298 // Set operations for 'LASX' feature.
299
300 if (Subtarget.hasExtLASX()) {
301 for (MVT VT : LASXVTs) {
305
310
314 }
315 for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) {
318 Legal);
320 VT, Legal);
327 Expand);
328 }
329 for (MVT VT : {MVT::v32i8, MVT::v16i16, MVT::v8i32})
331 for (MVT VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64})
333 for (MVT VT : {MVT::v8i32, MVT::v4i32, MVT::v4i64}) {
336 }
337 for (MVT VT : {MVT::v8f32, MVT::v4f64}) {
345 VT, Expand);
346 }
347 }
348
349 // Set DAG combine for LA32 and LA64.
350
355
356 // Set DAG combine for 'LSX' feature.
357
358 if (Subtarget.hasExtLSX())
360
361 // Compute derived properties from the register classes.
363
365
368
370
372
373 // Function alignments.
375 // Set preferred alignments.
379
380 // cmpxchg sizes down to 8 bits become legal if LAMCAS is available.
381 if (Subtarget.hasLAMCAS())
383}
384
386 const GlobalAddressSDNode *GA) const {
387 // In order to maximise the opportunity for common subexpression elimination,
388 // keep a separate ADD node for the global address offset instead of folding
389 // it in the global address node. Later peephole optimisations may choose to
390 // fold it back in when profitable.
391 return false;
392}
393
395 SelectionDAG &DAG) const {
396 switch (Op.getOpcode()) {
398 return lowerATOMIC_FENCE(Op, DAG);
400 return lowerEH_DWARF_CFA(Op, DAG);
402 return lowerGlobalAddress(Op, DAG);
404 return lowerGlobalTLSAddress(Op, DAG);
406 return lowerINTRINSIC_WO_CHAIN(Op, DAG);
408 return lowerINTRINSIC_W_CHAIN(Op, DAG);
410 return lowerINTRINSIC_VOID(Op, DAG);
412 return lowerBlockAddress(Op, DAG);
413 case ISD::JumpTable:
414 return lowerJumpTable(Op, DAG);
415 case ISD::SHL_PARTS:
416 return lowerShiftLeftParts(Op, DAG);
417 case ISD::SRA_PARTS:
418 return lowerShiftRightParts(Op, DAG, true);
419 case ISD::SRL_PARTS:
420 return lowerShiftRightParts(Op, DAG, false);
422 return lowerConstantPool(Op, DAG);
423 case ISD::FP_TO_SINT:
424 return lowerFP_TO_SINT(Op, DAG);
425 case ISD::BITCAST:
426 return lowerBITCAST(Op, DAG);
427 case ISD::UINT_TO_FP:
428 return lowerUINT_TO_FP(Op, DAG);
429 case ISD::SINT_TO_FP:
430 return lowerSINT_TO_FP(Op, DAG);
431 case ISD::VASTART:
432 return lowerVASTART(Op, DAG);
433 case ISD::FRAMEADDR:
434 return lowerFRAMEADDR(Op, DAG);
435 case ISD::RETURNADDR:
436 return lowerRETURNADDR(Op, DAG);
438 return lowerWRITE_REGISTER(Op, DAG);
440 return lowerINSERT_VECTOR_ELT(Op, DAG);
442 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
444 return lowerBUILD_VECTOR(Op, DAG);
446 return lowerVECTOR_SHUFFLE(Op, DAG);
447 case ISD::BITREVERSE:
448 return lowerBITREVERSE(Op, DAG);
449 }
450 return SDValue();
451}
452
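// Rough outline of the lowering below: the vector is bitcast to a vector of
// i64, each 64-bit element is extracted and bit-reversed as a scalar (with
// what is presumably a byte-granularity reverse for the i8-element types),
// and the pieces are rebuilt with a BUILD_VECTOR. For 16- and 32-bit element
// types a final shuffle restores the element order inside each 64-bit chunk.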
453SDValue LoongArchTargetLowering::lowerBITREVERSE(SDValue Op,
454 SelectionDAG &DAG) const {
455 EVT ResTy = Op->getValueType(0);
456 SDValue Src = Op->getOperand(0);
457 SDLoc DL(Op);
458
459 EVT NewVT = ResTy.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
460 unsigned int OrigEltNum = ResTy.getVectorNumElements();
461 unsigned int NewEltNum = NewVT.getVectorNumElements();
462
463 SDValue NewSrc = DAG.getNode(ISD::BITCAST, DL, NewVT, Src);
464
466 for (unsigned int i = 0; i < NewEltNum; i++) {
467 SDValue Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, NewSrc,
468 DAG.getConstant(i, DL, MVT::i64));
469 SDValue RevOp = DAG.getNode((ResTy == MVT::v16i8 || ResTy == MVT::v32i8)
472 DL, MVT::i64, Op);
473 Ops.push_back(RevOp);
474 }
475 SDValue Res =
476 DAG.getNode(ISD::BITCAST, DL, ResTy, DAG.getBuildVector(NewVT, DL, Ops));
477
478 switch (ResTy.getSimpleVT().SimpleTy) {
479 default:
480 return SDValue();
481 case MVT::v16i8:
482 case MVT::v32i8:
483 return Res;
484 case MVT::v8i16:
485 case MVT::v16i16:
486 case MVT::v4i32:
487 case MVT::v8i32: {
489 for (unsigned int i = 0; i < NewEltNum; i++)
490 for (int j = OrigEltNum / NewEltNum - 1; j >= 0; j--)
491 Mask.push_back(j + (OrigEltNum / NewEltNum) * i);
492 return DAG.getVectorShuffle(ResTy, DL, Res, DAG.getUNDEF(ResTy), Mask);
493 }
494 }
495}
496
497/// Determine whether a range fits a regular pattern of values.
498/// This function accounts for the possibility of jumping over the End iterator.
499template <typename ValType>
500static bool
502 unsigned CheckStride,
504 ValType ExpectedIndex, unsigned ExpectedIndexStride) {
505 auto &I = Begin;
506
507 while (I != End) {
508 if (*I != -1 && *I != ExpectedIndex)
509 return false;
510 ExpectedIndex += ExpectedIndexStride;
511
512 // Incrementing past End is undefined behaviour so we must increment one
513 // step at a time and check for End at each step.
514 for (unsigned n = 0; n < CheckStride && I != End; ++n, ++I)
515 ; // Empty loop body.
516 }
517 return true;
518}
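// For example (illustrative): with Mask = <0, 5, 2, 7>,
// fitsRegularPattern<int>(Mask.begin(), 2, Mask.end(), 0, 2) is true, since
// the elements at positions 0, 2, ... are 0, 2, ...; undef (-1) entries
// always match, so <0, -1, 2, -1> fits the same pattern.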
519
520/// Lower VECTOR_SHUFFLE into VREPLVEI (if possible).
521///
522/// VREPLVEI performs vector broadcast based on an element specified by an
523/// integer immediate, with its mask being similar to:
524/// <x, x, x, ...>
525/// where x is any valid index.
526///
527/// When undef's appear in the mask they are treated as if they were whatever
528/// value is necessary in order to fit the above form.
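/// For example (illustrative), a v4i32 mask of <1, 1, 1, 1> (or <1, -1, 1, -1>)
/// is lowered to a VREPLVEI with immediate 1, broadcasting element 1 of the
/// first operand.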
530 MVT VT, SDValue V1, SDValue V2,
531 SelectionDAG &DAG) {
532 int SplatIndex = -1;
533 for (const auto &M : Mask) {
534 if (M != -1) {
535 SplatIndex = M;
536 break;
537 }
538 }
539
540 if (SplatIndex == -1)
541 return DAG.getUNDEF(VT);
542
543 assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
544 if (fitsRegularPattern<int>(Mask.begin(), 1, Mask.end(), SplatIndex, 0)) {
545 APInt Imm(64, SplatIndex);
546 return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
547 DAG.getConstant(Imm, DL, MVT::i64));
548 }
549
550 return SDValue();
551}
552
553/// Lower VECTOR_SHUFFLE into VSHUF4I (if possible).
554///
555/// VSHUF4I splits the vector into blocks of four elements, then shuffles these
556/// elements according to a <4 x i2> constant (encoded as an integer immediate).
557///
558/// It is therefore possible to lower into VSHUF4I when the mask takes the form:
559/// <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...>
560/// When undef's appear they are treated as if they were whatever value is
561/// necessary in order to fit the above forms.
562///
563/// For example:
564/// %2 = shufflevector <8 x i16> %0, <8 x i16> undef,
565/// <8 x i32> <i32 3, i32 2, i32 1, i32 0,
566/// i32 7, i32 6, i32 5, i32 4>
567/// is lowered to:
568/// (VSHUF4I_H $v0, $v1, 27)
569/// where the 27 comes from:
570/// 3 + (2 << 2) + (1 << 4) + (0 << 6)
572 MVT VT, SDValue V1, SDValue V2,
573 SelectionDAG &DAG) {
574
575 // When the size is less than 4, lower cost instructions may be used.
576 if (Mask.size() < 4)
577 return SDValue();
578
579 int SubMask[4] = {-1, -1, -1, -1};
580 for (unsigned i = 0; i < 4; ++i) {
581 for (unsigned j = i; j < Mask.size(); j += 4) {
582 int Idx = Mask[j];
583
584 // Convert from vector index to 4-element subvector index
585 // If an index refers to an element outside of the subvector then give up
586 if (Idx != -1) {
587 Idx -= 4 * (j / 4);
588 if (Idx < 0 || Idx >= 4)
589 return SDValue();
590 }
591
592 // If the mask has an undef, replace it with the current index.
593 // Note that it might still be undef if the current index is also undef
594 if (SubMask[i] == -1)
595 SubMask[i] = Idx;
596 // Check that non-undef values are the same as in the mask. If they
597 // aren't then give up
598 else if (Idx != -1 && Idx != SubMask[i])
599 return SDValue();
600 }
601 }
602
603 // Calculate the immediate. Replace any remaining undefs with zero
604 APInt Imm(64, 0);
605 for (int i = 3; i >= 0; --i) {
606 int Idx = SubMask[i];
607
608 if (Idx == -1)
609 Idx = 0;
610
611 Imm <<= 2;
612 Imm |= Idx & 0x3;
613 }
614
615 return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT, V1,
616 DAG.getConstant(Imm, DL, MVT::i64));
617}
618
619/// Lower VECTOR_SHUFFLE into VPACKEV (if possible).
620///
621/// VPACKEV interleaves the even elements from each vector.
622///
623/// It is possible to lower into VPACKEV when the mask consists of two of the
624/// following forms interleaved:
625/// <0, 2, 4, ...>
626/// <n, n+2, n+4, ...>
627/// where n is the number of elements in the vector.
628/// For example:
629/// <0, 0, 2, 2, 4, 4, ...>
630/// <0, n, 2, n+2, 4, n+4, ...>
631///
632/// When undef's appear in the mask they are treated as if they were whatever
633/// value is necessary in order to fit the above forms.
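/// For example (illustrative), the two-operand v4i32 mask <0, 4, 2, 6> fits:
/// even result positions take the even elements of the first input and odd
/// result positions take the even elements of the second input.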
635 MVT VT, SDValue V1, SDValue V2,
636 SelectionDAG &DAG) {
637
638 const auto &Begin = Mask.begin();
639 const auto &End = Mask.end();
640 SDValue OriV1 = V1, OriV2 = V2;
641
642 if (fitsRegularPattern<int>(Begin, 2, End, 0, 2))
643 V1 = OriV1;
644 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 2))
645 V1 = OriV2;
646 else
647 return SDValue();
648
649 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 2))
650 V2 = OriV1;
651 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 2))
652 V2 = OriV2;
653 else
654 return SDValue();
655
656 return DAG.getNode(LoongArchISD::VPACKEV, DL, VT, V2, V1);
657}
658
659/// Lower VECTOR_SHUFFLE into VPACKOD (if possible).
660///
661/// VPACKOD interleaves the odd elements from each vector.
662///
663/// It is possible to lower into VPACKOD when the mask consists of two of the
664/// following forms interleaved:
665/// <1, 3, 5, ...>
666/// <n+1, n+3, n+5, ...>
667/// where n is the number of elements in the vector.
668/// For example:
669/// <1, 1, 3, 3, 5, 5, ...>
670/// <1, n+1, 3, n+3, 5, n+5, ...>
671///
672/// When undef's appear in the mask they are treated as if they were whatever
673/// value is necessary in order to fit the above forms.
675 MVT VT, SDValue V1, SDValue V2,
676 SelectionDAG &DAG) {
677
678 const auto &Begin = Mask.begin();
679 const auto &End = Mask.end();
680 SDValue OriV1 = V1, OriV2 = V2;
681
682 if (fitsRegularPattern<int>(Begin, 2, End, 1, 2))
683 V1 = OriV1;
684 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + 1, 2))
685 V1 = OriV2;
686 else
687 return SDValue();
688
689 if (fitsRegularPattern<int>(Begin + 1, 2, End, 1, 2))
690 V2 = OriV1;
691 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + 1, 2))
692 V2 = OriV2;
693 else
694 return SDValue();
695
696 return DAG.getNode(LoongArchISD::VPACKOD, DL, VT, V2, V1);
697}
698
699/// Lower VECTOR_SHUFFLE into VILVH (if possible).
700///
701/// VILVH interleaves consecutive elements from the left (highest-indexed) half
702/// of each vector.
703///
704/// It is possible to lower into VILVH when the mask consists of two of the
705/// following forms interleaved:
706/// <x, x+1, x+2, ...>
707/// <n+x, n+x+1, n+x+2, ...>
708/// where n is the number of elements in the vector and x is half n.
709/// For example:
710/// <x, x, x+1, x+1, x+2, x+2, ...>
711/// <x, n+x, x+1, n+x+1, x+2, n+x+2, ...>
712///
713/// When undef's appear in the mask they are treated as if they were whatever
714/// value is necessary in order to fit the above forms.
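/// For example (illustrative), the two-operand v8i16 mask
/// <4, 12, 5, 13, 6, 14, 7, 15> fits: it interleaves elements 4..7 of the
/// first input with elements 4..7 of the second input (n = 8, x = 4).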
716 MVT VT, SDValue V1, SDValue V2,
717 SelectionDAG &DAG) {
718
719 const auto &Begin = Mask.begin();
720 const auto &End = Mask.end();
721 unsigned HalfSize = Mask.size() / 2;
722 SDValue OriV1 = V1, OriV2 = V2;
723
724 if (fitsRegularPattern<int>(Begin, 2, End, HalfSize, 1))
725 V1 = OriV1;
726 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + HalfSize, 1))
727 V1 = OriV2;
728 else
729 return SDValue();
730
731 if (fitsRegularPattern<int>(Begin + 1, 2, End, HalfSize, 1))
732 V2 = OriV1;
733 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + HalfSize,
734 1))
735 V2 = OriV2;
736 else
737 return SDValue();
738
739 return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
740}
741
742/// Lower VECTOR_SHUFFLE into VILVL (if possible).
743///
744/// VILVL interleaves consecutive elements from the right (lowest-indexed) half
745/// of each vector.
746///
747/// It is possible to lower into VILVL when the mask consists of two of the
748/// following forms interleaved:
749/// <0, 1, 2, ...>
750/// <n, n+1, n+2, ...>
751/// where n is the number of elements in the vector.
752/// For example:
753/// <0, 0, 1, 1, 2, 2, ...>
754/// <0, n, 1, n+1, 2, n+2, ...>
755///
756/// When undef's appear in the mask they are treated as if they were whatever
757/// value is necessary in order to fit the above forms.
759 MVT VT, SDValue V1, SDValue V2,
760 SelectionDAG &DAG) {
761
762 const auto &Begin = Mask.begin();
763 const auto &End = Mask.end();
764 SDValue OriV1 = V1, OriV2 = V2;
765
766 if (fitsRegularPattern<int>(Begin, 2, End, 0, 1))
767 V1 = OriV1;
768 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 1))
769 V1 = OriV2;
770 else
771 return SDValue();
772
773 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 1))
774 V2 = OriV1;
775 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 1))
776 V2 = OriV2;
777 else
778 return SDValue();
779
780 return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
781}
782
783/// Lower VECTOR_SHUFFLE into VPICKEV (if possible).
784///
785/// VPICKEV copies the even elements of each vector into the result vector.
786///
787/// It is possible to lower into VPICKEV when the mask consists of two of the
788/// following forms concatenated:
789/// <0, 2, 4, ...>
790/// <n, n+2, n+4, ...>
791/// where n is the number of elements in the vector.
792/// For example:
793/// <0, 2, 4, ..., 0, 2, 4, ...>
794/// <0, 2, 4, ..., n, n+2, n+4, ...>
795///
796/// When undef's appear in the mask they are treated as if they were whatever
797/// value is necessary in order to fit the above forms.
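/// For example (illustrative), the two-operand v4i32 mask <0, 2, 4, 6> fits:
/// the first half takes the even elements of the first input and the second
/// half takes the even elements of the second input.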
799 MVT VT, SDValue V1, SDValue V2,
800 SelectionDAG &DAG) {
801
802 const auto &Begin = Mask.begin();
803 const auto &Mid = Mask.begin() + Mask.size() / 2;
804 const auto &End = Mask.end();
805 SDValue OriV1 = V1, OriV2 = V2;
806
807 if (fitsRegularPattern<int>(Begin, 1, Mid, 0, 2))
808 V1 = OriV1;
809 else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size(), 2))
810 V1 = OriV2;
811 else
812 return SDValue();
813
814 if (fitsRegularPattern<int>(Mid, 1, End, 0, 2))
815 V2 = OriV1;
816 else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size(), 2))
817 V2 = OriV2;
818
819 else
820 return SDValue();
821
822 return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
823}
824
825/// Lower VECTOR_SHUFFLE into VPICKOD (if possible).
826///
827/// VPICKOD copies the odd elements of each vector into the result vector.
828///
829/// It is possible to lower into VPICKOD when the mask consists of two of the
830/// following forms concatenated:
831/// <1, 3, 5, ...>
832/// <n+1, n+3, n+5, ...>
833/// where n is the number of elements in the vector.
834/// For example:
835/// <1, 3, 5, ..., 1, 3, 5, ...>
836/// <1, 3, 5, ..., n+1, n+3, n+5, ...>
837///
838/// When undef's appear in the mask they are treated as if they were whatever
839/// value is necessary in order to fit the above forms.
841 MVT VT, SDValue V1, SDValue V2,
842 SelectionDAG &DAG) {
843
844 const auto &Begin = Mask.begin();
845 const auto &Mid = Mask.begin() + Mask.size() / 2;
846 const auto &End = Mask.end();
847 SDValue OriV1 = V1, OriV2 = V2;
848
849 if (fitsRegularPattern<int>(Begin, 1, Mid, 1, 2))
850 V1 = OriV1;
851 else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size() + 1, 2))
852 V1 = OriV2;
853 else
854 return SDValue();
855
856 if (fitsRegularPattern<int>(Mid, 1, End, 1, 2))
857 V2 = OriV1;
858 else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size() + 1, 2))
859 V2 = OriV2;
860 else
861 return SDValue();
862
863 return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
864}
865
866/// Lower VECTOR_SHUFFLE into VSHUF.
867///
868/// This mostly consists of converting the shuffle mask into a BUILD_VECTOR and
869/// adding it as an operand to the resulting VSHUF.
871 MVT VT, SDValue V1, SDValue V2,
872 SelectionDAG &DAG) {
873
875 for (auto M : Mask)
876 Ops.push_back(DAG.getConstant(M, DL, MVT::i64));
877
878 EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
879 SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, Ops);
880
881  // VECTOR_SHUFFLE concatenates the vectors in a vectorwise fashion.
882 // <0b00, 0b01> + <0b10, 0b11> -> <0b00, 0b01, 0b10, 0b11>
883 // VSHF concatenates the vectors in a bitwise fashion:
884 // <0b00, 0b01> + <0b10, 0b11> ->
885 // 0b0100 + 0b1110 -> 0b01001110
886 // <0b10, 0b11, 0b00, 0b01>
887 // We must therefore swap the operands to get the correct result.
888 return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
889}
890
891/// Dispatching routine to lower various 128-bit LoongArch vector shuffles.
892///
893/// This routine breaks down the specific type of 128-bit shuffle and
894/// dispatches to the lowering routines accordingly.
896 SDValue V1, SDValue V2, SelectionDAG &DAG) {
897 assert((VT.SimpleTy == MVT::v16i8 || VT.SimpleTy == MVT::v8i16 ||
898 VT.SimpleTy == MVT::v4i32 || VT.SimpleTy == MVT::v2i64 ||
899 VT.SimpleTy == MVT::v4f32 || VT.SimpleTy == MVT::v2f64) &&
900 "Vector type is unsupported for lsx!");
901 assert(V1.getSimpleValueType() == V2.getSimpleValueType() &&
902 "Two operands have different types!");
903 assert(VT.getVectorNumElements() == Mask.size() &&
904 "Unexpected mask size for shuffle!");
905 assert(Mask.size() % 2 == 0 && "Expected even mask size.");
906
907 SDValue Result;
908 // TODO: Add more comparison patterns.
909 if (V2.isUndef()) {
910 if ((Result = lowerVECTOR_SHUFFLE_VREPLVEI(DL, Mask, VT, V1, V2, DAG)))
911 return Result;
912 if ((Result = lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG)))
913 return Result;
914
915 // TODO: This comment may be enabled in the future to better match the
916 // pattern for instruction selection.
917 /* V2 = V1; */
918 }
919
920 // It is recommended not to change the pattern comparison order for better
921 // performance.
922 if ((Result = lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG)))
923 return Result;
924 if ((Result = lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG)))
925 return Result;
926 if ((Result = lowerVECTOR_SHUFFLE_VILVH(DL, Mask, VT, V1, V2, DAG)))
927 return Result;
928 if ((Result = lowerVECTOR_SHUFFLE_VILVL(DL, Mask, VT, V1, V2, DAG)))
929 return Result;
930 if ((Result = lowerVECTOR_SHUFFLE_VPICKEV(DL, Mask, VT, V1, V2, DAG)))
931 return Result;
932 if ((Result = lowerVECTOR_SHUFFLE_VPICKOD(DL, Mask, VT, V1, V2, DAG)))
933 return Result;
934 if ((Result = lowerVECTOR_SHUFFLE_VSHUF(DL, Mask, VT, V1, V2, DAG)))
935 return Result;
936
937 return SDValue();
938}
939
940/// Lower VECTOR_SHUFFLE into XVREPLVEI (if possible).
941///
942/// It is a XVREPLVEI when the mask is:
943/// <x, x, x, ..., x+n, x+n, x+n, ...>
944/// where the number of x is equal to n and n is half the length of vector.
945///
946/// When undef's appear in the mask they are treated as if they were whatever
947/// value is necessary in order to fit the above form.
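/// For example (illustrative), the v8i32 mask <1, 1, 1, 1, 5, 5, 5, 5> fits
/// (x = 1, n = 4): element 1 is broadcast within each 128-bit lane.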
949 ArrayRef<int> Mask, MVT VT,
950 SDValue V1, SDValue V2,
951 SelectionDAG &DAG) {
952 int SplatIndex = -1;
953 for (const auto &M : Mask) {
954 if (M != -1) {
955 SplatIndex = M;
956 break;
957 }
958 }
959
960 if (SplatIndex == -1)
961 return DAG.getUNDEF(VT);
962
963 const auto &Begin = Mask.begin();
964 const auto &End = Mask.end();
965 unsigned HalfSize = Mask.size() / 2;
966
967 assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
968 if (fitsRegularPattern<int>(Begin, 1, End - HalfSize, SplatIndex, 0) &&
969 fitsRegularPattern<int>(Begin + HalfSize, 1, End, SplatIndex + HalfSize,
970 0)) {
971 APInt Imm(64, SplatIndex);
972 return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
973 DAG.getConstant(Imm, DL, MVT::i64));
974 }
975
976 return SDValue();
977}
978
979/// Lower VECTOR_SHUFFLE into XVSHUF4I (if possible).
981 MVT VT, SDValue V1, SDValue V2,
982 SelectionDAG &DAG) {
983 // When the size is less than or equal to 4, lower cost instructions may be
984 // used.
985 if (Mask.size() <= 4)
986 return SDValue();
987 return lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG);
988}
989
990/// Lower VECTOR_SHUFFLE into XVPACKEV (if possible).
992 MVT VT, SDValue V1, SDValue V2,
993 SelectionDAG &DAG) {
994 return lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG);
995}
996
997/// Lower VECTOR_SHUFFLE into XVPACKOD (if possible).
999 MVT VT, SDValue V1, SDValue V2,
1000 SelectionDAG &DAG) {
1001 return lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG);
1002}
1003
1004/// Lower VECTOR_SHUFFLE into XVILVH (if possible).
1006 MVT VT, SDValue V1, SDValue V2,
1007 SelectionDAG &DAG) {
1008
1009 const auto &Begin = Mask.begin();
1010 const auto &End = Mask.end();
1011 unsigned HalfSize = Mask.size() / 2;
1012 unsigned LeftSize = HalfSize / 2;
1013 SDValue OriV1 = V1, OriV2 = V2;
1014
1015 if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, HalfSize - LeftSize,
1016 1) &&
1017 fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize + LeftSize, 1))
1018 V1 = OriV1;
1019 else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize,
1020 Mask.size() + HalfSize - LeftSize, 1) &&
1021 fitsRegularPattern<int>(Begin + HalfSize, 2, End,
1022 Mask.size() + HalfSize + LeftSize, 1))
1023 V1 = OriV2;
1024 else
1025 return SDValue();
1026
1027 if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, HalfSize - LeftSize,
1028 1) &&
1029 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize + LeftSize,
1030 1))
1031 V2 = OriV1;
1032 else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize,
1033 Mask.size() + HalfSize - LeftSize, 1) &&
1034 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
1035 Mask.size() + HalfSize + LeftSize, 1))
1036 V2 = OriV2;
1037 else
1038 return SDValue();
1039
1040 return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
1041}
1042
1043/// Lower VECTOR_SHUFFLE into XVILVL (if possible).
1045 MVT VT, SDValue V1, SDValue V2,
1046 SelectionDAG &DAG) {
1047
1048 const auto &Begin = Mask.begin();
1049 const auto &End = Mask.end();
1050 unsigned HalfSize = Mask.size() / 2;
1051 SDValue OriV1 = V1, OriV2 = V2;
1052
1053 if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, 0, 1) &&
1054 fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize, 1))
1055 V1 = OriV1;
1056 else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, Mask.size(), 1) &&
1057 fitsRegularPattern<int>(Begin + HalfSize, 2, End,
1058 Mask.size() + HalfSize, 1))
1059 V1 = OriV2;
1060 else
1061 return SDValue();
1062
1063 if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, 0, 1) &&
1064 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize, 1))
1065 V2 = OriV1;
1066 else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, Mask.size(),
1067 1) &&
1068 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
1069 Mask.size() + HalfSize, 1))
1070 V2 = OriV2;
1071 else
1072 return SDValue();
1073
1074 return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
1075}
1076
1077/// Lower VECTOR_SHUFFLE into XVPICKEV (if possible).
1079 MVT VT, SDValue V1, SDValue V2,
1080 SelectionDAG &DAG) {
1081
1082 const auto &Begin = Mask.begin();
1083 const auto &LeftMid = Mask.begin() + Mask.size() / 4;
1084 const auto &Mid = Mask.begin() + Mask.size() / 2;
1085 const auto &RightMid = Mask.end() - Mask.size() / 4;
1086 const auto &End = Mask.end();
1087 unsigned HalfSize = Mask.size() / 2;
1088 SDValue OriV1 = V1, OriV2 = V2;
1089
1090 if (fitsRegularPattern<int>(Begin, 1, LeftMid, 0, 2) &&
1091 fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize, 2))
1092 V1 = OriV1;
1093 else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size(), 2) &&
1094 fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize, 2))
1095 V1 = OriV2;
1096 else
1097 return SDValue();
1098
1099 if (fitsRegularPattern<int>(LeftMid, 1, Mid, 0, 2) &&
1100 fitsRegularPattern<int>(RightMid, 1, End, HalfSize, 2))
1101 V2 = OriV1;
1102 else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size(), 2) &&
1103 fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize, 2))
1104 V2 = OriV2;
1105
1106 else
1107 return SDValue();
1108
1109 return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
1110}
1111
1112/// Lower VECTOR_SHUFFLE into XVPICKOD (if possible).
1114 MVT VT, SDValue V1, SDValue V2,
1115 SelectionDAG &DAG) {
1116
1117 const auto &Begin = Mask.begin();
1118 const auto &LeftMid = Mask.begin() + Mask.size() / 4;
1119 const auto &Mid = Mask.begin() + Mask.size() / 2;
1120 const auto &RightMid = Mask.end() - Mask.size() / 4;
1121 const auto &End = Mask.end();
1122 unsigned HalfSize = Mask.size() / 2;
1123 SDValue OriV1 = V1, OriV2 = V2;
1124
1125 if (fitsRegularPattern<int>(Begin, 1, LeftMid, 1, 2) &&
1126 fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize + 1, 2))
1127 V1 = OriV1;
1128 else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size() + 1, 2) &&
1129 fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize + 1,
1130 2))
1131 V1 = OriV2;
1132 else
1133 return SDValue();
1134
1135 if (fitsRegularPattern<int>(LeftMid, 1, Mid, 1, 2) &&
1136 fitsRegularPattern<int>(RightMid, 1, End, HalfSize + 1, 2))
1137 V2 = OriV1;
1138 else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size() + 1, 2) &&
1139 fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize + 1,
1140 2))
1141 V2 = OriV2;
1142 else
1143 return SDValue();
1144
1145 return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
1146}
1147
1148/// Lower VECTOR_SHUFFLE into XVSHUF (if possible).
1150 MVT VT, SDValue V1, SDValue V2,
1151 SelectionDAG &DAG) {
1152
1153 int MaskSize = Mask.size();
1154 int HalfSize = Mask.size() / 2;
1155 const auto &Begin = Mask.begin();
1156 const auto &Mid = Mask.begin() + HalfSize;
1157 const auto &End = Mask.end();
1158
1159 // VECTOR_SHUFFLE concatenates the vectors:
1160 // <0, 1, 2, 3, 4, 5, 6, 7> + <8, 9, 10, 11, 12, 13, 14, 15>
1161 // shuffling ->
1162 // <0, 1, 2, 3, 8, 9, 10, 11> <4, 5, 6, 7, 12, 13, 14, 15>
1163 //
1164 // XVSHUF concatenates the vectors:
1165 // <a0, a1, a2, a3, b0, b1, b2, b3> + <a4, a5, a6, a7, b4, b5, b6, b7>
1166 // shuffling ->
1167 // <a0, a1, a2, a3, a4, a5, a6, a7> + <b0, b1, b2, b3, b4, b5, b6, b7>
1168 SmallVector<SDValue, 8> MaskAlloc;
1169 for (auto it = Begin; it < Mid; it++) {
1170 if (*it < 0) // UNDEF
1171 MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
1172 else if ((*it >= 0 && *it < HalfSize) ||
1173              (*it >= MaskSize && *it < MaskSize + HalfSize)) {
1174 int M = *it < HalfSize ? *it : *it - HalfSize;
1175 MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
1176 } else
1177 return SDValue();
1178 }
1179 assert((int)MaskAlloc.size() == HalfSize && "xvshuf convert failed!");
1180
1181 for (auto it = Mid; it < End; it++) {
1182 if (*it < 0) // UNDEF
1183 MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
1184 else if ((*it >= HalfSize && *it < MaskSize) ||
1185 (*it >= MaskSize + HalfSize && *it < MaskSize * 2)) {
1186 int M = *it < MaskSize ? *it - HalfSize : *it - MaskSize;
1187 MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
1188 } else
1189 return SDValue();
1190 }
1191 assert((int)MaskAlloc.size() == MaskSize && "xvshuf convert failed!");
1192
1193 EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
1194 SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, MaskAlloc);
1195 return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
1196}
1197
1198/// Shuffle vectors by lane to generate more optimized instructions.
1199/// 256-bit shuffles are always considered as 2-lane 128-bit shuffles.
1200///
1201/// Therefore, except for the following four cases, other cases are regarded
1202/// as cross-lane shuffles, where optimization is relatively limited.
1203///
1204/// - Shuffle high, low lanes of two inputs vector
1205/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 3, 6>
1206/// - Shuffle low, high lanes of two inputs vector
1207/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 0, 5>
1208/// - Shuffle low, low lanes of two inputs vector
1209/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 3, 6>
1210/// - Shuffle high, high lanes of two inputs vector
1211/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 0, 5>
1212///
1213/// The first case is the closest to LoongArch instructions and the other
1214/// cases need to be converted to it for processing.
1215///
1216/// This function may modify V1, V2 and Mask
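/// For reference (interpretation, assuming xvpermi.d selects 64-bit elements
/// with a 4 x 2-bit immediate): 0b01001110 swaps the two 128-bit halves,
/// 0b11101110 copies the high 128-bit half into both halves, and 0b01000100
/// copies the low 128-bit half into both halves.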
1218 MutableArrayRef<int> Mask, MVT VT,
1219 SDValue &V1, SDValue &V2,
1220 SelectionDAG &DAG) {
1221
1222 enum HalfMaskType { HighLaneTy, LowLaneTy, None };
1223
1224 int MaskSize = Mask.size();
1225 int HalfSize = Mask.size() / 2;
1226
1227 HalfMaskType preMask = None, postMask = None;
1228
1229 if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
1230 return M < 0 || (M >= 0 && M < HalfSize) ||
1231 (M >= MaskSize && M < MaskSize + HalfSize);
1232 }))
1233 preMask = HighLaneTy;
1234 else if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
1235 return M < 0 || (M >= HalfSize && M < MaskSize) ||
1236 (M >= MaskSize + HalfSize && M < MaskSize * 2);
1237 }))
1238 preMask = LowLaneTy;
1239
1240 if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
1241 return M < 0 || (M >= 0 && M < HalfSize) ||
1242 (M >= MaskSize && M < MaskSize + HalfSize);
1243 }))
1244 postMask = HighLaneTy;
1245 else if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
1246 return M < 0 || (M >= HalfSize && M < MaskSize) ||
1247 (M >= MaskSize + HalfSize && M < MaskSize * 2);
1248 }))
1249 postMask = LowLaneTy;
1250
1251 // The pre-half of mask is high lane type, and the post-half of mask
1252 // is low lane type, which is closest to the LoongArch instructions.
1253 //
1254  // Note: In the LoongArch architecture, the high lane of the mask corresponds
1255  // to the lower 128 bits of the vector register, and the low lane of the mask
1256  // corresponds to the higher 128 bits of the vector register.
1257 if (preMask == HighLaneTy && postMask == LowLaneTy) {
1258 return;
1259 }
1260 if (preMask == LowLaneTy && postMask == HighLaneTy) {
1261 V1 = DAG.getBitcast(MVT::v4i64, V1);
1262 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
1263 DAG.getConstant(0b01001110, DL, MVT::i64));
1264 V1 = DAG.getBitcast(VT, V1);
1265
1266 if (!V2.isUndef()) {
1267 V2 = DAG.getBitcast(MVT::v4i64, V2);
1268 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
1269 DAG.getConstant(0b01001110, DL, MVT::i64));
1270 V2 = DAG.getBitcast(VT, V2);
1271 }
1272
1273 for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
1274 *it = *it < 0 ? *it : *it - HalfSize;
1275 }
1276 for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
1277 *it = *it < 0 ? *it : *it + HalfSize;
1278 }
1279 } else if (preMask == LowLaneTy && postMask == LowLaneTy) {
1280 V1 = DAG.getBitcast(MVT::v4i64, V1);
1281 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
1282 DAG.getConstant(0b11101110, DL, MVT::i64));
1283 V1 = DAG.getBitcast(VT, V1);
1284
1285 if (!V2.isUndef()) {
1286 V2 = DAG.getBitcast(MVT::v4i64, V2);
1287 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
1288 DAG.getConstant(0b11101110, DL, MVT::i64));
1289 V2 = DAG.getBitcast(VT, V2);
1290 }
1291
1292 for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
1293 *it = *it < 0 ? *it : *it - HalfSize;
1294 }
1295 } else if (preMask == HighLaneTy && postMask == HighLaneTy) {
1296 V1 = DAG.getBitcast(MVT::v4i64, V1);
1297 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
1298 DAG.getConstant(0b01000100, DL, MVT::i64));
1299 V1 = DAG.getBitcast(VT, V1);
1300
1301 if (!V2.isUndef()) {
1302 V2 = DAG.getBitcast(MVT::v4i64, V2);
1303 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
1304 DAG.getConstant(0b01000100, DL, MVT::i64));
1305 V2 = DAG.getBitcast(VT, V2);
1306 }
1307
1308 for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
1309 *it = *it < 0 ? *it : *it + HalfSize;
1310 }
1311 } else { // cross-lane
1312 return;
1313 }
1314}
1315
1316/// Dispatching routine to lower various 256-bit LoongArch vector shuffles.
1317///
1318/// This routine breaks down the specific type of 256-bit shuffle and
1319/// dispatches to the lowering routines accordingly.
1321 SDValue V1, SDValue V2, SelectionDAG &DAG) {
1322 assert((VT.SimpleTy == MVT::v32i8 || VT.SimpleTy == MVT::v16i16 ||
1323 VT.SimpleTy == MVT::v8i32 || VT.SimpleTy == MVT::v4i64 ||
1324 VT.SimpleTy == MVT::v8f32 || VT.SimpleTy == MVT::v4f64) &&
1325 "Vector type is unsupported for lasx!");
1326 assert(V1.getSimpleValueType() == V2.getSimpleValueType() &&
1327 "Two operands have different types!");
1328 assert(VT.getVectorNumElements() == Mask.size() &&
1329 "Unexpected mask size for shuffle!");
1330 assert(Mask.size() % 2 == 0 && "Expected even mask size.");
1331 assert(Mask.size() >= 4 && "Mask size is less than 4.");
1332
1333  // Canonicalize non-cross-lane shuffle vectors.
1334 SmallVector<int> NewMask(Mask);
1335 canonicalizeShuffleVectorByLane(DL, NewMask, VT, V1, V2, DAG);
1336
1337 SDValue Result;
1338 // TODO: Add more comparison patterns.
1339 if (V2.isUndef()) {
1340 if ((Result = lowerVECTOR_SHUFFLE_XVREPLVEI(DL, NewMask, VT, V1, V2, DAG)))
1341 return Result;
1342 if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, NewMask, VT, V1, V2, DAG)))
1343 return Result;
1344
1345 // TODO: This comment may be enabled in the future to better match the
1346 // pattern for instruction selection.
1347 /* V2 = V1; */
1348 }
1349
1350 // It is recommended not to change the pattern comparison order for better
1351 // performance.
1352 if ((Result = lowerVECTOR_SHUFFLE_XVPACKEV(DL, NewMask, VT, V1, V2, DAG)))
1353 return Result;
1354 if ((Result = lowerVECTOR_SHUFFLE_XVPACKOD(DL, NewMask, VT, V1, V2, DAG)))
1355 return Result;
1356 if ((Result = lowerVECTOR_SHUFFLE_XVILVH(DL, NewMask, VT, V1, V2, DAG)))
1357 return Result;
1358 if ((Result = lowerVECTOR_SHUFFLE_XVILVL(DL, NewMask, VT, V1, V2, DAG)))
1359 return Result;
1360 if ((Result = lowerVECTOR_SHUFFLE_XVPICKEV(DL, NewMask, VT, V1, V2, DAG)))
1361 return Result;
1362 if ((Result = lowerVECTOR_SHUFFLE_XVPICKOD(DL, NewMask, VT, V1, V2, DAG)))
1363 return Result;
1364 if ((Result = lowerVECTOR_SHUFFLE_XVSHUF(DL, NewMask, VT, V1, V2, DAG)))
1365 return Result;
1366
1367 return SDValue();
1368}
1369
1370SDValue LoongArchTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
1371 SelectionDAG &DAG) const {
1372 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
1373 ArrayRef<int> OrigMask = SVOp->getMask();
1374 SDValue V1 = Op.getOperand(0);
1375 SDValue V2 = Op.getOperand(1);
1376 MVT VT = Op.getSimpleValueType();
1377 int NumElements = VT.getVectorNumElements();
1378 SDLoc DL(Op);
1379
1380 bool V1IsUndef = V1.isUndef();
1381 bool V2IsUndef = V2.isUndef();
1382 if (V1IsUndef && V2IsUndef)
1383 return DAG.getUNDEF(VT);
1384
1385 // When we create a shuffle node we put the UNDEF node to second operand,
1386 // but in some cases the first operand may be transformed to UNDEF.
1387 // In this case we should just commute the node.
1388 if (V1IsUndef)
1389 return DAG.getCommutedVectorShuffle(*SVOp);
1390
1391 // Check for non-undef masks pointing at an undef vector and make the masks
1392 // undef as well. This makes it easier to match the shuffle based solely on
1393 // the mask.
1394 if (V2IsUndef &&
1395 any_of(OrigMask, [NumElements](int M) { return M >= NumElements; })) {
1396 SmallVector<int, 8> NewMask(OrigMask);
1397 for (int &M : NewMask)
1398 if (M >= NumElements)
1399 M = -1;
1400 return DAG.getVectorShuffle(VT, DL, V1, V2, NewMask);
1401 }
1402
1403 // Check for illegal shuffle mask element index values.
1404 int MaskUpperLimit = OrigMask.size() * (V2IsUndef ? 1 : 2);
1405 (void)MaskUpperLimit;
1406 assert(llvm::all_of(OrigMask,
1407 [&](int M) { return -1 <= M && M < MaskUpperLimit; }) &&
1408 "Out of bounds shuffle index");
1409
1410 // For each vector width, delegate to a specialized lowering routine.
1411 if (VT.is128BitVector())
1412 return lower128BitShuffle(DL, OrigMask, VT, V1, V2, DAG);
1413
1414 if (VT.is256BitVector())
1415 return lower256BitShuffle(DL, OrigMask, VT, V1, V2, DAG);
1416
1417 return SDValue();
1418}
1419
1420static bool isConstantOrUndef(const SDValue Op) {
1421 if (Op->isUndef())
1422 return true;
1423 if (isa<ConstantSDNode>(Op))
1424 return true;
1425 if (isa<ConstantFPSDNode>(Op))
1426 return true;
1427 return false;
1428}
1429
1431 for (unsigned i = 0; i < Op->getNumOperands(); ++i)
1432 if (isConstantOrUndef(Op->getOperand(i)))
1433 return true;
1434 return false;
1435}
1436
1437SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
1438 SelectionDAG &DAG) const {
1439 BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op);
1440 EVT ResTy = Op->getValueType(0);
1441 SDLoc DL(Op);
1442 APInt SplatValue, SplatUndef;
1443 unsigned SplatBitSize;
1444 bool HasAnyUndefs;
1445 bool Is128Vec = ResTy.is128BitVector();
1446 bool Is256Vec = ResTy.is256BitVector();
1447
1448 if ((!Subtarget.hasExtLSX() || !Is128Vec) &&
1449 (!Subtarget.hasExtLASX() || !Is256Vec))
1450 return SDValue();
1451
1452 if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
1453 /*MinSplatBits=*/8) &&
1454 SplatBitSize <= 64) {
1455 // We can only cope with 8, 16, 32, or 64-bit elements.
1456 if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 &&
1457 SplatBitSize != 64)
1458 return SDValue();
1459
1460 EVT ViaVecTy;
1461
1462 switch (SplatBitSize) {
1463 default:
1464 return SDValue();
1465 case 8:
1466 ViaVecTy = Is128Vec ? MVT::v16i8 : MVT::v32i8;
1467 break;
1468 case 16:
1469 ViaVecTy = Is128Vec ? MVT::v8i16 : MVT::v16i16;
1470 break;
1471 case 32:
1472 ViaVecTy = Is128Vec ? MVT::v4i32 : MVT::v8i32;
1473 break;
1474 case 64:
1475 ViaVecTy = Is128Vec ? MVT::v2i64 : MVT::v4i64;
1476 break;
1477 }
1478
1479 // SelectionDAG::getConstant will promote SplatValue appropriately.
1480 SDValue Result = DAG.getConstant(SplatValue, DL, ViaVecTy);
1481
1482 // Bitcast to the type we originally wanted.
1483 if (ViaVecTy != ResTy)
1484 Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result);
1485
1486 return Result;
1487 }
1488
1489 if (DAG.isSplatValue(Op, /*AllowUndefs=*/false))
1490 return Op;
1491
1493 // Use INSERT_VECTOR_ELT operations rather than expand to stores.
1494 // The resulting code is the same length as the expansion, but it doesn't
1495 // use memory operations.
1496 EVT ResTy = Node->getValueType(0);
1497
1498 assert(ResTy.isVector());
1499
1500 unsigned NumElts = ResTy.getVectorNumElements();
1501 SDValue Vector = DAG.getUNDEF(ResTy);
1502 for (unsigned i = 0; i < NumElts; ++i) {
1504 Node->getOperand(i),
1505 DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
1506 }
1507 return Vector;
1508 }
1509
1510 return SDValue();
1511}
1512
1513SDValue
1514LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
1515 SelectionDAG &DAG) const {
1516 EVT VecTy = Op->getOperand(0)->getValueType(0);
1517 SDValue Idx = Op->getOperand(1);
1518 EVT EltTy = VecTy.getVectorElementType();
1519 unsigned NumElts = VecTy.getVectorNumElements();
1520
1521 if (isa<ConstantSDNode>(Idx) &&
1522 (EltTy == MVT::i32 || EltTy == MVT::i64 || EltTy == MVT::f32 ||
1523 EltTy == MVT::f64 || Idx->getAsZExtVal() < NumElts / 2))
1524 return Op;
1525
1526 return SDValue();
1527}
1528
1529SDValue
1530LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
1531 SelectionDAG &DAG) const {
1532 if (isa<ConstantSDNode>(Op->getOperand(2)))
1533 return Op;
1534 return SDValue();
1535}
1536
1537SDValue LoongArchTargetLowering::lowerATOMIC_FENCE(SDValue Op,
1538 SelectionDAG &DAG) const {
1539 SDLoc DL(Op);
1540 SyncScope::ID FenceSSID =
1541 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
1542
1543 // singlethread fences only synchronize with signal handlers on the same
1544 // thread and thus only need to preserve instruction order, not actually
1545 // enforce memory ordering.
1546 if (FenceSSID == SyncScope::SingleThread)
1547 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
1548 return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
1549
1550 return Op;
1551}
1552
1553SDValue LoongArchTargetLowering::lowerWRITE_REGISTER(SDValue Op,
1554 SelectionDAG &DAG) const {
1555
1556 if (Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i32) {
1557 DAG.getContext()->emitError(
1558 "On LA64, only 64-bit registers can be written.");
1559 return Op.getOperand(0);
1560 }
1561
1562 if (!Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i64) {
1563 DAG.getContext()->emitError(
1564 "On LA32, only 32-bit registers can be written.");
1565 return Op.getOperand(0);
1566 }
1567
1568 return Op;
1569}
1570
1571SDValue LoongArchTargetLowering::lowerFRAMEADDR(SDValue Op,
1572 SelectionDAG &DAG) const {
1573 if (!isa<ConstantSDNode>(Op.getOperand(0))) {
1574 DAG.getContext()->emitError("argument to '__builtin_frame_address' must "
1575 "be a constant integer");
1576 return SDValue();
1577 }
1578
1581 Register FrameReg = Subtarget.getRegisterInfo()->getFrameRegister(MF);
1582 EVT VT = Op.getValueType();
1583 SDLoc DL(Op);
1584 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
1585 unsigned Depth = Op.getConstantOperandVal(0);
1586 int GRLenInBytes = Subtarget.getGRLen() / 8;
1587
1588 while (Depth--) {
1589 int Offset = -(GRLenInBytes * 2);
1590 SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
1591 DAG.getSignedConstant(Offset, DL, VT));
1592 FrameAddr =
1593 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
1594 }
1595 return FrameAddr;
1596}
1597
1598SDValue LoongArchTargetLowering::lowerRETURNADDR(SDValue Op,
1599 SelectionDAG &DAG) const {
1601 return SDValue();
1602
1603 // Currently only support lowering return address for current frame.
1604 if (Op.getConstantOperandVal(0) != 0) {
1605 DAG.getContext()->emitError(
1606 "return address can only be determined for the current frame");
1607 return SDValue();
1608 }
1609
1612 MVT GRLenVT = Subtarget.getGRLenVT();
1613
1614 // Return the value of the return address register, marking it an implicit
1615 // live-in.
1616 Register Reg = MF.addLiveIn(Subtarget.getRegisterInfo()->getRARegister(),
1617 getRegClassFor(GRLenVT));
1618 return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), Reg, GRLenVT);
1619}
1620
1621SDValue LoongArchTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
1622 SelectionDAG &DAG) const {
1624 auto Size = Subtarget.getGRLen() / 8;
1625 auto FI = MF.getFrameInfo().CreateFixedObject(Size, 0, false);
1626 return DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
1627}
1628
1629SDValue LoongArchTargetLowering::lowerVASTART(SDValue Op,
1630 SelectionDAG &DAG) const {
1632 auto *FuncInfo = MF.getInfo<LoongArchMachineFunctionInfo>();
1633
1634 SDLoc DL(Op);
1635 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
1637
1638 // vastart just stores the address of the VarArgsFrameIndex slot into the
1639 // memory location argument.
1640 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
1641 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
1642 MachinePointerInfo(SV));
1643}
1644
1645SDValue LoongArchTargetLowering::lowerUINT_TO_FP(SDValue Op,
1646 SelectionDAG &DAG) const {
1647 assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
1648 !Subtarget.hasBasicD() && "unexpected target features");
1649
1650 SDLoc DL(Op);
1651 SDValue Op0 = Op.getOperand(0);
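  // The checks below keep the node as-is when the operand is already known to
  // be zero-extended from fewer than 32 bits (an AND with a small mask, a
  // BSTRPICK of the low bits, or a narrow AssertZext); such a value is
  // non-negative, so (presumably) the ordinary signed conversion is safe and
  // no libcall is needed.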
1652 if (Op0->getOpcode() == ISD::AND) {
1653 auto *C = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
1654 if (C && C->getZExtValue() < UINT64_C(0xFFFFFFFF))
1655 return Op;
1656 }
1657
1658 if (Op0->getOpcode() == LoongArchISD::BSTRPICK &&
1659 Op0.getConstantOperandVal(1) < UINT64_C(0X1F) &&
1660 Op0.getConstantOperandVal(2) == UINT64_C(0))
1661 return Op;
1662
1663 if (Op0.getOpcode() == ISD::AssertZext &&
1664 dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLT(MVT::i32))
1665 return Op;
1666
1667 EVT OpVT = Op0.getValueType();
1668 EVT RetVT = Op.getValueType();
1669 RTLIB::Libcall LC = RTLIB::getUINTTOFP(OpVT, RetVT);
1670 MakeLibCallOptions CallOptions;
1671 CallOptions.setTypeListBeforeSoften(OpVT, RetVT, true);
1672 SDValue Chain = SDValue();
1674 std::tie(Result, Chain) =
1675 makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
1676 return Result;
1677}
1678
1679SDValue LoongArchTargetLowering::lowerSINT_TO_FP(SDValue Op,
1680 SelectionDAG &DAG) const {
1681 assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
1682 !Subtarget.hasBasicD() && "unexpected target features");
1683
1684 SDLoc DL(Op);
1685 SDValue Op0 = Op.getOperand(0);
1686
1687 if ((Op0.getOpcode() == ISD::AssertSext ||
1689 dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLE(MVT::i32))
1690 return Op;
1691
1692 EVT OpVT = Op0.getValueType();
1693 EVT RetVT = Op.getValueType();
1694 RTLIB::Libcall LC = RTLIB::getSINTTOFP(OpVT, RetVT);
1695 MakeLibCallOptions CallOptions;
1696 CallOptions.setTypeListBeforeSoften(OpVT, RetVT, true);
1697 SDValue Chain = SDValue();
1699 std::tie(Result, Chain) =
1700 makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
1701 return Result;
1702}
1703
1704SDValue LoongArchTargetLowering::lowerBITCAST(SDValue Op,
1705 SelectionDAG &DAG) const {
1706
1707 SDLoc DL(Op);
1708 SDValue Op0 = Op.getOperand(0);
1709
1710 if (Op.getValueType() == MVT::f32 && Op0.getValueType() == MVT::i32 &&
1711 Subtarget.is64Bit() && Subtarget.hasBasicF()) {
1712 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
1713 return DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, NewOp0);
1714 }
1715 return Op;
1716}
1717
1718SDValue LoongArchTargetLowering::lowerFP_TO_SINT(SDValue Op,
1719 SelectionDAG &DAG) const {
1720
1721 SDLoc DL(Op);
1722 SDValue Op0 = Op.getOperand(0);
1723
1724 if (Op0.getValueType() == MVT::f16)
1725 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
1726
1727 if (Op.getValueSizeInBits() > 32 && Subtarget.hasBasicF() &&
1728 !Subtarget.hasBasicD()) {
1729 SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, MVT::f32, Op0);
1730 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Dst);
1731 }
1732
1733 EVT FPTy = EVT::getFloatingPointVT(Op.getValueSizeInBits());
1734 SDValue Trunc = DAG.getNode(LoongArchISD::FTINT, DL, FPTy, Op0);
1735 return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Trunc);
1736}
1737
1739 SelectionDAG &DAG, unsigned Flags) {
1740 return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
1741}
1742
1744 SelectionDAG &DAG, unsigned Flags) {
1745 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
1746 Flags);
1747}
1748
1750 SelectionDAG &DAG, unsigned Flags) {
1751 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
1752 N->getOffset(), Flags);
1753}
1754
1756 SelectionDAG &DAG, unsigned Flags) {
1757 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
1758}
1759
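// Sketch of what getAddr does: materialize the address of a symbol-like node
// N according to the code model: PseudoLA_PCREL/PseudoLA_GOT for the
// small/medium models, and the multi-instruction PseudoLA_*_LARGE forms
// (which carry an extra dummy operand) for the large model.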
1760template <class NodeTy>
1761SDValue LoongArchTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
1763 bool IsLocal) const {
1764 SDLoc DL(N);
1765 EVT Ty = getPointerTy(DAG.getDataLayout());
1766 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
1767 SDValue Load;
1768
1769 switch (M) {
1770 default:
1771 report_fatal_error("Unsupported code model");
1772
1773 case CodeModel::Large: {
1774 assert(Subtarget.is64Bit() && "Large code model requires LA64");
1775
1776 // This is not actually used, but is necessary for successfully matching
1777 // the PseudoLA_*_LARGE nodes.
1778 SDValue Tmp = DAG.getConstant(0, DL, Ty);
1779 if (IsLocal) {
1780 // This generates the pattern (PseudoLA_PCREL_LARGE tmp sym), that
1781 // eventually becomes the desired 5-insn code sequence.
1782 Load = SDValue(DAG.getMachineNode(LoongArch::PseudoLA_PCREL_LARGE, DL, Ty,
1783 Tmp, Addr),
1784 0);
1785 } else {
1786 // This generates the pattern (PseudoLA_GOT_LARGE tmp sym), that
1787 // eventually becomes the desired 5-insn code sequence.
1788 Load = SDValue(
1789 DAG.getMachineNode(LoongArch::PseudoLA_GOT_LARGE, DL, Ty, Tmp, Addr),
1790 0);
1791 }
1792 break;
1793 }
1794
1795 case CodeModel::Small:
1796 case CodeModel::Medium:
1797 if (IsLocal) {
1798 // This generates the pattern (PseudoLA_PCREL sym), which expands to
1799 // (addi.w/d (pcalau12i %pc_hi20(sym)) %pc_lo12(sym)).
1800 Load = SDValue(
1801 DAG.getMachineNode(LoongArch::PseudoLA_PCREL, DL, Ty, Addr), 0);
1802 } else {
1803 // This generates the pattern (PseudoLA_GOT sym), which expands to (ld.w/d
1804 // (pcalau12i %got_pc_hi20(sym)) %got_pc_lo12(sym)).
1805 Load =
1806 SDValue(DAG.getMachineNode(LoongArch::PseudoLA_GOT, DL, Ty, Addr), 0);
1807 }
1808 }
1809
1810 if (!IsLocal) {
1811 // Mark the load instruction as invariant to enable hoisting in MachineLICM.
1817 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
1818 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
1819 }
1820
1821 return Load;
1822}
1823
1824SDValue LoongArchTargetLowering::lowerBlockAddress(SDValue Op,
1825 SelectionDAG &DAG) const {
1826 return getAddr(cast<BlockAddressSDNode>(Op), DAG,
1827 DAG.getTarget().getCodeModel());
1828}
1829
1830SDValue LoongArchTargetLowering::lowerJumpTable(SDValue Op,
1831 SelectionDAG &DAG) const {
1832 return getAddr(cast<JumpTableSDNode>(Op), DAG,
1833 DAG.getTarget().getCodeModel());
1834}
1835
1836SDValue LoongArchTargetLowering::lowerConstantPool(SDValue Op,
1837 SelectionDAG &DAG) const {
1838 return getAddr(cast<ConstantPoolSDNode>(Op), DAG,
1839 DAG.getTarget().getCodeModel());
1840}
1841
1842SDValue LoongArchTargetLowering::lowerGlobalAddress(SDValue Op,
1843 SelectionDAG &DAG) const {
1844 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
1845 assert(N->getOffset() == 0 && "unexpected offset in global node");
1846 auto CM = DAG.getTarget().getCodeModel();
1847 const GlobalValue *GV = N->getGlobal();
1848
1849 if (GV->isDSOLocal() && isa<GlobalVariable>(GV)) {
1850 if (auto GCM = dyn_cast<GlobalVariable>(GV)->getCodeModel())
1851 CM = *GCM;
1852 }
1853
1854 return getAddr(N, DAG, CM, GV->isDSOLocal());
1855}
1856
1857SDValue LoongArchTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
1858 SelectionDAG &DAG,
1859 unsigned Opc, bool UseGOT,
1860 bool Large) const {
1861 SDLoc DL(N);
1862 EVT Ty = getPointerTy(DAG.getDataLayout());
1863 MVT GRLenVT = Subtarget.getGRLenVT();
1864
1865 // This is not actually used, but is necessary for successfully matching the
1866 // PseudoLA_*_LARGE nodes.
1867 SDValue Tmp = DAG.getConstant(0, DL, Ty);
1868 SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
1869
1870 // Only IE needs an extra argument for large code model.
1871 SDValue Offset = Opc == LoongArch::PseudoLA_TLS_IE_LARGE
1872 ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
1873 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
1874
1875 // If it is LE for normal/medium code model, the add tp operation will occur
1876 // during the pseudo-instruction expansion.
1877 if (Opc == LoongArch::PseudoLA_TLS_LE && !Large)
1878 return Offset;
1879
1880 if (UseGOT) {
1881 // Mark the load instruction as invariant to enable hoisting in MachineLICM.
1887 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
1888 DAG.setNodeMemRefs(cast<MachineSDNode>(Offset.getNode()), {MemOp});
1889 }
1890
1891 // Add the thread pointer.
1892 return DAG.getNode(ISD::ADD, DL, Ty, Offset,
1893 DAG.getRegister(LoongArch::R2, GRLenVT));
1894}
1895
1896SDValue LoongArchTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
1897 SelectionDAG &DAG,
1898 unsigned Opc,
1899 bool Large) const {
1900 SDLoc DL(N);
1901 EVT Ty = getPointerTy(DAG.getDataLayout());
1902 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
1903
1904 // This is not actually used, but is necessary for successfully matching the
1905 // PseudoLA_*_LARGE nodes.
1906 SDValue Tmp = DAG.getConstant(0, DL, Ty);
1907
1908 // Use a PC-relative addressing mode to access the dynamic GOT address.
1909 SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
1910 SDValue Load = Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
1911 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
1912
1913 // Prepare argument list to generate call.
1915 ArgListEntry Entry;
1916 Entry.Node = Load;
1917 Entry.Ty = CallTy;
1918 Args.push_back(Entry);
1919
1920 // Setup call to __tls_get_addr.
1922 CLI.setDebugLoc(DL)
1923 .setChain(DAG.getEntryNode())
1924 .setLibCallee(CallingConv::C, CallTy,
1925 DAG.getExternalSymbol("__tls_get_addr", Ty),
1926 std::move(Args));
1927
1928 return LowerCallTo(CLI).first;
1929}
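// Illustrative sketch: the value returned here is the result of a libcall
// roughly equivalent to "%v = call ptr @__tls_get_addr(ptr %got_entry)",
// where %got_entry stands for the PseudoLA_TLS_GD/LD result prepared above
// and the call is built with the C calling convention via LowerCallTo.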
1930
1931SDValue LoongArchTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
1932 SelectionDAG &DAG, unsigned Opc,
1933 bool Large) const {
1934 SDLoc DL(N);
1935 EVT Ty = getPointerTy(DAG.getDataLayout());
1936 const GlobalValue *GV = N->getGlobal();
1937
1938 // This is not actually used, but is necessary for successfully matching the
1939 // PseudoLA_*_LARGE nodes.
1940 SDValue Tmp = DAG.getConstant(0, DL, Ty);
1941
1942 // Use a PC-relative addressing mode to access the global dynamic GOT address.
1943 // This generates the pattern (PseudoLA_TLS_DESC_PC{,LARGE} sym).
1944 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
1945 return Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
1946 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
1947}
1948
1949SDValue
1950LoongArchTargetLowering::lowerGlobalTLSAddress(SDValue Op,
1951 SelectionDAG &DAG) const {
1952 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
1953 CallingConv::GHC)
1954 report_fatal_error("In GHC calling convention TLS is not supported");
1955
1956 bool Large = DAG.getTarget().getCodeModel() == CodeModel::Large;
1957 assert((!Large || Subtarget.is64Bit()) && "Large code model requires LA64");
1958
1959 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
1960 assert(N->getOffset() == 0 && "unexpected offset in global node");
1961
1962 if (DAG.getTarget().useEmulatedTLS())
1963 report_fatal_error("the emulated TLS is prohibited",
1964 /*GenCrashDiag=*/false);
1965
1966 bool IsDesc = DAG.getTarget().useTLSDESC();
1967
1968 switch (getTargetMachine().getTLSModel(N->getGlobal())) {
1969 case TLSModel::GeneralDynamic:
1970 // In this model, application code calls the dynamic linker function
1971 // __tls_get_addr to locate TLS offsets into the dynamic thread vector at
1972 // runtime.
1973 if (!IsDesc)
1974 return getDynamicTLSAddr(N, DAG,
1975 Large ? LoongArch::PseudoLA_TLS_GD_LARGE
1976 : LoongArch::PseudoLA_TLS_GD,
1977 Large);
1978 break;
1979 case TLSModel::LocalDynamic:
1980 // Same as GeneralDynamic, except for assembly modifiers and relocation
1981 // records.
1982 if (!IsDesc)
1983 return getDynamicTLSAddr(N, DAG,
1984 Large ? LoongArch::PseudoLA_TLS_LD_LARGE
1985 : LoongArch::PseudoLA_TLS_LD,
1986 Large);
1987 break;
1988 case TLSModel::InitialExec:
1989 // This model uses the GOT to resolve TLS offsets.
1990 return getStaticTLSAddr(N, DAG,
1991 Large ? LoongArch::PseudoLA_TLS_IE_LARGE
1992 : LoongArch::PseudoLA_TLS_IE,
1993 /*UseGOT=*/true, Large);
1994 case TLSModel::LocalExec:
1995 // This model is used when static linking as the TLS offsets are resolved
1996 // during program linking.
1997 //
1998 // This node doesn't need an extra argument for the large code model.
1999 return getStaticTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_LE,
2000 /*UseGOT=*/false, Large);
2001 }
2002
2003 return getTLSDescAddr(N, DAG,
2004 Large ? LoongArch::PseudoLA_TLS_DESC_LARGE
2005 : LoongArch::PseudoLA_TLS_DESC,
2006 Large);
2007}
2008
2009template <unsigned N>
2010 static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp,
2011 SelectionDAG &DAG, bool IsSigned = false) {
2012 auto *CImm = cast<ConstantSDNode>(Op->getOperand(ImmOp));
2013 // Check the ImmArg.
2014 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
2015 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
2016 DAG.getContext()->emitError(Op->getOperationName(0) +
2017 ": argument out of range.");
2018 return DAG.getNode(ISD::UNDEF, SDLoc(Op), Op.getValueType());
2019 }
2020 return SDValue();
2021}
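// Usage sketch: checkIntrinsicImmArg<1>(Op, 2, DAG), as used below for
// vpickve2gr.d, accepts only 0 or 1 as the immediate in operand 2; any other
// value triggers the "argument out of range" diagnostic and an UNDEF result.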
2022
2023SDValue
2024LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
2025 SelectionDAG &DAG) const {
2026 SDLoc DL(Op);
2027 switch (Op.getConstantOperandVal(0)) {
2028 default:
2029 return SDValue(); // Don't custom lower most intrinsics.
2030 case Intrinsic::thread_pointer: {
2031 EVT PtrVT = getPointerTy(DAG.getDataLayout());
2032 return DAG.getRegister(LoongArch::R2, PtrVT);
2033 }
2034 case Intrinsic::loongarch_lsx_vpickve2gr_d:
2035 case Intrinsic::loongarch_lsx_vpickve2gr_du:
2036 case Intrinsic::loongarch_lsx_vreplvei_d:
2037 case Intrinsic::loongarch_lasx_xvrepl128vei_d:
2038 return checkIntrinsicImmArg<1>(Op, 2, DAG);
2039 case Intrinsic::loongarch_lsx_vreplvei_w:
2040 case Intrinsic::loongarch_lasx_xvrepl128vei_w:
2041 case Intrinsic::loongarch_lasx_xvpickve2gr_d:
2042 case Intrinsic::loongarch_lasx_xvpickve2gr_du:
2043 case Intrinsic::loongarch_lasx_xvpickve_d:
2044 case Intrinsic::loongarch_lasx_xvpickve_d_f:
2045 return checkIntrinsicImmArg<2>(Op, 2, DAG);
2046 case Intrinsic::loongarch_lasx_xvinsve0_d:
2047 return checkIntrinsicImmArg<2>(Op, 3, DAG);
2048 case Intrinsic::loongarch_lsx_vsat_b:
2049 case Intrinsic::loongarch_lsx_vsat_bu:
2050 case Intrinsic::loongarch_lsx_vrotri_b:
2051 case Intrinsic::loongarch_lsx_vsllwil_h_b:
2052 case Intrinsic::loongarch_lsx_vsllwil_hu_bu:
2053 case Intrinsic::loongarch_lsx_vsrlri_b:
2054 case Intrinsic::loongarch_lsx_vsrari_b:
2055 case Intrinsic::loongarch_lsx_vreplvei_h:
2056 case Intrinsic::loongarch_lasx_xvsat_b:
2057 case Intrinsic::loongarch_lasx_xvsat_bu:
2058 case Intrinsic::loongarch_lasx_xvrotri_b:
2059 case Intrinsic::loongarch_lasx_xvsllwil_h_b:
2060 case Intrinsic::loongarch_lasx_xvsllwil_hu_bu:
2061 case Intrinsic::loongarch_lasx_xvsrlri_b:
2062 case Intrinsic::loongarch_lasx_xvsrari_b:
2063 case Intrinsic::loongarch_lasx_xvrepl128vei_h:
2064 case Intrinsic::loongarch_lasx_xvpickve_w:
2065 case Intrinsic::loongarch_lasx_xvpickve_w_f:
2066 return checkIntrinsicImmArg<3>(Op, 2, DAG);
2067 case Intrinsic::loongarch_lasx_xvinsve0_w:
2068 return checkIntrinsicImmArg<3>(Op, 3, DAG);
2069 case Intrinsic::loongarch_lsx_vsat_h:
2070 case Intrinsic::loongarch_lsx_vsat_hu:
2071 case Intrinsic::loongarch_lsx_vrotri_h:
2072 case Intrinsic::loongarch_lsx_vsllwil_w_h:
2073 case Intrinsic::loongarch_lsx_vsllwil_wu_hu:
2074 case Intrinsic::loongarch_lsx_vsrlri_h:
2075 case Intrinsic::loongarch_lsx_vsrari_h:
2076 case Intrinsic::loongarch_lsx_vreplvei_b:
2077 case Intrinsic::loongarch_lasx_xvsat_h:
2078 case Intrinsic::loongarch_lasx_xvsat_hu:
2079 case Intrinsic::loongarch_lasx_xvrotri_h:
2080 case Intrinsic::loongarch_lasx_xvsllwil_w_h:
2081 case Intrinsic::loongarch_lasx_xvsllwil_wu_hu:
2082 case Intrinsic::loongarch_lasx_xvsrlri_h:
2083 case Intrinsic::loongarch_lasx_xvsrari_h:
2084 case Intrinsic::loongarch_lasx_xvrepl128vei_b:
2085 return checkIntrinsicImmArg<4>(Op, 2, DAG);
2086 case Intrinsic::loongarch_lsx_vsrlni_b_h:
2087 case Intrinsic::loongarch_lsx_vsrani_b_h:
2088 case Intrinsic::loongarch_lsx_vsrlrni_b_h:
2089 case Intrinsic::loongarch_lsx_vsrarni_b_h:
2090 case Intrinsic::loongarch_lsx_vssrlni_b_h:
2091 case Intrinsic::loongarch_lsx_vssrani_b_h:
2092 case Intrinsic::loongarch_lsx_vssrlni_bu_h:
2093 case Intrinsic::loongarch_lsx_vssrani_bu_h:
2094 case Intrinsic::loongarch_lsx_vssrlrni_b_h:
2095 case Intrinsic::loongarch_lsx_vssrarni_b_h:
2096 case Intrinsic::loongarch_lsx_vssrlrni_bu_h:
2097 case Intrinsic::loongarch_lsx_vssrarni_bu_h:
2098 case Intrinsic::loongarch_lasx_xvsrlni_b_h:
2099 case Intrinsic::loongarch_lasx_xvsrani_b_h:
2100 case Intrinsic::loongarch_lasx_xvsrlrni_b_h:
2101 case Intrinsic::loongarch_lasx_xvsrarni_b_h:
2102 case Intrinsic::loongarch_lasx_xvssrlni_b_h:
2103 case Intrinsic::loongarch_lasx_xvssrani_b_h:
2104 case Intrinsic::loongarch_lasx_xvssrlni_bu_h:
2105 case Intrinsic::loongarch_lasx_xvssrani_bu_h:
2106 case Intrinsic::loongarch_lasx_xvssrlrni_b_h:
2107 case Intrinsic::loongarch_lasx_xvssrarni_b_h:
2108 case Intrinsic::loongarch_lasx_xvssrlrni_bu_h:
2109 case Intrinsic::loongarch_lasx_xvssrarni_bu_h:
2110 return checkIntrinsicImmArg<4>(Op, 3, DAG);
2111 case Intrinsic::loongarch_lsx_vsat_w:
2112 case Intrinsic::loongarch_lsx_vsat_wu:
2113 case Intrinsic::loongarch_lsx_vrotri_w:
2114 case Intrinsic::loongarch_lsx_vsllwil_d_w:
2115 case Intrinsic::loongarch_lsx_vsllwil_du_wu:
2116 case Intrinsic::loongarch_lsx_vsrlri_w:
2117 case Intrinsic::loongarch_lsx_vsrari_w:
2118 case Intrinsic::loongarch_lsx_vslei_bu:
2119 case Intrinsic::loongarch_lsx_vslei_hu:
2120 case Intrinsic::loongarch_lsx_vslei_wu:
2121 case Intrinsic::loongarch_lsx_vslei_du:
2122 case Intrinsic::loongarch_lsx_vslti_bu:
2123 case Intrinsic::loongarch_lsx_vslti_hu:
2124 case Intrinsic::loongarch_lsx_vslti_wu:
2125 case Intrinsic::loongarch_lsx_vslti_du:
2126 case Intrinsic::loongarch_lsx_vbsll_v:
2127 case Intrinsic::loongarch_lsx_vbsrl_v:
2128 case Intrinsic::loongarch_lasx_xvsat_w:
2129 case Intrinsic::loongarch_lasx_xvsat_wu:
2130 case Intrinsic::loongarch_lasx_xvrotri_w:
2131 case Intrinsic::loongarch_lasx_xvsllwil_d_w:
2132 case Intrinsic::loongarch_lasx_xvsllwil_du_wu:
2133 case Intrinsic::loongarch_lasx_xvsrlri_w:
2134 case Intrinsic::loongarch_lasx_xvsrari_w:
2135 case Intrinsic::loongarch_lasx_xvslei_bu:
2136 case Intrinsic::loongarch_lasx_xvslei_hu:
2137 case Intrinsic::loongarch_lasx_xvslei_wu:
2138 case Intrinsic::loongarch_lasx_xvslei_du:
2139 case Intrinsic::loongarch_lasx_xvslti_bu:
2140 case Intrinsic::loongarch_lasx_xvslti_hu:
2141 case Intrinsic::loongarch_lasx_xvslti_wu:
2142 case Intrinsic::loongarch_lasx_xvslti_du:
2143 case Intrinsic::loongarch_lasx_xvbsll_v:
2144 case Intrinsic::loongarch_lasx_xvbsrl_v:
2145 return checkIntrinsicImmArg<5>(Op, 2, DAG);
2146 case Intrinsic::loongarch_lsx_vseqi_b:
2147 case Intrinsic::loongarch_lsx_vseqi_h:
2148 case Intrinsic::loongarch_lsx_vseqi_w:
2149 case Intrinsic::loongarch_lsx_vseqi_d:
2150 case Intrinsic::loongarch_lsx_vslei_b:
2151 case Intrinsic::loongarch_lsx_vslei_h:
2152 case Intrinsic::loongarch_lsx_vslei_w:
2153 case Intrinsic::loongarch_lsx_vslei_d:
2154 case Intrinsic::loongarch_lsx_vslti_b:
2155 case Intrinsic::loongarch_lsx_vslti_h:
2156 case Intrinsic::loongarch_lsx_vslti_w:
2157 case Intrinsic::loongarch_lsx_vslti_d:
2158 case Intrinsic::loongarch_lasx_xvseqi_b:
2159 case Intrinsic::loongarch_lasx_xvseqi_h:
2160 case Intrinsic::loongarch_lasx_xvseqi_w:
2161 case Intrinsic::loongarch_lasx_xvseqi_d:
2162 case Intrinsic::loongarch_lasx_xvslei_b:
2163 case Intrinsic::loongarch_lasx_xvslei_h:
2164 case Intrinsic::loongarch_lasx_xvslei_w:
2165 case Intrinsic::loongarch_lasx_xvslei_d:
2166 case Intrinsic::loongarch_lasx_xvslti_b:
2167 case Intrinsic::loongarch_lasx_xvslti_h:
2168 case Intrinsic::loongarch_lasx_xvslti_w:
2169 case Intrinsic::loongarch_lasx_xvslti_d:
2170 return checkIntrinsicImmArg<5>(Op, 2, DAG, /*IsSigned=*/true);
2171 case Intrinsic::loongarch_lsx_vsrlni_h_w:
2172 case Intrinsic::loongarch_lsx_vsrani_h_w:
2173 case Intrinsic::loongarch_lsx_vsrlrni_h_w:
2174 case Intrinsic::loongarch_lsx_vsrarni_h_w:
2175 case Intrinsic::loongarch_lsx_vssrlni_h_w:
2176 case Intrinsic::loongarch_lsx_vssrani_h_w:
2177 case Intrinsic::loongarch_lsx_vssrlni_hu_w:
2178 case Intrinsic::loongarch_lsx_vssrani_hu_w:
2179 case Intrinsic::loongarch_lsx_vssrlrni_h_w:
2180 case Intrinsic::loongarch_lsx_vssrarni_h_w:
2181 case Intrinsic::loongarch_lsx_vssrlrni_hu_w:
2182 case Intrinsic::loongarch_lsx_vssrarni_hu_w:
2183 case Intrinsic::loongarch_lsx_vfrstpi_b:
2184 case Intrinsic::loongarch_lsx_vfrstpi_h:
2185 case Intrinsic::loongarch_lasx_xvsrlni_h_w:
2186 case Intrinsic::loongarch_lasx_xvsrani_h_w:
2187 case Intrinsic::loongarch_lasx_xvsrlrni_h_w:
2188 case Intrinsic::loongarch_lasx_xvsrarni_h_w:
2189 case Intrinsic::loongarch_lasx_xvssrlni_h_w:
2190 case Intrinsic::loongarch_lasx_xvssrani_h_w:
2191 case Intrinsic::loongarch_lasx_xvssrlni_hu_w:
2192 case Intrinsic::loongarch_lasx_xvssrani_hu_w:
2193 case Intrinsic::loongarch_lasx_xvssrlrni_h_w:
2194 case Intrinsic::loongarch_lasx_xvssrarni_h_w:
2195 case Intrinsic::loongarch_lasx_xvssrlrni_hu_w:
2196 case Intrinsic::loongarch_lasx_xvssrarni_hu_w:
2197 case Intrinsic::loongarch_lasx_xvfrstpi_b:
2198 case Intrinsic::loongarch_lasx_xvfrstpi_h:
2199 return checkIntrinsicImmArg<5>(Op, 3, DAG);
2200 case Intrinsic::loongarch_lsx_vsat_d:
2201 case Intrinsic::loongarch_lsx_vsat_du:
2202 case Intrinsic::loongarch_lsx_vrotri_d:
2203 case Intrinsic::loongarch_lsx_vsrlri_d:
2204 case Intrinsic::loongarch_lsx_vsrari_d:
2205 case Intrinsic::loongarch_lasx_xvsat_d:
2206 case Intrinsic::loongarch_lasx_xvsat_du:
2207 case Intrinsic::loongarch_lasx_xvrotri_d:
2208 case Intrinsic::loongarch_lasx_xvsrlri_d:
2209 case Intrinsic::loongarch_lasx_xvsrari_d:
2210 return checkIntrinsicImmArg<6>(Op, 2, DAG);
2211 case Intrinsic::loongarch_lsx_vsrlni_w_d:
2212 case Intrinsic::loongarch_lsx_vsrani_w_d:
2213 case Intrinsic::loongarch_lsx_vsrlrni_w_d:
2214 case Intrinsic::loongarch_lsx_vsrarni_w_d:
2215 case Intrinsic::loongarch_lsx_vssrlni_w_d:
2216 case Intrinsic::loongarch_lsx_vssrani_w_d:
2217 case Intrinsic::loongarch_lsx_vssrlni_wu_d:
2218 case Intrinsic::loongarch_lsx_vssrani_wu_d:
2219 case Intrinsic::loongarch_lsx_vssrlrni_w_d:
2220 case Intrinsic::loongarch_lsx_vssrarni_w_d:
2221 case Intrinsic::loongarch_lsx_vssrlrni_wu_d:
2222 case Intrinsic::loongarch_lsx_vssrarni_wu_d:
2223 case Intrinsic::loongarch_lasx_xvsrlni_w_d:
2224 case Intrinsic::loongarch_lasx_xvsrani_w_d:
2225 case Intrinsic::loongarch_lasx_xvsrlrni_w_d:
2226 case Intrinsic::loongarch_lasx_xvsrarni_w_d:
2227 case Intrinsic::loongarch_lasx_xvssrlni_w_d:
2228 case Intrinsic::loongarch_lasx_xvssrani_w_d:
2229 case Intrinsic::loongarch_lasx_xvssrlni_wu_d:
2230 case Intrinsic::loongarch_lasx_xvssrani_wu_d:
2231 case Intrinsic::loongarch_lasx_xvssrlrni_w_d:
2232 case Intrinsic::loongarch_lasx_xvssrarni_w_d:
2233 case Intrinsic::loongarch_lasx_xvssrlrni_wu_d:
2234 case Intrinsic::loongarch_lasx_xvssrarni_wu_d:
2235 return checkIntrinsicImmArg<6>(Op, 3, DAG);
2236 case Intrinsic::loongarch_lsx_vsrlni_d_q:
2237 case Intrinsic::loongarch_lsx_vsrani_d_q:
2238 case Intrinsic::loongarch_lsx_vsrlrni_d_q:
2239 case Intrinsic::loongarch_lsx_vsrarni_d_q:
2240 case Intrinsic::loongarch_lsx_vssrlni_d_q:
2241 case Intrinsic::loongarch_lsx_vssrani_d_q:
2242 case Intrinsic::loongarch_lsx_vssrlni_du_q:
2243 case Intrinsic::loongarch_lsx_vssrani_du_q:
2244 case Intrinsic::loongarch_lsx_vssrlrni_d_q:
2245 case Intrinsic::loongarch_lsx_vssrarni_d_q:
2246 case Intrinsic::loongarch_lsx_vssrlrni_du_q:
2247 case Intrinsic::loongarch_lsx_vssrarni_du_q:
2248 case Intrinsic::loongarch_lasx_xvsrlni_d_q:
2249 case Intrinsic::loongarch_lasx_xvsrani_d_q:
2250 case Intrinsic::loongarch_lasx_xvsrlrni_d_q:
2251 case Intrinsic::loongarch_lasx_xvsrarni_d_q:
2252 case Intrinsic::loongarch_lasx_xvssrlni_d_q:
2253 case Intrinsic::loongarch_lasx_xvssrani_d_q:
2254 case Intrinsic::loongarch_lasx_xvssrlni_du_q:
2255 case Intrinsic::loongarch_lasx_xvssrani_du_q:
2256 case Intrinsic::loongarch_lasx_xvssrlrni_d_q:
2257 case Intrinsic::loongarch_lasx_xvssrarni_d_q:
2258 case Intrinsic::loongarch_lasx_xvssrlrni_du_q:
2259 case Intrinsic::loongarch_lasx_xvssrarni_du_q:
2260 return checkIntrinsicImmArg<7>(Op, 3, DAG);
2261 case Intrinsic::loongarch_lsx_vnori_b:
2262 case Intrinsic::loongarch_lsx_vshuf4i_b:
2263 case Intrinsic::loongarch_lsx_vshuf4i_h:
2264 case Intrinsic::loongarch_lsx_vshuf4i_w:
2265 case Intrinsic::loongarch_lasx_xvnori_b:
2266 case Intrinsic::loongarch_lasx_xvshuf4i_b:
2267 case Intrinsic::loongarch_lasx_xvshuf4i_h:
2268 case Intrinsic::loongarch_lasx_xvshuf4i_w:
2269 case Intrinsic::loongarch_lasx_xvpermi_d:
2270 return checkIntrinsicImmArg<8>(Op, 2, DAG);
2271 case Intrinsic::loongarch_lsx_vshuf4i_d:
2272 case Intrinsic::loongarch_lsx_vpermi_w:
2273 case Intrinsic::loongarch_lsx_vbitseli_b:
2274 case Intrinsic::loongarch_lsx_vextrins_b:
2275 case Intrinsic::loongarch_lsx_vextrins_h:
2276 case Intrinsic::loongarch_lsx_vextrins_w:
2277 case Intrinsic::loongarch_lsx_vextrins_d:
2278 case Intrinsic::loongarch_lasx_xvshuf4i_d:
2279 case Intrinsic::loongarch_lasx_xvpermi_w:
2280 case Intrinsic::loongarch_lasx_xvpermi_q:
2281 case Intrinsic::loongarch_lasx_xvbitseli_b:
2282 case Intrinsic::loongarch_lasx_xvextrins_b:
2283 case Intrinsic::loongarch_lasx_xvextrins_h:
2284 case Intrinsic::loongarch_lasx_xvextrins_w:
2285 case Intrinsic::loongarch_lasx_xvextrins_d:
2286 return checkIntrinsicImmArg<8>(Op, 3, DAG);
2287 case Intrinsic::loongarch_lsx_vrepli_b:
2288 case Intrinsic::loongarch_lsx_vrepli_h:
2289 case Intrinsic::loongarch_lsx_vrepli_w:
2290 case Intrinsic::loongarch_lsx_vrepli_d:
2291 case Intrinsic::loongarch_lasx_xvrepli_b:
2292 case Intrinsic::loongarch_lasx_xvrepli_h:
2293 case Intrinsic::loongarch_lasx_xvrepli_w:
2294 case Intrinsic::loongarch_lasx_xvrepli_d:
2295 return checkIntrinsicImmArg<10>(Op, 1, DAG, /*IsSigned=*/true);
2296 case Intrinsic::loongarch_lsx_vldi:
2297 case Intrinsic::loongarch_lasx_xvldi:
2298 return checkIntrinsicImmArg<13>(Op, 1, DAG, /*IsSigned=*/true);
2299 }
2300}
2301
2302 // Helper function that emits an error message for intrinsics with a chain and
2303 // returns the merge values of an UNDEF and the chain.
2304 static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op,
2305 StringRef ErrorMsg,
2306 SelectionDAG &DAG) {
2307 DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
2308 return DAG.getMergeValues({DAG.getUNDEF(Op.getValueType()), Op.getOperand(0)},
2309 SDLoc(Op));
2310}
2311
2312SDValue
2313LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
2314 SelectionDAG &DAG) const {
2315 SDLoc DL(Op);
2316 MVT GRLenVT = Subtarget.getGRLenVT();
2317 EVT VT = Op.getValueType();
2318 SDValue Chain = Op.getOperand(0);
2319 const StringRef ErrorMsgOOR = "argument out of range";
2320 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
2321 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
2322
2323 switch (Op.getConstantOperandVal(1)) {
2324 default:
2325 return Op;
2326 case Intrinsic::loongarch_crc_w_b_w:
2327 case Intrinsic::loongarch_crc_w_h_w:
2328 case Intrinsic::loongarch_crc_w_w_w:
2329 case Intrinsic::loongarch_crc_w_d_w:
2330 case Intrinsic::loongarch_crcc_w_b_w:
2331 case Intrinsic::loongarch_crcc_w_h_w:
2332 case Intrinsic::loongarch_crcc_w_w_w:
2333 case Intrinsic::loongarch_crcc_w_d_w:
2334 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqLA64, DAG);
2335 case Intrinsic::loongarch_csrrd_w:
2336 case Intrinsic::loongarch_csrrd_d: {
2337 unsigned Imm = Op.getConstantOperandVal(2);
2338 return !isUInt<14>(Imm)
2339 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
2340 : DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
2341 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
2342 }
2343 case Intrinsic::loongarch_csrwr_w:
2344 case Intrinsic::loongarch_csrwr_d: {
2345 unsigned Imm = Op.getConstantOperandVal(3);
2346 return !isUInt<14>(Imm)
2347 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
2348 : DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
2349 {Chain, Op.getOperand(2),
2350 DAG.getConstant(Imm, DL, GRLenVT)});
2351 }
2352 case Intrinsic::loongarch_csrxchg_w:
2353 case Intrinsic::loongarch_csrxchg_d: {
2354 unsigned Imm = Op.getConstantOperandVal(4);
2355 return !isUInt<14>(Imm)
2356 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
2357 : DAG.getNode(LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
2358 {Chain, Op.getOperand(2), Op.getOperand(3),
2359 DAG.getConstant(Imm, DL, GRLenVT)});
2360 }
2361 case Intrinsic::loongarch_iocsrrd_d: {
2362 return DAG.getNode(
2363 LoongArchISD::IOCSRRD_D, DL, {GRLenVT, MVT::Other},
2364 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2))});
2365 }
2366#define IOCSRRD_CASE(NAME, NODE) \
2367 case Intrinsic::loongarch_##NAME: { \
2368 return DAG.getNode(LoongArchISD::NODE, DL, {GRLenVT, MVT::Other}, \
2369 {Chain, Op.getOperand(2)}); \
2370 }
2371 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
2372 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
2373 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
2374#undef IOCSRRD_CASE
2375 case Intrinsic::loongarch_cpucfg: {
2376 return DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
2377 {Chain, Op.getOperand(2)});
2378 }
2379 case Intrinsic::loongarch_lddir_d: {
2380 unsigned Imm = Op.getConstantOperandVal(3);
2381 return !isUInt<8>(Imm)
2382 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
2383 : Op;
2384 }
2385 case Intrinsic::loongarch_movfcsr2gr: {
2386 if (!Subtarget.hasBasicF())
2387 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqF, DAG);
2388 unsigned Imm = Op.getConstantOperandVal(2);
2389 return !isUInt<2>(Imm)
2390 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
2391 : DAG.getNode(LoongArchISD::MOVFCSR2GR, DL, {VT, MVT::Other},
2392 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
2393 }
2394 case Intrinsic::loongarch_lsx_vld:
2395 case Intrinsic::loongarch_lsx_vldrepl_b:
2396 case Intrinsic::loongarch_lasx_xvld:
2397 case Intrinsic::loongarch_lasx_xvldrepl_b:
2398 return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
2399 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
2400 : SDValue();
2401 case Intrinsic::loongarch_lsx_vldrepl_h:
2402 case Intrinsic::loongarch_lasx_xvldrepl_h:
2403 return !isShiftedInt<11, 1>(
2404 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
2405 ? emitIntrinsicWithChainErrorMessage(
2406 Op, "argument out of range or not a multiple of 2", DAG)
2407 : SDValue();
2408 case Intrinsic::loongarch_lsx_vldrepl_w:
2409 case Intrinsic::loongarch_lasx_xvldrepl_w:
2410 return !isShiftedInt<10, 2>(
2411 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
2412 ? emitIntrinsicWithChainErrorMessage(
2413 Op, "argument out of range or not a multiple of 4", DAG)
2414 : SDValue();
2415 case Intrinsic::loongarch_lsx_vldrepl_d:
2416 case Intrinsic::loongarch_lasx_xvldrepl_d:
2417 return !isShiftedInt<9, 3>(
2418 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
2419 ? emitIntrinsicWithChainErrorMessage(
2420 Op, "argument out of range or not a multiple of 8", DAG)
2421 : SDValue();
2422 }
2423}
2424
2425 // Helper function that emits an error message for intrinsics with a void
2426 // return value and returns the chain.
2427 static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg,
2428 SelectionDAG &DAG) {
2429
2430 DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
2431 return Op.getOperand(0);
2432}
2433
2434SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op,
2435 SelectionDAG &DAG) const {
2436 SDLoc DL(Op);
2437 MVT GRLenVT = Subtarget.getGRLenVT();
2438 SDValue Chain = Op.getOperand(0);
2439 uint64_t IntrinsicEnum = Op.getConstantOperandVal(1);
2440 SDValue Op2 = Op.getOperand(2);
2441 const StringRef ErrorMsgOOR = "argument out of range";
2442 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
2443 const StringRef ErrorMsgReqLA32 = "requires loongarch32";
2444 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
2445
2446 switch (IntrinsicEnum) {
2447 default:
2448 // TODO: Add more Intrinsics.
2449 return SDValue();
2450 case Intrinsic::loongarch_cacop_d:
2451 case Intrinsic::loongarch_cacop_w: {
2452 if (IntrinsicEnum == Intrinsic::loongarch_cacop_d && !Subtarget.is64Bit())
2453 return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG);
2454 if (IntrinsicEnum == Intrinsic::loongarch_cacop_w && Subtarget.is64Bit())
2455 return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA32, DAG);
2456 // call void @llvm.loongarch.cacop.[d/w](uimm5, rj, simm12)
2457 unsigned Imm1 = Op2->getAsZExtVal();
2458 int Imm2 = cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue();
2459 if (!isUInt<5>(Imm1) || !isInt<12>(Imm2))
2460 return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
2461 return Op;
2462 }
2463 case Intrinsic::loongarch_dbar: {
2464 unsigned Imm = Op2->getAsZExtVal();
2465 return !isUInt<15>(Imm)
2466 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2467 : DAG.getNode(LoongArchISD::DBAR, DL, MVT::Other, Chain,
2468 DAG.getConstant(Imm, DL, GRLenVT));
2469 }
2470 case Intrinsic::loongarch_ibar: {
2471 unsigned Imm = Op2->getAsZExtVal();
2472 return !isUInt<15>(Imm)
2473 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2474 : DAG.getNode(LoongArchISD::IBAR, DL, MVT::Other, Chain,
2475 DAG.getConstant(Imm, DL, GRLenVT));
2476 }
2477 case Intrinsic::loongarch_break: {
2478 unsigned Imm = Op2->getAsZExtVal();
2479 return !isUInt<15>(Imm)
2480 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2481 : DAG.getNode(LoongArchISD::BREAK, DL, MVT::Other, Chain,
2482 DAG.getConstant(Imm, DL, GRLenVT));
2483 }
2484 case Intrinsic::loongarch_movgr2fcsr: {
2485 if (!Subtarget.hasBasicF())
2486 return emitIntrinsicErrorMessage(Op, ErrorMsgReqF, DAG);
2487 unsigned Imm = Op2->getAsZExtVal();
2488 return !isUInt<2>(Imm)
2489 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2490 : DAG.getNode(LoongArchISD::MOVGR2FCSR, DL, MVT::Other, Chain,
2491 DAG.getConstant(Imm, DL, GRLenVT),
2492 DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT,
2493 Op.getOperand(3)));
2494 }
2495 case Intrinsic::loongarch_syscall: {
2496 unsigned Imm = Op2->getAsZExtVal();
2497 return !isUInt<15>(Imm)
2498 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2499 : DAG.getNode(LoongArchISD::SYSCALL, DL, MVT::Other, Chain,
2500 DAG.getConstant(Imm, DL, GRLenVT));
2501 }
2502#define IOCSRWR_CASE(NAME, NODE) \
2503 case Intrinsic::loongarch_##NAME: { \
2504 SDValue Op3 = Op.getOperand(3); \
2505 return Subtarget.is64Bit() \
2506 ? DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, \
2507 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
2508 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op3)) \
2509 : DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, Op2, \
2510 Op3); \
2511 }
2512 IOCSRWR_CASE(iocsrwr_b, IOCSRWR_B);
2513 IOCSRWR_CASE(iocsrwr_h, IOCSRWR_H);
2514 IOCSRWR_CASE(iocsrwr_w, IOCSRWR_W);
2515#undef IOCSRWR_CASE
2516 case Intrinsic::loongarch_iocsrwr_d: {
2517 return !Subtarget.is64Bit()
2518 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
2519 : DAG.getNode(LoongArchISD::IOCSRWR_D, DL, MVT::Other, Chain,
2520 Op2,
2521 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
2522 Op.getOperand(3)));
2523 }
2524#define ASRT_LE_GT_CASE(NAME) \
2525 case Intrinsic::loongarch_##NAME: { \
2526 return !Subtarget.is64Bit() \
2527 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG) \
2528 : Op; \
2529 }
2530 ASRT_LE_GT_CASE(asrtle_d)
2531 ASRT_LE_GT_CASE(asrtgt_d)
2532#undef ASRT_LE_GT_CASE
2533 case Intrinsic::loongarch_ldpte_d: {
2534 unsigned Imm = Op.getConstantOperandVal(3);
2535 return !Subtarget.is64Bit()
2536 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
2537 : !isUInt<8>(Imm) ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2538 : Op;
2539 }
2540 case Intrinsic::loongarch_lsx_vst:
2541 case Intrinsic::loongarch_lasx_xvst:
2542 return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue())
2543 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2544 : SDValue();
2545 case Intrinsic::loongarch_lasx_xvstelm_b:
2546 return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2547 !isUInt<5>(Op.getConstantOperandVal(5)))
2548 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2549 : SDValue();
2550 case Intrinsic::loongarch_lsx_vstelm_b:
2551 return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2552 !isUInt<4>(Op.getConstantOperandVal(5)))
2553 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2554 : SDValue();
2555 case Intrinsic::loongarch_lasx_xvstelm_h:
2556 return (!isShiftedInt<8, 1>(
2557 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2558 !isUInt<4>(Op.getConstantOperandVal(5)))
2559 ? emitIntrinsicErrorMessage(
2560 Op, "argument out of range or not a multiple of 2", DAG)
2561 : SDValue();
2562 case Intrinsic::loongarch_lsx_vstelm_h:
2563 return (!isShiftedInt<8, 1>(
2564 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2565 !isUInt<3>(Op.getConstantOperandVal(5)))
2566 ? emitIntrinsicErrorMessage(
2567 Op, "argument out of range or not a multiple of 2", DAG)
2568 : SDValue();
2569 case Intrinsic::loongarch_lasx_xvstelm_w:
2570 return (!isShiftedInt<8, 2>(
2571 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2572 !isUInt<3>(Op.getConstantOperandVal(5)))
2573 ? emitIntrinsicErrorMessage(
2574 Op, "argument out of range or not a multiple of 4", DAG)
2575 : SDValue();
2576 case Intrinsic::loongarch_lsx_vstelm_w:
2577 return (!isShiftedInt<8, 2>(
2578 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2579 !isUInt<2>(Op.getConstantOperandVal(5)))
2580 ? emitIntrinsicErrorMessage(
2581 Op, "argument out of range or not a multiple of 4", DAG)
2582 : SDValue();
2583 case Intrinsic::loongarch_lasx_xvstelm_d:
2584 return (!isShiftedInt<8, 3>(
2585 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2586 !isUInt<2>(Op.getConstantOperandVal(5)))
2587 ? emitIntrinsicErrorMessage(
2588 Op, "argument out of range or not a multiple of 8", DAG)
2589 : SDValue();
2590 case Intrinsic::loongarch_lsx_vstelm_d:
2591 return (!isShiftedInt<8, 3>(
2592 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2593 !isUInt<1>(Op.getConstantOperandVal(5)))
2594 ? emitIntrinsicErrorMessage(
2595 Op, "argument out of range or not a multiple of 8", DAG)
2596 : SDValue();
2597 }
2598}
2599
2600SDValue LoongArchTargetLowering::lowerShiftLeftParts(SDValue Op,
2601 SelectionDAG &DAG) const {
2602 SDLoc DL(Op);
2603 SDValue Lo = Op.getOperand(0);
2604 SDValue Hi = Op.getOperand(1);
2605 SDValue Shamt = Op.getOperand(2);
2606 EVT VT = Lo.getValueType();
2607
2608 // if Shamt-GRLen < 0: // Shamt < GRLen
2609 // Lo = Lo << Shamt
2610 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (GRLen-1 ^ Shamt))
2611 // else:
2612 // Lo = 0
2613 // Hi = Lo << (Shamt-GRLen)
2614
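 // Worked example (illustrative, assuming GRLen == 32): Shamt == 40 takes the
 // "else" arm since Shamt - GRLen == 8 >= 0, giving Lo = 0 and Hi = Lo << 8;
 // Shamt == 5 takes the "if" arm, where the bits shifted out of Lo reach Hi
 // via (Lo >>u 1) >>u (31 ^ 5) == Lo >>u 27.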
2615 SDValue Zero = DAG.getConstant(0, DL, VT);
2616 SDValue One = DAG.getConstant(1, DL, VT);
2617 SDValue MinusGRLen =
2618 DAG.getSignedConstant(-(int)Subtarget.getGRLen(), DL, VT);
2619 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
2620 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
2621 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
2622
2623 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
2624 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
2625 SDValue ShiftRightLo =
2626 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, GRLenMinus1Shamt);
2627 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
2628 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
2629 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusGRLen);
2630
2631 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
2632
2633 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
2634 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
2635
2636 SDValue Parts[2] = {Lo, Hi};
2637 return DAG.getMergeValues(Parts, DL);
2638}
2639
2640SDValue LoongArchTargetLowering::lowerShiftRightParts(SDValue Op,
2641 SelectionDAG &DAG,
2642 bool IsSRA) const {
2643 SDLoc DL(Op);
2644 SDValue Lo = Op.getOperand(0);
2645 SDValue Hi = Op.getOperand(1);
2646 SDValue Shamt = Op.getOperand(2);
2647 EVT VT = Lo.getValueType();
2648
2649 // SRA expansion:
2650 // if Shamt-GRLen < 0: // Shamt < GRLen
2651 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
2652 // Hi = Hi >>s Shamt
2653 // else:
2654 // Lo = Hi >>s (Shamt-GRLen);
2655 // Hi = Hi >>s (GRLen-1)
2656 //
2657 // SRL expansion:
2658 // if Shamt-GRLen < 0: // Shamt < GRLen
2659 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
2660 // Hi = Hi >>u Shamt
2661 // else:
2662 // Lo = Hi >>u (Shamt-GRLen);
2663 // Hi = 0;
2664
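 // Worked example (illustrative, assuming GRLen == 32, SRL): Shamt == 40
 // gives Lo = Hi >>u 8 and Hi = 0; Shamt == 5 gives
 // Lo = (Lo >>u 5) | (Hi << 27) and Hi = Hi >>u 5, where the Hi << 27 term is
 // computed as (Hi << 1) << (5 ^ 31).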
2665 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
2666
2667 SDValue Zero = DAG.getConstant(0, DL, VT);
2668 SDValue One = DAG.getConstant(1, DL, VT);
2669 SDValue MinusGRLen =
2670 DAG.getSignedConstant(-(int)Subtarget.getGRLen(), DL, VT);
2671 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
2672 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
2673 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
2674
2675 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
2676 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
2677 SDValue ShiftLeftHi =
2678 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, GRLenMinus1Shamt);
2679 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
2680 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
2681 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusGRLen);
2682 SDValue HiFalse =
2683 IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, GRLenMinus1) : Zero;
2684
2685 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
2686
2687 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
2688 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
2689
2690 SDValue Parts[2] = {Lo, Hi};
2691 return DAG.getMergeValues(Parts, DL);
2692}
2693
2694// Returns the opcode of the target-specific SDNode that implements the 32-bit
2695// form of the given Opcode.
2696 static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode) {
2697 switch (Opcode) {
2698 default:
2699 llvm_unreachable("Unexpected opcode");
2700 case ISD::SDIV:
2701 return LoongArchISD::DIV_W;
2702 case ISD::UDIV:
2703 return LoongArchISD::DIV_WU;
2704 case ISD::SREM:
2705 return LoongArchISD::MOD_W;
2706 case ISD::UREM:
2707 return LoongArchISD::MOD_WU;
2708 case ISD::SHL:
2709 return LoongArchISD::SLL_W;
2710 case ISD::SRA:
2711 return LoongArchISD::SRA_W;
2712 case ISD::SRL:
2713 return LoongArchISD::SRL_W;
2714 case ISD::ROTL:
2715 case ISD::ROTR:
2716 return LoongArchISD::ROTR_W;
2717 case ISD::CTTZ:
2718 return LoongArchISD::CTZ_W;
2719 case ISD::CTLZ:
2720 return LoongArchISD::CLZ_W;
2721 }
2722}
2723
2724// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
2725 // node. Because i8/i16/i32 aren't legal types for LA64, these operations would
2726 // otherwise be promoted to i64, making it difficult to select the
2727 // SLL_W/.../*W later on, because the fact that the operation was originally of
2728// type i8/i16/i32 is lost.
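// (Illustrative example: an i32 SRL reaching this path on LA64 is rebuilt as
// LoongArchISD::SRL_W over i64 operands that were ANY_EXTENDed from i32 and
// is then truncated back to i32, so srl.w can still be selected.)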
2729 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp,
2730 unsigned ExtOpc = ISD::ANY_EXTEND) {
2731 SDLoc DL(N);
2732 LoongArchISD::NodeType WOpcode = getLoongArchWOpcode(N->getOpcode());
2733 SDValue NewOp0, NewRes;
2734
2735 switch (NumOp) {
2736 default:
2737 llvm_unreachable("Unexpected NumOp");
2738 case 1: {
2739 NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
2740 NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0);
2741 break;
2742 }
2743 case 2: {
2744 NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
2745 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
2746 if (N->getOpcode() == ISD::ROTL) {
2747 SDValue TmpOp = DAG.getConstant(32, DL, MVT::i64);
2748 NewOp1 = DAG.getNode(ISD::SUB, DL, MVT::i64, TmpOp, NewOp1);
2749 }
2750 NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
2751 break;
2752 }
2753 // TODO:Handle more NumOp.
2754 }
2755
2756 // ReplaceNodeResults requires we maintain the same type for the return
2757 // value.
2758 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
2759}
2760
2761// Converts the given 32-bit operation to a i64 operation with signed extension
2762// semantic to reduce the signed extension instructions.
2763 static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
2764 SDLoc DL(N);
2765 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
2766 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
2767 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
2768 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
2769 DAG.getValueType(MVT::i32));
2770 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
2771}
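// Illustrative example: an i32 ADD handled here becomes
//   (trunc (sext_inreg (add (any_extend a), (any_extend b)), i32)),
// which keeps the value sign-extended in the 64-bit register and can then be
// matched to a single add.w without a separate extension instruction.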
2772
2773 // Helper function that emits an error message for intrinsics with or without
2774 // a chain and returns an UNDEF plus, if present, the chain as the results.
2775 static void emitErrorAndReplaceIntrinsicResults(
2776 SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG,
2777 StringRef ErrorMsg, bool WithChain = true) {
2778 DAG.getContext()->emitError(N->getOperationName(0) + ": " + ErrorMsg + ".");
2779 Results.push_back(DAG.getUNDEF(N->getValueType(0)));
2780 if (!WithChain)
2781 return;
2782 Results.push_back(N->getOperand(0));
2783}
2784
2785template <unsigned N>
2786static void
2787 replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl<SDValue> &Results,
2788 SelectionDAG &DAG, const LoongArchSubtarget &Subtarget,
2789 unsigned ResOp) {
2790 const StringRef ErrorMsgOOR = "argument out of range";
2791 unsigned Imm = Node->getConstantOperandVal(2);
2792 if (!isUInt<N>(Imm)) {
2793 emitErrorAndReplaceIntrinsicResults(Node, Results, DAG, ErrorMsgOOR,
2794 /*WithChain=*/false);
2795 return;
2796 }
2797 SDLoc DL(Node);
2798 SDValue Vec = Node->getOperand(1);
2799
2800 SDValue PickElt =
2801 DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec,
2802 DAG.getConstant(Imm, DL, Subtarget.getGRLenVT()),
2803 DAG.getValueType(Vec.getValueType().getVectorElementType()));
2804 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, Node->getValueType(0),
2805 PickElt.getValue(0)));
2806}
2807
2808 static void replaceVecCondBranchResults(SDNode *N,
2809 SmallVectorImpl<SDValue> &Results,
2810 SelectionDAG &DAG,
2811 const LoongArchSubtarget &Subtarget,
2812 unsigned ResOp) {
2813 SDLoc DL(N);
2814 SDValue Vec = N->getOperand(1);
2815
2816 SDValue CB = DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec);
2817 Results.push_back(
2818 DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), CB.getValue(0)));
2819}
2820
2821static void
2822 replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
2823 SelectionDAG &DAG,
2824 const LoongArchSubtarget &Subtarget) {
2825 switch (N->getConstantOperandVal(0)) {
2826 default:
2827 llvm_unreachable("Unexpected Intrinsic.");
2828 case Intrinsic::loongarch_lsx_vpickve2gr_b:
2829 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
2830 LoongArchISD::VPICK_SEXT_ELT);
2831 break;
2832 case Intrinsic::loongarch_lsx_vpickve2gr_h:
2833 case Intrinsic::loongarch_lasx_xvpickve2gr_w:
2834 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
2835 LoongArchISD::VPICK_SEXT_ELT);
2836 break;
2837 case Intrinsic::loongarch_lsx_vpickve2gr_w:
2838 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
2839 LoongArchISD::VPICK_SEXT_ELT);
2840 break;
2841 case Intrinsic::loongarch_lsx_vpickve2gr_bu:
2842 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
2843 LoongArchISD::VPICK_ZEXT_ELT);
2844 break;
2845 case Intrinsic::loongarch_lsx_vpickve2gr_hu:
2846 case Intrinsic::loongarch_lasx_xvpickve2gr_wu:
2847 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
2848 LoongArchISD::VPICK_ZEXT_ELT);
2849 break;
2850 case Intrinsic::loongarch_lsx_vpickve2gr_wu:
2851 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
2852 LoongArchISD::VPICK_ZEXT_ELT);
2853 break;
2854 case Intrinsic::loongarch_lsx_bz_b:
2855 case Intrinsic::loongarch_lsx_bz_h:
2856 case Intrinsic::loongarch_lsx_bz_w:
2857 case Intrinsic::loongarch_lsx_bz_d:
2858 case Intrinsic::loongarch_lasx_xbz_b:
2859 case Intrinsic::loongarch_lasx_xbz_h:
2860 case Intrinsic::loongarch_lasx_xbz_w:
2861 case Intrinsic::loongarch_lasx_xbz_d:
2862 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
2863 LoongArchISD::VALL_ZERO);
2864 break;
2865 case Intrinsic::loongarch_lsx_bz_v:
2866 case Intrinsic::loongarch_lasx_xbz_v:
2867 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
2868 LoongArchISD::VANY_ZERO);
2869 break;
2870 case Intrinsic::loongarch_lsx_bnz_b:
2871 case Intrinsic::loongarch_lsx_bnz_h:
2872 case Intrinsic::loongarch_lsx_bnz_w:
2873 case Intrinsic::loongarch_lsx_bnz_d:
2874 case Intrinsic::loongarch_lasx_xbnz_b:
2875 case Intrinsic::loongarch_lasx_xbnz_h:
2876 case Intrinsic::loongarch_lasx_xbnz_w:
2877 case Intrinsic::loongarch_lasx_xbnz_d:
2878 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
2879 LoongArchISD::VALL_NONZERO);
2880 break;
2881 case Intrinsic::loongarch_lsx_bnz_v:
2882 case Intrinsic::loongarch_lasx_xbnz_v:
2883 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
2884 LoongArchISD::VANY_NONZERO);
2885 break;
2886 }
2887}
2888
2889 void LoongArchTargetLowering::ReplaceNodeResults(
2890 SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
2891 SDLoc DL(N);
2892 EVT VT = N->getValueType(0);
2893 switch (N->getOpcode()) {
2894 default:
2895 llvm_unreachable("Don't know how to legalize this operation");
2896 case ISD::ADD:
2897 case ISD::SUB:
2898 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
2899 "Unexpected custom legalisation");
2900 Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
2901 break;
2902 case ISD::SDIV:
2903 case ISD::UDIV:
2904 case ISD::SREM:
2905 case ISD::UREM:
2906 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
2907 "Unexpected custom legalisation");
2908 Results.push_back(customLegalizeToWOp(N, DAG, 2,
2909 Subtarget.hasDiv32() && VT == MVT::i32
2910 ? ISD::ANY_EXTEND
2911 : ISD::SIGN_EXTEND));
2912 break;
2913 case ISD::SHL:
2914 case ISD::SRA:
2915 case ISD::SRL:
2916 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
2917 "Unexpected custom legalisation");
2918 if (N->getOperand(1).getOpcode() != ISD::Constant) {
2919 Results.push_back(customLegalizeToWOp(N, DAG, 2));
2920 break;
2921 }
2922 break;
2923 case ISD::ROTL:
2924 case ISD::ROTR:
2925 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
2926 "Unexpected custom legalisation");
2927 Results.push_back(customLegalizeToWOp(N, DAG, 2));
2928 break;
2929 case ISD::FP_TO_SINT: {
2930 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
2931 "Unexpected custom legalisation");
2932 SDValue Src = N->getOperand(0);
2933 EVT FVT = EVT::getFloatingPointVT(N->getValueSizeInBits(0));
2934 if (getTypeAction(*DAG.getContext(), Src.getValueType()) !=
2935 TargetLowering::TypeSoftenFloat) {
2936 if (Src.getValueType() == MVT::f16)
2937 Src = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Src);
2938 SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, FVT, Src);
2939 Results.push_back(DAG.getNode(ISD::BITCAST, DL, VT, Dst));
2940 return;
2941 }
2942 // If the FP type needs to be softened, emit a library call using the 'si'
2943 // version. If we left it to default legalization we'd end up with 'di'.
2944 RTLIB::Libcall LC;
2945 LC = RTLIB::getFPTOSINT(Src.getValueType(), VT);
2946 MakeLibCallOptions CallOptions;
2947 EVT OpVT = Src.getValueType();
2948 CallOptions.setTypeListBeforeSoften(OpVT, VT, true);
2949 SDValue Chain = SDValue();
2950 SDValue Result;
2951 std::tie(Result, Chain) =
2952 makeLibCall(DAG, LC, VT, Src, CallOptions, DL, Chain);
2953 Results.push_back(Result);
2954 break;
2955 }
2956 case ISD::BITCAST: {
2957 SDValue Src = N->getOperand(0);
2958 EVT SrcVT = Src.getValueType();
2959 if (VT == MVT::i32 && SrcVT == MVT::f32 && Subtarget.is64Bit() &&
2960 Subtarget.hasBasicF()) {
2961 SDValue Dst =
2962 DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Src);
2963 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Dst));
2964 }
2965 break;
2966 }
2967 case ISD::FP_TO_UINT: {
2968 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
2969 "Unexpected custom legalisation");
2970 auto &TLI = DAG.getTargetLoweringInfo();
2971 SDValue Tmp1, Tmp2;
2972 TLI.expandFP_TO_UINT(N, Tmp1, Tmp2, DAG);
2973 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Tmp1));
2974 break;
2975 }
2976 case ISD::BSWAP: {
2977 SDValue Src = N->getOperand(0);
2978 assert((VT == MVT::i16 || VT == MVT::i32) &&
2979 "Unexpected custom legalization");
2980 MVT GRLenVT = Subtarget.getGRLenVT();
2981 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
2982 SDValue Tmp;
2983 switch (VT.getSizeInBits()) {
2984 default:
2985 llvm_unreachable("Unexpected operand width");
2986 case 16:
2987 Tmp = DAG.getNode(LoongArchISD::REVB_2H, DL, GRLenVT, NewSrc);
2988 break;
2989 case 32:
2990 // Only LA64 will get to here due to the size mismatch between VT and
2991 // GRLenVT, LA32 lowering is directly defined in LoongArchInstrInfo.
2992 Tmp = DAG.getNode(LoongArchISD::REVB_2W, DL, GRLenVT, NewSrc);
2993 break;
2994 }
2995 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
2996 break;
2997 }
2998 case ISD::BITREVERSE: {
2999 SDValue Src = N->getOperand(0);
3000 assert((VT == MVT::i8 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
3001 "Unexpected custom legalization");
3002 MVT GRLenVT = Subtarget.getGRLenVT();
3003 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
3004 SDValue Tmp;
3005 switch (VT.getSizeInBits()) {
3006 default:
3007 llvm_unreachable("Unexpected operand width");
3008 case 8:
3009 Tmp = DAG.getNode(LoongArchISD::BITREV_4B, DL, GRLenVT, NewSrc);
3010 break;
3011 case 32:
3012 Tmp = DAG.getNode(LoongArchISD::BITREV_W, DL, GRLenVT, NewSrc);
3013 break;
3014 }
3015 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
3016 break;
3017 }
3018 case ISD::CTLZ:
3019 case ISD::CTTZ: {
3020 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
3021 "Unexpected custom legalisation");
3022 Results.push_back(customLegalizeToWOp(N, DAG, 1));
3023 break;
3024 }
3025 case ISD::INTRINSIC_W_CHAIN: {
3026 SDValue Chain = N->getOperand(0);
3027 SDValue Op2 = N->getOperand(2);
3028 MVT GRLenVT = Subtarget.getGRLenVT();
3029 const StringRef ErrorMsgOOR = "argument out of range";
3030 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
3031 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
3032
3033 switch (N->getConstantOperandVal(1)) {
3034 default:
3035 llvm_unreachable("Unexpected Intrinsic.");
3036 case Intrinsic::loongarch_movfcsr2gr: {
3037 if (!Subtarget.hasBasicF()) {
3038 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqF);
3039 return;
3040 }
3041 unsigned Imm = Op2->getAsZExtVal();
3042 if (!isUInt<2>(Imm)) {
3043 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
3044 return;
3045 }
3046 SDValue MOVFCSR2GRResults = DAG.getNode(
3047 LoongArchISD::MOVFCSR2GR, SDLoc(N), {MVT::i64, MVT::Other},
3048 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
3049 Results.push_back(
3050 DAG.getNode(ISD::TRUNCATE, DL, VT, MOVFCSR2GRResults.getValue(0)));
3051 Results.push_back(MOVFCSR2GRResults.getValue(1));
3052 break;
3053 }
3054#define CRC_CASE_EXT_BINARYOP(NAME, NODE) \
3055 case Intrinsic::loongarch_##NAME: { \
3056 SDValue NODE = DAG.getNode( \
3057 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
3058 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
3059 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
3060 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
3061 Results.push_back(NODE.getValue(1)); \
3062 break; \
3063 }
3064 CRC_CASE_EXT_BINARYOP(crc_w_b_w, CRC_W_B_W)
3065 CRC_CASE_EXT_BINARYOP(crc_w_h_w, CRC_W_H_W)
3066 CRC_CASE_EXT_BINARYOP(crc_w_w_w, CRC_W_W_W)
3067 CRC_CASE_EXT_BINARYOP(crcc_w_b_w, CRCC_W_B_W)
3068 CRC_CASE_EXT_BINARYOP(crcc_w_h_w, CRCC_W_H_W)
3069 CRC_CASE_EXT_BINARYOP(crcc_w_w_w, CRCC_W_W_W)
3070#undef CRC_CASE_EXT_BINARYOP
3071
3072#define CRC_CASE_EXT_UNARYOP(NAME, NODE) \
3073 case Intrinsic::loongarch_##NAME: { \
3074 SDValue NODE = DAG.getNode( \
3075 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
3076 {Chain, Op2, \
3077 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
3078 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
3079 Results.push_back(NODE.getValue(1)); \
3080 break; \
3081 }
3082 CRC_CASE_EXT_UNARYOP(crc_w_d_w, CRC_W_D_W)
3083 CRC_CASE_EXT_UNARYOP(crcc_w_d_w, CRCC_W_D_W)
3084#undef CRC_CASE_EXT_UNARYOP
3085#define CSR_CASE(ID) \
3086 case Intrinsic::loongarch_##ID: { \
3087 if (!Subtarget.is64Bit()) \
3088 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64); \
3089 break; \
3090 }
3091 CSR_CASE(csrrd_d);
3092 CSR_CASE(csrwr_d);
3093 CSR_CASE(csrxchg_d);
3094 CSR_CASE(iocsrrd_d);
3095#undef CSR_CASE
3096 case Intrinsic::loongarch_csrrd_w: {
3097 unsigned Imm = Op2->getAsZExtVal();
3098 if (!isUInt<14>(Imm)) {
3099 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
3100 return;
3101 }
3102 SDValue CSRRDResults =
3103 DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
3104 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
3105 Results.push_back(
3106 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRRDResults.getValue(0)));
3107 Results.push_back(CSRRDResults.getValue(1));
3108 break;
3109 }
3110 case Intrinsic::loongarch_csrwr_w: {
3111 unsigned Imm = N->getConstantOperandVal(3);
3112 if (!isUInt<14>(Imm)) {
3113 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
3114 return;
3115 }
3116 SDValue CSRWRResults =
3117 DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
3118 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
3119 DAG.getConstant(Imm, DL, GRLenVT)});
3120 Results.push_back(
3121 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRWRResults.getValue(0)));
3122 Results.push_back(CSRWRResults.getValue(1));
3123 break;
3124 }
3125 case Intrinsic::loongarch_csrxchg_w: {
3126 unsigned Imm = N->getConstantOperandVal(4);
3127 if (!isUInt<14>(Imm)) {
3128 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
3129 return;
3130 }
3131 SDValue CSRXCHGResults = DAG.getNode(
3132 LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
3133 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
3134 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3)),
3135 DAG.getConstant(Imm, DL, GRLenVT)});
3136 Results.push_back(
3137 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRXCHGResults.getValue(0)));
3138 Results.push_back(CSRXCHGResults.getValue(1));
3139 break;
3140 }
3141#define IOCSRRD_CASE(NAME, NODE) \
3142 case Intrinsic::loongarch_##NAME: { \
3143 SDValue IOCSRRDResults = \
3144 DAG.getNode(LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
3145 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)}); \
3146 Results.push_back( \
3147 DAG.getNode(ISD::TRUNCATE, DL, VT, IOCSRRDResults.getValue(0))); \
3148 Results.push_back(IOCSRRDResults.getValue(1)); \
3149 break; \
3150 }
3151 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
3152 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
3153 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
3154#undef IOCSRRD_CASE
3155 case Intrinsic::loongarch_cpucfg: {
3156 SDValue CPUCFGResults =
3157 DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
3158 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)});
3159 Results.push_back(
3160 DAG.getNode(ISD::TRUNCATE, DL, VT, CPUCFGResults.getValue(0)));
3161 Results.push_back(CPUCFGResults.getValue(1));
3162 break;
3163 }
3164 case Intrinsic::loongarch_lddir_d: {
3165 if (!Subtarget.is64Bit()) {
3166 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64);
3167 return;
3168 }
3169 break;
3170 }
3171 }
3172 break;
3173 }
3174 case ISD::READ_REGISTER: {
3175 if (Subtarget.is64Bit())
3176 DAG.getContext()->emitError(
3177 "On LA64, only 64-bit registers can be read.");
3178 else
3179 DAG.getContext()->emitError(
3180 "On LA32, only 32-bit registers can be read.");
3181 Results.push_back(DAG.getUNDEF(VT));
3182 Results.push_back(N->getOperand(0));
3183 break;
3184 }
3185 case ISD::INTRINSIC_WO_CHAIN: {
3186 replaceINTRINSIC_WO_CHAINResults(N, Results, DAG, Subtarget);
3187 break;
3188 }
3189 case ISD::LROUND: {
3190 SDValue Op0 = N->getOperand(0);
3191 EVT OpVT = Op0.getValueType();
3192 RTLIB::Libcall LC =
3193 OpVT == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
3194 MakeLibCallOptions CallOptions;
3195 CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64, true);
3196 SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first;
3197 Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
3198 Results.push_back(Result);
3199 break;
3200 }
3201 }
3202}
3203
3204 static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
3205 TargetLowering::DAGCombinerInfo &DCI,
3206 const LoongArchSubtarget &Subtarget) {
3207 if (DCI.isBeforeLegalizeOps())
3208 return SDValue();
3209
3210 SDValue FirstOperand = N->getOperand(0);
3211 SDValue SecondOperand = N->getOperand(1);
3212 unsigned FirstOperandOpc = FirstOperand.getOpcode();
3213 EVT ValTy = N->getValueType(0);
3214 SDLoc DL(N);
3215 uint64_t lsb, msb;
3216 unsigned SMIdx, SMLen;
3217 ConstantSDNode *CN;
3218 SDValue NewOperand;
3219 MVT GRLenVT = Subtarget.getGRLenVT();
3220
3221 // Op's second operand must be a shifted mask.
3222 if (!(CN = dyn_cast<ConstantSDNode>(SecondOperand)) ||
3223 !isShiftedMask_64(CN->getZExtValue(), SMIdx, SMLen))
3224 return SDValue();
3225
3226 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL) {
3227 // Pattern match BSTRPICK.
3228 // $dst = and ((sra or srl) $src , lsb), (2**len - 1)
3229 // => BSTRPICK $dst, $src, msb, lsb
3230 // where msb = lsb + len - 1
3231
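 // e.g. (and (srl $src, 8), 255) can become BSTRPICK $dst, $src, 15, 8
 // (lsb = 8, len = 8, msb = 15), provided the checks below succeed.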
3232 // The second operand of the shift must be an immediate.
3233 if (!(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))))
3234 return SDValue();
3235
3236 lsb = CN->getZExtValue();
3237
3238 // Return if the shifted mask does not start at bit 0 or the sum of its
3239 // length and lsb exceeds the word's size.
3240 if (SMIdx != 0 || lsb + SMLen > ValTy.getSizeInBits())
3241 return SDValue();
3242
3243 NewOperand = FirstOperand.getOperand(0);
3244 } else {
3245 // Pattern match BSTRPICK.
3246 // $dst = and $src, (2**len - 1), if len > 12
3247 // => BSTRPICK $dst, $src, msb, lsb
3248 // where lsb = 0 and msb = len - 1
3249
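 // e.g. (and $src, 0xfffff) (len = 20 > 12) can become
 // BSTRPICK $dst, $src, 19, 0.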
3250 // If the mask is <= 0xfff, andi can be used instead.
3251 if (CN->getZExtValue() <= 0xfff)
3252 return SDValue();
3253
3254 // Return if the MSB exceeds.
3255 if (SMIdx + SMLen > ValTy.getSizeInBits())
3256 return SDValue();
3257
3258 if (SMIdx > 0) {
3259 // Omit if the constant has more than 2 uses. This is a conservative
3260 // decision. Whether it is a win depends on the HW microarchitecture.
3261 // However it should always be better for 1 and 2 uses.
3262 if (CN->use_size() > 2)
3263 return SDValue();
3264 // Return if the constant can be composed by a single LU12I.W.
3265 if ((CN->getZExtValue() & 0xfff) == 0)
3266 return SDValue();
3267 // Return if the constant can be composed by a single ADDI with
3268 // the zero register.
3269 if (CN->getSExtValue() >= -2048 && CN->getSExtValue() < 0)
3270 return SDValue();
3271 }
3272
3273 lsb = SMIdx;
3274 NewOperand = FirstOperand;
3275 }
3276
3277 msb = lsb + SMLen - 1;
3278 SDValue NR0 = DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy, NewOperand,
3279 DAG.getConstant(msb, DL, GRLenVT),
3280 DAG.getConstant(lsb, DL, GRLenVT));
3281 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL || lsb == 0)
3282 return NR0;
3283 // Try to optimize to
3284 // bstrpick $Rd, $Rs, msb, lsb
3285 // slli $Rd, $Rd, lsb
3286 return DAG.getNode(ISD::SHL, DL, ValTy, NR0,
3287 DAG.getConstant(lsb, DL, GRLenVT));
3288}
3289
3290 static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG,
3291 TargetLowering::DAGCombinerInfo &DCI,
3292 const LoongArchSubtarget &Subtarget) {
3293 if (DCI.isBeforeLegalizeOps())
3294 return SDValue();
3295
3296 // $dst = srl (and $src, Mask), Shamt
3297 // =>
3298 // BSTRPICK $dst, $src, MaskIdx+MaskLen-1, Shamt
3299 // when Mask is a shifted mask, and MaskIdx <= Shamt <= MaskIdx+MaskLen-1
3300 //
3301
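 // e.g. (srl (and $src, 0xff00), 8) can become BSTRPICK $dst, $src, 15, 8,
 // with MaskIdx = 8, MaskLen = 8 and Shamt = 8 satisfying the bound above.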
3302 SDValue FirstOperand = N->getOperand(0);
3303 ConstantSDNode *CN;
3304 EVT ValTy = N->getValueType(0);
3305 SDLoc DL(N);
3306 MVT GRLenVT = Subtarget.getGRLenVT();
3307 unsigned MaskIdx, MaskLen;
3308 uint64_t Shamt;
3309
3310 // The first operand must be an AND and the second operand of the AND must be
3311 // a shifted mask.
3312 if (FirstOperand.getOpcode() != ISD::AND ||
3313 !(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))) ||
3314 !isShiftedMask_64(CN->getZExtValue(), MaskIdx, MaskLen))
3315 return SDValue();
3316
3317 // The second operand (shift amount) must be an immediate.
3318 if (!(CN = dyn_cast<ConstantSDNode>(N->getOperand(1))))
3319 return SDValue();
3320
3321 Shamt = CN->getZExtValue();
3322 if (MaskIdx <= Shamt && Shamt <= MaskIdx + MaskLen - 1)
3323 return DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy,
3324 FirstOperand->getOperand(0),
3325 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
3326 DAG.getConstant(Shamt, DL, GRLenVT));
3327
3328 return SDValue();
3329}
3330
3331 static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
3332 TargetLowering::DAGCombinerInfo &DCI,
3333 const LoongArchSubtarget &Subtarget) {
3334 MVT GRLenVT = Subtarget.getGRLenVT();
3335 EVT ValTy = N->getValueType(0);
3336 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
3337 ConstantSDNode *CN0, *CN1;
3338 SDLoc DL(N);
3339 unsigned ValBits = ValTy.getSizeInBits();
3340 unsigned MaskIdx0, MaskLen0, MaskIdx1, MaskLen1;
3341 unsigned Shamt;
3342 bool SwapAndRetried = false;
3343
3344 if (DCI.isBeforeLegalizeOps())
3345 return SDValue();
3346
3347 if (ValBits != 32 && ValBits != 64)
3348 return SDValue();
3349
3350Retry:
3351 // 1st pattern to match BSTRINS:
3352 // R = or (and X, mask0), (and (shl Y, lsb), mask1)
3353 // where mask1 = (2**size - 1) << lsb, mask0 = ~mask1
3354 // =>
3355 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
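 // e.g. with 32-bit values, (or (and X, 0xffff00ff), (and (shl Y, 8), 0xff00))
 // can become BSTRINS X, Y, 15, 8 (size = 8, lsb = 8).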
3356 if (N0.getOpcode() == ISD::AND &&
3357 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
3358 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
3359 N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL &&
3360 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
3361 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
3362 MaskIdx0 == MaskIdx1 && MaskLen0 == MaskLen1 &&
3363 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
3364 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
3365 (MaskIdx0 + MaskLen0 <= ValBits)) {
3366 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 1\n");
3367 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
3368 N1.getOperand(0).getOperand(0),
3369 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
3370 DAG.getConstant(MaskIdx0, DL, GRLenVT));
3371 }
3372
3373 // 2nd pattern to match BSTRINS:
3374 // R = or (and X, mask0), (shl (and Y, mask1), lsb)
3375 // where mask1 = (2**size - 1), mask0 = ~(mask1 << lsb)
3376 // =>
3377 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
3378 if (N0.getOpcode() == ISD::AND &&
3379 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
3380 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
3381 N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
3382 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
3383 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
3384 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
3385 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
3386 MaskLen0 == MaskLen1 && MaskIdx1 == 0 &&
3387 (MaskIdx0 + MaskLen0 <= ValBits)) {
3388 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 2\n");
3389 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
3390 N1.getOperand(0).getOperand(0),
3391 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
3392 DAG.getConstant(MaskIdx0, DL, GRLenVT));
3393 }
3394
3395 // 3rd pattern to match BSTRINS:
3396 // R = or (and X, mask0), (and Y, mask1)
3397 // where ~mask0 = (2**size - 1) << lsb, mask0 & mask1 = 0
3398 // =>
3399 // R = BSTRINS X, (shr (and Y, mask1), lsb), msb, lsb
3400 // where msb = lsb + size - 1
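// For example, with i32, lsb = 8 and size = 8 (so mask0 = 0xffff00ff) and
// mask1 = 0xff00, which is disjoint from mask0:
// R = or (and X, 0xffff00ff), (and Y, 0xff00)
// => R = BSTRINS X, (shr (and Y, 0xff00), 8), 15, 8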
3401 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
3402 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
3403 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
3404 (MaskIdx0 + MaskLen0 <= 64) &&
3405 (CN1 = dyn_cast<ConstantSDNode>(N1->getOperand(1))) &&
3406 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
3407 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 3\n");
3408 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
3409 DAG.getNode(ISD::SRL, DL, N1->getValueType(0), N1,
3410 DAG.getConstant(MaskIdx0, DL, GRLenVT)),
3411 DAG.getConstant(ValBits == 32
3412 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
3413 : (MaskIdx0 + MaskLen0 - 1),
3414 DL, GRLenVT),
3415 DAG.getConstant(MaskIdx0, DL, GRLenVT));
3416 }
3417
3418 // 4th pattern to match BSTRINS:
3419 // R = or (and X, mask), (shl Y, shamt)
3420 // where mask = (2**shamt - 1)
3421 // =>
3422 // R = BSTRINS X, Y, ValBits - 1, shamt
3423 // where ValBits = 32 or 64
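// For example, with i32 and shamt = 12 (so mask = 0xfff):
// R = or (and X, 0xfff), (shl Y, 12)
// => R = BSTRINS X, Y, 31, 12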
3424 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::SHL &&
3425 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
3426 isShiftedMask_64(CN0->getZExtValue(), MaskIdx0, MaskLen0) &&
3427 MaskIdx0 == 0 && (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
3428 (Shamt = CN1->getZExtValue()) == MaskLen0 &&
3429 (MaskIdx0 + MaskLen0 <= ValBits)) {
3430 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 4\n");
3431 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
3432 N1.getOperand(0),
3433 DAG.getConstant((ValBits - 1), DL, GRLenVT),
3434 DAG.getConstant(Shamt, DL, GRLenVT));
3435 }
3436
3437 // 5th pattern to match BSTRINS:
3438 // R = or (and X, mask), const
3439 // where ~mask = (2**size - 1) << lsb, mask & const = 0
3440 // =>
3441 // R = BSTRINS X, (const >> lsb), msb, lsb
3442 // where msb = lsb + size - 1
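// For example, with i32, lsb = 8 and size = 8 (so mask = 0xffff00ff) and
// const = 0x1200, which is disjoint from mask:
// R = or (and X, 0xffff00ff), 0x1200
// => R = BSTRINS X, 0x12, 15, 8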
3443 if (N0.getOpcode() == ISD::AND &&
3444 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
3445 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
3446 (CN1 = dyn_cast<ConstantSDNode>(N1)) &&
3447 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
3448 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 5\n");
3449 return DAG.getNode(
3450 LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
3451 DAG.getConstant(CN1->getSExtValue() >> MaskIdx0, DL, ValTy),
3452 DAG.getConstant(ValBits == 32 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
3453 : (MaskIdx0 + MaskLen0 - 1),
3454 DL, GRLenVT),
3455 DAG.getConstant(MaskIdx0, DL, GRLenVT));
3456 }
3457
3458 // 6th pattern.
3459 // a = b | ((c & mask) << shamt), where all positions in b to be overwritten
3460 // by the incoming bits are known to be zero.
3461 // =>
3462 // a = BSTRINS b, c, shamt + MaskLen - 1, shamt
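// For example, with i32, mask = 0xff and shamt = 4, provided bits 11..4 of b
// are known to be zero:
// a = or b, (shl (and c, 0xff), 4)
// => a = BSTRINS b, c, 11, 4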
3463 //
3464 // Note that the 1st pattern is a special case of the 6th, i.e. the 6th
3465 // pattern is more common than the 1st. So we put the 1st before the 6th in
3466 // order to match as many nodes as possible.
3467 ConstantSDNode *CNMask, *CNShamt;
3468 unsigned MaskIdx, MaskLen;
3469 if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
3470 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
3471 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
3472 MaskIdx == 0 && (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
3473 CNShamt->getZExtValue() + MaskLen <= ValBits) {
3474 Shamt = CNShamt->getZExtValue();
3475 APInt ShMask(ValBits, CNMask->getZExtValue() << Shamt);
3476 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
3477 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 6\n");
3478 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
3479 N1.getOperand(0).getOperand(0),
3480 DAG.getConstant(Shamt + MaskLen - 1, DL, GRLenVT),
3481 DAG.getConstant(Shamt, DL, GRLenVT));
3482 }
3483 }
3484
3485 // 7th pattern.
3486 // a = b | ((c << shamt) & shifted_mask), where all positions in b to be
3487 // overwritten by the incoming bits are known to be zero.
3488 // =>
3489 // a = BSTRINS b, c, MaskIdx + MaskLen - 1, MaskIdx
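// For example, with i32, shifted_mask = 0xff0 (MaskIdx = 4, MaskLen = 8) and
// shamt = 4, provided bits 11..4 of b are known to be zero:
// a = or b, (and (shl c, 4), 0xff0)
// => a = BSTRINS b, c, 11, 4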
3490 //
3491 // Similarly, the 7th pattern is more common than the 2nd. So we put the 2nd
3492 // before the 7th in order to match as many nodes as possible.
3493 if (N1.getOpcode() == ISD::AND &&
3494 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
3495 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
3496 N1.getOperand(0).getOpcode() == ISD::SHL &&
3497 (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
3498 CNShamt->getZExtValue() == MaskIdx) {
3499 APInt ShMask(ValBits, CNMask->getZExtValue());
3500 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
3501 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 7\n");
3502 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
3503 N1.getOperand(0).getOperand(0),
3504 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
3505 DAG.getConstant(MaskIdx, DL, GRLenVT));
3506 }
3507 }
3508
3509 // (or a, b) and (or b, a) are equivalent, so swap the operands and retry.
3510 if (!SwapAndRetried) {
3511 std::swap(N0, N1);
3512 SwapAndRetried = true;
3513 goto Retry;
3514 }
3515
3516 SwapAndRetried = false;
3517Retry2:
3518 // 8th pattern.
3519 // a = b | (c & shifted_mask), where all positions in b to be overwritten by
3520 // the incoming bits are known to be zero.
3521 // =>
3522 // a = BSTRINS b, c >> MaskIdx, MaskIdx + MaskLen - 1, MaskIdx
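// For example, with i32 and shifted_mask = 0xff00 (MaskIdx = 8, MaskLen = 8),
// provided bits 15..8 of b are known to be zero:
// a = or b, (and c, 0xff00)
// => a = BSTRINS b, (srl c, 8), 15, 8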
3523 //
3524 // Similarly, the 8th pattern is more common than the 4th and 5th patterns. So
3525 // we put it here in order to match as many nodes as possible or generate fewer
3526 // instructions.
3527 if (N1.getOpcode() == ISD::AND &&
3528 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
3529 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen)) {
3530 APInt ShMask(ValBits, CNMask->getZExtValue());
3531 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
3532 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 8\n");
3533 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
3534 DAG.getNode(ISD::SRL, DL, N1->getValueType(0),
3535 N1->getOperand(0),
3536 DAG.getConstant(MaskIdx, DL, GRLenVT)),
3537 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
3538 DAG.getConstant(MaskIdx, DL, GRLenVT));
3539 }
3540 }
3541 // Swap N0/N1 and retry.
3542 if (!SwapAndRetried) {
3543 std::swap(N0, N1);
3544 SwapAndRetried = true;
3545 goto Retry2;
3546 }
3547
3548 return SDValue();
3549}
3550
3551static bool checkValueWidth(SDValue V, ISD::LoadExtType &ExtType) {
3552 ExtType = ISD::NON_EXTLOAD;
3553
3554 switch (V.getNode()->getOpcode()) {
3555 case ISD::LOAD: {
3556 LoadSDNode *LoadNode = cast<LoadSDNode>(V.getNode());
3557 if ((LoadNode->getMemoryVT() == MVT::i8) ||
3558 (LoadNode->getMemoryVT() == MVT::i16)) {
3559 ExtType = LoadNode->getExtensionType();
3560 return true;
3561 }
3562 return false;
3563 }
3564 case ISD::AssertSext: {
3565 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
3566 if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
3567 ExtType = ISD::SEXTLOAD;
3568 return true;
3569 }
3570 return false;
3571 }
3572 case ISD::AssertZext: {
3573 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
3574 if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
3575 ExtType = ISD::ZEXTLOAD;
3576 return true;
3577 }
3578 return false;
3579 }
3580 default:
3581 return false;
3582 }
3583
3584 return false;
3585}
3586
3587// Eliminate redundant truncation and zero-extension nodes.
3588// * Case 1:
3589// +------------+ +------------+ +------------+
3590// | Input1 | | Input2 | | CC |
3591// +------------+ +------------+ +------------+
3592// | | |
3593// V V +----+
3594// +------------+ +------------+ |
3595// | TRUNCATE | | TRUNCATE | |
3596// +------------+ +------------+ |
3597// | | |
3598// V V |
3599// +------------+ +------------+ |
3600// | ZERO_EXT | | ZERO_EXT | |
3601// +------------+ +------------+ |
3602// | | |
3603// | +-------------+ |
3604// V V | |
3605// +----------------+ | |
3606// | AND | | |
3607// +----------------+ | |
3608// | | |
3609// +---------------+ | |
3610// | | |
3611// V V V
3612// +-------------+
3613// | CMP |
3614// +-------------+
3615// * Case 2:
3616// +------------+ +------------+ +-------------+ +------------+ +------------+
3617// | Input1 | | Input2 | | Constant -1 | | Constant 0 | | CC |
3618// +------------+ +------------+ +-------------+ +------------+ +------------+
3619// | | | | |
3620// V | | | |
3621// +------------+ | | | |
3622// | XOR |<---------------------+ | |
3623// +------------+ | | |
3624// | | | |
3625// V V +---------------+ |
3626// +------------+ +------------+ | |
3627// | TRUNCATE | | TRUNCATE | | +-------------------------+
3628// +------------+ +------------+ | |
3629// | | | |
3630// V V | |
3631// +------------+ +------------+ | |
3632// | ZERO_EXT | | ZERO_EXT | | |
3633// +------------+ +------------+ | |
3634// | | | |
3635// V V | |
3636// +----------------+ | |
3637// | AND | | |
3638// +----------------+ | |
3639// | | |
3640// +---------------+ | |
3641// | | |
3642// V V V
3643// +-------------+
3644// | CMP |
3645// +-------------+
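// For example (Case 1, operand names are illustrative), with a and b being
// narrow (i8/i16) values that satisfy the value-width checks below:
// setcc (and (zext (trunc a)), (zext (trunc b))), (zext (trunc b)), cc
// =>
// setcc (and a, b), b, cc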
3646static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG,
3647 TargetLowering::DAGCombinerInfo &DCI,
3648 const LoongArchSubtarget &Subtarget) {
3649 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
3650
3651 SDNode *AndNode = N->getOperand(0).getNode();
3652 if (AndNode->getOpcode() != ISD::AND)
3653 return SDValue();
3654
3655 SDValue AndInputValue2 = AndNode->getOperand(1);
3656 if (AndInputValue2.getOpcode() != ISD::ZERO_EXTEND)
3657 return SDValue();
3658
3659 SDValue CmpInputValue = N->getOperand(1);
3660 SDValue AndInputValue1 = AndNode->getOperand(0);
3661 if (AndInputValue1.getOpcode() == ISD::XOR) {
3662 if (CC != ISD::SETEQ && CC != ISD::SETNE)
3663 return SDValue();
3664 ConstantSDNode *CN = dyn_cast<ConstantSDNode>(AndInputValue1.getOperand(1));
3665 if (!CN || CN->getSExtValue() != -1)
3666 return SDValue();
3667 CN = dyn_cast<ConstantSDNode>(CmpInputValue);
3668 if (!CN || CN->getSExtValue() != 0)
3669 return SDValue();
3670 AndInputValue1 = AndInputValue1.getOperand(0);
3671 if (AndInputValue1.getOpcode() != ISD::ZERO_EXTEND)
3672 return SDValue();
3673 } else if (AndInputValue1.getOpcode() == ISD::ZERO_EXTEND) {
3674 if (AndInputValue2 != CmpInputValue)
3675 return SDValue();
3676 } else {
3677 return SDValue();
3678 }
3679
3680 SDValue TruncValue1 = AndInputValue1.getNode()->getOperand(0);
3681 if (TruncValue1.getOpcode() != ISD::TRUNCATE)
3682 return SDValue();
3683
3684 SDValue TruncValue2 = AndInputValue2.getNode()->getOperand(0);
3685 if (TruncValue2.getOpcode() != ISD::TRUNCATE)
3686 return SDValue();
3687
3688 SDValue TruncInputValue1 = TruncValue1.getNode()->getOperand(0);
3689 SDValue TruncInputValue2 = TruncValue2.getNode()->getOperand(0);
3690 ISD::LoadExtType ExtType1;
3691 ISD::LoadExtType ExtType2;
3692
3693 if (!checkValueWidth(TruncInputValue1, ExtType1) ||
3694 !checkValueWidth(TruncInputValue2, ExtType2))
3695 return SDValue();
3696
3697 if (TruncInputValue1->getValueType(0) != TruncInputValue2->getValueType(0) ||
3698 AndNode->getValueType(0) != TruncInputValue1->getValueType(0))
3699 return SDValue();
3700
3701 if ((ExtType2 != ISD::ZEXTLOAD) &&
3702 ((ExtType2 != ISD::SEXTLOAD) && (ExtType1 != ISD::SEXTLOAD)))
3703 return SDValue();
3704
3705 // These truncation and zero-extension nodes are not necessary, remove them.
3706 SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N), AndNode->getValueType(0),
3707 TruncInputValue1, TruncInputValue2);
3708 SDValue NewSetCC =
3709 DAG.getSetCC(SDLoc(N), N->getValueType(0), NewAnd, TruncInputValue2, CC);
3710 DAG.ReplaceAllUsesWith(N, NewSetCC.getNode());
3711 return SDValue(N, 0);
3712}
3713
3714// Combine (loongarch_bitrev_w (loongarch_revb_2w X)) to loongarch_bitrev_4b.
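// Byte-swapping a 32-bit word and then reversing all of its bits leaves every
// byte in its original position with only the bits within each byte reversed,
// which is exactly what BITREV_4B computes.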
3715static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG,
3716 TargetLowering::DAGCombinerInfo &DCI,
3717 const LoongArchSubtarget &Subtarget) {
3718 if (DCI.isBeforeLegalizeOps())
3719 return SDValue();
3720
3721 SDValue Src = N->getOperand(0);
3722 if (Src.getOpcode() != LoongArchISD::REVB_2W)
3723 return SDValue();
3724
3725 return DAG.getNode(LoongArchISD::BITREV_4B, SDLoc(N), N->getValueType(0),
3726 Src.getOperand(0));
3727}
3728
3729template <unsigned N>
3730static SDValue legalizeIntrinsicImmArg(SDNode *Node, unsigned ImmOp,
3731 SelectionDAG &DAG,
3732 const LoongArchSubtarget &Subtarget,
3733 bool IsSigned = false) {
3734 SDLoc DL(Node);
3735 auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
3736 // Check the ImmArg.
3737 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
3738 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
3739 DAG.getContext()->emitError(Node->getOperationName(0) +
3740 ": argument out of range.");
3741 return DAG.getNode(ISD::UNDEF, DL, Subtarget.getGRLenVT());
3742 }
3743 return DAG.getConstant(CImm->getZExtValue(), DL, Subtarget.getGRLenVT());
3744}
3745
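// Build a vector splat of the immediate operand ImmOp, first checking that it
// fits in an N-bit signed or unsigned field. For example, the vaddi.wu
// intrinsic is lowered below as an ADD of the vector operand and a 5-bit
// unsigned immediate splatted by this helper.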
3746template <unsigned N>
3747static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp,
3748 SelectionDAG &DAG, bool IsSigned = false) {
3749 SDLoc DL(Node);
3750 EVT ResTy = Node->getValueType(0);
3751 auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
3752
3753 // Check the ImmArg.
3754 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
3755 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
3756 DAG.getContext()->emitError(Node->getOperationName(0) +
3757 ": argument out of range.");
3758 return DAG.getNode(ISD::UNDEF, DL, ResTy);
3759 }
3760 return DAG.getConstant(
3761 APInt(ResTy.getScalarType().getSizeInBits(),
3762 IsSigned ? CImm->getSExtValue() : CImm->getZExtValue(), IsSigned),
3763 DL, ResTy);
3764}
3765
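// Mask each element of the shift-amount vector (operand 2) down to the
// element bit width, e.g. AND with a splat of 31 for 32-bit elements, so that
// the generic shift and bit-manipulation nodes built below only see in-range
// amounts.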
3766static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG) {
3767 SDLoc DL(Node);
3768 EVT ResTy = Node->getValueType(0);
3769 SDValue Vec = Node->getOperand(2);
3770 SDValue Mask = DAG.getConstant(Vec.getScalarValueSizeInBits() - 1, DL, ResTy);
3771 return DAG.getNode(ISD::AND, DL, ResTy, Vec, Mask);
3772}
3773
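// Lower vbitclr.* by clearing, in each element of operand 1, the bit selected
// by the corresponding element of operand 2 (taken modulo the element width):
// result = Op1 & ~(1 << (Op2 & (EltBits - 1))).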
3774static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG) {
3775 SDLoc DL(Node);
3776 EVT ResTy = Node->getValueType(0);
3777 SDValue One = DAG.getConstant(1, DL, ResTy);
3778 SDValue Bit =
3779 DAG.getNode(ISD::SHL, DL, ResTy, One, truncateVecElts(Node, DAG));
3780
3781 return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1),
3782 DAG.getNOT(DL, Bit, ResTy));
3783}
3784
3785template <unsigned N>
3786static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG) {
3787 SDLoc DL(Node);
3788 EVT ResTy = Node->getValueType(0);
3789 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
3790 // Check the unsigned ImmArg.
3791 if (!isUInt<N>(CImm->getZExtValue())) {
3792 DAG.getContext()->emitError(Node->getOperationName(0) +
3793 ": argument out of range.");
3794 return DAG.getNode(ISD::UNDEF, DL, ResTy);
3795 }
3796
3797 APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
3798 SDValue Mask = DAG.getConstant(~BitImm, DL, ResTy);
3799
3800 return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1), Mask);
3801}
3802
3803template <unsigned N>
3804static SDValue lowerVectorBitSetImm(SDNode *Node, SelectionDAG &DAG) {
3805 SDLoc DL(Node);
3806 EVT ResTy = Node->getValueType(0);
3807 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
3808 // Check the unsigned ImmArg.
3809 if (!isUInt<N>(CImm->getZExtValue())) {
3810 DAG.getContext()->emitError(Node->getOperationName(0) +
3811 ": argument out of range.");
3812 return DAG.getNode(ISD::UNDEF, DL, ResTy);
3813 }
3814
3815 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
3816 SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
3817 return DAG.getNode(ISD::OR, DL, ResTy, Node->getOperand(1), BitImm);
3818}
3819
3820template <unsigned N>
3821static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG) {
3822 SDLoc DL(Node);
3823 EVT ResTy = Node->getValueType(0);
3824 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
3825 // Check the unsigned ImmArg.
3826 if (!isUInt<N>(CImm->getZExtValue())) {
3827 DAG.getContext()->emitError(Node->getOperationName(0) +
3828 ": argument out of range.");
3829 return DAG.getNode(ISD::UNDEF, DL, ResTy);
3830 }
3831
3832 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
3833 SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
3834 return DAG.getNode(ISD::XOR, DL, ResTy, Node->getOperand(1), BitImm);
3835}
3836
3837static SDValue
3838performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG,
3839 TargetLowering::DAGCombinerInfo &DCI,
3840 const LoongArchSubtarget &Subtarget) {
3841 SDLoc DL(N);
3842 switch (N->getConstantOperandVal(0)) {
3843 default:
3844 break;
3845 case Intrinsic::loongarch_lsx_vadd_b:
3846 case Intrinsic::loongarch_lsx_vadd_h:
3847 case Intrinsic::loongarch_lsx_vadd_w:
3848 case Intrinsic::loongarch_lsx_vadd_d:
3849 case Intrinsic::loongarch_lasx_xvadd_b:
3850 case Intrinsic::loongarch_lasx_xvadd_h:
3851 case Intrinsic::loongarch_lasx_xvadd_w:
3852 case Intrinsic::loongarch_lasx_xvadd_d:
3853 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
3854 N->getOperand(2));
3855 case Intrinsic::loongarch_lsx_vaddi_bu:
3856 case Intrinsic::loongarch_lsx_vaddi_hu:
3857 case Intrinsic::loongarch_lsx_vaddi_wu:
3858 case Intrinsic::loongarch_lsx_vaddi_du:
3859 case Intrinsic::loongarch_lasx_xvaddi_bu:
3860 case Intrinsic::loongarch_lasx_xvaddi_hu:
3861 case Intrinsic::loongarch_lasx_xvaddi_wu:
3862 case Intrinsic::loongarch_lasx_xvaddi_du:
3863 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
3864 lowerVectorSplatImm<5>(N, 2, DAG));
3865 case Intrinsic::loongarch_lsx_vsub_b:
3866 case Intrinsic::loongarch_lsx_vsub_h:
3867 case Intrinsic::loongarch_lsx_vsub_w:
3868 case Intrinsic::loongarch_lsx_vsub_d:
3869 case Intrinsic::loongarch_lasx_xvsub_b:
3870 case Intrinsic::loongarch_lasx_xvsub_h:
3871 case Intrinsic::loongarch_lasx_xvsub_w:
3872 case Intrinsic::loongarch_lasx_xvsub_d:
3873 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
3874 N->getOperand(2));
3875 case Intrinsic::loongarch_lsx_vsubi_bu:
3876 case Intrinsic::loongarch_lsx_vsubi_hu:
3877 case Intrinsic::loongarch_lsx_vsubi_wu:
3878 case Intrinsic::loongarch_lsx_vsubi_du:
3879 case Intrinsic::loongarch_lasx_xvsubi_bu:
3880 case Intrinsic::loongarch_lasx_xvsubi_hu:
3881 case Intrinsic::loongarch_lasx_xvsubi_wu:
3882 case Intrinsic::loongarch_lasx_xvsubi_du:
3883 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
3884 lowerVectorSplatImm<5>(N, 2, DAG));
3885 case Intrinsic::loongarch_lsx_vneg_b:
3886 case Intrinsic::loongarch_lsx_vneg_h:
3887 case Intrinsic::loongarch_lsx_vneg_w:
3888 case Intrinsic::loongarch_lsx_vneg_d:
3889 case Intrinsic::loongarch_lasx_xvneg_b:
3890 case Intrinsic::loongarch_lasx_xvneg_h:
3891 case Intrinsic::loongarch_lasx_xvneg_w:
3892 case Intrinsic::loongarch_lasx_xvneg_d:
3893 return DAG.getNode(
3894 ISD::SUB, DL, N->getValueType(0),
3895 DAG.getConstant(
3896 APInt(N->getValueType(0).getScalarType().getSizeInBits(), 0,
3897 /*isSigned=*/true),
3898 SDLoc(N), N->getValueType(0)),
3899 N->getOperand(1));
3900 case Intrinsic::loongarch_lsx_vmax_b:
3901 case Intrinsic::loongarch_lsx_vmax_h:
3902 case Intrinsic::loongarch_lsx_vmax_w:
3903 case Intrinsic::loongarch_lsx_vmax_d:
3904 case Intrinsic::loongarch_lasx_xvmax_b:
3905 case Intrinsic::loongarch_lasx_xvmax_h:
3906 case Intrinsic::loongarch_lasx_xvmax_w:
3907 case Intrinsic::loongarch_lasx_xvmax_d:
3908 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
3909 N->getOperand(2));
3910 case Intrinsic::loongarch_lsx_vmax_bu:
3911 case Intrinsic::loongarch_lsx_vmax_hu:
3912 case Intrinsic::loongarch_lsx_vmax_wu:
3913 case Intrinsic::loongarch_lsx_vmax_du:
3914 case Intrinsic::loongarch_lasx_xvmax_bu:
3915 case Intrinsic::loongarch_lasx_xvmax_hu:
3916 case Intrinsic::loongarch_lasx_xvmax_wu:
3917 case Intrinsic::loongarch_lasx_xvmax_du:
3918 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
3919 N->getOperand(2));
3920 case Intrinsic::loongarch_lsx_vmaxi_b:
3921 case Intrinsic::loongarch_lsx_vmaxi_h:
3922 case Intrinsic::loongarch_lsx_vmaxi_w:
3923 case Intrinsic::loongarch_lsx_vmaxi_d:
3924 case Intrinsic::loongarch_lasx_xvmaxi_b:
3925 case Intrinsic::loongarch_lasx_xvmaxi_h:
3926 case Intrinsic::loongarch_lasx_xvmaxi_w:
3927 case Intrinsic::loongarch_lasx_xvmaxi_d:
3928 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
3929 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
3930 case Intrinsic::loongarch_lsx_vmaxi_bu:
3931 case Intrinsic::loongarch_lsx_vmaxi_hu:
3932 case Intrinsic::loongarch_lsx_vmaxi_wu:
3933 case Intrinsic::loongarch_lsx_vmaxi_du:
3934 case Intrinsic::loongarch_lasx_xvmaxi_bu:
3935 case Intrinsic::loongarch_lasx_xvmaxi_hu:
3936 case Intrinsic::loongarch_lasx_xvmaxi_wu:
3937 case Intrinsic::loongarch_lasx_xvmaxi_du:
3938 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
3939 lowerVectorSplatImm<5>(N, 2, DAG));
3940 case Intrinsic::loongarch_lsx_vmin_b:
3941 case Intrinsic::loongarch_lsx_vmin_h:
3942 case Intrinsic::loongarch_lsx_vmin_w:
3943 case Intrinsic::loongarch_lsx_vmin_d:
3944 case Intrinsic::loongarch_lasx_xvmin_b:
3945 case Intrinsic::loongarch_lasx_xvmin_h:
3946 case Intrinsic::loongarch_lasx_xvmin_w:
3947 case Intrinsic::loongarch_lasx_xvmin_d:
3948 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
3949 N->getOperand(2));
3950 case Intrinsic::loongarch_lsx_vmin_bu:
3951 case Intrinsic::loongarch_lsx_vmin_hu:
3952 case Intrinsic::loongarch_lsx_vmin_wu:
3953 case Intrinsic::loongarch_lsx_vmin_du:
3954 case Intrinsic::loongarch_lasx_xvmin_bu:
3955 case Intrinsic::loongarch_lasx_xvmin_hu:
3956 case Intrinsic::loongarch_lasx_xvmin_wu:
3957 case Intrinsic::loongarch_lasx_xvmin_du:
3958 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
3959 N->getOperand(2));
3960 case Intrinsic::loongarch_lsx_vmini_b:
3961 case Intrinsic::loongarch_lsx_vmini_h:
3962 case Intrinsic::loongarch_lsx_vmini_w:
3963 case Intrinsic::loongarch_lsx_vmini_d:
3964 case Intrinsic::loongarch_lasx_xvmini_b:
3965 case Intrinsic::loongarch_lasx_xvmini_h:
3966 case Intrinsic::loongarch_lasx_xvmini_w:
3967 case Intrinsic::loongarch_lasx_xvmini_d:
3968 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
3969 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
3970 case Intrinsic::loongarch_lsx_vmini_bu:
3971 case Intrinsic::loongarch_lsx_vmini_hu:
3972 case Intrinsic::loongarch_lsx_vmini_wu:
3973 case Intrinsic::loongarch_lsx_vmini_du:
3974 case Intrinsic::loongarch_lasx_xvmini_bu:
3975 case Intrinsic::loongarch_lasx_xvmini_hu:
3976 case Intrinsic::loongarch_lasx_xvmini_wu:
3977 case Intrinsic::loongarch_lasx_xvmini_du:
3978 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
3979 lowerVectorSplatImm<5>(N, 2, DAG));
3980 case Intrinsic::loongarch_lsx_vmul_b:
3981 case Intrinsic::loongarch_lsx_vmul_h:
3982 case Intrinsic::loongarch_lsx_vmul_w:
3983 case Intrinsic::loongarch_lsx_vmul_d:
3984 case Intrinsic::loongarch_lasx_xvmul_b:
3985 case Intrinsic::loongarch_lasx_xvmul_h:
3986 case Intrinsic::loongarch_lasx_xvmul_w:
3987 case Intrinsic::loongarch_lasx_xvmul_d:
3988 return DAG.getNode(ISD::MUL, DL, N->getValueType(0), N->getOperand(1),
3989 N->getOperand(2));
3990 case Intrinsic::loongarch_lsx_vmadd_b:
3991 case Intrinsic::loongarch_lsx_vmadd_h:
3992 case Intrinsic::loongarch_lsx_vmadd_w:
3993 case Intrinsic::loongarch_lsx_vmadd_d:
3994 case Intrinsic::loongarch_lasx_xvmadd_b:
3995 case Intrinsic::loongarch_lasx_xvmadd_h:
3996 case Intrinsic::loongarch_lasx_xvmadd_w:
3997 case Intrinsic::loongarch_lasx_xvmadd_d: {
3998 EVT ResTy = N->getValueType(0);
3999 return DAG.getNode(ISD::ADD, SDLoc(N), ResTy, N->getOperand(1),
4000 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
4001 N->getOperand(3)));
4002 }
4003 case Intrinsic::loongarch_lsx_vmsub_b:
4004 case Intrinsic::loongarch_lsx_vmsub_h:
4005 case Intrinsic::loongarch_lsx_vmsub_w:
4006 case Intrinsic::loongarch_lsx_vmsub_d:
4007 case Intrinsic::loongarch_lasx_xvmsub_b:
4008 case Intrinsic::loongarch_lasx_xvmsub_h:
4009 case Intrinsic::loongarch_lasx_xvmsub_w:
4010 case Intrinsic::loongarch_lasx_xvmsub_d: {
4011 EVT ResTy = N->getValueType(0);
4012 return DAG.getNode(ISD::SUB, SDLoc(N), ResTy, N->getOperand(1),
4013 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
4014 N->getOperand(3)));
4015 }
4016 case Intrinsic::loongarch_lsx_vdiv_b:
4017 case Intrinsic::loongarch_lsx_vdiv_h:
4018 case Intrinsic::loongarch_lsx_vdiv_w:
4019 case Intrinsic::loongarch_lsx_vdiv_d:
4020 case Intrinsic::loongarch_lasx_xvdiv_b:
4021 case Intrinsic::loongarch_lasx_xvdiv_h:
4022 case Intrinsic::loongarch_lasx_xvdiv_w:
4023 case Intrinsic::loongarch_lasx_xvdiv_d:
4024 return DAG.getNode(ISD::SDIV, DL, N->getValueType(0), N->getOperand(1),
4025 N->getOperand(2));
4026 case Intrinsic::loongarch_lsx_vdiv_bu:
4027 case Intrinsic::loongarch_lsx_vdiv_hu:
4028 case Intrinsic::loongarch_lsx_vdiv_wu:
4029 case Intrinsic::loongarch_lsx_vdiv_du:
4030 case Intrinsic::loongarch_lasx_xvdiv_bu:
4031 case Intrinsic::loongarch_lasx_xvdiv_hu:
4032 case Intrinsic::loongarch_lasx_xvdiv_wu:
4033 case Intrinsic::loongarch_lasx_xvdiv_du:
4034 return DAG.getNode(ISD::UDIV, DL, N->getValueType(0), N->getOperand(1),
4035 N->getOperand(2));
4036 case Intrinsic::loongarch_lsx_vmod_b:
4037 case Intrinsic::loongarch_lsx_vmod_h:
4038 case Intrinsic::loongarch_lsx_vmod_w:
4039 case Intrinsic::loongarch_lsx_vmod_d:
4040 case Intrinsic::loongarch_lasx_xvmod_b:
4041 case Intrinsic::loongarch_lasx_xvmod_h:
4042 case Intrinsic::loongarch_lasx_xvmod_w:
4043 case Intrinsic::loongarch_lasx_xvmod_d:
4044 return DAG.getNode(ISD::SREM, DL, N->getValueType(0), N->getOperand(1),
4045 N->getOperand(2));
4046 case Intrinsic::loongarch_lsx_vmod_bu:
4047 case Intrinsic::loongarch_lsx_vmod_hu:
4048 case Intrinsic::loongarch_lsx_vmod_wu:
4049 case Intrinsic::loongarch_lsx_vmod_du:
4050 case Intrinsic::loongarch_lasx_xvmod_bu:
4051 case Intrinsic::loongarch_lasx_xvmod_hu:
4052 case Intrinsic::loongarch_lasx_xvmod_wu:
4053 case Intrinsic::loongarch_lasx_xvmod_du:
4054 return DAG.getNode(ISD::UREM, DL, N->getValueType(0), N->getOperand(1),
4055 N->getOperand(2));
4056 case Intrinsic::loongarch_lsx_vand_v:
4057 case Intrinsic::loongarch_lasx_xvand_v:
4058 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
4059 N->getOperand(2));
4060 case Intrinsic::loongarch_lsx_vor_v:
4061 case Intrinsic::loongarch_lasx_xvor_v:
4062 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
4063 N->getOperand(2));
4064 case Intrinsic::loongarch_lsx_vxor_v:
4065 case Intrinsic::loongarch_lasx_xvxor_v:
4066 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
4067 N->getOperand(2));
4068 case Intrinsic::loongarch_lsx_vnor_v:
4069 case Intrinsic::loongarch_lasx_xvnor_v: {
4070 SDValue Res = DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
4071 N->getOperand(2));
4072 return DAG.getNOT(DL, Res, Res->getValueType(0));
4073 }
4074 case Intrinsic::loongarch_lsx_vandi_b:
4075 case Intrinsic::loongarch_lasx_xvandi_b:
4076 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
4077 lowerVectorSplatImm<8>(N, 2, DAG));
4078 case Intrinsic::loongarch_lsx_vori_b:
4079 case Intrinsic::loongarch_lasx_xvori_b:
4080 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
4081 lowerVectorSplatImm<8>(N, 2, DAG));
4082 case Intrinsic::loongarch_lsx_vxori_b:
4083 case Intrinsic::loongarch_lasx_xvxori_b:
4084 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
4085 lowerVectorSplatImm<8>(N, 2, DAG));
4086 case Intrinsic::loongarch_lsx_vsll_b:
4087 case Intrinsic::loongarch_lsx_vsll_h:
4088 case Intrinsic::loongarch_lsx_vsll_w:
4089 case Intrinsic::loongarch_lsx_vsll_d:
4090 case Intrinsic::loongarch_lasx_xvsll_b:
4091 case Intrinsic::loongarch_lasx_xvsll_h:
4092 case Intrinsic::loongarch_lasx_xvsll_w:
4093 case Intrinsic::loongarch_lasx_xvsll_d:
4094 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
4095 truncateVecElts(N, DAG));
4096 case Intrinsic::loongarch_lsx_vslli_b:
4097 case Intrinsic::loongarch_lasx_xvslli_b:
4098 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
4099 lowerVectorSplatImm<3>(N, 2, DAG));
4100 case Intrinsic::loongarch_lsx_vslli_h:
4101 case Intrinsic::loongarch_lasx_xvslli_h:
4102 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
4103 lowerVectorSplatImm<4>(N, 2, DAG));
4104 case Intrinsic::loongarch_lsx_vslli_w:
4105 case Intrinsic::loongarch_lasx_xvslli_w:
4106 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
4107 lowerVectorSplatImm<5>(N, 2, DAG));
4108 case Intrinsic::loongarch_lsx_vslli_d:
4109 case Intrinsic::loongarch_lasx_xvslli_d:
4110 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
4111 lowerVectorSplatImm<6>(N, 2, DAG));
4112 case Intrinsic::loongarch_lsx_vsrl_b:
4113 case Intrinsic::loongarch_lsx_vsrl_h:
4114 case Intrinsic::loongarch_lsx_vsrl_w:
4115 case Intrinsic::loongarch_lsx_vsrl_d:
4116 case Intrinsic::loongarch_lasx_xvsrl_b:
4117 case Intrinsic::loongarch_lasx_xvsrl_h:
4118 case Intrinsic::loongarch_lasx_xvsrl_w:
4119 case Intrinsic::loongarch_lasx_xvsrl_d:
4120 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
4121 truncateVecElts(N, DAG));
4122 case Intrinsic::loongarch_lsx_vsrli_b:
4123 case Intrinsic::loongarch_lasx_xvsrli_b:
4124 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
4125 lowerVectorSplatImm<3>(N, 2, DAG));
4126 case Intrinsic::loongarch_lsx_vsrli_h:
4127 case Intrinsic::loongarch_lasx_xvsrli_h:
4128 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
4129 lowerVectorSplatImm<4>(N, 2, DAG));
4130 case Intrinsic::loongarch_lsx_vsrli_w:
4131 case Intrinsic::loongarch_lasx_xvsrli_w:
4132 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
4133 lowerVectorSplatImm<5>(N, 2, DAG));
4134 case Intrinsic::loongarch_lsx_vsrli_d:
4135 case Intrinsic::loongarch_lasx_xvsrli_d:
4136 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
4137 lowerVectorSplatImm<6>(N, 2, DAG));
4138 case Intrinsic::loongarch_lsx_vsra_b:
4139 case Intrinsic::loongarch_lsx_vsra_h:
4140 case Intrinsic::loongarch_lsx_vsra_w:
4141 case Intrinsic::loongarch_lsx_vsra_d:
4142 case Intrinsic::loongarch_lasx_xvsra_b:
4143 case Intrinsic::loongarch_lasx_xvsra_h:
4144 case Intrinsic::loongarch_lasx_xvsra_w:
4145 case Intrinsic::loongarch_lasx_xvsra_d:
4146 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
4147 truncateVecElts(N, DAG));
4148 case Intrinsic::loongarch_lsx_vsrai_b:
4149 case Intrinsic::loongarch_lasx_xvsrai_b:
4150 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
4151 lowerVectorSplatImm<3>(N, 2, DAG));
4152 case Intrinsic::loongarch_lsx_vsrai_h:
4153 case Intrinsic::loongarch_lasx_xvsrai_h:
4154 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
4155 lowerVectorSplatImm<4>(N, 2, DAG));
4156 case Intrinsic::loongarch_lsx_vsrai_w:
4157 case Intrinsic::loongarch_lasx_xvsrai_w:
4158 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
4159 lowerVectorSplatImm<5>(N, 2, DAG));
4160 case Intrinsic::loongarch_lsx_vsrai_d:
4161 case Intrinsic::loongarch_lasx_xvsrai_d:
4162 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
4163 lowerVectorSplatImm<6>(N, 2, DAG));
4164 case Intrinsic::loongarch_lsx_vclz_b:
4165 case Intrinsic::loongarch_lsx_vclz_h:
4166 case Intrinsic::loongarch_lsx_vclz_w:
4167 case Intrinsic::loongarch_lsx_vclz_d:
4168 case Intrinsic::loongarch_lasx_xvclz_b:
4169 case Intrinsic::loongarch_lasx_xvclz_h:
4170 case Intrinsic::loongarch_lasx_xvclz_w:
4171 case Intrinsic::loongarch_lasx_xvclz_d:
4172 return DAG.getNode(ISD::CTLZ, DL, N->getValueType(0), N->getOperand(1));
4173 case Intrinsic::loongarch_lsx_vpcnt_b:
4174 case Intrinsic::loongarch_lsx_vpcnt_h:
4175 case Intrinsic::loongarch_lsx_vpcnt_w:
4176 case Intrinsic::loongarch_lsx_vpcnt_d:
4177 case Intrinsic::loongarch_lasx_xvpcnt_b:
4178 case Intrinsic::loongarch_lasx_xvpcnt_h:
4179 case Intrinsic::loongarch_lasx_xvpcnt_w:
4180 case Intrinsic::loongarch_lasx_xvpcnt_d:
4181 return DAG.getNode(ISD::CTPOP, DL, N->getValueType(0), N->getOperand(1));
4182 case Intrinsic::loongarch_lsx_vbitclr_b:
4183 case Intrinsic::loongarch_lsx_vbitclr_h:
4184 case Intrinsic::loongarch_lsx_vbitclr_w:
4185 case Intrinsic::loongarch_lsx_vbitclr_d:
4186 case Intrinsic::loongarch_lasx_xvbitclr_b:
4187 case Intrinsic::loongarch_lasx_xvbitclr_h:
4188 case Intrinsic::loongarch_lasx_xvbitclr_w:
4189 case Intrinsic::loongarch_lasx_xvbitclr_d:
4190 return lowerVectorBitClear(N, DAG);
4191 case Intrinsic::loongarch_lsx_vbitclri_b:
4192 case Intrinsic::loongarch_lasx_xvbitclri_b:
4193 return lowerVectorBitClearImm<3>(N, DAG);
4194 case Intrinsic::loongarch_lsx_vbitclri_h:
4195 case Intrinsic::loongarch_lasx_xvbitclri_h:
4196 return lowerVectorBitClearImm<4>(N, DAG);
4197 case Intrinsic::loongarch_lsx_vbitclri_w:
4198 case Intrinsic::loongarch_lasx_xvbitclri_w:
4199 return lowerVectorBitClearImm<5>(N, DAG);
4200 case Intrinsic::loongarch_lsx_vbitclri_d:
4201 case Intrinsic::loongarch_lasx_xvbitclri_d:
4202 return lowerVectorBitClearImm<6>(N, DAG);
4203 case Intrinsic::loongarch_lsx_vbitset_b:
4204 case Intrinsic::loongarch_lsx_vbitset_h:
4205 case Intrinsic::loongarch_lsx_vbitset_w:
4206 case Intrinsic::loongarch_lsx_vbitset_d:
4207 case Intrinsic::loongarch_lasx_xvbitset_b:
4208 case Intrinsic::loongarch_lasx_xvbitset_h:
4209 case Intrinsic::loongarch_lasx_xvbitset_w:
4210 case Intrinsic::loongarch_lasx_xvbitset_d: {
4211 EVT VecTy = N->getValueType(0);
4212 SDValue One = DAG.getConstant(1, DL, VecTy);
4213 return DAG.getNode(
4214 ISD::OR, DL, VecTy, N->getOperand(1),
4215 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
4216 }
4217 case Intrinsic::loongarch_lsx_vbitseti_b:
4218 case Intrinsic::loongarch_lasx_xvbitseti_b:
4219 return lowerVectorBitSetImm<3>(N, DAG);
4220 case Intrinsic::loongarch_lsx_vbitseti_h:
4221 case Intrinsic::loongarch_lasx_xvbitseti_h:
4222 return lowerVectorBitSetImm<4>(N, DAG);
4223 case Intrinsic::loongarch_lsx_vbitseti_w:
4224 case Intrinsic::loongarch_lasx_xvbitseti_w:
4225 return lowerVectorBitSetImm<5>(N, DAG);
4226 case Intrinsic::loongarch_lsx_vbitseti_d:
4227 case Intrinsic::loongarch_lasx_xvbitseti_d:
4228 return lowerVectorBitSetImm<6>(N, DAG);
4229 case Intrinsic::loongarch_lsx_vbitrev_b:
4230 case Intrinsic::loongarch_lsx_vbitrev_h:
4231 case Intrinsic::loongarch_lsx_vbitrev_w:
4232 case Intrinsic::loongarch_lsx_vbitrev_d:
4233 case Intrinsic::loongarch_lasx_xvbitrev_b:
4234 case Intrinsic::loongarch_lasx_xvbitrev_h:
4235 case Intrinsic::loongarch_lasx_xvbitrev_w:
4236 case Intrinsic::loongarch_lasx_xvbitrev_d: {
4237 EVT VecTy = N->getValueType(0);
4238 SDValue One = DAG.getConstant(1, DL, VecTy);
4239 return DAG.getNode(
4240 ISD::XOR, DL, VecTy, N->getOperand(1),
4241 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
4242 }
4243 case Intrinsic::loongarch_lsx_vbitrevi_b:
4244 case Intrinsic::loongarch_lasx_xvbitrevi_b:
4245 return lowerVectorBitRevImm<3>(N, DAG);
4246 case Intrinsic::loongarch_lsx_vbitrevi_h:
4247 case Intrinsic::loongarch_lasx_xvbitrevi_h:
4248 return lowerVectorBitRevImm<4>(N, DAG);
4249 case Intrinsic::loongarch_lsx_vbitrevi_w:
4250 case Intrinsic::loongarch_lasx_xvbitrevi_w:
4251 return lowerVectorBitRevImm<5>(N, DAG);
4252 case Intrinsic::loongarch_lsx_vbitrevi_d:
4253 case Intrinsic::loongarch_lasx_xvbitrevi_d:
4254 return lowerVectorBitRevImm<6>(N, DAG);
4255 case Intrinsic::loongarch_lsx_vfadd_s:
4256 case Intrinsic::loongarch_lsx_vfadd_d:
4257 case Intrinsic::loongarch_lasx_xvfadd_s:
4258 case Intrinsic::loongarch_lasx_xvfadd_d:
4259 return DAG.getNode(ISD::FADD, DL, N->getValueType(0), N->getOperand(1),
4260 N->getOperand(2));
4261 case Intrinsic::loongarch_lsx_vfsub_s:
4262 case Intrinsic::loongarch_lsx_vfsub_d:
4263 case Intrinsic::loongarch_lasx_xvfsub_s:
4264 case Intrinsic::loongarch_lasx_xvfsub_d:
4265 return DAG.getNode(ISD::FSUB, DL, N->getValueType(0), N->getOperand(1),
4266 N->getOperand(2));
4267 case Intrinsic::loongarch_lsx_vfmul_s:
4268 case Intrinsic::loongarch_lsx_vfmul_d:
4269 case Intrinsic::loongarch_lasx_xvfmul_s:
4270 case Intrinsic::loongarch_lasx_xvfmul_d:
4271 return DAG.getNode(ISD::FMUL, DL, N->getValueType(0), N->getOperand(1),
4272 N->getOperand(2));
4273 case Intrinsic::loongarch_lsx_vfdiv_s:
4274 case Intrinsic::loongarch_lsx_vfdiv_d:
4275 case Intrinsic::loongarch_lasx_xvfdiv_s:
4276 case Intrinsic::loongarch_lasx_xvfdiv_d:
4277 return DAG.