1//=- LoongArchISelLowering.cpp - LoongArch DAG Lowering Implementation ---===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that LoongArch uses to lower LLVM code into
10// a selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
15#include "LoongArch.h"
18#include "LoongArchSubtarget.h"
21#include "llvm/ADT/Statistic.h"
26#include "llvm/IR/IRBuilder.h"
28#include "llvm/IR/IntrinsicsLoongArch.h"
30#include "llvm/Support/Debug.h"
34
35using namespace llvm;
36
37#define DEBUG_TYPE "loongarch-isel-lowering"
38
39STATISTIC(NumTailCalls, "Number of tail calls");
40
41static cl::opt<bool> ZeroDivCheck("loongarch-check-zero-division", cl::Hidden,
42 cl::desc("Trap on integer division by zero."),
43 cl::init(false));
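// Usage note (illustration, not part of the original source): since this is a
// cl::opt, the check can typically be toggled on the llc command line, e.g.
//   llc -mtriple=loongarch64 -loongarch-check-zero-division <file.ll>
// in which case integer divisions are expanded with an explicit trap on a
// zero divisor.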
44
45LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
46 const LoongArchSubtarget &STI)
47 : TargetLowering(TM), Subtarget(STI) {
48
49 MVT GRLenVT = Subtarget.getGRLenVT();
50
51 // Set up the register classes.
52
53 addRegisterClass(GRLenVT, &LoongArch::GPRRegClass);
54 if (Subtarget.hasBasicF())
55 addRegisterClass(MVT::f32, &LoongArch::FPR32RegClass);
56 if (Subtarget.hasBasicD())
57 addRegisterClass(MVT::f64, &LoongArch::FPR64RegClass);
58
59 static const MVT::SimpleValueType LSXVTs[] = {
60 MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32, MVT::v2f64};
61 static const MVT::SimpleValueType LASXVTs[] = {
62 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, MVT::v8f32, MVT::v4f64};
63
64 if (Subtarget.hasExtLSX())
65 for (MVT VT : LSXVTs)
66 addRegisterClass(VT, &LoongArch::LSX128RegClass);
67
68 if (Subtarget.hasExtLASX())
69 for (MVT VT : LASXVTs)
70 addRegisterClass(VT, &LoongArch::LASX256RegClass);
71
72 // Set operations for LA32 and LA64.
73
75 MVT::i1, Promote);
76
83
86 GRLenVT, Custom);
87
89
94
97
101
103
104 // Expand bitreverse.i16 with native-width bitrev and shift for now, before
105 // we get to know which of sll and revb.2h is faster.
108
109 // LA32 does not have REVB.2W and REVB.D due to the 64-bit operands, and
110 // the narrower REVB.W does not exist. But LA32 does have REVB.2H, so i16
111 // and i32 could still be byte-swapped relatively cheaply.
113
119
122
123 // Set operations for LA64 only.
124
125 if (Subtarget.is64Bit()) {
143
147 Custom);
149 }
150
151 // Set operations for LA32 only.
152
153 if (!Subtarget.is64Bit()) {
159 }
160
162
163 static const ISD::CondCode FPCCToExpand[] = {
166
167 // Set operations for 'F' feature.
168
169 if (Subtarget.hasBasicF()) {
170 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
171 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
172 setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
173
189
190 if (Subtarget.is64Bit())
192
193 if (!Subtarget.hasBasicD()) {
195 if (Subtarget.is64Bit()) {
198 }
199 }
200 }
201
202 // Set operations for 'D' feature.
203
204 if (Subtarget.hasBasicD()) {
205 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
206 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
207 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
208 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
209 setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
210
226
227 if (Subtarget.is64Bit())
229 }
230
231 // Set operations for 'LSX' feature.
232
233 if (Subtarget.hasExtLSX()) {
235 // Expand all truncating stores and extending loads.
236 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
237 setTruncStoreAction(VT, InnerVT, Expand);
240 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
241 }
242 // By default everything must be expanded. Then we will selectively turn
243 // on ones that can be effectively codegen'd.
244 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
246 }
247
248 for (MVT VT : LSXVTs) {
252
256
260 }
261 for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
264 Legal);
266 VT, Legal);
273 Expand);
274 }
275 for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
277 for (MVT VT : {MVT::v8i16, MVT::v4i32, MVT::v2i64})
279 for (MVT VT : {MVT::v4i32, MVT::v2i64}) {
282 }
283 for (MVT VT : {MVT::v4f32, MVT::v2f64}) {
291 VT, Expand);
292 }
294 setOperationAction(ISD::FCEIL, {MVT::f32, MVT::f64}, Legal);
295 setOperationAction(ISD::FFLOOR, {MVT::f32, MVT::f64}, Legal);
296 setOperationAction(ISD::FTRUNC, {MVT::f32, MVT::f64}, Legal);
297 setOperationAction(ISD::FROUNDEVEN, {MVT::f32, MVT::f64}, Legal);
298 }
299
300 // Set operations for 'LASX' feature.
301
302 if (Subtarget.hasExtLASX()) {
303 for (MVT VT : LASXVTs) {
307
312
316 }
317 for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) {
320 Legal);
322 VT, Legal);
329 Expand);
330 }
331 for (MVT VT : {MVT::v32i8, MVT::v16i16, MVT::v8i32})
333 for (MVT VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64})
335 for (MVT VT : {MVT::v8i32, MVT::v4i32, MVT::v4i64}) {
338 }
339 for (MVT VT : {MVT::v8f32, MVT::v4f64}) {
347 VT, Expand);
348 }
349 }
350
351 // Set DAG combine for LA32 and LA64.
352
357
358 // Set DAG combine for 'LSX' feature.
359
360 if (Subtarget.hasExtLSX())
362
363 // Compute derived properties from the register classes.
365
367
370
372
374
375 // Function alignments.
377 // Set preferred alignments.
381
382 // cmpxchg sizes down to 8 bits become legal if LAMCAS is available.
383 if (Subtarget.hasLAMCAS())
385}
386
388 const GlobalAddressSDNode *GA) const {
389 // In order to maximise the opportunity for common subexpression elimination,
390 // keep a separate ADD node for the global address offset instead of folding
391 // it in the global address node. Later peephole optimisations may choose to
392 // fold it back in when profitable.
393 return false;
394}
395
397 SelectionDAG &DAG) const {
398 switch (Op.getOpcode()) {
399 case ISD::ATOMIC_FENCE:
400 return lowerATOMIC_FENCE(Op, DAG);
401 case ISD::EH_DWARF_CFA:
402 return lowerEH_DWARF_CFA(Op, DAG);
403 case ISD::GlobalAddress:
404 return lowerGlobalAddress(Op, DAG);
405 case ISD::GlobalTLSAddress:
406 return lowerGlobalTLSAddress(Op, DAG);
407 case ISD::INTRINSIC_WO_CHAIN:
408 return lowerINTRINSIC_WO_CHAIN(Op, DAG);
409 case ISD::INTRINSIC_W_CHAIN:
410 return lowerINTRINSIC_W_CHAIN(Op, DAG);
411 case ISD::INTRINSIC_VOID:
412 return lowerINTRINSIC_VOID(Op, DAG);
413 case ISD::BlockAddress:
414 return lowerBlockAddress(Op, DAG);
415 case ISD::JumpTable:
416 return lowerJumpTable(Op, DAG);
417 case ISD::SHL_PARTS:
418 return lowerShiftLeftParts(Op, DAG);
419 case ISD::SRA_PARTS:
420 return lowerShiftRightParts(Op, DAG, true);
421 case ISD::SRL_PARTS:
422 return lowerShiftRightParts(Op, DAG, false);
423 case ISD::ConstantPool:
424 return lowerConstantPool(Op, DAG);
425 case ISD::FP_TO_SINT:
426 return lowerFP_TO_SINT(Op, DAG);
427 case ISD::BITCAST:
428 return lowerBITCAST(Op, DAG);
429 case ISD::UINT_TO_FP:
430 return lowerUINT_TO_FP(Op, DAG);
431 case ISD::SINT_TO_FP:
432 return lowerSINT_TO_FP(Op, DAG);
433 case ISD::VASTART:
434 return lowerVASTART(Op, DAG);
435 case ISD::FRAMEADDR:
436 return lowerFRAMEADDR(Op, DAG);
437 case ISD::RETURNADDR:
438 return lowerRETURNADDR(Op, DAG);
439 case ISD::WRITE_REGISTER:
440 return lowerWRITE_REGISTER(Op, DAG);
441 case ISD::INSERT_VECTOR_ELT:
442 return lowerINSERT_VECTOR_ELT(Op, DAG);
443 case ISD::EXTRACT_VECTOR_ELT:
444 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
445 case ISD::BUILD_VECTOR:
446 return lowerBUILD_VECTOR(Op, DAG);
447 case ISD::VECTOR_SHUFFLE:
448 return lowerVECTOR_SHUFFLE(Op, DAG);
449 case ISD::BITREVERSE:
450 return lowerBITREVERSE(Op, DAG);
451 }
452 return SDValue();
453}
454
455SDValue LoongArchTargetLowering::lowerBITREVERSE(SDValue Op,
456 SelectionDAG &DAG) const {
457 EVT ResTy = Op->getValueType(0);
458 SDValue Src = Op->getOperand(0);
459 SDLoc DL(Op);
460
461 EVT NewVT = ResTy.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
462 unsigned int OrigEltNum = ResTy.getVectorNumElements();
463 unsigned int NewEltNum = NewVT.getVectorNumElements();
464
465 SDValue NewSrc = DAG.getNode(ISD::BITCAST, DL, NewVT, Src);
466
468 for (unsigned int i = 0; i < NewEltNum; i++) {
469 SDValue Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, NewSrc,
470 DAG.getConstant(i, DL, MVT::i64));
471 unsigned RevOp = (ResTy == MVT::v16i8 || ResTy == MVT::v32i8)
472 ? (unsigned)LoongArchISD::BITREV_8B
473 : (unsigned)ISD::BITREVERSE;
474 Ops.push_back(DAG.getNode(RevOp, DL, MVT::i64, Op));
475 }
476 SDValue Res =
477 DAG.getNode(ISD::BITCAST, DL, ResTy, DAG.getBuildVector(NewVT, DL, Ops));
478
479 switch (ResTy.getSimpleVT().SimpleTy) {
480 default:
481 return SDValue();
482 case MVT::v16i8:
483 case MVT::v32i8:
484 return Res;
485 case MVT::v8i16:
486 case MVT::v16i16:
487 case MVT::v4i32:
488 case MVT::v8i32: {
490 for (unsigned int i = 0; i < NewEltNum; i++)
491 for (int j = OrigEltNum / NewEltNum - 1; j >= 0; j--)
492 Mask.push_back(j + (OrigEltNum / NewEltNum) * i);
493 return DAG.getVectorShuffle(ResTy, DL, Res, DAG.getUNDEF(ResTy), Mask);
494 }
495 }
496}
497
498/// Determine whether a range fits a regular pattern of values.
499/// This function accounts for the possibility of jumping over the End iterator.
500template <typename ValType>
501static bool
502fitsRegularPattern(typename SmallVectorImpl<ValType>::const_iterator Begin,
503 unsigned CheckStride,
504 typename SmallVectorImpl<ValType>::const_iterator End,
505 ValType ExpectedIndex, unsigned ExpectedIndexStride) {
506 auto &I = Begin;
507
508 while (I != End) {
509 if (*I != -1 && *I != ExpectedIndex)
510 return false;
511 ExpectedIndex += ExpectedIndexStride;
512
513 // Incrementing past End is undefined behaviour so we must increment one
514 // step at a time and check for End at each step.
515 for (unsigned n = 0; n < CheckStride && I != End; ++n, ++I)
516 ; // Empty loop body.
517 }
518 return true;
519}
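// Illustrative example (not in the original source): for Mask = <0, -1, 1, 5>,
// fitsRegularPattern<int>(Mask.begin(), /*CheckStride=*/2, Mask.end(),
// /*ExpectedIndex=*/0, /*ExpectedIndexStride=*/1) inspects positions 0 and 2,
// expecting the values 0 and 1; the -1 at position 1 is never inspected, and a
// -1 at a checked position would be accepted as undef.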
520
521/// Lower VECTOR_SHUFFLE into VREPLVEI (if possible).
522///
523/// VREPLVEI performs vector broadcast based on an element specified by an
524/// integer immediate, with its mask being similar to:
525/// <x, x, x, ...>
526/// where x is any valid index.
527///
528/// When undef's appear in the mask they are treated as if they were whatever
529/// value is necessary in order to fit the above form.
530static SDValue lowerVECTOR_SHUFFLE_VREPLVEI(const SDLoc &DL, ArrayRef<int> Mask,
531 MVT VT, SDValue V1, SDValue V2,
532 SelectionDAG &DAG) {
533 int SplatIndex = -1;
534 for (const auto &M : Mask) {
535 if (M != -1) {
536 SplatIndex = M;
537 break;
538 }
539 }
540
541 if (SplatIndex == -1)
542 return DAG.getUNDEF(VT);
543
544 assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
545 if (fitsRegularPattern<int>(Mask.begin(), 1, Mask.end(), SplatIndex, 0)) {
546 APInt Imm(64, SplatIndex);
547 return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
548 DAG.getConstant(Imm, DL, MVT::i64));
549 }
550
551 return SDValue();
552}
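// Example (illustration): on v4i32, the mask <1, 1, 1, 1> (or <1, -1, 1, -1>)
// picks SplatIndex = 1 and is emitted as (VREPLVEI V1, 1), i.e. element 1 of
// V1 broadcast to every lane.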
553
554/// Lower VECTOR_SHUFFLE into VSHUF4I (if possible).
555///
556/// VSHUF4I splits the vector into blocks of four elements, then shuffles these
557/// elements according to a <4 x i2> constant (encoded as an integer immediate).
558///
559/// It is therefore possible to lower into VSHUF4I when the mask takes the form:
560/// <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...>
561/// When undef's appear they are treated as if they were whatever value is
562/// necessary in order to fit the above forms.
563///
564/// For example:
565/// %2 = shufflevector <8 x i16> %0, <8 x i16> undef,
566/// <8 x i32> <i32 3, i32 2, i32 1, i32 0,
567/// i32 7, i32 6, i32 5, i32 4>
568/// is lowered to:
569/// (VSHUF4I_H $v0, $v1, 27)
570/// where the 27 comes from:
571/// 3 + (2 << 2) + (1 << 4) + (0 << 6)
572static SDValue lowerVECTOR_SHUFFLE_VSHUF4I(const SDLoc &DL, ArrayRef<int> Mask,
573 MVT VT, SDValue V1, SDValue V2,
574 SelectionDAG &DAG) {
575
576 // When the size is less than 4, lower cost instructions may be used.
577 if (Mask.size() < 4)
578 return SDValue();
579
580 int SubMask[4] = {-1, -1, -1, -1};
581 for (unsigned i = 0; i < 4; ++i) {
582 for (unsigned j = i; j < Mask.size(); j += 4) {
583 int Idx = Mask[j];
584
585 // Convert from vector index to 4-element subvector index
586 // If an index refers to an element outside of the subvector then give up
587 if (Idx != -1) {
588 Idx -= 4 * (j / 4);
589 if (Idx < 0 || Idx >= 4)
590 return SDValue();
591 }
592
593 // If the mask has an undef, replace it with the current index.
594 // Note that it might still be undef if the current index is also undef
595 if (SubMask[i] == -1)
596 SubMask[i] = Idx;
597 // Check that non-undef values are the same as in the mask. If they
598 // aren't then give up
599 else if (Idx != -1 && Idx != SubMask[i])
600 return SDValue();
601 }
602 }
603
604 // Calculate the immediate. Replace any remaining undefs with zero
605 APInt Imm(64, 0);
606 for (int i = 3; i >= 0; --i) {
607 int Idx = SubMask[i];
608
609 if (Idx == -1)
610 Idx = 0;
611
612 Imm <<= 2;
613 Imm |= Idx & 0x3;
614 }
615
616 return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT, V1,
617 DAG.getConstant(Imm, DL, MVT::i64));
618}
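// Worked example for the immediate computed above (illustration): a repeating
// SubMask of {3, 2, 1, 0} is packed low-to-high, two bits per element, giving
// Imm = 3 + (2 << 2) + (1 << 4) + (0 << 6) = 27, matching the example in the
// function comment.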
619
620/// Lower VECTOR_SHUFFLE into VPACKEV (if possible).
621///
622/// VPACKEV interleaves the even elements from each vector.
623///
624/// It is possible to lower into VPACKEV when the mask consists of two of the
625/// following forms interleaved:
626/// <0, 2, 4, ...>
627/// <n, n+2, n+4, ...>
628/// where n is the number of elements in the vector.
629/// For example:
630/// <0, 0, 2, 2, 4, 4, ...>
631/// <0, n, 2, n+2, 4, n+4, ...>
632///
633/// When undef's appear in the mask they are treated as if they were whatever
634/// value is necessary in order to fit the above forms.
635static SDValue lowerVECTOR_SHUFFLE_VPACKEV(const SDLoc &DL, ArrayRef<int> Mask,
636 MVT VT, SDValue V1, SDValue V2,
637 SelectionDAG &DAG) {
638
639 const auto &Begin = Mask.begin();
640 const auto &End = Mask.end();
641 SDValue OriV1 = V1, OriV2 = V2;
642
643 if (fitsRegularPattern<int>(Begin, 2, End, 0, 2))
644 V1 = OriV1;
645 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 2))
646 V1 = OriV2;
647 else
648 return SDValue();
649
650 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 2))
651 V2 = OriV1;
652 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 2))
653 V2 = OriV2;
654 else
655 return SDValue();
656
657 return DAG.getNode(LoongArchISD::VPACKEV, DL, VT, V2, V1);
658}
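// Example (illustration): on v4i32, the mask <0, 4, 2, 6> places the even
// elements of V1 at even result positions and the even elements of V2 at odd
// result positions, so it is emitted as (VPACKEV V2, V1).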
659
660/// Lower VECTOR_SHUFFLE into VPACKOD (if possible).
661///
662/// VPACKOD interleaves the odd elements from each vector.
663///
664/// It is possible to lower into VPACKOD when the mask consists of two of the
665/// following forms interleaved:
666/// <1, 3, 5, ...>
667/// <n+1, n+3, n+5, ...>
668/// where n is the number of elements in the vector.
669/// For example:
670/// <1, 1, 3, 3, 5, 5, ...>
671/// <1, n+1, 3, n+3, 5, n+5, ...>
672///
673/// When undef's appear in the mask they are treated as if they were whatever
674/// value is necessary in order to fit the above forms.
675static SDValue lowerVECTOR_SHUFFLE_VPACKOD(const SDLoc &DL, ArrayRef<int> Mask,
676 MVT VT, SDValue V1, SDValue V2,
677 SelectionDAG &DAG) {
678
679 const auto &Begin = Mask.begin();
680 const auto &End = Mask.end();
681 SDValue OriV1 = V1, OriV2 = V2;
682
683 if (fitsRegularPattern<int>(Begin, 2, End, 1, 2))
684 V1 = OriV1;
685 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + 1, 2))
686 V1 = OriV2;
687 else
688 return SDValue();
689
690 if (fitsRegularPattern<int>(Begin + 1, 2, End, 1, 2))
691 V2 = OriV1;
692 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + 1, 2))
693 V2 = OriV2;
694 else
695 return SDValue();
696
697 return DAG.getNode(LoongArchISD::VPACKOD, DL, VT, V2, V1);
698}
699
700/// Lower VECTOR_SHUFFLE into VILVH (if possible).
701///
702/// VILVH interleaves consecutive elements from the left (highest-indexed) half
703/// of each vector.
704///
705/// It is possible to lower into VILVH when the mask consists of two of the
706/// following forms interleaved:
707/// <x, x+1, x+2, ...>
708/// <n+x, n+x+1, n+x+2, ...>
709/// where n is the number of elements in the vector and x is half n.
710/// For example:
711/// <x, x, x+1, x+1, x+2, x+2, ...>
712/// <x, n+x, x+1, n+x+1, x+2, n+x+2, ...>
713///
714/// When undef's appear in the mask they are treated as if they were whatever
715/// value is necessary in order to fit the above forms.
716static SDValue lowerVECTOR_SHUFFLE_VILVH(const SDLoc &DL, ArrayRef<int> Mask,
717 MVT VT, SDValue V1, SDValue V2,
718 SelectionDAG &DAG) {
719
720 const auto &Begin = Mask.begin();
721 const auto &End = Mask.end();
722 unsigned HalfSize = Mask.size() / 2;
723 SDValue OriV1 = V1, OriV2 = V2;
724
725 if (fitsRegularPattern<int>(Begin, 2, End, HalfSize, 1))
726 V1 = OriV1;
727 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + HalfSize, 1))
728 V1 = OriV2;
729 else
730 return SDValue();
731
732 if (fitsRegularPattern<int>(Begin + 1, 2, End, HalfSize, 1))
733 V2 = OriV1;
734 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + HalfSize,
735 1))
736 V2 = OriV2;
737 else
738 return SDValue();
739
740 return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
741}
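// Example (illustration): on v4i32 (HalfSize = 2), the mask <2, 6, 3, 7>
// interleaves the high halves of V1 and V2 and is emitted as (VILVH V2, V1).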
742
743/// Lower VECTOR_SHUFFLE into VILVL (if possible).
744///
745/// VILVL interleaves consecutive elements from the right (lowest-indexed) half
746/// of each vector.
747///
748/// It is possible to lower into VILVL when the mask consists of two of the
749/// following forms interleaved:
750/// <0, 1, 2, ...>
751/// <n, n+1, n+2, ...>
752/// where n is the number of elements in the vector.
753/// For example:
754/// <0, 0, 1, 1, 2, 2, ...>
755/// <0, n, 1, n+1, 2, n+2, ...>
756///
757/// When undef's appear in the mask they are treated as if they were whatever
758/// value is necessary in order to fit the above forms.
759static SDValue lowerVECTOR_SHUFFLE_VILVL(const SDLoc &DL, ArrayRef<int> Mask,
760 MVT VT, SDValue V1, SDValue V2,
761 SelectionDAG &DAG) {
762
763 const auto &Begin = Mask.begin();
764 const auto &End = Mask.end();
765 SDValue OriV1 = V1, OriV2 = V2;
766
767 if (fitsRegularPattern<int>(Begin, 2, End, 0, 1))
768 V1 = OriV1;
769 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 1))
770 V1 = OriV2;
771 else
772 return SDValue();
773
774 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 1))
775 V2 = OriV1;
776 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 1))
777 V2 = OriV2;
778 else
779 return SDValue();
780
781 return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
782}
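// Example (illustration): on v4i32, the mask <0, 4, 1, 5> interleaves the low
// halves of V1 and V2 and is emitted as (VILVL V2, V1).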
783
784/// Lower VECTOR_SHUFFLE into VPICKEV (if possible).
785///
786/// VPICKEV copies the even elements of each vector into the result vector.
787///
788/// It is possible to lower into VPICKEV when the mask consists of two of the
789/// following forms concatenated:
790/// <0, 2, 4, ...>
791/// <n, n+2, n+4, ...>
792/// where n is the number of elements in the vector.
793/// For example:
794/// <0, 2, 4, ..., 0, 2, 4, ...>
795/// <0, 2, 4, ..., n, n+2, n+4, ...>
796///
797/// When undef's appear in the mask they are treated as if they were whatever
798/// value is necessary in order to fit the above forms.
799static SDValue lowerVECTOR_SHUFFLE_VPICKEV(const SDLoc &DL, ArrayRef<int> Mask,
800 MVT VT, SDValue V1, SDValue V2,
801 SelectionDAG &DAG) {
802
803 const auto &Begin = Mask.begin();
804 const auto &Mid = Mask.begin() + Mask.size() / 2;
805 const auto &End = Mask.end();
806 SDValue OriV1 = V1, OriV2 = V2;
807
808 if (fitsRegularPattern<int>(Begin, 1, Mid, 0, 2))
809 V1 = OriV1;
810 else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size(), 2))
811 V1 = OriV2;
812 else
813 return SDValue();
814
815 if (fitsRegularPattern<int>(Mid, 1, End, 0, 2))
816 V2 = OriV1;
817 else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size(), 2))
818 V2 = OriV2;
819
820 else
821 return SDValue();
822
823 return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
824}
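// Example (illustration): on v4i32, the mask <0, 2, 4, 6> concatenates the
// even elements of V1 with the even elements of V2 and is emitted as
// (VPICKEV V2, V1).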
825
826/// Lower VECTOR_SHUFFLE into VPICKOD (if possible).
827///
828/// VPICKOD copies the odd elements of each vector into the result vector.
829///
830/// It is possible to lower into VPICKOD when the mask consists of two of the
831/// following forms concatenated:
832/// <1, 3, 5, ...>
833/// <n+1, n+3, n+5, ...>
834/// where n is the number of elements in the vector.
835/// For example:
836/// <1, 3, 5, ..., 1, 3, 5, ...>
837/// <1, 3, 5, ..., n+1, n+3, n+5, ...>
838///
839/// When undef's appear in the mask they are treated as if they were whatever
840/// value is necessary in order to fit the above forms.
841static SDValue lowerVECTOR_SHUFFLE_VPICKOD(const SDLoc &DL, ArrayRef<int> Mask,
842 MVT VT, SDValue V1, SDValue V2,
843 SelectionDAG &DAG) {
844
845 const auto &Begin = Mask.begin();
846 const auto &Mid = Mask.begin() + Mask.size() / 2;
847 const auto &End = Mask.end();
848 SDValue OriV1 = V1, OriV2 = V2;
849
850 if (fitsRegularPattern<int>(Begin, 1, Mid, 1, 2))
851 V1 = OriV1;
852 else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size() + 1, 2))
853 V1 = OriV2;
854 else
855 return SDValue();
856
857 if (fitsRegularPattern<int>(Mid, 1, End, 1, 2))
858 V2 = OriV1;
859 else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size() + 1, 2))
860 V2 = OriV2;
861 else
862 return SDValue();
863
864 return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
865}
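// Example (illustration): on v4i32, the mask <1, 3, 5, 7> concatenates the
// odd elements of V1 with the odd elements of V2 and is emitted as
// (VPICKOD V2, V1).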
866
867/// Lower VECTOR_SHUFFLE into VSHUF.
868///
869/// This mostly consists of converting the shuffle mask into a BUILD_VECTOR and
870/// adding it as an operand to the resulting VSHUF.
871static SDValue lowerVECTOR_SHUFFLE_VSHUF(const SDLoc &DL, ArrayRef<int> Mask,
872 MVT VT, SDValue V1, SDValue V2,
873 SelectionDAG &DAG) {
874
876 for (auto M : Mask)
877 Ops.push_back(DAG.getConstant(M, DL, MVT::i64));
878
879 EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
880 SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, Ops);
881
882 // VECTOR_SHUFFLE concatenates the vectors in a vectorwise fashion.
883 // <0b00, 0b01> + <0b10, 0b11> -> <0b00, 0b01, 0b10, 0b11>
884 // VSHUF concatenates the vectors in a bitwise fashion:
885 // <0b00, 0b01> + <0b10, 0b11> ->
886 // 0b0100 + 0b1110 -> 0b01001110
887 // <0b10, 0b11, 0b00, 0b01>
888 // We must therefore swap the operands to get the correct result.
889 return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
890}
891
892/// Dispatching routine to lower various 128-bit LoongArch vector shuffles.
893///
894/// This routine breaks down the specific type of 128-bit shuffle and
895/// dispatches to the lowering routines accordingly.
896static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
897 SDValue V1, SDValue V2, SelectionDAG &DAG) {
898 assert((VT.SimpleTy == MVT::v16i8 || VT.SimpleTy == MVT::v8i16 ||
899 VT.SimpleTy == MVT::v4i32 || VT.SimpleTy == MVT::v2i64 ||
900 VT.SimpleTy == MVT::v4f32 || VT.SimpleTy == MVT::v2f64) &&
901 "Vector type is unsupported for lsx!");
902 assert(V1.getSimpleValueType() == V2.getSimpleValueType() &&
903 "Two operands have different types!");
904 assert(VT.getVectorNumElements() == Mask.size() &&
905 "Unexpected mask size for shuffle!");
906 assert(Mask.size() % 2 == 0 && "Expected even mask size.");
907
908 SDValue Result;
909 // TODO: Add more comparison patterns.
910 if (V2.isUndef()) {
911 if ((Result = lowerVECTOR_SHUFFLE_VREPLVEI(DL, Mask, VT, V1, V2, DAG)))
912 return Result;
913 if ((Result = lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG)))
914 return Result;
915
916 // TODO: This comment may be enabled in the future to better match the
917 // pattern for instruction selection.
918 /* V2 = V1; */
919 }
920
921 // For better performance, it is recommended not to change the order of these
922 // pattern comparisons.
923 if ((Result = lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG)))
924 return Result;
925 if ((Result = lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG)))
926 return Result;
927 if ((Result = lowerVECTOR_SHUFFLE_VILVH(DL, Mask, VT, V1, V2, DAG)))
928 return Result;
929 if ((Result = lowerVECTOR_SHUFFLE_VILVL(DL, Mask, VT, V1, V2, DAG)))
930 return Result;
931 if ((Result = lowerVECTOR_SHUFFLE_VPICKEV(DL, Mask, VT, V1, V2, DAG)))
932 return Result;
933 if ((Result = lowerVECTOR_SHUFFLE_VPICKOD(DL, Mask, VT, V1, V2, DAG)))
934 return Result;
935 if ((Result = lowerVECTOR_SHUFFLE_VSHUF(DL, Mask, VT, V1, V2, DAG)))
936 return Result;
937
938 return SDValue();
939}
940
941/// Lower VECTOR_SHUFFLE into XVREPLVEI (if possible).
942///
943/// It is an XVREPLVEI when the mask is:
944/// <x, x, x, ..., x+n, x+n, x+n, ...>
945/// where the number of x's is equal to n and n is half the length of the vector.
946///
947/// When undef's appear in the mask they are treated as if they were whatever
948/// value is necessary in order to fit the above form.
949static SDValue lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL,
950 ArrayRef<int> Mask, MVT VT,
951 SDValue V1, SDValue V2,
952 SelectionDAG &DAG) {
953 int SplatIndex = -1;
954 for (const auto &M : Mask) {
955 if (M != -1) {
956 SplatIndex = M;
957 break;
958 }
959 }
960
961 if (SplatIndex == -1)
962 return DAG.getUNDEF(VT);
963
964 const auto &Begin = Mask.begin();
965 const auto &End = Mask.end();
966 unsigned HalfSize = Mask.size() / 2;
967
968 assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
969 if (fitsRegularPattern<int>(Begin, 1, End - HalfSize, SplatIndex, 0) &&
970 fitsRegularPattern<int>(Begin + HalfSize, 1, End, SplatIndex + HalfSize,
971 0)) {
972 APInt Imm(64, SplatIndex);
973 return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
974 DAG.getConstant(Imm, DL, MVT::i64));
975 }
976
977 return SDValue();
978}
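// Example (illustration): on v8i32 (HalfSize = 4), the mask
// <1, 1, 1, 1, 5, 5, 5, 5> splats element 1 within each 128-bit lane and is
// emitted as (VREPLVEI V1, 1); a mask such as <1, 1, 1, 1, 1, 1, 1, 1> does
// not fit, because the second half must repeat SplatIndex + HalfSize.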
979
980/// Lower VECTOR_SHUFFLE into XVSHUF4I (if possible).
981static SDValue lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef<int> Mask,
982 MVT VT, SDValue V1, SDValue V2,
983 SelectionDAG &DAG) {
984 // When the size is less than or equal to 4, lower cost instructions may be
985 // used.
986 if (Mask.size() <= 4)
987 return SDValue();
988 return lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG);
989}
990
991/// Lower VECTOR_SHUFFLE into XVPACKEV (if possible).
992static SDValue lowerVECTOR_SHUFFLE_XVPACKEV(const SDLoc &DL, ArrayRef<int> Mask,
993 MVT VT, SDValue V1, SDValue V2,
994 SelectionDAG &DAG) {
995 return lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG);
996}
997
998/// Lower VECTOR_SHUFFLE into XVPACKOD (if possible).
999static SDValue lowerVECTOR_SHUFFLE_XVPACKOD(const SDLoc &DL, ArrayRef<int> Mask,
1000 MVT VT, SDValue V1, SDValue V2,
1001 SelectionDAG &DAG) {
1002 return lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG);
1003}
1004
1005/// Lower VECTOR_SHUFFLE into XVILVH (if possible).
1006static SDValue lowerVECTOR_SHUFFLE_XVILVH(const SDLoc &DL, ArrayRef<int> Mask,
1007 MVT VT, SDValue V1, SDValue V2,
1008 SelectionDAG &DAG) {
1009
1010 const auto &Begin = Mask.begin();
1011 const auto &End = Mask.end();
1012 unsigned HalfSize = Mask.size() / 2;
1013 unsigned LeftSize = HalfSize / 2;
1014 SDValue OriV1 = V1, OriV2 = V2;
1015
1016 if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, HalfSize - LeftSize,
1017 1) &&
1018 fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize + LeftSize, 1))
1019 V1 = OriV1;
1020 else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize,
1021 Mask.size() + HalfSize - LeftSize, 1) &&
1022 fitsRegularPattern<int>(Begin + HalfSize, 2, End,
1023 Mask.size() + HalfSize + LeftSize, 1))
1024 V1 = OriV2;
1025 else
1026 return SDValue();
1027
1028 if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, HalfSize - LeftSize,
1029 1) &&
1030 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize + LeftSize,
1031 1))
1032 V2 = OriV1;
1033 else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize,
1034 Mask.size() + HalfSize - LeftSize, 1) &&
1035 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
1036 Mask.size() + HalfSize + LeftSize, 1))
1037 V2 = OriV2;
1038 else
1039 return SDValue();
1040
1041 return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
1042}
1043
1044/// Lower VECTOR_SHUFFLE into XVILVL (if possible).
1045static SDValue lowerVECTOR_SHUFFLE_XVILVL(const SDLoc &DL, ArrayRef<int> Mask,
1046 MVT VT, SDValue V1, SDValue V2,
1047 SelectionDAG &DAG) {
1048
1049 const auto &Begin = Mask.begin();
1050 const auto &End = Mask.end();
1051 unsigned HalfSize = Mask.size() / 2;
1052 SDValue OriV1 = V1, OriV2 = V2;
1053
1054 if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, 0, 1) &&
1055 fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize, 1))
1056 V1 = OriV1;
1057 else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, Mask.size(), 1) &&
1058 fitsRegularPattern<int>(Begin + HalfSize, 2, End,
1059 Mask.size() + HalfSize, 1))
1060 V1 = OriV2;
1061 else
1062 return SDValue();
1063
1064 if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, 0, 1) &&
1065 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize, 1))
1066 V2 = OriV1;
1067 else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, Mask.size(),
1068 1) &&
1069 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
1070 Mask.size() + HalfSize, 1))
1071 V2 = OriV2;
1072 else
1073 return SDValue();
1074
1075 return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
1076}
1077
1078/// Lower VECTOR_SHUFFLE into XVPICKEV (if possible).
1079static SDValue lowerVECTOR_SHUFFLE_XVPICKEV(const SDLoc &DL, ArrayRef<int> Mask,
1080 MVT VT, SDValue V1, SDValue V2,
1081 SelectionDAG &DAG) {
1082
1083 const auto &Begin = Mask.begin();
1084 const auto &LeftMid = Mask.begin() + Mask.size() / 4;
1085 const auto &Mid = Mask.begin() + Mask.size() / 2;
1086 const auto &RightMid = Mask.end() - Mask.size() / 4;
1087 const auto &End = Mask.end();
1088 unsigned HalfSize = Mask.size() / 2;
1089 SDValue OriV1 = V1, OriV2 = V2;
1090
1091 if (fitsRegularPattern<int>(Begin, 1, LeftMid, 0, 2) &&
1092 fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize, 2))
1093 V1 = OriV1;
1094 else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size(), 2) &&
1095 fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize, 2))
1096 V1 = OriV2;
1097 else
1098 return SDValue();
1099
1100 if (fitsRegularPattern<int>(LeftMid, 1, Mid, 0, 2) &&
1101 fitsRegularPattern<int>(RightMid, 1, End, HalfSize, 2))
1102 V2 = OriV1;
1103 else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size(), 2) &&
1104 fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize, 2))
1105 V2 = OriV2;
1106
1107 else
1108 return SDValue();
1109
1110 return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
1111}
1112
1113/// Lower VECTOR_SHUFFLE into XVPICKOD (if possible).
1114static SDValue lowerVECTOR_SHUFFLE_XVPICKOD(const SDLoc &DL, ArrayRef<int> Mask,
1115 MVT VT, SDValue V1, SDValue V2,
1116 SelectionDAG &DAG) {
1117
1118 const auto &Begin = Mask.begin();
1119 const auto &LeftMid = Mask.begin() + Mask.size() / 4;
1120 const auto &Mid = Mask.begin() + Mask.size() / 2;
1121 const auto &RightMid = Mask.end() - Mask.size() / 4;
1122 const auto &End = Mask.end();
1123 unsigned HalfSize = Mask.size() / 2;
1124 SDValue OriV1 = V1, OriV2 = V2;
1125
1126 if (fitsRegularPattern<int>(Begin, 1, LeftMid, 1, 2) &&
1127 fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize + 1, 2))
1128 V1 = OriV1;
1129 else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size() + 1, 2) &&
1130 fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize + 1,
1131 2))
1132 V1 = OriV2;
1133 else
1134 return SDValue();
1135
1136 if (fitsRegularPattern<int>(LeftMid, 1, Mid, 1, 2) &&
1137 fitsRegularPattern<int>(RightMid, 1, End, HalfSize + 1, 2))
1138 V2 = OriV1;
1139 else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size() + 1, 2) &&
1140 fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize + 1,
1141 2))
1142 V2 = OriV2;
1143 else
1144 return SDValue();
1145
1146 return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
1147}
1148
1149/// Lower VECTOR_SHUFFLE into XVSHUF (if possible).
1150static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, ArrayRef<int> Mask,
1151 MVT VT, SDValue V1, SDValue V2,
1152 SelectionDAG &DAG) {
1153
1154 int MaskSize = Mask.size();
1155 int HalfSize = Mask.size() / 2;
1156 const auto &Begin = Mask.begin();
1157 const auto &Mid = Mask.begin() + HalfSize;
1158 const auto &End = Mask.end();
1159
1160 // VECTOR_SHUFFLE concatenates the vectors:
1161 // <0, 1, 2, 3, 4, 5, 6, 7> + <8, 9, 10, 11, 12, 13, 14, 15>
1162 // shuffling ->
1163 // <0, 1, 2, 3, 8, 9, 10, 11> <4, 5, 6, 7, 12, 13, 14, 15>
1164 //
1165 // XVSHUF concatenates the vectors:
1166 // <a0, a1, a2, a3, b0, b1, b2, b3> + <a4, a5, a6, a7, b4, b5, b6, b7>
1167 // shuffling ->
1168 // <a0, a1, a2, a3, a4, a5, a6, a7> + <b0, b1, b2, b3, b4, b5, b6, b7>
1169 SmallVector<SDValue, 8> MaskAlloc;
1170 for (auto it = Begin; it < Mid; it++) {
1171 if (*it < 0) // UNDEF
1172 MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
1173 else if ((*it >= 0 && *it < HalfSize) ||
1174 (*it >= MaskSize && *it <= MaskSize + HalfSize)) {
1175 int M = *it < HalfSize ? *it : *it - HalfSize;
1176 MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
1177 } else
1178 return SDValue();
1179 }
1180 assert((int)MaskAlloc.size() == HalfSize && "xvshuf convert failed!");
1181
1182 for (auto it = Mid; it < End; it++) {
1183 if (*it < 0) // UNDEF
1184 MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
1185 else if ((*it >= HalfSize && *it < MaskSize) ||
1186 (*it >= MaskSize + HalfSize && *it < MaskSize * 2)) {
1187 int M = *it < MaskSize ? *it - HalfSize : *it - MaskSize;
1188 MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
1189 } else
1190 return SDValue();
1191 }
1192 assert((int)MaskAlloc.size() == MaskSize && "xvshuf convert failed!");
1193
1194 EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
1195 SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, MaskAlloc);
1196 return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
1197}
1198
1199/// Shuffle vectors by lane to generate more optimized instructions.
1200/// 256-bit shuffles are always considered as 2-lane 128-bit shuffles.
1201///
1202/// Therefore, except for the following four cases, other cases are regarded
1203/// as cross-lane shuffles, where optimization is relatively limited.
1204///
1205/// - Shuffle high, low lanes of two input vectors
1206/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 3, 6>
1207/// - Shuffle low, high lanes of two input vectors
1208/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 0, 5>
1209/// - Shuffle low, low lanes of two input vectors
1210/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 3, 6>
1211/// - Shuffle high, high lanes of two input vectors
1212/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 0, 5>
1213///
1214/// The first case is the closest to LoongArch instructions and the other
1215/// cases need to be converted to it for processing.
1216///
1217/// This function may modify V1, V2 and Mask
1218static void canonicalizeShuffleVectorByLane(const SDLoc &DL,
1219 MutableArrayRef<int> Mask, MVT VT,
1220 SDValue &V1, SDValue &V2,
1221 SelectionDAG &DAG) {
1222
1223 enum HalfMaskType { HighLaneTy, LowLaneTy, None };
1224
1225 int MaskSize = Mask.size();
1226 int HalfSize = Mask.size() / 2;
1227
1228 HalfMaskType preMask = None, postMask = None;
1229
1230 if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
1231 return M < 0 || (M >= 0 && M < HalfSize) ||
1232 (M >= MaskSize && M < MaskSize + HalfSize);
1233 }))
1234 preMask = HighLaneTy;
1235 else if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
1236 return M < 0 || (M >= HalfSize && M < MaskSize) ||
1237 (M >= MaskSize + HalfSize && M < MaskSize * 2);
1238 }))
1239 preMask = LowLaneTy;
1240
1241 if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
1242 return M < 0 || (M >= 0 && M < HalfSize) ||
1243 (M >= MaskSize && M < MaskSize + HalfSize);
1244 }))
1245 postMask = HighLaneTy;
1246 else if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
1247 return M < 0 || (M >= HalfSize && M < MaskSize) ||
1248 (M >= MaskSize + HalfSize && M < MaskSize * 2);
1249 }))
1250 postMask = LowLaneTy;
1251
1252 // The pre-half of mask is high lane type, and the post-half of mask
1253 // is low lane type, which is closest to the LoongArch instructions.
1254 //
1255 // Note: In the LoongArch architecture, the high lane of the mask corresponds
1256 // to the lower 128 bits of the vector register, and the low lane of the mask
1257 // corresponds to the higher 128 bits of the vector register.
1258 if (preMask == HighLaneTy && postMask == LowLaneTy) {
1259 return;
1260 }
1261 if (preMask == LowLaneTy && postMask == HighLaneTy) {
1262 V1 = DAG.getBitcast(MVT::v4i64, V1);
1263 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
1264 DAG.getConstant(0b01001110, DL, MVT::i64));
1265 V1 = DAG.getBitcast(VT, V1);
1266
1267 if (!V2.isUndef()) {
1268 V2 = DAG.getBitcast(MVT::v4i64, V2);
1269 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
1270 DAG.getConstant(0b01001110, DL, MVT::i64));
1271 V2 = DAG.getBitcast(VT, V2);
1272 }
1273
1274 for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
1275 *it = *it < 0 ? *it : *it - HalfSize;
1276 }
1277 for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
1278 *it = *it < 0 ? *it : *it + HalfSize;
1279 }
1280 } else if (preMask == LowLaneTy && postMask == LowLaneTy) {
1281 V1 = DAG.getBitcast(MVT::v4i64, V1);
1282 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
1283 DAG.getConstant(0b11101110, DL, MVT::i64));
1284 V1 = DAG.getBitcast(VT, V1);
1285
1286 if (!V2.isUndef()) {
1287 V2 = DAG.getBitcast(MVT::v4i64, V2);
1288 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
1289 DAG.getConstant(0b11101110, DL, MVT::i64));
1290 V2 = DAG.getBitcast(VT, V2);
1291 }
1292
1293 for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
1294 *it = *it < 0 ? *it : *it - HalfSize;
1295 }
1296 } else if (preMask == HighLaneTy && postMask == HighLaneTy) {
1297 V1 = DAG.getBitcast(MVT::v4i64, V1);
1298 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
1299 DAG.getConstant(0b01000100, DL, MVT::i64));
1300 V1 = DAG.getBitcast(VT, V1);
1301
1302 if (!V2.isUndef()) {
1303 V2 = DAG.getBitcast(MVT::v4i64, V2);
1304 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
1305 DAG.getConstant(0b01000100, DL, MVT::i64));
1306 V2 = DAG.getBitcast(VT, V2);
1307 }
1308
1309 for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
1310 *it = *it < 0 ? *it : *it + HalfSize;
1311 }
1312 } else { // cross-lane
1313 return;
1314 }
1315}
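// Note on the XVPERMI immediates above (assuming xvpermi.d selects 64-bit
// elements via four 2-bit fields, low field first): 0b01001110 -> <2, 3, 0, 1>
// swaps the two 128-bit lanes, 0b11101110 -> <2, 3, 2, 3> duplicates the high
// lane, and 0b01000100 -> <0, 1, 0, 1> duplicates the low lane.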
1316
1317/// Dispatching routine to lower various 256-bit LoongArch vector shuffles.
1318///
1319/// This routine breaks down the specific type of 256-bit shuffle and
1320/// dispatches to the lowering routines accordingly.
1321static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
1322 SDValue V1, SDValue V2, SelectionDAG &DAG) {
1323 assert((VT.SimpleTy == MVT::v32i8 || VT.SimpleTy == MVT::v16i16 ||
1324 VT.SimpleTy == MVT::v8i32 || VT.SimpleTy == MVT::v4i64 ||
1325 VT.SimpleTy == MVT::v8f32 || VT.SimpleTy == MVT::v4f64) &&
1326 "Vector type is unsupported for lasx!");
1327 assert(V1.getSimpleValueType() == V2.getSimpleValueType() &&
1328 "Two operands have different types!");
1329 assert(VT.getVectorNumElements() == Mask.size() &&
1330 "Unexpected mask size for shuffle!");
1331 assert(Mask.size() % 2 == 0 && "Expected even mask size.");
1332 assert(Mask.size() >= 4 && "Mask size is less than 4.");
1333
1334 // Canonicalize non-cross-lane shuffle vectors.
1335 SmallVector<int> NewMask(Mask);
1336 canonicalizeShuffleVectorByLane(DL, NewMask, VT, V1, V2, DAG);
1337
1338 SDValue Result;
1339 // TODO: Add more comparison patterns.
1340 if (V2.isUndef()) {
1341 if ((Result = lowerVECTOR_SHUFFLE_XVREPLVEI(DL, NewMask, VT, V1, V2, DAG)))
1342 return Result;
1343 if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, NewMask, VT, V1, V2, DAG)))
1344 return Result;
1345
1346 // TODO: This comment may be enabled in the future to better match the
1347 // pattern for instruction selection.
1348 /* V2 = V1; */
1349 }
1350
1351 // For better performance, it is recommended not to change the order of these
1352 // pattern comparisons.
1353 if ((Result = lowerVECTOR_SHUFFLE_XVPACKEV(DL, NewMask, VT, V1, V2, DAG)))
1354 return Result;
1355 if ((Result = lowerVECTOR_SHUFFLE_XVPACKOD(DL, NewMask, VT, V1, V2, DAG)))
1356 return Result;
1357 if ((Result = lowerVECTOR_SHUFFLE_XVILVH(DL, NewMask, VT, V1, V2, DAG)))
1358 return Result;
1359 if ((Result = lowerVECTOR_SHUFFLE_XVILVL(DL, NewMask, VT, V1, V2, DAG)))
1360 return Result;
1361 if ((Result = lowerVECTOR_SHUFFLE_XVPICKEV(DL, NewMask, VT, V1, V2, DAG)))
1362 return Result;
1363 if ((Result = lowerVECTOR_SHUFFLE_XVPICKOD(DL, NewMask, VT, V1, V2, DAG)))
1364 return Result;
1365 if ((Result = lowerVECTOR_SHUFFLE_XVSHUF(DL, NewMask, VT, V1, V2, DAG)))
1366 return Result;
1367
1368 return SDValue();
1369}
1370
1371SDValue LoongArchTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
1372 SelectionDAG &DAG) const {
1373 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
1374 ArrayRef<int> OrigMask = SVOp->getMask();
1375 SDValue V1 = Op.getOperand(0);
1376 SDValue V2 = Op.getOperand(1);
1377 MVT VT = Op.getSimpleValueType();
1378 int NumElements = VT.getVectorNumElements();
1379 SDLoc DL(Op);
1380
1381 bool V1IsUndef = V1.isUndef();
1382 bool V2IsUndef = V2.isUndef();
1383 if (V1IsUndef && V2IsUndef)
1384 return DAG.getUNDEF(VT);
1385
1386 // When we create a shuffle node we put the UNDEF node to second operand,
1387 // but in some cases the first operand may be transformed to UNDEF.
1388 // In this case we should just commute the node.
1389 if (V1IsUndef)
1390 return DAG.getCommutedVectorShuffle(*SVOp);
1391
1392 // Check for non-undef masks pointing at an undef vector and make the masks
1393 // undef as well. This makes it easier to match the shuffle based solely on
1394 // the mask.
1395 if (V2IsUndef &&
1396 any_of(OrigMask, [NumElements](int M) { return M >= NumElements; })) {
1397 SmallVector<int, 8> NewMask(OrigMask);
1398 for (int &M : NewMask)
1399 if (M >= NumElements)
1400 M = -1;
1401 return DAG.getVectorShuffle(VT, DL, V1, V2, NewMask);
1402 }
1403
1404 // Check for illegal shuffle mask element index values.
1405 int MaskUpperLimit = OrigMask.size() * (V2IsUndef ? 1 : 2);
1406 (void)MaskUpperLimit;
1407 assert(llvm::all_of(OrigMask,
1408 [&](int M) { return -1 <= M && M < MaskUpperLimit; }) &&
1409 "Out of bounds shuffle index");
1410
1411 // For each vector width, delegate to a specialized lowering routine.
1412 if (VT.is128BitVector())
1413 return lower128BitShuffle(DL, OrigMask, VT, V1, V2, DAG);
1414
1415 if (VT.is256BitVector())
1416 return lower256BitShuffle(DL, OrigMask, VT, V1, V2, DAG);
1417
1418 return SDValue();
1419}
1420
1421static bool isConstantOrUndef(const SDValue Op) {
1422 if (Op->isUndef())
1423 return true;
1424 if (isa<ConstantSDNode>(Op))
1425 return true;
1426 if (isa<ConstantFPSDNode>(Op))
1427 return true;
1428 return false;
1429}
1430
1431static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode *Op) {
1432 for (unsigned i = 0; i < Op->getNumOperands(); ++i)
1433 if (isConstantOrUndef(Op->getOperand(i)))
1434 return true;
1435 return false;
1436}
1437
1438SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
1439 SelectionDAG &DAG) const {
1440 BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op);
1441 EVT ResTy = Op->getValueType(0);
1442 SDLoc DL(Op);
1443 APInt SplatValue, SplatUndef;
1444 unsigned SplatBitSize;
1445 bool HasAnyUndefs;
1446 bool Is128Vec = ResTy.is128BitVector();
1447 bool Is256Vec = ResTy.is256BitVector();
1448
1449 if ((!Subtarget.hasExtLSX() || !Is128Vec) &&
1450 (!Subtarget.hasExtLASX() || !Is256Vec))
1451 return SDValue();
1452
1453 if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
1454 /*MinSplatBits=*/8) &&
1455 SplatBitSize <= 64) {
1456 // We can only cope with 8, 16, 32, or 64-bit elements.
1457 if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 &&
1458 SplatBitSize != 64)
1459 return SDValue();
1460
1461 EVT ViaVecTy;
1462
1463 switch (SplatBitSize) {
1464 default:
1465 return SDValue();
1466 case 8:
1467 ViaVecTy = Is128Vec ? MVT::v16i8 : MVT::v32i8;
1468 break;
1469 case 16:
1470 ViaVecTy = Is128Vec ? MVT::v8i16 : MVT::v16i16;
1471 break;
1472 case 32:
1473 ViaVecTy = Is128Vec ? MVT::v4i32 : MVT::v8i32;
1474 break;
1475 case 64:
1476 ViaVecTy = Is128Vec ? MVT::v2i64 : MVT::v4i64;
1477 break;
1478 }
1479
1480 // SelectionDAG::getConstant will promote SplatValue appropriately.
1481 SDValue Result = DAG.getConstant(SplatValue, DL, ViaVecTy);
1482
1483 // Bitcast to the type we originally wanted.
1484 if (ViaVecTy != ResTy)
1485 Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result);
1486
1487 return Result;
1488 }
1489
1490 if (DAG.isSplatValue(Op, /*AllowUndefs=*/false))
1491 return Op;
1492
1494 // Use INSERT_VECTOR_ELT operations rather than expand to stores.
1495 // The resulting code is the same length as the expansion, but it doesn't
1496 // use memory operations.
1497 EVT ResTy = Node->getValueType(0);
1498
1499 assert(ResTy.isVector());
1500
1501 unsigned NumElts = ResTy.getVectorNumElements();
1502 SDValue Vector = DAG.getUNDEF(ResTy);
1503 for (unsigned i = 0; i < NumElts; ++i) {
1504 Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector,
1505 Node->getOperand(i),
1506 DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
1507 }
1508 return Vector;
1509 }
1510
1511 return SDValue();
1512}
1513
1514SDValue
1515LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
1516 SelectionDAG &DAG) const {
1517 EVT VecTy = Op->getOperand(0)->getValueType(0);
1518 SDValue Idx = Op->getOperand(1);
1519 EVT EltTy = VecTy.getVectorElementType();
1520 unsigned NumElts = VecTy.getVectorNumElements();
1521
1522 if (isa<ConstantSDNode>(Idx) &&
1523 (EltTy == MVT::i32 || EltTy == MVT::i64 || EltTy == MVT::f32 ||
1524 EltTy == MVT::f64 || Idx->getAsZExtVal() < NumElts / 2))
1525 return Op;
1526
1527 return SDValue();
1528}
1529
1530SDValue
1531LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
1532 SelectionDAG &DAG) const {
1533 if (isa<ConstantSDNode>(Op->getOperand(2)))
1534 return Op;
1535 return SDValue();
1536}
1537
1538SDValue LoongArchTargetLowering::lowerATOMIC_FENCE(SDValue Op,
1539 SelectionDAG &DAG) const {
1540 SDLoc DL(Op);
1541 SyncScope::ID FenceSSID =
1542 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
1543
1544 // singlethread fences only synchronize with signal handlers on the same
1545 // thread and thus only need to preserve instruction order, not actually
1546 // enforce memory ordering.
1547 if (FenceSSID == SyncScope::SingleThread)
1548 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
1549 return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
1550
1551 return Op;
1552}
1553
1554SDValue LoongArchTargetLowering::lowerWRITE_REGISTER(SDValue Op,
1555 SelectionDAG &DAG) const {
1556
1557 if (Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i32) {
1558 DAG.getContext()->emitError(
1559 "On LA64, only 64-bit registers can be written.");
1560 return Op.getOperand(0);
1561 }
1562
1563 if (!Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i64) {
1564 DAG.getContext()->emitError(
1565 "On LA32, only 32-bit registers can be written.");
1566 return Op.getOperand(0);
1567 }
1568
1569 return Op;
1570}
1571
1572SDValue LoongArchTargetLowering::lowerFRAMEADDR(SDValue Op,
1573 SelectionDAG &DAG) const {
1574 if (!isa<ConstantSDNode>(Op.getOperand(0))) {
1575 DAG.getContext()->emitError("argument to '__builtin_frame_address' must "
1576 "be a constant integer");
1577 return SDValue();
1578 }
1579
1580 MachineFunction &MF = DAG.getMachineFunction();
1581 MF.getFrameInfo().setFrameAddressIsTaken(true);
1582 Register FrameReg = Subtarget.getRegisterInfo()->getFrameRegister(MF);
1583 EVT VT = Op.getValueType();
1584 SDLoc DL(Op);
1585 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
1586 unsigned Depth = Op.getConstantOperandVal(0);
1587 int GRLenInBytes = Subtarget.getGRLen() / 8;
1588
1589 while (Depth--) {
1590 int Offset = -(GRLenInBytes * 2);
1591 SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
1592 DAG.getSignedConstant(Offset, DL, VT));
1593 FrameAddr =
1594 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
1595 }
1596 return FrameAddr;
1597}
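// Example (illustration): with GRLen = 64, each loop iteration above reloads
// the caller's frame pointer from the current frame at offset -16
// (-2 * GRLenInBytes), so __builtin_frame_address(2) performs two such loads.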
1598
1599SDValue LoongArchTargetLowering::lowerRETURNADDR(SDValue Op,
1600 SelectionDAG &DAG) const {
1601 if (verifyReturnAddressArgumentIsConstant(Op, DAG))
1602 return SDValue();
1603
1604 // Currently only support lowering return address for current frame.
1605 if (Op.getConstantOperandVal(0) != 0) {
1606 DAG.getContext()->emitError(
1607 "return address can only be determined for the current frame");
1608 return SDValue();
1609 }
1610
1611 MachineFunction &MF = DAG.getMachineFunction();
1612 MF.getFrameInfo().setReturnAddressIsTaken(true);
1613 MVT GRLenVT = Subtarget.getGRLenVT();
1614
1615 // Return the value of the return address register, marking it an implicit
1616 // live-in.
1617 Register Reg = MF.addLiveIn(Subtarget.getRegisterInfo()->getRARegister(),
1618 getRegClassFor(GRLenVT));
1619 return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), Reg, GRLenVT);
1620}
1621
1622SDValue LoongArchTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
1623 SelectionDAG &DAG) const {
1624 MachineFunction &MF = DAG.getMachineFunction();
1625 auto Size = Subtarget.getGRLen() / 8;
1626 auto FI = MF.getFrameInfo().CreateFixedObject(Size, 0, false);
1627 return DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
1628}
1629
1630SDValue LoongArchTargetLowering::lowerVASTART(SDValue Op,
1631 SelectionDAG &DAG) const {
1632 MachineFunction &MF = DAG.getMachineFunction();
1633 auto *FuncInfo = MF.getInfo<LoongArchMachineFunctionInfo>();
1634
1635 SDLoc DL(Op);
1636 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
1638
1639 // vastart just stores the address of the VarArgsFrameIndex slot into the
1640 // memory location argument.
1641 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
1642 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
1643 MachinePointerInfo(SV));
1644}
1645
1646SDValue LoongArchTargetLowering::lowerUINT_TO_FP(SDValue Op,
1647 SelectionDAG &DAG) const {
1648 assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
1649 !Subtarget.hasBasicD() && "unexpected target features");
1650
1651 SDLoc DL(Op);
1652 SDValue Op0 = Op.getOperand(0);
1653 if (Op0->getOpcode() == ISD::AND) {
1654 auto *C = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
1655 if (C && C->getZExtValue() < UINT64_C(0xFFFFFFFF))
1656 return Op;
1657 }
1658
1659 if (Op0->getOpcode() == LoongArchISD::BSTRPICK &&
1660 Op0.getConstantOperandVal(1) < UINT64_C(0X1F) &&
1661 Op0.getConstantOperandVal(2) == UINT64_C(0))
1662 return Op;
1663
1664 if (Op0.getOpcode() == ISD::AssertZext &&
1665 dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLT(MVT::i32))
1666 return Op;
1667
1668 EVT OpVT = Op0.getValueType();
1669 EVT RetVT = Op.getValueType();
1670 RTLIB::Libcall LC = RTLIB::getUINTTOFP(OpVT, RetVT);
1671 MakeLibCallOptions CallOptions;
1672 CallOptions.setTypeListBeforeSoften(OpVT, RetVT, true);
1673 SDValue Chain = SDValue();
1674 SDValue Result;
1675 std::tie(Result, Chain) =
1676 makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
1677 return Result;
1678}
1679
1680SDValue LoongArchTargetLowering::lowerSINT_TO_FP(SDValue Op,
1681 SelectionDAG &DAG) const {
1682 assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
1683 !Subtarget.hasBasicD() && "unexpected target features");
1684
1685 SDLoc DL(Op);
1686 SDValue Op0 = Op.getOperand(0);
1687
1688 if ((Op0.getOpcode() == ISD::AssertSext ||
1690 dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLE(MVT::i32))
1691 return Op;
1692
1693 EVT OpVT = Op0.getValueType();
1694 EVT RetVT = Op.getValueType();
1695 RTLIB::Libcall LC = RTLIB::getSINTTOFP(OpVT, RetVT);
1696 MakeLibCallOptions CallOptions;
1697 CallOptions.setTypeListBeforeSoften(OpVT, RetVT, true);
1698 SDValue Chain = SDValue();
1699 SDValue Result;
1700 std::tie(Result, Chain) =
1701 makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
1702 return Result;
1703}
1704
1705SDValue LoongArchTargetLowering::lowerBITCAST(SDValue Op,
1706 SelectionDAG &DAG) const {
1707
1708 SDLoc DL(Op);
1709 SDValue Op0 = Op.getOperand(0);
1710
1711 if (Op.getValueType() == MVT::f32 && Op0.getValueType() == MVT::i32 &&
1712 Subtarget.is64Bit() && Subtarget.hasBasicF()) {
1713 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
1714 return DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, NewOp0);
1715 }
1716 return Op;
1717}
1718
1719SDValue LoongArchTargetLowering::lowerFP_TO_SINT(SDValue Op,
1720 SelectionDAG &DAG) const {
1721
1722 SDLoc DL(Op);
1723 SDValue Op0 = Op.getOperand(0);
1724
1725 if (Op0.getValueType() == MVT::f16)
1726 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
1727
1728 if (Op.getValueSizeInBits() > 32 && Subtarget.hasBasicF() &&
1729 !Subtarget.hasBasicD()) {
1730 SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, MVT::f32, Op0);
1731 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Dst);
1732 }
1733
1734 EVT FPTy = EVT::getFloatingPointVT(Op.getValueSizeInBits());
1735 SDValue Trunc = DAG.getNode(LoongArchISD::FTINT, DL, FPTy, Op0);
1736 return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Trunc);
1737}
1738
1739static SDValue getTargetNode(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty,
1740 SelectionDAG &DAG, unsigned Flags) {
1741 return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
1742}
1743
1744static SDValue getTargetNode(BlockAddressSDNode *N, const SDLoc &DL, EVT Ty,
1745 SelectionDAG &DAG, unsigned Flags) {
1746 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
1747 Flags);
1748}
1749
1750static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty,
1751 SelectionDAG &DAG, unsigned Flags) {
1752 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
1753 N->getOffset(), Flags);
1754}
1755
1756static SDValue getTargetNode(JumpTableSDNode *N, const SDLoc &DL, EVT Ty,
1757 SelectionDAG &DAG, unsigned Flags) {
1758 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
1759}
1760
1761template <class NodeTy>
1762SDValue LoongArchTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
1763 CodeModel::Model M,
1764 bool IsLocal) const {
1765 SDLoc DL(N);
1766 EVT Ty = getPointerTy(DAG.getDataLayout());
1767 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
1768 SDValue Load;
1769
1770 switch (M) {
1771 default:
1772 report_fatal_error("Unsupported code model");
1773
1774 case CodeModel::Large: {
1775 assert(Subtarget.is64Bit() && "Large code model requires LA64");
1776
1777 // This is not actually used, but is necessary for successfully matching
1778 // the PseudoLA_*_LARGE nodes.
1779 SDValue Tmp = DAG.getConstant(0, DL, Ty);
1780 if (IsLocal) {
1781 // This generates the pattern (PseudoLA_PCREL_LARGE tmp sym), that
1782 // eventually becomes the desired 5-insn code sequence.
1783 Load = SDValue(DAG.getMachineNode(LoongArch::PseudoLA_PCREL_LARGE, DL, Ty,
1784 Tmp, Addr),
1785 0);
1786 } else {
1787 // This generates the pattern (PseudoLA_GOT_LARGE tmp sym), that
1788 // eventually becomes the desired 5-insn code sequence.
1789 Load = SDValue(
1790 DAG.getMachineNode(LoongArch::PseudoLA_GOT_LARGE, DL, Ty, Tmp, Addr),
1791 0);
1792 }
1793 break;
1794 }
1795
1796 case CodeModel::Small:
1797 case CodeModel::Medium:
1798 if (IsLocal) {
1799 // This generates the pattern (PseudoLA_PCREL sym), which expands to
1800 // (addi.w/d (pcalau12i %pc_hi20(sym)) %pc_lo12(sym)).
1801 Load = SDValue(
1802 DAG.getMachineNode(LoongArch::PseudoLA_PCREL, DL, Ty, Addr), 0);
1803 } else {
1804 // This generates the pattern (PseudoLA_GOT sym), which expands to (ld.w/d
1805 // (pcalau12i %got_pc_hi20(sym)) %got_pc_lo12(sym)).
1806 Load =
1807 SDValue(DAG.getMachineNode(LoongArch::PseudoLA_GOT, DL, Ty, Addr), 0);
1808 }
1809 }
1810
1811 if (!IsLocal) {
1812 // Mark the load instruction as invariant to enable hoisting in MachineLICM.
1818 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
1819 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
1820 }
1821
1822 return Load;
1823}
1824
1825SDValue LoongArchTargetLowering::lowerBlockAddress(SDValue Op,
1826 SelectionDAG &DAG) const {
1827 return getAddr(cast<BlockAddressSDNode>(Op), DAG,
1828 DAG.getTarget().getCodeModel());
1829}
1830
1831SDValue LoongArchTargetLowering::lowerJumpTable(SDValue Op,
1832 SelectionDAG &DAG) const {
1833 return getAddr(cast<JumpTableSDNode>(Op), DAG,
1834 DAG.getTarget().getCodeModel());
1835}
1836
1837SDValue LoongArchTargetLowering::lowerConstantPool(SDValue Op,
1838 SelectionDAG &DAG) const {
1839 return getAddr(cast<ConstantPoolSDNode>(Op), DAG,
1840 DAG.getTarget().getCodeModel());
1841}
1842
1843SDValue LoongArchTargetLowering::lowerGlobalAddress(SDValue Op,
1844 SelectionDAG &DAG) const {
1845 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
1846 assert(N->getOffset() == 0 && "unexpected offset in global node");
1847 auto CM = DAG.getTarget().getCodeModel();
1848 const GlobalValue *GV = N->getGlobal();
1849
1850 if (GV->isDSOLocal() && isa<GlobalVariable>(GV)) {
1851 if (auto GCM = dyn_cast<GlobalVariable>(GV)->getCodeModel())
1852 CM = *GCM;
1853 }
1854
1855 return getAddr(N, DAG, CM, GV->isDSOLocal());
1856}
1857
1858SDValue LoongArchTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
1859 SelectionDAG &DAG,
1860 unsigned Opc, bool UseGOT,
1861 bool Large) const {
1862 SDLoc DL(N);
1863 EVT Ty = getPointerTy(DAG.getDataLayout());
1864 MVT GRLenVT = Subtarget.getGRLenVT();
1865
1866 // This is not actually used, but is necessary for successfully matching the
1867 // PseudoLA_*_LARGE nodes.
1868 SDValue Tmp = DAG.getConstant(0, DL, Ty);
1869 SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
1870
1871 // Only IE needs an extra argument for large code model.
1872 SDValue Offset = Opc == LoongArch::PseudoLA_TLS_IE_LARGE
1873 ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
1874 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
1875
1876 // If it is LE for normal/medium code model, the add tp operation will occur
1877 // during the pseudo-instruction expansion.
1878 if (Opc == LoongArch::PseudoLA_TLS_LE && !Large)
1879 return Offset;
1880
1881 if (UseGOT) {
1882 // Mark the load instruction as invariant to enable hoisting in MachineLICM.
1888 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
1889 DAG.setNodeMemRefs(cast<MachineSDNode>(Offset.getNode()), {MemOp});
1890 }
1891
1892 // Add the thread pointer.
1893 return DAG.getNode(ISD::ADD, DL, Ty, Offset,
1894 DAG.getRegister(LoongArch::R2, GRLenVT));
1895}
1896
1897SDValue LoongArchTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
1898 SelectionDAG &DAG,
1899 unsigned Opc,
1900 bool Large) const {
1901 SDLoc DL(N);
1902 EVT Ty = getPointerTy(DAG.getDataLayout());
1903 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
1904
1905 // This is not actually used, but is necessary for successfully matching the
1906 // PseudoLA_*_LARGE nodes.
1907 SDValue Tmp = DAG.getConstant(0, DL, Ty);
1908
1909 // Use a PC-relative addressing mode to access the dynamic GOT address.
1910 SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
1911 SDValue Load = Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
1912 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
1913
1914 // Prepare argument list to generate call.
1915 ArgListTy Args;
1916 ArgListEntry Entry;
1917 Entry.Node = Load;
1918 Entry.Ty = CallTy;
1919 Args.push_back(Entry);
1920
1921 // Setup call to __tls_get_addr.
1922 TargetLowering::CallLoweringInfo CLI(DAG);
1923 CLI.setDebugLoc(DL)
1924 .setChain(DAG.getEntryNode())
1925 .setLibCallee(CallingConv::C, CallTy,
1926 DAG.getExternalSymbol("__tls_get_addr", Ty),
1927 std::move(Args));
1928
1929 return LowerCallTo(CLI).first;
1930}
1931
1932SDValue LoongArchTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
1933 SelectionDAG &DAG, unsigned Opc,
1934 bool Large) const {
1935 SDLoc DL(N);
1936 EVT Ty = getPointerTy(DAG.getDataLayout());
1937 const GlobalValue *GV = N->getGlobal();
1938
1939 // This is not actually used, but is necessary for successfully matching the
1940 // PseudoLA_*_LARGE nodes.
1941 SDValue Tmp = DAG.getConstant(0, DL, Ty);
1942
1943 // Use a PC-relative addressing mode to access the global dynamic GOT address.
1944 // This generates the pattern (PseudoLA_TLS_DESC_PC{,LARGE} sym).
1945 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
1946 return Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
1947 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
1948}
1949
1950SDValue
1951LoongArchTargetLowering::lowerGlobalTLSAddress(SDValue Op,
1952 SelectionDAG &DAG) const {
1953 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
1954 CallingConv::GHC)
1955 report_fatal_error("In GHC calling convention TLS is not supported");
1956
1957 bool Large = DAG.getTarget().getCodeModel() == CodeModel::Large;
1958 assert((!Large || Subtarget.is64Bit()) && "Large code model requires LA64");
1959
1960 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
1961 assert(N->getOffset() == 0 && "unexpected offset in global node");
1962
1963 if (DAG.getTarget().useEmulatedTLS())
1964 report_fatal_error("the emulated TLS is prohibited",
1965 /*GenCrashDiag=*/false);
1966
1967 bool IsDesc = DAG.getTarget().useTLSDESC();
1968
1969 switch (getTargetMachine().getTLSModel(N->getGlobal())) {
1970 case TLSModel::GeneralDynamic:
1971 // In this model, application code calls the dynamic linker function
1972 // __tls_get_addr to locate TLS offsets into the dynamic thread vector at
1973 // runtime.
1974 if (!IsDesc)
1975 return getDynamicTLSAddr(N, DAG,
1976 Large ? LoongArch::PseudoLA_TLS_GD_LARGE
1977 : LoongArch::PseudoLA_TLS_GD,
1978 Large);
1979 break;
1980 case TLSModel::LocalDynamic:
1981 // Same as GeneralDynamic, except for assembly modifiers and relocation
1982 // records.
1983 if (!IsDesc)
1984 return getDynamicTLSAddr(N, DAG,
1985 Large ? LoongArch::PseudoLA_TLS_LD_LARGE
1986 : LoongArch::PseudoLA_TLS_LD,
1987 Large);
1988 break;
1989 case TLSModel::InitialExec:
1990 // This model uses the GOT to resolve TLS offsets.
1991 return getStaticTLSAddr(N, DAG,
1992 Large ? LoongArch::PseudoLA_TLS_IE_LARGE
1993 : LoongArch::PseudoLA_TLS_IE,
1994 /*UseGOT=*/true, Large);
1995 case TLSModel::LocalExec:
1996 // This model is used when statically linking, as the TLS offsets are resolved
1997 // during program linking.
1998 //
1999 // This node doesn't need an extra argument for the large code model.
2000 return getStaticTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_LE,
2001 /*UseGOT=*/false, Large);
2002 }
2003
2004 return getTLSDescAddr(N, DAG,
2005 Large ? LoongArch::PseudoLA_TLS_DESC_LARGE
2006 : LoongArch::PseudoLA_TLS_DESC,
2007 Large);
2008}
2009
2010template <unsigned N>
2011 static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp,
2012 SelectionDAG &DAG, bool IsSigned = false) {
2013 auto *CImm = cast<ConstantSDNode>(Op->getOperand(ImmOp));
2014 // Check the ImmArg.
2015 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
2016 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
2017 DAG.getContext()->emitError(Op->getOperationName(0) +
2018 ": argument out of range.");
2019 return DAG.getNode(ISD::UNDEF, SDLoc(Op), Op.getValueType());
2020 }
2021 return SDValue();
2022}
2023
2024SDValue
2025LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
2026 SelectionDAG &DAG) const {
2027 SDLoc DL(Op);
2028 switch (Op.getConstantOperandVal(0)) {
2029 default:
2030 return SDValue(); // Don't custom lower most intrinsics.
2031 case Intrinsic::thread_pointer: {
2032 EVT PtrVT = getPointerTy(DAG.getDataLayout());
2033 return DAG.getRegister(LoongArch::R2, PtrVT);
2034 }
2035 case Intrinsic::loongarch_lsx_vpickve2gr_d:
2036 case Intrinsic::loongarch_lsx_vpickve2gr_du:
2037 case Intrinsic::loongarch_lsx_vreplvei_d:
2038 case Intrinsic::loongarch_lasx_xvrepl128vei_d:
2039 return checkIntrinsicImmArg<1>(Op, 2, DAG);
2040 case Intrinsic::loongarch_lsx_vreplvei_w:
2041 case Intrinsic::loongarch_lasx_xvrepl128vei_w:
2042 case Intrinsic::loongarch_lasx_xvpickve2gr_d:
2043 case Intrinsic::loongarch_lasx_xvpickve2gr_du:
2044 case Intrinsic::loongarch_lasx_xvpickve_d:
2045 case Intrinsic::loongarch_lasx_xvpickve_d_f:
2046 return checkIntrinsicImmArg<2>(Op, 2, DAG);
2047 case Intrinsic::loongarch_lasx_xvinsve0_d:
2048 return checkIntrinsicImmArg<2>(Op, 3, DAG);
2049 case Intrinsic::loongarch_lsx_vsat_b:
2050 case Intrinsic::loongarch_lsx_vsat_bu:
2051 case Intrinsic::loongarch_lsx_vrotri_b:
2052 case Intrinsic::loongarch_lsx_vsllwil_h_b:
2053 case Intrinsic::loongarch_lsx_vsllwil_hu_bu:
2054 case Intrinsic::loongarch_lsx_vsrlri_b:
2055 case Intrinsic::loongarch_lsx_vsrari_b:
2056 case Intrinsic::loongarch_lsx_vreplvei_h:
2057 case Intrinsic::loongarch_lasx_xvsat_b:
2058 case Intrinsic::loongarch_lasx_xvsat_bu:
2059 case Intrinsic::loongarch_lasx_xvrotri_b:
2060 case Intrinsic::loongarch_lasx_xvsllwil_h_b:
2061 case Intrinsic::loongarch_lasx_xvsllwil_hu_bu:
2062 case Intrinsic::loongarch_lasx_xvsrlri_b:
2063 case Intrinsic::loongarch_lasx_xvsrari_b:
2064 case Intrinsic::loongarch_lasx_xvrepl128vei_h:
2065 case Intrinsic::loongarch_lasx_xvpickve_w:
2066 case Intrinsic::loongarch_lasx_xvpickve_w_f:
2067 return checkIntrinsicImmArg<3>(Op, 2, DAG);
2068 case Intrinsic::loongarch_lasx_xvinsve0_w:
2069 return checkIntrinsicImmArg<3>(Op, 3, DAG);
2070 case Intrinsic::loongarch_lsx_vsat_h:
2071 case Intrinsic::loongarch_lsx_vsat_hu:
2072 case Intrinsic::loongarch_lsx_vrotri_h:
2073 case Intrinsic::loongarch_lsx_vsllwil_w_h:
2074 case Intrinsic::loongarch_lsx_vsllwil_wu_hu:
2075 case Intrinsic::loongarch_lsx_vsrlri_h:
2076 case Intrinsic::loongarch_lsx_vsrari_h:
2077 case Intrinsic::loongarch_lsx_vreplvei_b:
2078 case Intrinsic::loongarch_lasx_xvsat_h:
2079 case Intrinsic::loongarch_lasx_xvsat_hu:
2080 case Intrinsic::loongarch_lasx_xvrotri_h:
2081 case Intrinsic::loongarch_lasx_xvsllwil_w_h:
2082 case Intrinsic::loongarch_lasx_xvsllwil_wu_hu:
2083 case Intrinsic::loongarch_lasx_xvsrlri_h:
2084 case Intrinsic::loongarch_lasx_xvsrari_h:
2085 case Intrinsic::loongarch_lasx_xvrepl128vei_b:
2086 return checkIntrinsicImmArg<4>(Op, 2, DAG);
2087 case Intrinsic::loongarch_lsx_vsrlni_b_h:
2088 case Intrinsic::loongarch_lsx_vsrani_b_h:
2089 case Intrinsic::loongarch_lsx_vsrlrni_b_h:
2090 case Intrinsic::loongarch_lsx_vsrarni_b_h:
2091 case Intrinsic::loongarch_lsx_vssrlni_b_h:
2092 case Intrinsic::loongarch_lsx_vssrani_b_h:
2093 case Intrinsic::loongarch_lsx_vssrlni_bu_h:
2094 case Intrinsic::loongarch_lsx_vssrani_bu_h:
2095 case Intrinsic::loongarch_lsx_vssrlrni_b_h:
2096 case Intrinsic::loongarch_lsx_vssrarni_b_h:
2097 case Intrinsic::loongarch_lsx_vssrlrni_bu_h:
2098 case Intrinsic::loongarch_lsx_vssrarni_bu_h:
2099 case Intrinsic::loongarch_lasx_xvsrlni_b_h:
2100 case Intrinsic::loongarch_lasx_xvsrani_b_h:
2101 case Intrinsic::loongarch_lasx_xvsrlrni_b_h:
2102 case Intrinsic::loongarch_lasx_xvsrarni_b_h:
2103 case Intrinsic::loongarch_lasx_xvssrlni_b_h:
2104 case Intrinsic::loongarch_lasx_xvssrani_b_h:
2105 case Intrinsic::loongarch_lasx_xvssrlni_bu_h:
2106 case Intrinsic::loongarch_lasx_xvssrani_bu_h:
2107 case Intrinsic::loongarch_lasx_xvssrlrni_b_h:
2108 case Intrinsic::loongarch_lasx_xvssrarni_b_h:
2109 case Intrinsic::loongarch_lasx_xvssrlrni_bu_h:
2110 case Intrinsic::loongarch_lasx_xvssrarni_bu_h:
2111 return checkIntrinsicImmArg<4>(Op, 3, DAG);
2112 case Intrinsic::loongarch_lsx_vsat_w:
2113 case Intrinsic::loongarch_lsx_vsat_wu:
2114 case Intrinsic::loongarch_lsx_vrotri_w:
2115 case Intrinsic::loongarch_lsx_vsllwil_d_w:
2116 case Intrinsic::loongarch_lsx_vsllwil_du_wu:
2117 case Intrinsic::loongarch_lsx_vsrlri_w:
2118 case Intrinsic::loongarch_lsx_vsrari_w:
2119 case Intrinsic::loongarch_lsx_vslei_bu:
2120 case Intrinsic::loongarch_lsx_vslei_hu:
2121 case Intrinsic::loongarch_lsx_vslei_wu:
2122 case Intrinsic::loongarch_lsx_vslei_du:
2123 case Intrinsic::loongarch_lsx_vslti_bu:
2124 case Intrinsic::loongarch_lsx_vslti_hu:
2125 case Intrinsic::loongarch_lsx_vslti_wu:
2126 case Intrinsic::loongarch_lsx_vslti_du:
2127 case Intrinsic::loongarch_lsx_vbsll_v:
2128 case Intrinsic::loongarch_lsx_vbsrl_v:
2129 case Intrinsic::loongarch_lasx_xvsat_w:
2130 case Intrinsic::loongarch_lasx_xvsat_wu:
2131 case Intrinsic::loongarch_lasx_xvrotri_w:
2132 case Intrinsic::loongarch_lasx_xvsllwil_d_w:
2133 case Intrinsic::loongarch_lasx_xvsllwil_du_wu:
2134 case Intrinsic::loongarch_lasx_xvsrlri_w:
2135 case Intrinsic::loongarch_lasx_xvsrari_w:
2136 case Intrinsic::loongarch_lasx_xvslei_bu:
2137 case Intrinsic::loongarch_lasx_xvslei_hu:
2138 case Intrinsic::loongarch_lasx_xvslei_wu:
2139 case Intrinsic::loongarch_lasx_xvslei_du:
2140 case Intrinsic::loongarch_lasx_xvslti_bu:
2141 case Intrinsic::loongarch_lasx_xvslti_hu:
2142 case Intrinsic::loongarch_lasx_xvslti_wu:
2143 case Intrinsic::loongarch_lasx_xvslti_du:
2144 case Intrinsic::loongarch_lasx_xvbsll_v:
2145 case Intrinsic::loongarch_lasx_xvbsrl_v:
2146 return checkIntrinsicImmArg<5>(Op, 2, DAG);
2147 case Intrinsic::loongarch_lsx_vseqi_b:
2148 case Intrinsic::loongarch_lsx_vseqi_h:
2149 case Intrinsic::loongarch_lsx_vseqi_w:
2150 case Intrinsic::loongarch_lsx_vseqi_d:
2151 case Intrinsic::loongarch_lsx_vslei_b:
2152 case Intrinsic::loongarch_lsx_vslei_h:
2153 case Intrinsic::loongarch_lsx_vslei_w:
2154 case Intrinsic::loongarch_lsx_vslei_d:
2155 case Intrinsic::loongarch_lsx_vslti_b:
2156 case Intrinsic::loongarch_lsx_vslti_h:
2157 case Intrinsic::loongarch_lsx_vslti_w:
2158 case Intrinsic::loongarch_lsx_vslti_d:
2159 case Intrinsic::loongarch_lasx_xvseqi_b:
2160 case Intrinsic::loongarch_lasx_xvseqi_h:
2161 case Intrinsic::loongarch_lasx_xvseqi_w:
2162 case Intrinsic::loongarch_lasx_xvseqi_d:
2163 case Intrinsic::loongarch_lasx_xvslei_b:
2164 case Intrinsic::loongarch_lasx_xvslei_h:
2165 case Intrinsic::loongarch_lasx_xvslei_w:
2166 case Intrinsic::loongarch_lasx_xvslei_d:
2167 case Intrinsic::loongarch_lasx_xvslti_b:
2168 case Intrinsic::loongarch_lasx_xvslti_h:
2169 case Intrinsic::loongarch_lasx_xvslti_w:
2170 case Intrinsic::loongarch_lasx_xvslti_d:
2171 return checkIntrinsicImmArg<5>(Op, 2, DAG, /*IsSigned=*/true);
2172 case Intrinsic::loongarch_lsx_vsrlni_h_w:
2173 case Intrinsic::loongarch_lsx_vsrani_h_w:
2174 case Intrinsic::loongarch_lsx_vsrlrni_h_w:
2175 case Intrinsic::loongarch_lsx_vsrarni_h_w:
2176 case Intrinsic::loongarch_lsx_vssrlni_h_w:
2177 case Intrinsic::loongarch_lsx_vssrani_h_w:
2178 case Intrinsic::loongarch_lsx_vssrlni_hu_w:
2179 case Intrinsic::loongarch_lsx_vssrani_hu_w:
2180 case Intrinsic::loongarch_lsx_vssrlrni_h_w:
2181 case Intrinsic::loongarch_lsx_vssrarni_h_w:
2182 case Intrinsic::loongarch_lsx_vssrlrni_hu_w:
2183 case Intrinsic::loongarch_lsx_vssrarni_hu_w:
2184 case Intrinsic::loongarch_lsx_vfrstpi_b:
2185 case Intrinsic::loongarch_lsx_vfrstpi_h:
2186 case Intrinsic::loongarch_lasx_xvsrlni_h_w:
2187 case Intrinsic::loongarch_lasx_xvsrani_h_w:
2188 case Intrinsic::loongarch_lasx_xvsrlrni_h_w:
2189 case Intrinsic::loongarch_lasx_xvsrarni_h_w:
2190 case Intrinsic::loongarch_lasx_xvssrlni_h_w:
2191 case Intrinsic::loongarch_lasx_xvssrani_h_w:
2192 case Intrinsic::loongarch_lasx_xvssrlni_hu_w:
2193 case Intrinsic::loongarch_lasx_xvssrani_hu_w:
2194 case Intrinsic::loongarch_lasx_xvssrlrni_h_w:
2195 case Intrinsic::loongarch_lasx_xvssrarni_h_w:
2196 case Intrinsic::loongarch_lasx_xvssrlrni_hu_w:
2197 case Intrinsic::loongarch_lasx_xvssrarni_hu_w:
2198 case Intrinsic::loongarch_lasx_xvfrstpi_b:
2199 case Intrinsic::loongarch_lasx_xvfrstpi_h:
2200 return checkIntrinsicImmArg<5>(Op, 3, DAG);
2201 case Intrinsic::loongarch_lsx_vsat_d:
2202 case Intrinsic::loongarch_lsx_vsat_du:
2203 case Intrinsic::loongarch_lsx_vrotri_d:
2204 case Intrinsic::loongarch_lsx_vsrlri_d:
2205 case Intrinsic::loongarch_lsx_vsrari_d:
2206 case Intrinsic::loongarch_lasx_xvsat_d:
2207 case Intrinsic::loongarch_lasx_xvsat_du:
2208 case Intrinsic::loongarch_lasx_xvrotri_d:
2209 case Intrinsic::loongarch_lasx_xvsrlri_d:
2210 case Intrinsic::loongarch_lasx_xvsrari_d:
2211 return checkIntrinsicImmArg<6>(Op, 2, DAG);
2212 case Intrinsic::loongarch_lsx_vsrlni_w_d:
2213 case Intrinsic::loongarch_lsx_vsrani_w_d:
2214 case Intrinsic::loongarch_lsx_vsrlrni_w_d:
2215 case Intrinsic::loongarch_lsx_vsrarni_w_d:
2216 case Intrinsic::loongarch_lsx_vssrlni_w_d:
2217 case Intrinsic::loongarch_lsx_vssrani_w_d:
2218 case Intrinsic::loongarch_lsx_vssrlni_wu_d:
2219 case Intrinsic::loongarch_lsx_vssrani_wu_d:
2220 case Intrinsic::loongarch_lsx_vssrlrni_w_d:
2221 case Intrinsic::loongarch_lsx_vssrarni_w_d:
2222 case Intrinsic::loongarch_lsx_vssrlrni_wu_d:
2223 case Intrinsic::loongarch_lsx_vssrarni_wu_d:
2224 case Intrinsic::loongarch_lasx_xvsrlni_w_d:
2225 case Intrinsic::loongarch_lasx_xvsrani_w_d:
2226 case Intrinsic::loongarch_lasx_xvsrlrni_w_d:
2227 case Intrinsic::loongarch_lasx_xvsrarni_w_d:
2228 case Intrinsic::loongarch_lasx_xvssrlni_w_d:
2229 case Intrinsic::loongarch_lasx_xvssrani_w_d:
2230 case Intrinsic::loongarch_lasx_xvssrlni_wu_d:
2231 case Intrinsic::loongarch_lasx_xvssrani_wu_d:
2232 case Intrinsic::loongarch_lasx_xvssrlrni_w_d:
2233 case Intrinsic::loongarch_lasx_xvssrarni_w_d:
2234 case Intrinsic::loongarch_lasx_xvssrlrni_wu_d:
2235 case Intrinsic::loongarch_lasx_xvssrarni_wu_d:
2236 return checkIntrinsicImmArg<6>(Op, 3, DAG);
2237 case Intrinsic::loongarch_lsx_vsrlni_d_q:
2238 case Intrinsic::loongarch_lsx_vsrani_d_q:
2239 case Intrinsic::loongarch_lsx_vsrlrni_d_q:
2240 case Intrinsic::loongarch_lsx_vsrarni_d_q:
2241 case Intrinsic::loongarch_lsx_vssrlni_d_q:
2242 case Intrinsic::loongarch_lsx_vssrani_d_q:
2243 case Intrinsic::loongarch_lsx_vssrlni_du_q:
2244 case Intrinsic::loongarch_lsx_vssrani_du_q:
2245 case Intrinsic::loongarch_lsx_vssrlrni_d_q:
2246 case Intrinsic::loongarch_lsx_vssrarni_d_q:
2247 case Intrinsic::loongarch_lsx_vssrlrni_du_q:
2248 case Intrinsic::loongarch_lsx_vssrarni_du_q:
2249 case Intrinsic::loongarch_lasx_xvsrlni_d_q:
2250 case Intrinsic::loongarch_lasx_xvsrani_d_q:
2251 case Intrinsic::loongarch_lasx_xvsrlrni_d_q:
2252 case Intrinsic::loongarch_lasx_xvsrarni_d_q:
2253 case Intrinsic::loongarch_lasx_xvssrlni_d_q:
2254 case Intrinsic::loongarch_lasx_xvssrani_d_q:
2255 case Intrinsic::loongarch_lasx_xvssrlni_du_q:
2256 case Intrinsic::loongarch_lasx_xvssrani_du_q:
2257 case Intrinsic::loongarch_lasx_xvssrlrni_d_q:
2258 case Intrinsic::loongarch_lasx_xvssrarni_d_q:
2259 case Intrinsic::loongarch_lasx_xvssrlrni_du_q:
2260 case Intrinsic::loongarch_lasx_xvssrarni_du_q:
2261 return checkIntrinsicImmArg<7>(Op, 3, DAG);
2262 case Intrinsic::loongarch_lsx_vnori_b:
2263 case Intrinsic::loongarch_lsx_vshuf4i_b:
2264 case Intrinsic::loongarch_lsx_vshuf4i_h:
2265 case Intrinsic::loongarch_lsx_vshuf4i_w:
2266 case Intrinsic::loongarch_lasx_xvnori_b:
2267 case Intrinsic::loongarch_lasx_xvshuf4i_b:
2268 case Intrinsic::loongarch_lasx_xvshuf4i_h:
2269 case Intrinsic::loongarch_lasx_xvshuf4i_w:
2270 case Intrinsic::loongarch_lasx_xvpermi_d:
2271 return checkIntrinsicImmArg<8>(Op, 2, DAG);
2272 case Intrinsic::loongarch_lsx_vshuf4i_d:
2273 case Intrinsic::loongarch_lsx_vpermi_w:
2274 case Intrinsic::loongarch_lsx_vbitseli_b:
2275 case Intrinsic::loongarch_lsx_vextrins_b:
2276 case Intrinsic::loongarch_lsx_vextrins_h:
2277 case Intrinsic::loongarch_lsx_vextrins_w:
2278 case Intrinsic::loongarch_lsx_vextrins_d:
2279 case Intrinsic::loongarch_lasx_xvshuf4i_d:
2280 case Intrinsic::loongarch_lasx_xvpermi_w:
2281 case Intrinsic::loongarch_lasx_xvpermi_q:
2282 case Intrinsic::loongarch_lasx_xvbitseli_b:
2283 case Intrinsic::loongarch_lasx_xvextrins_b:
2284 case Intrinsic::loongarch_lasx_xvextrins_h:
2285 case Intrinsic::loongarch_lasx_xvextrins_w:
2286 case Intrinsic::loongarch_lasx_xvextrins_d:
2287 return checkIntrinsicImmArg<8>(Op, 3, DAG);
2288 case Intrinsic::loongarch_lsx_vrepli_b:
2289 case Intrinsic::loongarch_lsx_vrepli_h:
2290 case Intrinsic::loongarch_lsx_vrepli_w:
2291 case Intrinsic::loongarch_lsx_vrepli_d:
2292 case Intrinsic::loongarch_lasx_xvrepli_b:
2293 case Intrinsic::loongarch_lasx_xvrepli_h:
2294 case Intrinsic::loongarch_lasx_xvrepli_w:
2295 case Intrinsic::loongarch_lasx_xvrepli_d:
2296 return checkIntrinsicImmArg<10>(Op, 1, DAG, /*IsSigned=*/true);
2297 case Intrinsic::loongarch_lsx_vldi:
2298 case Intrinsic::loongarch_lasx_xvldi:
2299 return checkIntrinsicImmArg<13>(Op, 1, DAG, /*IsSigned=*/true);
2300 }
2301}
2302
2303 // Helper function that emits an error message for intrinsics with a chain and
2304 // returns the merge values of a UNDEF and the chain.
2305 static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op,
2306 StringRef ErrorMsg,
2307 SelectionDAG &DAG) {
2308 DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
2309 return DAG.getMergeValues({DAG.getUNDEF(Op.getValueType()), Op.getOperand(0)},
2310 SDLoc(Op));
2311}
2312
2313SDValue
2314LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
2315 SelectionDAG &DAG) const {
2316 SDLoc DL(Op);
2317 MVT GRLenVT = Subtarget.getGRLenVT();
2318 EVT VT = Op.getValueType();
2319 SDValue Chain = Op.getOperand(0);
2320 const StringRef ErrorMsgOOR = "argument out of range";
2321 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
2322 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
2323
2324 switch (Op.getConstantOperandVal(1)) {
2325 default:
2326 return Op;
2327 case Intrinsic::loongarch_crc_w_b_w:
2328 case Intrinsic::loongarch_crc_w_h_w:
2329 case Intrinsic::loongarch_crc_w_w_w:
2330 case Intrinsic::loongarch_crc_w_d_w:
2331 case Intrinsic::loongarch_crcc_w_b_w:
2332 case Intrinsic::loongarch_crcc_w_h_w:
2333 case Intrinsic::loongarch_crcc_w_w_w:
2334 case Intrinsic::loongarch_crcc_w_d_w:
2335 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqLA64, DAG);
2336 case Intrinsic::loongarch_csrrd_w:
2337 case Intrinsic::loongarch_csrrd_d: {
2338 unsigned Imm = Op.getConstantOperandVal(2);
2339 return !isUInt<14>(Imm)
2340 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
2341 : DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
2342 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
2343 }
2344 case Intrinsic::loongarch_csrwr_w:
2345 case Intrinsic::loongarch_csrwr_d: {
2346 unsigned Imm = Op.getConstantOperandVal(3);
2347 return !isUInt<14>(Imm)
2348 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
2349 : DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
2350 {Chain, Op.getOperand(2),
2351 DAG.getConstant(Imm, DL, GRLenVT)});
2352 }
2353 case Intrinsic::loongarch_csrxchg_w:
2354 case Intrinsic::loongarch_csrxchg_d: {
2355 unsigned Imm = Op.getConstantOperandVal(4);
2356 return !isUInt<14>(Imm)
2357 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
2358 : DAG.getNode(LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
2359 {Chain, Op.getOperand(2), Op.getOperand(3),
2360 DAG.getConstant(Imm, DL, GRLenVT)});
2361 }
2362 case Intrinsic::loongarch_iocsrrd_d: {
2363 return DAG.getNode(
2364 LoongArchISD::IOCSRRD_D, DL, {GRLenVT, MVT::Other},
2365 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2))});
2366 }
2367#define IOCSRRD_CASE(NAME, NODE) \
2368 case Intrinsic::loongarch_##NAME: { \
2369 return DAG.getNode(LoongArchISD::NODE, DL, {GRLenVT, MVT::Other}, \
2370 {Chain, Op.getOperand(2)}); \
2371 }
2372 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
2373 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
2374 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
2375#undef IOCSRRD_CASE
2376 case Intrinsic::loongarch_cpucfg: {
2377 return DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
2378 {Chain, Op.getOperand(2)});
2379 }
2380 case Intrinsic::loongarch_lddir_d: {
2381 unsigned Imm = Op.getConstantOperandVal(3);
2382 return !isUInt<8>(Imm)
2383 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
2384 : Op;
2385 }
2386 case Intrinsic::loongarch_movfcsr2gr: {
2387 if (!Subtarget.hasBasicF())
2388 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqF, DAG);
2389 unsigned Imm = Op.getConstantOperandVal(2);
2390 return !isUInt<2>(Imm)
2391 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
2392 : DAG.getNode(LoongArchISD::MOVFCSR2GR, DL, {VT, MVT::Other},
2393 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
2394 }
2395 case Intrinsic::loongarch_lsx_vld:
2396 case Intrinsic::loongarch_lsx_vldrepl_b:
2397 case Intrinsic::loongarch_lasx_xvld:
2398 case Intrinsic::loongarch_lasx_xvldrepl_b:
2399 return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
2400 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
2401 : SDValue();
2402 case Intrinsic::loongarch_lsx_vldrepl_h:
2403 case Intrinsic::loongarch_lasx_xvldrepl_h:
2404 return !isShiftedInt<11, 1>(
2405 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
2407 Op, "argument out of range or not a multiple of 2", DAG)
2408 : SDValue();
2409 case Intrinsic::loongarch_lsx_vldrepl_w:
2410 case Intrinsic::loongarch_lasx_xvldrepl_w:
2411 return !isShiftedInt<10, 2>(
2412 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
2414 Op, "argument out of range or not a multiple of 4", DAG)
2415 : SDValue();
2416 case Intrinsic::loongarch_lsx_vldrepl_d:
2417 case Intrinsic::loongarch_lasx_xvldrepl_d:
2418 return !isShiftedInt<9, 3>(
2419 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
2421 Op, "argument out of range or not a multiple of 8", DAG)
2422 : SDValue();
2423 }
2424}
2425
2426 // Helper function that emits an error message for intrinsics with a void return
2427 // value and returns the chain.
2428 static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg,
2429 SelectionDAG &DAG) {
2430
2431 DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
2432 return Op.getOperand(0);
2433}
2434
2435SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op,
2436 SelectionDAG &DAG) const {
2437 SDLoc DL(Op);
2438 MVT GRLenVT = Subtarget.getGRLenVT();
2439 SDValue Chain = Op.getOperand(0);
2440 uint64_t IntrinsicEnum = Op.getConstantOperandVal(1);
2441 SDValue Op2 = Op.getOperand(2);
2442 const StringRef ErrorMsgOOR = "argument out of range";
2443 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
2444 const StringRef ErrorMsgReqLA32 = "requires loongarch32";
2445 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
2446
2447 switch (IntrinsicEnum) {
2448 default:
2449 // TODO: Add more Intrinsics.
2450 return SDValue();
2451 case Intrinsic::loongarch_cacop_d:
2452 case Intrinsic::loongarch_cacop_w: {
2453 if (IntrinsicEnum == Intrinsic::loongarch_cacop_d && !Subtarget.is64Bit())
2454 return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG);
2455 if (IntrinsicEnum == Intrinsic::loongarch_cacop_w && Subtarget.is64Bit())
2456 return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA32, DAG);
2457 // call void @llvm.loongarch.cacop.[d/w](uimm5, rj, simm12)
2458 unsigned Imm1 = Op2->getAsZExtVal();
2459 int Imm2 = cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue();
2460 if (!isUInt<5>(Imm1) || !isInt<12>(Imm2))
2461 return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
2462 return Op;
2463 }
2464 case Intrinsic::loongarch_dbar: {
2465 unsigned Imm = Op2->getAsZExtVal();
2466 return !isUInt<15>(Imm)
2467 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2468 : DAG.getNode(LoongArchISD::DBAR, DL, MVT::Other, Chain,
2469 DAG.getConstant(Imm, DL, GRLenVT));
2470 }
2471 case Intrinsic::loongarch_ibar: {
2472 unsigned Imm = Op2->getAsZExtVal();
2473 return !isUInt<15>(Imm)
2474 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2475 : DAG.getNode(LoongArchISD::IBAR, DL, MVT::Other, Chain,
2476 DAG.getConstant(Imm, DL, GRLenVT));
2477 }
2478 case Intrinsic::loongarch_break: {
2479 unsigned Imm = Op2->getAsZExtVal();
2480 return !isUInt<15>(Imm)
2481 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2482 : DAG.getNode(LoongArchISD::BREAK, DL, MVT::Other, Chain,
2483 DAG.getConstant(Imm, DL, GRLenVT));
2484 }
2485 case Intrinsic::loongarch_movgr2fcsr: {
2486 if (!Subtarget.hasBasicF())
2487 return emitIntrinsicErrorMessage(Op, ErrorMsgReqF, DAG);
2488 unsigned Imm = Op2->getAsZExtVal();
2489 return !isUInt<2>(Imm)
2490 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2491 : DAG.getNode(LoongArchISD::MOVGR2FCSR, DL, MVT::Other, Chain,
2492 DAG.getConstant(Imm, DL, GRLenVT),
2493 DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT,
2494 Op.getOperand(3)));
2495 }
2496 case Intrinsic::loongarch_syscall: {
2497 unsigned Imm = Op2->getAsZExtVal();
2498 return !isUInt<15>(Imm)
2499 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2500 : DAG.getNode(LoongArchISD::SYSCALL, DL, MVT::Other, Chain,
2501 DAG.getConstant(Imm, DL, GRLenVT));
2502 }
2503#define IOCSRWR_CASE(NAME, NODE) \
2504 case Intrinsic::loongarch_##NAME: { \
2505 SDValue Op3 = Op.getOperand(3); \
2506 return Subtarget.is64Bit() \
2507 ? DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, \
2508 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
2509 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op3)) \
2510 : DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, Op2, \
2511 Op3); \
2512 }
2513 IOCSRWR_CASE(iocsrwr_b, IOCSRWR_B);
2514 IOCSRWR_CASE(iocsrwr_h, IOCSRWR_H);
2515 IOCSRWR_CASE(iocsrwr_w, IOCSRWR_W);
2516#undef IOCSRWR_CASE
2517 case Intrinsic::loongarch_iocsrwr_d: {
2518 return !Subtarget.is64Bit()
2519 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
2520 : DAG.getNode(LoongArchISD::IOCSRWR_D, DL, MVT::Other, Chain,
2521 Op2,
2522 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
2523 Op.getOperand(3)));
2524 }
2525#define ASRT_LE_GT_CASE(NAME) \
2526 case Intrinsic::loongarch_##NAME: { \
2527 return !Subtarget.is64Bit() \
2528 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG) \
2529 : Op; \
2530 }
2531 ASRT_LE_GT_CASE(asrtle_d)
2532 ASRT_LE_GT_CASE(asrtgt_d)
2533#undef ASRT_LE_GT_CASE
2534 case Intrinsic::loongarch_ldpte_d: {
2535 unsigned Imm = Op.getConstantOperandVal(3);
2536 return !Subtarget.is64Bit()
2537 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
2538 : !isUInt<8>(Imm) ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2539 : Op;
2540 }
2541 case Intrinsic::loongarch_lsx_vst:
2542 case Intrinsic::loongarch_lasx_xvst:
2543 return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue())
2544 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2545 : SDValue();
2546 case Intrinsic::loongarch_lasx_xvstelm_b:
2547 return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2548 !isUInt<5>(Op.getConstantOperandVal(5)))
2549 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2550 : SDValue();
2551 case Intrinsic::loongarch_lsx_vstelm_b:
2552 return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2553 !isUInt<4>(Op.getConstantOperandVal(5)))
2554 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2555 : SDValue();
2556 case Intrinsic::loongarch_lasx_xvstelm_h:
2557 return (!isShiftedInt<8, 1>(
2558 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2559 !isUInt<4>(Op.getConstantOperandVal(5)))
2561 Op, "argument out of range or not a multiple of 2", DAG)
2562 : SDValue();
2563 case Intrinsic::loongarch_lsx_vstelm_h:
2564 return (!isShiftedInt<8, 1>(
2565 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2566 !isUInt<3>(Op.getConstantOperandVal(5)))
2568 Op, "argument out of range or not a multiple of 2", DAG)
2569 : SDValue();
2570 case Intrinsic::loongarch_lasx_xvstelm_w:
2571 return (!isShiftedInt<8, 2>(
2572 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2573 !isUInt<3>(Op.getConstantOperandVal(5)))
2575 Op, "argument out of range or not a multiple of 4", DAG)
2576 : SDValue();
2577 case Intrinsic::loongarch_lsx_vstelm_w:
2578 return (!isShiftedInt<8, 2>(
2579 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2580 !isUInt<2>(Op.getConstantOperandVal(5)))
2582 Op, "argument out of range or not a multiple of 4", DAG)
2583 : SDValue();
2584 case Intrinsic::loongarch_lasx_xvstelm_d:
2585 return (!isShiftedInt<8, 3>(
2586 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2587 !isUInt<2>(Op.getConstantOperandVal(5)))
2589 Op, "argument out of range or not a multiple of 8", DAG)
2590 : SDValue();
2591 case Intrinsic::loongarch_lsx_vstelm_d:
2592 return (!isShiftedInt<8, 3>(
2593 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2594 !isUInt<1>(Op.getConstantOperandVal(5)))
2596 Op, "argument out of range or not a multiple of 8", DAG)
2597 : SDValue();
2598 }
2599}
2600
2601SDValue LoongArchTargetLowering::lowerShiftLeftParts(SDValue Op,
2602 SelectionDAG &DAG) const {
2603 SDLoc DL(Op);
2604 SDValue Lo = Op.getOperand(0);
2605 SDValue Hi = Op.getOperand(1);
2606 SDValue Shamt = Op.getOperand(2);
2607 EVT VT = Lo.getValueType();
2608
2609 // if Shamt-GRLen < 0: // Shamt < GRLen
2610 // Lo = Lo << Shamt
2611 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (GRLen-1 ^ Shamt))
2612 // else:
2613 // Lo = 0
2614 // Hi = Lo << (Shamt-GRLen)
2615
2616 SDValue Zero = DAG.getConstant(0, DL, VT);
2617 SDValue One = DAG.getConstant(1, DL, VT);
2618 SDValue MinusGRLen =
2619 DAG.getSignedConstant(-(int)Subtarget.getGRLen(), DL, VT);
2620 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
2621 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
2622 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
2623
2624 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
2625 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
2626 SDValue ShiftRightLo =
2627 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, GRLenMinus1Shamt);
2628 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
2629 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
2630 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusGRLen);
2631
2632 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
2633
2634 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
2635 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
2636
2637 SDValue Parts[2] = {Lo, Hi};
2638 return DAG.getMergeValues(Parts, DL);
2639}
2640
2641SDValue LoongArchTargetLowering::lowerShiftRightParts(SDValue Op,
2642 SelectionDAG &DAG,
2643 bool IsSRA) const {
2644 SDLoc DL(Op);
2645 SDValue Lo = Op.getOperand(0);
2646 SDValue Hi = Op.getOperand(1);
2647 SDValue Shamt = Op.getOperand(2);
2648 EVT VT = Lo.getValueType();
2649
2650 // SRA expansion:
2651 // if Shamt-GRLen < 0: // Shamt < GRLen
2652 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
2653 // Hi = Hi >>s Shamt
2654 // else:
2655 // Lo = Hi >>s (Shamt-GRLen);
2656 // Hi = Hi >>s (GRLen-1)
2657 //
2658 // SRL expansion:
2659 // if Shamt-GRLen < 0: // Shamt < GRLen
2660 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
2661 // Hi = Hi >>u Shamt
2662 // else:
2663 // Lo = Hi >>u (Shamt-GRLen);
2664 // Hi = 0;
2665
2666 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
2667
2668 SDValue Zero = DAG.getConstant(0, DL, VT);
2669 SDValue One = DAG.getConstant(1, DL, VT);
2670 SDValue MinusGRLen =
2671 DAG.getSignedConstant(-(int)Subtarget.getGRLen(), DL, VT);
2672 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
2673 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
2674 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
2675
2676 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
2677 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
2678 SDValue ShiftLeftHi =
2679 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, GRLenMinus1Shamt);
2680 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
2681 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
2682 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusGRLen);
2683 SDValue HiFalse =
2684 IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, GRLenMinus1) : Zero;
2685
2686 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
2687
2688 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
2689 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
2690
2691 SDValue Parts[2] = {Lo, Hi};
2692 return DAG.getMergeValues(Parts, DL);
2693}
2694
2695// Returns the opcode of the target-specific SDNode that implements the 32-bit
2696// form of the given Opcode.
2697 static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode) {
2698 switch (Opcode) {
2699 default:
2700 llvm_unreachable("Unexpected opcode");
2701 case ISD::SDIV:
2702 return LoongArchISD::DIV_W;
2703 case ISD::UDIV:
2704 return LoongArchISD::DIV_WU;
2705 case ISD::SREM:
2706 return LoongArchISD::MOD_W;
2707 case ISD::UREM:
2708 return LoongArchISD::MOD_WU;
2709 case ISD::SHL:
2710 return LoongArchISD::SLL_W;
2711 case ISD::SRA:
2712 return LoongArchISD::SRA_W;
2713 case ISD::SRL:
2714 return LoongArchISD::SRL_W;
2715 case ISD::ROTL:
2716 case ISD::ROTR:
2717 return LoongArchISD::ROTR_W;
2718 case ISD::CTTZ:
2719 return LoongArchISD::CTZ_W;
2720 case ISD::CTLZ:
2721 return LoongArchISD::CLZ_W;
2722 }
2723}
2724
2725// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
2726// node. Because i8/i16/i32 isn't a legal type for LA64, these operations would
2727// otherwise be promoted to i64, making it difficult to select the
2728 // SLL_W/.../*W later on, because the fact that the operation was originally of
2729 // type i8/i16/i32 is lost.
2730 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp,
2731 unsigned ExtOpc = ISD::ANY_EXTEND) {
2732 SDLoc DL(N);
2733 LoongArchISD::NodeType WOpcode = getLoongArchWOpcode(N->getOpcode());
2734 SDValue NewOp0, NewRes;
2735
2736 switch (NumOp) {
2737 default:
2738 llvm_unreachable("Unexpected NumOp");
2739 case 1: {
2740 NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
2741 NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0);
2742 break;
2743 }
2744 case 2: {
2745 NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
2746 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
2747 if (N->getOpcode() == ISD::ROTL) {
2748 SDValue TmpOp = DAG.getConstant(32, DL, MVT::i64);
2749 NewOp1 = DAG.getNode(ISD::SUB, DL, MVT::i64, TmpOp, NewOp1);
2750 }
2751 NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
2752 break;
2753 }
2754 // TODO: Handle more NumOp.
2755 }
2756
2757 // ReplaceNodeResults requires we maintain the same type for the return
2758 // value.
2759 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
2760}
2761
2762 // Converts the given 32-bit operation to an i64 operation with sign-extension
2763 // semantics, to reduce the number of sign-extension instructions.
2764 static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
2765 SDLoc DL(N);
2766 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
2767 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
2768 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
2769 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
2770 DAG.getValueType(MVT::i32));
2771 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
2772}
2773
2774 // Helper function that emits an error message for intrinsics with or without a
2775 // chain, and returns a UNDEF plus (for chained intrinsics) the chain as the results.
2776 static void emitErrorAndReplaceIntrinsicResults(
2777 SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG,
2778 StringRef ErrorMsg, bool WithChain = true) {
2779 DAG.getContext()->emitError(N->getOperationName(0) + ": " + ErrorMsg + ".");
2780 Results.push_back(DAG.getUNDEF(N->getValueType(0)));
2781 if (!WithChain)
2782 return;
2783 Results.push_back(N->getOperand(0));
2784}
2785
2786template <unsigned N>
2787static void
2788 replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl<SDValue> &Results,
2789 SelectionDAG &DAG, const LoongArchSubtarget &Subtarget,
2790 unsigned ResOp) {
2791 const StringRef ErrorMsgOOR = "argument out of range";
2792 unsigned Imm = Node->getConstantOperandVal(2);
2793 if (!isUInt<N>(Imm)) {
2794 emitErrorAndReplaceIntrinsicResults(Node, Results, DAG, ErrorMsgOOR,
2795 /*WithChain=*/false);
2796 return;
2797 }
2798 SDLoc DL(Node);
2799 SDValue Vec = Node->getOperand(1);
2800
2801 SDValue PickElt =
2802 DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec,
2803 DAG.getConstant(Imm, DL, Subtarget.getGRLenVT()),
2804 DAG.getValueType(Vec.getValueType().getVectorElementType()));
2805 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, Node->getValueType(0),
2806 PickElt.getValue(0)));
2807}
2808
2809 static void replaceVecCondBranchResults(SDNode *N,
2810 SmallVectorImpl<SDValue> &Results,
2811 SelectionDAG &DAG,
2812 const LoongArchSubtarget &Subtarget,
2813 unsigned ResOp) {
2814 SDLoc DL(N);
2815 SDValue Vec = N->getOperand(1);
2816
2817 SDValue CB = DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec);
2818 Results.push_back(
2819 DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), CB.getValue(0)));
2820}
2821
2822static void
2824 SelectionDAG &DAG,
2825 const LoongArchSubtarget &Subtarget) {
2826 switch (N->getConstantOperandVal(0)) {
2827 default:
2828 llvm_unreachable("Unexpected Intrinsic.");
2829 case Intrinsic::loongarch_lsx_vpickve2gr_b:
2830 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
2831 LoongArchISD::VPICK_SEXT_ELT);
2832 break;
2833 case Intrinsic::loongarch_lsx_vpickve2gr_h:
2834 case Intrinsic::loongarch_lasx_xvpickve2gr_w:
2835 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
2836 LoongArchISD::VPICK_SEXT_ELT);
2837 break;
2838 case Intrinsic::loongarch_lsx_vpickve2gr_w:
2839 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
2840 LoongArchISD::VPICK_SEXT_ELT);
2841 break;
2842 case Intrinsic::loongarch_lsx_vpickve2gr_bu:
2843 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
2844 LoongArchISD::VPICK_ZEXT_ELT);
2845 break;
2846 case Intrinsic::loongarch_lsx_vpickve2gr_hu:
2847 case Intrinsic::loongarch_lasx_xvpickve2gr_wu:
2848 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
2849 LoongArchISD::VPICK_ZEXT_ELT);
2850 break;
2851 case Intrinsic::loongarch_lsx_vpickve2gr_wu:
2852 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
2853 LoongArchISD::VPICK_ZEXT_ELT);
2854 break;
2855 case Intrinsic::loongarch_lsx_bz_b:
2856 case Intrinsic::loongarch_lsx_bz_h:
2857 case Intrinsic::loongarch_lsx_bz_w:
2858 case Intrinsic::loongarch_lsx_bz_d:
2859 case Intrinsic::loongarch_lasx_xbz_b:
2860 case Intrinsic::loongarch_lasx_xbz_h:
2861 case Intrinsic::loongarch_lasx_xbz_w:
2862 case Intrinsic::loongarch_lasx_xbz_d:
2863 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
2864 LoongArchISD::VANY_ZERO);
2865 break;
2866 case Intrinsic::loongarch_lsx_bz_v:
2867 case Intrinsic::loongarch_lasx_xbz_v:
2868 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
2869 LoongArchISD::VALL_ZERO);
2870 break;
2871 case Intrinsic::loongarch_lsx_bnz_b:
2872 case Intrinsic::loongarch_lsx_bnz_h:
2873 case Intrinsic::loongarch_lsx_bnz_w:
2874 case Intrinsic::loongarch_lsx_bnz_d:
2875 case Intrinsic::loongarch_lasx_xbnz_b:
2876 case Intrinsic::loongarch_lasx_xbnz_h:
2877 case Intrinsic::loongarch_lasx_xbnz_w:
2878 case Intrinsic::loongarch_lasx_xbnz_d:
2879 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
2880 LoongArchISD::VALL_NONZERO);
2881 break;
2882 case Intrinsic::loongarch_lsx_bnz_v:
2883 case Intrinsic::loongarch_lasx_xbnz_v:
2884 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
2885 LoongArchISD::VANY_NONZERO);
2886 break;
2887 }
2888}
2889
2889 
2890 void LoongArchTargetLowering::ReplaceNodeResults(
2891 SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
2892 SDLoc DL(N);
2893 EVT VT = N->getValueType(0);
2894 switch (N->getOpcode()) {
2895 default:
2896 llvm_unreachable("Don't know how to legalize this operation");
2897 case ISD::ADD:
2898 case ISD::SUB:
2899 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
2900 "Unexpected custom legalisation");
2901 Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
2902 break;
2903 case ISD::SDIV:
2904 case ISD::UDIV:
2905 case ISD::SREM:
2906 case ISD::UREM:
2907 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
2908 "Unexpected custom legalisation");
2909 Results.push_back(customLegalizeToWOp(N, DAG, 2,
2910 Subtarget.hasDiv32() && VT == MVT::i32
2911 ? ISD::ANY_EXTEND
2912 : ISD::SIGN_EXTEND));
2913 break;
2914 case ISD::SHL:
2915 case ISD::SRA:
2916 case ISD::SRL:
2917 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
2918 "Unexpected custom legalisation");
2919 if (N->getOperand(1).getOpcode() != ISD::Constant) {
2920 Results.push_back(customLegalizeToWOp(N, DAG, 2));
2921 break;
2922 }
2923 break;
2924 case ISD::ROTL:
2925 case ISD::ROTR:
2926 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
2927 "Unexpected custom legalisation");
2928 Results.push_back(customLegalizeToWOp(N, DAG, 2));
2929 break;
2930 case ISD::FP_TO_SINT: {
2931 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
2932 "Unexpected custom legalisation");
2933 SDValue Src = N->getOperand(0);
2934 EVT FVT = EVT::getFloatingPointVT(N->getValueSizeInBits(0));
2935 if (getTypeAction(*DAG.getContext(), Src.getValueType()) !=
2936 TargetLowering::TypeSoftenFloat) {
2937 if (Src.getValueType() == MVT::f16)
2938 Src = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Src);
2939 SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, FVT, Src);
2940 Results.push_back(DAG.getNode(ISD::BITCAST, DL, VT, Dst));
2941 return;
2942 }
2943 // If the FP type needs to be softened, emit a library call using the 'si'
2944 // version. If we left it to default legalization we'd end up with 'di'.
2945 RTLIB::Libcall LC;
2946 LC = RTLIB::getFPTOSINT(Src.getValueType(), VT);
2947 MakeLibCallOptions CallOptions;
2948 EVT OpVT = Src.getValueType();
2949 CallOptions.setTypeListBeforeSoften(OpVT, VT, true);
2950 SDValue Chain = SDValue();
2951 SDValue Result;
2952 std::tie(Result, Chain) =
2953 makeLibCall(DAG, LC, VT, Src, CallOptions, DL, Chain);
2954 Results.push_back(Result);
2955 break;
2956 }
2957 case ISD::BITCAST: {
2958 SDValue Src = N->getOperand(0);
2959 EVT SrcVT = Src.getValueType();
2960 if (VT == MVT::i32 && SrcVT == MVT::f32 && Subtarget.is64Bit() &&
2961 Subtarget.hasBasicF()) {
2962 SDValue Dst =
2963 DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Src);
2964 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Dst));
2965 }
2966 break;
2967 }
2968 case ISD::FP_TO_UINT: {
2969 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
2970 "Unexpected custom legalisation");
2971 auto &TLI = DAG.getTargetLoweringInfo();
2972 SDValue Tmp1, Tmp2;
2973 TLI.expandFP_TO_UINT(N, Tmp1, Tmp2, DAG);
2974 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Tmp1));
2975 break;
2976 }
2977 case ISD::BSWAP: {
2978 SDValue Src = N->getOperand(0);
2979 assert((VT == MVT::i16 || VT == MVT::i32) &&
2980 "Unexpected custom legalization");
2981 MVT GRLenVT = Subtarget.getGRLenVT();
2982 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
2983 SDValue Tmp;
2984 switch (VT.getSizeInBits()) {
2985 default:
2986 llvm_unreachable("Unexpected operand width");
2987 case 16:
2988 Tmp = DAG.getNode(LoongArchISD::REVB_2H, DL, GRLenVT, NewSrc);
2989 break;
2990 case 32:
2991 // Only LA64 will get to here due to the size mismatch between VT and
2992 // GRLenVT; LA32 lowering is directly defined in LoongArchInstrInfo.
2993 Tmp = DAG.getNode(LoongArchISD::REVB_2W, DL, GRLenVT, NewSrc);
2994 break;
2995 }
2996 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
2997 break;
2998 }
2999 case ISD::BITREVERSE: {
3000 SDValue Src = N->getOperand(0);
3001 assert((VT == MVT::i8 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
3002 "Unexpected custom legalization");
3003 MVT GRLenVT = Subtarget.getGRLenVT();
3004 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
3005 SDValue Tmp;
3006 switch (VT.getSizeInBits()) {
3007 default:
3008 llvm_unreachable("Unexpected operand width");
3009 case 8:
3010 Tmp = DAG.getNode(LoongArchISD::BITREV_4B, DL, GRLenVT, NewSrc);
3011 break;
3012 case 32:
3013 Tmp = DAG.getNode(LoongArchISD::BITREV_W, DL, GRLenVT, NewSrc);
3014 break;
3015 }
3016 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
3017 break;
3018 }
3019 case ISD::CTLZ:
3020 case ISD::CTTZ: {
3021 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
3022 "Unexpected custom legalisation");
3023 Results.push_back(customLegalizeToWOp(N, DAG, 1));
3024 break;
3025 }
3026 case ISD::INTRINSIC_W_CHAIN: {
3027 SDValue Chain = N->getOperand(0);
3028 SDValue Op2 = N->getOperand(2);
3029 MVT GRLenVT = Subtarget.getGRLenVT();
3030 const StringRef ErrorMsgOOR = "argument out of range";
3031 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
3032 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
3033
3034 switch (N->getConstantOperandVal(1)) {
3035 default:
3036 llvm_unreachable("Unexpected Intrinsic.");
3037 case Intrinsic::loongarch_movfcsr2gr: {
3038 if (!Subtarget.hasBasicF()) {
3039 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqF);
3040 return;
3041 }
3042 unsigned Imm = Op2->getAsZExtVal();
3043 if (!isUInt<2>(Imm)) {
3044 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
3045 return;
3046 }
3047 SDValue MOVFCSR2GRResults = DAG.getNode(
3048 LoongArchISD::MOVFCSR2GR, SDLoc(N), {MVT::i64, MVT::Other},
3049 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
3050 Results.push_back(
3051 DAG.getNode(ISD::TRUNCATE, DL, VT, MOVFCSR2GRResults.getValue(0)));
3052 Results.push_back(MOVFCSR2GRResults.getValue(1));
3053 break;
3054 }
3055#define CRC_CASE_EXT_BINARYOP(NAME, NODE) \
3056 case Intrinsic::loongarch_##NAME: { \
3057 SDValue NODE = DAG.getNode( \
3058 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
3059 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
3060 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
3061 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
3062 Results.push_back(NODE.getValue(1)); \
3063 break; \
3064 }
3065 CRC_CASE_EXT_BINARYOP(crc_w_b_w, CRC_W_B_W)
3066 CRC_CASE_EXT_BINARYOP(crc_w_h_w, CRC_W_H_W)
3067 CRC_CASE_EXT_BINARYOP(crc_w_w_w, CRC_W_W_W)
3068 CRC_CASE_EXT_BINARYOP(crcc_w_b_w, CRCC_W_B_W)
3069 CRC_CASE_EXT_BINARYOP(crcc_w_h_w, CRCC_W_H_W)
3070 CRC_CASE_EXT_BINARYOP(crcc_w_w_w, CRCC_W_W_W)
3071#undef CRC_CASE_EXT_BINARYOP
3072
3073#define CRC_CASE_EXT_UNARYOP(NAME, NODE) \
3074 case Intrinsic::loongarch_##NAME: { \
3075 SDValue NODE = DAG.getNode( \
3076 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
3077 {Chain, Op2, \
3078 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
3079 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
3080 Results.push_back(NODE.getValue(1)); \
3081 break; \
3082 }
3083 CRC_CASE_EXT_UNARYOP(crc_w_d_w, CRC_W_D_W)
3084 CRC_CASE_EXT_UNARYOP(crcc_w_d_w, CRCC_W_D_W)
3085#undef CRC_CASE_EXT_UNARYOP
3086#define CSR_CASE(ID) \
3087 case Intrinsic::loongarch_##ID: { \
3088 if (!Subtarget.is64Bit()) \
3089 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64); \
3090 break; \
3091 }
3092 CSR_CASE(csrrd_d);
3093 CSR_CASE(csrwr_d);
3094 CSR_CASE(csrxchg_d);
3095 CSR_CASE(iocsrrd_d);
3096#undef CSR_CASE
3097 case Intrinsic::loongarch_csrrd_w: {
3098 unsigned Imm = Op2->getAsZExtVal();
3099 if (!isUInt<14>(Imm)) {
3100 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
3101 return;
3102 }
3103 SDValue CSRRDResults =
3104 DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
3105 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
3106 Results.push_back(
3107 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRRDResults.getValue(0)));
3108 Results.push_back(CSRRDResults.getValue(1));
3109 break;
3110 }
3111 case Intrinsic::loongarch_csrwr_w: {
3112 unsigned Imm = N->getConstantOperandVal(3);
3113 if (!isUInt<14>(Imm)) {
3114 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
3115 return;
3116 }
3117 SDValue CSRWRResults =
3118 DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
3119 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
3120 DAG.getConstant(Imm, DL, GRLenVT)});
3121 Results.push_back(
3122 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRWRResults.getValue(0)));
3123 Results.push_back(CSRWRResults.getValue(1));
3124 break;
3125 }
3126 case Intrinsic::loongarch_csrxchg_w: {
3127 unsigned Imm = N->getConstantOperandVal(4);
3128 if (!isUInt<14>(Imm)) {
3129 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
3130 return;
3131 }
3132 SDValue CSRXCHGResults = DAG.getNode(
3133 LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
3134 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
3135 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3)),
3136 DAG.getConstant(Imm, DL, GRLenVT)});
3137 Results.push_back(
3138 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRXCHGResults.getValue(0)));
3139 Results.push_back(CSRXCHGResults.getValue(1));
3140 break;
3141 }
3142#define IOCSRRD_CASE(NAME, NODE) \
3143 case Intrinsic::loongarch_##NAME: { \
3144 SDValue IOCSRRDResults = \
3145 DAG.getNode(LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
3146 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)}); \
3147 Results.push_back( \
3148 DAG.getNode(ISD::TRUNCATE, DL, VT, IOCSRRDResults.getValue(0))); \
3149 Results.push_back(IOCSRRDResults.getValue(1)); \
3150 break; \
3151 }
3152 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
3153 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
3154 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
3155#undef IOCSRRD_CASE
3156 case Intrinsic::loongarch_cpucfg: {
3157 SDValue CPUCFGResults =
3158 DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
3159 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)});
3160 Results.push_back(
3161 DAG.getNode(ISD::TRUNCATE, DL, VT, CPUCFGResults.getValue(0)));
3162 Results.push_back(CPUCFGResults.getValue(1));
3163 break;
3164 }
3165 case Intrinsic::loongarch_lddir_d: {
3166 if (!Subtarget.is64Bit()) {
3167 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64);
3168 return;
3169 }
3170 break;
3171 }
3172 }
3173 break;
3174 }
3175 case ISD::READ_REGISTER: {
3176 if (Subtarget.is64Bit())
3177 DAG.getContext()->emitError(
3178 "On LA64, only 64-bit registers can be read.");
3179 else
3180 DAG.getContext()->emitError(
3181 "On LA32, only 32-bit registers can be read.");
3182 Results.push_back(DAG.getUNDEF(VT));
3183 Results.push_back(N->getOperand(0));
3184 break;
3185 }
3186 case ISD::INTRINSIC_WO_CHAIN: {
3187 replaceINTRINSIC_WO_CHAINResults(N, Results, DAG, Subtarget);
3188 break;
3189 }
3190 case ISD::LROUND: {
3191 SDValue Op0 = N->getOperand(0);
3192 EVT OpVT = Op0.getValueType();
3193 RTLIB::Libcall LC =
3194 OpVT == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
3195 MakeLibCallOptions CallOptions;
3196 CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64, true);
3197 SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first;
3198 Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
3199 Results.push_back(Result);
3200 break;
3201 }
3202 }
3203}
3204
3205 static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
3206 TargetLowering::DAGCombinerInfo &DCI,
3207 const LoongArchSubtarget &Subtarget) {
3208 if (DCI.isBeforeLegalizeOps())
3209 return SDValue();
3210
3211 SDValue FirstOperand = N->getOperand(0);
3212 SDValue SecondOperand = N->getOperand(1);
3213 unsigned FirstOperandOpc = FirstOperand.getOpcode();
3214 EVT ValTy = N->getValueType(0);
3215 SDLoc DL(N);
3216 uint64_t lsb, msb;
3217 unsigned SMIdx, SMLen;
3218 ConstantSDNode *CN;
3219 SDValue NewOperand;
3220 MVT GRLenVT = Subtarget.getGRLenVT();
3221
3222 // Op's second operand must be a shifted mask.
3223 if (!(CN = dyn_cast<ConstantSDNode>(SecondOperand)) ||
3224 !isShiftedMask_64(CN->getZExtValue(), SMIdx, SMLen))
3225 return SDValue();
3226
3227 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL) {
3228 // Pattern match BSTRPICK.
3229 // $dst = and ((sra or srl) $src , lsb), (2**len - 1)
3230 // => BSTRPICK $dst, $src, msb, lsb
3231 // where msb = lsb + len - 1
3232
3233 // The second operand of the shift must be an immediate.
3234 if (!(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))))
3235 return SDValue();
3236
3237 lsb = CN->getZExtValue();
3238
3239 // Return if the shifted mask does not start at bit 0 or the sum of its
3240 // length and lsb exceeds the word's size.
3241 if (SMIdx != 0 || lsb + SMLen > ValTy.getSizeInBits())
3242 return SDValue();
3243
3244 NewOperand = FirstOperand.getOperand(0);
3245 } else {
3246 // Pattern match BSTRPICK.
3247 // $dst = and $src, (2**len - 1), if len > 12
3248 // => BSTRPICK $dst, $src, msb, lsb
3249 // where lsb = 0 and msb = len - 1
3250
3251 // If the mask is <= 0xfff, andi can be used instead.
3252 if (CN->getZExtValue() <= 0xfff)
3253 return SDValue();
3254
3255 // Return if the mask extends past the value's bit width.
3256 if (SMIdx + SMLen > ValTy.getSizeInBits())
3257 return SDValue();
3258
3259 if (SMIdx > 0) {
3260 // Omit if the constant has more than 2 uses. This is a conservative
3261 // decision. Whether it is a win depends on the HW microarchitecture.
3262 // However it should always be better for 1 and 2 uses.
3263 if (CN->use_size() > 2)
3264 return SDValue();
3265 // Return if the constant can be composed by a single LU12I.W.
3266 if ((CN->getZExtValue() & 0xfff) == 0)
3267 return SDValue();
3268 // Return if the constant can be composed by a single ADDI with
3269 // the zero register.
3270 if (CN->getSExtValue() >= -2048 && CN->getSExtValue() < 0)
3271 return SDValue();
3272 }
3273
3274 lsb = SMIdx;
3275 NewOperand = FirstOperand;
3276 }
3277
3278 msb = lsb + SMLen - 1;
3279 SDValue NR0 = DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy, NewOperand,
3280 DAG.getConstant(msb, DL, GRLenVT),
3281 DAG.getConstant(lsb, DL, GRLenVT));
3282 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL || lsb == 0)
3283 return NR0;
3284 // Try to optimize to
3285 // bstrpick $Rd, $Rs, msb, lsb
3286 // slli $Rd, $Rd, lsb
3287 return DAG.getNode(ISD::SHL, DL, ValTy, NR0,
3288 DAG.getConstant(lsb, DL, GRLenVT));
3289}
3290
3291 static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG,
3292 TargetLowering::DAGCombinerInfo &DCI,
3293 const LoongArchSubtarget &Subtarget) {
3294 if (DCI.isBeforeLegalizeOps())
3295 return SDValue();
3296
3297 // $dst = srl (and $src, Mask), Shamt
3298 // =>
3299 // BSTRPICK $dst, $src, MaskIdx+MaskLen-1, Shamt
3300 // when Mask is a shifted mask, and MaskIdx <= Shamt <= MaskIdx+MaskLen-1
3301 //
3302
3303 SDValue FirstOperand = N->getOperand(0);
3304 ConstantSDNode *CN;
3305 EVT ValTy = N->getValueType(0);
3306 SDLoc DL(N);
3307 MVT GRLenVT = Subtarget.getGRLenVT();
3308 unsigned MaskIdx, MaskLen;
3309 uint64_t Shamt;
3310
3311 // The first operand must be an AND and the second operand of the AND must be
3312 // a shifted mask.
3313 if (FirstOperand.getOpcode() != ISD::AND ||
3314 !(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))) ||
3315 !isShiftedMask_64(CN->getZExtValue(), MaskIdx, MaskLen))
3316 return SDValue();
3317
3318 // The second operand (shift amount) must be an immediate.
3319 if (!(CN = dyn_cast<ConstantSDNode>(N->getOperand(1))))
3320 return SDValue();
3321
3322 Shamt = CN->getZExtValue();
3323 if (MaskIdx <= Shamt && Shamt <= MaskIdx + MaskLen - 1)
3324 return DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy,
3325 FirstOperand->getOperand(0),
3326 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
3327 DAG.getConstant(Shamt, DL, GRLenVT));
3328
3329 return SDValue();
3330}
3331
3332 static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
3333 TargetLowering::DAGCombinerInfo &DCI,
3334 const LoongArchSubtarget &Subtarget) {
3335 MVT GRLenVT = Subtarget.getGRLenVT();
3336 EVT ValTy = N->getValueType(0);
3337 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
3338 ConstantSDNode *CN0, *CN1;
3339 SDLoc DL(N);
3340 unsigned ValBits = ValTy.getSizeInBits();
3341 unsigned MaskIdx0, MaskLen0, MaskIdx1, MaskLen1;
3342 unsigned Shamt;
3343 bool SwapAndRetried = false;
3344
3345 if (DCI.isBeforeLegalizeOps())
3346 return SDValue();
3347
3348 if (ValBits != 32 && ValBits != 64)
3349 return SDValue();
3350
3351Retry:
3352 // 1st pattern to match BSTRINS:
3353 // R = or (and X, mask0), (and (shl Y, lsb), mask1)
3354 // where mask1 = (2**size - 1) << lsb, mask0 = ~mask1
3355 // =>
3356 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
3357 if (N0.getOpcode() == ISD::AND &&
3358 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
3359 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
3360 N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL &&
3361 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
3362 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
3363 MaskIdx0 == MaskIdx1 && MaskLen0 == MaskLen1 &&
3364 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
3365 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
3366 (MaskIdx0 + MaskLen0 <= ValBits)) {
3367 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 1\n");
3368 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
3369 N1.getOperand(0).getOperand(0),
3370 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
3371 DAG.getConstant(MaskIdx0, DL, GRLenVT));
3372 }
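
A standalone, plain-integer model of the bit-field insertion that the 1st pattern maps to BSTRINS (illustrative only; bstrins is a hypothetical helper, not the lowering code):

#include <cstdint>

// With size = 8 and lsb = 8, mask1 = 0xff00 and mask0 = ~mask1, so
//   or (and X, ~0xff00), (and (shl Y, 8), 0xff00)  =>  BSTRINS X, Y, 15, 8
constexpr uint64_t bstrins(uint64_t X, uint64_t Y, unsigned Msb, unsigned Lsb) {
  const uint64_t M = ((uint64_t(1) << (Msb - Lsb + 1)) - 1) << Lsb;
  return (X & ~M) | ((Y << Lsb) & M);
}
static_assert(bstrins(0x11223344u, 0xABu, 15, 8) ==
                  ((0x11223344u & ~uint64_t(0xff00)) |
                   ((uint64_t(0xAB) << 8) & 0xff00)),
              "");
static_assert(bstrins(0x11223344u, 0xABu, 15, 8) == 0x1122AB44u, "");
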
3373
3374 // 2nd pattern to match BSTRINS:
3375 // R = or (and X, mask0), (shl (and Y, mask1), lsb)
3376 // where mask1 = (2**size - 1), mask0 = ~(mask1 << lsb)
3377 // =>
3378 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
3379 if (N0.getOpcode() == ISD::AND &&
3380 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
3381 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
3382 N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
3383 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
3384 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
3385 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
3386 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
3387 MaskLen0 == MaskLen1 && MaskIdx1 == 0 &&
3388 (MaskIdx0 + MaskLen0 <= ValBits)) {
3389 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 2\n");
3390 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
3391 N1.getOperand(0).getOperand(0),
3392 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
3393 DAG.getConstant(MaskIdx0, DL, GRLenVT));
3394 }
3395
3396 // 3rd pattern to match BSTRINS:
3397 // R = or (and X, mask0), (and Y, mask1)
3398 // where ~mask0 = (2**size - 1) << lsb, mask0 & mask1 = 0
3399 // =>
3400 // R = BSTRINS X, (shr (and Y, mask1), lsb), msb, lsb
3401 // where msb = lsb + size - 1
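// For example, with mask0 == 0xffffff0f (lsb == 4, size == 4) and mask1 ==
// 0x000000f0,
//   R = or (and X, 0xffffff0f), (and Y, 0x000000f0)
// becomes (BSTRINS X, (srl (and Y, 0x000000f0), 4), 7, 4).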
3402 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
3403 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
3404 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
3405 (MaskIdx0 + MaskLen0 <= 64) &&
3406 (CN1 = dyn_cast<ConstantSDNode>(N1->getOperand(1))) &&
3407 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
3408 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 3\n");
3409 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
3410 DAG.getNode(ISD::SRL, DL, N1->getValueType(0), N1,
3411 DAG.getConstant(MaskIdx0, DL, GRLenVT)),
3412 DAG.getConstant(ValBits == 32
3413 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
3414 : (MaskIdx0 + MaskLen0 - 1),
3415 DL, GRLenVT),
3416 DAG.getConstant(MaskIdx0, DL, GRLenVT));
3417 }
3418
3419 // 4th pattern to match BSTRINS:
3420 // R = or (and X, mask), (shl Y, shamt)
3421 // where mask = (2**shamt - 1)
3422 // =>
3423 // R = BSTRINS X, Y, ValBits - 1, shamt
3424 // where ValBits = 32 or 64
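// For example, with shamt == 16 on a 32-bit value, mask == 0xffff, so
//   R = or (and X, 0xffff), (shl Y, 16)
// becomes (BSTRINS X, Y, 31, 16).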
3425 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::SHL &&
3426 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
3427 isShiftedMask_64(CN0->getZExtValue(), MaskIdx0, MaskLen0) &&
3428 MaskIdx0 == 0 && (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
3429 (Shamt = CN1->getZExtValue()) == MaskLen0 &&
3430 (MaskIdx0 + MaskLen0 <= ValBits)) {
3431 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 4\n");
3432 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
3433 N1.getOperand(0),
3434 DAG.getConstant((ValBits - 1), DL, GRLenVT),
3435 DAG.getConstant(Shamt, DL, GRLenVT));
3436 }
3437
3438 // 5th pattern to match BSTRINS:
3439 // R = or (and X, mask), const
3440 // where ~mask = (2**size - 1) << lsb, mask & const = 0
3441 // =>
3442 // R = BSTRINS X, (const >> lsb), msb, lsb
3443 // where msb = lsb + size - 1
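// For example, with mask == 0xffff00ff (lsb == 8, size == 8) and const ==
// 0x1200,
//   R = or (and X, 0xffff00ff), 0x1200
// becomes (BSTRINS X, 0x12, 15, 8).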
3444 if (N0.getOpcode() == ISD::AND &&
3445 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
3446 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
3447 (CN1 = dyn_cast<ConstantSDNode>(N1)) &&
3448 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
3449 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 5\n");
3450 return DAG.getNode(
3451 LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
3452 DAG.getConstant(CN1->getSExtValue() >> MaskIdx0, DL, ValTy),
3453 DAG.getConstant(ValBits == 32 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
3454 : (MaskIdx0 + MaskLen0 - 1),
3455 DL, GRLenVT),
3456 DAG.getConstant(MaskIdx0, DL, GRLenVT));
3457 }
3458
3459 // 6th pattern.
3460 // a = b | ((c & mask) << shamt), where all positions in b to be overwritten
3461 // by the incoming bits are known to be zero.
3462 // =>
3463 // a = BSTRINS b, c, shamt + MaskLen - 1, shamt
3464 //
3465 // Note that the 1st pattern is a special case of the 6th, i.e. the 6th
3466 // pattern is more general than the 1st. So we put the 1st before the 6th in
3467 // order to match as many nodes as possible.
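// For example, with mask == 0x1f and shamt == 3, and with bits [7:3] of b
// known to be zero, a = b | ((c & 0x1f) << 3) becomes (BSTRINS b, c, 7, 3).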
3468 ConstantSDNode *CNMask, *CNShamt;
3469 unsigned MaskIdx, MaskLen;
3470 if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
3471 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
3472 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
3473 MaskIdx == 0 && (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
3474 CNShamt->getZExtValue() + MaskLen <= ValBits) {
3475 Shamt = CNShamt->getZExtValue();
3476 APInt ShMask(ValBits, CNMask->getZExtValue() << Shamt);
3477 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
3478 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 6\n");
3479 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
3480 N1.getOperand(0).getOperand(0),
3481 DAG.getConstant(Shamt + MaskLen - 1, DL, GRLenVT),
3482 DAG.getConstant(Shamt, DL, GRLenVT));
3483 }
3484 }
3485
3486 // 7th pattern.
3487 // a = b | ((c << shamt) & shifted_mask), where all positions in b to be
3488 // overwritten by the incoming bits are known to be zero.
3489 // =>
3490 // a = BSTRINS b, c, MaskIdx + MaskLen - 1, MaskIdx
3491 //
3492 // Similarly, the 7th pattern is more general than the 2nd. So we put the 2nd
3493 // before the 7th in order to match as many nodes as possible.
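// For example, with shifted_mask == 0xf8 (MaskIdx == 3, MaskLen == 5), shamt ==
// 3, and bits [7:3] of b known to be zero, a = b | ((c << 3) & 0xf8) becomes
// (BSTRINS b, c, 7, 3).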
3494 if (N1.getOpcode() == ISD::AND &&
3495 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
3496 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
3497 N1.getOperand(0).getOpcode() == ISD::SHL &&
3498 (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
3499 CNShamt->getZExtValue() == MaskIdx) {
3500 APInt ShMask(ValBits, CNMask->getZExtValue());
3501 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
3502 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 7\n");
3503 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
3504 N1.getOperand(0).getOperand(0),
3505 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
3506 DAG.getConstant(MaskIdx, DL, GRLenVT));
3507 }
3508 }
3509
3510 // (or a, b) and (or b, a) are equivalent, so swap the operands and retry.
3511 if (!SwapAndRetried) {
3512 std::swap(N0, N1);
3513 SwapAndRetried = true;
3514 goto Retry;
3515 }
3516
3517 SwapAndRetried = false;
3518Retry2:
3519 // 8th pattern.
3520 // a = b | (c & shifted_mask), where all positions in b to be overwritten by
3521 // the incoming bits are known to be zero.
3522 // =>
3523 // a = BSTRINS b, c >> MaskIdx, MaskIdx + MaskLen - 1, MaskIdx
3524 //
3525 // Similarly, the 8th pattern is more general than the 4th and 5th patterns. So
3526 // we put it here in order to match as many nodes as possible and generate
3527 // fewer instructions.
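// For example, with shifted_mask == 0xff00 (MaskIdx == 8, MaskLen == 8) and
// bits [15:8] of b known to be zero, a = b | (c & 0xff00) becomes
// (BSTRINS b, (srl c, 8), 15, 8).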
3528 if (N1.getOpcode() == ISD::AND &&
3529 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
3530 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen)) {
3531 APInt ShMask(ValBits, CNMask->getZExtValue());
3532 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
3533 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 8\n");
3534 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
3535 DAG.getNode(ISD::SRL, DL, N1->getValueType(0),
3536 N1->getOperand(0),
3537 DAG.getConstant(MaskIdx, DL, GRLenVT)),
3538 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
3539 DAG.getConstant(MaskIdx, DL, GRLenVT));
3540 }
3541 }
3542 // Swap N0/N1 and retry.
3543 if (!SwapAndRetried) {
3544 std::swap(N0, N1);
3545 SwapAndRetried = true;
3546 goto Retry2;
3547 }
3548
3549 return SDValue();
3550}
3551
3552static bool checkValueWidth(SDValue V, ISD::LoadExtType &ExtType) {
3553 ExtType = ISD::NON_EXTLOAD;
3554
3555 switch (V.getNode()->getOpcode()) {
3556 case ISD::LOAD: {
3557 LoadSDNode *LoadNode = cast<LoadSDNode>(V.getNode());
3558 if ((LoadNode->getMemoryVT() == MVT::i8) ||
3559 (LoadNode->getMemoryVT() == MVT::i16)) {
3560 ExtType = LoadNode->getExtensionType();
3561 return true;
3562 }
3563 return false;
3564 }
3565 case ISD::AssertSext: {
3566 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
3567 if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
3568 ExtType = ISD::SEXTLOAD;
3569 return true;
3570 }
3571 return false;
3572 }
3573 case ISD::AssertZext: {
3574 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
3575 if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
3576 ExtType = ISD::ZEXTLOAD;
3577 return true;
3578 }
3579 return false;
3580 }
3581 default:
3582 return false;
3583 }
3584
3585 return false;
3586}
3587
3588// Eliminate redundant truncation and zero-extension nodes.
3589// * Case 1:
3590// +------------+ +------------+ +------------+
3591// | Input1 | | Input2 | | CC |
3592// +------------+ +------------+ +------------+
3593// | | |
3594// V V +----+
3595// +------------+ +------------+ |
3596// | TRUNCATE | | TRUNCATE | |
3597// +------------+ +------------+ |
3598// | | |
3599// V V |
3600// +------------+ +------------+ |
3601// | ZERO_EXT | | ZERO_EXT | |
3602// +------------+ +------------+ |
3603// | | |
3604// | +-------------+ |
3605// V V | |
3606// +----------------+ | |
3607// | AND | | |
3608// +----------------+ | |
3609// | | |
3610// +---------------+ | |
3611// | | |
3612// V V V
3613// +-------------+
3614// | CMP |
3615// +-------------+
3616// * Case 2:
3617// +------------+ +------------+ +-------------+ +------------+ +------------+
3618// | Input1 | | Input2 | | Constant -1 | | Constant 0 | | CC |
3619// +------------+ +------------+ +-------------+ +------------+ +------------+
3620// | | | | |
3621// V | | | |
3622// +------------+ | | | |
3623// | XOR |<---------------------+ | |
3624// +------------+ | | |
3625// | | | |
3626// V V +---------------+ |
3627// +------------+ +------------+ | |
3628// | TRUNCATE | | TRUNCATE | | +-------------------------+
3629// +------------+ +------------+ | |
3630// | | | |
3631// V V | |
3632// +------------+ +------------+ | |
3633// | ZERO_EXT | | ZERO_EXT | | |
3634// +------------+ +------------+ | |
3635// | | | |
3636// V V | |
3637// +----------------+ | |
3638// | AND | | |
3639// +----------------+ | |
3640// | | |
3641// +---------------+ | |
3642// | | |
3643// V V V
3644// +-------------+
3645// | CMP |
3646// +-------------+
3647 static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG,
3648 TargetLowering::DAGCombinerInfo &DCI,
3649 const LoongArchSubtarget &Subtarget) {
3650 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
3651
3652 SDNode *AndNode = N->getOperand(0).getNode();
3653 if (AndNode->getOpcode() != ISD::AND)
3654 return SDValue();
3655
3656 SDValue AndInputValue2 = AndNode->getOperand(1);
3657 if (AndInputValue2.getOpcode() != ISD::ZERO_EXTEND)
3658 return SDValue();
3659
3660 SDValue CmpInputValue = N->getOperand(1);
3661 SDValue AndInputValue1 = AndNode->getOperand(0);
3662 if (AndInputValue1.getOpcode() == ISD::XOR) {
3663 if (CC != ISD::SETEQ && CC != ISD::SETNE)
3664 return SDValue();
3665 ConstantSDNode *CN = dyn_cast<ConstantSDNode>(AndInputValue1.getOperand(1));
3666 if (!CN || CN->getSExtValue() != -1)
3667 return SDValue();
3668 CN = dyn_cast<ConstantSDNode>(CmpInputValue);
3669 if (!CN || CN->getSExtValue() != 0)
3670 return SDValue();
3671 AndInputValue1 = AndInputValue1.getOperand(0);
3672 if (AndInputValue1.getOpcode() != ISD::ZERO_EXTEND)
3673 return SDValue();
3674 } else if (AndInputValue1.getOpcode() == ISD::ZERO_EXTEND) {
3675 if (AndInputValue2 != CmpInputValue)
3676 return SDValue();
3677 } else {
3678 return SDValue();
3679 }
3680
3681 SDValue TruncValue1 = AndInputValue1.getNode()->getOperand(0);
3682 if (TruncValue1.getOpcode() != ISD::TRUNCATE)
3683 return SDValue();
3684
3685 SDValue TruncValue2 = AndInputValue2.getNode()->getOperand(0);
3686 if (TruncValue2.getOpcode() != ISD::TRUNCATE)
3687 return SDValue();
3688
3689 SDValue TruncInputValue1 = TruncValue1.getNode()->getOperand(0);
3690 SDValue TruncInputValue2 = TruncValue2.getNode()->getOperand(0);
3691 ISD::LoadExtType ExtType1;
3692 ISD::LoadExtType ExtType2;
3693
3694 if (!checkValueWidth(TruncInputValue1, ExtType1) ||
3695 !checkValueWidth(TruncInputValue2, ExtType2))
3696 return SDValue();
3697
3698 if (TruncInputValue1->getValueType(0) != TruncInputValue2->getValueType(0) ||
3699 AndNode->getValueType(0) != TruncInputValue1->getValueType(0))
3700 return SDValue();
3701
3702 if ((ExtType2 != ISD::ZEXTLOAD) &&
3703 ((ExtType2 != ISD::SEXTLOAD) && (ExtType1 != ISD::SEXTLOAD)))
3704 return SDValue();
3705
3706 // These truncation and zero-extension nodes are not necessary; remove them.
3707 SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N), AndNode->getValueType(0),
3708 TruncInputValue1, TruncInputValue2);
3709 SDValue NewSetCC =
3710 DAG.getSetCC(SDLoc(N), N->getValueType(0), NewAnd, TruncInputValue2, CC);
3711 DAG.ReplaceAllUsesWith(N, NewSetCC.getNode());
3712 return SDValue(N, 0);
3713}
3714
3715// Combine (loongarch_bitrev_w (loongarch_revb_2w X)) to loongarch_bitrev_4b.
3716 static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG,
3717 TargetLowering::DAGCombinerInfo &DCI,
3718 const LoongArchSubtarget &Subtarget) {
3719 if (DCI.isBeforeLegalizeOps())
3720 return SDValue();
3721
3722 SDValue Src = N->getOperand(0);
3723 if (Src.getOpcode() != LoongArchISD::REVB_2W)
3724 return SDValue();
3725
3726 return DAG.getNode(LoongArchISD::BITREV_4B, SDLoc(N), N->getValueType(0),
3727 Src.getOperand(0));
3728}
3729
3730template <unsigned N>
3731 static SDValue legalizeIntrinsicImmArg(SDNode *Node, unsigned ImmOp,
3732 SelectionDAG &DAG,
3733 const LoongArchSubtarget &Subtarget,
3734 bool IsSigned = false) {
3735 SDLoc DL(Node);
3736 auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
3737 // Check the ImmArg.
3738 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
3739 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
3740 DAG.getContext()->emitError(Node->getOperationName(0) +
3741 ": argument out of range.");
3742 return DAG.getNode(ISD::UNDEF, DL, Subtarget.getGRLenVT());
3743 }
3744 return DAG.getConstant(CImm->getZExtValue(), DL, Subtarget.getGRLenVT());
3745}
3746
3747template <unsigned N>
3748static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp,
3749 SelectionDAG &DAG, bool IsSigned = false) {
3750 SDLoc DL(Node);
3751 EVT ResTy = Node->getValueType(0);
3752 auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
3753
3754 // Check the ImmArg.
3755 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
3756 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
3757 DAG.getContext()->emitError(Node->getOperationName(0) +
3758 ": argument out of range.");
3759 return DAG.getNode(ISD::UNDEF, DL, ResTy);
3760 }
3761 return DAG.getConstant(
3762 APInt(ResTy.getScalarType().getSizeInBits(),
3763 IsSigned ? CImm->getSExtValue() : CImm->getZExtValue(), IsSigned),
3764 DL, ResTy);
3765}
3766
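// Mask the shift-amount/bit-index operand (operand 2 of the intrinsic) down to
// the element bit width, mirroring the modulo-EltBits behaviour of the
// corresponding vector instructions.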
3767 static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG) {
3768 SDLoc DL(Node);
3769 EVT ResTy = Node->getValueType(0);
3770 SDValue Vec = Node->getOperand(2);
3771 SDValue Mask = DAG.getConstant(Vec.getScalarValueSizeInBits() - 1, DL, ResTy);
3772 return DAG.getNode(ISD::AND, DL, ResTy, Vec, Mask);
3773}
3774
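// Lower a vbitclr intrinsic: clear bit (Op2 % EltBits) in every element of
// Op1, i.e. build (and Op1, (not (shl 1, (and Op2, EltBits - 1)))).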
3775 static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG) {
3776 SDLoc DL(Node);
3777 EVT ResTy = Node->getValueType(0);
3778 SDValue One = DAG.getConstant(1, DL, ResTy);
3779 SDValue Bit =
3780 DAG.getNode(ISD::SHL, DL, ResTy, One, truncateVecElts(Node, DAG));
3781
3782 return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1),
3783 DAG.getNOT(DL, Bit, ResTy));
3784}
3785
3786template <unsigned N>
3787 static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG) {
3788 SDLoc DL(Node);
3789 EVT ResTy = Node->getValueType(0);
3790 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
3791 // Check the unsigned ImmArg.
3792 if (!isUInt<N>(CImm->getZExtValue())) {
3793 DAG.getContext()->emitError(Node->getOperationName(0) +
3794 ": argument out of range.");
3795 return DAG.getNode(ISD::UNDEF, DL, ResTy);
3796 }
3797
3798 APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
3799 SDValue Mask = DAG.getConstant(~BitImm, DL, ResTy);
3800
3801 return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1), Mask);
3802}
3803
3804template <unsigned N>
3805 static SDValue lowerVectorBitSetImm(SDNode *Node, SelectionDAG &DAG) {
3806 SDLoc DL(Node);
3807 EVT ResTy = Node->getValueType(0);
3808 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
3809 // Check the unsigned ImmArg.
3810 if (!isUInt<N>(CImm->getZExtValue())) {
3811 DAG.getContext()->emitError(Node->getOperationName(0) +
3812 ": argument out of range.");
3813 return DAG.getNode(ISD::UNDEF, DL, ResTy);
3814 }
3815
3816 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
3817 SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
3818 return DAG.getNode(ISD::OR, DL, ResTy, Node->getOperand(1), BitImm);
3819}
3820
3821template <unsigned N>
3822 static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG) {
3823 SDLoc DL(Node);
3824 EVT ResTy = Node->getValueType(0);
3825 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
3826 // Check the unsigned ImmArg.
3827 if (!isUInt<N>(CImm->getZExtValue())) {
3828 DAG.getContext()->emitError(Node->getOperationName(0) +
3829 ": argument out of range.");
3830 return DAG.getNode(ISD::UNDEF, DL, ResTy);
3831 }
3832
3833 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
3834 SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
3835 return DAG.getNode(ISD::XOR, DL, ResTy, Node->getOperand(1), BitImm);
3836}
3837
3838static SDValue
3839 performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG,
3840 TargetLowering::DAGCombinerInfo &DCI,
3841 const LoongArchSubtarget &Subtarget) {
3842 SDLoc DL(N);
3843 switch (N->getConstantOperandVal(0)) {
3844 default:
3845 break;
3846 case Intrinsic::loongarch_lsx_vadd_b:
3847 case Intrinsic::loongarch_lsx_vadd_h:
3848 case Intrinsic::loongarch_lsx_vadd_w:
3849 case Intrinsic::loongarch_lsx_vadd_d:
3850 case Intrinsic::loongarch_lasx_xvadd_b:
3851 case Intrinsic::loongarch_lasx_xvadd_h:
3852 case Intrinsic::loongarch_lasx_xvadd_w:
3853 case Intrinsic::loongarch_lasx_xvadd_d:
3854 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
3855 N->getOperand(2));
3856 case Intrinsic::loongarch_lsx_vaddi_bu:
3857 case Intrinsic::loongarch_lsx_vaddi_hu:
3858 case Intrinsic::loongarch_lsx_vaddi_wu:
3859 case Intrinsic::loongarch_lsx_vaddi_du:
3860 case Intrinsic::loongarch_lasx_xvaddi_bu:
3861 case Intrinsic::loongarch_lasx_xvaddi_hu:
3862 case Intrinsic::loongarch_lasx_xvaddi_wu:
3863 case Intrinsic::loongarch_lasx_xvaddi_du:
3864 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
3865 lowerVectorSplatImm<5>(N, 2, DAG));
3866 case Intrinsic::loongarch_lsx_vsub_b:
3867 case Intrinsic::loongarch_lsx_vsub_h:
3868 case Intrinsic::loongarch_lsx_vsub_w:
3869 case Intrinsic::loongarch_lsx_vsub_d:
3870 case Intrinsic::loongarch_lasx_xvsub_b:
3871 case Intrinsic::loongarch_lasx_xvsub_h:
3872 case Intrinsic::loongarch_lasx_xvsub_w:
3873 case Intrinsic::loongarch_lasx_xvsub_d:
3874 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
3875 N->getOperand(2));
3876 case Intrinsic::loongarch_lsx_vsubi_bu:
3877 case Intrinsic::loongarch_lsx_vsubi_hu:
3878 case Intrinsic::loongarch_lsx_vsubi_wu:
3879 case Intrinsic::loongarch_lsx_vsubi_du:
3880 case Intrinsic::loongarch_lasx_xvsubi_bu:
3881 case Intrinsic::loongarch_lasx_xvsubi_hu:
3882 case Intrinsic::loongarch_lasx_xvsubi_wu:
3883 case Intrinsic::loongarch_lasx_xvsubi_du:
3884 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
3885 lowerVectorSplatImm<5>(N, 2, DAG));
3886 case Intrinsic::loongarch_lsx_vneg_b:
3887 case Intrinsic::loongarch_lsx_vneg_h:
3888 case Intrinsic::loongarch_lsx_vneg_w:
3889 case Intrinsic::loongarch_lsx_vneg_d:
3890 case Intrinsic::loongarch_lasx_xvneg_b:
3891 case Intrinsic::loongarch_lasx_xvneg_h:
3892 case Intrinsic::loongarch_lasx_xvneg_w:
3893 case Intrinsic::loongarch_lasx_xvneg_d:
3894 return DAG.getNode(
3895 ISD::SUB, DL, N->getValueType(0),
3896 DAG.getConstant(
3897 APInt(N->getValueType(0).getScalarType().getSizeInBits(), 0,
3898 /*isSigned=*/true),
3899 SDLoc(N), N->getValueType(0)),
3900 N->getOperand(1));
3901 case Intrinsic::loongarch_lsx_vmax_b:
3902 case Intrinsic::loongarch_lsx_vmax_h:
3903 case Intrinsic::loongarch_lsx_vmax_w:
3904 case Intrinsic::loongarch_lsx_vmax_d:
3905 case Intrinsic::loongarch_lasx_xvmax_b:
3906 case Intrinsic::loongarch_lasx_xvmax_h:
3907 case Intrinsic::loongarch_lasx_xvmax_w:
3908 case Intrinsic::loongarch_lasx_xvmax_d:
3909 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
3910 N->getOperand(2));
3911 case Intrinsic::loongarch_lsx_vmax_bu:
3912 case Intrinsic::loongarch_lsx_vmax_hu:
3913 case Intrinsic::loongarch_lsx_vmax_wu:
3914 case Intrinsic::loongarch_lsx_vmax_du:
3915 case Intrinsic::loongarch_lasx_xvmax_bu:
3916 case Intrinsic::loongarch_lasx_xvmax_hu:
3917 case Intrinsic::loongarch_lasx_xvmax_wu:
3918 case Intrinsic::loongarch_lasx_xvmax_du:
3919 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
3920 N->getOperand(2));
3921 case Intrinsic::loongarch_lsx_vmaxi_b:
3922 case Intrinsic::loongarch_lsx_vmaxi_h:
3923 case Intrinsic::loongarch_lsx_vmaxi_w:
3924 case Intrinsic::loongarch_lsx_vmaxi_d:
3925 case Intrinsic::loongarch_lasx_xvmaxi_b:
3926 case Intrinsic::loongarch_lasx_xvmaxi_h:
3927 case Intrinsic::loongarch_lasx_xvmaxi_w:
3928 case Intrinsic::loongarch_lasx_xvmaxi_d:
3929 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
3930 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
3931 case Intrinsic::loongarch_lsx_vmaxi_bu:
3932 case Intrinsic::loongarch_lsx_vmaxi_hu:
3933 case Intrinsic::loongarch_lsx_vmaxi_wu:
3934 case Intrinsic::loongarch_lsx_vmaxi_du:
3935 case Intrinsic::loongarch_lasx_xvmaxi_bu:
3936 case Intrinsic::loongarch_lasx_xvmaxi_hu:
3937 case Intrinsic::loongarch_lasx_xvmaxi_wu:
3938 case Intrinsic::loongarch_lasx_xvmaxi_du:
3939 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
3940 lowerVectorSplatImm<5>(N, 2, DAG));
3941 case Intrinsic::loongarch_lsx_vmin_b:
3942 case Intrinsic::loongarch_lsx_vmin_h:
3943 case Intrinsic::loongarch_lsx_vmin_w:
3944 case Intrinsic::loongarch_lsx_vmin_d:
3945 case Intrinsic::loongarch_lasx_xvmin_b:
3946 case Intrinsic::loongarch_lasx_xvmin_h:
3947 case Intrinsic::loongarch_lasx_xvmin_w:
3948 case Intrinsic::loongarch_lasx_xvmin_d:
3949 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
3950 N->getOperand(2));
3951 case Intrinsic::loongarch_lsx_vmin_bu:
3952 case Intrinsic::loongarch_lsx_vmin_hu:
3953 case Intrinsic::loongarch_lsx_vmin_wu:
3954 case Intrinsic::loongarch_lsx_vmin_du:
3955 case Intrinsic::loongarch_lasx_xvmin_bu:
3956 case Intrinsic::loongarch_lasx_xvmin_hu:
3957 case Intrinsic::loongarch_lasx_xvmin_wu:
3958 case Intrinsic::loongarch_lasx_xvmin_du:
3959 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
3960 N->getOperand(2));
3961 case Intrinsic::loongarch_lsx_vmini_b:
3962 case Intrinsic::loongarch_lsx_vmini_h:
3963 case Intrinsic::loongarch_lsx_vmini_w:
3964 case Intrinsic::loongarch_lsx_vmini_d:
3965 case Intrinsic::loongarch_lasx_xvmini_b:
3966 case Intrinsic::loongarch_lasx_xvmini_h:
3967 case Intrinsic::loongarch_lasx_xvmini_w:
3968 case Intrinsic::loongarch_lasx_xvmini_d:
3969 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
3970 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
3971 case Intrinsic::loongarch_lsx_vmini_bu:
3972 case Intrinsic::loongarch_lsx_vmini_hu:
3973 case Intrinsic::loongarch_lsx_vmini_wu:
3974 case Intrinsic::loongarch_lsx_vmini_du:
3975 case Intrinsic::loongarch_lasx_xvmini_bu:
3976 case Intrinsic::loongarch_lasx_xvmini_hu:
3977 case Intrinsic::loongarch_lasx_xvmini_wu:
3978 case Intrinsic::loongarch_lasx_xvmini_du:
3979 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
3980 lowerVectorSplatImm<5>(N, 2, DAG));
3981 case Intrinsic::loongarch_lsx_vmul_b:
3982 case Intrinsic::loongarch_lsx_vmul_h:
3983 case Intrinsic::loongarch_lsx_vmul_w:
3984 case Intrinsic::loongarch_lsx_vmul_d:
3985 case Intrinsic::loongarch_lasx_xvmul_b:
3986 case Intrinsic::loongarch_lasx_xvmul_h:
3987 case Intrinsic::loongarch_lasx_xvmul_w:
3988 case Intrinsic::loongarch_lasx_xvmul_d:
3989 return DAG.getNode(ISD::MUL, DL, N->getValueType(0), N->getOperand(1),
3990 N->getOperand(2));
3991 case Intrinsic::loongarch_lsx_vmadd_b:
3992 case Intrinsic::loongarch_lsx_vmadd_h:
3993 case Intrinsic::loongarch_lsx_vmadd_w:
3994 case Intrinsic::loongarch_lsx_vmadd_d:
3995 case Intrinsic::loongarch_lasx_xvmadd_b:
3996 case Intrinsic::loongarch_lasx_xvmadd_h:
3997 case Intrinsic::loongarch_lasx_xvmadd_w:
3998 case Intrinsic::loongarch_lasx_xvmadd_d: {
3999 EVT ResTy = N->getValueType(0);
4000 return DAG.getNode(ISD::ADD, SDLoc(N), ResTy, N->getOperand(1),
4001 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
4002 N->getOperand(3)));
4003 }
4004 case Intrinsic::loongarch_lsx_vmsub_b:
4005 case Intrinsic::loongarch_lsx_vmsub_h:
4006 case Intrinsic::loongarch_lsx_vmsub_w:
4007 case Intrinsic::loongarch_lsx_vmsub_d:
4008 case Intrinsic::loongarch_lasx_xvmsub_b:
4009 case Intrinsic::loongarch_lasx_xvmsub_h:
4010 case Intrinsic::loongarch_lasx_xvmsub_w:
4011 case Intrinsic::loongarch_lasx_xvmsub_d: {
4012 EVT ResTy = N->getValueType(0);
4013 return DAG.getNode(ISD::SUB, SDLoc(N), ResTy, N->getOperand(1),
4014 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
4015 N->getOperand(3)));
4016 }
4017 case Intrinsic::loongarch_lsx_vdiv_b:
4018 case Intrinsic::loongarch_lsx_vdiv_h:
4019 case Intrinsic::loongarch_lsx_vdiv_w:
4020 case Intrinsic::loongarch_lsx_vdiv_d:
4021 case Intrinsic::loongarch_lasx_xvdiv_b:
4022 case Intrinsic::loongarch_lasx_xvdiv_h:
4023 case Intrinsic::loongarch_lasx_xvdiv_w:
4024 case Intrinsic::loongarch_lasx_xvdiv_d:
4025 return DAG.getNode(ISD::SDIV, DL, N->getValueType(0), N->getOperand(1),
4026 N->getOperand(2));
4027 case Intrinsic::loongarch_lsx_vdiv_bu:
4028 case Intrinsic::loongarch_lsx_vdiv_hu:
4029 case Intrinsic::loongarch_lsx_vdiv_wu:
4030 case Intrinsic::loongarch_lsx_vdiv_du:
4031 case Intrinsic::loongarch_lasx_xvdiv_bu:
4032 case Intrinsic::loongarch_lasx_xvdiv_hu:
4033 case Intrinsic::loongarch_lasx_xvdiv_wu:
4034 case Intrinsic::loongarch_lasx_xvdiv_du:
4035 return DAG.getNode(ISD::UDIV, DL, N->getValueType(0), N->getOperand(1),
4036 N->getOperand(2));
4037 case Intrinsic::loongarch_lsx_vmod_b:
4038 case Intrinsic::loongarch_lsx_vmod_h:
4039 case Intrinsic::loongarch_lsx_vmod_w:
4040 case Intrinsic::loongarch_lsx_vmod_d:
4041 case Intrinsic::loongarch_lasx_xvmod_b:
4042 case Intrinsic::loongarch_lasx_xvmod_h:
4043 case Intrinsic::loongarch_lasx_xvmod_w:
4044 case Intrinsic::loongarch_lasx_xvmod_d:
4045 return DAG.getNode(ISD::SREM, DL, N->getValueType(0), N->getOperand(1),
4046 N->getOperand(2));
4047 case Intrinsic::loongarch_lsx_vmod_bu:
4048 case Intrinsic::loongarch_lsx_vmod_hu:
4049 case Intrinsic::loongarch_lsx_vmod_wu:
4050 case Intrinsic::loongarch_lsx_vmod_du:
4051 case Intrinsic::loongarch_lasx_xvmod_bu:
4052 case Intrinsic::loongarch_lasx_xvmod_hu:
4053 case Intrinsic::loongarch_lasx_xvmod_wu:
4054 case Intrinsic::loongarch_lasx_xvmod_du:
4055 return DAG.getNode(ISD::UREM, DL, N->getValueType(0), N->getOperand(1),
4056 N->getOperand(2));
4057 case Intrinsic::loongarch_lsx_vand_v:
4058 case Intrinsic::loongarch_lasx_xvand_v:
4059 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
4060 N->getOperand(2));
4061 case Intrinsic::loongarch_lsx_vor_v:
4062 case Intrinsic::loongarch_lasx_xvor_v:
4063 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
4064 N->getOperand(2));
4065 case Intrinsic::loongarch_lsx_vxor_v:
4066 case Intrinsic::loongarch_lasx_xvxor_v:
4067 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
4068 N->getOperand(2));
4069 case Intrinsic::loongarch_lsx_vnor_v:
4070 case Intrinsic::loongarch_lasx_xvnor_v: {
4071 SDValue Res = DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
4072 N->getOperand(2));
4073 return DAG.getNOT(DL, Res, Res->getValueType(0));
4074 }
4075 case Intrinsic::loongarch_lsx_vandi_b:
4076 case Intrinsic::loongarch_lasx_xvandi_b:
4077 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
4078 lowerVectorSplatImm<8>(N, 2, DAG));
4079 case Intrinsic::loongarch_lsx_vori_b:
4080 case Intrinsic::loongarch_lasx_xvori_b:
4081 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
4082 lowerVectorSplatImm<8>(N, 2, DAG));
4083 case Intrinsic::loongarch_lsx_vxori_b:
4084 case Intrinsic::loongarch_lasx_xvxori_b:
4085 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
4086 lowerVectorSplatImm<8>(N, 2, DAG));
4087 case Intrinsic::loongarch_lsx_vsll_b:
4088 case Intrinsic::loongarch_lsx_vsll_h:
4089 case Intrinsic::loongarch_lsx_vsll_w:
4090 case Intrinsic::loongarch_lsx_vsll_d:
4091 case Intrinsic::loongarch_lasx_xvsll_b:
4092 case Intrinsic::loongarch_lasx_xvsll_h:
4093 case Intrinsic::loongarch_lasx_xvsll_w:
4094 case Intrinsic::loongarch_lasx_xvsll_d:
4095 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
4096 truncateVecElts(N, DAG));
4097 case Intrinsic::loongarch_lsx_vslli_b:
4098 case Intrinsic::loongarch_lasx_xvslli_b:
4099 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
4100 lowerVectorSplatImm<3>(N, 2, DAG));
4101 case Intrinsic::loongarch_lsx_vslli_h:
4102 case Intrinsic::loongarch_lasx_xvslli_h:
4103 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
4104 lowerVectorSplatImm<4>(N, 2, DAG));
4105 case Intrinsic::loongarch_lsx_vslli_w:
4106 case Intrinsic::loongarch_lasx_xvslli_w:
4107 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
4108 lowerVectorSplatImm<5>(N, 2, DAG));
4109 case Intrinsic::loongarch_lsx_vslli_d:
4110 case Intrinsic::loongarch_lasx_xvslli_d:
4111 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
4112 lowerVectorSplatImm<6>(N, 2, DAG));
4113 case Intrinsic::loongarch_lsx_vsrl_b:
4114 case Intrinsic::loongarch_lsx_vsrl_h:
4115 case Intrinsic::loongarch_lsx_vsrl_w:
4116 case Intrinsic::loongarch_lsx_vsrl_d:
4117 case Intrinsic::loongarch_lasx_xvsrl_b:
4118 case Intrinsic::loongarch_lasx_xvsrl_h:
4119 case Intrinsic::loongarch_lasx_xvsrl_w:
4120 case Intrinsic::loongarch_lasx_xvsrl_d:
4121 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
4122 truncateVecElts(N, DAG));
4123 case Intrinsic::loongarch_lsx_vsrli_b:
4124 case Intrinsic::loongarch_lasx_xvsrli_b:
4125 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
4126 lowerVectorSplatImm<3>(N, 2, DAG));
4127 case Intrinsic::loongarch_lsx_vsrli_h:
4128 case Intrinsic::loongarch_lasx_xvsrli_h:
4129 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
4130 lowerVectorSplatImm<4>(N, 2, DAG));
4131 case Intrinsic::loongarch_lsx_vsrli_w:
4132 case Intrinsic::loongarch_lasx_xvsrli_w:
4133 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
4134 lowerVectorSplatImm<5>(N, 2, DAG));
4135 case Intrinsic::loongarch_lsx_vsrli_d:
4136 case Intrinsic::loongarch_lasx_xvsrli_d:
4137 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
4138 lowerVectorSplatImm<6>(N, 2, DAG));
4139 case Intrinsic::loongarch_lsx_vsra_b:
4140 case Intrinsic::loongarch_lsx_vsra_h:
4141 case Intrinsic::loongarch_lsx_vsra_w:
4142 case Intrinsic::loongarch_lsx_vsra_d:
4143 case Intrinsic::loongarch_lasx_xvsra_b:
4144 case Intrinsic::loongarch_lasx_xvsra_h:
4145 case Intrinsic::loongarch_lasx_xvsra_w:
4146 case Intrinsic::loongarch_lasx_xvsra_d:
4147 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
4148 truncateVecElts(N, DAG));
4149 case Intrinsic::loongarch_lsx_vsrai_b:
4150 case Intrinsic::loongarch_lasx_xvsrai_b:
4151 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
4152 lowerVectorSplatImm<3>(N, 2, DAG));
4153 case Intrinsic::loongarch_lsx_vsrai_h:
4154 case Intrinsic::loongarch_lasx_xvsrai_h:
4155 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
4156 lowerVectorSplatImm<4>(N, 2, DAG));
4157 case Intrinsic::loongarch_lsx_vsrai_w:
4158 case Intrinsic::loongarch_lasx_xvsrai_w:
4159 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
4160 lowerVectorSplatImm<5>(N, 2, DAG));
4161 case Intrinsic::loongarch_lsx_vsrai_d:
4162 case Intrinsic::loongarch_lasx_xvsrai_d:
4163 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
4164 lowerVectorSplatImm<6>(N, 2, DAG));
4165 case Intrinsic::loongarch_lsx_vclz_b:
4166 case Intrinsic::loongarch_lsx_vclz_h:
4167 case Intrinsic::loongarch_lsx_vclz_w:
4168 case Intrinsic::loongarch_lsx_vclz_d:
4169 case Intrinsic::loongarch_lasx_xvclz_b:
4170 case Intrinsic::loongarch_lasx_xvclz_h:
4171 case Intrinsic::loongarch_lasx_xvclz_w:
4172 case Intrinsic::loongarch_lasx_xvclz_d:
4173 return DAG.getNode(ISD::CTLZ, DL, N->getValueType(0), N->getOperand(1));
4174 case Intrinsic::loongarch_lsx_vpcnt_b:
4175 case Intrinsic::loongarch_lsx_vpcnt_h:
4176 case Intrinsic::loongarch_lsx_vpcnt_w:
4177 case Intrinsic::loongarch_lsx_vpcnt_d:
4178 case Intrinsic::loongarch_lasx_xvpcnt_b:
4179 case Intrinsic::loongarch_lasx_xvpcnt_h:
4180 case Intrinsic::loongarch_lasx_xvpcnt_w:
4181 case Intrinsic::loongarch_lasx_xvpcnt_d:
4182 return DAG.getNode(ISD::CTPOP, DL, N->getValueType(0), N->getOperand(1));
4183 case Intrinsic::loongarch_lsx_vbitclr_b:
4184 case Intrinsic::loongarch_lsx_vbitclr_h:
4185 case Intrinsic::loongarch_lsx_vbitclr_w:
4186 case Intrinsic::loongarch_lsx_vbitclr_d:
4187 case Intrinsic::loongarch_lasx_xvbitclr_b:
4188 case Intrinsic::loongarch_lasx_xvbitclr_h:
4189 case Intrinsic::loongarch_lasx_xvbitclr_w:
4190 case Intrinsic::loongarch_lasx_xvbitclr_d:
4191 return lowerVectorBitClear(N, DAG);
4192 case Intrinsic::loongarch_lsx_vbitclri_b:
4193 case Intrinsic::loongarch_lasx_xvbitclri_b:
4194 return lowerVectorBitClearImm<3>(N, DAG);
4195 case Intrinsic::loongarch_lsx_vbitclri_h:
4196 case Intrinsic::loongarch_lasx_xvbitclri_h:
4197 return lowerVectorBitClearImm<4>(N, DAG);
4198 case Intrinsic::loongarch_lsx_vbitclri_w:
4199 case Intrinsic::loongarch_lasx_xvbitclri_w:
4200 return lowerVectorBitClearImm<5>(N, DAG);
4201 case Intrinsic::loongarch_lsx_vbitclri_d:
4202 case Intrinsic::loongarch_lasx_xvbitclri_d:
4203 return lowerVectorBitClearImm<6>(N, DAG);
4204 case Intrinsic::loongarch_lsx_vbitset_b:
4205 case Intrinsic::loongarch_lsx_vbitset_h:
4206 case Intrinsic::loongarch_lsx_vbitset_w:
4207 case Intrinsic::loongarch_lsx_vbitset_d:
4208 case Intrinsic::loongarch_lasx_xvbitset_b:
4209 case Intrinsic::loongarch_lasx_xvbitset_h:
4210 case Intrinsic::loongarch_lasx_xvbitset_w:
4211 case Intrinsic::loongarch_lasx_xvbitset_d: {
4212 EVT VecTy = N->getValueType(0);
4213 SDValue One = DAG.getConstant(1, DL, VecTy);
4214 return DAG.getNode(
4215 ISD::OR, DL, VecTy, N->getOperand(1),
4216 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
4217 }
4218 case Intrinsic::loongarch_lsx_vbitseti_b:
4219 case Intrinsic::loongarch_lasx_xvbitseti_b:
4220 return lowerVectorBitSetImm<3>(N, DAG);
4221 case Intrinsic::loongarch_lsx_vbitseti_h:
4222 case Intrinsic::loongarch_lasx_xvbitseti_h:
4223 return lowerVectorBitSetImm<4>(N, DAG);
4224 case Intrinsic::loongarch_lsx_vbitseti_w:
4225 case Intrinsic::loongarch_lasx_xvbitseti_w:
4226 return lowerVectorBitSetImm<5>(N, DAG);
4227 case Intrinsic::loongarch_lsx_vbitseti_d:
4228 case Intrinsic::loongarch_lasx_xvbitseti_d:
4229 return lowerVectorBitSetImm<6>(N, DAG);
4230 case Intrinsic::loongarch_lsx_vbitrev_b:
4231 case Intrinsic::loongarch_lsx_vbitrev_h:
4232 case Intrinsic::loongarch_lsx_vbitrev_w:
4233 case Intrinsic::loongarch_lsx_vbitrev_d:
4234 case Intrinsic::loongarch_lasx_xvbitrev_b:
4235 case Intrinsic::loongarch_lasx_xvbitrev_h:
4236 case Intrinsic::loongarch_lasx_xvbitrev_w:
4237 case Intrinsic::loongarch_lasx_xvbitrev_d: {
4238 EVT VecTy = N->getValueType(0);
4239 SDValue One = DAG.getConstant(1, DL, VecTy);
4240 return DAG.getNode(
4241 ISD::XOR, DL, VecTy, N->getOperand(1),
4242 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
4243 }
4244 case Intrinsic::loongarch_lsx_vbitrevi_b:
4245 case Intrinsic::loongarch_lasx_xvbitrevi_b:
4246 return lowerVectorBitRevImm<3>(N, DAG);
4247 case Intrinsic::loongarch_lsx_vbitrevi_h:
4248 case Intrinsic::loongarch_lasx_xvbitrevi_h:
4249 return lowerVectorBitRevImm<4>(N, DAG);
4250 case Intrinsic::loongarch_lsx_vbitrevi_w:
4251 case Intrinsic::loongarch_lasx_xvbitrevi_w:
4252 return lowerVectorBitRevImm<5>(N, DAG);
4253 case Intrinsic::loongarch_lsx_vbitrevi_d:
4254 case Intrinsic::loongarch_lasx_xvbitrevi_d:
4255 return lowerVectorBitRevImm<6>(N, DAG);
4256 case Intrinsic::loongarch_lsx_vfadd_s:
4257 case Intrinsic::loongarch_lsx_vfadd_d:
4258 case Intrinsic::loongarch_lasx_xvfadd_s:
4259 case Intrinsic::loongarch_lasx_xvfadd_d:
4260 return DAG.getNode(ISD::FADD, DL, N->getValueType(0), N->getOperand(1),
4261 N->getOperand(2));
4262 case Intrinsic::loongarch_lsx_vfsub_s:
4263 case Intrinsic::loongarch_lsx_vfsub_d:
4264 case Intrinsic::loongarch_lasx_xvfsub_s:
4265 case Intrinsic::loongarch_lasx_xvfsub_d:
4266 return DAG.getNode(ISD::FSUB, DL, N->getValueType(0), N->getOperand(1),
4267 N->getOperand(2));
4268 case Intrinsic::loongarch_lsx_vfmul_s:
4269 case Intrinsic::loongarch_lsx_vfmul_d:
4270 case Intrinsic::loongarch_lasx_xvfmul_s:
4271 case Intrinsic::loongarch_lasx_xvfmul_d:
4272 return DAG.getNode(ISD::FMUL, DL, N->getValueType(0), N->getOperand(1),
4273 N->getOperand(2));
4274 case Intrinsic::loongarch_lsx_vfdiv_s:
4275 case Intrinsic::loongarch_lsx_vfdiv_d:
4276 case Intrinsic::loongarch_lasx_xvfdiv_s:
4277 case Intrinsic::loongarch_lasx_xvfdiv_d:
4278 return DAG.getNode(ISD::FDIV, DL, N->getValueType(0), N->getOperand(1),
4279 N->getOperand(2));
4280 case Intrinsic::loongarch_lsx_vfmadd_s:
4281 case Intrinsic::loongarch_lsx_vfmadd_d:
4282 case Intrinsic::loongarch_lasx_xvfmadd_s:
4283 case Intrinsic::loongarch_lasx_xvfmadd_d:
4284 return DAG.getNode(ISD::FMA, DL, N->getValueType(0), N->getOperand(1),
4285 N->getOperand(2), N->getOperand(3));
4286 case Intrinsic::loongarch_lsx_vinsgr2vr_b:
4287 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
4288 N->getOperand(1), N->getOperand(2),
4289 legalizeIntrinsicImmArg<4>(N, 3, DAG, Subtarget));
4290 case Intrinsic::loongarch_lsx_vinsgr2vr_h:
4291 case Intrinsic::loongarch_lasx_xvinsgr2vr_w:
4292 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
4293 N->getOperand(1), N->getOperand(2),
4294 legalizeIntrinsicImmArg<3>(N, 3, DAG, Subtarget));
4295 case Intrinsic::loongarch_lsx_vinsgr2vr_w:
4296 case Intrinsic::loongarch_lasx_xvinsgr2vr_d:
4297 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
4298 N->getOperand(1), N->getOperand(2),
4299 legalizeIntrinsicImmArg<2>(N, 3, DAG, Subtarget));
4300 case Intrinsic::loongarch_lsx_vinsgr2vr_d:
4301 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
4302 N->getOperand(1), N->getOperand(2),
4303 legalizeIntrinsicImmArg<1>(N, 3, DAG, Subtarget));
4304 case Intrinsic::loongarch_lsx_vreplgr2vr_b:
4305 case Intrinsic::loongarch_lsx_vreplgr2vr_h:
4306 case Intrinsic::loongarch_lsx_vreplgr2vr_w:
4307 case Intrinsic::loongarch_lsx_vreplgr2vr_d:
4308 case Intrinsic::loongarch_lasx_xvreplgr2vr_b:
4309 case Intrinsic::loongarch_lasx_xvreplgr2vr_h:
4310 case Intrinsic::loongarch_lasx_xvreplgr2vr_w:
4311 case Intrinsic::loongarch_lasx_xvreplgr2vr_d:
4312 return DAG.getNode(LoongArchISD::VREPLGR2VR, DL, N->getValueType(0),
4313 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
4314 N->getOperand(1)));
4315 case Intrinsic::loongarch_lsx_vreplve_b:
4316 case Intrinsic::loongarch_lsx_vreplve_h:
4317 case Intrinsic::loongarch_lsx_vreplve_w:
4318 case Intrinsic::loongarch_lsx_vreplve_d:
4319 case Intrinsic::loongarch_lasx_xvreplve_b:
4320 case Intrinsic::loongarch_lasx_xvreplve_h:
4321 case Intrinsic::loongarch_lasx_xvreplve_w:
4322 case Intrinsic::loongarch_lasx_xvreplve_d:
4323 return DAG.getNode(LoongArchISD::VREPLVE, DL, N->getValueType(0),
4324 N->getOperand(1),
4325 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
4326 N->getOperand(2)));
4327 }
4328 return SDValue();
4329}
4330
4331 SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N,
4332 DAGCombinerInfo &DCI) const {
4333 SelectionDAG &DAG = DCI.DAG;
4334 switch (N->getOpcode()) {
4335 default:
4336 break;
4337 case ISD::AND:
4338 return performANDCombine(N, DAG, DCI, Subtarget);
4339 case ISD::OR:
4340 return performORCombine(N, DAG, DCI, Subtarget);
4341 case ISD::SETCC:
4342 return performSETCCCombine(N, DAG, DCI, Subtarget);
4343 case ISD::SRL:
4344 return performSRLCombine(N, DAG, DCI, Subtarget);
4345 case LoongArchISD::BITREV_W:
4346 return performBITREV_WCombine(N, DAG, DCI, Subtarget);
4347 case ISD::INTRINSIC_WO_CHAIN:
4348 return performINTRINSIC_WO_CHAINCombine(N, DAG, DCI, Subtarget);
4349 }
4350 return SDValue();
4351}
4352
4353 static MachineBasicBlock *insertDivByZeroTrap(MachineInstr &MI,
4354 MachineBasicBlock *MBB) {
4355 if (!ZeroDivCheck)
4356 return MBB;
4357
4358 // Build instructions:
4359 // MBB:
4360 // div(or mod) $dst, $dividend, $divisor
4361 // bnez $divisor, SinkMBB
4362 // BreakMBB:
4363 // break 7 // BRK_DIVZERO
4364 // SinkMBB:
4365 // fallthrough
4366 const BasicBlock *LLVM_BB = MBB->getBasicBlock();
4367 MachineFunction::iterator It = ++MBB->getIterator();
4368 MachineFunction *MF = MBB->getParent();
4369 auto BreakMBB = MF->CreateMachineBasicBlock(LLVM_BB);
4370 auto SinkMBB = MF->CreateMachineBasicBlock(LLVM_BB);
4371 MF->insert(It, BreakMBB);
4372 MF->insert(It, SinkMBB);
4373
4374 // Transfer the remainder of MBB and its successor edges to SinkMBB.
4375 SinkMBB->splice(SinkMBB->end(), MBB, std::next(MI.getIterator()), MBB->end());
4376 SinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
4377
4378 const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
4379 DebugLoc DL = MI.getDebugLoc();
4380 MachineOperand &Divisor = MI.getOperand(2);
4381 Register DivisorReg = Divisor.getReg();
4382
4383 // MBB:
4384 BuildMI(MBB, DL, TII.get(LoongArch::BNEZ))
4385 .addReg(DivisorReg, getKillRegState(Divisor.isKill()))
4386 .addMBB(SinkMBB);
4387 MBB->addSuccessor(BreakMBB);
4388 MBB->addSuccessor(SinkMBB);
4389
4390 // BreakMBB:
4391 // See linux header file arch/loongarch/include/uapi/asm/break.h for the
4392 // definition of BRK_DIVZERO.
4393 BuildMI(BreakMBB, DL, TII.get(LoongArch::BREAK)).addImm(7 /*BRK_DIVZERO*/);
4394 BreakMBB->addSuccessor(SinkMBB);
4395
4396 // Clear Divisor's kill flag.
4397 Divisor.setIsKill(false);
4398
4399 return SinkMBB;
4400}
4401
4402static MachineBasicBlock *
4403 emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB,
4404 const LoongArchSubtarget &Subtarget) {
4405 unsigned CondOpc;
4406 switch (MI.getOpcode()) {
4407 default:
4408 llvm_unreachable("Unexpected opcode");
4409 case LoongArch::PseudoVBZ:
4410 CondOpc = LoongArch::VSETEQZ_V;
4411 break;
4412 case LoongArch::PseudoVBZ_B:
4413 CondOpc = LoongArch::VSETANYEQZ_B;
4414 break;
4415 case LoongArch::PseudoVBZ_H:
4416 CondOpc = LoongArch::VSETANYEQZ_H;
4417 break;
4418 case LoongArch::PseudoVBZ_W:
4419 CondOpc = LoongArch::VSETANYEQZ_W;
4420 break;
4421 case LoongArch::PseudoVBZ_D:
4422 CondOpc = LoongArch::VSETANYEQZ_D;
4423 break;
4424 case LoongArch::PseudoVBNZ:
4425 CondOpc = LoongArch::VSETNEZ_V;
4426 break;
4427 case LoongArch::PseudoVBNZ_B:
4428 CondOpc = LoongArch::VSETALLNEZ_B;
4429 break;
4430 case LoongArch::PseudoVBNZ_H:
4431 CondOpc = LoongArch::VSETALLNEZ_H;
4432 break;
4433 case LoongArch::PseudoVBNZ_W:
4434 CondOpc = LoongArch::VSETALLNEZ_W;
4435 break;
4436 case LoongArch::PseudoVBNZ_D:
4437 CondOpc = LoongArch::VSETALLNEZ_D;
4438 break;
4439 case LoongArch::PseudoXVBZ:
4440 CondOpc = LoongArch::XVSETEQZ_V;
4441 break;
4442 case LoongArch::PseudoXVBZ_B:
4443 CondOpc = LoongArch::XVSETANYEQZ_B;
4444 break;
4445 case LoongArch::PseudoXVBZ_H:
4446 CondOpc = LoongArch::XVSETANYEQZ_H;
4447 break;
4448 case LoongArch::PseudoXVBZ_W:
4449 CondOpc = LoongArch::XVSETANYEQZ_W;
4450 break;
4451 case LoongArch::PseudoXVBZ_D:
4452 CondOpc = LoongArch::XVSETANYEQZ_D;
4453 break;
4454 case LoongArch::PseudoXVBNZ:
4455 CondOpc = LoongArch::XVSETNEZ_V;
4456 break;
4457 case LoongArch::PseudoXVBNZ_B:
4458 CondOpc = LoongArch::XVSETALLNEZ_B;
4459 break;
4460 case LoongArch::PseudoXVBNZ_H:
4461 CondOpc = LoongArch::XVSETALLNEZ_H;
4462 break;
4463 case LoongArch::PseudoXVBNZ_W:
4464 CondOpc = LoongArch::XVSETALLNEZ_W;
4465 break;
4466 case LoongArch::PseudoXVBNZ_D:
4467 CondOpc = LoongArch::XVSETALLNEZ_D;
4468 break;
4469 }
4470
4471 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
4472 const BasicBlock *LLVM_BB = BB->getBasicBlock();
4473 DebugLoc DL = MI.getDebugLoc();
4474 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
4475 MachineFunction::iterator It = ++BB->getIterator();
4476
4477 MachineFunction *F = BB->getParent();
4478 MachineBasicBlock *FalseBB = F->CreateMachineBasicBlock(LLVM_BB);
4479 MachineBasicBlock *TrueBB = F->CreateMachineBasicBlock(LLVM_BB);
4480 MachineBasicBlock *SinkBB = F->CreateMachineBasicBlock(LLVM_BB);
4481
4482 F->insert(It, FalseBB);
4483 F->insert(It, TrueBB);
4484 F->insert(It, SinkBB);
4485
4486 // Transfer the remainder of MBB and its successor edges to Sink.
4487 SinkBB->splice(SinkBB->end(), BB, std::next(MI.getIterator()), BB->end());
4488 SinkBB->transferSuccessorsAndUpdatePHIs(BB);
4489
4490 // Insert the real instruction to BB.
4491 Register FCC = MRI.createVirtualRegister(&LoongArch::CFRRegClass);
4492 BuildMI(BB, DL, TII->get(CondOpc), FCC).addReg(MI.getOperand(1).getReg());
4493
4494 // Insert branch.
4495 BuildMI(BB, DL, TII->get(LoongArch::BCNEZ)).addReg(FCC).addMBB(TrueBB);
4496 BB->addSuccessor(FalseBB);
4497 BB->addSuccessor(TrueBB);
4498
4499 // FalseBB.
4500 Register RD1 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
4501 BuildMI(FalseBB, DL, TII->get(LoongArch::ADDI_W), RD1)
4502 .addReg(LoongArch::R0)
4503 .addImm(0);
4504 BuildMI(FalseBB, DL, TII->get(LoongArch::PseudoBR)).addMBB(SinkBB);
4505 FalseBB->addSuccessor(SinkBB);
4506
4507 // TrueBB.
4508 Register RD2 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
4509 BuildMI(TrueBB, DL, TII->get(LoongArch::ADDI_W), RD2)
4510 .addReg(LoongArch::R0)
4511 .addImm(1);
4512 TrueBB->addSuccessor(SinkBB);
4513
4514 // SinkBB: merge the results.
4515 BuildMI(*SinkBB, SinkBB->begin(), DL, TII->get(LoongArch::PHI),
4516 MI.getOperand(0).getReg())
4517 .addReg(RD1)
4518 .addMBB(FalseBB)
4519 .addReg(RD2)
4520 .addMBB(TrueBB);
4521
4522 // The pseudo instruction is gone now.
4523 MI.eraseFromParent();
4524 return SinkBB;
4525}
4526
4527static MachineBasicBlock *
4528 emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB,
4529 const LoongArchSubtarget &Subtarget) {
4530 unsigned InsOp;
4531 unsigned HalfSize;
4532 switch (MI.getOpcode()) {
4533 default:
4534 llvm_unreachable("Unexpected opcode");
4535 case LoongArch::PseudoXVINSGR2VR_B:
4536 HalfSize = 16;
4537 InsOp = LoongArch::VINSGR2VR_B;
4538 break;
4539 case LoongArch::PseudoXVINSGR2VR_H:
4540 HalfSize = 8;
4541 InsOp = LoongArch::VINSGR2VR_H;
4542 break;
4543 }
4544 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
4545 const TargetRegisterClass *RC = &LoongArch::LASX256RegClass;
4546 const TargetRegisterClass *SubRC = &LoongArch::LSX128RegClass;
4547 DebugLoc DL = MI.getDebugLoc();
4548 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
4549 // XDst = vector_insert XSrc, Elt, Idx
4550 Register XDst = MI.getOperand(0).getReg();
4551 Register XSrc = MI.getOperand(1).getReg();
4552 Register Elt = MI.getOperand(2).getReg();
4553 unsigned Idx = MI.getOperand(3).getImm();
4554
4555 Register ScratchReg1 = XSrc;
4556 if (Idx >= HalfSize) {
4557 ScratchReg1 = MRI.createVirtualRegister(RC);
4558 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), ScratchReg1)
4559 .addReg(XSrc)
4560 .addReg(XSrc)
4561 .addImm(1);
4562 }
4563
4564 Register ScratchSubReg1 = MRI.createVirtualRegister(SubRC);
4565 Register ScratchSubReg2 = MRI.createVirtualRegister(SubRC);
4566 BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), ScratchSubReg1)
4567 .addReg(ScratchReg1, 0, LoongArch::sub_128);
4568 BuildMI(*BB, MI, DL, TII->get(InsOp), ScratchSubReg2)
4569 .addReg(ScratchSubReg1)
4570 .addReg(Elt)
4571 .addImm(Idx >= HalfSize ? Idx - HalfSize : Idx);
4572
4573 Register ScratchReg2 = XDst;
4574 if (Idx >= HalfSize)
4575 ScratchReg2 = MRI.createVirtualRegister(RC);
4576
4577 BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), ScratchReg2)
4578 .addImm(0)
4579 .addReg(ScratchSubReg2)
4580 .addImm(LoongArch::sub_128);
4581
4582 if (Idx >= HalfSize)
4583 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), XDst)
4584 .addReg(XSrc)
4585 .addReg(ScratchReg2)
4586 .addImm(2);
4587
4588 MI.eraseFromParent();
4589 return BB;
4590}
4591
4592 static MachineBasicBlock *emitPseudoCTPOP(MachineInstr &MI,
4593 MachineBasicBlock *BB,
4594 const LoongArchSubtarget &Subtarget) {
4595 assert(Subtarget.hasExtLSX());
4596 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
4597 const TargetRegisterClass *RC = &LoongArch::LSX128RegClass;
4598 DebugLoc DL = MI.getDebugLoc();
4599 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
4600 Register Dst = MI.getOperand(0).getReg();
4601 Register Src = MI.getOperand(1).getReg();
4602 Register ScratchReg1 = MRI.createVirtualRegister(RC);
4603 Register ScratchReg2 = MRI.createVirtualRegister(RC);
4604 Register ScratchReg3 = MRI.createVirtualRegister(RC);
4605
4606 BuildMI(*BB, MI, DL, TII->get(LoongArch::VLDI), ScratchReg1).addImm(0);
4607 BuildMI(*BB, MI, DL,
4608 TII->get(Subtarget.is64Bit() ? LoongArch::VINSGR2VR_D
4609 : LoongArch::VINSGR2VR_W),
4610 ScratchReg2)
4611 .addReg(ScratchReg1)
4612 .addReg(Src)
4613 .addImm(0);
4614 BuildMI(
4615 *BB, MI, DL,
4616 TII->get(Subtarget.is64Bit() ? LoongArch::VPCNT_D : LoongArch::VPCNT_W),
4617 ScratchReg3)
4618 .addReg(ScratchReg2);
4619 BuildMI(*BB, MI, DL,
4620 TII->get(Subtarget.is64Bit() ? LoongArch::VPICKVE2GR_D
4621 : LoongArch::VPICKVE2GR_W),
4622 Dst)
4623 .addReg(ScratchReg3)
4624 .addImm(0);
4625
4626 MI.eraseFromParent();
4627 return BB;
4628}
4629
4630MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter(
4631 MachineInstr &MI, MachineBasicBlock *BB) const {
4632 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
4633 DebugLoc DL = MI.getDebugLoc();
4634
4635 switch (MI.getOpcode()) {
4636 default:
4637 llvm_unreachable("Unexpected instr type to insert");
4638 case LoongArch::DIV_W:
4639 case LoongArch::DIV_WU:
4640 case LoongArch::MOD_W:
4641 case LoongArch::MOD_WU:
4642 case LoongArch::DIV_D:
4643 case LoongArch::DIV_DU:
4644 case LoongArch::MOD_D:
4645 case LoongArch::MOD_DU:
4646 return insertDivByZeroTrap(MI, BB);
4647 break;
4648 case LoongArch::WRFCSR: {
4649 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVGR2FCSR),
4650 LoongArch::FCSR0 + MI.getOperand(0).getImm())
4651 .addReg(MI.getOperand(1).getReg());
4652 MI.eraseFromParent();
4653 return BB;
4654 }
4655 case LoongArch::RDFCSR: {
4656 MachineInstr *ReadFCSR =
4657 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVFCSR2GR),
4658 MI.getOperand(0).getReg())
4659 .addReg(LoongArch::FCSR0 + MI.getOperand(1).getImm());
4660 ReadFCSR->getOperand(1).setIsUndef();
4661 MI.eraseFromParent();
4662 return BB;
4663 }
4664 case LoongArch::PseudoVBZ:
4665 case LoongArch::PseudoVBZ_B:
4666 case LoongArch::PseudoVBZ_H:
4667 case LoongArch::PseudoVBZ_W:
4668 case LoongArch::PseudoVBZ_D:
4669 case LoongArch::PseudoVBNZ:
4670 case LoongArch::PseudoVBNZ_B:
4671 case LoongArch::PseudoVBNZ_H:
4672 case LoongArch::PseudoVBNZ_W:
4673 case LoongArch::PseudoVBNZ_D:
4674 case LoongArch::PseudoXVBZ:
4675 case LoongArch::PseudoXVBZ_B:
4676 case LoongArch::PseudoXVBZ_H:
4677 case LoongArch::PseudoXVBZ_W:
4678 case LoongArch::PseudoXVBZ_D:
4679 case LoongArch::PseudoXVBNZ:
4680 case LoongArch::PseudoXVBNZ_B:
4681 case LoongArch::PseudoXVBNZ_H:
4682 case LoongArch::PseudoXVBNZ_W:
4683 case LoongArch::PseudoXVBNZ_D:
4684 return emitVecCondBranchPseudo(MI, BB, Subtarget);
4685 case LoongArch::PseudoXVINSGR2VR_B:
4686 case LoongArch::PseudoXVINSGR2VR_H:
4687 return emitPseudoXVINSGR2VR(MI, BB, Subtarget);
4688 case LoongArch::PseudoCTPOP:
4689 return emitPseudoCTPOP(MI, BB, Subtarget);
4690 case TargetOpcode::STATEPOINT:
4691 // STATEPOINT is a pseudo instruction which has no implicit defs/uses,
4692 // while the bl call instruction (to which the statepoint is eventually
4693 // lowered) has an implicit def. This def is early-clobber as it is set at
4694 // the moment of the call, before any use is read.
4695 // Add this implicit dead def here as a workaround.
4696 MI.addOperand(*MI.getMF(),
4697 MachineOperand::CreateReg(
4698 LoongArch::R1, /*isDef*/ true,
4699 /*isImp*/ true, /*isKill*/ false, /*isDead*/ true,
4700 /*isUndef*/ false, /*isEarlyClobber*/ true));
4701 if (!Subtarget.is64Bit())
4702 report_fatal_error("STATEPOINT is only supported on 64-bit targets");
4703 return emitPatchPoint(MI, BB);
4704 }
4705}
4706
4707 bool LoongArchTargetLowering::allowsMisalignedMemoryAccesses(
4708 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
4709 unsigned *Fast) const {
4710 if (!Subtarget.hasUAL())
4711 return false;
4712
4713 // TODO: set reasonable speed number.
4714 if (Fast)
4715 *Fast = 1;
4716 return true;
4717}
4718
4719const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
4720 switch ((LoongArchISD::NodeType)Opcode) {
4721 case LoongArchISD::FIRST_NUMBER:
4722 break;
4723
4724#define NODE_NAME_CASE(node) \
4725 case LoongArchISD::node: \
4726 return "LoongArchISD::" #node;
4727
4728 // TODO: Add more target-dependent nodes later.
4729 NODE_NAME_CASE(CALL)
4730 NODE_NAME_CASE(CALL_MEDIUM)
4731 NODE_NAME_CASE(CALL_LARGE)
4732 NODE_NAME_CASE(RET)
4733 NODE_NAME_CASE(TAIL)
4734 NODE_NAME_CASE(TAIL_MEDIUM)
4735 NODE_NAME_CASE(TAIL_LARGE)
4736 NODE_NAME_CASE(SLL_W)
4737 NODE_NAME_CASE(SRA_W)
4738 NODE_NAME_CASE(SRL_W)
4739 NODE_NAME_CASE(BSTRINS)
4740 NODE_NAME_CASE(BSTRPICK)
4741 NODE_NAME_CASE(MOVGR2FR_W_LA64)
4742 NODE_NAME_CASE(MOVFR2GR_S_LA64)
4743 NODE_NAME_CASE(FTINT)
4744 NODE_NAME_CASE(REVB_2H)
4745 NODE_NAME_CASE(REVB_2W)
4746 NODE_NAME_CASE(BITREV_4B)
4747 NODE_NAME_CASE(BITREV_8B)
4748 NODE_NAME_CASE(BITREV_W)
4749 NODE_NAME_CASE(ROTR_W)
4750 NODE_NAME_CASE(ROTL_W)
4751 NODE_NAME_CASE(DIV_W)
4752 NODE_NAME_CASE(DIV_WU)
4753 NODE_NAME_CASE(MOD_W)
4754 NODE_NAME_CASE(MOD_WU)
4755 NODE_NAME_CASE(CLZ_W)
4756 NODE_NAME_CASE(CTZ_W)
4757 NODE_NAME_CASE(DBAR)
4758 NODE_NAME_CASE(IBAR)
4759 NODE_NAME_CASE(BREAK)
4760 NODE_NAME_CASE(SYSCALL)
4761 NODE_NAME_CASE(CRC_W_B_W)
4762 NODE_NAME_CASE(CRC_W_H_W)
4763 NODE_NAME_CASE(CRC_W_W_W)
4764 NODE_NAME_CASE(CRC_W_D_W)
4765 NODE_NAME_CASE(CRCC_W_B_W)
4766 NODE_NAME_CASE(CRCC_W_H_W)
4767 NODE_NAME_CASE(CRCC_W_W_W)
4768 NODE_NAME_CASE(CRCC_W_D_W)
4769 NODE_NAME_CASE(CSRRD)
4770 NODE_NAME_CASE(CSRWR)
4771 NODE_NAME_CASE(CSRXCHG)
4772 NODE_NAME_CASE(IOCSRRD_B)
4773 NODE_NAME_CASE(IOCSRRD_H)
4774 NODE_NAME_CASE(IOCSRRD_W)
4775 NODE_NAME_CASE(IOCSRRD_D)
4776 NODE_NAME_CASE(IOCSRWR_B)
4777 NODE_NAME_CASE(IOCSRWR_H)
4778 NODE_NAME_CASE(IOCSRWR_W)
4779 NODE_NAME_CASE(IOCSRWR_D)
4780 NODE_NAME_CASE(CPUCFG)
4781 NODE_NAME_CASE(MOVGR2FCSR)
4782 NODE_NAME_CASE(MOVFCSR2GR)
4783 NODE_NAME_CASE(CACOP_D)
4784 NODE_NAME_CASE(CACOP_W)
4785 NODE_NAME_CASE(VSHUF)
4786 NODE_NAME_CASE(VPICKEV)
4787 NODE_NAME_CASE(VPICKOD)
4788 NODE_NAME_CASE(VPACKEV)
4789 NODE_NAME_CASE(VPACKOD)
4790 NODE_NAME_CASE(VILVL)
4791 NODE_NAME_CASE(VILVH)
4792 NODE_NAME_CASE(VSHUF4I)
4793 NODE_NAME_CASE(VREPLVEI)
4794 NODE_NAME_CASE(VREPLGR2VR)
4795 NODE_NAME_CASE(XVPERMI)
4796 NODE_NAME_CASE(VPICK_SEXT_ELT)
4797 NODE_NAME_CASE(VPICK_ZEXT_ELT)
4798 NODE_NAME_CASE(VREPLVE)
4799 NODE_NAME_CASE(VALL_ZERO)
4800 NODE_NAME_CASE(VANY_ZERO)
4801 NODE_NAME_CASE(VALL_NONZERO)
4802 NODE_NAME_CASE(VANY_NONZERO)
4803 NODE_NAME_CASE(FRECIPE)
4804 NODE_NAME_CASE(FRSQRTE)
4805 }
4806#undef NODE_NAME_CASE
4807 return nullptr;
4808}
4809
4810//===----------------------------------------------------------------------===//
4811// Calling Convention Implementation
4812//===----------------------------------------------------------------------===//
4813
4814 // Eight general-purpose registers a0-a7 are used for passing integer
4815 // arguments, with a0-a1 reused to return values. Generally, the GPRs are used
4816 // to pass fixed-point arguments, and floating-point arguments when no FPR is
4817 // available or with the soft-float ABI.
4818const MCPhysReg ArgGPRs[] = {LoongArch::R4, LoongArch::R5, LoongArch::R6,
4819 LoongArch::R7, LoongArch::R8, LoongArch::R9,
4820 LoongArch::R10, LoongArch::R11};
4821 // Eight floating-point registers fa0-fa7 are used for passing floating-point
4822 // arguments, and fa0-fa1 are also used to return values.
4823const MCPhysReg ArgFPR32s[] = {LoongArch::F0, LoongArch::F1, LoongArch::F2,
4824 LoongArch::F3, LoongArch::F4, LoongArch::F5,
4825 LoongArch::F6, LoongArch::F7};
4826// FPR32 and FPR64 alias each other.
4827 const MCPhysReg ArgFPR64s[] = {
4828 LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, LoongArch::F3_64,
4829 LoongArch::F4_64, LoongArch::F5_64, LoongArch::F6_64, LoongArch::F7_64};
4830
4831const MCPhysReg ArgVRs[] = {LoongArch::VR0, LoongArch::VR1, LoongArch::VR2,
4832 LoongArch::VR3, LoongArch::VR4, LoongArch::VR5,
4833 LoongArch::VR6, LoongArch::VR7};
4834
4835const MCPhysReg ArgXRs[] = {LoongArch::XR0, LoongArch::XR1, LoongArch::XR2,
4836 LoongArch::XR3, LoongArch::XR4, LoongArch::XR5,
4837 LoongArch::XR6, LoongArch::XR7};
4838
4839// Pass a 2*GRLen argument that has been split into two GRLen values through
4840// registers or the stack as necessary.
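// For example, on LA32 an i64 argument is split into two i32 halves: if a GPR
// is free, the first half goes in that register and the second in the next
// free GPR or on the stack; otherwise both halves go on the stack, with the
// first slot aligned to max(GRLen/8, the original alignment).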
4841static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State,
4842 CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1,
4843 unsigned ValNo2, MVT ValVT2, MVT LocVT2,
4844 ISD::ArgFlagsTy ArgFlags2) {
4845 unsigned GRLenInBytes = GRLen / 8;
4846 if (Register Reg = State.AllocateReg(ArgGPRs)) {
4847 // At least one half can be passed via register.
4848 State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
4849 VA1.getLocVT(), CCValAssign::Full));
4850 } else {
4851 // Both halves must be passed on the stack, with proper alignment.
4852 Align StackAlign =
4853 std::max(Align(GRLenInBytes), ArgFlags1.getNonZeroOrigAlign());
4854 State.addLoc(
4855 CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),
4856 State.AllocateStack(GRLenInBytes, StackAlign),
4857 VA1.getLocVT(), CCValAssign::Full));
4858 State.addLoc(CCValAssign::getMem(
4859 ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
4860 LocVT2, CCValAssign::Full));
4861 return false;
4862 }
4863 if (Register Reg = State.AllocateReg(ArgGPRs)) {
4864 // The second half can also be passed via register.
4865 State.addLoc(
4866 CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
4867 } else {
4868 // The second half is passed via the stack, without additional alignment.
4869 State.addLoc(CCValAssign::getMem(
4870 ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
4871 LocVT2, CCValAssign::Full));
4872 }
4873 return false;
4874}
4875
4876// Implements the LoongArch calling convention. Returns true upon failure.
4878 unsigned ValNo, MVT ValVT,
4879 CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
4880 CCState &State, bool IsFixed, bool IsRet,
4881 Type *OrigTy) {
4882 unsigned GRLen = DL.getLargestLegalIntTypeSizeInBits();
4883 assert((GRLen == 32 || GRLen == 64) && "Unsupported GRLen");
4884 MVT GRLenVT = GRLen == 32 ? MVT::i32 : MVT::i64;
4885 MVT LocVT = ValVT;
4886
4887 // Any return value split into more than two values can't be returned
4888 // directly.
4889 if (IsRet && ValNo > 1)
4890 return true;
4891
4892 // Use GPRs for floating-point values that are variadic or when no FPR is free.
4893 bool UseGPRForFloat = true;
4894
4895 switch (ABI) {
4896 default:
4897 llvm_unreachable("Unexpected ABI");
4898 break;
4899 case LoongArchABI::ABI_ILP32F:
4900 case LoongArchABI::ABI_LP64F:
4901 case LoongArchABI::ABI_ILP32D:
4902 case LoongArchABI::ABI_LP64D:
4903 UseGPRForFloat = !IsFixed;
4904 break;
4905 case LoongArchABI::ABI_ILP32S:
4906 case LoongArchABI::ABI_LP64S:
4907 break;
4908 }
4909
4910 // FPR32 and FPR64 alias each other.
4911 if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s))
4912 UseGPRForFloat = true;
4913
4914 if (UseGPRForFloat && ValVT == MVT::f32) {
4915 LocVT = GRLenVT;
4916 LocInfo = CCValAssign::BCvt;
4917 } else if (UseGPRForFloat && GRLen == 64 && ValVT == MVT::f64) {
4918 LocVT = MVT::i64;
4919 LocInfo = CCValAssign::BCvt;
4920 } else if (UseGPRForFloat && GRLen == 32 && ValVT == MVT::f64) {
4921 // TODO: Handle passing f64 on LA32 with D feature.
4922 report_fatal_error("Passing f64 with GPR on LA32 is undefined");
4923 }
4924
4925 // If this is a variadic argument, the LoongArch calling convention requires
4926 // that it is assigned an 'even' or 'aligned' register if it has (2*GRLen)/8
4927 // byte alignment. An aligned register should be used regardless of whether
4928 // the original argument was split during legalisation or not. The argument
4929 // will not be passed by registers if the original type is larger than
4930 // 2*GRLen, so the register alignment rule does not apply.
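// For example, on LA64 a variadic __int128 argument (16 bytes with 16-byte
// alignment) must start in an even-indexed register pair such as a2/a3 or a4/a5.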
4931 unsigned TwoGRLenInBytes = (2 * GRLen) / 8;
4932 if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoGRLenInBytes &&
4933 DL.getTypeAllocSize(OrigTy) == TwoGRLenInBytes) {
4934 unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
4935 // Skip 'odd' register if necessary.
4936 if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1)
4937 State.AllocateReg(ArgGPRs);
4938 }
4939
4940 SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
4941 SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
4942 State.getPendingArgFlags();
4943
4944 assert(PendingLocs.size() == PendingArgFlags.size() &&
4945 "PendingLocs and PendingArgFlags out of sync");
4946
4947 // Split arguments might be passed indirectly, so keep track of the pending
4948 // values.
4949 if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
4950 LocVT = GRLenVT;
4951 LocInfo = CCValAssign::Indirect;
4952 PendingLocs.push_back(
4953 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
4954 PendingArgFlags.push_back(ArgFlags);
4955 if (!ArgFlags.isSplitEnd()) {
4956 return false;
4957 }
4958 }
4959
4960 // If the split argument only had two elements, it should be passed directly
4961 // in registers or on the stack.
4962 if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&
4963 PendingLocs.size() <= 2) {
4964 assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
4965 // Apply the normal calling convention rules to the first half of the
4966 // split argument.
4967 CCValAssign VA = PendingLocs[0];
4968 ISD::ArgFlagsTy AF = PendingArgFlags[0];
4969 PendingLocs.clear();
4970 PendingArgFlags.clear();
4971 return CC_LoongArchAssign2GRLen(GRLen, State, VA, AF, ValNo, ValVT, LocVT,
4972 ArgFlags);
4973 }
4974
4975 // Allocate to a register if possible, or else a stack slot.
4976 Register Reg;
4977 unsigned StoreSizeBytes = GRLen / 8;
4978 Align StackAlign = Align(GRLen / 8);
4979
4980 if (ValVT == MVT::f32 && !UseGPRForFloat)
4981 Reg = State.AllocateReg(ArgFPR32s);
4982 else if (ValVT == MVT::f64 && !UseGPRForFloat)
4983 Reg = State.AllocateReg(ArgFPR64s);
4984 else if (ValVT.is128BitVector())
4985 Reg = State.AllocateReg(ArgVRs);
4986 else if (ValVT.is256BitVector())
4987 Reg = State.AllocateReg(ArgXRs);
4988 else
4989 Reg = State.AllocateReg(ArgGPRs);
4990
4991 unsigned StackOffset =
4992 Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign);
4993
4994 // If we reach this point and PendingLocs is non-empty, we must be at the
4995 // end of a split argument that must be passed indirectly.
4996 if (!PendingLocs.empty()) {
4997 assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
4998 assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
4999 for (auto &It : PendingLocs) {
5000 if (Reg)
5001 It.convertToReg(Reg);
5002 else
5003 It.convertToMem(StackOffset);
5004 State.addLoc(It);
5005 }
5006 PendingLocs.clear();
5007 PendingArgFlags.clear();
5008 return false;
5009 }
5010 assert((!UseGPRForFloat || LocVT == GRLenVT) &&
5011 "Expected a GRLenVT at this stage");
5012
5013 if (Reg) {
5014 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
5015 return false;
5016 }
5017
5018 // When a floating-point value is passed on the stack, no bit-cast is needed.
5019 if (ValVT.isFloatingPoint()) {
5020 LocVT = ValVT;
5021 LocInfo = CCValAssign::Full;
5022 }
5023
5024 State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
5025 return false;
5026}
5027
5028void LoongArchTargetLowering::analyzeInputArgs(
5029 MachineFunction &MF, CCState &CCInfo,
5030 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
5031 LoongArchCCAssignFn Fn) const {
5033 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
5034 MVT ArgVT = Ins[i].VT;
5035 Type *ArgTy = nullptr;
5036 if (IsRet)
5037 ArgTy = FType->getReturnType();
5038 else if (Ins[i].isOrigArg())
5039 ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
5040 LoongArchABI::ABI ABI =
5041 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
5042 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Ins[i].Flags,
5043 CCInfo, /*IsFixed=*/true, IsRet, ArgTy)) {
5044 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " << ArgVT
5045 << '\n');
5046 llvm_unreachable("");
5047 }
5048 }
5049}
5050
5051void LoongArchTargetLowering::analyzeOutputArgs(
5052 MachineFunction &MF, CCState &CCInfo,
5053 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
5054 CallLoweringInfo *CLI, LoongArchCCAssignFn Fn) const {
5055 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
5056 MVT ArgVT = Outs[i].VT;
5057 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
5058 LoongArchABI::ABI ABI =
5059 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
5060 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Outs[i].Flags,
5061 CCInfo, Outs[i].IsFixed, IsRet, OrigTy)) {
5062 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " << ArgVT
5063 << "\n");
5064 llvm_unreachable("");
5065 }
5066 }
5067}
5068
5069// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
5070// values.
5071static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
5072 const CCValAssign &VA, const SDLoc &DL) {
5073 switch (VA.getLocInfo()) {
5074 default:
5075 llvm_unreachable("Unexpected CCValAssign::LocInfo");
5076 case CCValAssign::Full:
5078 break;
5079 case CCValAssign::BCvt:
5080 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
5081 Val = DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Val);
5082 else
5083 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
5084 break;
5085 }
5086 return Val;
5087}
5088
5089static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
5090 const CCValAssign &VA, const SDLoc &DL,
5091 const ISD::InputArg &In,
5092 const LoongArchTargetLowering &TLI) {
5093 MachineFunction &MF = DAG.getMachineFunction();
5094 MachineRegisterInfo &RegInfo = MF.getRegInfo();
5095 EVT LocVT = VA.getLocVT();
5096 SDValue Val;
5097 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
5098 Register VReg = RegInfo.createVirtualRegister(RC);
5099 RegInfo.addLiveIn(VA.getLocReg(), VReg);
5100 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
5101
5102 // If input is sign extended from 32 bits, note it for the OptW pass.
5103 if (In.isOrigArg()) {
5104 Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex());
5105 if (OrigArg->getType()->isIntegerTy()) {
5106 unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
5107 // An input zero-extended from i31 or narrower can also be considered sign-extended.
5108 if ((BitWidth <= 32 && In.Flags.isSExt()) ||
5109 (BitWidth < 32 && In.Flags.isZExt())) {
5112 LAFI->addSExt32Register(VReg);
5113 }
5114 }
5115 }
5116
5117 return convertLocVTToValVT(DAG, Val, VA, DL);
5118}
5119
5120// The caller is responsible for loading the full value if the argument is
5121// passed with CCValAssign::Indirect.
5122static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
5123 const CCValAssign &VA, const SDLoc &DL) {
5124 MachineFunction &MF = DAG.getMachineFunction();
5125 MachineFrameInfo &MFI = MF.getFrameInfo();
5126 EVT ValVT = VA.getValVT();
5127 int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
5128 /*IsImmutable=*/true);
5129 SDValue FIN = DAG.getFrameIndex(
5131
5132 ISD::LoadExtType ExtType;
5133 switch (VA.getLocInfo()) {
5134 default:
5135 llvm_unreachable("Unexpected CCValAssign::LocInfo");
5136 case CCValAssign::Full:
5138 case CCValAssign::BCvt:
5139 ExtType = ISD::NON_EXTLOAD;
5140 break;
5141 }
5142 return DAG.getExtLoad(
5143 ExtType, DL, VA.getLocVT(), Chain, FIN,
5145}
5146
5147static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
5148 const CCValAssign &VA, const SDLoc &DL) {
5149 EVT LocVT = VA.getLocVT();
5150
5151 switch (VA.getLocInfo()) {
5152 default:
5153 llvm_unreachable("Unexpected CCValAssign::LocInfo");
5154 case CCValAssign::Full:
5155 break;
5156 case CCValAssign::BCvt:
5157 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
5158 Val = DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Val);
5159 else
5160 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
5161 break;
5162 }
5163 return Val;
5164}
5165
5166static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
5167 CCValAssign::LocInfo LocInfo,
5168 ISD::ArgFlagsTy ArgFlags, CCState &State) {
5169 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
5170 // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, SpLim
5171 // s0 s1 s2 s3 s4 s5 s6 s7 s8
5172 static const MCPhysReg GPRList[] = {
5173 LoongArch::R23, LoongArch::R24, LoongArch::R25,
5174 LoongArch::R26, LoongArch::R27, LoongArch::R28,
5175 LoongArch::R29, LoongArch::R30, LoongArch::R31};
5176 if (MCRegister Reg = State.AllocateReg(GPRList)) {
5177 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
5178 return false;
5179 }
5180 }
5181
5182 if (LocVT == MVT::f32) {
5183 // Pass in STG registers: F1, F2, F3, F4
5184 // fs0,fs1,fs2,fs3
5185 static const MCPhysReg FPR32List[] = {LoongArch::F24, LoongArch::F25,
5186 LoongArch::F26, LoongArch::F27};
5187 if (MCRegister Reg = State.AllocateReg(FPR32List)) {
5188 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
5189 return false;
5190 }
5191 }
5192
5193 if (LocVT == MVT::f64) {
5194 // Pass in STG registers: D1, D2, D3, D4
5195 // fs4,fs5,fs6,fs7
5196 static const MCPhysReg FPR64List[] = {LoongArch::F28_64, LoongArch::F29_64,
5197 LoongArch::F30_64, LoongArch::F31_64};
5198 if (MCRegister Reg = State.AllocateReg(FPR64List)) {
5199 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
5200 return false;
5201 }
5202 }
5203
5204 report_fatal_error("No registers left in GHC calling convention");
5205 return true;
5206}
5207
5208// Transform physical registers into virtual registers.
5209SDValue LoongArchTargetLowering::LowerFormalArguments(
5210 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
5211 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
5212 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
5213
5215
5216 switch (CallConv) {
5217 default:
5218 llvm_unreachable("Unsupported calling convention");
5219 case CallingConv::C:
5220 case CallingConv::Fast:
5221 break;
5222 case CallingConv::GHC:
5223 if (!MF.getSubtarget().hasFeature(LoongArch::FeatureBasicF) ||
5224 !MF.getSubtarget().hasFeature(LoongArch::FeatureBasicD))
5225 report_fatal_error(
5226 "GHC calling convention requires the F and D extensions");
5227 }
5228
5229 EVT PtrVT = getPointerTy(DAG.getDataLayout());
5230 MVT GRLenVT = Subtarget.getGRLenVT();
5231 unsigned GRLenInBytes = Subtarget.getGRLen() / 8;
5232 // Used with varargs to accumulate store chains.
5233 std::vector<SDValue> OutChains;
5234
5235 // Assign locations to all of the incoming arguments.
5237 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
5238
5239 if (CallConv == CallingConv::GHC)
5241 else
5242 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false, CC_LoongArch);
5243
5244 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
5245 CCValAssign &VA = ArgLocs[i];
5246 SDValue ArgValue;
5247 if (VA.isRegLoc())
5248 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[i], *this);
5249 else
5250 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
5251 if (VA.getLocInfo() == CCValAssign::Indirect) {
5252 // If the original argument was split and passed by reference, we need to
5253 // load all parts of it here (using the same address).
5254 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
5256 unsigned ArgIndex = Ins[i].OrigArgIndex;
5257 unsigned ArgPartOffset = Ins[i].PartOffset;
5258 assert(ArgPartOffset == 0);
5259 while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) {
5260 CCValAssign &PartVA = ArgLocs[i + 1];
5261 unsigned PartOffset = Ins[i + 1].PartOffset - ArgPartOffset;
5262 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
5263 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
5264 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
5266 ++i;
5267 }
5268 continue;
5269 }
5270 InVals.push_back(ArgValue);
5271 }
5272
5273 if (IsVarArg) {
5275 unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
5276 const TargetRegisterClass *RC = &LoongArch::GPRRegClass;
5277 MachineFrameInfo &MFI = MF.getFrameInfo();
5278 MachineRegisterInfo &RegInfo = MF.getRegInfo();
5279 auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();
5280
5281 // Offset of the first variable argument from stack pointer, and size of
5282 // the vararg save area. For now, the varargs save area is either zero or
5283 // large enough to hold a0-a7.
5284 int VaArgOffset, VarArgsSaveSize;
5285
5286 // If all registers are allocated, then all varargs must be passed on the
5287 // stack and we don't need to save any argregs.
5288 if (ArgRegs.size() == Idx) {
5289 VaArgOffset = CCInfo.getStackSize();
5290 VarArgsSaveSize = 0;
5291 } else {
5292 VarArgsSaveSize = GRLenInBytes * (ArgRegs.size() - Idx);
5293 VaArgOffset = -VarArgsSaveSize;
5294 }
5295
5296 // Record the frame index of the first variable argument
5297 // which is a value necessary to VASTART.
5298 int FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
5299 LoongArchFI->setVarArgsFrameIndex(FI);
5300
5301 // If saving an odd number of registers then create an extra stack slot to
5302 // ensure that the frame pointer is 2*GRLen-aligned, which in turn ensures
5303 // offsets to even-numbered registers remain 2*GRLen-aligned.
5304 if (Idx % 2) {
5305 MFI.CreateFixedObject(GRLenInBytes, VaArgOffset - (int)GRLenInBytes,
5306 true);
5307 VarArgsSaveSize += GRLenInBytes;
5308 }
5309
5310 // Copy the integer registers that may have been used for passing varargs
5311 // to the vararg save area.
5312 for (unsigned I = Idx; I < ArgRegs.size();
5313 ++I, VaArgOffset += GRLenInBytes) {
5314 const Register Reg = RegInfo.createVirtualRegister(RC);
5315 RegInfo.addLiveIn(ArgRegs[I], Reg);
5316 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, GRLenVT);
5317 FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
5318 SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
5319 SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
5321 cast<StoreSDNode>(Store.getNode())
5322 ->getMemOperand()
5323 ->setValue((Value *)nullptr);
5324 OutChains.push_back(Store);
5325 }
5326 LoongArchFI->setVarArgsSaveSize(VarArgsSaveSize);
5327 }
5328
5329 // All stores are grouped in one node to allow the matching between
5330 // the size of Ins and InVals. This only happens for vararg functions.
5331 if (!OutChains.empty()) {
5332 OutChains.push_back(Chain);
5333 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
5334 }
5335
5336 return Chain;
5337}
5338
5339bool LoongArchTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
5340 return CI->isTailCall();
5341}
5342
5343// Check whether the return value is used only as a return value, as otherwise
5344// we can't perform a tail call.
5345bool LoongArchTargetLowering::isUsedByReturnOnly(SDNode *N,
5346 SDValue &Chain) const {
5347 if (N->getNumValues() != 1)
5348 return false;
5349 if (!N->hasNUsesOfValue(1, 0))
5350 return false;
5351
5352 SDNode *Copy = *N->user_begin();
5353 if (Copy->getOpcode() != ISD::CopyToReg)
5354 return false;
5355
5356 // If the ISD::CopyToReg has a glue operand, we conservatively assume it
5357 // isn't safe to perform a tail call.
5358 if (Copy->getGluedNode())
5359 return false;
5360
5361 // The copy must be used by a LoongArchISD::RET, and nothing else.
5362 bool HasRet = false;
5363 for (SDNode *Node : Copy->users()) {
5364 if (Node->getOpcode() != LoongArchISD::RET)
5365 return false;
5366 HasRet = true;
5367 }
5368
5369 if (!HasRet)
5370 return false;
5371
5372 Chain = Copy->getOperand(0);
5373 return true;
5374}
5375
5376// Check whether the call is eligible for tail call optimization.
5377bool LoongArchTargetLowering::isEligibleForTailCallOptimization(
5378 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
5379 const SmallVectorImpl<CCValAssign> &ArgLocs) const {
5380
5381 auto CalleeCC = CLI.CallConv;
5382 auto &Outs = CLI.Outs;
5383 auto &Caller = MF.getFunction();
5384 auto CallerCC = Caller.getCallingConv();
5385
5386 // Do not tail call opt if the stack is used to pass parameters.
5387 if (CCInfo.getStackSize() != 0)
5388 return false;
5389
5390 // Do not tail call opt if any parameters need to be passed indirectly.
5391 for (auto &VA : ArgLocs)
5392 if (VA.getLocInfo() == CCValAssign::Indirect)
5393 return false;
5394
5395 // Do not tail call opt if either caller or callee uses struct return
5396 // semantics.
5397 auto IsCallerStructRet = Caller.hasStructRetAttr();
5398 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
5399 if (IsCallerStructRet || IsCalleeStructRet)
5400 return false;
5401
5402 // Do not tail call opt if either the callee or caller has a byval argument.
5403 for (auto &Arg : Outs)
5404 if (Arg.Flags.isByVal())
5405 return false;
5406
5407 // The callee has to preserve all registers the caller needs to preserve.
5408 const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
5409 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
5410 if (CalleeCC != CallerCC) {
5411 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
5412 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
5413 return false;
5414 }
5415 return true;
5416}
5417
5418static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) {
5419 return DAG.getDataLayout().getPrefTypeAlign(
5420 VT.getTypeForEVT(*DAG.getContext()));
5421}
5422
5423// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
5424// and output parameter nodes.
5425SDValue
5426LoongArchTargetLowering::LowerCall(CallLoweringInfo &CLI,
5427 SmallVectorImpl<SDValue> &InVals) const {
5428 SelectionDAG &DAG = CLI.DAG;
5429 SDLoc &DL = CLI.DL;
5431 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
5433 SDValue Chain = CLI.Chain;
5434 SDValue Callee = CLI.Callee;
5435 CallingConv::ID CallConv = CLI.CallConv;
5436 bool IsVarArg = CLI.IsVarArg;
5437 EVT PtrVT = getPointerTy(DAG.getDataLayout());
5438 MVT GRLenVT = Subtarget.getGRLenVT();
5439 bool &IsTailCall = CLI.IsTailCall;
5440
5442
5443 // Analyze the operands of the call, assigning locations to each operand.
5445 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
5446
5447 if (CallConv == CallingConv::GHC)
5448 ArgCCInfo.AnalyzeCallOperands(Outs, CC_LoongArch_GHC);
5449 else
5450 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI, CC_LoongArch);
5451
5452 // Check if it's really possible to do a tail call.
5453 if (IsTailCall)
5454 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
5455
5456 if (IsTailCall)
5457 ++NumTailCalls;
5458 else if (CLI.CB && CLI.CB->isMustTailCall())
5459 report_fatal_error("failed to perform tail call elimination on a call "
5460 "site marked musttail");
5461
5462 // Get a count of how many bytes are to be pushed on the stack.
5463 unsigned NumBytes = ArgCCInfo.getStackSize();
5464
5465 // Create local copies for byval args.
5466 SmallVector<SDValue> ByValArgs;
5467 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
5468 ISD::ArgFlagsTy Flags = Outs[i].Flags;
5469 if (!Flags.isByVal())
5470 continue;
5471
5472 SDValue Arg = OutVals[i];
5473 unsigned Size = Flags.getByValSize();
5474 Align Alignment = Flags.getNonZeroByValAlign();
5475
5476 int FI =
5477 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
5478 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
5479 SDValue SizeNode = DAG.getConstant(Size, DL, GRLenVT);
5480
5481 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
5482 /*IsVolatile=*/false,
5483 /*AlwaysInline=*/false, /*CI=*/nullptr, std::nullopt,
5485 ByValArgs.push_back(FIPtr);
5486 }
5487
5488 if (!IsTailCall)
5489 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
5490
5491 // Copy argument values to their designated locations.
5493 SmallVector<SDValue> MemOpChains;
5494 SDValue StackPtr;
5495 for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) {
5496 CCValAssign &VA = ArgLocs[i];
5497 SDValue ArgValue = OutVals[i];
5498 ISD::ArgFlagsTy Flags = Outs[i].Flags;
5499
5500 // Promote the value if needed.
5501 // For now, only handle fully promoted and indirect arguments.
5502 if (VA.getLocInfo() == CCValAssign::Indirect) {
5503 // Store the argument in a stack slot and pass its address.
5504 Align StackAlign =
5505 std::max(getPrefTypeAlign(Outs[i].ArgVT, DAG),
5506 getPrefTypeAlign(ArgValue.getValueType(), DAG));
5507 TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
5508 // If the original argument was split and passed by reference, we need to
5509 // store the required parts of it here (and pass just one address).
5510 unsigned ArgIndex = Outs[i].OrigArgIndex;
5511 unsigned ArgPartOffset = Outs[i].PartOffset;
5512 assert(ArgPartOffset == 0);
5513 // Calculate the total size to store. We don't have access to what we're
5514 // actually storing other than performing the loop and collecting the
5515 // info.
5517 while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) {
5518 SDValue PartValue = OutVals[i + 1];
5519 unsigned PartOffset = Outs[i + 1].PartOffset - ArgPartOffset;
5520 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
5521 EVT PartVT = PartValue.getValueType();
5522
5523 StoredSize += PartVT.getStoreSize();
5524 StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
5525 Parts.push_back(std::make_pair(PartValue, Offset));
5526 ++i;
5527 }
5528 SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
5529 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
5530 MemOpChains.push_back(
5531 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
5533 for (const auto &Part : Parts) {
5534 SDValue PartValue = Part.first;
5535 SDValue PartOffset = Part.second;
5536 SDValue Address =
5537 DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
5538 MemOpChains.push_back(
5539 DAG.getStore(Chain, DL, PartValue, Address,
5541 }
5542 ArgValue = SpillSlot;
5543 } else {
5544 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL);
5545 }
5546
5547 // Use local copy if it is a byval arg.
5548 if (Flags.isByVal())
5549 ArgValue = ByValArgs[j++];
5550
5551 if (VA.isRegLoc()) {
5552 // Queue up the argument copies and emit them at the end.
5553 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
5554 } else {
5555 assert(VA.isMemLoc() && "Argument not register or memory");
5556 assert(!IsTailCall && "Tail call not allowed if stack is used "
5557 "for passing parameters");
5558
5559 // Work out the address of the stack slot.
5560 if (!StackPtr.getNode())
5561 StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT);
5562 SDValue Address =
5563 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
5565
5566 // Emit the store.
5567 MemOpChains.push_back(
5568 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
5569 }
5570 }
5571
5572 // Join the stores, which are independent of one another.
5573 if (!MemOpChains.empty())
5574 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
5575
5576 SDValue Glue;
5577
5578 // Build a sequence of copy-to-reg nodes, chained and glued together.
5579 for (auto &Reg : RegsToPass) {
5580 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
5581 Glue = Chain.getValue(1);
5582 }
5583
5584 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
5585 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
5586 // split it, so that the direct call can then be matched by PseudoCALL.
5587 if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
5588 const GlobalValue *GV = S->getGlobal();
5589 unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(GV)
5592 Callee = DAG.getTargetGlobalAddress(S->getGlobal(), DL, PtrVT, 0, OpFlags);
5593 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
5594 unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(nullptr)
5597 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);
5598 }
5599
5600 // The first call operand is the chain and the second is the target address.
5602 Ops.push_back(Chain);
5603 Ops.push_back(Callee);
5604
5605 // Add argument registers to the end of the list so that they are
5606 // known live into the call.
5607 for (auto &Reg : RegsToPass)
5608 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
5609
5610 if (!IsTailCall) {
5611 // Add a register mask operand representing the call-preserved registers.
5612 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
5613 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
5614 assert(Mask && "Missing call preserved mask for calling convention");
5615 Ops.push_back(DAG.getRegisterMask(Mask));
5616 }
5617
5618 // Glue the call to the argument copies, if any.
5619 if (Glue.getNode())
5620 Ops.push_back(Glue);
5621
5622 // Emit the call.
5623 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
5624 unsigned Op;
5625 switch (DAG.getTarget().getCodeModel()) {
5626 default:
5627 report_fatal_error("Unsupported code model");
5628 case CodeModel::Small:
5629 Op = IsTailCall ? LoongArchISD::TAIL : LoongArchISD::CALL;
5630 break;
5631 case CodeModel::Medium:
5632 assert(Subtarget.is64Bit() && "Medium code model requires LA64");
5634 break;
5635 case CodeModel::Large:
5636 assert(Subtarget.is64Bit() && "Large code model requires LA64");
5638 break;
5639 }
5640
5641 if (IsTailCall) {
5643 SDValue Ret = DAG.getNode(Op, DL, NodeTys, Ops);
5644 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
5645 return Ret;
5646 }
5647
5648 Chain = DAG.getNode(Op, DL, NodeTys, Ops);
5649 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
5650 Glue = Chain.getValue(1);
5651
5652 // Mark the end of the call, which is glued to the call itself.
5653 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
5654 Glue = Chain.getValue(1);
5655
5656 // Assign locations to each value returned by this call.
5658 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
5659 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_LoongArch);
5660
5661 // Copy all of the result registers out of their specified physreg.
5662 for (auto &VA : RVLocs) {
5663 // Copy the value out.
5664 SDValue RetValue =
5665 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
5666 // Glue the RetValue to the end of the call sequence.
5667 Chain = RetValue.getValue(1);
5668 Glue = RetValue.getValue(2);
5669
5670 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL);
5671
5672 InVals.push_back(RetValue);
5673 }
5674
5675 return Chain;
5676}
5677
5678bool LoongArchTargetLowering::CanLowerReturn(
5679 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
5680 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context,
5681 const Type *RetTy) const {
5683 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
5684
5685 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
5686 LoongArchABI::ABI ABI =
5687 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
5688 if (CC_LoongArch(MF.getDataLayout(), ABI, i, Outs[i].VT, CCValAssign::Full,
5689 Outs[i].Flags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true,
5690 nullptr))
5691 return false;
5692 }
5693 return true;
5694}
5695
5696SDValue LoongArchTargetLowering::LowerReturn(
5697 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
5698 const SmallVectorImpl<ISD::OutputArg> &Outs,
5699 const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
5700 SelectionDAG &DAG) const {
5701 // Stores the assignment of the return value to a location.
5703
5704 // Info about the registers and stack slot.
5705 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
5706 *DAG.getContext());
5707
5708 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
5709 nullptr, CC_LoongArch);
5710 if (CallConv == CallingConv::GHC && !RVLocs.empty())
5711 report_fatal_error("GHC functions return void only");
5712 SDValue Glue;
5713 SmallVector<SDValue, 4> RetOps(1, Chain);
5714
5715 // Copy the result values into the output registers.
5716 for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) {
5717 CCValAssign &VA = RVLocs[i];
5718 assert(VA.isRegLoc() && "Can only return in registers!");
5719
5720 // Handle a 'normal' return.
5721 SDValue Val = convertValVTToLocVT(DAG, OutVals[i], VA, DL);
5722 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
5723
5724 // Guarantee that all emitted copies are stuck together.
5725 Glue = Chain.getValue(1);
5726 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
5727 }
5728
5729 RetOps[0] = Chain; // Update chain.
5730
5731 // Add the glue node if we have it.
5732 if (Glue.getNode())
5733 RetOps.push_back(Glue);
5734
5735 return DAG.getNode(LoongArchISD::RET, DL, MVT::Other, RetOps);
5736}
5737
5738bool LoongArchTargetLowering::isFPImmVLDILegal(const APFloat &Imm,
5739 EVT VT) const {
5740 if (!Subtarget.hasExtLSX())
5741 return false;
5742
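// The masks below accept only bit patterns that a single vldi instruction can
// materialize (a restricted sign/exponent/mantissa encoding).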
5743 if (VT == MVT::f32) {
5744 uint64_t masked = Imm.bitcastToAPInt().getZExtValue() & 0x7e07ffff;
5745 return (masked == 0x3e000000 || masked == 0x40000000);
5746 }
5747
5748 if (VT == MVT::f64) {
5749 uint64_t masked = Imm.bitcastToAPInt().getZExtValue() & 0x7fc0ffffffffffff;
5750 return (masked == 0x3fc0000000000000 || masked == 0x4000000000000000);
5751 }
5752
5753 return false;
5754}
5755
5756bool LoongArchTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
5757 bool ForCodeSize) const {
5758 // TODO: Maybe need more checks here after vector extension is supported.
5759 if (VT == MVT::f32 && !Subtarget.hasBasicF())
5760 return false;
5761 if (VT == MVT::f64 && !Subtarget.hasBasicD())
5762 return false;
5763 return (Imm.isZero() || Imm.isExactlyValue(1.0) || isFPImmVLDILegal(Imm, VT));
5764}
5765
5767 return true;
5768}
5769
5771 return true;
5772}
5773
5774bool LoongArchTargetLowering::shouldInsertFencesForAtomic(
5775 const Instruction *I) const {
5776 if (!Subtarget.is64Bit())
5777 return isa<LoadInst>(I) || isa<StoreInst>(I);
5778
5779 if (isa<LoadInst>(I))
5780 return true;
5781
5782 // On LA64, atomic store operations with an IntegerBitWidth of 32 or 64 do not
5783 // require fences because we can use amswap_db.[w/d].
5784 Type *Ty = I->getOperand(0)->getType();
5785 if (isa<StoreInst>(I) && Ty->isIntegerTy()) {
5786 unsigned Size = Ty->getIntegerBitWidth();
5787 return (Size == 8 || Size == 16);
5788 }
5789
5790 return false;
5791}
5792
5793EVT LoongArchTargetLowering::getSetCCResultType(const DataLayout &DL,
5794 LLVMContext &Context,
5795 EVT VT) const {
5796 if (!VT.isVector())
5797 return getPointerTy(DL);
5799}
5800
5802 // TODO: Support vectors.
5803 return Y.getValueType().isScalarInteger() && !isa<ConstantSDNode>(Y);
5804}
5805
5806bool LoongArchTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
5807 const CallInst &I,
5808 MachineFunction &MF,
5809 unsigned Intrinsic) const {
5810 switch (Intrinsic) {
5811 default:
5812 return false;
5813 case Intrinsic::loongarch_masked_atomicrmw_xchg_i32:
5814 case Intrinsic::loongarch_masked_atomicrmw_add_i32:
5815 case Intrinsic::loongarch_masked_atomicrmw_sub_i32:
5816 case Intrinsic::loongarch_masked_atomicrmw_nand_i32:
5818 Info.memVT = MVT::i32;
5819 Info.ptrVal = I.getArgOperand(0);
5820 Info.offset = 0;
5821 Info.align = Align(4);
5824 return true;
5825 // TODO: Add more Intrinsics later.
5826 }
5827}
5828
5829// When -mlamcas is enabled, MinCmpXchgSizeInBits is set to 8, so atomicrmw
5830// and/or/xor operations with operands narrower than 32 bits can no longer be
5831// expanded to am{and/or/xor}[_db].w by AtomicExpandPass. To avoid regressing
5832// those cases, we implement the expansion manually here.
5833void LoongArchTargetLowering::emitExpandAtomicRMW(AtomicRMWInst *AI) const {
5834 AtomicRMWInst::BinOp Op = AI->getOperation();
5835
5836 assert((Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
5837 Op == AtomicRMWInst::And) &&
5838 "Unable to expand");
5839 unsigned MinWordSize = 4;
5840
5841 IRBuilder<> Builder(AI);
5842 LLVMContext &Ctx = Builder.getContext();
5843 const DataLayout &DL = AI->getDataLayout();
5844 Type *ValueType = AI->getType();
5845 Type *WordType = Type::getIntNTy(Ctx, MinWordSize * 8);
5846
5847 Value *Addr = AI->getPointerOperand();
5848 PointerType *PtrTy = cast<PointerType>(Addr->getType());
5849 IntegerType *IntTy = DL.getIndexType(Ctx, PtrTy->getAddressSpace());
5850
5851 Value *AlignedAddr = Builder.CreateIntrinsic(
5852 Intrinsic::ptrmask, {PtrTy, IntTy},
5853 {Addr, ConstantInt::get(IntTy, ~(uint64_t)(MinWordSize - 1))}, nullptr,
5854 "AlignedAddr");
5855
5856 Value *AddrInt = Builder.CreatePtrToInt(Addr, IntTy);
5857 Value *PtrLSB = Builder.CreateAnd(AddrInt, MinWordSize - 1, "PtrLSB");
5858 Value *ShiftAmt = Builder.CreateShl(PtrLSB, 3);
5859 ShiftAmt = Builder.CreateTrunc(ShiftAmt, WordType, "ShiftAmt");
5860 Value *Mask = Builder.CreateShl(
5861 ConstantInt::get(WordType,
5862 (1 << (DL.getTypeStoreSize(ValueType) * 8)) - 1),
5863 ShiftAmt, "Mask");
5864 Value *Inv_Mask = Builder.CreateNot(Mask, "Inv_Mask");
5865 Value *ValOperand_Shifted =
5866 Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), WordType),
5867 ShiftAmt, "ValOperand_Shifted");
5868 Value *NewOperand;
5869 if (Op == AtomicRMWInst::And)
5870 NewOperand = Builder.CreateOr(ValOperand_Shifted, Inv_Mask, "AndOperand");
5871 else
5872 NewOperand = ValOperand_Shifted;
5873
5874 AtomicRMWInst *NewAI =
5875 Builder.CreateAtomicRMW(Op, AlignedAddr, NewOperand, Align(MinWordSize),
5876 AI->getOrdering(), AI->getSyncScopeID());
5877
5878 Value *Shift = Builder.CreateLShr(NewAI, ShiftAmt, "shifted");
5879 Value *Trunc = Builder.CreateTrunc(Shift, ValueType, "extracted");
5880 Value *FinalOldResult = Builder.CreateBitCast(Trunc, ValueType);
5881 AI->replaceAllUsesWith(FinalOldResult);
5882 AI->eraseFromParent();
5883}
5884
5885TargetLowering::AtomicExpansionKind
5886LoongArchTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
5887 // TODO: Add more AtomicRMWInst that needs to be extended.
5888
5889 // Since floating-point operation requires a non-trivial set of data
5890 // operations, use CmpXChg to expand.
5891 if (AI->isFloatingPointOperation() ||
5897
5898 if (Subtarget.hasLAM_BH() && Subtarget.is64Bit() &&
5901 AI->getOperation() == AtomicRMWInst::Sub)) {
5903 }
5904
5905 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
5906 if (Subtarget.hasLAMCAS()) {
5907 if (Size < 32 && (AI->getOperation() == AtomicRMWInst::And ||
5908 AI->getOperation() == AtomicRMWInst::Or ||
5909 AI->getOperation() == AtomicRMWInst::Xor))
5910 return AtomicExpansionKind::Expand;
5911 if (AI->getOperation() == AtomicRMWInst::Nand || Size < 32)
5912 return AtomicExpansionKind::CmpXChg;
5913 }
5914
5915 if (Size == 8 || Size == 16)
5916 return AtomicExpansionKind::MaskedIntrinsic;
5917 return AtomicExpansionKind::None;
5918}
5919
5920static Intrinsic::ID
5922 AtomicRMWInst::BinOp BinOp) {
5923 if (GRLen == 64) {
5924 switch (BinOp) {
5925 default:
5926 llvm_unreachable("Unexpected AtomicRMW BinOp");
5928 return Intrinsic::loongarch_masked_atomicrmw_xchg_i64;
5929 case AtomicRMWInst::Add:
5930 return Intrinsic::loongarch_masked_atomicrmw_add_i64;
5931 case AtomicRMWInst::Sub:
5932 return Intrinsic::loongarch_masked_atomicrmw_sub_i64;
5934 return Intrinsic::loongarch_masked_atomicrmw_nand_i64;
5936 return Intrinsic::loongarch_masked_atomicrmw_umax_i64;
5938 return Intrinsic::loongarch_masked_atomicrmw_umin_i64;
5939 case AtomicRMWInst::Max:
5940 return Intrinsic::loongarch_masked_atomicrmw_max_i64;
5941 case AtomicRMWInst::Min:
5942 return Intrinsic::loongarch_masked_atomicrmw_min_i64;
5943 // TODO: support other AtomicRMWInst.
5944 }
5945 }
5946
5947 if (GRLen == 32) {
5948 switch (BinOp) {
5949 default:
5950 llvm_unreachable("Unexpected AtomicRMW BinOp");
5952 return Intrinsic::loongarch_masked_atomicrmw_xchg_i32;
5953 case AtomicRMWInst::Add:
5954 return Intrinsic::loongarch_masked_atomicrmw_add_i32;
5955 case AtomicRMWInst::Sub:
5956 return Intrinsic::loongarch_masked_atomicrmw_sub_i32;
5958 return Intrinsic::loongarch_masked_atomicrmw_nand_i32;
5959 // TODO: support other AtomicRMWInst.
5960 }
5961 }
5962
5963 llvm_unreachable("Unexpected GRLen\n");
5964}
5965
5966TargetLowering::AtomicExpansionKind
5967LoongArchTargetLowering::shouldExpandAtomicCmpXchgInIR(
5968 AtomicCmpXchgInst *CI) const {
5969
5970 if (Subtarget.hasLAMCAS())
5971 return AtomicExpansionKind::None;
5972
5974 if (Size == 8 || Size == 16)
5975 return AtomicExpansionKind::MaskedIntrinsic;
5976 return AtomicExpansionKind::None;
5977}
5978
5980 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
5981 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
5982 AtomicOrdering FailOrd = CI->getFailureOrdering();
5983 Value *FailureOrdering =
5984 Builder.getIntN(Subtarget.getGRLen(), static_cast<uint64_t>(FailOrd));
5985
5986 // TODO: Support cmpxchg on LA32.
5987 Intrinsic::ID CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i64;
5988 CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
5989 NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
5990 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
5991 Type *Tys[] = {AlignedAddr->getType()};
5992 Value *Result = Builder.CreateIntrinsic(
5993 CmpXchgIntrID, Tys, {AlignedAddr, CmpVal, NewVal, Mask, FailureOrdering});
5994 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
5995 return Result;
5996}
5997
5998Value *LoongArchTargetLowering::emitMaskedAtomicRMWIntrinsic(
5999 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
6000 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
6001 // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
6002 // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
6003 // mask, as this produces better code than the LL/SC loop emitted by
6004 // int_loongarch_masked_atomicrmw_xchg.
6005 if (AI->getOperation() == AtomicRMWInst::Xchg &&
6006 isa<ConstantInt>(AI->getValOperand())) {
6007 ConstantInt *CVal = cast<ConstantInt>(AI->getValOperand());
6008 if (CVal->isZero())
6009 return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,
6010 Builder.CreateNot(Mask, "Inv_Mask"),
6011 AI->getAlign(), Ord);
6012 if (CVal->isMinusOne())
6013 return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,
6014 AI->getAlign(), Ord);
6015 }
6016
6017 unsigned GRLen = Subtarget.getGRLen();
6018 Value *Ordering =
6019 Builder.getIntN(GRLen, static_cast<uint64_t>(AI->getOrdering()));
6020 Type *Tys[] = {AlignedAddr->getType()};
6022 AI->getModule(),
6024
6025 if (GRLen == 64) {
6026 Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
6027 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
6028 ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
6029 }
6030
6031 Value *Result;
6032
6033 // Must pass the shift amount needed to sign extend the loaded value prior
6034 // to performing a signed comparison for min/max. ShiftAmt is the number of
6035 // bits to shift the value into position. Pass GRLen-ShiftAmt-ValWidth, which
6036 // is the number of bits to left+right shift the value in order to
6037 // sign-extend.
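// For example, an i8 element at byte offset 1 on LA64 has ShiftAmt = 8 and
// ValWidth = 8, so SextShamt = 64 - 8 - 8 = 48.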
6038 if (AI->getOperation() == AtomicRMWInst::Min ||
6040 const DataLayout &DL = AI->getDataLayout();
6041 unsigned ValWidth =
6042 DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
6043 Value *SextShamt =
6044 Builder.CreateSub(Builder.getIntN(GRLen, GRLen - ValWidth), ShiftAmt);
6045 Result = Builder.CreateCall(LlwOpScwLoop,
6046 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
6047 } else {
6048 Result =
6049 Builder.CreateCall(LlwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
6050 }
6051
6052 if (GRLen == 64)
6053 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
6054 return Result;
6055}
6056
6058 const MachineFunction &MF, EVT VT) const {
6059 VT = VT.getScalarType();
6060
6061 if (!VT.isSimple())
6062 return false;
6063
6064 switch (VT.getSimpleVT().SimpleTy) {
6065 case MVT::f32:
6066 case MVT::f64:
6067 return true;
6068 default:
6069 break;
6070 }
6071
6072 return false;
6073}
6074
6076 const Constant *PersonalityFn) const {
6077 return LoongArch::R4;
6078}
6079
6081 const Constant *PersonalityFn) const {
6082 return LoongArch::R5;
6083}
6084
6085//===----------------------------------------------------------------------===//
6086// Target Optimization Hooks
6087//===----------------------------------------------------------------------===//
6088
6089static int getEstimateRefinementSteps(EVT VT,
6090 const LoongArchSubtarget &Subtarget) {
6091 // FRECIPE instructions have a relative accuracy of 2^-14; each refinement step
6092 // doubles that, so f32 (23 mantissa bits) needs one step and f64 (52 bits) two.
6093 int RefinementSteps = VT.getScalarType() == MVT::f64 ? 2 : 1;
6094 return RefinementSteps;
6095}
6096
6097SDValue LoongArchTargetLowering::getSqrtEstimate(SDValue Operand,
6098 SelectionDAG &DAG, int Enabled,
6099 int &RefinementSteps,
6100 bool &UseOneConstNR,
6101 bool Reciprocal) const {
6102 if (Subtarget.hasFrecipe()) {
6103 SDLoc DL(Operand);
6104 EVT VT = Operand.getValueType();
6105
6106 if (VT == MVT::f32 || (VT == MVT::f64 && Subtarget.hasBasicD()) ||
6107 (VT == MVT::v4f32 && Subtarget.hasExtLSX()) ||
6108 (VT == MVT::v2f64 && Subtarget.hasExtLSX()) ||
6109 (VT == MVT::v8f32 && Subtarget.hasExtLASX()) ||
6110 (VT == MVT::v4f64 && Subtarget.hasExtLASX())) {
6111
6112 if (RefinementSteps == ReciprocalEstimate::Unspecified)
6113 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
6114
6115 SDValue Estimate = DAG.getNode(LoongArchISD::FRSQRTE, DL, VT, Operand);
6116 if (Reciprocal)
6117 Estimate = DAG.getNode(ISD::FMUL, DL, VT, Operand, Estimate);
6118
6119 return Estimate;
6120 }
6121 }
6122
6123 return SDValue();
6124}
6125
6126SDValue LoongArchTargetLowering::getRecipEstimate(SDValue Operand,
6127 SelectionDAG &DAG,
6128 int Enabled,
6129 int &RefinementSteps) const {
6130 if (Subtarget.hasFrecipe()) {
6131 SDLoc DL(Operand);
6132 EVT VT = Operand.getValueType();
6133
6134 if (VT == MVT::f32 || (VT == MVT::f64 && Subtarget.hasBasicD()) ||
6135 (VT == MVT::v4f32 && Subtarget.hasExtLSX()) ||
6136 (VT == MVT::v2f64 && Subtarget.hasExtLSX()) ||
6137 (VT == MVT::v8f32 && Subtarget.hasExtLASX()) ||
6138 (VT == MVT::v4f64 && Subtarget.hasExtLASX())) {
6139
6140 if (RefinementSteps == ReciprocalEstimate::Unspecified)
6141 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
6142
6143 return DAG.getNode(LoongArchISD::FRECIPE, DL, VT, Operand);
6144 }
6145 }
6146
6147 return SDValue();
6148}
6149
6150//===----------------------------------------------------------------------===//
6151// LoongArch Inline Assembly Support
6152//===----------------------------------------------------------------------===//
6153
6155LoongArchTargetLowering::getConstraintType(StringRef Constraint) const {
6156 // LoongArch specific constraints in GCC: config/loongarch/constraints.md
6157 //
6158 // 'f': A floating-point register (if available).
6159 // 'k': A memory operand whose address is formed by a base register and
6160 // (optionally scaled) index register.
6161 // 'l': A signed 16-bit constant.
6162 // 'm': A memory operand whose address is formed by a base register and
6163 // offset that is suitable for use in instructions with the same
6164 // addressing mode as st.w and ld.w.
6165 // 'I': A signed 12-bit constant (for arithmetic instructions).
6166 // 'J': Integer zero.
6167 // 'K': An unsigned 12-bit constant (for logic instructions).
6168 // "ZB": An address that is held in a general-purpose register. The offset is
6169 // zero.
6170 // "ZC": A memory operand whose address is formed by a base register and
6171 // offset that is suitable for use in instructions with the same
6172 // addressing mode as ll.w and sc.w.
6173 if (Constraint.size() == 1) {
6174 switch (Constraint[0]) {
6175 default:
6176 break;
6177 case 'f':
6178 return C_RegisterClass;
6179 case 'l':
6180 case 'I':
6181 case 'J':
6182 case 'K':
6183 return C_Immediate;
6184 case 'k':
6185 return C_Memory;
6186 }
6187 }
6188
6189 if (Constraint == "ZC" || Constraint == "ZB")
6190 return C_Memory;
6191
6192 // 'm' is handled here.
6193 return TargetLowering::getConstraintType(Constraint);
6194}
6195
6196InlineAsm::ConstraintCode LoongArchTargetLowering::getInlineAsmMemConstraint(
6197 StringRef ConstraintCode) const {
6198 return StringSwitch<InlineAsm::ConstraintCode>(ConstraintCode)
6203}
6204
6205std::pair<unsigned, const TargetRegisterClass *>
6206LoongArchTargetLowering::getRegForInlineAsmConstraint(
6207 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
6208 // First, see if this is a constraint that directly corresponds to a LoongArch
6209 // register class.
6210 if (Constraint.size() == 1) {
6211 switch (Constraint[0]) {
6212 case 'r':
6213 // TODO: Support fixed vectors up to GRLen?
6214 if (VT.isVector())
6215 break;
6216 return std::make_pair(0U, &LoongArch::GPRRegClass);
6217 case 'f':
6218 if (Subtarget.hasBasicF() && VT == MVT::f32)
6219 return std::make_pair(0U, &LoongArch::FPR32RegClass);
6220 if (Subtarget.hasBasicD() && VT == MVT::f64)
6221 return std::make_pair(0U, &LoongArch::FPR64RegClass);
6222 if (Subtarget.hasExtLSX() &&
6223 TRI->isTypeLegalForClass(LoongArch::LSX128RegClass, VT))
6224 return std::make_pair(0U, &LoongArch::LSX128RegClass);
6225 if (Subtarget.hasExtLASX() &&
6226 TRI->isTypeLegalForClass(LoongArch::LASX256RegClass, VT))
6227 return std::make_pair(0U, &LoongArch::LASX256RegClass);
6228 break;
6229 default:
6230 break;
6231 }
6232 }
6233
6234 // TargetLowering::getRegForInlineAsmConstraint uses the name of the TableGen
6235 // record (e.g. the "R0" in `def R0`) to choose registers for InlineAsm
6236 // constraints while the official register name is prefixed with a '$'. So we
6237 // clip the '$' from the original constraint string (e.g. {$r0} to {r0})
6238 // before it is parsed. And TargetLowering::getRegForInlineAsmConstraint is
6239 // case insensitive, so no need to convert the constraint to upper case here.
6240 //
6241 // For now, no need to support ABI names (e.g. `$a0`) as clang will correctly
6242 // decode the usage of register name aliases into their official names. And
6243 // AFAIK, the not yet upstreamed `rustc` for LoongArch will always use
6244 // official register names.
6245 if (Constraint.starts_with("{$r") || Constraint.starts_with("{$f") ||
6246 Constraint.starts_with("{$vr") || Constraint.starts_with("{$xr")) {
6247 bool IsFP = Constraint[2] == 'f';
6248 std::pair<StringRef, StringRef> Temp = Constraint.split('$');
6249 std::pair<unsigned, const TargetRegisterClass *> R;
6250 R = TargetLowering::getRegForInlineAsmConstraint(
6251 TRI, join_items("", Temp.first, Temp.second), VT);
6252 // Match those names to the widest floating point register type available.
6253 if (IsFP) {
6254 unsigned RegNo = R.first;
6255 if (LoongArch::F0 <= RegNo && RegNo <= LoongArch::F31) {
6256 if (Subtarget.hasBasicD() && (VT == MVT::f64 || VT == MVT::Other)) {
6257 unsigned DReg = RegNo - LoongArch::F0 + LoongArch::F0_64;
6258 return std::make_pair(DReg, &LoongArch::FPR64RegClass);
6259 }
6260 }
6261 }
6262 return R;
6263 }
6264
6265 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
6266}
6267
6268void LoongArchTargetLowering::LowerAsmOperandForConstraint(
6269 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
6270 SelectionDAG &DAG) const {
6271 // Currently only support length 1 constraints.
6272 if (Constraint.size() == 1) {
6273 switch (Constraint[0]) {
6274 case 'l':
6275 // Validate & create a 16-bit signed immediate operand.
6276 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
6277 uint64_t CVal = C->getSExtValue();
6278 if (isInt<16>(CVal))
6279 Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op),
6280 Subtarget.getGRLenVT()));
6281 }
6282 return;
6283 case 'I':
6284 // Validate & create a 12-bit signed immediate operand.
6285 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
6286 uint64_t CVal = C->getSExtValue();
6287 if (isInt<12>(CVal))
6288 Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op),
6289 Subtarget.getGRLenVT()));
6290 }
6291 return;
6292 case 'J':
6293 // Validate & create an integer zero operand.
6294 if (auto *C = dyn_cast<ConstantSDNode>(Op))
6295 if (C->getZExtValue() == 0)
6296 Ops.push_back(
6297 DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getGRLenVT()));
6298 return;
6299 case 'K':
6300 // Validate & create a 12-bit unsigned immediate operand.
6301 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
6302 uint64_t CVal = C->getZExtValue();
6303 if (isUInt<12>(CVal))
6304 Ops.push_back(
6305 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT()));
6306 }
6307 return;
6308 default:
6309 break;
6310 }
6311 }
6312 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
6313}
6314
6315#define GET_REGISTER_MATCHER
6316#include "LoongArchGenAsmMatcher.inc"
6317
6320 const MachineFunction &MF) const {
6321 std::pair<StringRef, StringRef> Name = StringRef(RegName).split('$');
6322 std::string NewRegName = Name.second.str();
6323 Register Reg = MatchRegisterAltName(NewRegName);
6324 if (Reg == LoongArch::NoRegister)
6325 Reg = MatchRegisterName(NewRegName);
6326 if (Reg == LoongArch::NoRegister)
6327 report_fatal_error(
6328 Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
6329 BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
6330 if (!ReservedRegs.test(Reg))
6331 report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
6332 StringRef(RegName) + "\"."));
6333 return Reg;
6334}
6335
6336bool LoongArchTargetLowering::decomposeMulByConstant(LLVMContext &Context,
6337 EVT VT, SDValue C) const {
6338 // TODO: Support vectors.
6339 if (!VT.isScalarInteger())
6340 return false;
6341
6342 // Omit the optimization if the data size exceeds GRLen.
6343 if (VT.getSizeInBits() > Subtarget.getGRLen())
6344 return false;
6345
6346 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
6347 const APInt &Imm = ConstNode->getAPIntValue();
6348 // Break MUL into (SLLI + ADD/SUB) or ALSL.
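// For example, x * 9 can be lowered as (SLLI x, 3) + x and x * 7 as (SLLI x, 3) - x.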
6349 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
6350 (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
6351 return true;
6352 // Break MUL into (ALSL x, (SLLI x, imm0), imm1).
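// For example, x * 10 can be lowered as (x << 3) + (x << 1): one SLLI plus one ALSL.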
6353 if (ConstNode->hasOneUse() &&
6354 ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
6355 (Imm - 8).isPowerOf2() || (Imm - 16).isPowerOf2()))
6356 return true;
6357 // Break (MUL x, imm) into (ADD (SLLI x, s0), (SLLI x, s1)),
6358 // in which the immediate has two set bits. Or break (MUL x, imm)
6359 // into (SUB (SLLI x, s0), (SLLI x, s1)), in which the immediate
6360 // equals (1 << s0) - (1 << s1).
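// For example, x * 4100 can be lowered as (x << 12) + (x << 2), and x * 8184 as
// (x << 13) - (x << 3).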
6361 if (ConstNode->hasOneUse() && !(Imm.sge(-2048) && Imm.sle(4095))) {
6362 unsigned Shifts = Imm.countr_zero();
6363 // Reject immediates which can be composed via a single LUI.
6364 if (Shifts >= 12)
6365 return false;
6366 // Reject multiplications that can be optimized to
6367 // (SLLI (ALSL x, x, 1/2/3/4), s).
6368 APInt ImmPop = Imm.ashr(Shifts);
6369 if (ImmPop == 3 || ImmPop == 5 || ImmPop == 9 || ImmPop == 17)
6370 return false;
6371 // We do not consider the case `(-Imm - ImmSmall).isPowerOf2()`,
6372 // since it needs one more instruction than the other three cases.
6373 APInt ImmSmall = APInt(Imm.getBitWidth(), 1ULL << Shifts, true);
6374 if ((Imm - ImmSmall).isPowerOf2() || (Imm + ImmSmall).isPowerOf2() ||
6375 (ImmSmall - Imm).isPowerOf2())
6376 return true;
6377 }
6378 }
6379
6380 return false;
6381}
6382
6383bool LoongArchTargetLowering::isLegalAddressingMode(const DataLayout &DL,
6384 const AddrMode &AM,
6385 Type *Ty, unsigned AS,
6386 Instruction *I) const {
6387 // LoongArch has four basic addressing modes:
6388 // 1. reg
6389 // 2. reg + 12-bit signed offset
6390 // 3. reg + 14-bit signed offset left-shifted by 2
6391 // 4. reg1 + reg2
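// (Mode 2 corresponds to ld.w/st.w with an si12 offset, mode 3 to
// ldptr.w/stptr.w, and mode 4 to ldx.w/stx.w.)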
6392 // TODO: Add more checks once the vector extensions are supported.
6393
6394 // No global is ever allowed as a base.
6395 if (AM.BaseGV)
6396 return false;
6397
6398 // Require a 12-bit signed offset or 14-bit signed offset left-shifted by 2
6399 // with `UAL` feature.
6400 if (!isInt<12>(AM.BaseOffs) &&
6401 !(isShiftedInt<14, 2>(AM.BaseOffs) && Subtarget.hasUAL()))
6402 return false;
6403
6404 switch (AM.Scale) {
6405 case 0:
6406 // "r+i" or just "i", depending on HasBaseReg.
6407 break;
6408 case 1:
6409 // "r+r+i" is not allowed.
6410 if (AM.HasBaseReg && AM.BaseOffs)
6411 return false;
6412 // Otherwise we have "r+r" or "r+i".
6413 break;
6414 case 2:
6415 // "2*r+r" or "2*r+i" is not allowed.
6416 if (AM.HasBaseReg || AM.BaseOffs)
6417 return false;
6418 // Allow "2*r" as "r+r".
6419 break;
6420 default:
6421 return false;
6422 }
6423
6424 return true;
6425}
6426
6428 return isInt<12>(Imm);
6429}
6430
6432 return isInt<12>(Imm);
6433}
6434
6435bool LoongArchTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
6436 // Zexts are free if they can be combined with a load.
6437 // Don't advertise i32->i64 zextload as being free for LA64. It interacts
6438 // poorly with type legalization of compares preferring sext.
6439 if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
6440 EVT MemVT = LD->getMemoryVT();
6441 if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
6442 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
6443 LD->getExtensionType() == ISD::ZEXTLOAD))
6444 return true;
6445 }
6446
6447 return TargetLowering::isZExtFree(Val, VT2);
6448}
6449
6450bool LoongArchTargetLowering::isSExtCheaperThanZExt(EVT SrcVT,
6451 EVT DstVT) const {
6452 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
6453}
6454
6456 return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
6457}
6458
6460 // TODO: Support vectors.
6461 if (Y.getValueType().isVector())
6462 return false;
6463
6464 return !isa<ConstantSDNode>(Y);
6465}
6466
6468 // LAMCAS will use amcas[_DB].{b/h/w/d} which does not require extension.
6469 return Subtarget.hasLAMCAS() ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND;
6470}
6471
6472bool LoongArchTargetLowering::shouldSignExtendTypeInLibCall(
6473 Type *Ty, bool IsSigned) const {
6474 if (Subtarget.is64Bit() && Ty->isIntegerTy(32))
6475 return true;
6476
6477 return IsSigned;
6478}
6479
6480bool LoongArchTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
6481 // Return false to suppress the unnecessary extensions if the LibCall
6482 // arguments or return value is a float narrower than GRLEN on a soft FP ABI.
6483 if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
6484 Type.getSizeInBits() < Subtarget.getGRLen()))
6485 return false;
6486 return true;
6487}
6488
6489// memcpy and other memory intrinsics typically try to use wider loads/stores
6490// when the source/dest is aligned and the copy size is large enough. We
6491// therefore want to align such objects when they are passed to memory intrinsics.
6492bool LoongArchTargetLowering::shouldAlignPointerArgs(CallInst *CI,
6493 unsigned &MinSize,
6494 Align &PrefAlign) const {
6495 if (!isa<MemIntrinsic>(CI))
6496 return false;
6497
6498 if (Subtarget.is64Bit()) {
6499 MinSize = 8;
6500 PrefAlign = Align(8);
6501 } else {
6502 MinSize = 4;
6503 PrefAlign = Align(4);
6504 }
6505
6506 return true;
6507}
static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op, StringRef ErrorMsg, SelectionDAG &DAG)
const MCPhysReg ArgXRs[]
static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State, CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2, MVT ValVT2, MVT LocVT2, ISD::ArgFlagsTy ArgFlags2)
const MCPhysReg ArgFPR64s[]
static MachineBasicBlock * emitPseudoCTPOP(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
#define IOCSRWR_CASE(NAME, NODE)
#define CRC_CASE_EXT_UNARYOP(NAME, NODE)
static SDValue lowerVECTOR_SHUFFLE_VPACKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPACKOD (if possible).
static MachineBasicBlock * emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp, SelectionDAG &DAG, bool IsSigned=false)
const MCPhysReg ArgGPRs[]
static SDValue lowerVECTOR_SHUFFLE_XVILVL(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVILVL (if possible).
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp, unsigned ExtOpc=ISD::ANY_EXTEND)
static void replaceVecCondBranchResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, unsigned ResOp)
#define ASRT_LE_GT_CASE(NAME)
static bool isConstantOrUndef(const SDValue Op)
static SDValue lowerVECTOR_SHUFFLE_XVPACKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPACKEV (if possible).
static MachineBasicBlock * emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_XVILVH(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVILVH (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVSHUF (if possible).
static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
#define IOCSRRD_CASE(NAME, NODE)
static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Dispatching routine to lower various 128-bit LoongArch vector shuffles.
static SDValue lowerVECTOR_SHUFFLE_XVPICKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPICKEV (if possible).
static SDValue lowerVECTOR_SHUFFLE_VILVL(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VILVL (if possible).
static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG)
static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode *Op)
static void replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVREPLVEI (if possible).
#define CSR_CASE(ID)
static SDValue lowerVECTOR_SHUFFLE_VPICKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPICKOD (if possible).
static Intrinsic::ID getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen, AtomicRMWInst::BinOp BinOp)
static SDValue lowerVECTOR_SHUFFLE_VSHUF(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VSHUF.
static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode)
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
unsigned const TargetRegisterInfo * TRI
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
static CodeModel::Model getCodeModel(const PPCSubtarget &S, const TargetMachine &TM, const MachineOperand &MO)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool Enabled
Definition: Statistic.cpp:46
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:166
This file contains some functions that are useful when dealing with strings.
Class for arbitrary precision integers.
Definition: APInt.h:78
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition: APInt.h:1257
This class represents an incoming formal argument to a Function.
Definition: Argument.h:31
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:168
An instruction that atomically checks whether a specified value is in a memory location,...
Definition: Instructions.h:501
AtomicOrdering getFailureOrdering() const
Returns the failure ordering constraint of this cmpxchg instruction.
Definition: Instructions.h:594
an instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:704
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
Definition: Instructions.h:827
BinOp
This enumeration lists the possible modifications atomicrmw can make.
Definition: Instructions.h:716
@ Add
*p = old + v
Definition: Instructions.h:720
@ USubCond
Subtract only if no unsigned overflow.
Definition: Instructions.h:764
@ Min
*p = old <signed v ? old : v
Definition: Instructions.h:734
@ Or
*p = old | v
Definition: Instructions.h:728
@ Sub
*p = old - v
Definition: Instructions.h:722
@ And
*p = old & v
Definition: Instructions.h:724
@ Xor
*p = old ^ v
Definition: Instructions.h:730
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
Definition: Instructions.h:768
@ UIncWrap
Increment one up to a maximum value.
Definition: Instructions.h:756
@ Max
*p = old >signed v ? old : v
Definition: Instructions.h:732
@ UMin
*p = old <unsigned v ? old : v
Definition: Instructions.h:738
@ UMax
*p = old >unsigned v ? old : v
Definition: Instructions.h:736
@ UDecWrap
Decrement one until a minimum value or zero.
Definition: Instructions.h:760
@ Nand
*p = ~(old & v)
Definition: Instructions.h:726
Value * getPointerOperand()
Definition: Instructions.h:870
bool isFloatingPointOperation() const
Definition: Instructions.h:882
BinOp getOperation() const
Definition: Instructions.h:805
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this rmw instruction.
Definition: Instructions.h:861
Value * getValOperand()
Definition: Instructions.h:874
AtomicOrdering getOrdering() const
Returns the ordering constraint of this rmw instruction.
Definition: Instructions.h:847
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
bool test(unsigned Idx) const
Definition: BitVector.h:461
A "pseudo-class" with methods for operating on BUILD_VECTORs.
CCState - This class holds information needed while lowering arguments and return values.
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
SmallVectorImpl< ISD::ArgFlagsTy > & getPendingArgFlags()
MCRegister AllocateReg(MCPhysReg Reg)
AllocateReg - Attempt to allocate one register.
int64_t AllocateStack(unsigned Size, Align Alignment)
AllocateStack - Allocate a chunk of stack space with the specified size and alignment.
void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeCallOperands - Analyze the outgoing arguments to a call, incorporating info about the passed v...
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
SmallVectorImpl< CCValAssign > & getPendingLocs()
void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
void addLoc(const CCValAssign &V)
CCValAssign - Represent assignment of one arg/retval to a location.
bool isRegLoc() const
static CCValAssign getPending(unsigned ValNo, MVT ValVT, MVT LocVT, LocInfo HTP, unsigned ExtraInfo=0)
Register getLocReg() const
LocInfo getLocInfo() const
static CCValAssign getReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP, bool IsCustom=false)
static CCValAssign getMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP, bool IsCustom=false)
bool isMemLoc() const
int64_t getLocMemOffset() const
unsigned getValNo() const
bool isMustTailCall() const
Tests if this call site must be tail call optimized.
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition: Constants.h:220
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition: Constants.h:208
uint64_t getZExtValue() const
int64_t getSExtValue() const
This is an important base class in LLVM.
Definition: Constant.h:42
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
unsigned getPointerSizeInBits(unsigned AS=0) const
Layout pointer size, in bits FIXME: The defaults need to be removed once all of the backends/clients ...
Definition: DataLayout.h:364
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:847
A debug info location.
Definition: DebugLoc.h:33
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:216
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:277
Argument * getArg(unsigned i) const
Definition: Function.h:886
bool isDSOLocal() const
Definition: GlobalValue.h:306
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:113
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2045
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1480
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Definition: IRBuilder.h:545
IntegerType * getInt64Ty()
Fetch the type representing a 64-bit integer.
Definition: IRBuilder.h:550
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Definition: IRBuilder.cpp:900
Value * CreateNot(Value *V, const Twine &Name="")
Definition: IRBuilder.h:1757
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1387
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2152
ConstantInt * getIntN(unsigned N, uint64_t C)
Get a constant N-bit value, zero extended or truncated from a 64-bit value.
Definition: IRBuilder.h:516
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1459
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2033
LLVMContext & getContext() const
Definition: IRBuilder.h:195
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1518
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2142
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2449
AtomicRMWInst * CreateAtomicRMW(AtomicRMWInst::BinOp Op, Value *Ptr, Value *Val, MaybeAlign Align, AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Definition: IRBuilder.h:1862
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition: IRBuilder.h:2019
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1540
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2705
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:68
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:94
const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Definition: Instruction.cpp:76
Class to represent integer types.
Definition: DerivedTypes.h:42
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
void emitError(const Instruction *I, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location inf...
This class is used to represent ISD::LOAD nodes.
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
LoongArchMachineFunctionInfo - This class is derived from MachineFunctionInfo and contains private Lo...
const LoongArchRegisterInfo * getRegisterInfo() const override
const LoongArchInstrInfo * getInstrInfo() const override
unsigned getMaxBytesForAlignment() const
Align getPrefFunctionAlignment() const
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override
Return true if result of the specified node is used by a return node only.
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps, bool &UseOneConstNR, bool Reciprocal) const override
Hooks for building estimates in place of slower divisions and square roots.
bool isLegalICmpImmediate(int64_t Imm) const override
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *CI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
Return the ValueType of the result of SETCC operations.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Determine if the target supports unaligned memory accesses.
bool isCheapToSpeculateCtlz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic ctlz.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
bool shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize, Align &PrefAlign) const override
Return true if the pointer arguments to CI should be aligned by aligning the object whose address is ...
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
bool signExtendConstant(const ConstantInt *CI) const override
Return true if this constant should be sign extended when promoting to a larger type.
const char * getTargetNodeName(unsigned Opcode) const override
This method returns the name of a target specific DAG node.
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
bool isLegalAddImmediate(int64_t Imm) const override
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
bool isCheapToSpeculateCttz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic cttz.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const override
Returns true if arguments should be sign-extended in lib calls.
bool isFPImmVLDILegal(const APFloat &Imm, EVT VT) const
bool shouldExtendTypeInLibCall(EVT Type) const override
Returns true if arguments should be extended in lib calls.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
bool hasAndNot(SDValue Y) const override
Return true if the target has a bitwise and-not operation: X = ~A & B This can be used to simplify se...
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
void emitExpandAtomicRMW(AtomicRMWInst *AI) const override
Perform a atomicrmw expansion using a target-specific way.
ISD::NodeType getExtendForAtomicCmpSwapArg() const override
Returns how the platform's atomic compare and swap expects its comparison value to be extended (ZERO_...
LoongArchTargetLowering(const TargetMachine &TM, const LoongArchSubtarget &STI)
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
bool hasAndNotCompare(SDValue Y) const override
Return true if the target should transform: (X & Y) == Y —> (~X & Y) == 0 (X & Y) !...
SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps) const override
Return a reciprocal estimate value for the input operand.
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context, const Type *RetTy) const override
This hook should be implemented to check whether the return values described by the Outs array can fi...
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array,...
bool mayBeEmittedAsTailCall(const CallInst *CI) const override
Return true if the target may be able emit the call instruction as a tail call.
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:33
bool hasFeature(unsigned Feature) const
Machine Value Type.
bool is128BitVector() const
Return true if this is a 128-bit vector type.
SimpleValueType SimpleTy
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
static auto fixedlen_vector_valuetypes()
bool is256BitVector() const
Return true if this is a 256-bit vector type.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
MVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
Representation of each machine instruction.
Definition: MachineInstr.h:69
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:585
A description of a memory reference used in the backend.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
void setIsKill(bool Val=true)
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
void addLiveIn(MCRegister Reg, Register vreg=Register())
addLiveIn - Add the specified register as a live-in.
EVT getMemoryVT() const
Return the type of the in-memory value.
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:310
Class to represent pointers.
Definition: DerivedTypes.h:670
unsigned getAddressSpace() const
Return the address space of the Pointer type.
Definition: DerivedTypes.h:703
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
size_t use_size() const
Return the number of uses of this node.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
const SDValue & getOperand(unsigned i) const
uint64_t getScalarValueSizeInBits() const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:228
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:750
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
Definition: SelectionDAG.h:801
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), AAResults *AA=nullptr)
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
SDValue getRegister(Register Reg, EVT VT)
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:503
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
Definition: SelectionDAG.h:760
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:856
bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
Definition: SelectionDAG.h:827
void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:497
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:712
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:498
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:700
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:796
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:492
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getRegisterMask(const uint32_t *RegMask)
LLVMContext * getContext() const
Definition: SelectionDAG.h:510
SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:767
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:580
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
ArrayRef< int > getMask() const
bool empty() const
Definition: SmallVector.h:81
size_t size() const
Definition: SmallVector.h:78
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:573
typename SuperClass::const_iterator const_iterator
Definition: SmallVector.h:578
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
StackOffset holds a fixed and a scalable offset in bytes.
Definition: TypeSize.h:33
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition: StringRef.h:700
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:265
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:150
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:44
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:69
R Default(T Value)
Definition: StringSwitch.h:182
TargetInstrInfo - Interface to description of machine instruction set.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a targte-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
const TargetMachine & getTargetMachine() const
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
void setMaxBytesForAlignment(unsigned MaxBytes)
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
void setMinCmpXchgSizeInBits(unsigned SizeInBits)
Sets the minimum cmpxchg or ll/sc size supported by the backend.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
std::vector< ArgListEntry > ArgListTy
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
virtual InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
bool useTLSDESC() const
Returns true if this target uses TLS Descriptors.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
bool shouldAssumeDSOLocal(const GlobalValue *GV) const
CodeModel::Model getCodeModel() const
Returns the code model.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetInstrInfo * getInstrInfo() const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
unsigned getIntegerBitWidth() const
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:237
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
This class is used to represent EVT's, which are used to parameterize some operations.
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
self_iterator getIterator()
Definition: ilist_node.h:132
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
@ Entry
Definition: COFF.h:844
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition: CallingConv.h:50
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:40
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:780
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain.
Definition: ISDOpcodes.h:1197
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
Definition: ISDOpcodes.h:1193
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition: ISDOpcodes.h:491
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:257
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:744
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
Definition: ISDOpcodes.h:1226
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:246
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1102
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:814
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:498
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:205
@ GlobalAddress
Definition: ISDOpcodes.h:78
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:841
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:558
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:397
@ MEMBARRIER
MEMBARRIER - Compiler barrier only; generate a no-op.
Definition: ISDOpcodes.h:1299
@ ATOMIC_FENCE
OUTCHAIN = ATOMIC_FENCE(INCHAIN, ordering, scope) This corresponds to the fence instruction.
Definition: ISDOpcodes.h:1304
@ STRICT_FSETCCS
Definition: ISDOpcodes.h:492
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
Definition: ISDOpcodes.h:964
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:954
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
Definition: ISDOpcodes.h:1494
@ GlobalTLSAddress
Definition: ISDOpcodes.h:79
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:805
@ WRITE_REGISTER
Definition: ISDOpcodes.h:125
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
Definition: ISDOpcodes.h:1292
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
Definition: ISDOpcodes.h:1059
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:981
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:1148
@ BR_JT
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:1127
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition: ISDOpcodes.h:522
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:757
@ UNDEF
UNDEF - An undefined node.
Definition: ISDOpcodes.h:218
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
Definition: ISDOpcodes.h:1222
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:674
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:735
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:615
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimumNumber or maximumNumber on two values,...
Definition: ISDOpcodes.h:1044
@ READ_REGISTER
READ_REGISTER, WRITE_REGISTER - This node represents llvm.register on the DAG, which implements the n...
Definition: ISDOpcodes.h:124
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:550
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition: ISDOpcodes.h:209
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:811
@ DEBUGTRAP
DEBUGTRAP - Trap intended to get the attention of a debugger.
Definition: ISDOpcodes.h:1282
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:772
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
Definition: ISDOpcodes.h:1112
@ ConstantPool
Definition: ISDOpcodes.h:82
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:849
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition: ISDOpcodes.h:697
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:939
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:766
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition: ISDOpcodes.h:135
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition: ISDOpcodes.h:100
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:887
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:709
@ TRAP
TRAP - Trapping instruction.
Definition: ISDOpcodes.h:1279
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:190
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:539
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:817
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
Definition: ISDOpcodes.h:1217
@ BlockAddress
Definition: ISDOpcodes.h:84
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition: ISDOpcodes.h:794
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:61
@ AssertZext
Definition: ISDOpcodes.h:62
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:198
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:530
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1610
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1590
Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
Definition: Intrinsics.cpp:731
ABI getTargetABI(StringRef ABIName)
Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition: LLVMContext.h:54
@ GeneralDynamic
Definition: CodeGen.h:46
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:480
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1739
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
Definition: MathExtras.h:287
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1746
@ None
Definition: CodeGenData.h:106
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition: MathExtras.h:195
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Other
Any other memory.
unsigned getKillRegState(bool B)
DWARFExpression::Operation Op
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:217
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Extended Value Type.
Definition: ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:94
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:390
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:137
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:368
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:380
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:311
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition: ValueTypes.h:207
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:376
static EVT getFloatingPointVT(unsigned BitWidth)
Returns the EVT that represents a floating-point type with the given number of bits.
Definition: ValueTypes.h:59
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:168
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:318
bool is256BitVector() const
Return true if this is a 256-bit vector type.
Definition: ValueTypes.h:212
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:210
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:323
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:157
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:331
Align getNonZeroOrigAlign() const
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
Register getFrameRegister(const MachineFunction &MF) const override
BitVector getReservedRegs(const MachineFunction &MF) const override
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
SmallVector< ISD::OutputArg, 32 > Outs
SmallVector< SDValue, 32 > OutVals
This structure is used to pass arguments to makeLibCall function.
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT, bool Value=true)