1//=- LoongArchISelLowering.cpp - LoongArch DAG Lowering Implementation ---===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that LoongArch uses to lower LLVM code into
10// a selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
15#include "LoongArch.h"
18#include "LoongArchSubtarget.h"
22#include "llvm/ADT/Statistic.h"
27#include "llvm/IR/IRBuilder.h"
29#include "llvm/IR/IntrinsicsLoongArch.h"
31#include "llvm/Support/Debug.h"
35
36using namespace llvm;
37
38#define DEBUG_TYPE "loongarch-isel-lowering"
39
40STATISTIC(NumTailCalls, "Number of tail calls");
41
42static cl::opt<bool> ZeroDivCheck("loongarch-check-zero-division", cl::Hidden,
43 cl::desc("Trap on integer division by zero."),
44 cl::init(false));
45
46 LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
47 const LoongArchSubtarget &STI)
48 : TargetLowering(TM), Subtarget(STI) {
49
50 MVT GRLenVT = Subtarget.getGRLenVT();
51
52 // Set up the register classes.
53
54 addRegisterClass(GRLenVT, &LoongArch::GPRRegClass);
55 if (Subtarget.hasBasicF())
56 addRegisterClass(MVT::f32, &LoongArch::FPR32RegClass);
57 if (Subtarget.hasBasicD())
58 addRegisterClass(MVT::f64, &LoongArch::FPR64RegClass);
59
60 static const MVT::SimpleValueType LSXVTs[] = {
61 MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32, MVT::v2f64};
62 static const MVT::SimpleValueType LASXVTs[] = {
63 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, MVT::v8f32, MVT::v4f64};
64
65 if (Subtarget.hasExtLSX())
66 for (MVT VT : LSXVTs)
67 addRegisterClass(VT, &LoongArch::LSX128RegClass);
68
69 if (Subtarget.hasExtLASX())
70 for (MVT VT : LASXVTs)
71 addRegisterClass(VT, &LoongArch::LASX256RegClass);
72
73 // Set operations for LA32 and LA64.
74
76 MVT::i1, Promote);
77
84
87 GRLenVT, Custom);
88
90
95
98
102
103 // Expand bitreverse.i16 with native-width bitrev and shift for now, until
104 // we know which of sll and revb.2h is faster.
107
108 // LA32 does not have REVB.2W and REVB.D due to the 64-bit operands, and
109 // the narrower REVB.W does not exist. But LA32 does have REVB.2H, so i16
110 // and i32 could still be byte-swapped relatively cheaply.
112
118
121
122 // Set operations for LA64 only.
123
124 if (Subtarget.is64Bit()) {
142
146 }
147
148 // Set operations for LA32 only.
149
150 if (!Subtarget.is64Bit()) {
156 }
157
159
160 static const ISD::CondCode FPCCToExpand[] = {
163
164 // Set operations for 'F' feature.
165
166 if (Subtarget.hasBasicF()) {
167 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
168 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
169 setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
170
186
187 if (Subtarget.is64Bit())
189
190 if (!Subtarget.hasBasicD()) {
192 if (Subtarget.is64Bit()) {
195 }
196 }
197 }
198
199 // Set operations for 'D' feature.
200
201 if (Subtarget.hasBasicD()) {
202 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
203 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
204 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
205 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
206 setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
207
223
224 if (Subtarget.is64Bit())
226 }
227
228 // Set operations for 'LSX' feature.
229
230 if (Subtarget.hasExtLSX()) {
232 // Expand all truncating stores and extending loads.
233 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
234 setTruncStoreAction(VT, InnerVT, Expand);
237 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
238 }
239 // By default everything must be expanded. Then we will selectively turn
240 // on ones that can be effectively codegen'd.
241 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
243 }
244
245 for (MVT VT : LSXVTs) {
249
253
257 }
258 for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
261 Legal);
263 VT, Legal);
270 Expand);
271 }
272 for (MVT VT : {MVT::v4i32, MVT::v2i64}) {
275 }
276 for (MVT VT : {MVT::v4f32, MVT::v2f64}) {
284 VT, Expand);
285 }
286 }
287
288 // Set operations for 'LASX' feature.
289
290 if (Subtarget.hasExtLASX()) {
291 for (MVT VT : LASXVTs) {
295
299
303 }
304 for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) {
307 Legal);
309 VT, Legal);
316 Expand);
317 }
318 for (MVT VT : {MVT::v8i32, MVT::v4i32, MVT::v4i64}) {
321 }
322 for (MVT VT : {MVT::v8f32, MVT::v4f64}) {
330 VT, Expand);
331 }
332 }
333
334 // Set DAG combine for LA32 and LA64.
335
340
341 // Set DAG combine for 'LSX' feature.
342
343 if (Subtarget.hasExtLSX())
345
346 // Compute derived properties from the register classes.
348
350
353
355
357
358 // Function alignments.
360 // Set preferred alignments.
364}
365
366 bool LoongArchTargetLowering::isOffsetFoldingLegal(
367 const GlobalAddressSDNode *GA) const {
368 // In order to maximise the opportunity for common subexpression elimination,
369 // keep a separate ADD node for the global address offset instead of folding
370 // it in the global address node. Later peephole optimisations may choose to
371 // fold it back in when profitable.
372 return false;
373}
374
375 SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
376 SelectionDAG &DAG) const {
377 switch (Op.getOpcode()) {
378 case ISD::ATOMIC_FENCE:
379 return lowerATOMIC_FENCE(Op, DAG);
380 case ISD::EH_DWARF_CFA:
381 return lowerEH_DWARF_CFA(Op, DAG);
382 case ISD::GlobalAddress:
383 return lowerGlobalAddress(Op, DAG);
384 case ISD::GlobalTLSAddress:
385 return lowerGlobalTLSAddress(Op, DAG);
386 case ISD::INTRINSIC_WO_CHAIN:
387 return lowerINTRINSIC_WO_CHAIN(Op, DAG);
388 case ISD::INTRINSIC_W_CHAIN:
389 return lowerINTRINSIC_W_CHAIN(Op, DAG);
390 case ISD::INTRINSIC_VOID:
391 return lowerINTRINSIC_VOID(Op, DAG);
392 case ISD::BlockAddress:
393 return lowerBlockAddress(Op, DAG);
394 case ISD::JumpTable:
395 return lowerJumpTable(Op, DAG);
396 case ISD::SHL_PARTS:
397 return lowerShiftLeftParts(Op, DAG);
398 case ISD::SRA_PARTS:
399 return lowerShiftRightParts(Op, DAG, true);
400 case ISD::SRL_PARTS:
401 return lowerShiftRightParts(Op, DAG, false);
402 case ISD::ConstantPool:
403 return lowerConstantPool(Op, DAG);
404 case ISD::FP_TO_SINT:
405 return lowerFP_TO_SINT(Op, DAG);
406 case ISD::BITCAST:
407 return lowerBITCAST(Op, DAG);
408 case ISD::UINT_TO_FP:
409 return lowerUINT_TO_FP(Op, DAG);
410 case ISD::SINT_TO_FP:
411 return lowerSINT_TO_FP(Op, DAG);
412 case ISD::VASTART:
413 return lowerVASTART(Op, DAG);
414 case ISD::FRAMEADDR:
415 return lowerFRAMEADDR(Op, DAG);
416 case ISD::RETURNADDR:
417 return lowerRETURNADDR(Op, DAG);
418 case ISD::WRITE_REGISTER:
419 return lowerWRITE_REGISTER(Op, DAG);
420 case ISD::INSERT_VECTOR_ELT:
421 return lowerINSERT_VECTOR_ELT(Op, DAG);
422 case ISD::EXTRACT_VECTOR_ELT:
423 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
424 case ISD::BUILD_VECTOR:
425 return lowerBUILD_VECTOR(Op, DAG);
426 case ISD::VECTOR_SHUFFLE:
427 return lowerVECTOR_SHUFFLE(Op, DAG);
428 }
429 return SDValue();
430}
431
432/// Determine whether a range fits a regular pattern of values.
433/// This function accounts for the possibility of jumping over the End iterator.
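/// As an illustration, for a mask <0, -1, 1, 7, 2, -1> (-1 denoting undef),
/// calling this with CheckStride = 2, ExpectedIndex = 0 and
/// ExpectedIndexStride = 1 inspects only positions 0, 2 and 4; they match the
/// expected sequence 0, 1, 2 (undefs always match), so the result is true.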
434template <typename ValType>
435static bool
436 fitsRegularPattern(typename SmallVectorImpl<ValType>::const_iterator Begin,
437 unsigned CheckStride,
438 typename SmallVectorImpl<ValType>::const_iterator End,
439 ValType ExpectedIndex, unsigned ExpectedIndexStride) {
440 auto &I = Begin;
441
442 while (I != End) {
443 if (*I != -1 && *I != ExpectedIndex)
444 return false;
445 ExpectedIndex += ExpectedIndexStride;
446
447 // Incrementing past End is undefined behaviour so we must increment one
448 // step at a time and check for End at each step.
449 for (unsigned n = 0; n < CheckStride && I != End; ++n, ++I)
450 ; // Empty loop body.
451 }
452 return true;
453}
454
455/// Lower VECTOR_SHUFFLE into VREPLVEI (if possible).
456///
457/// VREPLVEI performs vector broadcast based on an element specified by an
458/// integer immediate, with its mask being similar to:
459/// <x, x, x, ...>
460/// where x is any valid index.
461///
462/// When undef's appear in the mask they are treated as if they were whatever
463/// value is necessary in order to fit the above form.
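/// As an illustration, a v4i32 shuffle with mask <1, -1, 1, 1> (undefs
/// allowed) broadcasts element 1 of the first source and is lowered to a
/// VREPLVEI node with immediate 1.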
464 static SDValue lowerVECTOR_SHUFFLE_VREPLVEI(const SDLoc &DL, ArrayRef<int> Mask,
465 MVT VT, SDValue V1, SDValue V2,
466 SelectionDAG &DAG) {
467 int SplatIndex = -1;
468 for (const auto &M : Mask) {
469 if (M != -1) {
470 SplatIndex = M;
471 break;
472 }
473 }
474
475 if (SplatIndex == -1)
476 return DAG.getUNDEF(VT);
477
478 assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
479 if (fitsRegularPattern<int>(Mask.begin(), 1, Mask.end(), SplatIndex, 0)) {
480 APInt Imm(64, SplatIndex);
481 return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
482 DAG.getConstant(Imm, DL, MVT::i64));
483 }
484
485 return SDValue();
486}
487
488/// Lower VECTOR_SHUFFLE into VSHUF4I (if possible).
489///
490/// VSHUF4I splits the vector into blocks of four elements, then shuffles these
491/// elements according to a <4 x i2> constant (encoded as an integer immediate).
492///
493/// It is therefore possible to lower into VSHUF4I when the mask takes the form:
494/// <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...>
495/// When undef's appear they are treated as if they were whatever value is
496/// necessary in order to fit the above forms.
497///
498/// For example:
499/// %2 = shufflevector <8 x i16> %0, <8 x i16> undef,
500/// <8 x i32> <i32 3, i32 2, i32 1, i32 0,
501/// i32 7, i32 6, i32 5, i32 4>
502/// is lowered to:
503/// (VSHUF4I_H $v0, $v1, 27)
504/// where the 27 comes from:
505/// 3 + (2 << 2) + (1 << 4) + (0 << 6)
506 static SDValue lowerVECTOR_SHUFFLE_VSHUF4I(const SDLoc &DL, ArrayRef<int> Mask,
507 MVT VT, SDValue V1, SDValue V2,
508 SelectionDAG &DAG) {
509
510 // When the size is less than 4, lower cost instructions may be used.
511 if (Mask.size() < 4)
512 return SDValue();
513
514 int SubMask[4] = {-1, -1, -1, -1};
515 for (unsigned i = 0; i < 4; ++i) {
516 for (unsigned j = i; j < Mask.size(); j += 4) {
517 int Idx = Mask[j];
518
519 // Convert from vector index to 4-element subvector index
520 // If an index refers to an element outside of the subvector then give up
521 if (Idx != -1) {
522 Idx -= 4 * (j / 4);
523 if (Idx < 0 || Idx >= 4)
524 return SDValue();
525 }
526
527 // If the mask has an undef, replace it with the current index.
528 // Note that it might still be undef if the current index is also undef
529 if (SubMask[i] == -1)
530 SubMask[i] = Idx;
531 // Check that non-undef values are the same as in the mask. If they
532 // aren't then give up
533 else if (Idx != -1 && Idx != SubMask[i])
534 return SDValue();
535 }
536 }
537
538 // Calculate the immediate. Replace any remaining undefs with zero
539 APInt Imm(64, 0);
540 for (int i = 3; i >= 0; --i) {
541 int Idx = SubMask[i];
542
543 if (Idx == -1)
544 Idx = 0;
545
546 Imm <<= 2;
547 Imm |= Idx & 0x3;
548 }
549
550 return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT, V1,
551 DAG.getConstant(Imm, DL, MVT::i64));
552}
553
554/// Lower VECTOR_SHUFFLE into VPACKEV (if possible).
555///
556/// VPACKEV interleaves the even elements from each vector.
557///
558/// It is possible to lower into VPACKEV when the mask consists of two of the
559/// following forms interleaved:
560/// <0, 2, 4, ...>
561/// <n, n+2, n+4, ...>
562/// where n is the number of elements in the vector.
563/// For example:
564/// <0, 0, 2, 2, 4, 4, ...>
565/// <0, n, 2, n+2, 4, n+4, ...>
566///
567/// When undef's appear in the mask they are treated as if they were whatever
568/// value is necessary in order to fit the above forms.
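/// As a concrete illustration, a v4i32 shuffle (n = 4) with mask <0, 4, 2, 6>
/// interleaves the even elements of both sources and can be lowered to a
/// single VPACKEV node.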
569 static SDValue lowerVECTOR_SHUFFLE_VPACKEV(const SDLoc &DL, ArrayRef<int> Mask,
570 MVT VT, SDValue V1, SDValue V2,
571 SelectionDAG &DAG) {
572
573 const auto &Begin = Mask.begin();
574 const auto &End = Mask.end();
575 SDValue OriV1 = V1, OriV2 = V2;
576
577 if (fitsRegularPattern<int>(Begin, 2, End, 0, 2))
578 V1 = OriV1;
579 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 2))
580 V1 = OriV2;
581 else
582 return SDValue();
583
584 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 2))
585 V2 = OriV1;
586 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 2))
587 V2 = OriV2;
588 else
589 return SDValue();
590
591 return DAG.getNode(LoongArchISD::VPACKEV, DL, VT, V2, V1);
592}
593
594/// Lower VECTOR_SHUFFLE into VPACKOD (if possible).
595///
596/// VPACKOD interleaves the odd elements from each vector.
597///
598/// It is possible to lower into VPACKOD when the mask consists of two of the
599/// following forms interleaved:
600/// <1, 3, 5, ...>
601/// <n+1, n+3, n+5, ...>
602/// where n is the number of elements in the vector.
603/// For example:
604/// <1, 1, 3, 3, 5, 5, ...>
605/// <1, n+1, 3, n+3, 5, n+5, ...>
606///
607/// When undef's appear in the mask they are treated as if they were whatever
608/// value is necessary in order to fit the above forms.
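/// As a concrete illustration, a v4i32 shuffle (n = 4) with mask <1, 5, 3, 7>
/// interleaves the odd elements of both sources and can be lowered to a
/// single VPACKOD node.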
609 static SDValue lowerVECTOR_SHUFFLE_VPACKOD(const SDLoc &DL, ArrayRef<int> Mask,
610 MVT VT, SDValue V1, SDValue V2,
611 SelectionDAG &DAG) {
612
613 const auto &Begin = Mask.begin();
614 const auto &End = Mask.end();
615 SDValue OriV1 = V1, OriV2 = V2;
616
617 if (fitsRegularPattern<int>(Begin, 2, End, 1, 2))
618 V1 = OriV1;
619 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + 1, 2))
620 V1 = OriV2;
621 else
622 return SDValue();
623
624 if (fitsRegularPattern<int>(Begin + 1, 2, End, 1, 2))
625 V2 = OriV1;
626 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + 1, 2))
627 V2 = OriV2;
628 else
629 return SDValue();
630
631 return DAG.getNode(LoongArchISD::VPACKOD, DL, VT, V2, V1);
632}
633
634/// Lower VECTOR_SHUFFLE into VILVH (if possible).
635///
636/// VILVH interleaves consecutive elements from the left (highest-indexed) half
637/// of each vector.
638///
639/// It is possible to lower into VILVH when the mask consists of two of the
640/// following forms interleaved:
641/// <x, x+1, x+2, ...>
642/// <n+x, n+x+1, n+x+2, ...>
643/// where n is the number of elements in the vector and x is half n.
644/// For example:
645/// <x, x, x+1, x+1, x+2, x+2, ...>
646/// <x, n+x, x+1, n+x+1, x+2, n+x+2, ...>
647///
648/// When undef's appear in the mask they are treated as if they were whatever
649/// value is necessary in order to fit the above forms.
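/// As a concrete illustration, a v4i32 shuffle (n = 4, x = 2) with mask
/// <2, 6, 3, 7> interleaves the high halves of both sources and can be
/// lowered to a single VILVH node.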
650 static SDValue lowerVECTOR_SHUFFLE_VILVH(const SDLoc &DL, ArrayRef<int> Mask,
651 MVT VT, SDValue V1, SDValue V2,
652 SelectionDAG &DAG) {
653
654 const auto &Begin = Mask.begin();
655 const auto &End = Mask.end();
656 unsigned HalfSize = Mask.size() / 2;
657 SDValue OriV1 = V1, OriV2 = V2;
658
659 if (fitsRegularPattern<int>(Begin, 2, End, HalfSize, 1))
660 V1 = OriV1;
661 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + HalfSize, 1))
662 V1 = OriV2;
663 else
664 return SDValue();
665
666 if (fitsRegularPattern<int>(Begin + 1, 2, End, HalfSize, 1))
667 V2 = OriV1;
668 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + HalfSize,
669 1))
670 V2 = OriV2;
671 else
672 return SDValue();
673
674 return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
675}
676
677/// Lower VECTOR_SHUFFLE into VILVL (if possible).
678///
679/// VILVL interleaves consecutive elements from the right (lowest-indexed) half
680/// of each vector.
681///
682/// It is possible to lower into VILVL when the mask consists of two of the
683/// following forms interleaved:
684/// <0, 1, 2, ...>
685/// <n, n+1, n+2, ...>
686/// where n is the number of elements in the vector.
687/// For example:
688/// <0, 0, 1, 1, 2, 2, ...>
689/// <0, n, 1, n+1, 2, n+2, ...>
690///
691/// When undef's appear in the mask they are treated as if they were whatever
692/// value is necessary in order to fit the above forms.
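/// As a concrete illustration, a v4i32 shuffle (n = 4) with mask <0, 4, 1, 5>
/// interleaves the low halves of both sources and can be lowered to a single
/// VILVL node.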
693 static SDValue lowerVECTOR_SHUFFLE_VILVL(const SDLoc &DL, ArrayRef<int> Mask,
694 MVT VT, SDValue V1, SDValue V2,
695 SelectionDAG &DAG) {
696
697 const auto &Begin = Mask.begin();
698 const auto &End = Mask.end();
699 SDValue OriV1 = V1, OriV2 = V2;
700
701 if (fitsRegularPattern<int>(Begin, 2, End, 0, 1))
702 V1 = OriV1;
703 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 1))
704 V1 = OriV2;
705 else
706 return SDValue();
707
708 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 1))
709 V2 = OriV1;
710 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 1))
711 V2 = OriV2;
712 else
713 return SDValue();
714
715 return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
716}
717
718/// Lower VECTOR_SHUFFLE into VPICKEV (if possible).
719///
720/// VPICKEV copies the even elements of each vector into the result vector.
721///
722/// It is possible to lower into VPICKEV when the mask consists of two of the
723/// following forms concatenated:
724/// <0, 2, 4, ...>
725/// <n, n+2, n+4, ...>
726/// where n is the number of elements in the vector.
727/// For example:
728/// <0, 2, 4, ..., 0, 2, 4, ...>
729/// <0, 2, 4, ..., n, n+2, n+4, ...>
730///
731/// When undef's appear in the mask they are treated as if they were whatever
732/// value is necessary in order to fit the above forms.
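/// As a concrete illustration, a v4i32 shuffle (n = 4) with mask <0, 2, 4, 6>
/// concatenates the even elements of both sources and can be lowered to a
/// single VPICKEV node.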
733 static SDValue lowerVECTOR_SHUFFLE_VPICKEV(const SDLoc &DL, ArrayRef<int> Mask,
734 MVT VT, SDValue V1, SDValue V2,
735 SelectionDAG &DAG) {
736
737 const auto &Begin = Mask.begin();
738 const auto &Mid = Mask.begin() + Mask.size() / 2;
739 const auto &End = Mask.end();
740 SDValue OriV1 = V1, OriV2 = V2;
741
742 if (fitsRegularPattern<int>(Begin, 1, Mid, 0, 2))
743 V1 = OriV1;
744 else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size(), 2))
745 V1 = OriV2;
746 else
747 return SDValue();
748
749 if (fitsRegularPattern<int>(Mid, 1, End, 0, 2))
750 V2 = OriV1;
751 else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size(), 2))
752 V2 = OriV2;
753
754 else
755 return SDValue();
756
757 return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
758}
759
760/// Lower VECTOR_SHUFFLE into VPICKOD (if possible).
761///
762/// VPICKOD copies the odd elements of each vector into the result vector.
763///
764/// It is possible to lower into VPICKOD when the mask consists of two of the
765/// following forms concatenated:
766/// <1, 3, 5, ...>
767/// <n+1, n+3, n+5, ...>
768/// where n is the number of elements in the vector.
769/// For example:
770/// <1, 3, 5, ..., 1, 3, 5, ...>
771/// <1, 3, 5, ..., n+1, n+3, n+5, ...>
772///
773/// When undef's appear in the mask they are treated as if they were whatever
774/// value is necessary in order to fit the above forms.
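/// As a concrete illustration, a v4i32 shuffle (n = 4) with mask <1, 3, 5, 7>
/// concatenates the odd elements of both sources and can be lowered to a
/// single VPICKOD node.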
775 static SDValue lowerVECTOR_SHUFFLE_VPICKOD(const SDLoc &DL, ArrayRef<int> Mask,
776 MVT VT, SDValue V1, SDValue V2,
777 SelectionDAG &DAG) {
778
779 const auto &Begin = Mask.begin();
780 const auto &Mid = Mask.begin() + Mask.size() / 2;
781 const auto &End = Mask.end();
782 SDValue OriV1 = V1, OriV2 = V2;
783
784 if (fitsRegularPattern<int>(Begin, 1, Mid, 1, 2))
785 V1 = OriV1;
786 else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size() + 1, 2))
787 V1 = OriV2;
788 else
789 return SDValue();
790
791 if (fitsRegularPattern<int>(Mid, 1, End, 1, 2))
792 V2 = OriV1;
793 else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size() + 1, 2))
794 V2 = OriV2;
795 else
796 return SDValue();
797
798 return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
799}
800
801/// Lower VECTOR_SHUFFLE into VSHUF.
802///
803/// This mostly consists of converting the shuffle mask into a BUILD_VECTOR and
804/// adding it as an operand to the resulting VSHUF.
805 static SDValue lowerVECTOR_SHUFFLE_VSHUF(const SDLoc &DL, ArrayRef<int> Mask,
806 MVT VT, SDValue V1, SDValue V2,
807 SelectionDAG &DAG) {
808
809 SmallVector<SDValue, 16> Ops;
810 for (auto M : Mask)
811 Ops.push_back(DAG.getConstant(M, DL, MVT::i64));
812
813 EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
814 SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, Ops);
815
816 // VECTOR_SHUFFLE concatenates the vectors in an elementwise fashion.
817 // <0b00, 0b01> + <0b10, 0b11> -> <0b00, 0b01, 0b10, 0b11>
818 // VSHUF concatenates the vectors in a bitwise fashion:
819 // <0b00, 0b01> + <0b10, 0b11> ->
820 // 0b0100 + 0b1110 -> 0b01001110
821 // <0b10, 0b11, 0b00, 0b01>
822 // We must therefore swap the operands to get the correct result.
823 return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
824}
825
826/// Dispatching routine to lower various 128-bit LoongArch vector shuffles.
827///
828/// This routine breaks down the specific type of 128-bit shuffle and
829/// dispatches to the lowering routines accordingly.
830 static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
831 SDValue V1, SDValue V2, SelectionDAG &DAG) {
832 assert((VT.SimpleTy == MVT::v16i8 || VT.SimpleTy == MVT::v8i16 ||
833 VT.SimpleTy == MVT::v4i32 || VT.SimpleTy == MVT::v2i64 ||
834 VT.SimpleTy == MVT::v4f32 || VT.SimpleTy == MVT::v2f64) &&
835 "Vector type is unsupported for lsx!");
836 assert(V1.getSimpleValueType() == V2.getSimpleValueType() &&
837 "Two operands have different types!");
838 assert(VT.getVectorNumElements() == Mask.size() &&
839 "Unexpected mask size for shuffle!");
840 assert(Mask.size() % 2 == 0 && "Expected even mask size.");
841
842 SDValue Result;
843 // TODO: Add more comparison patterns.
844 if (V2.isUndef()) {
845 if ((Result = lowerVECTOR_SHUFFLE_VREPLVEI(DL, Mask, VT, V1, V2, DAG)))
846 return Result;
847 if ((Result = lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG)))
848 return Result;
849
850 // TODO: This comment may be enabled in the future to better match the
851 // pattern for instruction selection.
852 /* V2 = V1; */
853 }
854
855 // It is recommended not to change the pattern comparison order for better
856 // performance.
857 if ((Result = lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG)))
858 return Result;
859 if ((Result = lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG)))
860 return Result;
861 if ((Result = lowerVECTOR_SHUFFLE_VILVH(DL, Mask, VT, V1, V2, DAG)))
862 return Result;
863 if ((Result = lowerVECTOR_SHUFFLE_VILVL(DL, Mask, VT, V1, V2, DAG)))
864 return Result;
865 if ((Result = lowerVECTOR_SHUFFLE_VPICKEV(DL, Mask, VT, V1, V2, DAG)))
866 return Result;
867 if ((Result = lowerVECTOR_SHUFFLE_VPICKOD(DL, Mask, VT, V1, V2, DAG)))
868 return Result;
869 if ((Result = lowerVECTOR_SHUFFLE_VSHUF(DL, Mask, VT, V1, V2, DAG)))
870 return Result;
871
872 return SDValue();
873}
874
875/// Lower VECTOR_SHUFFLE into XVREPLVEI (if possible).
876///
877/// It is a XVREPLVEI when the mask is:
878/// <x, x, x, ..., x+n, x+n, x+n, ...>
879/// where the number of x is equal to n and n is half the length of vector.
880///
881/// When undef's appear in the mask they are treated as if they were whatever
882/// value is necessary in order to fit the above form.
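/// As an illustration, a v8i32 shuffle (n = 4) with mask
/// <1, 1, 1, 1, 5, 5, 5, 5> broadcasts element 1 within each 128-bit half of
/// the first source and is lowered to a VREPLVEI node with immediate 1.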
883 static SDValue lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL,
884 ArrayRef<int> Mask, MVT VT,
885 SDValue V1, SDValue V2,
886 SelectionDAG &DAG) {
887 int SplatIndex = -1;
888 for (const auto &M : Mask) {
889 if (M != -1) {
890 SplatIndex = M;
891 break;
892 }
893 }
894
895 if (SplatIndex == -1)
896 return DAG.getUNDEF(VT);
897
898 const auto &Begin = Mask.begin();
899 const auto &End = Mask.end();
900 unsigned HalfSize = Mask.size() / 2;
901
902 assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
903 if (fitsRegularPattern<int>(Begin, 1, End - HalfSize, SplatIndex, 0) &&
904 fitsRegularPattern<int>(Begin + HalfSize, 1, End, SplatIndex + HalfSize,
905 0)) {
906 APInt Imm(64, SplatIndex);
907 return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
908 DAG.getConstant(Imm, DL, MVT::i64));
909 }
910
911 return SDValue();
912}
913
914/// Lower VECTOR_SHUFFLE into XVSHUF4I (if possible).
915 static SDValue lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef<int> Mask,
916 MVT VT, SDValue V1, SDValue V2,
917 SelectionDAG &DAG) {
918 // When the size is less than or equal to 4, lower cost instructions may be
919 // used.
920 if (Mask.size() <= 4)
921 return SDValue();
922 return lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG);
923}
924
925/// Lower VECTOR_SHUFFLE into XVPACKEV (if possible).
926 static SDValue lowerVECTOR_SHUFFLE_XVPACKEV(const SDLoc &DL, ArrayRef<int> Mask,
927 MVT VT, SDValue V1, SDValue V2,
928 SelectionDAG &DAG) {
929 return lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG);
930}
931
932/// Lower VECTOR_SHUFFLE into XVPACKOD (if possible).
933 static SDValue lowerVECTOR_SHUFFLE_XVPACKOD(const SDLoc &DL, ArrayRef<int> Mask,
934 MVT VT, SDValue V1, SDValue V2,
935 SelectionDAG &DAG) {
936 return lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG);
937}
938
939/// Lower VECTOR_SHUFFLE into XVILVH (if possible).
940 static SDValue lowerVECTOR_SHUFFLE_XVILVH(const SDLoc &DL, ArrayRef<int> Mask,
941 MVT VT, SDValue V1, SDValue V2,
942 SelectionDAG &DAG) {
943
944 const auto &Begin = Mask.begin();
945 const auto &End = Mask.end();
946 unsigned HalfSize = Mask.size() / 2;
947 unsigned LeftSize = HalfSize / 2;
948 SDValue OriV1 = V1, OriV2 = V2;
949
950 if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, HalfSize - LeftSize,
951 1) &&
952 fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize + LeftSize, 1))
953 V1 = OriV1;
954 else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize,
955 Mask.size() + HalfSize - LeftSize, 1) &&
956 fitsRegularPattern<int>(Begin + HalfSize, 2, End,
957 Mask.size() + HalfSize + LeftSize, 1))
958 V1 = OriV2;
959 else
960 return SDValue();
961
962 if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, HalfSize - LeftSize,
963 1) &&
964 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize + LeftSize,
965 1))
966 V2 = OriV1;
967 else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize,
968 Mask.size() + HalfSize - LeftSize, 1) &&
969 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
970 Mask.size() + HalfSize + LeftSize, 1))
971 V2 = OriV2;
972 else
973 return SDValue();
974
975 return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
976}
977
978/// Lower VECTOR_SHUFFLE into XVILVL (if possible).
979 static SDValue lowerVECTOR_SHUFFLE_XVILVL(const SDLoc &DL, ArrayRef<int> Mask,
980 MVT VT, SDValue V1, SDValue V2,
981 SelectionDAG &DAG) {
982
983 const auto &Begin = Mask.begin();
984 const auto &End = Mask.end();
985 unsigned HalfSize = Mask.size() / 2;
986 SDValue OriV1 = V1, OriV2 = V2;
987
988 if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, 0, 1) &&
989 fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize, 1))
990 V1 = OriV1;
991 else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, Mask.size(), 1) &&
992 fitsRegularPattern<int>(Begin + HalfSize, 2, End,
993 Mask.size() + HalfSize, 1))
994 V1 = OriV2;
995 else
996 return SDValue();
997
998 if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, 0, 1) &&
999 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize, 1))
1000 V2 = OriV1;
1001 else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, Mask.size(),
1002 1) &&
1003 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
1004 Mask.size() + HalfSize, 1))
1005 V2 = OriV2;
1006 else
1007 return SDValue();
1008
1009 return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
1010}
1011
1012/// Lower VECTOR_SHUFFLE into XVPICKEV (if possible).
1013 static SDValue lowerVECTOR_SHUFFLE_XVPICKEV(const SDLoc &DL, ArrayRef<int> Mask,
1014 MVT VT, SDValue V1, SDValue V2,
1015 SelectionDAG &DAG) {
1016
1017 const auto &Begin = Mask.begin();
1018 const auto &LeftMid = Mask.begin() + Mask.size() / 4;
1019 const auto &Mid = Mask.begin() + Mask.size() / 2;
1020 const auto &RightMid = Mask.end() - Mask.size() / 4;
1021 const auto &End = Mask.end();
1022 unsigned HalfSize = Mask.size() / 2;
1023 SDValue OriV1 = V1, OriV2 = V2;
1024
1025 if (fitsRegularPattern<int>(Begin, 1, LeftMid, 0, 2) &&
1026 fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize, 2))
1027 V1 = OriV1;
1028 else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size(), 2) &&
1029 fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize, 2))
1030 V1 = OriV2;
1031 else
1032 return SDValue();
1033
1034 if (fitsRegularPattern<int>(LeftMid, 1, Mid, 0, 2) &&
1035 fitsRegularPattern<int>(RightMid, 1, End, HalfSize, 2))
1036 V2 = OriV1;
1037 else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size(), 2) &&
1038 fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize, 2))
1039 V2 = OriV2;
1040
1041 else
1042 return SDValue();
1043
1044 return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
1045}
1046
1047/// Lower VECTOR_SHUFFLE into XVPICKOD (if possible).
1048 static SDValue lowerVECTOR_SHUFFLE_XVPICKOD(const SDLoc &DL, ArrayRef<int> Mask,
1049 MVT VT, SDValue V1, SDValue V2,
1050 SelectionDAG &DAG) {
1051
1052 const auto &Begin = Mask.begin();
1053 const auto &LeftMid = Mask.begin() + Mask.size() / 4;
1054 const auto &Mid = Mask.begin() + Mask.size() / 2;
1055 const auto &RightMid = Mask.end() - Mask.size() / 4;
1056 const auto &End = Mask.end();
1057 unsigned HalfSize = Mask.size() / 2;
1058 SDValue OriV1 = V1, OriV2 = V2;
1059
1060 if (fitsRegularPattern<int>(Begin, 1, LeftMid, 1, 2) &&
1061 fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize + 1, 2))
1062 V1 = OriV1;
1063 else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size() + 1, 2) &&
1064 fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize + 1,
1065 2))
1066 V1 = OriV2;
1067 else
1068 return SDValue();
1069
1070 if (fitsRegularPattern<int>(LeftMid, 1, Mid, 1, 2) &&
1071 fitsRegularPattern<int>(RightMid, 1, End, HalfSize + 1, 2))
1072 V2 = OriV1;
1073 else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size() + 1, 2) &&
1074 fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize + 1,
1075 2))
1076 V2 = OriV2;
1077 else
1078 return SDValue();
1079
1080 return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
1081}
1082
1083/// Lower VECTOR_SHUFFLE into XVSHUF (if possible).
1084 static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, ArrayRef<int> Mask,
1085 MVT VT, SDValue V1, SDValue V2,
1086 SelectionDAG &DAG) {
1087
1088 int MaskSize = Mask.size();
1089 int HalfSize = Mask.size() / 2;
1090 const auto &Begin = Mask.begin();
1091 const auto &Mid = Mask.begin() + HalfSize;
1092 const auto &End = Mask.end();
1093
1094 // VECTOR_SHUFFLE concatenates the vectors:
1095 // <0, 1, 2, 3, 4, 5, 6, 7> + <8, 9, 10, 11, 12, 13, 14, 15>
1096 // shuffling ->
1097 // <0, 1, 2, 3, 8, 9, 10, 11> <4, 5, 6, 7, 12, 13, 14, 15>
1098 //
1099 // XVSHUF concatenates the vectors:
1100 // <a0, a1, a2, a3, b0, b1, b2, b3> + <a4, a5, a6, a7, b4, b5, b6, b7>
1101 // shuffling ->
1102 // <a0, a1, a2, a3, a4, a5, a6, a7> + <b0, b1, b2, b3, b4, b5, b6, b7>
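  // As a worked example of the conversion below, a v8i32 VECTOR_SHUFFLE mask
  //   <0, 1, 8, 9, 4, 5, 12, 13>
  // is rewritten by the two loops into the selector
  //   <0, 1, 4, 5, 0, 1, 4, 5>
  // which is then passed as the mask operand of the resulting VSHUF node.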
1103 SmallVector<SDValue, 8> MaskAlloc;
1104 for (auto it = Begin; it < Mid; it++) {
1105 if (*it < 0) // UNDEF
1106 MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
1107 else if ((*it >= 0 && *it < HalfSize) ||
1108 (*it >= MaskSize && *it <= MaskSize + HalfSize)) {
1109 int M = *it < HalfSize ? *it : *it - HalfSize;
1110 MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
1111 } else
1112 return SDValue();
1113 }
1114 assert((int)MaskAlloc.size() == HalfSize && "xvshuf convert failed!");
1115
1116 for (auto it = Mid; it < End; it++) {
1117 if (*it < 0) // UNDEF
1118 MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
1119 else if ((*it >= HalfSize && *it < MaskSize) ||
1120 (*it >= MaskSize + HalfSize && *it < MaskSize * 2)) {
1121 int M = *it < MaskSize ? *it - HalfSize : *it - MaskSize;
1122 MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
1123 } else
1124 return SDValue();
1125 }
1126 assert((int)MaskAlloc.size() == MaskSize && "xvshuf convert failed!");
1127
1128 EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
1129 SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, MaskAlloc);
1130 return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
1131}
1132
1133/// Shuffle vectors by lane to generate more optimized instructions.
1134/// 256-bit shuffles are always considered as 2-lane 128-bit shuffles.
1135///
1136/// Therefore, except for the following four cases, other cases are regarded
1137/// as cross-lane shuffles, where optimization is relatively limited.
1138///
1139/// - Shuffle high, low lanes of two inputs vector
1140/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 3, 6>
1141/// - Shuffle low, high lanes of two inputs vector
1142/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 0, 5>
1143/// - Shuffle low, low lanes of two inputs vector
1144/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 3, 6>
1145/// - Shuffle high, high lanes of two inputs vector
1146/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 0, 5>
1147///
1148/// The first case is the closest to LoongArch instructions and the other
1149/// cases need to be converted to it for processing.
1150///
1151/// This function may modify V1, V2 and Mask
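/// As an illustration, for a v8i32 shuffle of a single input with mask
/// <4, 5, 6, 7, 0, 1, 2, 3> (low lane first, then high lane), this function
/// swaps the two 128-bit halves of V1 with an XVPERMI node and rewrites the
/// mask to <0, 1, 2, 3, 4, 5, 6, 7>, turning it into the first (preferred)
/// case above.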
1152 static void canonicalizeShuffleVectorByLane(const SDLoc &DL,
1153 MutableArrayRef<int> Mask, MVT VT,
1154 SDValue &V1, SDValue &V2,
1155 SelectionDAG &DAG) {
1156
1157 enum HalfMaskType { HighLaneTy, LowLaneTy, None };
1158
1159 int MaskSize = Mask.size();
1160 int HalfSize = Mask.size() / 2;
1161
1162 HalfMaskType preMask = None, postMask = None;
1163
1164 if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
1165 return M < 0 || (M >= 0 && M < HalfSize) ||
1166 (M >= MaskSize && M < MaskSize + HalfSize);
1167 }))
1168 preMask = HighLaneTy;
1169 else if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
1170 return M < 0 || (M >= HalfSize && M < MaskSize) ||
1171 (M >= MaskSize + HalfSize && M < MaskSize * 2);
1172 }))
1173 preMask = LowLaneTy;
1174
1175 if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
1176 return M < 0 || (M >= 0 && M < HalfSize) ||
1177 (M >= MaskSize && M < MaskSize + HalfSize);
1178 }))
1179 postMask = HighLaneTy;
1180 else if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
1181 return M < 0 || (M >= HalfSize && M < MaskSize) ||
1182 (M >= MaskSize + HalfSize && M < MaskSize * 2);
1183 }))
1184 postMask = LowLaneTy;
1185
1186 // The pre-half of mask is high lane type, and the post-half of mask
1187 // is low lane type, which is closest to the LoongArch instructions.
1188 //
1189 // Note: In the LoongArch architecture, the high lane of mask corresponds
1190 // to the lower 128-bit of vector register, and the low lane of mask
1191 // corresponds to the higher 128-bit of vector register.
1192 if (preMask == HighLaneTy && postMask == LowLaneTy) {
1193 return;
1194 }
1195 if (preMask == LowLaneTy && postMask == HighLaneTy) {
1196 V1 = DAG.getBitcast(MVT::v4i64, V1);
1197 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
1198 DAG.getConstant(0b01001110, DL, MVT::i64));
1199 V1 = DAG.getBitcast(VT, V1);
1200
1201 if (!V2.isUndef()) {
1202 V2 = DAG.getBitcast(MVT::v4i64, V2);
1203 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
1204 DAG.getConstant(0b01001110, DL, MVT::i64));
1205 V2 = DAG.getBitcast(VT, V2);
1206 }
1207
1208 for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
1209 *it = *it < 0 ? *it : *it - HalfSize;
1210 }
1211 for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
1212 *it = *it < 0 ? *it : *it + HalfSize;
1213 }
1214 } else if (preMask == LowLaneTy && postMask == LowLaneTy) {
1215 V1 = DAG.getBitcast(MVT::v4i64, V1);
1216 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
1217 DAG.getConstant(0b11101110, DL, MVT::i64));
1218 V1 = DAG.getBitcast(VT, V1);
1219
1220 if (!V2.isUndef()) {
1221 V2 = DAG.getBitcast(MVT::v4i64, V2);
1222 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
1223 DAG.getConstant(0b11101110, DL, MVT::i64));
1224 V2 = DAG.getBitcast(VT, V2);
1225 }
1226
1227 for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
1228 *it = *it < 0 ? *it : *it - HalfSize;
1229 }
1230 } else if (preMask == HighLaneTy && postMask == HighLaneTy) {
1231 V1 = DAG.getBitcast(MVT::v4i64, V1);
1232 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
1233 DAG.getConstant(0b01000100, DL, MVT::i64));
1234 V1 = DAG.getBitcast(VT, V1);
1235
1236 if (!V2.isUndef()) {
1237 V2 = DAG.getBitcast(MVT::v4i64, V2);
1238 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
1239 DAG.getConstant(0b01000100, DL, MVT::i64));
1240 V2 = DAG.getBitcast(VT, V2);
1241 }
1242
1243 for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
1244 *it = *it < 0 ? *it : *it + HalfSize;
1245 }
1246 } else { // cross-lane
1247 return;
1248 }
1249}
1250
1251/// Dispatching routine to lower various 256-bit LoongArch vector shuffles.
1252///
1253/// This routine breaks down the specific type of 256-bit shuffle and
1254/// dispatches to the lowering routines accordingly.
1255 static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
1256 SDValue V1, SDValue V2, SelectionDAG &DAG) {
1257 assert((VT.SimpleTy == MVT::v32i8 || VT.SimpleTy == MVT::v16i16 ||
1258 VT.SimpleTy == MVT::v8i32 || VT.SimpleTy == MVT::v4i64 ||
1259 VT.SimpleTy == MVT::v8f32 || VT.SimpleTy == MVT::v4f64) &&
1260 "Vector type is unsupported for lasx!");
1261 assert(V1.getSimpleValueType() == V2.getSimpleValueType() &&
1262 "Two operands have different types!");
1263 assert(VT.getVectorNumElements() == Mask.size() &&
1264 "Unexpected mask size for shuffle!");
1265 assert(Mask.size() % 2 == 0 && "Expected even mask size.");
1266 assert(Mask.size() >= 4 && "Mask size is less than 4.");
1267
1268 // canonicalize non cross-lane shuffle vector
1269 SmallVector<int> NewMask(Mask);
1270 canonicalizeShuffleVectorByLane(DL, NewMask, VT, V1, V2, DAG);
1271
1272 SDValue Result;
1273 // TODO: Add more comparison patterns.
1274 if (V2.isUndef()) {
1275 if ((Result = lowerVECTOR_SHUFFLE_XVREPLVEI(DL, NewMask, VT, V1, V2, DAG)))
1276 return Result;
1277 if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, NewMask, VT, V1, V2, DAG)))
1278 return Result;
1279
1280 // TODO: This comment may be enabled in the future to better match the
1281 // pattern for instruction selection.
1282 /* V2 = V1; */
1283 }
1284
1285 // It is recommended not to change the pattern comparison order for better
1286 // performance.
1287 if ((Result = lowerVECTOR_SHUFFLE_XVPACKEV(DL, NewMask, VT, V1, V2, DAG)))
1288 return Result;
1289 if ((Result = lowerVECTOR_SHUFFLE_XVPACKOD(DL, NewMask, VT, V1, V2, DAG)))
1290 return Result;
1291 if ((Result = lowerVECTOR_SHUFFLE_XVILVH(DL, NewMask, VT, V1, V2, DAG)))
1292 return Result;
1293 if ((Result = lowerVECTOR_SHUFFLE_XVILVL(DL, NewMask, VT, V1, V2, DAG)))
1294 return Result;
1295 if ((Result = lowerVECTOR_SHUFFLE_XVPICKEV(DL, NewMask, VT, V1, V2, DAG)))
1296 return Result;
1297 if ((Result = lowerVECTOR_SHUFFLE_XVPICKOD(DL, NewMask, VT, V1, V2, DAG)))
1298 return Result;
1299 if ((Result = lowerVECTOR_SHUFFLE_XVSHUF(DL, NewMask, VT, V1, V2, DAG)))
1300 return Result;
1301
1302 return SDValue();
1303}
1304
1305SDValue LoongArchTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
1306 SelectionDAG &DAG) const {
1307 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
1308 ArrayRef<int> OrigMask = SVOp->getMask();
1309 SDValue V1 = Op.getOperand(0);
1310 SDValue V2 = Op.getOperand(1);
1311 MVT VT = Op.getSimpleValueType();
1312 int NumElements = VT.getVectorNumElements();
1313 SDLoc DL(Op);
1314
1315 bool V1IsUndef = V1.isUndef();
1316 bool V2IsUndef = V2.isUndef();
1317 if (V1IsUndef && V2IsUndef)
1318 return DAG.getUNDEF(VT);
1319
1320 // When we create a shuffle node we put the UNDEF node to second operand,
1321 // but in some cases the first operand may be transformed to UNDEF.
1322 // In this case we should just commute the node.
1323 if (V1IsUndef)
1324 return DAG.getCommutedVectorShuffle(*SVOp);
1325
1326 // Check for non-undef masks pointing at an undef vector and make the masks
1327 // undef as well. This makes it easier to match the shuffle based solely on
1328 // the mask.
1329 if (V2IsUndef &&
1330 any_of(OrigMask, [NumElements](int M) { return M >= NumElements; })) {
1331 SmallVector<int, 8> NewMask(OrigMask);
1332 for (int &M : NewMask)
1333 if (M >= NumElements)
1334 M = -1;
1335 return DAG.getVectorShuffle(VT, DL, V1, V2, NewMask);
1336 }
1337
1338 // Check for illegal shuffle mask element index values.
1339 int MaskUpperLimit = OrigMask.size() * (V2IsUndef ? 1 : 2);
1340 (void)MaskUpperLimit;
1341 assert(llvm::all_of(OrigMask,
1342 [&](int M) { return -1 <= M && M < MaskUpperLimit; }) &&
1343 "Out of bounds shuffle index");
1344
1345 // For each vector width, delegate to a specialized lowering routine.
1346 if (VT.is128BitVector())
1347 return lower128BitShuffle(DL, OrigMask, VT, V1, V2, DAG);
1348
1349 if (VT.is256BitVector())
1350 return lower256BitShuffle(DL, OrigMask, VT, V1, V2, DAG);
1351
1352 return SDValue();
1353}
1354
1355static bool isConstantOrUndef(const SDValue Op) {
1356 if (Op->isUndef())
1357 return true;
1358 if (isa<ConstantSDNode>(Op))
1359 return true;
1360 if (isa<ConstantFPSDNode>(Op))
1361 return true;
1362 return false;
1363}
1364
1365 static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode *Op) {
1366 for (unsigned i = 0; i < Op->getNumOperands(); ++i)
1367 if (isConstantOrUndef(Op->getOperand(i)))
1368 return true;
1369 return false;
1370}
1371
1372SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
1373 SelectionDAG &DAG) const {
1374 BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op);
1375 EVT ResTy = Op->getValueType(0);
1376 SDLoc DL(Op);
1377 APInt SplatValue, SplatUndef;
1378 unsigned SplatBitSize;
1379 bool HasAnyUndefs;
1380 bool Is128Vec = ResTy.is128BitVector();
1381 bool Is256Vec = ResTy.is256BitVector();
1382
1383 if ((!Subtarget.hasExtLSX() || !Is128Vec) &&
1384 (!Subtarget.hasExtLASX() || !Is256Vec))
1385 return SDValue();
1386
1387 if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
1388 /*MinSplatBits=*/8) &&
1389 SplatBitSize <= 64) {
1390 // We can only cope with 8, 16, 32, or 64-bit elements.
1391 if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 &&
1392 SplatBitSize != 64)
1393 return SDValue();
1394
1395 EVT ViaVecTy;
1396
1397 switch (SplatBitSize) {
1398 default:
1399 return SDValue();
1400 case 8:
1401 ViaVecTy = Is128Vec ? MVT::v16i8 : MVT::v32i8;
1402 break;
1403 case 16:
1404 ViaVecTy = Is128Vec ? MVT::v8i16 : MVT::v16i16;
1405 break;
1406 case 32:
1407 ViaVecTy = Is128Vec ? MVT::v4i32 : MVT::v8i32;
1408 break;
1409 case 64:
1410 ViaVecTy = Is128Vec ? MVT::v2i64 : MVT::v4i64;
1411 break;
1412 }
1413
1414 // SelectionDAG::getConstant will promote SplatValue appropriately.
1415 SDValue Result = DAG.getConstant(SplatValue, DL, ViaVecTy);
1416
1417 // Bitcast to the type we originally wanted.
1418 if (ViaVecTy != ResTy)
1419 Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result);
1420
1421 return Result;
1422 }
1423
1424 if (DAG.isSplatValue(Op, /*AllowUndefs=*/false))
1425 return Op;
1426
1427 if (!isConstantOrUndefBUILD_VECTOR(Node)) {
1428 // Use INSERT_VECTOR_ELT operations rather than expand to stores.
1429 // The resulting code is the same length as the expansion, but it doesn't
1430 // use memory operations.
1431 EVT ResTy = Node->getValueType(0);
1432
1433 assert(ResTy.isVector());
1434
1435 unsigned NumElts = ResTy.getVectorNumElements();
1436 SDValue Vector = DAG.getUNDEF(ResTy);
1437 for (unsigned i = 0; i < NumElts; ++i) {
1438 Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector,
1439 Node->getOperand(i),
1440 DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
1441 }
1442 return Vector;
1443 }
1444
1445 return SDValue();
1446}
1447
1448SDValue
1449LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
1450 SelectionDAG &DAG) const {
1451 EVT VecTy = Op->getOperand(0)->getValueType(0);
1452 SDValue Idx = Op->getOperand(1);
1453 EVT EltTy = VecTy.getVectorElementType();
1454 unsigned NumElts = VecTy.getVectorNumElements();
1455
1456 if (isa<ConstantSDNode>(Idx) &&
1457 (EltTy == MVT::i32 || EltTy == MVT::i64 || EltTy == MVT::f32 ||
1458 EltTy == MVT::f64 || Idx->getAsZExtVal() < NumElts / 2))
1459 return Op;
1460
1461 return SDValue();
1462}
1463
1464SDValue
1465LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
1466 SelectionDAG &DAG) const {
1467 if (isa<ConstantSDNode>(Op->getOperand(2)))
1468 return Op;
1469 return SDValue();
1470}
1471
1472SDValue LoongArchTargetLowering::lowerATOMIC_FENCE(SDValue Op,
1473 SelectionDAG &DAG) const {
1474 SDLoc DL(Op);
1475 SyncScope::ID FenceSSID =
1476 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
1477
1478 // singlethread fences only synchronize with signal handlers on the same
1479 // thread and thus only need to preserve instruction order, not actually
1480 // enforce memory ordering.
1481 if (FenceSSID == SyncScope::SingleThread)
1482 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
1483 return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
1484
1485 return Op;
1486}
1487
1488SDValue LoongArchTargetLowering::lowerWRITE_REGISTER(SDValue Op,
1489 SelectionDAG &DAG) const {
1490
1491 if (Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i32) {
1492 DAG.getContext()->emitError(
1493 "On LA64, only 64-bit registers can be written.");
1494 return Op.getOperand(0);
1495 }
1496
1497 if (!Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i64) {
1498 DAG.getContext()->emitError(
1499 "On LA32, only 32-bit registers can be written.");
1500 return Op.getOperand(0);
1501 }
1502
1503 return Op;
1504}
1505
1506SDValue LoongArchTargetLowering::lowerFRAMEADDR(SDValue Op,
1507 SelectionDAG &DAG) const {
1508 if (!isa<ConstantSDNode>(Op.getOperand(0))) {
1509 DAG.getContext()->emitError("argument to '__builtin_frame_address' must "
1510 "be a constant integer");
1511 return SDValue();
1512 }
1513
1514 MachineFunction &MF = DAG.getMachineFunction();
1515 MF.getFrameInfo().setFrameAddressIsTaken(true);
1516 Register FrameReg = Subtarget.getRegisterInfo()->getFrameRegister(MF);
1517 EVT VT = Op.getValueType();
1518 SDLoc DL(Op);
1519 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
1520 unsigned Depth = Op.getConstantOperandVal(0);
1521 int GRLenInBytes = Subtarget.getGRLen() / 8;
1522
1523 while (Depth--) {
1524 int Offset = -(GRLenInBytes * 2);
1525 SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
1527 FrameAddr =
1528 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
1529 }
1530 return FrameAddr;
1531}
1532
1533SDValue LoongArchTargetLowering::lowerRETURNADDR(SDValue Op,
1534 SelectionDAG &DAG) const {
1535 if (verifyReturnAddressArgumentIsConstant(Op, DAG))
1536 return SDValue();
1537
1538 // Currently only support lowering return address for current frame.
1539 if (Op.getConstantOperandVal(0) != 0) {
1540 DAG.getContext()->emitError(
1541 "return address can only be determined for the current frame");
1542 return SDValue();
1543 }
1544
1545 MachineFunction &MF = DAG.getMachineFunction();
1546 MF.getFrameInfo().setReturnAddressIsTaken(true);
1547 MVT GRLenVT = Subtarget.getGRLenVT();
1548
1549 // Return the value of the return address register, marking it an implicit
1550 // live-in.
1551 Register Reg = MF.addLiveIn(Subtarget.getRegisterInfo()->getRARegister(),
1552 getRegClassFor(GRLenVT));
1553 return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), Reg, GRLenVT);
1554}
1555
1556SDValue LoongArchTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
1557 SelectionDAG &DAG) const {
1558 MachineFunction &MF = DAG.getMachineFunction();
1559 auto Size = Subtarget.getGRLen() / 8;
1560 auto FI = MF.getFrameInfo().CreateFixedObject(Size, 0, false);
1561 return DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
1562}
1563
1564SDValue LoongArchTargetLowering::lowerVASTART(SDValue Op,
1565 SelectionDAG &DAG) const {
1566 MachineFunction &MF = DAG.getMachineFunction();
1567 auto *FuncInfo = MF.getInfo<LoongArchMachineFunctionInfo>();
1568
1569 SDLoc DL(Op);
1570 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
1571 getPointerTy(MF.getDataLayout()));
1572
1573 // vastart just stores the address of the VarArgsFrameIndex slot into the
1574 // memory location argument.
1575 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
1576 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
1577 MachinePointerInfo(SV));
1578}
1579
1580SDValue LoongArchTargetLowering::lowerUINT_TO_FP(SDValue Op,
1581 SelectionDAG &DAG) const {
1582 assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
1583 !Subtarget.hasBasicD() && "unexpected target features");
1584
1585 SDLoc DL(Op);
1586 SDValue Op0 = Op.getOperand(0);
1587 if (Op0->getOpcode() == ISD::AND) {
1588 auto *C = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
1589 if (C && C->getZExtValue() < UINT64_C(0xFFFFFFFF))
1590 return Op;
1591 }
1592
1593 if (Op0->getOpcode() == LoongArchISD::BSTRPICK &&
1594 Op0.getConstantOperandVal(1) < UINT64_C(0X1F) &&
1595 Op0.getConstantOperandVal(2) == UINT64_C(0))
1596 return Op;
1597
1598 if (Op0.getOpcode() == ISD::AssertZext &&
1599 dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLT(MVT::i32))
1600 return Op;
1601
1602 EVT OpVT = Op0.getValueType();
1603 EVT RetVT = Op.getValueType();
1604 RTLIB::Libcall LC = RTLIB::getUINTTOFP(OpVT, RetVT);
1605 MakeLibCallOptions CallOptions;
1606 CallOptions.setTypeListBeforeSoften(OpVT, RetVT, true);
1607 SDValue Chain = SDValue();
1608 SDValue Result;
1609 std::tie(Result, Chain) =
1610 makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
1611 return Result;
1612}
1613
1614SDValue LoongArchTargetLowering::lowerSINT_TO_FP(SDValue Op,
1615 SelectionDAG &DAG) const {
1616 assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
1617 !Subtarget.hasBasicD() && "unexpected target features");
1618
1619 SDLoc DL(Op);
1620 SDValue Op0 = Op.getOperand(0);
1621
1622 if ((Op0.getOpcode() == ISD::AssertSext ||
1623 Op0.getOpcode() == ISD::SIGN_EXTEND_INREG) &&
1624 dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLE(MVT::i32))
1625 return Op;
1626
1627 EVT OpVT = Op0.getValueType();
1628 EVT RetVT = Op.getValueType();
1629 RTLIB::Libcall LC = RTLIB::getSINTTOFP(OpVT, RetVT);
1630 MakeLibCallOptions CallOptions;
1631 CallOptions.setTypeListBeforeSoften(OpVT, RetVT, true);
1632 SDValue Chain = SDValue();
1633 SDValue Result;
1634 std::tie(Result, Chain) =
1635 makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
1636 return Result;
1637}
1638
1639SDValue LoongArchTargetLowering::lowerBITCAST(SDValue Op,
1640 SelectionDAG &DAG) const {
1641
1642 SDLoc DL(Op);
1643 SDValue Op0 = Op.getOperand(0);
1644
1645 if (Op.getValueType() == MVT::f32 && Op0.getValueType() == MVT::i32 &&
1646 Subtarget.is64Bit() && Subtarget.hasBasicF()) {
1647 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
1648 return DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, NewOp0);
1649 }
1650 return Op;
1651}
1652
1653SDValue LoongArchTargetLowering::lowerFP_TO_SINT(SDValue Op,
1654 SelectionDAG &DAG) const {
1655
1656 SDLoc DL(Op);
1657
1658 if (Op.getValueSizeInBits() > 32 && Subtarget.hasBasicF() &&
1659 !Subtarget.hasBasicD()) {
1660 SDValue Dst =
1661 DAG.getNode(LoongArchISD::FTINT, DL, MVT::f32, Op.getOperand(0));
1662 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Dst);
1663 }
1664
1665 EVT FPTy = EVT::getFloatingPointVT(Op.getValueSizeInBits());
1666 SDValue Trunc = DAG.getNode(LoongArchISD::FTINT, DL, FPTy, Op.getOperand(0));
1667 return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Trunc);
1668}
1669
1670 static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty,
1671 SelectionDAG &DAG, unsigned Flags) {
1672 return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
1673}
1674
1675 static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty,
1676 SelectionDAG &DAG, unsigned Flags) {
1677 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
1678 Flags);
1679}
1680
1681 static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty,
1682 SelectionDAG &DAG, unsigned Flags) {
1683 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
1684 N->getOffset(), Flags);
1685}
1686
1687 static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty,
1688 SelectionDAG &DAG, unsigned Flags) {
1689 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
1690}
1691
1692template <class NodeTy>
1693SDValue LoongArchTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
1694 CodeModel::Model M,
1695 bool IsLocal) const {
1696 SDLoc DL(N);
1697 EVT Ty = getPointerTy(DAG.getDataLayout());
1698 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
1699 SDValue Load;
1700
1701 switch (M) {
1702 default:
1703 report_fatal_error("Unsupported code model");
1704
1705 case CodeModel::Large: {
1706 assert(Subtarget.is64Bit() && "Large code model requires LA64");
1707
1708 // This is not actually used, but is necessary for successfully matching
1709 // the PseudoLA_*_LARGE nodes.
1710 SDValue Tmp = DAG.getConstant(0, DL, Ty);
1711 if (IsLocal) {
1712 // This generates the pattern (PseudoLA_PCREL_LARGE tmp sym), that
1713 // eventually becomes the desired 5-insn code sequence.
1714 Load = SDValue(DAG.getMachineNode(LoongArch::PseudoLA_PCREL_LARGE, DL, Ty,
1715 Tmp, Addr),
1716 0);
1717 } else {
1718 // This generates the pattern (PseudoLA_GOT_LARGE tmp sym), that
1719 // eventually becomes the desired 5-insn code sequence.
1720 Load = SDValue(
1721 DAG.getMachineNode(LoongArch::PseudoLA_GOT_LARGE, DL, Ty, Tmp, Addr),
1722 0);
1723 }
1724 break;
1725 }
1726
1727 case CodeModel::Small:
1728 case CodeModel::Medium:
1729 if (IsLocal) {
1730 // This generates the pattern (PseudoLA_PCREL sym), which expands to
1731 // (addi.w/d (pcalau12i %pc_hi20(sym)) %pc_lo12(sym)).
1732 Load = SDValue(
1733 DAG.getMachineNode(LoongArch::PseudoLA_PCREL, DL, Ty, Addr), 0);
1734 } else {
1735 // This generates the pattern (PseudoLA_GOT sym), which expands to (ld.w/d
1736 // (pcalau12i %got_pc_hi20(sym)) %got_pc_lo12(sym)).
1737 Load =
1738 SDValue(DAG.getMachineNode(LoongArch::PseudoLA_GOT, DL, Ty, Addr), 0);
1739 }
1740 }
1741
1742 if (!IsLocal) {
1743 // Mark the load instruction as invariant to enable hoisting in MachineLICM.
1744 MachineFunction &MF = DAG.getMachineFunction();
1745 MachineMemOperand *MemOp = MF.getMachineMemOperand(
1746 MachinePointerInfo::getGOT(MF),
1747 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
1748 MachineMemOperand::MOInvariant,
1749 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
1750 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
1751 }
1752
1753 return Load;
1754}
1755
1756SDValue LoongArchTargetLowering::lowerBlockAddress(SDValue Op,
1757 SelectionDAG &DAG) const {
1758 return getAddr(cast<BlockAddressSDNode>(Op), DAG,
1759 DAG.getTarget().getCodeModel());
1760}
1761
1762SDValue LoongArchTargetLowering::lowerJumpTable(SDValue Op,
1763 SelectionDAG &DAG) const {
1764 return getAddr(cast<JumpTableSDNode>(Op), DAG,
1765 DAG.getTarget().getCodeModel());
1766}
1767
1768SDValue LoongArchTargetLowering::lowerConstantPool(SDValue Op,
1769 SelectionDAG &DAG) const {
1770 return getAddr(cast<ConstantPoolSDNode>(Op), DAG,
1771 DAG.getTarget().getCodeModel());
1772}
1773
1774SDValue LoongArchTargetLowering::lowerGlobalAddress(SDValue Op,
1775 SelectionDAG &DAG) const {
1776 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
1777 assert(N->getOffset() == 0 && "unexpected offset in global node");
1778 auto CM = DAG.getTarget().getCodeModel();
1779 const GlobalValue *GV = N->getGlobal();
1780
1781 if (GV->isDSOLocal() && isa<GlobalVariable>(GV)) {
1782 if (auto GCM = dyn_cast<GlobalVariable>(GV)->getCodeModel())
1783 CM = *GCM;
1784 }
1785
1786 return getAddr(N, DAG, CM, GV->isDSOLocal());
1787}
1788
1789SDValue LoongArchTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
1790 SelectionDAG &DAG,
1791 unsigned Opc, bool UseGOT,
1792 bool Large) const {
1793 SDLoc DL(N);
1794 EVT Ty = getPointerTy(DAG.getDataLayout());
1795 MVT GRLenVT = Subtarget.getGRLenVT();
1796
1797 // This is not actually used, but is necessary for successfully matching the
1798 // PseudoLA_*_LARGE nodes.
1799 SDValue Tmp = DAG.getConstant(0, DL, Ty);
1800 SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
1801 SDValue Offset = Large
1802 ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
1803 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
1804 if (UseGOT) {
1805 // Mark the load instruction as invariant to enable hoisting in MachineLICM.
1806 MachineFunction &MF = DAG.getMachineFunction();
1807 MachineMemOperand *MemOp = MF.getMachineMemOperand(
1808 MachinePointerInfo::getGOT(MF),
1809 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
1810 MachineMemOperand::MOInvariant,
1811 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
1812 DAG.setNodeMemRefs(cast<MachineSDNode>(Offset.getNode()), {MemOp});
1813 }
1814
1815 // Add the thread pointer.
1816 return DAG.getNode(ISD::ADD, DL, Ty, Offset,
1817 DAG.getRegister(LoongArch::R2, GRLenVT));
1818}
1819
1820SDValue LoongArchTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
1821 SelectionDAG &DAG,
1822 unsigned Opc,
1823 bool Large) const {
1824 SDLoc DL(N);
1825 EVT Ty = getPointerTy(DAG.getDataLayout());
1826 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
1827
1828 // This is not actually used, but is necessary for successfully matching the
1829 // PseudoLA_*_LARGE nodes.
1830 SDValue Tmp = DAG.getConstant(0, DL, Ty);
1831
1832 // Use a PC-relative addressing mode to access the dynamic GOT address.
1833 SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
1834 SDValue Load = Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
1835 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
1836
1837 // Prepare argument list to generate call.
1838 TargetLowering::ArgListTy Args;
1839 ArgListEntry Entry;
1840 Entry.Node = Load;
1841 Entry.Ty = CallTy;
1842 Args.push_back(Entry);
1843
1844 // Setup call to __tls_get_addr.
1845 TargetLowering::CallLoweringInfo CLI(DAG);
1846 CLI.setDebugLoc(DL)
1847 .setChain(DAG.getEntryNode())
1848 .setLibCallee(CallingConv::C, CallTy,
1849 DAG.getExternalSymbol("__tls_get_addr", Ty),
1850 std::move(Args));
1851
1852 return LowerCallTo(CLI).first;
1853}
1854
1855SDValue LoongArchTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
1856 SelectionDAG &DAG, unsigned Opc,
1857 bool Large) const {
1858 SDLoc DL(N);
1859 EVT Ty = getPointerTy(DAG.getDataLayout());
1860 const GlobalValue *GV = N->getGlobal();
1861
1862 // This is not actually used, but is necessary for successfully matching the
1863 // PseudoLA_*_LARGE nodes.
1864 SDValue Tmp = DAG.getConstant(0, DL, Ty);
1865
1866 // Use a PC-relative addressing mode to access the global dynamic GOT address.
1867 // This generates the pattern (PseudoLA_TLS_DESC_PC{,LARGE} sym).
1868 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
1869 return Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
1870 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
1871}
1872
1873SDValue
1874LoongArchTargetLowering::lowerGlobalTLSAddress(SDValue Op,
1875 SelectionDAG &DAG) const {
1876 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
1877 CallingConv::GHC)
1878 report_fatal_error("In GHC calling convention TLS is not supported");
1879
1880 bool Large = DAG.getTarget().getCodeModel() == CodeModel::Large;
1881 assert((!Large || Subtarget.is64Bit()) && "Large code model requires LA64");
1882
1883 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
1884 assert(N->getOffset() == 0 && "unexpected offset in global node");
1885
1886 if (DAG.getTarget().useEmulatedTLS())
1887 report_fatal_error("the emulated TLS is prohibited",
1888 /*GenCrashDiag=*/false);
1889
1890 bool IsDesc = DAG.getTarget().useTLSDESC();
1891
1892 switch (getTargetMachine().getTLSModel(N->getGlobal())) {
1893 case TLSModel::GeneralDynamic:
1894 // In this model, application code calls the dynamic linker function
1895 // __tls_get_addr to locate TLS offsets into the dynamic thread vector at
1896 // runtime.
1897 if (!IsDesc)
1898 return getDynamicTLSAddr(N, DAG,
1899 Large ? LoongArch::PseudoLA_TLS_GD_LARGE
1900 : LoongArch::PseudoLA_TLS_GD,
1901 Large);
1902 break;
1903 case TLSModel::LocalDynamic:
1904 // Same as GeneralDynamic, except for assembly modifiers and relocation
1905 // records.
1906 if (!IsDesc)
1907 return getDynamicTLSAddr(N, DAG,
1908 Large ? LoongArch::PseudoLA_TLS_LD_LARGE
1909 : LoongArch::PseudoLA_TLS_LD,
1910 Large);
1911 break;
1912 case TLSModel::InitialExec:
1913 // This model uses the GOT to resolve TLS offsets.
1914 return getStaticTLSAddr(N, DAG,
1915 Large ? LoongArch::PseudoLA_TLS_IE_LARGE
1916 : LoongArch::PseudoLA_TLS_IE,
1917 /*UseGOT=*/true, Large);
1918 case TLSModel::LocalExec:
1919 // This model is used when static linking as the TLS offsets are resolved
1920 // during program linking.
1921 //
1922 // This node doesn't need an extra argument for the large code model.
1923 return getStaticTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_LE,
1924 /*UseGOT=*/false);
1925 }
1926
1927 return getTLSDescAddr(N, DAG,
1928 Large ? LoongArch::PseudoLA_TLS_DESC_PC_LARGE
1929 : LoongArch::PseudoLA_TLS_DESC_PC,
1930 Large);
1931}
1932
1933template <unsigned N>
1934static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp,
1935 SelectionDAG &DAG, bool IsSigned = false) {
1936 auto *CImm = cast<ConstantSDNode>(Op->getOperand(ImmOp));
1937 // Check the ImmArg.
1938 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
1939 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
1940 DAG.getContext()->emitError(Op->getOperationName(0) +
1941 ": argument out of range.");
1942 return DAG.getNode(ISD::UNDEF, SDLoc(Op), Op.getValueType());
1943 }
1944 return SDValue();
1945}
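
// A minimal sketch (illustrative only, not used by the lowering) of the range
// test performed above: an immediate operand must satisfy isUInt<N>, or
// isInt<N> when IsSigned, before the intrinsic is accepted. For example,
// vrotri.b immediates are checked with N == 3 and must lie in [0, 7].
template <unsigned N>
static constexpr bool fitsIntrinsicImm(int64_t Imm, bool IsSigned = false) {
  return IsSigned ? isInt<N>(Imm) : isUInt<N>(Imm);
}
static_assert(fitsIntrinsicImm<3>(7) && !fitsIntrinsicImm<3>(8),
              "vrotri.b takes a uimm3 immediate");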
1946
1947SDValue
1948LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
1949 SelectionDAG &DAG) const {
1950 SDLoc DL(Op);
1951 switch (Op.getConstantOperandVal(0)) {
1952 default:
1953 return SDValue(); // Don't custom lower most intrinsics.
1954 case Intrinsic::thread_pointer: {
1955 EVT PtrVT = getPointerTy(DAG.getDataLayout());
1956 return DAG.getRegister(LoongArch::R2, PtrVT);
1957 }
1958 case Intrinsic::loongarch_lsx_vpickve2gr_d:
1959 case Intrinsic::loongarch_lsx_vpickve2gr_du:
1960 case Intrinsic::loongarch_lsx_vreplvei_d:
1961 case Intrinsic::loongarch_lasx_xvrepl128vei_d:
1962 return checkIntrinsicImmArg<1>(Op, 2, DAG);
1963 case Intrinsic::loongarch_lsx_vreplvei_w:
1964 case Intrinsic::loongarch_lasx_xvrepl128vei_w:
1965 case Intrinsic::loongarch_lasx_xvpickve2gr_d:
1966 case Intrinsic::loongarch_lasx_xvpickve2gr_du:
1967 case Intrinsic::loongarch_lasx_xvpickve_d:
1968 case Intrinsic::loongarch_lasx_xvpickve_d_f:
1969 return checkIntrinsicImmArg<2>(Op, 2, DAG);
1970 case Intrinsic::loongarch_lasx_xvinsve0_d:
1971 return checkIntrinsicImmArg<2>(Op, 3, DAG);
1972 case Intrinsic::loongarch_lsx_vsat_b:
1973 case Intrinsic::loongarch_lsx_vsat_bu:
1974 case Intrinsic::loongarch_lsx_vrotri_b:
1975 case Intrinsic::loongarch_lsx_vsllwil_h_b:
1976 case Intrinsic::loongarch_lsx_vsllwil_hu_bu:
1977 case Intrinsic::loongarch_lsx_vsrlri_b:
1978 case Intrinsic::loongarch_lsx_vsrari_b:
1979 case Intrinsic::loongarch_lsx_vreplvei_h:
1980 case Intrinsic::loongarch_lasx_xvsat_b:
1981 case Intrinsic::loongarch_lasx_xvsat_bu:
1982 case Intrinsic::loongarch_lasx_xvrotri_b:
1983 case Intrinsic::loongarch_lasx_xvsllwil_h_b:
1984 case Intrinsic::loongarch_lasx_xvsllwil_hu_bu:
1985 case Intrinsic::loongarch_lasx_xvsrlri_b:
1986 case Intrinsic::loongarch_lasx_xvsrari_b:
1987 case Intrinsic::loongarch_lasx_xvrepl128vei_h:
1988 case Intrinsic::loongarch_lasx_xvpickve_w:
1989 case Intrinsic::loongarch_lasx_xvpickve_w_f:
1990 return checkIntrinsicImmArg<3>(Op, 2, DAG);
1991 case Intrinsic::loongarch_lasx_xvinsve0_w:
1992 return checkIntrinsicImmArg<3>(Op, 3, DAG);
1993 case Intrinsic::loongarch_lsx_vsat_h:
1994 case Intrinsic::loongarch_lsx_vsat_hu:
1995 case Intrinsic::loongarch_lsx_vrotri_h:
1996 case Intrinsic::loongarch_lsx_vsllwil_w_h:
1997 case Intrinsic::loongarch_lsx_vsllwil_wu_hu:
1998 case Intrinsic::loongarch_lsx_vsrlri_h:
1999 case Intrinsic::loongarch_lsx_vsrari_h:
2000 case Intrinsic::loongarch_lsx_vreplvei_b:
2001 case Intrinsic::loongarch_lasx_xvsat_h:
2002 case Intrinsic::loongarch_lasx_xvsat_hu:
2003 case Intrinsic::loongarch_lasx_xvrotri_h:
2004 case Intrinsic::loongarch_lasx_xvsllwil_w_h:
2005 case Intrinsic::loongarch_lasx_xvsllwil_wu_hu:
2006 case Intrinsic::loongarch_lasx_xvsrlri_h:
2007 case Intrinsic::loongarch_lasx_xvsrari_h:
2008 case Intrinsic::loongarch_lasx_xvrepl128vei_b:
2009 return checkIntrinsicImmArg<4>(Op, 2, DAG);
2010 case Intrinsic::loongarch_lsx_vsrlni_b_h:
2011 case Intrinsic::loongarch_lsx_vsrani_b_h:
2012 case Intrinsic::loongarch_lsx_vsrlrni_b_h:
2013 case Intrinsic::loongarch_lsx_vsrarni_b_h:
2014 case Intrinsic::loongarch_lsx_vssrlni_b_h:
2015 case Intrinsic::loongarch_lsx_vssrani_b_h:
2016 case Intrinsic::loongarch_lsx_vssrlni_bu_h:
2017 case Intrinsic::loongarch_lsx_vssrani_bu_h:
2018 case Intrinsic::loongarch_lsx_vssrlrni_b_h:
2019 case Intrinsic::loongarch_lsx_vssrarni_b_h:
2020 case Intrinsic::loongarch_lsx_vssrlrni_bu_h:
2021 case Intrinsic::loongarch_lsx_vssrarni_bu_h:
2022 case Intrinsic::loongarch_lasx_xvsrlni_b_h:
2023 case Intrinsic::loongarch_lasx_xvsrani_b_h:
2024 case Intrinsic::loongarch_lasx_xvsrlrni_b_h:
2025 case Intrinsic::loongarch_lasx_xvsrarni_b_h:
2026 case Intrinsic::loongarch_lasx_xvssrlni_b_h:
2027 case Intrinsic::loongarch_lasx_xvssrani_b_h:
2028 case Intrinsic::loongarch_lasx_xvssrlni_bu_h:
2029 case Intrinsic::loongarch_lasx_xvssrani_bu_h:
2030 case Intrinsic::loongarch_lasx_xvssrlrni_b_h:
2031 case Intrinsic::loongarch_lasx_xvssrarni_b_h:
2032 case Intrinsic::loongarch_lasx_xvssrlrni_bu_h:
2033 case Intrinsic::loongarch_lasx_xvssrarni_bu_h:
2034 return checkIntrinsicImmArg<4>(Op, 3, DAG);
2035 case Intrinsic::loongarch_lsx_vsat_w:
2036 case Intrinsic::loongarch_lsx_vsat_wu:
2037 case Intrinsic::loongarch_lsx_vrotri_w:
2038 case Intrinsic::loongarch_lsx_vsllwil_d_w:
2039 case Intrinsic::loongarch_lsx_vsllwil_du_wu:
2040 case Intrinsic::loongarch_lsx_vsrlri_w:
2041 case Intrinsic::loongarch_lsx_vsrari_w:
2042 case Intrinsic::loongarch_lsx_vslei_bu:
2043 case Intrinsic::loongarch_lsx_vslei_hu:
2044 case Intrinsic::loongarch_lsx_vslei_wu:
2045 case Intrinsic::loongarch_lsx_vslei_du:
2046 case Intrinsic::loongarch_lsx_vslti_bu:
2047 case Intrinsic::loongarch_lsx_vslti_hu:
2048 case Intrinsic::loongarch_lsx_vslti_wu:
2049 case Intrinsic::loongarch_lsx_vslti_du:
2050 case Intrinsic::loongarch_lsx_vbsll_v:
2051 case Intrinsic::loongarch_lsx_vbsrl_v:
2052 case Intrinsic::loongarch_lasx_xvsat_w:
2053 case Intrinsic::loongarch_lasx_xvsat_wu:
2054 case Intrinsic::loongarch_lasx_xvrotri_w:
2055 case Intrinsic::loongarch_lasx_xvsllwil_d_w:
2056 case Intrinsic::loongarch_lasx_xvsllwil_du_wu:
2057 case Intrinsic::loongarch_lasx_xvsrlri_w:
2058 case Intrinsic::loongarch_lasx_xvsrari_w:
2059 case Intrinsic::loongarch_lasx_xvslei_bu:
2060 case Intrinsic::loongarch_lasx_xvslei_hu:
2061 case Intrinsic::loongarch_lasx_xvslei_wu:
2062 case Intrinsic::loongarch_lasx_xvslei_du:
2063 case Intrinsic::loongarch_lasx_xvslti_bu:
2064 case Intrinsic::loongarch_lasx_xvslti_hu:
2065 case Intrinsic::loongarch_lasx_xvslti_wu:
2066 case Intrinsic::loongarch_lasx_xvslti_du:
2067 case Intrinsic::loongarch_lasx_xvbsll_v:
2068 case Intrinsic::loongarch_lasx_xvbsrl_v:
2069 return checkIntrinsicImmArg<5>(Op, 2, DAG);
2070 case Intrinsic::loongarch_lsx_vseqi_b:
2071 case Intrinsic::loongarch_lsx_vseqi_h:
2072 case Intrinsic::loongarch_lsx_vseqi_w:
2073 case Intrinsic::loongarch_lsx_vseqi_d:
2074 case Intrinsic::loongarch_lsx_vslei_b:
2075 case Intrinsic::loongarch_lsx_vslei_h:
2076 case Intrinsic::loongarch_lsx_vslei_w:
2077 case Intrinsic::loongarch_lsx_vslei_d:
2078 case Intrinsic::loongarch_lsx_vslti_b:
2079 case Intrinsic::loongarch_lsx_vslti_h:
2080 case Intrinsic::loongarch_lsx_vslti_w:
2081 case Intrinsic::loongarch_lsx_vslti_d:
2082 case Intrinsic::loongarch_lasx_xvseqi_b:
2083 case Intrinsic::loongarch_lasx_xvseqi_h:
2084 case Intrinsic::loongarch_lasx_xvseqi_w:
2085 case Intrinsic::loongarch_lasx_xvseqi_d:
2086 case Intrinsic::loongarch_lasx_xvslei_b:
2087 case Intrinsic::loongarch_lasx_xvslei_h:
2088 case Intrinsic::loongarch_lasx_xvslei_w:
2089 case Intrinsic::loongarch_lasx_xvslei_d:
2090 case Intrinsic::loongarch_lasx_xvslti_b:
2091 case Intrinsic::loongarch_lasx_xvslti_h:
2092 case Intrinsic::loongarch_lasx_xvslti_w:
2093 case Intrinsic::loongarch_lasx_xvslti_d:
2094 return checkIntrinsicImmArg<5>(Op, 2, DAG, /*IsSigned=*/true);
2095 case Intrinsic::loongarch_lsx_vsrlni_h_w:
2096 case Intrinsic::loongarch_lsx_vsrani_h_w:
2097 case Intrinsic::loongarch_lsx_vsrlrni_h_w:
2098 case Intrinsic::loongarch_lsx_vsrarni_h_w:
2099 case Intrinsic::loongarch_lsx_vssrlni_h_w:
2100 case Intrinsic::loongarch_lsx_vssrani_h_w:
2101 case Intrinsic::loongarch_lsx_vssrlni_hu_w:
2102 case Intrinsic::loongarch_lsx_vssrani_hu_w:
2103 case Intrinsic::loongarch_lsx_vssrlrni_h_w:
2104 case Intrinsic::loongarch_lsx_vssrarni_h_w:
2105 case Intrinsic::loongarch_lsx_vssrlrni_hu_w:
2106 case Intrinsic::loongarch_lsx_vssrarni_hu_w:
2107 case Intrinsic::loongarch_lsx_vfrstpi_b:
2108 case Intrinsic::loongarch_lsx_vfrstpi_h:
2109 case Intrinsic::loongarch_lasx_xvsrlni_h_w:
2110 case Intrinsic::loongarch_lasx_xvsrani_h_w:
2111 case Intrinsic::loongarch_lasx_xvsrlrni_h_w:
2112 case Intrinsic::loongarch_lasx_xvsrarni_h_w:
2113 case Intrinsic::loongarch_lasx_xvssrlni_h_w:
2114 case Intrinsic::loongarch_lasx_xvssrani_h_w:
2115 case Intrinsic::loongarch_lasx_xvssrlni_hu_w:
2116 case Intrinsic::loongarch_lasx_xvssrani_hu_w:
2117 case Intrinsic::loongarch_lasx_xvssrlrni_h_w:
2118 case Intrinsic::loongarch_lasx_xvssrarni_h_w:
2119 case Intrinsic::loongarch_lasx_xvssrlrni_hu_w:
2120 case Intrinsic::loongarch_lasx_xvssrarni_hu_w:
2121 case Intrinsic::loongarch_lasx_xvfrstpi_b:
2122 case Intrinsic::loongarch_lasx_xvfrstpi_h:
2123 return checkIntrinsicImmArg<5>(Op, 3, DAG);
2124 case Intrinsic::loongarch_lsx_vsat_d:
2125 case Intrinsic::loongarch_lsx_vsat_du:
2126 case Intrinsic::loongarch_lsx_vrotri_d:
2127 case Intrinsic::loongarch_lsx_vsrlri_d:
2128 case Intrinsic::loongarch_lsx_vsrari_d:
2129 case Intrinsic::loongarch_lasx_xvsat_d:
2130 case Intrinsic::loongarch_lasx_xvsat_du:
2131 case Intrinsic::loongarch_lasx_xvrotri_d:
2132 case Intrinsic::loongarch_lasx_xvsrlri_d:
2133 case Intrinsic::loongarch_lasx_xvsrari_d:
2134 return checkIntrinsicImmArg<6>(Op, 2, DAG);
2135 case Intrinsic::loongarch_lsx_vsrlni_w_d:
2136 case Intrinsic::loongarch_lsx_vsrani_w_d:
2137 case Intrinsic::loongarch_lsx_vsrlrni_w_d:
2138 case Intrinsic::loongarch_lsx_vsrarni_w_d:
2139 case Intrinsic::loongarch_lsx_vssrlni_w_d:
2140 case Intrinsic::loongarch_lsx_vssrani_w_d:
2141 case Intrinsic::loongarch_lsx_vssrlni_wu_d:
2142 case Intrinsic::loongarch_lsx_vssrani_wu_d:
2143 case Intrinsic::loongarch_lsx_vssrlrni_w_d:
2144 case Intrinsic::loongarch_lsx_vssrarni_w_d:
2145 case Intrinsic::loongarch_lsx_vssrlrni_wu_d:
2146 case Intrinsic::loongarch_lsx_vssrarni_wu_d:
2147 case Intrinsic::loongarch_lasx_xvsrlni_w_d:
2148 case Intrinsic::loongarch_lasx_xvsrani_w_d:
2149 case Intrinsic::loongarch_lasx_xvsrlrni_w_d:
2150 case Intrinsic::loongarch_lasx_xvsrarni_w_d:
2151 case Intrinsic::loongarch_lasx_xvssrlni_w_d:
2152 case Intrinsic::loongarch_lasx_xvssrani_w_d:
2153 case Intrinsic::loongarch_lasx_xvssrlni_wu_d:
2154 case Intrinsic::loongarch_lasx_xvssrani_wu_d:
2155 case Intrinsic::loongarch_lasx_xvssrlrni_w_d:
2156 case Intrinsic::loongarch_lasx_xvssrarni_w_d:
2157 case Intrinsic::loongarch_lasx_xvssrlrni_wu_d:
2158 case Intrinsic::loongarch_lasx_xvssrarni_wu_d:
2159 return checkIntrinsicImmArg<6>(Op, 3, DAG);
2160 case Intrinsic::loongarch_lsx_vsrlni_d_q:
2161 case Intrinsic::loongarch_lsx_vsrani_d_q:
2162 case Intrinsic::loongarch_lsx_vsrlrni_d_q:
2163 case Intrinsic::loongarch_lsx_vsrarni_d_q:
2164 case Intrinsic::loongarch_lsx_vssrlni_d_q:
2165 case Intrinsic::loongarch_lsx_vssrani_d_q:
2166 case Intrinsic::loongarch_lsx_vssrlni_du_q:
2167 case Intrinsic::loongarch_lsx_vssrani_du_q:
2168 case Intrinsic::loongarch_lsx_vssrlrni_d_q:
2169 case Intrinsic::loongarch_lsx_vssrarni_d_q:
2170 case Intrinsic::loongarch_lsx_vssrlrni_du_q:
2171 case Intrinsic::loongarch_lsx_vssrarni_du_q:
2172 case Intrinsic::loongarch_lasx_xvsrlni_d_q:
2173 case Intrinsic::loongarch_lasx_xvsrani_d_q:
2174 case Intrinsic::loongarch_lasx_xvsrlrni_d_q:
2175 case Intrinsic::loongarch_lasx_xvsrarni_d_q:
2176 case Intrinsic::loongarch_lasx_xvssrlni_d_q:
2177 case Intrinsic::loongarch_lasx_xvssrani_d_q:
2178 case Intrinsic::loongarch_lasx_xvssrlni_du_q:
2179 case Intrinsic::loongarch_lasx_xvssrani_du_q:
2180 case Intrinsic::loongarch_lasx_xvssrlrni_d_q:
2181 case Intrinsic::loongarch_lasx_xvssrarni_d_q:
2182 case Intrinsic::loongarch_lasx_xvssrlrni_du_q:
2183 case Intrinsic::loongarch_lasx_xvssrarni_du_q:
2184 return checkIntrinsicImmArg<7>(Op, 3, DAG);
2185 case Intrinsic::loongarch_lsx_vnori_b:
2186 case Intrinsic::loongarch_lsx_vshuf4i_b:
2187 case Intrinsic::loongarch_lsx_vshuf4i_h:
2188 case Intrinsic::loongarch_lsx_vshuf4i_w:
2189 case Intrinsic::loongarch_lasx_xvnori_b:
2190 case Intrinsic::loongarch_lasx_xvshuf4i_b:
2191 case Intrinsic::loongarch_lasx_xvshuf4i_h:
2192 case Intrinsic::loongarch_lasx_xvshuf4i_w:
2193 case Intrinsic::loongarch_lasx_xvpermi_d:
2194 return checkIntrinsicImmArg<8>(Op, 2, DAG);
2195 case Intrinsic::loongarch_lsx_vshuf4i_d:
2196 case Intrinsic::loongarch_lsx_vpermi_w:
2197 case Intrinsic::loongarch_lsx_vbitseli_b:
2198 case Intrinsic::loongarch_lsx_vextrins_b:
2199 case Intrinsic::loongarch_lsx_vextrins_h:
2200 case Intrinsic::loongarch_lsx_vextrins_w:
2201 case Intrinsic::loongarch_lsx_vextrins_d:
2202 case Intrinsic::loongarch_lasx_xvshuf4i_d:
2203 case Intrinsic::loongarch_lasx_xvpermi_w:
2204 case Intrinsic::loongarch_lasx_xvpermi_q:
2205 case Intrinsic::loongarch_lasx_xvbitseli_b:
2206 case Intrinsic::loongarch_lasx_xvextrins_b:
2207 case Intrinsic::loongarch_lasx_xvextrins_h:
2208 case Intrinsic::loongarch_lasx_xvextrins_w:
2209 case Intrinsic::loongarch_lasx_xvextrins_d:
2210 return checkIntrinsicImmArg<8>(Op, 3, DAG);
2211 case Intrinsic::loongarch_lsx_vrepli_b:
2212 case Intrinsic::loongarch_lsx_vrepli_h:
2213 case Intrinsic::loongarch_lsx_vrepli_w:
2214 case Intrinsic::loongarch_lsx_vrepli_d:
2215 case Intrinsic::loongarch_lasx_xvrepli_b:
2216 case Intrinsic::loongarch_lasx_xvrepli_h:
2217 case Intrinsic::loongarch_lasx_xvrepli_w:
2218 case Intrinsic::loongarch_lasx_xvrepli_d:
2219 return checkIntrinsicImmArg<10>(Op, 1, DAG, /*IsSigned=*/true);
2220 case Intrinsic::loongarch_lsx_vldi:
2221 case Intrinsic::loongarch_lasx_xvldi:
2222 return checkIntrinsicImmArg<13>(Op, 1, DAG, /*IsSigned=*/true);
2223 }
2224}
2225
2226// Helper function that emits an error message for intrinsics with a chain and
2227// returns the merge values of a UNDEF and the chain.
2228static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op,
2229 StringRef ErrorMsg,
2230 SelectionDAG &DAG) {
2231 DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
2232 return DAG.getMergeValues({DAG.getUNDEF(Op.getValueType()), Op.getOperand(0)},
2233 SDLoc(Op));
2234}
2235
2236SDValue
2237LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
2238 SelectionDAG &DAG) const {
2239 SDLoc DL(Op);
2240 MVT GRLenVT = Subtarget.getGRLenVT();
2241 EVT VT = Op.getValueType();
2242 SDValue Chain = Op.getOperand(0);
2243 const StringRef ErrorMsgOOR = "argument out of range";
2244 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
2245 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
2246
2247 switch (Op.getConstantOperandVal(1)) {
2248 default:
2249 return Op;
2250 case Intrinsic::loongarch_crc_w_b_w:
2251 case Intrinsic::loongarch_crc_w_h_w:
2252 case Intrinsic::loongarch_crc_w_w_w:
2253 case Intrinsic::loongarch_crc_w_d_w:
2254 case Intrinsic::loongarch_crcc_w_b_w:
2255 case Intrinsic::loongarch_crcc_w_h_w:
2256 case Intrinsic::loongarch_crcc_w_w_w:
2257 case Intrinsic::loongarch_crcc_w_d_w:
2258 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqLA64, DAG);
2259 case Intrinsic::loongarch_csrrd_w:
2260 case Intrinsic::loongarch_csrrd_d: {
2261 unsigned Imm = Op.getConstantOperandVal(2);
2262 return !isUInt<14>(Imm)
2263 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
2264 : DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
2265 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
2266 }
2267 case Intrinsic::loongarch_csrwr_w:
2268 case Intrinsic::loongarch_csrwr_d: {
2269 unsigned Imm = Op.getConstantOperandVal(3);
2270 return !isUInt<14>(Imm)
2271 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
2272 : DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
2273 {Chain, Op.getOperand(2),
2274 DAG.getConstant(Imm, DL, GRLenVT)});
2275 }
2276 case Intrinsic::loongarch_csrxchg_w:
2277 case Intrinsic::loongarch_csrxchg_d: {
2278 unsigned Imm = Op.getConstantOperandVal(4);
2279 return !isUInt<14>(Imm)
2280 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
2281 : DAG.getNode(LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
2282 {Chain, Op.getOperand(2), Op.getOperand(3),
2283 DAG.getConstant(Imm, DL, GRLenVT)});
2284 }
2285 case Intrinsic::loongarch_iocsrrd_d: {
2286 return DAG.getNode(
2287 LoongArchISD::IOCSRRD_D, DL, {GRLenVT, MVT::Other},
2288 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2))});
2289 }
2290#define IOCSRRD_CASE(NAME, NODE) \
2291 case Intrinsic::loongarch_##NAME: { \
2292 return DAG.getNode(LoongArchISD::NODE, DL, {GRLenVT, MVT::Other}, \
2293 {Chain, Op.getOperand(2)}); \
2294 }
2295 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
2296 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
2297 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
2298#undef IOCSRRD_CASE
2299 case Intrinsic::loongarch_cpucfg: {
2300 return DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
2301 {Chain, Op.getOperand(2)});
2302 }
2303 case Intrinsic::loongarch_lddir_d: {
2304 unsigned Imm = Op.getConstantOperandVal(3);
2305 return !isUInt<8>(Imm)
2306 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
2307 : Op;
2308 }
2309 case Intrinsic::loongarch_movfcsr2gr: {
2310 if (!Subtarget.hasBasicF())
2311 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqF, DAG);
2312 unsigned Imm = Op.getConstantOperandVal(2);
2313 return !isUInt<2>(Imm)
2314 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
2315 : DAG.getNode(LoongArchISD::MOVFCSR2GR, DL, {VT, MVT::Other},
2316 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
2317 }
2318 case Intrinsic::loongarch_lsx_vld:
2319 case Intrinsic::loongarch_lsx_vldrepl_b:
2320 case Intrinsic::loongarch_lasx_xvld:
2321 case Intrinsic::loongarch_lasx_xvldrepl_b:
2322 return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
2323 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
2324 : SDValue();
2325 case Intrinsic::loongarch_lsx_vldrepl_h:
2326 case Intrinsic::loongarch_lasx_xvldrepl_h:
2327 return !isShiftedInt<11, 1>(
2328 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
2329               ? emitIntrinsicWithChainErrorMessage(
2330 Op, "argument out of range or not a multiple of 2", DAG)
2331 : SDValue();
2332 case Intrinsic::loongarch_lsx_vldrepl_w:
2333 case Intrinsic::loongarch_lasx_xvldrepl_w:
2334 return !isShiftedInt<10, 2>(
2335 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
2336               ? emitIntrinsicWithChainErrorMessage(
2337 Op, "argument out of range or not a multiple of 4", DAG)
2338 : SDValue();
2339 case Intrinsic::loongarch_lsx_vldrepl_d:
2340 case Intrinsic::loongarch_lasx_xvldrepl_d:
2341 return !isShiftedInt<9, 3>(
2342 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
2343               ? emitIntrinsicWithChainErrorMessage(
2344 Op, "argument out of range or not a multiple of 8", DAG)
2345 : SDValue();
2346 }
2347}
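
// A hypothetical usage sketch (user-side code, not part of this file): the
// csrrd intrinsic handled above. The CSR number must be a constant uimm14;
// 0x0 selects CRMD. The builtin name and header are assumptions here, following
// the usual Clang larchintrin.h conventions.
//
//   #include <larchintrin.h>
//   unsigned long readCRMD(void) { return __csrrd_d(0x0); } // -> LoongArchISD::CSRRD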
2348
2349// Helper function that emits an error message for intrinsics with a void
2350// return value and returns the chain.
2351static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg,
2352 SelectionDAG &DAG) {
2353
2354 DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
2355 return Op.getOperand(0);
2356}
2357
2358SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op,
2359 SelectionDAG &DAG) const {
2360 SDLoc DL(Op);
2361 MVT GRLenVT = Subtarget.getGRLenVT();
2362 SDValue Chain = Op.getOperand(0);
2363 uint64_t IntrinsicEnum = Op.getConstantOperandVal(1);
2364 SDValue Op2 = Op.getOperand(2);
2365 const StringRef ErrorMsgOOR = "argument out of range";
2366 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
2367 const StringRef ErrorMsgReqLA32 = "requires loongarch32";
2368 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
2369
2370 switch (IntrinsicEnum) {
2371 default:
2372 // TODO: Add more Intrinsics.
2373 return SDValue();
2374 case Intrinsic::loongarch_cacop_d:
2375 case Intrinsic::loongarch_cacop_w: {
2376 if (IntrinsicEnum == Intrinsic::loongarch_cacop_d && !Subtarget.is64Bit())
2377 return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG);
2378 if (IntrinsicEnum == Intrinsic::loongarch_cacop_w && Subtarget.is64Bit())
2379 return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA32, DAG);
2380 // call void @llvm.loongarch.cacop.[d/w](uimm5, rj, simm12)
2381 unsigned Imm1 = Op2->getAsZExtVal();
2382 int Imm2 = cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue();
2383 if (!isUInt<5>(Imm1) || !isInt<12>(Imm2))
2384 return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
2385 return Op;
2386 }
2387 case Intrinsic::loongarch_dbar: {
2388 unsigned Imm = Op2->getAsZExtVal();
2389 return !isUInt<15>(Imm)
2390 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2391 : DAG.getNode(LoongArchISD::DBAR, DL, MVT::Other, Chain,
2392 DAG.getConstant(Imm, DL, GRLenVT));
2393 }
2394 case Intrinsic::loongarch_ibar: {
2395 unsigned Imm = Op2->getAsZExtVal();
2396 return !isUInt<15>(Imm)
2397 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2398 : DAG.getNode(LoongArchISD::IBAR, DL, MVT::Other, Chain,
2399 DAG.getConstant(Imm, DL, GRLenVT));
2400 }
2401 case Intrinsic::loongarch_break: {
2402 unsigned Imm = Op2->getAsZExtVal();
2403 return !isUInt<15>(Imm)
2404 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2405 : DAG.getNode(LoongArchISD::BREAK, DL, MVT::Other, Chain,
2406 DAG.getConstant(Imm, DL, GRLenVT));
2407 }
2408 case Intrinsic::loongarch_movgr2fcsr: {
2409 if (!Subtarget.hasBasicF())
2410 return emitIntrinsicErrorMessage(Op, ErrorMsgReqF, DAG);
2411 unsigned Imm = Op2->getAsZExtVal();
2412 return !isUInt<2>(Imm)
2413 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2414 : DAG.getNode(LoongArchISD::MOVGR2FCSR, DL, MVT::Other, Chain,
2415 DAG.getConstant(Imm, DL, GRLenVT),
2416 DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT,
2417 Op.getOperand(3)));
2418 }
2419 case Intrinsic::loongarch_syscall: {
2420 unsigned Imm = Op2->getAsZExtVal();
2421 return !isUInt<15>(Imm)
2422 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2423 : DAG.getNode(LoongArchISD::SYSCALL, DL, MVT::Other, Chain,
2424 DAG.getConstant(Imm, DL, GRLenVT));
2425 }
2426#define IOCSRWR_CASE(NAME, NODE) \
2427 case Intrinsic::loongarch_##NAME: { \
2428 SDValue Op3 = Op.getOperand(3); \
2429 return Subtarget.is64Bit() \
2430 ? DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, \
2431 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
2432 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op3)) \
2433 : DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, Op2, \
2434 Op3); \
2435 }
2436 IOCSRWR_CASE(iocsrwr_b, IOCSRWR_B);
2437 IOCSRWR_CASE(iocsrwr_h, IOCSRWR_H);
2438 IOCSRWR_CASE(iocsrwr_w, IOCSRWR_W);
2439#undef IOCSRWR_CASE
2440 case Intrinsic::loongarch_iocsrwr_d: {
2441 return !Subtarget.is64Bit()
2442 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
2443 : DAG.getNode(LoongArchISD::IOCSRWR_D, DL, MVT::Other, Chain,
2444 Op2,
2445 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
2446 Op.getOperand(3)));
2447 }
2448#define ASRT_LE_GT_CASE(NAME) \
2449 case Intrinsic::loongarch_##NAME: { \
2450 return !Subtarget.is64Bit() \
2451 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG) \
2452 : Op; \
2453 }
2454 ASRT_LE_GT_CASE(asrtle_d)
2455 ASRT_LE_GT_CASE(asrtgt_d)
2456#undef ASRT_LE_GT_CASE
2457 case Intrinsic::loongarch_ldpte_d: {
2458 unsigned Imm = Op.getConstantOperandVal(3);
2459 return !Subtarget.is64Bit()
2460 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
2461 : !isUInt<8>(Imm) ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2462 : Op;
2463 }
2464 case Intrinsic::loongarch_lsx_vst:
2465 case Intrinsic::loongarch_lasx_xvst:
2466 return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue())
2467 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2468 : SDValue();
2469 case Intrinsic::loongarch_lasx_xvstelm_b:
2470 return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2471 !isUInt<5>(Op.getConstantOperandVal(5)))
2472 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2473 : SDValue();
2474 case Intrinsic::loongarch_lsx_vstelm_b:
2475 return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2476 !isUInt<4>(Op.getConstantOperandVal(5)))
2477 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2478 : SDValue();
2479 case Intrinsic::loongarch_lasx_xvstelm_h:
2480 return (!isShiftedInt<8, 1>(
2481 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2482 !isUInt<4>(Op.getConstantOperandVal(5)))
2483               ? emitIntrinsicErrorMessage(
2484 Op, "argument out of range or not a multiple of 2", DAG)
2485 : SDValue();
2486 case Intrinsic::loongarch_lsx_vstelm_h:
2487 return (!isShiftedInt<8, 1>(
2488 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2489 !isUInt<3>(Op.getConstantOperandVal(5)))
2490               ? emitIntrinsicErrorMessage(
2491 Op, "argument out of range or not a multiple of 2", DAG)
2492 : SDValue();
2493 case Intrinsic::loongarch_lasx_xvstelm_w:
2494 return (!isShiftedInt<8, 2>(
2495 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2496 !isUInt<3>(Op.getConstantOperandVal(5)))
2497               ? emitIntrinsicErrorMessage(
2498 Op, "argument out of range or not a multiple of 4", DAG)
2499 : SDValue();
2500 case Intrinsic::loongarch_lsx_vstelm_w:
2501 return (!isShiftedInt<8, 2>(
2502 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2503 !isUInt<2>(Op.getConstantOperandVal(5)))
2504               ? emitIntrinsicErrorMessage(
2505 Op, "argument out of range or not a multiple of 4", DAG)
2506 : SDValue();
2507 case Intrinsic::loongarch_lasx_xvstelm_d:
2508 return (!isShiftedInt<8, 3>(
2509 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2510 !isUInt<2>(Op.getConstantOperandVal(5)))
2511               ? emitIntrinsicErrorMessage(
2512 Op, "argument out of range or not a multiple of 8", DAG)
2513 : SDValue();
2514 case Intrinsic::loongarch_lsx_vstelm_d:
2515 return (!isShiftedInt<8, 3>(
2516 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2517 !isUInt<1>(Op.getConstantOperandVal(5)))
2518               ? emitIntrinsicErrorMessage(
2519 Op, "argument out of range or not a multiple of 8", DAG)
2520 : SDValue();
2521 }
2522}
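
// A hypothetical usage sketch (user-side code, not part of this file): the
// barrier intrinsics handled above take a constant uimm15 hint, with 0 being
// the full-barrier hint. The builtin name and header are assumptions here,
// following the usual Clang larchintrin.h conventions.
//
//   #include <larchintrin.h>
//   void fullBarrier(void) { __dbar(0); } // -> LoongArchISD::DBAR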
2523
2524SDValue LoongArchTargetLowering::lowerShiftLeftParts(SDValue Op,
2525 SelectionDAG &DAG) const {
2526 SDLoc DL(Op);
2527 SDValue Lo = Op.getOperand(0);
2528 SDValue Hi = Op.getOperand(1);
2529 SDValue Shamt = Op.getOperand(2);
2530 EVT VT = Lo.getValueType();
2531
2532 // if Shamt-GRLen < 0: // Shamt < GRLen
2533 // Lo = Lo << Shamt
2534 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (GRLen-1 ^ Shamt))
2535 // else:
2536 // Lo = 0
2537 // Hi = Lo << (Shamt-GRLen)
2538
2539 SDValue Zero = DAG.getConstant(0, DL, VT);
2540 SDValue One = DAG.getConstant(1, DL, VT);
2541 SDValue MinusGRLen = DAG.getConstant(-(int)Subtarget.getGRLen(), DL, VT);
2542 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
2543 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
2544 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
2545
2546 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
2547 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
2548 SDValue ShiftRightLo =
2549 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, GRLenMinus1Shamt);
2550 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
2551 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
2552 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusGRLen);
2553
2554 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
2555
2556 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
2557 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
2558
2559 SDValue Parts[2] = {Lo, Hi};
2560 return DAG.getMergeValues(Parts, DL);
2561}
2562
2563SDValue LoongArchTargetLowering::lowerShiftRightParts(SDValue Op,
2564 SelectionDAG &DAG,
2565 bool IsSRA) const {
2566 SDLoc DL(Op);
2567 SDValue Lo = Op.getOperand(0);
2568 SDValue Hi = Op.getOperand(1);
2569 SDValue Shamt = Op.getOperand(2);
2570 EVT VT = Lo.getValueType();
2571
2572 // SRA expansion:
2573 // if Shamt-GRLen < 0: // Shamt < GRLen
2574 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
2575 // Hi = Hi >>s Shamt
2576 // else:
2577 // Lo = Hi >>s (Shamt-GRLen);
2578 // Hi = Hi >>s (GRLen-1)
2579 //
2580 // SRL expansion:
2581 // if Shamt-GRLen < 0: // Shamt < GRLen
2582 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
2583 // Hi = Hi >>u Shamt
2584 // else:
2585 // Lo = Hi >>u (Shamt-GRLen);
2586 // Hi = 0;
2587
2588 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
2589
2590 SDValue Zero = DAG.getConstant(0, DL, VT);
2591 SDValue One = DAG.getConstant(1, DL, VT);
2592 SDValue MinusGRLen = DAG.getConstant(-(int)Subtarget.getGRLen(), DL, VT);
2593 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
2594 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
2595 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
2596
2597 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
2598 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
2599 SDValue ShiftLeftHi =
2600 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, GRLenMinus1Shamt);
2601 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
2602 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
2603 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusGRLen);
2604 SDValue HiFalse =
2605 IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, GRLenMinus1) : Zero;
2606
2607 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
2608
2609 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
2610 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
2611
2612 SDValue Parts[2] = {Lo, Hi};
2613 return DAG.getMergeValues(Parts, DL);
2614}
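
// A minimal scalar sketch (illustrative only, assuming GRLen == 32; uint32_t
// is available through the support headers already included) of the
// SHL_PARTS/SRL_PARTS expansions above. It mirrors the pseudocode comments;
// note how "(x >> 1) >> (31 ^ Shamt)" computes x >> (32 - Shamt) without ever
// shifting a 32-bit value by 32 when Shamt == 0.
static void shlParts32Model(uint32_t Lo, uint32_t Hi, unsigned Shamt,
                            uint32_t &OutLo, uint32_t &OutHi) {
  if (Shamt < 32) {
    OutLo = Lo << Shamt;
    OutHi = (Hi << Shamt) | ((Lo >> 1) >> (31 ^ Shamt));
  } else {
    OutLo = 0;
    OutHi = Lo << (Shamt - 32);
  }
}
static void srlParts32Model(uint32_t Lo, uint32_t Hi, unsigned Shamt,
                            uint32_t &OutLo, uint32_t &OutHi) {
  if (Shamt < 32) {
    OutLo = (Lo >> Shamt) | ((Hi << 1) << (31 ^ Shamt));
    OutHi = Hi >> Shamt;
  } else {
    OutLo = Hi >> (Shamt - 32);
    OutHi = 0;
  }
}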
2615
2616// Returns the opcode of the target-specific SDNode that implements the 32-bit
2617// form of the given Opcode.
2618static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode) {
2619 switch (Opcode) {
2620 default:
2621 llvm_unreachable("Unexpected opcode");
2622 case ISD::UDIV:
2623 return LoongArchISD::DIV_WU;
2624 case ISD::UREM:
2625 return LoongArchISD::MOD_WU;
2626 case ISD::SHL:
2627 return LoongArchISD::SLL_W;
2628 case ISD::SRA:
2629 return LoongArchISD::SRA_W;
2630 case ISD::SRL:
2631 return LoongArchISD::SRL_W;
2632 case ISD::ROTL:
2633 case ISD::ROTR:
2634 return LoongArchISD::ROTR_W;
2635 case ISD::CTTZ:
2636 return LoongArchISD::CTZ_W;
2637 case ISD::CTLZ:
2638 return LoongArchISD::CLZ_W;
2639 }
2640}
2641
2642// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
2643// node. Because i8/i16/i32 isn't a legal type for LA64, these operations would
2644// otherwise be promoted to i64, making it difficult to select the
2645// SLL_W/.../*W instructions later on, because the fact that the operation was
2646// originally of type i8/i16/i32 is lost.
2647static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp,
2648 unsigned ExtOpc = ISD::ANY_EXTEND) {
2649 SDLoc DL(N);
2650 LoongArchISD::NodeType WOpcode = getLoongArchWOpcode(N->getOpcode());
2651 SDValue NewOp0, NewRes;
2652
2653 switch (NumOp) {
2654 default:
2655 llvm_unreachable("Unexpected NumOp");
2656 case 1: {
2657 NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
2658 NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0);
2659 break;
2660 }
2661 case 2: {
2662 NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
2663 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
2664 if (N->getOpcode() == ISD::ROTL) {
2665 SDValue TmpOp = DAG.getConstant(32, DL, MVT::i64);
2666 NewOp1 = DAG.getNode(ISD::SUB, DL, MVT::i64, TmpOp, NewOp1);
2667 }
2668 NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
2669 break;
2670 }
2671  // TODO: Handle more NumOp.
2672 }
2673
2674 // ReplaceNodeResults requires we maintain the same type for the return
2675 // value.
2676 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
2677}
2678
2679// Converts the given 32-bit operation to an i64 operation with sign-extension
2680// semantics, reducing the number of sign-extension instructions emitted.
2681static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
2682 SDLoc DL(N);
2683 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
2684 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
2685 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
2686 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
2687 DAG.getValueType(MVT::i32));
2688 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
2689}
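
// Illustrative note (an editorial sketch, not part of this file): why the
// SIGN_EXTEND_INREG above pays off on LA64. W-form instructions such as
// add.w/sub.w already sign-extend their 32-bit result to 64 bits, so an i32
// operation legalized this way is expected to select to a single W-form
// instruction with no extra extension:
//
//   int addI32(int A, int B) { return A + B; }   // expected: add.w $a0, $a0, $a1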
2690
2691// Helper function that emits an error message for intrinsics with or without a
2692// chain and returns a UNDEF and, optionally, the chain as the results.
2693static void emitErrorAndReplaceIntrinsicResults(
2694    SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG,
2695 StringRef ErrorMsg, bool WithChain = true) {
2696 DAG.getContext()->emitError(N->getOperationName(0) + ": " + ErrorMsg + ".");
2697 Results.push_back(DAG.getUNDEF(N->getValueType(0)));
2698 if (!WithChain)
2699 return;
2700 Results.push_back(N->getOperand(0));
2701}
2702
2703template <unsigned N>
2704static void
2705replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl<SDValue> &Results,
2706 SelectionDAG &DAG, const LoongArchSubtarget &Subtarget,
2707 unsigned ResOp) {
2708 const StringRef ErrorMsgOOR = "argument out of range";
2709 unsigned Imm = Node->getConstantOperandVal(2);
2710 if (!isUInt<N>(Imm)) {
2711    emitErrorAndReplaceIntrinsicResults(Node, Results, DAG, ErrorMsgOOR,
2712 /*WithChain=*/false);
2713 return;
2714 }
2715 SDLoc DL(Node);
2716 SDValue Vec = Node->getOperand(1);
2717
2718 SDValue PickElt =
2719 DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec,
2720 DAG.getConstant(Imm, DL, Subtarget.getGRLenVT()),
2721                  DAG.getValueType(Vec.getValueType().getVectorElementType()));
2722 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, Node->getValueType(0),
2723 PickElt.getValue(0)));
2724}
2725
2726static void replaceVecCondBranchResults(SDNode *N,
2727                                        SmallVectorImpl<SDValue> &Results,
2728 SelectionDAG &DAG,
2729 const LoongArchSubtarget &Subtarget,
2730 unsigned ResOp) {
2731 SDLoc DL(N);
2732 SDValue Vec = N->getOperand(1);
2733
2734 SDValue CB = DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec);
2735 Results.push_back(
2736 DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), CB.getValue(0)));
2737}
2738
2739static void
2740replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
2741 SelectionDAG &DAG,
2742 const LoongArchSubtarget &Subtarget) {
2743 switch (N->getConstantOperandVal(0)) {
2744 default:
2745 llvm_unreachable("Unexpected Intrinsic.");
2746 case Intrinsic::loongarch_lsx_vpickve2gr_b:
2747 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
2748                                LoongArchISD::VPICK_SEXT_ELT);
2749 break;
2750 case Intrinsic::loongarch_lsx_vpickve2gr_h:
2751 case Intrinsic::loongarch_lasx_xvpickve2gr_w:
2752 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
2753                                LoongArchISD::VPICK_SEXT_ELT);
2754 break;
2755 case Intrinsic::loongarch_lsx_vpickve2gr_w:
2756 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
2757                                LoongArchISD::VPICK_SEXT_ELT);
2758 break;
2759 case Intrinsic::loongarch_lsx_vpickve2gr_bu:
2760 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
2761                                LoongArchISD::VPICK_ZEXT_ELT);
2762 break;
2763 case Intrinsic::loongarch_lsx_vpickve2gr_hu:
2764 case Intrinsic::loongarch_lasx_xvpickve2gr_wu:
2765 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
2766                                LoongArchISD::VPICK_ZEXT_ELT);
2767 break;
2768 case Intrinsic::loongarch_lsx_vpickve2gr_wu:
2769 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
2770                                LoongArchISD::VPICK_ZEXT_ELT);
2771 break;
2772 case Intrinsic::loongarch_lsx_bz_b:
2773 case Intrinsic::loongarch_lsx_bz_h:
2774 case Intrinsic::loongarch_lsx_bz_w:
2775 case Intrinsic::loongarch_lsx_bz_d:
2776 case Intrinsic::loongarch_lasx_xbz_b:
2777 case Intrinsic::loongarch_lasx_xbz_h:
2778 case Intrinsic::loongarch_lasx_xbz_w:
2779 case Intrinsic::loongarch_lasx_xbz_d:
2780 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
2781                                LoongArchISD::VANY_ZERO);
2782 break;
2783 case Intrinsic::loongarch_lsx_bz_v:
2784 case Intrinsic::loongarch_lasx_xbz_v:
2785 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
2786                                LoongArchISD::VALL_ZERO);
2787 break;
2788 case Intrinsic::loongarch_lsx_bnz_b:
2789 case Intrinsic::loongarch_lsx_bnz_h:
2790 case Intrinsic::loongarch_lsx_bnz_w:
2791 case Intrinsic::loongarch_lsx_bnz_d:
2792 case Intrinsic::loongarch_lasx_xbnz_b:
2793 case Intrinsic::loongarch_lasx_xbnz_h:
2794 case Intrinsic::loongarch_lasx_xbnz_w:
2795 case Intrinsic::loongarch_lasx_xbnz_d:
2796 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
2797                                LoongArchISD::VALL_NONZERO);
2798 break;
2799 case Intrinsic::loongarch_lsx_bnz_v:
2800 case Intrinsic::loongarch_lasx_xbnz_v:
2801 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
2802                                LoongArchISD::VANY_NONZERO);
2803 break;
2804 }
2805}
2806
2807void LoongArchTargetLowering::ReplaceNodeResults(
2808    SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
2809 SDLoc DL(N);
2810 EVT VT = N->getValueType(0);
2811 switch (N->getOpcode()) {
2812 default:
2813 llvm_unreachable("Don't know how to legalize this operation");
2814 case ISD::ADD:
2815 case ISD::SUB:
2816 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
2817 "Unexpected custom legalisation");
2818 Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
2819 break;
2820 case ISD::UDIV:
2821 case ISD::UREM:
2822 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
2823 "Unexpected custom legalisation");
2824 Results.push_back(customLegalizeToWOp(N, DAG, 2, ISD::SIGN_EXTEND));
2825 break;
2826 case ISD::SHL:
2827 case ISD::SRA:
2828 case ISD::SRL:
2829 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
2830 "Unexpected custom legalisation");
2831 if (N->getOperand(1).getOpcode() != ISD::Constant) {
2832 Results.push_back(customLegalizeToWOp(N, DAG, 2));
2833 break;
2834 }
2835 break;
2836 case ISD::ROTL:
2837 case ISD::ROTR:
2838 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
2839 "Unexpected custom legalisation");
2840 Results.push_back(customLegalizeToWOp(N, DAG, 2));
2841 break;
2842 case ISD::FP_TO_SINT: {
2843 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
2844 "Unexpected custom legalisation");
2845 SDValue Src = N->getOperand(0);
2846 EVT FVT = EVT::getFloatingPointVT(N->getValueSizeInBits(0));
2847 if (getTypeAction(*DAG.getContext(), Src.getValueType()) !=
2848        TargetLowering::TypeSoftenFloat) {
2849 SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, FVT, Src);
2850 Results.push_back(DAG.getNode(ISD::BITCAST, DL, VT, Dst));
2851 return;
2852 }
2853 // If the FP type needs to be softened, emit a library call using the 'si'
2854 // version. If we left it to default legalization we'd end up with 'di'.
2855 RTLIB::Libcall LC;
2856 LC = RTLIB::getFPTOSINT(Src.getValueType(), VT);
2857 MakeLibCallOptions CallOptions;
2858 EVT OpVT = Src.getValueType();
2859 CallOptions.setTypeListBeforeSoften(OpVT, VT, true);
2860 SDValue Chain = SDValue();
2861 SDValue Result;
2862 std::tie(Result, Chain) =
2863 makeLibCall(DAG, LC, VT, Src, CallOptions, DL, Chain);
2864 Results.push_back(Result);
2865 break;
2866 }
2867 case ISD::BITCAST: {
2868 SDValue Src = N->getOperand(0);
2869 EVT SrcVT = Src.getValueType();
2870 if (VT == MVT::i32 && SrcVT == MVT::f32 && Subtarget.is64Bit() &&
2871 Subtarget.hasBasicF()) {
2872 SDValue Dst =
2873 DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Src);
2874 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Dst));
2875 }
2876 break;
2877 }
2878 case ISD::FP_TO_UINT: {
2879 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
2880 "Unexpected custom legalisation");
2881 auto &TLI = DAG.getTargetLoweringInfo();
2882 SDValue Tmp1, Tmp2;
2883 TLI.expandFP_TO_UINT(N, Tmp1, Tmp2, DAG);
2884 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Tmp1));
2885 break;
2886 }
2887 case ISD::BSWAP: {
2888 SDValue Src = N->getOperand(0);
2889 assert((VT == MVT::i16 || VT == MVT::i32) &&
2890 "Unexpected custom legalization");
2891 MVT GRLenVT = Subtarget.getGRLenVT();
2892 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
2893 SDValue Tmp;
2894 switch (VT.getSizeInBits()) {
2895 default:
2896 llvm_unreachable("Unexpected operand width");
2897 case 16:
2898 Tmp = DAG.getNode(LoongArchISD::REVB_2H, DL, GRLenVT, NewSrc);
2899 break;
2900 case 32:
2901      // Only LA64 will get here due to the size mismatch between VT and
2902      // GRLenVT; the LA32 lowering is defined directly in LoongArchInstrInfo.
2903 Tmp = DAG.getNode(LoongArchISD::REVB_2W, DL, GRLenVT, NewSrc);
2904 break;
2905 }
2906 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
2907 break;
2908 }
2909 case ISD::BITREVERSE: {
2910 SDValue Src = N->getOperand(0);
2911 assert((VT == MVT::i8 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
2912 "Unexpected custom legalization");
2913 MVT GRLenVT = Subtarget.getGRLenVT();
2914 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
2915 SDValue Tmp;
2916 switch (VT.getSizeInBits()) {
2917 default:
2918 llvm_unreachable("Unexpected operand width");
2919 case 8:
2920 Tmp = DAG.getNode(LoongArchISD::BITREV_4B, DL, GRLenVT, NewSrc);
2921 break;
2922 case 32:
2923 Tmp = DAG.getNode(LoongArchISD::BITREV_W, DL, GRLenVT, NewSrc);
2924 break;
2925 }
2926 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
2927 break;
2928 }
2929 case ISD::CTLZ:
2930 case ISD::CTTZ: {
2931 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
2932 "Unexpected custom legalisation");
2933 Results.push_back(customLegalizeToWOp(N, DAG, 1));
2934 break;
2935 }
2936  case ISD::INTRINSIC_W_CHAIN: {
2937 SDValue Chain = N->getOperand(0);
2938 SDValue Op2 = N->getOperand(2);
2939 MVT GRLenVT = Subtarget.getGRLenVT();
2940 const StringRef ErrorMsgOOR = "argument out of range";
2941 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
2942 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
2943
2944 switch (N->getConstantOperandVal(1)) {
2945 default:
2946 llvm_unreachable("Unexpected Intrinsic.");
2947 case Intrinsic::loongarch_movfcsr2gr: {
2948 if (!Subtarget.hasBasicF()) {
2949 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqF);
2950 return;
2951 }
2952 unsigned Imm = Op2->getAsZExtVal();
2953 if (!isUInt<2>(Imm)) {
2954 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
2955 return;
2956 }
2957 SDValue MOVFCSR2GRResults = DAG.getNode(
2958 LoongArchISD::MOVFCSR2GR, SDLoc(N), {MVT::i64, MVT::Other},
2959 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
2960 Results.push_back(
2961 DAG.getNode(ISD::TRUNCATE, DL, VT, MOVFCSR2GRResults.getValue(0)));
2962 Results.push_back(MOVFCSR2GRResults.getValue(1));
2963 break;
2964 }
2965#define CRC_CASE_EXT_BINARYOP(NAME, NODE) \
2966 case Intrinsic::loongarch_##NAME: { \
2967 SDValue NODE = DAG.getNode( \
2968 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
2969 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
2970 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
2971 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
2972 Results.push_back(NODE.getValue(1)); \
2973 break; \
2974 }
2975 CRC_CASE_EXT_BINARYOP(crc_w_b_w, CRC_W_B_W)
2976 CRC_CASE_EXT_BINARYOP(crc_w_h_w, CRC_W_H_W)
2977 CRC_CASE_EXT_BINARYOP(crc_w_w_w, CRC_W_W_W)
2978 CRC_CASE_EXT_BINARYOP(crcc_w_b_w, CRCC_W_B_W)
2979 CRC_CASE_EXT_BINARYOP(crcc_w_h_w, CRCC_W_H_W)
2980 CRC_CASE_EXT_BINARYOP(crcc_w_w_w, CRCC_W_W_W)
2981#undef CRC_CASE_EXT_BINARYOP
2982
2983#define CRC_CASE_EXT_UNARYOP(NAME, NODE) \
2984 case Intrinsic::loongarch_##NAME: { \
2985 SDValue NODE = DAG.getNode( \
2986 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
2987 {Chain, Op2, \
2988 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
2989 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
2990 Results.push_back(NODE.getValue(1)); \
2991 break; \
2992 }
2993 CRC_CASE_EXT_UNARYOP(crc_w_d_w, CRC_W_D_W)
2994 CRC_CASE_EXT_UNARYOP(crcc_w_d_w, CRCC_W_D_W)
2995#undef CRC_CASE_EXT_UNARYOP
2996#define CSR_CASE(ID) \
2997 case Intrinsic::loongarch_##ID: { \
2998 if (!Subtarget.is64Bit()) \
2999 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64); \
3000 break; \
3001 }
3002 CSR_CASE(csrrd_d);
3003 CSR_CASE(csrwr_d);
3004 CSR_CASE(csrxchg_d);
3005 CSR_CASE(iocsrrd_d);
3006#undef CSR_CASE
3007 case Intrinsic::loongarch_csrrd_w: {
3008 unsigned Imm = Op2->getAsZExtVal();
3009 if (!isUInt<14>(Imm)) {
3010 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
3011 return;
3012 }
3013 SDValue CSRRDResults =
3014 DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
3015 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
3016 Results.push_back(
3017 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRRDResults.getValue(0)));
3018 Results.push_back(CSRRDResults.getValue(1));
3019 break;
3020 }
3021 case Intrinsic::loongarch_csrwr_w: {
3022 unsigned Imm = N->getConstantOperandVal(3);
3023 if (!isUInt<14>(Imm)) {
3024 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
3025 return;
3026 }
3027 SDValue CSRWRResults =
3028 DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
3029 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
3030 DAG.getConstant(Imm, DL, GRLenVT)});
3031 Results.push_back(
3032 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRWRResults.getValue(0)));
3033 Results.push_back(CSRWRResults.getValue(1));
3034 break;
3035 }
3036 case Intrinsic::loongarch_csrxchg_w: {
3037 unsigned Imm = N->getConstantOperandVal(4);
3038 if (!isUInt<14>(Imm)) {
3039 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
3040 return;
3041 }
3042 SDValue CSRXCHGResults = DAG.getNode(
3043 LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
3044 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
3045 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3)),
3046 DAG.getConstant(Imm, DL, GRLenVT)});
3047 Results.push_back(
3048 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRXCHGResults.getValue(0)));
3049 Results.push_back(CSRXCHGResults.getValue(1));
3050 break;
3051 }
3052#define IOCSRRD_CASE(NAME, NODE) \
3053 case Intrinsic::loongarch_##NAME: { \
3054 SDValue IOCSRRDResults = \
3055 DAG.getNode(LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
3056 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)}); \
3057 Results.push_back( \
3058 DAG.getNode(ISD::TRUNCATE, DL, VT, IOCSRRDResults.getValue(0))); \
3059 Results.push_back(IOCSRRDResults.getValue(1)); \
3060 break; \
3061 }
3062 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
3063 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
3064 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
3065#undef IOCSRRD_CASE
3066 case Intrinsic::loongarch_cpucfg: {
3067 SDValue CPUCFGResults =
3068 DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
3069 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)});
3070 Results.push_back(
3071 DAG.getNode(ISD::TRUNCATE, DL, VT, CPUCFGResults.getValue(0)));
3072 Results.push_back(CPUCFGResults.getValue(1));
3073 break;
3074 }
3075 case Intrinsic::loongarch_lddir_d: {
3076 if (!Subtarget.is64Bit()) {
3077 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64);
3078 return;
3079 }
3080 break;
3081 }
3082 }
3083 break;
3084 }
3085 case ISD::READ_REGISTER: {
3086 if (Subtarget.is64Bit())
3087 DAG.getContext()->emitError(
3088 "On LA64, only 64-bit registers can be read.");
3089 else
3090 DAG.getContext()->emitError(
3091 "On LA32, only 32-bit registers can be read.");
3092 Results.push_back(DAG.getUNDEF(VT));
3093 Results.push_back(N->getOperand(0));
3094 break;
3095 }
3096  case ISD::INTRINSIC_WO_CHAIN: {
3097 replaceINTRINSIC_WO_CHAINResults(N, Results, DAG, Subtarget);
3098 break;
3099 }
3100 }
3101}
3102
3103static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
3104                                 TargetLowering::DAGCombinerInfo &DCI,
3105 const LoongArchSubtarget &Subtarget) {
3106 if (DCI.isBeforeLegalizeOps())
3107 return SDValue();
3108
3109 SDValue FirstOperand = N->getOperand(0);
3110 SDValue SecondOperand = N->getOperand(1);
3111 unsigned FirstOperandOpc = FirstOperand.getOpcode();
3112 EVT ValTy = N->getValueType(0);
3113 SDLoc DL(N);
3114 uint64_t lsb, msb;
3115 unsigned SMIdx, SMLen;
3116 ConstantSDNode *CN;
3117 SDValue NewOperand;
3118 MVT GRLenVT = Subtarget.getGRLenVT();
3119
3120 // Op's second operand must be a shifted mask.
3121 if (!(CN = dyn_cast<ConstantSDNode>(SecondOperand)) ||
3122 !isShiftedMask_64(CN->getZExtValue(), SMIdx, SMLen))
3123 return SDValue();
3124
3125 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL) {
3126 // Pattern match BSTRPICK.
3127 // $dst = and ((sra or srl) $src , lsb), (2**len - 1)
3128 // => BSTRPICK $dst, $src, msb, lsb
3129 // where msb = lsb + len - 1
3130
3131 // The second operand of the shift must be an immediate.
3132 if (!(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))))
3133 return SDValue();
3134
3135 lsb = CN->getZExtValue();
3136
3137 // Return if the shifted mask does not start at bit 0 or the sum of its
3138 // length and lsb exceeds the word's size.
3139 if (SMIdx != 0 || lsb + SMLen > ValTy.getSizeInBits())
3140 return SDValue();
3141
3142 NewOperand = FirstOperand.getOperand(0);
3143 } else {
3144 // Pattern match BSTRPICK.
3145    // $dst = and $src, (2**len - 1), if len > 12
3146 // => BSTRPICK $dst, $src, msb, lsb
3147 // where lsb = 0 and msb = len - 1
3148
3149 // If the mask is <= 0xfff, andi can be used instead.
3150 if (CN->getZExtValue() <= 0xfff)
3151 return SDValue();
3152
3153    // Return if the shifted mask extends beyond the word's size.
3154 if (SMIdx + SMLen > ValTy.getSizeInBits())
3155 return SDValue();
3156
3157 if (SMIdx > 0) {
3158      // Omit if the constant has more than 2 uses. This is a conservative
3159      // decision; whether it is a win depends on the HW microarchitecture.
3160      // However, it should always be better for 1 and 2 uses.
3161 if (CN->use_size() > 2)
3162 return SDValue();
3163 // Return if the constant can be composed by a single LU12I.W.
3164 if ((CN->getZExtValue() & 0xfff) == 0)
3165 return SDValue();
3166      // Return if the constant can be composed by a single ADDI with
3167 // the zero register.
3168 if (CN->getSExtValue() >= -2048 && CN->getSExtValue() < 0)
3169 return SDValue();
3170 }
3171
3172 lsb = SMIdx;
3173 NewOperand = FirstOperand;
3174 }
3175
3176 msb = lsb + SMLen - 1;
3177 SDValue NR0 = DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy, NewOperand,
3178 DAG.getConstant(msb, DL, GRLenVT),
3179 DAG.getConstant(lsb, DL, GRLenVT));
3180 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL || lsb == 0)
3181 return NR0;
3182 // Try to optimize to
3183 // bstrpick $Rd, $Rs, msb, lsb
3184 // slli $Rd, $Rd, lsb
3185 return DAG.getNode(ISD::SHL, DL, ValTy, NR0,
3186 DAG.getConstant(lsb, DL, GRLenVT));
3187}
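
// A minimal sketch (illustrative only, not used by the combine) of what
// BSTRPICK computes, to make the patterns above concrete: it extracts bits
// [msb, lsb] of Src into the low bits of the result. For example,
// (Src >> 4) & 0xff maps to BSTRPICK Src, 11, 4, and Src & 0x0ffff000 maps to
// BSTRPICK Src, 27, 12 followed by SLLI 12.
static uint64_t bstrpickModel(uint64_t Src, unsigned Msb, unsigned Lsb) {
  unsigned Len = Msb - Lsb + 1;
  uint64_t Mask = Len == 64 ? ~UINT64_C(0) : (UINT64_C(1) << Len) - 1;
  return (Src >> Lsb) & Mask;
}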
3188
3189static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG,
3190                                 TargetLowering::DAGCombinerInfo &DCI,
3191 const LoongArchSubtarget &Subtarget) {
3192 if (DCI.isBeforeLegalizeOps())
3193 return SDValue();
3194
3195 // $dst = srl (and $src, Mask), Shamt
3196 // =>
3197 // BSTRPICK $dst, $src, MaskIdx+MaskLen-1, Shamt
3198 // when Mask is a shifted mask, and MaskIdx <= Shamt <= MaskIdx+MaskLen-1
3199 //
3200
3201 SDValue FirstOperand = N->getOperand(0);
3202 ConstantSDNode *CN;
3203 EVT ValTy = N->getValueType(0);
3204 SDLoc DL(N);
3205 MVT GRLenVT = Subtarget.getGRLenVT();
3206 unsigned MaskIdx, MaskLen;
3207 uint64_t Shamt;
3208
3209 // The first operand must be an AND and the second operand of the AND must be
3210 // a shifted mask.
3211 if (FirstOperand.getOpcode() != ISD::AND ||
3212 !(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))) ||
3213 !isShiftedMask_64(CN->getZExtValue(), MaskIdx, MaskLen))
3214 return SDValue();
3215
3216 // The second operand (shift amount) must be an immediate.
3217 if (!(CN = dyn_cast<ConstantSDNode>(N->getOperand(1))))
3218 return SDValue();
3219
3220 Shamt = CN->getZExtValue();
3221 if (MaskIdx <= Shamt && Shamt <= MaskIdx + MaskLen - 1)
3222 return DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy,
3223 FirstOperand->getOperand(0),
3224 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
3225 DAG.getConstant(Shamt, DL, GRLenVT));
3226
3227 return SDValue();
3228}
3229
3230static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
3231                                TargetLowering::DAGCombinerInfo &DCI,
3232 const LoongArchSubtarget &Subtarget) {
3233 MVT GRLenVT = Subtarget.getGRLenVT();
3234 EVT ValTy = N->getValueType(0);
3235 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
3236 ConstantSDNode *CN0, *CN1;
3237 SDLoc DL(N);
3238 unsigned ValBits = ValTy.getSizeInBits();
3239 unsigned MaskIdx0, MaskLen0, MaskIdx1, MaskLen1;
3240 unsigned Shamt;
3241 bool SwapAndRetried = false;
3242
3243 if (DCI.isBeforeLegalizeOps())
3244 return SDValue();
3245
3246 if (ValBits != 32 && ValBits != 64)
3247 return SDValue();
3248
3249Retry:
3250 // 1st pattern to match BSTRINS:
3251 // R = or (and X, mask0), (and (shl Y, lsb), mask1)
3252 // where mask1 = (2**size - 1) << lsb, mask0 = ~mask1
3253 // =>
3254 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
3255 if (N0.getOpcode() == ISD::AND &&
3256 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
3257 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
3258 N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL &&
3259 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
3260 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
3261 MaskIdx0 == MaskIdx1 && MaskLen0 == MaskLen1 &&
3262 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
3263 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
3264 (MaskIdx0 + MaskLen0 <= ValBits)) {
3265 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 1\n");
3266 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
3267 N1.getOperand(0).getOperand(0),
3268 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
3269 DAG.getConstant(MaskIdx0, DL, GRLenVT));
3270 }
3271
3272 // 2nd pattern to match BSTRINS:
3273 // R = or (and X, mask0), (shl (and Y, mask1), lsb)
3274 // where mask1 = (2**size - 1), mask0 = ~(mask1 << lsb)
3275 // =>
3276 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
3277 if (N0.getOpcode() == ISD::AND &&
3278 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
3279 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
3280 N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
3281 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
3282 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
3283 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
3284 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
3285 MaskLen0 == MaskLen1 && MaskIdx1 == 0 &&
3286 (MaskIdx0 + MaskLen0 <= ValBits)) {
3287 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 2\n");
3288 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
3289 N1.getOperand(0).getOperand(0),
3290 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
3291 DAG.getConstant(MaskIdx0, DL, GRLenVT));
3292 }
3293
3294 // 3rd pattern to match BSTRINS:
3295 // R = or (and X, mask0), (and Y, mask1)
3296 // where ~mask0 = (2**size - 1) << lsb, mask0 & mask1 = 0
3297 // =>
3298 // R = BSTRINS X, (shr (and Y, mask1), lsb), msb, lsb
3299 // where msb = lsb + size - 1
3300 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
3301 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
3302 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
3303 (MaskIdx0 + MaskLen0 <= 64) &&
3304 (CN1 = dyn_cast<ConstantSDNode>(N1->getOperand(1))) &&
3305 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
3306 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 3\n");
3307 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
3308 DAG.getNode(ISD::SRL, DL, N1->getValueType(0), N1,
3309 DAG.getConstant(MaskIdx0, DL, GRLenVT)),
3310 DAG.getConstant(ValBits == 32
3311 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
3312 : (MaskIdx0 + MaskLen0 - 1),
3313 DL, GRLenVT),
3314 DAG.getConstant(MaskIdx0, DL, GRLenVT));
3315 }
3316
3317 // 4th pattern to match BSTRINS:
3318 // R = or (and X, mask), (shl Y, shamt)
3319 // where mask = (2**shamt - 1)
3320 // =>
3321 // R = BSTRINS X, Y, ValBits - 1, shamt
3322 // where ValBits = 32 or 64
3323 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::SHL &&
3324 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
3325 isShiftedMask_64(CN0->getZExtValue(), MaskIdx0, MaskLen0) &&
3326 MaskIdx0 == 0 && (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
3327 (Shamt = CN1->getZExtValue()) == MaskLen0 &&
3328 (MaskIdx0 + MaskLen0 <= ValBits)) {
3329 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 4\n");
3330 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
3331 N1.getOperand(0),
3332 DAG.getConstant((ValBits - 1), DL, GRLenVT),
3333 DAG.getConstant(Shamt, DL, GRLenVT));
3334 }
3335
3336 // 5th pattern to match BSTRINS:
3337 // R = or (and X, mask), const
3338 // where ~mask = (2**size - 1) << lsb, mask & const = 0
3339 // =>
3340 // R = BSTRINS X, (const >> lsb), msb, lsb
3341 // where msb = lsb + size - 1
3342 if (N0.getOpcode() == ISD::AND &&
3343 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
3344 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
3345 (CN1 = dyn_cast<ConstantSDNode>(N1)) &&
3346 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
3347 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 5\n");
3348 return DAG.getNode(
3349 LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
3350 DAG.getConstant(CN1->getSExtValue() >> MaskIdx0, DL, ValTy),
3351 DAG.getConstant(ValBits == 32 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
3352 : (MaskIdx0 + MaskLen0 - 1),
3353 DL, GRLenVT),
3354 DAG.getConstant(MaskIdx0, DL, GRLenVT));
3355 }
3356
3357 // 6th pattern.
3358 // a = b | ((c & mask) << shamt), where all positions in b to be overwritten
3359 // by the incoming bits are known to be zero.
3360 // =>
3361 // a = BSTRINS b, c, shamt + MaskLen - 1, shamt
3362 //
3363 // Note that the 1st pattern is a special situation of the 6th, i.e. the 6th
3364 // pattern is more common than the 1st. So we put the 1st before the 6th in
3365 // order to match as many nodes as possible.
3366 ConstantSDNode *CNMask, *CNShamt;
3367 unsigned MaskIdx, MaskLen;
3368 if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
3369 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
3370 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
3371 MaskIdx == 0 && (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
3372 CNShamt->getZExtValue() + MaskLen <= ValBits) {
3373 Shamt = CNShamt->getZExtValue();
3374 APInt ShMask(ValBits, CNMask->getZExtValue() << Shamt);
3375 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
3376 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 6\n");
3377 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
3378 N1.getOperand(0).getOperand(0),
3379 DAG.getConstant(Shamt + MaskLen - 1, DL, GRLenVT),
3380 DAG.getConstant(Shamt, DL, GRLenVT));
3381 }
3382 }
3383
3384 // 7th pattern.
3385 // a = b | ((c << shamt) & shifted_mask), where all positions in b to be
3386 // overwritten by the incoming bits are known to be zero.
3387 // =>
3388 // a = BSTRINS b, c, MaskIdx + MaskLen - 1, MaskIdx
3389 //
3390 // Similarly, the 7th pattern is more common than the 2nd. So we put the 2nd
3391 // before the 7th in order to match as many nodes as possible.
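// Illustrative example: with shifted_mask = 0xff00 (MaskIdx = 8, MaskLen = 8)
// and shamt = 8, and bits 15..8 of b known to be zero,
//   a = b | ((c << 8) & 0xff00)
// becomes BSTRINS b, c, 15, 8.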
3392 if (N1.getOpcode() == ISD::AND &&
3393 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
3394 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
3395 N1.getOperand(0).getOpcode() == ISD::SHL &&
3396 (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
3397 CNShamt->getZExtValue() == MaskIdx) {
3398 APInt ShMask(ValBits, CNMask->getZExtValue());
3399 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
3400 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 7\n");
3401 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
3402 N1.getOperand(0).getOperand(0),
3403 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
3404 DAG.getConstant(MaskIdx, DL, GRLenVT));
3405 }
3406 }
3407
3408 // (or a, b) and (or b, a) are equivalent, so swap the operands and retry.
3409 if (!SwapAndRetried) {
3410 std::swap(N0, N1);
3411 SwapAndRetried = true;
3412 goto Retry;
3413 }
3414
3415 SwapAndRetried = false;
3416Retry2:
3417 // 8th pattern.
3418 // a = b | (c & shifted_mask), where all positions in b to be overwritten by
3419 // the incoming bits are known to be zero.
3420 // =>
3421 // a = BSTRINS b, c >> MaskIdx, MaskIdx + MaskLen - 1, MaskIdx
3422 //
3423 // Similarly, the 8th pattern is more common than the 4th and 5th patterns. So
3424 // we put it here in order to match as many nodes as possible or generate
3425 // fewer instructions.
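// Illustrative example: with shifted_mask = 0xff00 (MaskIdx = 8, MaskLen = 8)
// and bits 15..8 of b known to be zero,
//   a = b | (c & 0xff00)
// becomes BSTRINS b, c >> 8, 15, 8.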
3426 if (N1.getOpcode() == ISD::AND &&
3427 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
3428 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen)) {
3429 APInt ShMask(ValBits, CNMask->getZExtValue());
3430 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
3431 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 8\n");
3432 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
3433 DAG.getNode(ISD::SRL, DL, N1->getValueType(0),
3434 N1->getOperand(0),
3435 DAG.getConstant(MaskIdx, DL, GRLenVT)),
3436 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
3437 DAG.getConstant(MaskIdx, DL, GRLenVT));
3438 }
3439 }
3440 // Swap N0/N1 and retry.
3441 if (!SwapAndRetried) {
3442 std::swap(N0, N1);
3443 SwapAndRetried = true;
3444 goto Retry2;
3445 }
3446
3447 return SDValue();
3448}
3449
3450static bool checkValueWidth(SDValue V, ISD::LoadExtType &ExtType) {
3451 ExtType = ISD::NON_EXTLOAD;
3452
3453 switch (V.getNode()->getOpcode()) {
3454 case ISD::LOAD: {
3455 LoadSDNode *LoadNode = cast<LoadSDNode>(V.getNode());
3456 if ((LoadNode->getMemoryVT() == MVT::i8) ||
3457 (LoadNode->getMemoryVT() == MVT::i16)) {
3458 ExtType = LoadNode->getExtensionType();
3459 return true;
3460 }
3461 return false;
3462 }
3463 case ISD::AssertSext: {
3464 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
3465 if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
3466 ExtType = ISD::SEXTLOAD;
3467 return true;
3468 }
3469 return false;
3470 }
3471 case ISD::AssertZext: {
3472 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
3473 if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
3474 ExtType = ISD::ZEXTLOAD;
3475 return true;
3476 }
3477 return false;
3478 }
3479 default:
3480 return false;
3481 }
3482
3483 return false;
3484}
3485
3486// Eliminate redundant truncation and zero-extension nodes.
3487// * Case 1:
3488// +------------+ +------------+ +------------+
3489// | Input1 | | Input2 | | CC |
3490// +------------+ +------------+ +------------+
3491// | | |
3492// V V +----+
3493// +------------+ +------------+ |
3494// | TRUNCATE | | TRUNCATE | |
3495// +------------+ +------------+ |
3496// | | |
3497// V V |
3498// +------------+ +------------+ |
3499// | ZERO_EXT | | ZERO_EXT | |
3500// +------------+ +------------+ |
3501// | | |
3502// | +-------------+ |
3503// V V | |
3504// +----------------+ | |
3505// | AND | | |
3506// +----------------+ | |
3507// | | |
3508// +---------------+ | |
3509// | | |
3510// V V V
3511// +-------------+
3512// | CMP |
3513// +-------------+
3514// * Case 2:
3515// +------------+ +------------+ +-------------+ +------------+ +------------+
3516// | Input1 | | Input2 | | Constant -1 | | Constant 0 | | CC |
3517// +------------+ +------------+ +-------------+ +------------+ +------------+
3518// | | | | |
3519// V | | | |
3520// +------------+ | | | |
3521// | XOR |<---------------------+ | |
3522// +------------+ | | |
3523// | | | |
3524// V V +---------------+ |
3525// +------------+ +------------+ | |
3526// | TRUNCATE | | TRUNCATE | | +-------------------------+
3527// +------------+ +------------+ | |
3528// | | | |
3529// V V | |
3530// +------------+ +------------+ | |
3531// | ZERO_EXT | | ZERO_EXT | | |
3532// +------------+ +------------+ | |
3533// | | | |
3534// V V | |
3535// +----------------+ | |
3536// | AND | | |
3537// +----------------+ | |
3538// | | |
3539// +---------------+ | |
3540// | | |
3541// V V V
3542// +-------------+
3543// | CMP |
3544// +-------------+
3545 static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG,
3546 TargetLowering::DAGCombinerInfo &DCI,
3547 const LoongArchSubtarget &Subtarget) {
3548 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
3549
3550 SDNode *AndNode = N->getOperand(0).getNode();
3551 if (AndNode->getOpcode() != ISD::AND)
3552 return SDValue();
3553
3554 SDValue AndInputValue2 = AndNode->getOperand(1);
3555 if (AndInputValue2.getOpcode() != ISD::ZERO_EXTEND)
3556 return SDValue();
3557
3558 SDValue CmpInputValue = N->getOperand(1);
3559 SDValue AndInputValue1 = AndNode->getOperand(0);
3560 if (AndInputValue1.getOpcode() == ISD::XOR) {
3561 if (CC != ISD::SETEQ && CC != ISD::SETNE)
3562 return SDValue();
3563 ConstantSDNode *CN = dyn_cast<ConstantSDNode>(AndInputValue1.getOperand(1));
3564 if (!CN || CN->getSExtValue() != -1)
3565 return SDValue();
3566 CN = dyn_cast<ConstantSDNode>(CmpInputValue);
3567 if (!CN || CN->getSExtValue() != 0)
3568 return SDValue();
3569 AndInputValue1 = AndInputValue1.getOperand(0);
3570 if (AndInputValue1.getOpcode() != ISD::ZERO_EXTEND)
3571 return SDValue();
3572 } else if (AndInputValue1.getOpcode() == ISD::ZERO_EXTEND) {
3573 if (AndInputValue2 != CmpInputValue)
3574 return SDValue();
3575 } else {
3576 return SDValue();
3577 }
3578
3579 SDValue TruncValue1 = AndInputValue1.getNode()->getOperand(0);
3580 if (TruncValue1.getOpcode() != ISD::TRUNCATE)
3581 return SDValue();
3582
3583 SDValue TruncValue2 = AndInputValue2.getNode()->getOperand(0);
3584 if (TruncValue2.getOpcode() != ISD::TRUNCATE)
3585 return SDValue();
3586
3587 SDValue TruncInputValue1 = TruncValue1.getNode()->getOperand(0);
3588 SDValue TruncInputValue2 = TruncValue2.getNode()->getOperand(0);
3589 ISD::LoadExtType ExtType1;
3590 ISD::LoadExtType ExtType2;
3591
3592 if (!checkValueWidth(TruncInputValue1, ExtType1) ||
3593 !checkValueWidth(TruncInputValue2, ExtType2))
3594 return SDValue();
3595
3596 if (TruncInputValue1->getValueType(0) != TruncInputValue2->getValueType(0) ||
3597 AndNode->getValueType(0) != TruncInputValue1->getValueType(0))
3598 return SDValue();
3599
3600 if ((ExtType2 != ISD::ZEXTLOAD) &&
3601 ((ExtType2 != ISD::SEXTLOAD) && (ExtType1 != ISD::SEXTLOAD)))
3602 return SDValue();
3603
3604 // These truncation and zero-extension nodes are not necessary, remove them.
3605 SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N), AndNode->getValueType(0),
3606 TruncInputValue1, TruncInputValue2);
3607 SDValue NewSetCC =
3608 DAG.getSetCC(SDLoc(N), N->getValueType(0), NewAnd, TruncInputValue2, CC);
3609 DAG.ReplaceAllUsesWith(N, NewSetCC.getNode());
3610 return SDValue(N, 0);
3611}
3612
3613// Combine (loongarch_bitrev_w (loongarch_revb_2w X)) to loongarch_bitrev_4b.
3614 static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG,
3615 TargetLowering::DAGCombinerInfo &DCI,
3616 const LoongArchSubtarget &Subtarget) {
3617 if (DCI.isBeforeLegalizeOps())
3618 return SDValue();
3619
3620 SDValue Src = N->getOperand(0);
3621 if (Src.getOpcode() != LoongArchISD::REVB_2W)
3622 return SDValue();
3623
3624 return DAG.getNode(LoongArchISD::BITREV_4B, SDLoc(N), N->getValueType(0),
3625 Src.getOperand(0));
3626}
3627
3628template <unsigned N>
3629 static SDValue legalizeIntrinsicImmArg(SDNode *Node, unsigned ImmOp,
3630 SelectionDAG &DAG,
3631 const LoongArchSubtarget &Subtarget,
3632 bool IsSigned = false) {
3633 SDLoc DL(Node);
3634 auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
3635 // Check the ImmArg.
3636 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
3637 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
3638 DAG.getContext()->emitError(Node->getOperationName(0) +
3639 ": argument out of range.");
3640 return DAG.getNode(ISD::UNDEF, DL, Subtarget.getGRLenVT());
3641 }
3642 return DAG.getConstant(CImm->getZExtValue(), DL, Subtarget.getGRLenVT());
3643}
3644
3645template <unsigned N>
3646static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp,
3647 SelectionDAG &DAG, bool IsSigned = false) {
3648 SDLoc DL(Node);
3649 EVT ResTy = Node->getValueType(0);
3650 auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
3651
3652 // Check the ImmArg.
3653 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
3654 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
3655 DAG.getContext()->emitError(Node->getOperationName(0) +
3656 ": argument out of range.");
3657 return DAG.getNode(ISD::UNDEF, DL, ResTy);
3658 }
3659 return DAG.getConstant(
3660 APInt(ResTy.getScalarType().getSizeInBits(),
3661 IsSigned ? CImm->getSExtValue() : CImm->getZExtValue(), IsSigned),
3662 DL, ResTy);
3663}
3664
3665 static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG) {
3666 SDLoc DL(Node);
3667 EVT ResTy = Node->getValueType(0);
3668 SDValue Vec = Node->getOperand(2);
3669 SDValue Mask = DAG.getConstant(Vec.getScalarValueSizeInBits() - 1, DL, ResTy);
3670 return DAG.getNode(ISD::AND, DL, ResTy, Vec, Mask);
3671}
3672
3673 static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG) {
3674 SDLoc DL(Node);
3675 EVT ResTy = Node->getValueType(0);
3676 SDValue One = DAG.getConstant(1, DL, ResTy);
3677 SDValue Bit =
3678 DAG.getNode(ISD::SHL, DL, ResTy, One, truncateVecElts(Node, DAG));
3679
3680 return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1),
3681 DAG.getNOT(DL, Bit, ResTy));
3682}
3683
3684template <unsigned N>
3685 static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG) {
3686 SDLoc DL(Node);
3687 EVT ResTy = Node->getValueType(0);
3688 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
3689 // Check the unsigned ImmArg.
3690 if (!isUInt<N>(CImm->getZExtValue())) {
3691 DAG.getContext()->emitError(Node->getOperationName(0) +
3692 ": argument out of range.");
3693 return DAG.getNode(ISD::UNDEF, DL, ResTy);
3694 }
3695
3696 APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
3697 SDValue Mask = DAG.getConstant(~BitImm, DL, ResTy);
3698
3699 return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1), Mask);
3700}
3701
3702template <unsigned N>
3703 static SDValue lowerVectorBitSetImm(SDNode *Node, SelectionDAG &DAG) {
3704 SDLoc DL(Node);
3705 EVT ResTy = Node->getValueType(0);
3706 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
3707 // Check the unsigned ImmArg.
3708 if (!isUInt<N>(CImm->getZExtValue())) {
3709 DAG.getContext()->emitError(Node->getOperationName(0) +
3710 ": argument out of range.");
3711 return DAG.getNode(ISD::UNDEF, DL, ResTy);
3712 }
3713
3714 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
3715 SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
3716 return DAG.getNode(ISD::OR, DL, ResTy, Node->getOperand(1), BitImm);
3717}
3718
3719template <unsigned N>
3720 static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG) {
3721 SDLoc DL(Node);
3722 EVT ResTy = Node->getValueType(0);
3723 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
3724 // Check the unsigned ImmArg.
3725 if (!isUInt<N>(CImm->getZExtValue())) {
3726 DAG.getContext()->emitError(Node->getOperationName(0) +
3727 ": argument out of range.");
3728 return DAG.getNode(ISD::UNDEF, DL, ResTy);
3729 }
3730
3731 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
3732 SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
3733 return DAG.getNode(ISD::XOR, DL, ResTy, Node->getOperand(1), BitImm);
3734}
3735
3736static SDValue
3737 performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG,
3738 TargetLowering::DAGCombinerInfo &DCI,
3739 const LoongArchSubtarget &Subtarget) {
3740 SDLoc DL(N);
3741 switch (N->getConstantOperandVal(0)) {
3742 default:
3743 break;
3744 case Intrinsic::loongarch_lsx_vadd_b:
3745 case Intrinsic::loongarch_lsx_vadd_h:
3746 case Intrinsic::loongarch_lsx_vadd_w:
3747 case Intrinsic::loongarch_lsx_vadd_d:
3748 case Intrinsic::loongarch_lasx_xvadd_b:
3749 case Intrinsic::loongarch_lasx_xvadd_h:
3750 case Intrinsic::loongarch_lasx_xvadd_w:
3751 case Intrinsic::loongarch_lasx_xvadd_d:
3752 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
3753 N->getOperand(2));
3754 case Intrinsic::loongarch_lsx_vaddi_bu:
3755 case Intrinsic::loongarch_lsx_vaddi_hu:
3756 case Intrinsic::loongarch_lsx_vaddi_wu:
3757 case Intrinsic::loongarch_lsx_vaddi_du:
3758 case Intrinsic::loongarch_lasx_xvaddi_bu:
3759 case Intrinsic::loongarch_lasx_xvaddi_hu:
3760 case Intrinsic::loongarch_lasx_xvaddi_wu:
3761 case Intrinsic::loongarch_lasx_xvaddi_du:
3762 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
3763 lowerVectorSplatImm<5>(N, 2, DAG));
3764 case Intrinsic::loongarch_lsx_vsub_b:
3765 case Intrinsic::loongarch_lsx_vsub_h:
3766 case Intrinsic::loongarch_lsx_vsub_w:
3767 case Intrinsic::loongarch_lsx_vsub_d:
3768 case Intrinsic::loongarch_lasx_xvsub_b:
3769 case Intrinsic::loongarch_lasx_xvsub_h:
3770 case Intrinsic::loongarch_lasx_xvsub_w:
3771 case Intrinsic::loongarch_lasx_xvsub_d:
3772 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
3773 N->getOperand(2));
3774 case Intrinsic::loongarch_lsx_vsubi_bu:
3775 case Intrinsic::loongarch_lsx_vsubi_hu:
3776 case Intrinsic::loongarch_lsx_vsubi_wu:
3777 case Intrinsic::loongarch_lsx_vsubi_du:
3778 case Intrinsic::loongarch_lasx_xvsubi_bu:
3779 case Intrinsic::loongarch_lasx_xvsubi_hu:
3780 case Intrinsic::loongarch_lasx_xvsubi_wu:
3781 case Intrinsic::loongarch_lasx_xvsubi_du:
3782 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
3783 lowerVectorSplatImm<5>(N, 2, DAG));
3784 case Intrinsic::loongarch_lsx_vneg_b:
3785 case Intrinsic::loongarch_lsx_vneg_h:
3786 case Intrinsic::loongarch_lsx_vneg_w:
3787 case Intrinsic::loongarch_lsx_vneg_d:
3788 case Intrinsic::loongarch_lasx_xvneg_b:
3789 case Intrinsic::loongarch_lasx_xvneg_h:
3790 case Intrinsic::loongarch_lasx_xvneg_w:
3791 case Intrinsic::loongarch_lasx_xvneg_d:
3792 return DAG.getNode(
3793 ISD::SUB, DL, N->getValueType(0),
3794 DAG.getConstant(
3795 APInt(N->getValueType(0).getScalarType().getSizeInBits(), 0,
3796 /*isSigned=*/true),
3797 SDLoc(N), N->getValueType(0)),
3798 N->getOperand(1));
3799 case Intrinsic::loongarch_lsx_vmax_b:
3800 case Intrinsic::loongarch_lsx_vmax_h:
3801 case Intrinsic::loongarch_lsx_vmax_w:
3802 case Intrinsic::loongarch_lsx_vmax_d:
3803 case Intrinsic::loongarch_lasx_xvmax_b:
3804 case Intrinsic::loongarch_lasx_xvmax_h:
3805 case Intrinsic::loongarch_lasx_xvmax_w:
3806 case Intrinsic::loongarch_lasx_xvmax_d:
3807 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
3808 N->getOperand(2));
3809 case Intrinsic::loongarch_lsx_vmax_bu:
3810 case Intrinsic::loongarch_lsx_vmax_hu:
3811 case Intrinsic::loongarch_lsx_vmax_wu:
3812 case Intrinsic::loongarch_lsx_vmax_du:
3813 case Intrinsic::loongarch_lasx_xvmax_bu:
3814 case Intrinsic::loongarch_lasx_xvmax_hu:
3815 case Intrinsic::loongarch_lasx_xvmax_wu:
3816 case Intrinsic::loongarch_lasx_xvmax_du:
3817 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
3818 N->getOperand(2));
3819 case Intrinsic::loongarch_lsx_vmaxi_b:
3820 case Intrinsic::loongarch_lsx_vmaxi_h:
3821 case Intrinsic::loongarch_lsx_vmaxi_w:
3822 case Intrinsic::loongarch_lsx_vmaxi_d:
3823 case Intrinsic::loongarch_lasx_xvmaxi_b:
3824 case Intrinsic::loongarch_lasx_xvmaxi_h:
3825 case Intrinsic::loongarch_lasx_xvmaxi_w:
3826 case Intrinsic::loongarch_lasx_xvmaxi_d:
3827 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
3828 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
3829 case Intrinsic::loongarch_lsx_vmaxi_bu:
3830 case Intrinsic::loongarch_lsx_vmaxi_hu:
3831 case Intrinsic::loongarch_lsx_vmaxi_wu:
3832 case Intrinsic::loongarch_lsx_vmaxi_du:
3833 case Intrinsic::loongarch_lasx_xvmaxi_bu:
3834 case Intrinsic::loongarch_lasx_xvmaxi_hu:
3835 case Intrinsic::loongarch_lasx_xvmaxi_wu:
3836 case Intrinsic::loongarch_lasx_xvmaxi_du:
3837 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
3838 lowerVectorSplatImm<5>(N, 2, DAG));
3839 case Intrinsic::loongarch_lsx_vmin_b:
3840 case Intrinsic::loongarch_lsx_vmin_h:
3841 case Intrinsic::loongarch_lsx_vmin_w:
3842 case Intrinsic::loongarch_lsx_vmin_d:
3843 case Intrinsic::loongarch_lasx_xvmin_b:
3844 case Intrinsic::loongarch_lasx_xvmin_h:
3845 case Intrinsic::loongarch_lasx_xvmin_w:
3846 case Intrinsic::loongarch_lasx_xvmin_d:
3847 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
3848 N->getOperand(2));
3849 case Intrinsic::loongarch_lsx_vmin_bu:
3850 case Intrinsic::loongarch_lsx_vmin_hu:
3851 case Intrinsic::loongarch_lsx_vmin_wu:
3852 case Intrinsic::loongarch_lsx_vmin_du:
3853 case Intrinsic::loongarch_lasx_xvmin_bu:
3854 case Intrinsic::loongarch_lasx_xvmin_hu:
3855 case Intrinsic::loongarch_lasx_xvmin_wu:
3856 case Intrinsic::loongarch_lasx_xvmin_du:
3857 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
3858 N->getOperand(2));
3859 case Intrinsic::loongarch_lsx_vmini_b:
3860 case Intrinsic::loongarch_lsx_vmini_h:
3861 case Intrinsic::loongarch_lsx_vmini_w:
3862 case Intrinsic::loongarch_lsx_vmini_d:
3863 case Intrinsic::loongarch_lasx_xvmini_b:
3864 case Intrinsic::loongarch_lasx_xvmini_h:
3865 case Intrinsic::loongarch_lasx_xvmini_w:
3866 case Intrinsic::loongarch_lasx_xvmini_d:
3867 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
3868 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
3869 case Intrinsic::loongarch_lsx_vmini_bu:
3870 case Intrinsic::loongarch_lsx_vmini_hu:
3871 case Intrinsic::loongarch_lsx_vmini_wu:
3872 case Intrinsic::loongarch_lsx_vmini_du:
3873 case Intrinsic::loongarch_lasx_xvmini_bu:
3874 case Intrinsic::loongarch_lasx_xvmini_hu:
3875 case Intrinsic::loongarch_lasx_xvmini_wu:
3876 case Intrinsic::loongarch_lasx_xvmini_du:
3877 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
3878 lowerVectorSplatImm<5>(N, 2, DAG));
3879 case Intrinsic::loongarch_lsx_vmul_b:
3880 case Intrinsic::loongarch_lsx_vmul_h:
3881 case Intrinsic::loongarch_lsx_vmul_w:
3882 case Intrinsic::loongarch_lsx_vmul_d:
3883 case Intrinsic::loongarch_lasx_xvmul_b:
3884 case Intrinsic::loongarch_lasx_xvmul_h:
3885 case Intrinsic::loongarch_lasx_xvmul_w:
3886 case Intrinsic::loongarch_lasx_xvmul_d:
3887 return DAG.getNode(ISD::MUL, DL, N->getValueType(0), N->getOperand(1),
3888 N->getOperand(2));
3889 case Intrinsic::loongarch_lsx_vmadd_b:
3890 case Intrinsic::loongarch_lsx_vmadd_h:
3891 case Intrinsic::loongarch_lsx_vmadd_w:
3892 case Intrinsic::loongarch_lsx_vmadd_d:
3893 case Intrinsic::loongarch_lasx_xvmadd_b:
3894 case Intrinsic::loongarch_lasx_xvmadd_h:
3895 case Intrinsic::loongarch_lasx_xvmadd_w:
3896 case Intrinsic::loongarch_lasx_xvmadd_d: {
3897 EVT ResTy = N->getValueType(0);
3898 return DAG.getNode(ISD::ADD, SDLoc(N), ResTy, N->getOperand(1),
3899 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
3900 N->getOperand(3)));
3901 }
3902 case Intrinsic::loongarch_lsx_vmsub_b:
3903 case Intrinsic::loongarch_lsx_vmsub_h:
3904 case Intrinsic::loongarch_lsx_vmsub_w:
3905 case Intrinsic::loongarch_lsx_vmsub_d:
3906 case Intrinsic::loongarch_lasx_xvmsub_b:
3907 case Intrinsic::loongarch_lasx_xvmsub_h:
3908 case Intrinsic::loongarch_lasx_xvmsub_w:
3909 case Intrinsic::loongarch_lasx_xvmsub_d: {
3910 EVT ResTy = N->getValueType(0);
3911 return DAG.getNode(ISD::SUB, SDLoc(N), ResTy, N->getOperand(1),
3912 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
3913 N->getOperand(3)));
3914 }
3915 case Intrinsic::loongarch_lsx_vdiv_b:
3916 case Intrinsic::loongarch_lsx_vdiv_h:
3917 case Intrinsic::loongarch_lsx_vdiv_w:
3918 case Intrinsic::loongarch_lsx_vdiv_d:
3919 case Intrinsic::loongarch_lasx_xvdiv_b:
3920 case Intrinsic::loongarch_lasx_xvdiv_h:
3921 case Intrinsic::loongarch_lasx_xvdiv_w:
3922 case Intrinsic::loongarch_lasx_xvdiv_d:
3923 return DAG.getNode(ISD::SDIV, DL, N->getValueType(0), N->getOperand(1),
3924 N->getOperand(2));
3925 case Intrinsic::loongarch_lsx_vdiv_bu:
3926 case Intrinsic::loongarch_lsx_vdiv_hu:
3927 case Intrinsic::loongarch_lsx_vdiv_wu:
3928 case Intrinsic::loongarch_lsx_vdiv_du:
3929 case Intrinsic::loongarch_lasx_xvdiv_bu:
3930 case Intrinsic::loongarch_lasx_xvdiv_hu:
3931 case Intrinsic::loongarch_lasx_xvdiv_wu:
3932 case Intrinsic::loongarch_lasx_xvdiv_du:
3933 return DAG.getNode(ISD::UDIV, DL, N->getValueType(0), N->getOperand(1),
3934 N->getOperand(2));
3935 case Intrinsic::loongarch_lsx_vmod_b:
3936 case Intrinsic::loongarch_lsx_vmod_h:
3937 case Intrinsic::loongarch_lsx_vmod_w:
3938 case Intrinsic::loongarch_lsx_vmod_d:
3939 case Intrinsic::loongarch_lasx_xvmod_b:
3940 case Intrinsic::loongarch_lasx_xvmod_h:
3941 case Intrinsic::loongarch_lasx_xvmod_w:
3942 case Intrinsic::loongarch_lasx_xvmod_d:
3943 return DAG.getNode(ISD::SREM, DL, N->getValueType(0), N->getOperand(1),
3944 N->getOperand(2));
3945 case Intrinsic::loongarch_lsx_vmod_bu:
3946 case Intrinsic::loongarch_lsx_vmod_hu:
3947 case Intrinsic::loongarch_lsx_vmod_wu:
3948 case Intrinsic::loongarch_lsx_vmod_du:
3949 case Intrinsic::loongarch_lasx_xvmod_bu:
3950 case Intrinsic::loongarch_lasx_xvmod_hu:
3951 case Intrinsic::loongarch_lasx_xvmod_wu:
3952 case Intrinsic::loongarch_lasx_xvmod_du:
3953 return DAG.getNode(ISD::UREM, DL, N->getValueType(0), N->getOperand(1),
3954 N->getOperand(2));
3955 case Intrinsic::loongarch_lsx_vand_v:
3956 case Intrinsic::loongarch_lasx_xvand_v:
3957 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
3958 N->getOperand(2));
3959 case Intrinsic::loongarch_lsx_vor_v:
3960 case Intrinsic::loongarch_lasx_xvor_v:
3961 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
3962 N->getOperand(2));
3963 case Intrinsic::loongarch_lsx_vxor_v:
3964 case Intrinsic::loongarch_lasx_xvxor_v:
3965 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
3966 N->getOperand(2));
3967 case Intrinsic::loongarch_lsx_vnor_v:
3968 case Intrinsic::loongarch_lasx_xvnor_v: {
3969 SDValue Res = DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
3970 N->getOperand(2));
3971 return DAG.getNOT(DL, Res, Res->getValueType(0));
3972 }
3973 case Intrinsic::loongarch_lsx_vandi_b:
3974 case Intrinsic::loongarch_lasx_xvandi_b:
3975 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
3976 lowerVectorSplatImm<8>(N, 2, DAG));
3977 case Intrinsic::loongarch_lsx_vori_b:
3978 case Intrinsic::loongarch_lasx_xvori_b:
3979 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
3980 lowerVectorSplatImm<8>(N, 2, DAG));
3981 case Intrinsic::loongarch_lsx_vxori_b:
3982 case Intrinsic::loongarch_lasx_xvxori_b:
3983 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
3984 lowerVectorSplatImm<8>(N, 2, DAG));
3985 case Intrinsic::loongarch_lsx_vsll_b:
3986 case Intrinsic::loongarch_lsx_vsll_h:
3987 case Intrinsic::loongarch_lsx_vsll_w:
3988 case Intrinsic::loongarch_lsx_vsll_d:
3989 case Intrinsic::loongarch_lasx_xvsll_b:
3990 case Intrinsic::loongarch_lasx_xvsll_h:
3991 case Intrinsic::loongarch_lasx_xvsll_w:
3992 case Intrinsic::loongarch_lasx_xvsll_d:
3993 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
3994 truncateVecElts(N, DAG));
3995 case Intrinsic::loongarch_lsx_vslli_b:
3996 case Intrinsic::loongarch_lasx_xvslli_b:
3997 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
3998 lowerVectorSplatImm<3>(N, 2, DAG));
3999 case Intrinsic::loongarch_lsx_vslli_h:
4000 case Intrinsic::loongarch_lasx_xvslli_h:
4001 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
4002 lowerVectorSplatImm<4>(N, 2, DAG));
4003 case Intrinsic::loongarch_lsx_vslli_w:
4004 case Intrinsic::loongarch_lasx_xvslli_w:
4005 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
4006 lowerVectorSplatImm<5>(N, 2, DAG));
4007 case Intrinsic::loongarch_lsx_vslli_d:
4008 case Intrinsic::loongarch_lasx_xvslli_d:
4009 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
4010 lowerVectorSplatImm<6>(N, 2, DAG));
4011 case Intrinsic::loongarch_lsx_vsrl_b:
4012 case Intrinsic::loongarch_lsx_vsrl_h:
4013 case Intrinsic::loongarch_lsx_vsrl_w:
4014 case Intrinsic::loongarch_lsx_vsrl_d:
4015 case Intrinsic::loongarch_lasx_xvsrl_b:
4016 case Intrinsic::loongarch_lasx_xvsrl_h:
4017 case Intrinsic::loongarch_lasx_xvsrl_w:
4018 case Intrinsic::loongarch_lasx_xvsrl_d:
4019 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
4020 truncateVecElts(N, DAG));
4021 case Intrinsic::loongarch_lsx_vsrli_b:
4022 case Intrinsic::loongarch_lasx_xvsrli_b:
4023 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
4024 lowerVectorSplatImm<3>(N, 2, DAG));
4025 case Intrinsic::loongarch_lsx_vsrli_h:
4026 case Intrinsic::loongarch_lasx_xvsrli_h:
4027 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
4028 lowerVectorSplatImm<4>(N, 2, DAG));
4029 case Intrinsic::loongarch_lsx_vsrli_w:
4030 case Intrinsic::loongarch_lasx_xvsrli_w:
4031 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
4032 lowerVectorSplatImm<5>(N, 2, DAG));
4033 case Intrinsic::loongarch_lsx_vsrli_d:
4034 case Intrinsic::loongarch_lasx_xvsrli_d:
4035 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
4036 lowerVectorSplatImm<6>(N, 2, DAG));
4037 case Intrinsic::loongarch_lsx_vsra_b:
4038 case Intrinsic::loongarch_lsx_vsra_h:
4039 case Intrinsic::loongarch_lsx_vsra_w:
4040 case Intrinsic::loongarch_lsx_vsra_d:
4041 case Intrinsic::loongarch_lasx_xvsra_b:
4042 case Intrinsic::loongarch_lasx_xvsra_h:
4043 case Intrinsic::loongarch_lasx_xvsra_w:
4044 case Intrinsic::loongarch_lasx_xvsra_d:
4045 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
4046 truncateVecElts(N, DAG));
4047 case Intrinsic::loongarch_lsx_vsrai_b:
4048 case Intrinsic::loongarch_lasx_xvsrai_b:
4049 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
4050 lowerVectorSplatImm<3>(N, 2, DAG));
4051 case Intrinsic::loongarch_lsx_vsrai_h:
4052 case Intrinsic::loongarch_lasx_xvsrai_h:
4053 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
4054 lowerVectorSplatImm<4>(N, 2, DAG));
4055 case Intrinsic::loongarch_lsx_vsrai_w:
4056 case Intrinsic::loongarch_lasx_xvsrai_w:
4057 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
4058 lowerVectorSplatImm<5>(N, 2, DAG));
4059 case Intrinsic::loongarch_lsx_vsrai_d:
4060 case Intrinsic::loongarch_lasx_xvsrai_d:
4061 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
4062 lowerVectorSplatImm<6>(N, 2, DAG));
4063 case Intrinsic::loongarch_lsx_vclz_b:
4064 case Intrinsic::loongarch_lsx_vclz_h:
4065 case Intrinsic::loongarch_lsx_vclz_w:
4066 case Intrinsic::loongarch_lsx_vclz_d:
4067 case Intrinsic::loongarch_lasx_xvclz_b:
4068 case Intrinsic::loongarch_lasx_xvclz_h:
4069 case Intrinsic::loongarch_lasx_xvclz_w:
4070 case Intrinsic::loongarch_lasx_xvclz_d:
4071 return DAG.getNode(ISD::CTLZ, DL, N->getValueType(0), N->getOperand(1));
4072 case Intrinsic::loongarch_lsx_vpcnt_b:
4073 case Intrinsic::loongarch_lsx_vpcnt_h:
4074 case Intrinsic::loongarch_lsx_vpcnt_w:
4075 case Intrinsic::loongarch_lsx_vpcnt_d:
4076 case Intrinsic::loongarch_lasx_xvpcnt_b:
4077 case Intrinsic::loongarch_lasx_xvpcnt_h:
4078 case Intrinsic::loongarch_lasx_xvpcnt_w:
4079 case Intrinsic::loongarch_lasx_xvpcnt_d:
4080 return DAG.getNode(ISD::CTPOP, DL, N->getValueType(0), N->getOperand(1));
4081 case Intrinsic::loongarch_lsx_vbitclr_b:
4082 case Intrinsic::loongarch_lsx_vbitclr_h:
4083 case Intrinsic::loongarch_lsx_vbitclr_w:
4084 case Intrinsic::loongarch_lsx_vbitclr_d:
4085 case Intrinsic::loongarch_lasx_xvbitclr_b:
4086 case Intrinsic::loongarch_lasx_xvbitclr_h:
4087 case Intrinsic::loongarch_lasx_xvbitclr_w:
4088 case Intrinsic::loongarch_lasx_xvbitclr_d:
4089 return lowerVectorBitClear(N, DAG);
4090 case Intrinsic::loongarch_lsx_vbitclri_b:
4091 case Intrinsic::loongarch_lasx_xvbitclri_b:
4092 return lowerVectorBitClearImm<3>(N, DAG);
4093 case Intrinsic::loongarch_lsx_vbitclri_h:
4094 case Intrinsic::loongarch_lasx_xvbitclri_h:
4095 return lowerVectorBitClearImm<4>(N, DAG);
4096 case Intrinsic::loongarch_lsx_vbitclri_w:
4097 case Intrinsic::loongarch_lasx_xvbitclri_w:
4098 return lowerVectorBitClearImm<5>(N, DAG);
4099 case Intrinsic::loongarch_lsx_vbitclri_d:
4100 case Intrinsic::loongarch_lasx_xvbitclri_d:
4101 return lowerVectorBitClearImm<6>(N, DAG);
4102 case Intrinsic::loongarch_lsx_vbitset_b:
4103 case Intrinsic::loongarch_lsx_vbitset_h:
4104 case Intrinsic::loongarch_lsx_vbitset_w:
4105 case Intrinsic::loongarch_lsx_vbitset_d:
4106 case Intrinsic::loongarch_lasx_xvbitset_b:
4107 case Intrinsic::loongarch_lasx_xvbitset_h:
4108 case Intrinsic::loongarch_lasx_xvbitset_w:
4109 case Intrinsic::loongarch_lasx_xvbitset_d: {
4110 EVT VecTy = N->getValueType(0);
4111 SDValue One = DAG.getConstant(1, DL, VecTy);
4112 return DAG.getNode(
4113 ISD::OR, DL, VecTy, N->getOperand(1),
4114 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
4115 }
4116 case Intrinsic::loongarch_lsx_vbitseti_b:
4117 case Intrinsic::loongarch_lasx_xvbitseti_b:
4118 return lowerVectorBitSetImm<3>(N, DAG);
4119 case Intrinsic::loongarch_lsx_vbitseti_h:
4120 case Intrinsic::loongarch_lasx_xvbitseti_h:
4121 return lowerVectorBitSetImm<4>(N, DAG);
4122 case Intrinsic::loongarch_lsx_vbitseti_w:
4123 case Intrinsic::loongarch_lasx_xvbitseti_w:
4124 return lowerVectorBitSetImm<5>(N, DAG);
4125 case Intrinsic::loongarch_lsx_vbitseti_d:
4126 case Intrinsic::loongarch_lasx_xvbitseti_d:
4127 return lowerVectorBitSetImm<6>(N, DAG);
4128 case Intrinsic::loongarch_lsx_vbitrev_b:
4129 case Intrinsic::loongarch_lsx_vbitrev_h:
4130 case Intrinsic::loongarch_lsx_vbitrev_w:
4131 case Intrinsic::loongarch_lsx_vbitrev_d:
4132 case Intrinsic::loongarch_lasx_xvbitrev_b:
4133 case Intrinsic::loongarch_lasx_xvbitrev_h:
4134 case Intrinsic::loongarch_lasx_xvbitrev_w:
4135 case Intrinsic::loongarch_lasx_xvbitrev_d: {
4136 EVT VecTy = N->getValueType(0);
4137 SDValue One = DAG.getConstant(1, DL, VecTy);
4138 return DAG.getNode(
4139 ISD::XOR, DL, VecTy, N->getOperand(1),
4140 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
4141 }
4142 case Intrinsic::loongarch_lsx_vbitrevi_b:
4143 case Intrinsic::loongarch_lasx_xvbitrevi_b:
4144 return lowerVectorBitRevImm<3>(N, DAG);
4145 case Intrinsic::loongarch_lsx_vbitrevi_h:
4146 case Intrinsic::loongarch_lasx_xvbitrevi_h:
4147 return lowerVectorBitRevImm<4>(N, DAG);
4148 case Intrinsic::loongarch_lsx_vbitrevi_w:
4149 case Intrinsic::loongarch_lasx_xvbitrevi_w:
4150 return lowerVectorBitRevImm<5>(N, DAG);
4151 case Intrinsic::loongarch_lsx_vbitrevi_d:
4152 case Intrinsic::loongarch_lasx_xvbitrevi_d:
4153 return lowerVectorBitRevImm<6>(N, DAG);
4154 case Intrinsic::loongarch_lsx_vfadd_s:
4155 case Intrinsic::loongarch_lsx_vfadd_d:
4156 case Intrinsic::loongarch_lasx_xvfadd_s:
4157 case Intrinsic::loongarch_lasx_xvfadd_d:
4158 return DAG.getNode(ISD::FADD, DL, N->getValueType(0), N->getOperand(1),
4159 N->getOperand(2));
4160 case Intrinsic::loongarch_lsx_vfsub_s:
4161 case Intrinsic::loongarch_lsx_vfsub_d:
4162 case Intrinsic::loongarch_lasx_xvfsub_s:
4163 case Intrinsic::loongarch_lasx_xvfsub_d:
4164 return DAG.getNode(ISD::FSUB, DL, N->getValueType(0), N->getOperand(1),
4165 N->getOperand(2));
4166 case Intrinsic::loongarch_lsx_vfmul_s:
4167 case Intrinsic::loongarch_lsx_vfmul_d:
4168 case Intrinsic::loongarch_lasx_xvfmul_s:
4169 case Intrinsic::loongarch_lasx_xvfmul_d:
4170 return DAG.getNode(ISD::FMUL, DL, N->getValueType(0), N->getOperand(1),
4171 N->getOperand(2));
4172 case Intrinsic::loongarch_lsx_vfdiv_s:
4173 case Intrinsic::loongarch_lsx_vfdiv_d:
4174 case Intrinsic::loongarch_lasx_xvfdiv_s:
4175 case Intrinsic::loongarch_lasx_xvfdiv_d:
4176 return DAG.getNode(ISD::FDIV, DL, N->getValueType(0), N->getOperand(1),
4177 N->getOperand(2));
4178 case Intrinsic::loongarch_lsx_vfmadd_s:
4179 case Intrinsic::loongarch_lsx_vfmadd_d:
4180 case Intrinsic::loongarch_lasx_xvfmadd_s:
4181 case Intrinsic::loongarch_lasx_xvfmadd_d:
4182 return DAG.getNode(ISD::FMA, DL, N->getValueType(0), N->getOperand(1),
4183 N->getOperand(2), N->getOperand(3));
4184 case Intrinsic::loongarch_lsx_vinsgr2vr_b:
4185 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
4186 N->getOperand(1), N->getOperand(2),
4187 legalizeIntrinsicImmArg<4>(N, 3, DAG, Subtarget));
4188 case Intrinsic::loongarch_lsx_vinsgr2vr_h:
4189 case Intrinsic::loongarch_lasx_xvinsgr2vr_w:
4190 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
4191 N->getOperand(1), N->getOperand(2),
4192 legalizeIntrinsicImmArg<3>(N, 3, DAG, Subtarget));
4193 case Intrinsic::loongarch_lsx_vinsgr2vr_w:
4194 case Intrinsic::loongarch_lasx_xvinsgr2vr_d:
4195 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
4196 N->getOperand(1), N->getOperand(2),
4197 legalizeIntrinsicImmArg<2>(N, 3, DAG, Subtarget));
4198 case Intrinsic::loongarch_lsx_vinsgr2vr_d:
4199 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
4200 N->getOperand(1), N->getOperand(2),
4201 legalizeIntrinsicImmArg<1>(N, 3, DAG, Subtarget));
4202 case Intrinsic::loongarch_lsx_vreplgr2vr_b:
4203 case Intrinsic::loongarch_lsx_vreplgr2vr_h:
4204 case Intrinsic::loongarch_lsx_vreplgr2vr_w:
4205 case Intrinsic::loongarch_lsx_vreplgr2vr_d:
4206 case Intrinsic::loongarch_lasx_xvreplgr2vr_b:
4207 case Intrinsic::loongarch_lasx_xvreplgr2vr_h:
4208 case Intrinsic::loongarch_lasx_xvreplgr2vr_w:
4209 case Intrinsic::loongarch_lasx_xvreplgr2vr_d: {
4210 EVT ResTy = N->getValueType(0);
4211 SmallVector<SDValue> Ops(ResTy.getVectorNumElements(), N->getOperand(1));
4212 return DAG.getBuildVector(ResTy, DL, Ops);
4213 }
4214 case Intrinsic::loongarch_lsx_vreplve_b:
4215 case Intrinsic::loongarch_lsx_vreplve_h:
4216 case Intrinsic::loongarch_lsx_vreplve_w:
4217 case Intrinsic::loongarch_lsx_vreplve_d:
4218 case Intrinsic::loongarch_lasx_xvreplve_b:
4219 case Intrinsic::loongarch_lasx_xvreplve_h:
4220 case Intrinsic::loongarch_lasx_xvreplve_w:
4221 case Intrinsic::loongarch_lasx_xvreplve_d:
4222 return DAG.getNode(LoongArchISD::VREPLVE, DL, N->getValueType(0),
4223 N->getOperand(1),
4224 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
4225 N->getOperand(2)));
4226 }
4227 return SDValue();
4228}
4229
4230 SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N,
4231 DAGCombinerInfo &DCI) const {
4232 SelectionDAG &DAG = DCI.DAG;
4233 switch (N->getOpcode()) {
4234 default:
4235 break;
4236 case ISD::AND:
4237 return performANDCombine(N, DAG, DCI, Subtarget);
4238 case ISD::OR:
4239 return performORCombine(N, DAG, DCI, Subtarget);
4240 case ISD::SETCC:
4241 return performSETCCCombine(N, DAG, DCI, Subtarget);
4242 case ISD::SRL:
4243 return performSRLCombine(N, DAG, DCI, Subtarget);
4244 case LoongArchISD::BITREV_W:
4245 return performBITREV_WCombine(N, DAG, DCI, Subtarget);
4246 case ISD::INTRINSIC_WO_CHAIN:
4247 return performINTRINSIC_WO_CHAINCombine(N, DAG, DCI, Subtarget);
4248 }
4249 return SDValue();
4250}
4251
4252 static MachineBasicBlock *insertDivByZeroTrap(MachineInstr &MI,
4253 MachineBasicBlock *MBB) {
4254 if (!ZeroDivCheck)
4255 return MBB;
4256
4257 // Build instructions:
4258 // MBB:
4259 // div(or mod) $dst, $dividend, $divisor
4260 // bnez $divisor, SinkMBB
4261 // BreakMBB:
4262 // break 7 // BRK_DIVZERO
4263 // SinkMBB:
4264 // fallthrough
4265 const BasicBlock *LLVM_BB = MBB->getBasicBlock();
4266 MachineFunction::iterator It = ++MBB->getIterator();
4267 MachineFunction *MF = MBB->getParent();
4268 auto BreakMBB = MF->CreateMachineBasicBlock(LLVM_BB);
4269 auto SinkMBB = MF->CreateMachineBasicBlock(LLVM_BB);
4270 MF->insert(It, BreakMBB);
4271 MF->insert(It, SinkMBB);
4272
4273 // Transfer the remainder of MBB and its successor edges to SinkMBB.
4274 SinkMBB->splice(SinkMBB->end(), MBB, std::next(MI.getIterator()), MBB->end());
4275 SinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
4276
4277 const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
4278 DebugLoc DL = MI.getDebugLoc();
4279 MachineOperand &Divisor = MI.getOperand(2);
4280 Register DivisorReg = Divisor.getReg();
4281
4282 // MBB:
4283 BuildMI(MBB, DL, TII.get(LoongArch::BNEZ))
4284 .addReg(DivisorReg, getKillRegState(Divisor.isKill()))
4285 .addMBB(SinkMBB);
4286 MBB->addSuccessor(BreakMBB);
4287 MBB->addSuccessor(SinkMBB);
4288
4289 // BreakMBB:
4290 // See linux header file arch/loongarch/include/uapi/asm/break.h for the
4291 // definition of BRK_DIVZERO.
4292 BuildMI(BreakMBB, DL, TII.get(LoongArch::BREAK)).addImm(7 /*BRK_DIVZERO*/);
4293 BreakMBB->addSuccessor(SinkMBB);
4294
4295 // Clear Divisor's kill flag.
4296 Divisor.setIsKill(false);
4297
4298 return SinkMBB;
4299}
4300
4301static MachineBasicBlock *
4302 emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB,
4303 const LoongArchSubtarget &Subtarget) {
4304 unsigned CondOpc;
4305 switch (MI.getOpcode()) {
4306 default:
4307 llvm_unreachable("Unexpected opcode");
4308 case LoongArch::PseudoVBZ:
4309 CondOpc = LoongArch::VSETEQZ_V;
4310 break;
4311 case LoongArch::PseudoVBZ_B:
4312 CondOpc = LoongArch::VSETANYEQZ_B;
4313 break;
4314 case LoongArch::PseudoVBZ_H:
4315 CondOpc = LoongArch::VSETANYEQZ_H;
4316 break;
4317 case LoongArch::PseudoVBZ_W:
4318 CondOpc = LoongArch::VSETANYEQZ_W;
4319 break;
4320 case LoongArch::PseudoVBZ_D:
4321 CondOpc = LoongArch::VSETANYEQZ_D;
4322 break;
4323 case LoongArch::PseudoVBNZ:
4324 CondOpc = LoongArch::VSETNEZ_V;
4325 break;
4326 case LoongArch::PseudoVBNZ_B:
4327 CondOpc = LoongArch::VSETALLNEZ_B;
4328 break;
4329 case LoongArch::PseudoVBNZ_H:
4330 CondOpc = LoongArch::VSETALLNEZ_H;
4331 break;
4332 case LoongArch::PseudoVBNZ_W:
4333 CondOpc = LoongArch::VSETALLNEZ_W;
4334 break;
4335 case LoongArch::PseudoVBNZ_D:
4336 CondOpc = LoongArch::VSETALLNEZ_D;
4337 break;
4338 case LoongArch::PseudoXVBZ:
4339 CondOpc = LoongArch::XVSETEQZ_V;
4340 break;
4341 case LoongArch::PseudoXVBZ_B:
4342 CondOpc = LoongArch::XVSETANYEQZ_B;
4343 break;
4344 case LoongArch::PseudoXVBZ_H:
4345 CondOpc = LoongArch::XVSETANYEQZ_H;
4346 break;
4347 case LoongArch::PseudoXVBZ_W:
4348 CondOpc = LoongArch::XVSETANYEQZ_W;
4349 break;
4350 case LoongArch::PseudoXVBZ_D:
4351 CondOpc = LoongArch::XVSETANYEQZ_D;
4352 break;
4353 case LoongArch::PseudoXVBNZ:
4354 CondOpc = LoongArch::XVSETNEZ_V;
4355 break;
4356 case LoongArch::PseudoXVBNZ_B:
4357 CondOpc = LoongArch::XVSETALLNEZ_B;
4358 break;
4359 case LoongArch::PseudoXVBNZ_H:
4360 CondOpc = LoongArch::XVSETALLNEZ_H;
4361 break;
4362 case LoongArch::PseudoXVBNZ_W:
4363 CondOpc = LoongArch::XVSETALLNEZ_W;
4364 break;
4365 case LoongArch::PseudoXVBNZ_D:
4366 CondOpc = LoongArch::XVSETALLNEZ_D;
4367 break;
4368 }
4369
4370 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
4371 const BasicBlock *LLVM_BB = BB->getBasicBlock();
4372 DebugLoc DL = MI.getDebugLoc();
4373 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
4374 MachineFunction::iterator It = ++BB->getIterator();
4375
4376 MachineFunction *F = BB->getParent();
4377 MachineBasicBlock *FalseBB = F->CreateMachineBasicBlock(LLVM_BB);
4378 MachineBasicBlock *TrueBB = F->CreateMachineBasicBlock(LLVM_BB);
4379 MachineBasicBlock *SinkBB = F->CreateMachineBasicBlock(LLVM_BB);
4380
4381 F->insert(It, FalseBB);
4382 F->insert(It, TrueBB);
4383 F->insert(It, SinkBB);
4384
4385 // Transfer the remainder of MBB and its successor edges to Sink.
4386 SinkBB->splice(SinkBB->end(), BB, std::next(MI.getIterator()), BB->end());
4387 SinkBB->transferSuccessorsAndUpdatePHIs(BB);
4388
4389 // Insert the real instruction to BB.
4390 Register FCC = MRI.createVirtualRegister(&LoongArch::CFRRegClass);
4391 BuildMI(BB, DL, TII->get(CondOpc), FCC).addReg(MI.getOperand(1).getReg());
4392
4393 // Insert branch.
4394 BuildMI(BB, DL, TII->get(LoongArch::BCNEZ)).addReg(FCC).addMBB(TrueBB);
4395 BB->addSuccessor(FalseBB);
4396 BB->addSuccessor(TrueBB);
4397
4398 // FalseBB.
4399 Register RD1 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
4400 BuildMI(FalseBB, DL, TII->get(LoongArch::ADDI_W), RD1)
4401 .addReg(LoongArch::R0)
4402 .addImm(0);
4403 BuildMI(FalseBB, DL, TII->get(LoongArch::PseudoBR)).addMBB(SinkBB);
4404 FalseBB->addSuccessor(SinkBB);
4405
4406 // TrueBB.
4407 Register RD2 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
4408 BuildMI(TrueBB, DL, TII->get(LoongArch::ADDI_W), RD2)
4409 .addReg(LoongArch::R0)
4410 .addImm(1);
4411 TrueBB->addSuccessor(SinkBB);
4412
4413 // SinkBB: merge the results.
4414 BuildMI(*SinkBB, SinkBB->begin(), DL, TII->get(LoongArch::PHI),
4415 MI.getOperand(0).getReg())
4416 .addReg(RD1)
4417 .addMBB(FalseBB)
4418 .addReg(RD2)
4419 .addMBB(TrueBB);
4420
4421 // The pseudo instruction is gone now.
4422 MI.eraseFromParent();
4423 return SinkBB;
4424}
4425
4426static MachineBasicBlock *
4427 emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB,
4428 const LoongArchSubtarget &Subtarget) {
4429 unsigned InsOp;
4430 unsigned HalfSize;
4431 switch (MI.getOpcode()) {
4432 default:
4433 llvm_unreachable("Unexpected opcode");
4434 case LoongArch::PseudoXVINSGR2VR_B:
4435 HalfSize = 16;
4436 InsOp = LoongArch::VINSGR2VR_B;
4437 break;
4438 case LoongArch::PseudoXVINSGR2VR_H:
4439 HalfSize = 8;
4440 InsOp = LoongArch::VINSGR2VR_H;
4441 break;
4442 }
4443 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
4444 const TargetRegisterClass *RC = &LoongArch::LASX256RegClass;
4445 const TargetRegisterClass *SubRC = &LoongArch::LSX128RegClass;
4446 DebugLoc DL = MI.getDebugLoc();
4447 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
4448 // XDst = vector_insert XSrc, Elt, Idx
4449 Register XDst = MI.getOperand(0).getReg();
4450 Register XSrc = MI.getOperand(1).getReg();
4451 Register Elt = MI.getOperand(2).getReg();
4452 unsigned Idx = MI.getOperand(3).getImm();
4453
4454 Register ScratchReg1 = XSrc;
4455 if (Idx >= HalfSize) {
4456 ScratchReg1 = MRI.createVirtualRegister(RC);
4457 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), ScratchReg1)
4458 .addReg(XSrc)
4459 .addReg(XSrc)
4460 .addImm(1);
4461 }
4462
4463 Register ScratchSubReg1 = MRI.createVirtualRegister(SubRC);
4464 Register ScratchSubReg2 = MRI.createVirtualRegister(SubRC);
4465 BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), ScratchSubReg1)
4466 .addReg(ScratchReg1, 0, LoongArch::sub_128);
4467 BuildMI(*BB, MI, DL, TII->get(InsOp), ScratchSubReg2)
4468 .addReg(ScratchSubReg1)
4469 .addReg(Elt)
4470 .addImm(Idx >= HalfSize ? Idx - HalfSize : Idx);
4471
4472 Register ScratchReg2 = XDst;
4473 if (Idx >= HalfSize)
4474 ScratchReg2 = MRI.createVirtualRegister(RC);
4475
4476 BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), ScratchReg2)
4477 .addImm(0)
4478 .addReg(ScratchSubReg2)
4479 .addImm(LoongArch::sub_128);
4480
4481 if (Idx >= HalfSize)
4482 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), XDst)
4483 .addReg(XSrc)
4484 .addReg(ScratchReg2)
4485 .addImm(2);
4486
4487 MI.eraseFromParent();
4488 return BB;
4489}
4490
4491MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter(
4492 MachineInstr &MI, MachineBasicBlock *BB) const {
4493 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
4494 DebugLoc DL = MI.getDebugLoc();
4495
4496 switch (MI.getOpcode()) {
4497 default:
4498 llvm_unreachable("Unexpected instr type to insert");
4499 case LoongArch::DIV_W:
4500 case LoongArch::DIV_WU:
4501 case LoongArch::MOD_W:
4502 case LoongArch::MOD_WU:
4503 case LoongArch::DIV_D:
4504 case LoongArch::DIV_DU:
4505 case LoongArch::MOD_D:
4506 case LoongArch::MOD_DU:
4507 return insertDivByZeroTrap(MI, BB);
4508 break;
4509 case LoongArch::WRFCSR: {
4510 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVGR2FCSR),
4511 LoongArch::FCSR0 + MI.getOperand(0).getImm())
4512 .addReg(MI.getOperand(1).getReg());
4513 MI.eraseFromParent();
4514 return BB;
4515 }
4516 case LoongArch::RDFCSR: {
4517 MachineInstr *ReadFCSR =
4518 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVFCSR2GR),
4519 MI.getOperand(0).getReg())
4520 .addReg(LoongArch::FCSR0 + MI.getOperand(1).getImm());
4521 ReadFCSR->getOperand(1).setIsUndef();
4522 MI.eraseFromParent();
4523 return BB;
4524 }
4525 case LoongArch::PseudoVBZ:
4526 case LoongArch::PseudoVBZ_B:
4527 case LoongArch::PseudoVBZ_H:
4528 case LoongArch::PseudoVBZ_W:
4529 case LoongArch::PseudoVBZ_D:
4530 case LoongArch::PseudoVBNZ:
4531 case LoongArch::PseudoVBNZ_B:
4532 case LoongArch::PseudoVBNZ_H:
4533 case LoongArch::PseudoVBNZ_W:
4534 case LoongArch::PseudoVBNZ_D:
4535 case LoongArch::PseudoXVBZ:
4536 case LoongArch::PseudoXVBZ_B:
4537 case LoongArch::PseudoXVBZ_H:
4538 case LoongArch::PseudoXVBZ_W:
4539 case LoongArch::PseudoXVBZ_D:
4540 case LoongArch::PseudoXVBNZ:
4541 case LoongArch::PseudoXVBNZ_B:
4542 case LoongArch::PseudoXVBNZ_H:
4543 case LoongArch::PseudoXVBNZ_W:
4544 case LoongArch::PseudoXVBNZ_D:
4545 return emitVecCondBranchPseudo(MI, BB, Subtarget);
4546 case LoongArch::PseudoXVINSGR2VR_B:
4547 case LoongArch::PseudoXVINSGR2VR_H:
4548 return emitPseudoXVINSGR2VR(MI, BB, Subtarget);
4549 }
4550}
4551
4552 bool LoongArchTargetLowering::allowsMisalignedMemoryAccesses(
4553 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
4554 unsigned *Fast) const {
4555 if (!Subtarget.hasUAL())
4556 return false;
4557
4558 // TODO: set reasonable speed number.
4559 if (Fast)
4560 *Fast = 1;
4561 return true;
4562}
4563
4564const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
4565 switch ((LoongArchISD::NodeType)Opcode) {
4566 case LoongArchISD::FIRST_NUMBER:
4567 break;
4568
4569#define NODE_NAME_CASE(node) \
4570 case LoongArchISD::node: \
4571 return "LoongArchISD::" #node;
4572
4573 // TODO: Add more target-dependent nodes later.
4574 NODE_NAME_CASE(CALL)
4575 NODE_NAME_CASE(CALL_MEDIUM)
4576 NODE_NAME_CASE(CALL_LARGE)
4577 NODE_NAME_CASE(RET)
4578 NODE_NAME_CASE(TAIL)
4579 NODE_NAME_CASE(TAIL_MEDIUM)
4580 NODE_NAME_CASE(TAIL_LARGE)
4581 NODE_NAME_CASE(SLL_W)
4582 NODE_NAME_CASE(SRA_W)
4583 NODE_NAME_CASE(SRL_W)
4584 NODE_NAME_CASE(BSTRINS)
4585 NODE_NAME_CASE(BSTRPICK)
4586 NODE_NAME_CASE(MOVGR2FR_W_LA64)
4587 NODE_NAME_CASE(MOVFR2GR_S_LA64)
4588 NODE_NAME_CASE(FTINT)
4589 NODE_NAME_CASE(REVB_2H)
4590 NODE_NAME_CASE(REVB_2W)
4591 NODE_NAME_CASE(BITREV_4B)
4592 NODE_NAME_CASE(BITREV_W)
4593 NODE_NAME_CASE(ROTR_W)
4594 NODE_NAME_CASE(ROTL_W)
4595 NODE_NAME_CASE(DIV_WU)
4596 NODE_NAME_CASE(MOD_WU)
4597 NODE_NAME_CASE(CLZ_W)
4598 NODE_NAME_CASE(CTZ_W)
4599 NODE_NAME_CASE(DBAR)
4600 NODE_NAME_CASE(IBAR)
4601 NODE_NAME_CASE(BREAK)
4602 NODE_NAME_CASE(SYSCALL)
4603 NODE_NAME_CASE(CRC_W_B_W)
4604 NODE_NAME_CASE(CRC_W_H_W)
4605 NODE_NAME_CASE(CRC_W_W_W)
4606 NODE_NAME_CASE(CRC_W_D_W)
4607 NODE_NAME_CASE(CRCC_W_B_W)
4608 NODE_NAME_CASE(CRCC_W_H_W)
4609 NODE_NAME_CASE(CRCC_W_W_W)
4610 NODE_NAME_CASE(CRCC_W_D_W)
4611 NODE_NAME_CASE(CSRRD)
4612 NODE_NAME_CASE(CSRWR)
4613 NODE_NAME_CASE(CSRXCHG)
4614 NODE_NAME_CASE(IOCSRRD_B)
4615 NODE_NAME_CASE(IOCSRRD_H)
4616 NODE_NAME_CASE(IOCSRRD_W)
4617 NODE_NAME_CASE(IOCSRRD_D)
4618 NODE_NAME_CASE(IOCSRWR_B)
4619 NODE_NAME_CASE(IOCSRWR_H)
4620 NODE_NAME_CASE(IOCSRWR_W)
4621 NODE_NAME_CASE(IOCSRWR_D)
4622 NODE_NAME_CASE(CPUCFG)
4623 NODE_NAME_CASE(MOVGR2FCSR)
4624 NODE_NAME_CASE(MOVFCSR2GR)
4625 NODE_NAME_CASE(CACOP_D)
4626 NODE_NAME_CASE(CACOP_W)
4627 NODE_NAME_CASE(VSHUF)
4628 NODE_NAME_CASE(VPICKEV)
4629 NODE_NAME_CASE(VPICKOD)
4630 NODE_NAME_CASE(VPACKEV)
4631 NODE_NAME_CASE(VPACKOD)
4632 NODE_NAME_CASE(VILVL)
4633 NODE_NAME_CASE(VILVH)
4634 NODE_NAME_CASE(VSHUF4I)
4635 NODE_NAME_CASE(VREPLVEI)
4636 NODE_NAME_CASE(XVPERMI)
4637 NODE_NAME_CASE(VPICK_SEXT_ELT)
4638 NODE_NAME_CASE(VPICK_ZEXT_ELT)
4639 NODE_NAME_CASE(VREPLVE)
4640 NODE_NAME_CASE(VALL_ZERO)
4641 NODE_NAME_CASE(VANY_ZERO)
4642 NODE_NAME_CASE(VALL_NONZERO)
4643 NODE_NAME_CASE(VANY_NONZERO)
4644 }
4645#undef NODE_NAME_CASE
4646 return nullptr;
4647}
4648
4649//===----------------------------------------------------------------------===//
4650// Calling Convention Implementation
4651//===----------------------------------------------------------------------===//
4652
4653// Eight general-purpose registers a0-a7 used for passing integer arguments,
4654// with a0-a1 reused to return values. Generally, the GPRs are used to pass
4655// fixed-point arguments, and floating-point arguments when no FPR is available
4656// or with soft float ABI.
4657const MCPhysReg ArgGPRs[] = {LoongArch::R4, LoongArch::R5, LoongArch::R6,
4658 LoongArch::R7, LoongArch::R8, LoongArch::R9,
4659 LoongArch::R10, LoongArch::R11};
4660// Eight floating-point registers fa0-fa7 used for passing floating-point
4661// arguments, and fa0-fa1 are also used to return values.
4662const MCPhysReg ArgFPR32s[] = {LoongArch::F0, LoongArch::F1, LoongArch::F2,
4663 LoongArch::F3, LoongArch::F4, LoongArch::F5,
4664 LoongArch::F6, LoongArch::F7};
4665// FPR32 and FPR64 alias each other.
4666 const MCPhysReg ArgFPR64s[] = {
4667 LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, LoongArch::F3_64,
4668 LoongArch::F4_64, LoongArch::F5_64, LoongArch::F6_64, LoongArch::F7_64};
4669
4670const MCPhysReg ArgVRs[] = {LoongArch::VR0, LoongArch::VR1, LoongArch::VR2,
4671 LoongArch::VR3, LoongArch::VR4, LoongArch::VR5,
4672 LoongArch::VR6, LoongArch::VR7};
4673
4674const MCPhysReg ArgXRs[] = {LoongArch::XR0, LoongArch::XR1, LoongArch::XR2,
4675 LoongArch::XR3, LoongArch::XR4, LoongArch::XR5,
4676 LoongArch::XR6, LoongArch::XR7};
4677
4678// Pass a 2*GRLen argument that has been split into two GRLen values through
4679// registers or the stack as necessary.
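// For example (illustrative, assuming LA64): an i128 argument is legalized
// into two i64 halves; the first half may still get a GPR while the second
// half goes either to the next GPR or to an 8-byte stack slot.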
4680static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State,
4681 CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1,
4682 unsigned ValNo2, MVT ValVT2, MVT LocVT2,
4683 ISD::ArgFlagsTy ArgFlags2) {
4684 unsigned GRLenInBytes = GRLen / 8;
4685 if (Register Reg = State.AllocateReg(ArgGPRs)) {
4686 // At least one half can be passed via register.
4687 State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
4688 VA1.getLocVT(), CCValAssign::Full));
4689 } else {
4690 // Both halves must be passed on the stack, with proper alignment.
4691 Align StackAlign =
4692 std::max(Align(GRLenInBytes), ArgFlags1.getNonZeroOrigAlign());
4693 State.addLoc(
4694 CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),
4695 State.AllocateStack(GRLenInBytes, StackAlign),
4696 VA1.getLocVT(), CCValAssign::Full));
4697 State.addLoc(CCValAssign::getMem(
4698 ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
4699 LocVT2, CCValAssign::Full));
4700 return false;
4701 }
4702 if (Register Reg = State.AllocateReg(ArgGPRs)) {
4703 // The second half can also be passed via register.
4704 State.addLoc(
4705 CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
4706 } else {
4707 // The second half is passed via the stack, without additional alignment.
4708 State.addLoc(CCValAssign::getMem(
4709 ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
4710 LocVT2, CCValAssign::Full));
4711 }
4712 return false;
4713}
4714
4715// Implements the LoongArch calling convention. Returns true upon failure.
4716 static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI,
4717 unsigned ValNo, MVT ValVT,
4718 CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
4719 CCState &State, bool IsFixed, bool IsRet,
4720 Type *OrigTy) {
4721 unsigned GRLen = DL.getLargestLegalIntTypeSizeInBits();
4722 assert((GRLen == 32 || GRLen == 64) && "Unsupported GRLen");
4723 MVT GRLenVT = GRLen == 32 ? MVT::i32 : MVT::i64;
4724 MVT LocVT = ValVT;
4725
4726 // Any return value split into more than two values can't be returned
4727 // directly.
4728 if (IsRet && ValNo > 1)
4729 return true;
4730
4731 // If passing a variadic argument, or if no FPR is available.
4732 bool UseGPRForFloat = true;
4733
4734 switch (ABI) {
4735 default:
4736 llvm_unreachable("Unexpected ABI");
4737 break;
4738 case LoongArchABI::ABI_ILP32F:
4739 case LoongArchABI::ABI_ILP32D:
4740 case LoongArchABI::ABI_LP64F:
4741 case LoongArchABI::ABI_LP64D:
4742 UseGPRForFloat = !IsFixed;
4743 break;
4744 case LoongArchABI::ABI_ILP32S:
4745 case LoongArchABI::ABI_LP64S:
4746 break;
4747 }
4748
4749 // FPR32 and FPR64 alias each other.
4750 if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s))
4751 UseGPRForFloat = true;
4752
4753 if (UseGPRForFloat && ValVT == MVT::f32) {
4754 LocVT = GRLenVT;
4755 LocInfo = CCValAssign::BCvt;
4756 } else if (UseGPRForFloat && GRLen == 64 && ValVT == MVT::f64) {
4757 LocVT = MVT::i64;
4758 LocInfo = CCValAssign::BCvt;
4759 } else if (UseGPRForFloat && GRLen == 32 && ValVT == MVT::f64) {
4760 // TODO: Handle passing f64 on LA32 with D feature.
4761 report_fatal_error("Passing f64 with GPR on LA32 is undefined");
4762 }
4763
4764 // If this is a variadic argument, the LoongArch calling convention requires
4765 // that it is assigned an 'even' or 'aligned' register if it has (2*GRLen)/8
4766 // byte alignment. An aligned register should be used regardless of whether
4767 // the original argument was split during legalisation or not. The argument
4768 // will not be passed by registers if the original type is larger than
4769 // 2*GRLen, so the register alignment rule does not apply.
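// For example (illustrative, assuming LP64): a variadic argument requiring
// 16-byte (2*GRLen) alignment must start at an even-indexed GPR (a0, a2, a4
// or a6), so if the next free register would be odd-indexed it is skipped
// below.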
4770 unsigned TwoGRLenInBytes = (2 * GRLen) / 8;
4771 if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoGRLenInBytes &&
4772 DL.getTypeAllocSize(OrigTy) == TwoGRLenInBytes) {
4773 unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
4774 // Skip 'odd' register if necessary.
4775 if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1)
4776 State.AllocateReg(ArgGPRs);
4777 }
4778
4779 SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
4780 SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
4781 State.getPendingArgFlags();
4782
4783 assert(PendingLocs.size() == PendingArgFlags.size() &&
4784 "PendingLocs and PendingArgFlags out of sync");
4785
4786 // Split arguments might be passed indirectly, so keep track of the pending
4787 // values.
4788 if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
4789 LocVT = GRLenVT;
4790 LocInfo = CCValAssign::Indirect;
4791 PendingLocs.push_back(
4792 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
4793 PendingArgFlags.push_back(ArgFlags);
4794 if (!ArgFlags.isSplitEnd()) {
4795 return false;
4796 }
4797 }
4798
4799 // If the split argument only had two elements, it should be passed directly
4800 // in registers or on the stack.
4801 if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&
4802 PendingLocs.size() <= 2) {
4803 assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
4804 // Apply the normal calling convention rules to the first half of the
4805 // split argument.
4806 CCValAssign VA = PendingLocs[0];
4807 ISD::ArgFlagsTy AF = PendingArgFlags[0];
4808 PendingLocs.clear();
4809 PendingArgFlags.clear();
4810 return CC_LoongArchAssign2GRLen(GRLen, State, VA, AF, ValNo, ValVT, LocVT,
4811 ArgFlags);
4812 }
4813
4814 // Allocate to a register if possible, or else a stack slot.
4815 Register Reg;
4816 unsigned StoreSizeBytes = GRLen / 8;
4817 Align StackAlign = Align(GRLen / 8);
4818
4819 if (ValVT == MVT::f32 && !UseGPRForFloat)
4820 Reg = State.AllocateReg(ArgFPR32s);
4821 else if (ValVT == MVT::f64 && !UseGPRForFloat)
4822 Reg = State.AllocateReg(ArgFPR64s);
4823 else if (ValVT.is128BitVector())
4824 Reg = State.AllocateReg(ArgVRs);
4825 else if (ValVT.is256BitVector())
4826 Reg = State.AllocateReg(ArgXRs);
4827 else
4828 Reg = State.AllocateReg(ArgGPRs);
4829
4830 unsigned StackOffset =
4831 Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign);
4832
4833 // If we reach this point and PendingLocs is non-empty, we must be at the
4834 // end of a split argument that must be passed indirectly.
4835 if (!PendingLocs.empty()) {
4836 assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
4837 assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
4838 for (auto &It : PendingLocs) {
4839 if (Reg)
4840 It.convertToReg(Reg);
4841 else
4842 It.convertToMem(StackOffset);
4843 State.addLoc(It);
4844 }
4845 PendingLocs.clear();
4846 PendingArgFlags.clear();
4847 return false;
4848 }
4849 assert((!UseGPRForFloat || LocVT == GRLenVT) &&
4850 "Expected an GRLenVT at this stage");
4851
4852 if (Reg) {
4853 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
4854 return false;
4855 }
4856
4857 // When a floating-point value is passed on the stack, no bit-cast is needed.
4858 if (ValVT.isFloatingPoint()) {
4859 LocVT = ValVT;
4860 LocInfo = CCValAssign::Full;
4861 }
4862
4863 State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
4864 return false;
4865}
4866
4867void LoongArchTargetLowering::analyzeInputArgs(
4868 MachineFunction &MF, CCState &CCInfo,
4869 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
4870 LoongArchCCAssignFn Fn) const {
4871 FunctionType *FType = MF.getFunction().getFunctionType();
4872 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
4873 MVT ArgVT = Ins[i].VT;
4874 Type *ArgTy = nullptr;
4875 if (IsRet)
4876 ArgTy = FType->getReturnType();
4877 else if (Ins[i].isOrigArg())
4878 ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
4881 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Ins[i].Flags,
4882 CCInfo, /*IsFixed=*/true, IsRet, ArgTy)) {
4883 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " << ArgVT
4884 << '\n');
4885 llvm_unreachable("");
4886 }
4887 }
4888}
4889
4890void LoongArchTargetLowering::analyzeOutputArgs(
4891 MachineFunction &MF, CCState &CCInfo,
4892 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
4893 CallLoweringInfo *CLI, LoongArchCCAssignFn Fn) const {
4894 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
4895 MVT ArgVT = Outs[i].VT;
4896 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
4899 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Outs[i].Flags,
4900 CCInfo, Outs[i].IsFixed, IsRet, OrigTy)) {
4901 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " << ArgVT
4902 << "\n");
4903 llvm_unreachable("");
4904 }
4905 }
4906}
4907
4908// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
4909// values.
4910static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
4911 const CCValAssign &VA, const SDLoc &DL) {
4912 switch (VA.getLocInfo()) {
4913 default:
4914 llvm_unreachable("Unexpected CCValAssign::LocInfo");
4915 case CCValAssign::Full:
4917 break;
4918 case CCValAssign::BCvt:
4919 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
4920 Val = DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Val);
4921 else
4922 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
4923 break;
4924 }
4925 return Val;
4926}
4927
4928static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
4929 const CCValAssign &VA, const SDLoc &DL,
4930 const ISD::InputArg &In,
4931 const LoongArchTargetLowering &TLI) {
4932 MachineFunction &MF = DAG.getMachineFunction();
4933 MachineRegisterInfo &RegInfo = MF.getRegInfo();
4934 EVT LocVT = VA.getLocVT();
4935 SDValue Val;
4936 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
4937 Register VReg = RegInfo.createVirtualRegister(RC);
4938 RegInfo.addLiveIn(VA.getLocReg(), VReg);
4939 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
4940
4941 // If input is sign extended from 32 bits, note it for the OptW pass.
4942 if (In.isOrigArg()) {
4943 Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex());
4944 if (OrigArg->getType()->isIntegerTy()) {
4945 unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
4946 // An input zero extended from i31 can also be considered sign extended.
4947 if ((BitWidth <= 32 && In.Flags.isSExt()) ||
4948 (BitWidth < 32 && In.Flags.isZExt())) {
4949 LoongArchMachineFunctionInfo *LAFI =
4950 MF.getInfo<LoongArchMachineFunctionInfo>();
4951 LAFI->addSExt32Register(VReg);
4952 }
4953 }
4954 }
4955
4956 return convertLocVTToValVT(DAG, Val, VA, DL);
4957}
4958
4959// The caller is responsible for loading the full value if the argument is
4960// passed with CCValAssign::Indirect.
4961static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
4962 const CCValAssign &VA, const SDLoc &DL) {
4963 MachineFunction &MF = DAG.getMachineFunction();
4964 MachineFrameInfo &MFI = MF.getFrameInfo();
4965 EVT ValVT = VA.getValVT();
4966 int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
4967 /*IsImmutable=*/true);
4968 SDValue FIN = DAG.getFrameIndex(
4970
4971 ISD::LoadExtType ExtType;
4972 switch (VA.getLocInfo()) {
4973 default:
4974 llvm_unreachable("Unexpected CCValAssign::LocInfo");
4975 case CCValAssign::Full:
4976 case CCValAssign::Indirect:
4977 case CCValAssign::BCvt:
4978 ExtType = ISD::NON_EXTLOAD;
4979 break;
4980 }
4981 return DAG.getExtLoad(
4982 ExtType, DL, VA.getLocVT(), Chain, FIN,
4984}
4985
4986static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
4987 const CCValAssign &VA, const SDLoc &DL) {
4988 EVT LocVT = VA.getLocVT();
4989
4990 switch (VA.getLocInfo()) {
4991 default:
4992 llvm_unreachable("Unexpected CCValAssign::LocInfo");
4993 case CCValAssign::Full:
4994 break;
4995 case CCValAssign::BCvt:
4996 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
4997 Val = DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Val);
4998 else
4999 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
5000 break;
5001 }
5002 return Val;
5003}
5004
5005static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
5006 CCValAssign::LocInfo LocInfo,
5007 ISD::ArgFlagsTy ArgFlags, CCState &State) {
5008 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
5009 // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, SpLim
5010 // s0 s1 s2 s3 s4 s5 s6 s7 s8
5011 static const MCPhysReg GPRList[] = {
5012 LoongArch::R23, LoongArch::R24, LoongArch::R25,
5013 LoongArch::R26, LoongArch::R27, LoongArch::R28,
5014 LoongArch::R29, LoongArch::R30, LoongArch::R31};
5015 if (unsigned Reg = State.AllocateReg(GPRList)) {
5016 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
5017 return false;
5018 }
5019 }
5020
5021 if (LocVT == MVT::f32) {
5022 // Pass in STG registers: F1, F2, F3, F4
5023 // fs0,fs1,fs2,fs3
5024 static const MCPhysReg FPR32List[] = {LoongArch::F24, LoongArch::F25,
5025 LoongArch::F26, LoongArch::F27};
5026 if (unsigned Reg = State.AllocateReg(FPR32List)) {
5027 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
5028 return false;
5029 }
5030 }
5031
5032 if (LocVT == MVT::f64) {
5033 // Pass in STG registers: D1, D2, D3, D4
5034 // fs4,fs5,fs6,fs7
5035 static const MCPhysReg FPR64List[] = {LoongArch::F28_64, LoongArch::F29_64,
5036 LoongArch::F30_64, LoongArch::F31_64};
5037 if (unsigned Reg = State.AllocateReg(FPR64List)) {
5038 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
5039 return false;
5040 }
5041 }
5042
5043 report_fatal_error("No registers left in GHC calling convention");
5044 return true;
5045}
5046
5047// Transform physical registers into virtual registers.
5048SDValue LoongArchTargetLowering::LowerFormalArguments(
5049 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
5050 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
5051 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
5052
5053 MachineFunction &MF = DAG.getMachineFunction();
5054
5055 switch (CallConv) {
5056 default:
5057 llvm_unreachable("Unsupported calling convention");
5058 case CallingConv::C:
5059 case CallingConv::Fast:
5060 break;
5061 case CallingConv::GHC:
5062 if (!MF.getSubtarget().hasFeature(LoongArch::FeatureBasicF) ||
5063 !MF.getSubtarget().hasFeature(LoongArch::FeatureBasicD))
5065 "GHC calling convention requires the F and D extensions");
5066 }
5067
5068 EVT PtrVT = getPointerTy(DAG.getDataLayout());
5069 MVT GRLenVT = Subtarget.getGRLenVT();
5070 unsigned GRLenInBytes = Subtarget.getGRLen() / 8;
5071 // Used with varargs to accumulate store chains.
5072 std::vector<SDValue> OutChains;
5073
5074 // Assign locations to all of the incoming arguments.
5076 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
5077
5078 if (CallConv == CallingConv::GHC)
5079 CCInfo.AnalyzeFormalArguments(Ins, CC_LoongArch_GHC);
5080 else
5081 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false, CC_LoongArch);
5082
5083 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
5084 CCValAssign &VA = ArgLocs[i];
5085 SDValue ArgValue;
5086 if (VA.isRegLoc())
5087 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[i], *this);
5088 else
5089 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
5090 if (VA.getLocInfo() == CCValAssign::Indirect) {
5091 // If the original argument was split and passed by reference, we need to
5092 // load all parts of it here (using the same address).
5093 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
5095 unsigned ArgIndex = Ins[i].OrigArgIndex;
5096 unsigned ArgPartOffset = Ins[i].PartOffset;
5097 assert(ArgPartOffset == 0);
5098 while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) {
5099 CCValAssign &PartVA = ArgLocs[i + 1];
5100 unsigned PartOffset = Ins[i + 1].PartOffset - ArgPartOffset;
5101 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
5102 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
5103 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
5105 ++i;
5106 }
5107 continue;
5108 }
5109 InVals.push_back(ArgValue);
5110 }
5111
5112 if (IsVarArg) {
5114 unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
5115 const TargetRegisterClass *RC = &LoongArch::GPRRegClass;
5116 MachineFrameInfo &MFI = MF.getFrameInfo();
5117 MachineRegisterInfo &RegInfo = MF.getRegInfo();
5118 auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();
5119
5120 // Offset of the first variable argument from stack pointer, and size of
5121 // the vararg save area. For now, the varargs save area is either zero or
5122 // large enough to hold a0-a7.
5123 int VaArgOffset, VarArgsSaveSize;
5124
5125 // If all registers are allocated, then all varargs must be passed on the
5126 // stack and we don't need to save any argregs.
5127 if (ArgRegs.size() == Idx) {
5128 VaArgOffset = CCInfo.getStackSize();
5129 VarArgsSaveSize = 0;
5130 } else {
5131 VarArgsSaveSize = GRLenInBytes * (ArgRegs.size() - Idx);
5132 VaArgOffset = -VarArgsSaveSize;
5133 }
5134
5135 // Record the frame index of the first variable argument
5136 // which is a value necessary to VASTART.
5137 int FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
5138 LoongArchFI->setVarArgsFrameIndex(FI);
5139
5140 // If saving an odd number of registers then create an extra stack slot to
5141 // ensure that the frame pointer is 2*GRLen-aligned, which in turn ensures
5142 // offsets to even-numbered registers remain 2*GRLen-aligned.
5143 if (Idx % 2) {
5144 MFI.CreateFixedObject(GRLenInBytes, VaArgOffset - (int)GRLenInBytes,
5145 true);
5146 VarArgsSaveSize += GRLenInBytes;
5147 }
5148
5149 // Copy the integer registers that may have been used for passing varargs
5150 // to the vararg save area.
5151 for (unsigned I = Idx; I < ArgRegs.size();
5152 ++I, VaArgOffset += GRLenInBytes) {
5153 const Register Reg = RegInfo.createVirtualRegister(RC);
5154 RegInfo.addLiveIn(ArgRegs[I], Reg);
5155 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, GRLenVT);
5156 FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
5157 SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
5158 SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
5160 cast<StoreSDNode>(Store.getNode())
5161 ->getMemOperand()
5162 ->setValue((Value *)nullptr);
5163 OutChains.push_back(Store);
5164 }
5165 LoongArchFI->setVarArgsSaveSize(VarArgsSaveSize);
5166 }
5167
5168 // All stores are grouped in one node to allow the matching between
5169 // the size of Ins and InVals. This only happens for vararg functions.
5170 if (!OutChains.empty()) {
5171 OutChains.push_back(Chain);
5172 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
5173 }
5174
5175 return Chain;
5176}
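To make the vararg bookkeeping above concrete (a worked example under the LA64 assumption GRLenInBytes = 8): if the fixed arguments consume a0-a2, then Idx = 3, VarArgsSaveSize = 8 * (8 - 3) = 40 and VaArgOffset = -40; because Idx is odd, one extra 8-byte slot is created and VarArgsSaveSize grows to 48, keeping the even-numbered register slots 2*GRLen-aligned. A hypothetical function that would take this path:

#include <cstdarg>

// Hypothetical variadic function (illustration only): the three fixed int
// arguments occupy a0-a2, so a3-a7 are copied into the vararg save area by
// the loop in LowerFormalArguments above.
int sum_rest(int count, int base, int scale, ...) {
  va_list ap;
  va_start(ap, scale);
  int total = base;
  for (int i = 0; i < count; ++i)
    total += scale * va_arg(ap, int);
  va_end(ap);
  return total;
}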
5177
5179 return CI->isTailCall();
5180}
5181
5182// Check if the return value is used only as a return value, as otherwise
5183// we can't perform a tail-call.
5185 SDValue &Chain) const {
5186 if (N->getNumValues() != 1)
5187 return false;
5188 if (!N->hasNUsesOfValue(1, 0))
5189 return false;
5190
5191 SDNode *Copy = *N->use_begin();
5192 if (Copy->getOpcode() != ISD::CopyToReg)
5193 return false;
5194
5195 // If the ISD::CopyToReg has a glue operand, we conservatively assume it
5196 // isn't safe to perform a tail call.
5197 if (Copy->getGluedNode())
5198 return false;
5199
5200 // The copy must be used by a LoongArchISD::RET, and nothing else.
5201 bool HasRet = false;
5202 for (SDNode *Node : Copy->uses()) {
5203 if (Node->getOpcode() != LoongArchISD::RET)
5204 return false;
5205 HasRet = true;
5206 }
5207
5208 if (!HasRet)
5209 return false;
5210
5211 Chain = Copy->getOperand(0);
5212 return true;
5213}
5214
5215// Check whether the call is eligible for tail call optimization.
5216bool LoongArchTargetLowering::isEligibleForTailCallOptimization(
5217 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
5218 const SmallVectorImpl<CCValAssign> &ArgLocs) const {
5219
5220 auto CalleeCC = CLI.CallConv;
5221 auto &Outs = CLI.Outs;
5222 auto &Caller = MF.getFunction();
5223 auto CallerCC = Caller.getCallingConv();
5224
5225 // Do not tail call opt if the stack is used to pass parameters.
5226 if (CCInfo.getStackSize() != 0)
5227 return false;
5228
5229 // Do not tail call opt if any parameters need to be passed indirectly.
5230 for (auto &VA : ArgLocs)
5231 if (VA.getLocInfo() == CCValAssign::Indirect)
5232 return false;
5233
5234 // Do not tail call opt if either caller or callee uses struct return
5235 // semantics.
5236 auto IsCallerStructRet = Caller.hasStructRetAttr();
5237 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
5238 if (IsCallerStructRet || IsCalleeStructRet)
5239 return false;
5240
5241 // Do not tail call opt if either the callee or caller has a byval argument.
5242 for (auto &Arg : Outs)
5243 if (Arg.Flags.isByVal())
5244 return false;
5245
5246 // The callee has to preserve all registers the caller needs to preserve.
5247 const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
5248 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
5249 if (CalleeCC != CallerCC) {
5250 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
5251 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
5252 return false;
5253 }
5254 return true;
5255}
5256
5258 return DAG.getDataLayout().getPrefTypeAlign(
5259 VT.getTypeForEVT(*DAG.getContext()));
5260}
5261
5262// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
5263// and output parameter nodes.
5264SDValue
5265LoongArchTargetLowering::LowerCall(CallLoweringInfo &CLI,
5266 SmallVectorImpl<SDValue> &InVals) const {
5267 SelectionDAG &DAG = CLI.DAG;
5268 SDLoc &DL = CLI.DL;
5270 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
5272 SDValue Chain = CLI.Chain;
5273 SDValue Callee = CLI.Callee;
5274 CallingConv::ID CallConv = CLI.CallConv;
5275 bool IsVarArg = CLI.IsVarArg;
5276 EVT PtrVT = getPointerTy(DAG.getDataLayout());
5277 MVT GRLenVT = Subtarget.getGRLenVT();
5278 bool &IsTailCall = CLI.IsTailCall;
5279
5281
5282 // Analyze the operands of the call, assigning locations to each operand.
5284 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
5285
5286 if (CallConv == CallingConv::GHC)
5287 ArgCCInfo.AnalyzeCallOperands(Outs, CC_LoongArch_GHC);
5288 else
5289 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI, CC_LoongArch);
5290
5291 // Check if it's really possible to do a tail call.
5292 if (IsTailCall)
5293 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
5294
5295 if (IsTailCall)
5296 ++NumTailCalls;
5297 else if (CLI.CB && CLI.CB->isMustTailCall())
5298 report_fatal_error("failed to perform tail call elimination on a call "
5299 "site marked musttail");
5300
5301 // Get a count of how many bytes are to be pushed on the stack.
5302 unsigned NumBytes = ArgCCInfo.getStackSize();
5303
5304 // Create local copies for byval args.
5305 SmallVector<SDValue> ByValArgs;
5306 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
5307 ISD::ArgFlagsTy Flags = Outs[i].Flags;
5308 if (!Flags.isByVal())
5309 continue;
5310
5311 SDValue Arg = OutVals[i];
5312 unsigned Size = Flags.getByValSize();
5313 Align Alignment = Flags.getNonZeroByValAlign();
5314
5315 int FI =
5316 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
5317 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
5318 SDValue SizeNode = DAG.getConstant(Size, DL, GRLenVT);
5319
5320 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
5321 /*IsVolatile=*/false,
5322 /*AlwaysInline=*/false, /*CI=*/nullptr, std::nullopt,
5324 ByValArgs.push_back(FIPtr);
5325 }
5326
5327 if (!IsTailCall)
5328 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
5329
5330 // Copy argument values to their designated locations.
5332 SmallVector<SDValue> MemOpChains;
5333 SDValue StackPtr;
5334 for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) {
5335 CCValAssign &VA = ArgLocs[i];
5336 SDValue ArgValue = OutVals[i];
5337 ISD::ArgFlagsTy Flags = Outs[i].Flags;
5338
5339 // Promote the value if needed.
5340 // For now, only handle fully promoted and indirect arguments.
5341 if (VA.getLocInfo() == CCValAssign::Indirect) {
5342 // Store the argument in a stack slot and pass its address.
5343 Align StackAlign =
5344 std::max(getPrefTypeAlign(Outs[i].ArgVT, DAG),
5345 getPrefTypeAlign(ArgValue.getValueType(), DAG));
5346 TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
5347 // If the original argument was split and passed by reference, we need to
5348 // store the required parts of it here (and pass just one address).
5349 unsigned ArgIndex = Outs[i].OrigArgIndex;
5350 unsigned ArgPartOffset = Outs[i].PartOffset;
5351 assert(ArgPartOffset == 0);
5352 // Calculate the total size to store. We don't have access to what we're
5353 // actually storing other than performing the loop and collecting the
5354 // info.
5356 while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) {
5357 SDValue PartValue = OutVals[i + 1];
5358 unsigned PartOffset = Outs[i + 1].PartOffset - ArgPartOffset;
5359 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
5360 EVT PartVT = PartValue.getValueType();
5361
5362 StoredSize += PartVT.getStoreSize();
5363 StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
5364 Parts.push_back(std::make_pair(PartValue, Offset));
5365 ++i;
5366 }
5367 SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
5368 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
5369 MemOpChains.push_back(
5370 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
5372 for (const auto &Part : Parts) {
5373 SDValue PartValue = Part.first;
5374 SDValue PartOffset = Part.second;
5376 DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
5377 MemOpChains.push_back(
5378 DAG.getStore(Chain, DL, PartValue, Address,
5380 }
5381 ArgValue = SpillSlot;
5382 } else {
5383 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL);
5384 }
5385
5386 // Use local copy if it is a byval arg.
5387 if (Flags.isByVal())
5388 ArgValue = ByValArgs[j++];
5389
5390 if (VA.isRegLoc()) {
5391 // Queue up the argument copies and emit them at the end.
5392 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
5393 } else {
5394 assert(VA.isMemLoc() && "Argument not register or memory");
5395 assert(!IsTailCall && "Tail call not allowed if stack is used "
5396 "for passing parameters");
5397
5398 // Work out the address of the stack slot.
5399 if (!StackPtr.getNode())
5400 StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT);
5402 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
5404
5405 // Emit the store.
5406 MemOpChains.push_back(
5407 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
5408 }
5409 }
5410
5411 // Join the stores, which are independent of one another.
5412 if (!MemOpChains.empty())
5413 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
5414
5415 SDValue Glue;
5416
5417 // Build a sequence of copy-to-reg nodes, chained and glued together.
5418 for (auto &Reg : RegsToPass) {
5419 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
5420 Glue = Chain.getValue(1);
5421 }
5422
5423 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
5424 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
5425 // split it and then direct call can be matched by PseudoCALL.
5426 if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
5427 const GlobalValue *GV = S->getGlobal();
5428 unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(GV)
5431 Callee = DAG.getTargetGlobalAddress(S->getGlobal(), DL, PtrVT, 0, OpFlags);
5432 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
5433 unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(nullptr)
5436 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);
5437 }
5438
5439 // The first call operand is the chain and the second is the target address.
5441 Ops.push_back(Chain);
5442 Ops.push_back(Callee);
5443
5444 // Add argument registers to the end of the list so that they are
5445 // known live into the call.
5446 for (auto &Reg : RegsToPass)
5447 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
5448
5449 if (!IsTailCall) {
5450 // Add a register mask operand representing the call-preserved registers.
5451 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
5452 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
5453 assert(Mask && "Missing call preserved mask for calling convention");
5454 Ops.push_back(DAG.getRegisterMask(Mask));
5455 }
5456
5457 // Glue the call to the argument copies, if any.
5458 if (Glue.getNode())
5459 Ops.push_back(Glue);
5460
5461 // Emit the call.
5462 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
5463 unsigned Op;
5464 switch (DAG.getTarget().getCodeModel()) {
5465 default:
5466 report_fatal_error("Unsupported code model");
5467 case CodeModel::Small:
5468 Op = IsTailCall ? LoongArchISD::TAIL : LoongArchISD::CALL;
5469 break;
5470 case CodeModel::Medium:
5471 assert(Subtarget.is64Bit() && "Medium code model requires LA64");
5473 break;
5474 case CodeModel::Large:
5475 assert(Subtarget.is64Bit() && "Large code model requires LA64");
5477 break;
5478 }
5479
5480 if (IsTailCall) {
5482 SDValue Ret = DAG.getNode(Op, DL, NodeTys, Ops);
5483 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
5484 return Ret;
5485 }
5486
5487 Chain = DAG.getNode(Op, DL, NodeTys, Ops);
5488 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
5489 Glue = Chain.getValue(1);
5490
5491 // Mark the end of the call, which is glued to the call itself.
5492 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
5493 Glue = Chain.getValue(1);
5494
5495 // Assign locations to each value returned by this call.
5497 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
5498 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_LoongArch);
5499
5500 // Copy all of the result registers out of their specified physreg.
5501 for (auto &VA : RVLocs) {
5502 // Copy the value out.
5503 SDValue RetValue =
5504 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
5505 // Glue the RetValue to the end of the call sequence.
5506 Chain = RetValue.getValue(1);
5507 Glue = RetValue.getValue(2);
5508
5509 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL);
5510
5511 InVals.push_back(RetValue);
5512 }
5513
5514 return Chain;
5515}
5516
5517bool LoongArchTargetLowering::CanLowerReturn(
5518 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
5519 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
5521 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
5522
5523 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
5524 LoongArchABI::ABI ABI =
5525 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
5526 if (CC_LoongArch(MF.getDataLayout(), ABI, i, Outs[i].VT, CCValAssign::Full,
5527 Outs[i].Flags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true,
5528 nullptr))
5529 return false;
5530 }
5531 return true;
5532}
5533
5534SDValue LoongArchTargetLowering::LowerReturn(
5535 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
5536 const SmallVectorImpl<ISD::OutputArg> &Outs,
5537 const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
5538 SelectionDAG &DAG) const {
5539 // Stores the assignment of the return value to a location.
5541
5542 // Info about the registers and stack slot.
5543 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
5544 *DAG.getContext());
5545
5546 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
5547 nullptr, CC_LoongArch);
5548 if (CallConv == CallingConv::GHC && !RVLocs.empty())
5549 report_fatal_error("GHC functions return void only");
5550 SDValue Glue;
5551 SmallVector<SDValue, 4> RetOps(1, Chain);
5552
5553 // Copy the result values into the output registers.
5554 for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) {
5555 CCValAssign &VA = RVLocs[i];
5556 assert(VA.isRegLoc() && "Can only return in registers!");
5557
5558 // Handle a 'normal' return.
5559 SDValue Val = convertValVTToLocVT(DAG, OutVals[i], VA, DL);
5560 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
5561
5562 // Guarantee that all emitted copies are stuck together.
5563 Glue = Chain.getValue(1);
5564 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
5565 }
5566
5567 RetOps[0] = Chain; // Update chain.
5568
5569 // Add the glue node if we have it.
5570 if (Glue.getNode())
5571 RetOps.push_back(Glue);
5572
5573 return DAG.getNode(LoongArchISD::RET, DL, MVT::Other, RetOps);
5574}
5575
5577 EVT VT) const {
5578 if (!Subtarget.hasExtLSX())
5579 return false;
5580
5581 if (VT == MVT::f32) {
5582 uint64_t masked = Imm.bitcastToAPInt().getZExtValue() & 0x7e07ffff;
5583 return (masked == 0x3e000000 || masked == 0x40000000);
5584 }
5585
5586 if (VT == MVT::f64) {
5587 uint64_t masked = Imm.bitcastToAPInt().getZExtValue() & 0x7fc0ffffffffffff;
5588 return (masked == 0x3fc0000000000000 || masked == 0x4000000000000000);
5589 }
5590
5591 return false;
5592}
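The two masked comparisons above describe which single-precision bit patterns qualify for the vector VLDI immediate materialization. A standalone restatement of the f32 branch (plain host code written for illustration, not an LLVM API) makes it easy to probe candidate constants:

#include <cstdint>
#include <cstring>

// Mirrors the f32 check in isFPImmVLDILegal above (illustrative sketch).
static bool f32PassesVLDIMask(float F) {
  uint32_t Bits;
  std::memcpy(&Bits, &F, sizeof(Bits));
  uint32_t Masked = Bits & 0x7e07ffffu;
  return Masked == 0x3e000000u || Masked == 0x40000000u;
}

// For instance, f32PassesVLDIMask(0.5f) and f32PassesVLDIMask(2.0f) hold,
// while f32PassesVLDIMask(0.3f) does not.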
5593
5594bool LoongArchTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
5595 bool ForCodeSize) const {
5596 // TODO: Maybe need more checks here after vector extension is supported.
5597 if (VT == MVT::f32 && !Subtarget.hasBasicF())
5598 return false;
5599 if (VT == MVT::f64 && !Subtarget.hasBasicD())
5600 return false;
5601 return (Imm.isZero() || Imm.isExactlyValue(1.0) || isFPImmVLDILegal(Imm, VT));
5602}
5603
5605 return true;
5606}
5607
5609 return true;
5610}
5611
5612bool LoongArchTargetLowering::shouldInsertFencesForAtomic(
5613 const Instruction *I) const {
5614 if (!Subtarget.is64Bit())
5615 return isa<LoadInst>(I) || isa<StoreInst>(I);
5616
5617 if (isa<LoadInst>(I))
5618 return true;
5619
5620 // On LA64, atomic store operations with IntegerBitWidth of 32 and 64 do not
5621 // require fences because we can use amswap_db.[w/d].
5622 if (isa<StoreInst>(I)) {
5623 unsigned Size = I->getOperand(0)->getType()->getIntegerBitWidth();
5624 return (Size == 8 || Size == 16);
5625 }
5626
5627 return false;
5628}
5629
5631 LLVMContext &Context,
5632 EVT VT) const {
5633 if (!VT.isVector())
5634 return getPointerTy(DL);
5636}
5637
5639 // TODO: Support vectors.
5640 return Y.getValueType().isScalarInteger() && !isa<ConstantSDNode>(Y);
5641}
5642
5644 const CallInst &I,
5645 MachineFunction &MF,
5646 unsigned Intrinsic) const {
5647 switch (Intrinsic) {
5648 default:
5649 return false;
5650 case Intrinsic::loongarch_masked_atomicrmw_xchg_i32:
5651 case Intrinsic::loongarch_masked_atomicrmw_add_i32:
5652 case Intrinsic::loongarch_masked_atomicrmw_sub_i32:
5653 case Intrinsic::loongarch_masked_atomicrmw_nand_i32:
5655 Info.memVT = MVT::i32;
5656 Info.ptrVal = I.getArgOperand(0);
5657 Info.offset = 0;
5658 Info.align = Align(4);
5661 return true;
5662 // TODO: Add more Intrinsics later.
5663 }
5664}
5665
5668 // TODO: Add more AtomicRMWInst cases that need to be extended.
5669
5670 // Since a floating-point operation requires a non-trivial set of data
5671 // operations, use CmpXChg to expand.
5672 if (AI->isFloatingPointOperation() ||
5676
5677 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
5678 if (Size == 8 || Size == 16)
5681}
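A hypothetical piece of user code that reaches the sub-word case above: an i16 atomicrmw is narrower than a GR register, so the Size == 8 || Size == 16 test routes it to the masked expansion that the intrinsics chosen by getIntrinsicForMaskedAtomicRMWBinOp below implement.

#include <atomic>

// Hypothetical example (not from this file): a 16-bit fetch_add lowers to an
// i16 atomicrmw add, which the width check above sends down the masked LL/SC
// path rather than using a full-width AM* instruction.
short bump(std::atomic<short> &Counter) { return Counter.fetch_add(1); }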
5682
5683static Intrinsic::ID
5685 AtomicRMWInst::BinOp BinOp) {
5686 if (GRLen == 64) {
5687 switch (BinOp) {
5688 default:
5689 llvm_unreachable("Unexpected AtomicRMW BinOp");
5691 return Intrinsic::loongarch_masked_atomicrmw_xchg_i64;
5692 case AtomicRMWInst::Add:
5693 return Intrinsic::loongarch_masked_atomicrmw_add_i64;
5694 case AtomicRMWInst::Sub:
5695 return Intrinsic::loongarch_masked_atomicrmw_sub_i64;
5697 return Intrinsic::loongarch_masked_atomicrmw_nand_i64;
5699 return Intrinsic::loongarch_masked_atomicrmw_umax_i64;
5701 return Intrinsic::loongarch_masked_atomicrmw_umin_i64;
5702 case AtomicRMWInst::Max:
5703 return Intrinsic::loongarch_masked_atomicrmw_max_i64;
5704 case AtomicRMWInst::Min:
5705 return Intrinsic::loongarch_masked_atomicrmw_min_i64;
5706 // TODO: support other AtomicRMWInst.
5707 }
5708 }
5709
5710 if (GRLen == 32) {
5711 switch (BinOp) {
5712 default:
5713 llvm_unreachable("Unexpected AtomicRMW BinOp");
5715 return Intrinsic::loongarch_masked_atomicrmw_xchg_i32;
5716 case AtomicRMWInst::Add:
5717 return Intrinsic::loongarch_masked_atomicrmw_add_i32;
5718 case AtomicRMWInst::Sub:
5719 return Intrinsic::loongarch_masked_atomicrmw_sub_i32;
5721 return Intrinsic::loongarch_masked_atomicrmw_nand_i32;
5722 // TODO: support other AtomicRMWInst.
5723 }
5724 }
5725
5726 llvm_unreachable("Unexpected GRLen\n");
5727}
5728
5731 AtomicCmpXchgInst *CI) const {
5733 if (Size == 8 || Size == 16)
5736}
5737
5739 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
5740 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
5741 AtomicOrdering FailOrd = CI->getFailureOrdering();
5742 Value *FailureOrdering =
5743 Builder.getIntN(Subtarget.getGRLen(), static_cast<uint64_t>(FailOrd));
5744
5745 // TODO: Support cmpxchg on LA32.
5746 Intrinsic::ID CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i64;
5747 CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
5748 NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
5749 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
5750 Type *Tys[] = {AlignedAddr->getType()};
5751 Function *MaskedCmpXchg =
5752 Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys);
5753 Value *Result = Builder.CreateCall(
5754 MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, FailureOrdering});
5755 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
5756 return Result;
5757}
5758
5760 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
5761 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
5762 // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
5763 // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
5764 // mask, as this produces better code than the LL/SC loop emitted by
5765 // int_loongarch_masked_atomicrmw_xchg.
5766 if (AI->getOperation() == AtomicRMWInst::Xchg &&
5767 isa<ConstantInt>(AI->getValOperand())) {
5768 ConstantInt *CVal = cast<ConstantInt>(AI->getValOperand());
5769 if (CVal->isZero())
5770 return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,
5771 Builder.CreateNot(Mask, "Inv_Mask"),
5772 AI->getAlign(), Ord);
5773 if (CVal->isMinusOne())
5774 return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,
5775 AI->getAlign(), Ord);
5776 }
5777
5778 unsigned GRLen = Subtarget.getGRLen();
5779 Value *Ordering =
5780 Builder.getIntN(GRLen, static_cast<uint64_t>(AI->getOrdering()));
5781 Type *Tys[] = {AlignedAddr->getType()};
5782 Function *LlwOpScwLoop = Intrinsic::getDeclaration(
5783 AI->getModule(),
5785
5786 if (GRLen == 64) {
5787 Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
5788 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
5789 ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
5790 }
5791
5792 Value *Result;
5793
5794 // Must pass the shift amount needed to sign extend the loaded value prior
5795 // to performing a signed comparison for min/max. ShiftAmt is the number of
5796 // bits to shift the value into position. Pass GRLen-ShiftAmt-ValWidth, which
5797 // is the number of bits to left+right shift the value in order to
5798 // sign-extend.
5799 if (AI->getOperation() == AtomicRMWInst::Min ||
5801 const DataLayout &DL = AI->getDataLayout();
5802 unsigned ValWidth =
5803 DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
5804 Value *SextShamt =
5805 Builder.CreateSub(Builder.getIntN(GRLen, GRLen - ValWidth), ShiftAmt);
5806 Result = Builder.CreateCall(LlwOpScwLoop,
5807 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
5808 } else {
5809 Result =
5810 Builder.CreateCall(LlwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
5811 }
5812
5813 if (GRLen == 64)
5814 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
5815 return Result;
5816}
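As a concrete illustration of the shift arithmetic above (assuming GRLen = 64): an i16 value occupying bits [16, 31] of its aligned word has ShiftAmt = 16 and ValWidth = 16, so SextShamt = 64 - 16 - 16 = 32; shifting the loaded word left by 32 and then arithmetically right by 32 sign-extends the 16-bit field in place before the signed min/max comparison inside the LL/SC loop.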
5817
5819 const MachineFunction &MF, EVT VT) const {
5820 VT = VT.getScalarType();
5821
5822 if (!VT.isSimple())
5823 return false;
5824
5825 switch (VT.getSimpleVT().SimpleTy) {
5826 case MVT::f32:
5827 case MVT::f64:
5828 return true;
5829 default:
5830 break;
5831 }
5832
5833 return false;
5834}
5835
5837 const Constant *PersonalityFn) const {
5838 return LoongArch::R4;
5839}
5840
5842 const Constant *PersonalityFn) const {
5843 return LoongArch::R5;
5844}
5845
5846//===----------------------------------------------------------------------===//
5847// LoongArch Inline Assembly Support
5848//===----------------------------------------------------------------------===//
5849
5851LoongArchTargetLowering::getConstraintType(StringRef Constraint) const {
5852 // LoongArch specific constraints in GCC: config/loongarch/constraints.md
5853 //
5854 // 'f': A floating-point register (if available).
5855 // 'k': A memory operand whose address is formed by a base register and
5856 // (optionally scaled) index register.
5857 // 'l': A signed 16-bit constant.
5858 // 'm': A memory operand whose address is formed by a base register and
5859 // offset that is suitable for use in instructions with the same
5860 // addressing mode as st.w and ld.w.
5861 // 'I': A signed 12-bit constant (for arithmetic instructions).
5862 // 'J': Integer zero.
5863 // 'K': An unsigned 12-bit constant (for logic instructions).
5864 // "ZB": An address that is held in a general-purpose register. The offset is
5865 // zero.
5866 // "ZC": A memory operand whose address is formed by a base register and
5867 // offset that is suitable for use in instructions with the same
5868 // addressing mode as ll.w and sc.w.
5869 if (Constraint.size() == 1) {
5870 switch (Constraint[0]) {
5871 default:
5872 break;
5873 case 'f':
5874 return C_RegisterClass;
5875 case 'l':
5876 case 'I':
5877 case 'J':
5878 case 'K':
5879 return C_Immediate;
5880 case 'k':
5881 return C_Memory;
5882 }
5883 }
5884
5885 if (Constraint == "ZC" || Constraint == "ZB")
5886 return C_Memory;
5887
5888 // 'm' is handled here.
5889 return TargetLowering::getConstraintType(Constraint);
5890}
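For reference, a hypothetical C++ snippet (not part of this file) that exercises two of the constraints documented above: 'r' for a general-purpose register and 'I' for a signed 12-bit immediate.

// Hypothetical inline-asm usage of the 'r' and 'I' constraints.
int add_small_imm(int X) {
  int R;
  asm("addi.w %0, %1, %2" : "=r"(R) : "r"(X), "I"(100));
  return R;
}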
5891
5892InlineAsm::ConstraintCode LoongArchTargetLowering::getInlineAsmMemConstraint(
5893 StringRef ConstraintCode) const {
5894 return StringSwitch<InlineAsm::ConstraintCode>(ConstraintCode)
5899}
5900
5901std::pair<unsigned, const TargetRegisterClass *>
5902LoongArchTargetLowering::getRegForInlineAsmConstraint(
5903 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
5904 // First, see if this is a constraint that directly corresponds to a LoongArch
5905 // register class.
5906 if (Constraint.size() == 1) {
5907 switch (Constraint[0]) {
5908 case 'r':
5909 // TODO: Support fixed vectors up to GRLen?
5910 if (VT.isVector())
5911 break;
5912 return std::make_pair(0U, &LoongArch::GPRRegClass);
5913 case 'f':
5914 if (Subtarget.hasBasicF() && VT == MVT::f32)
5915 return std::make_pair(0U, &LoongArch::FPR32RegClass);
5916 if (Subtarget.hasBasicD() && VT == MVT::f64)
5917 return std::make_pair(0U, &LoongArch::FPR64RegClass);
5918 if (Subtarget.hasExtLSX() &&
5919 TRI->isTypeLegalForClass(LoongArch::LSX128RegClass, VT))
5920 return std::make_pair(0U, &LoongArch::LSX128RegClass);
5921 if (Subtarget.hasExtLASX() &&
5922 TRI->isTypeLegalForClass(LoongArch::LASX256RegClass, VT))
5923 return std::make_pair(0U, &LoongArch::LASX256RegClass);
5924 break;
5925 default:
5926 break;
5927 }
5928 }
5929
5930 // TargetLowering::getRegForInlineAsmConstraint uses the name of the TableGen
5931 // record (e.g. the "R0" in `def R0`) to choose registers for InlineAsm
5932 // constraints while the official register name is prefixed with a '$'. So we
5933 // clip the '$' from the original constraint string (e.g. {$r0} to {r0}.)
5934 // before it being parsed. And TargetLowering::getRegForInlineAsmConstraint is
5935 // case insensitive, so no need to convert the constraint to upper case here.
5936 //
5937 // For now, no need to support ABI names (e.g. `$a0`) as clang will correctly
5938 // decode the usage of register name aliases into their official names. And
5939 // AFAIK, the not yet upstreamed `rustc` for LoongArch will always use
5940 // official register names.
5941 if (Constraint.starts_with("{$r") || Constraint.starts_with("{$f") ||
5942 Constraint.starts_with("{$vr") || Constraint.starts_with("{$xr")) {
5943 bool IsFP = Constraint[2] == 'f';
5944 std::pair<StringRef, StringRef> Temp = Constraint.split('$');
5945 std::pair<unsigned, const TargetRegisterClass *> R;
5947 TRI, join_items("", Temp.first, Temp.second), VT);
5948 // Match those names to the widest floating point register type available.
5949 if (IsFP) {
5950 unsigned RegNo = R.first;
5951 if (LoongArch::F0 <= RegNo && RegNo <= LoongArch::F31) {
5952 if (Subtarget.hasBasicD() && (VT == MVT::f64 || VT == MVT::Other)) {
5953 unsigned DReg = RegNo - LoongArch::F0 + LoongArch::F0_64;
5954 return std::make_pair(DReg, &LoongArch::FPR64RegClass);
5955 }
5956 }
5957 }
5958 return R;
5959 }
5960
5961 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
5962}
5963
5964void LoongArchTargetLowering::LowerAsmOperandForConstraint(
5965 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
5966 SelectionDAG &DAG) const {
5967 // Currently only support length 1 constraints.
5968 if (Constraint.size() == 1) {
5969 switch (Constraint[0]) {
5970 case 'l':
5971 // Validate & create a 16-bit signed immediate operand.
5972 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
5973 uint64_t CVal = C->getSExtValue();
5974 if (isInt<16>(CVal))
5975 Ops.push_back(
5976 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT()));
5977 }
5978 return;
5979 case 'I':
5980 // Validate & create a 12-bit signed immediate operand.
5981 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
5982 uint64_t CVal = C->getSExtValue();
5983 if (isInt<12>(CVal))
5984 Ops.push_back(
5985 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT()));
5986 }
5987 return;
5988 case 'J':
5989 // Validate & create an integer zero operand.
5990 if (auto *C = dyn_cast<ConstantSDNode>(Op))
5991 if (C->getZExtValue() == 0)
5992 Ops.push_back(
5993 DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getGRLenVT()));
5994 return;
5995 case 'K':
5996 // Validate & create a 12-bit unsigned immediate operand.
5997 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
5998 uint64_t CVal = C->getZExtValue();
5999 if (isUInt<12>(CVal))
6000 Ops.push_back(
6001 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT()));
6002 }
6003 return;
6004 default:
6005 break;
6006 }
6007 }
6008 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
6009}
6010
6011#define GET_REGISTER_MATCHER
6012#include "LoongArchGenAsmMatcher.inc"
6013
6016 const MachineFunction &MF) const {
6017 std::pair<StringRef, StringRef> Name = StringRef(RegName).split('$');
6018 std::string NewRegName = Name.second.str();
6019 Register Reg = MatchRegisterAltName(NewRegName);
6020 if (Reg == LoongArch::NoRegister)
6021 Reg = MatchRegisterName(NewRegName);
6022 if (Reg == LoongArch::NoRegister)
6024 Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
6025 BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
6026 if (!ReservedRegs.test(Reg))
6027 report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
6028 StringRef(RegName) + "\"."));
6029 return Reg;
6030}
6031
6033 EVT VT, SDValue C) const {
6034 // TODO: Support vectors.
6035 if (!VT.isScalarInteger())
6036 return false;
6037
6038 // Omit the optimization if the data size exceeds GRLen.
6039 if (VT.getSizeInBits() > Subtarget.getGRLen())
6040 return false;
6041
6042 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
6043 const APInt &Imm = ConstNode->getAPIntValue();
6044 // Break MUL into (SLLI + ADD/SUB) or ALSL.
6045 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
6046 (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
6047 return true;
6048 // Break MUL into (ALSL x, (SLLI x, imm0), imm1).
6049 if (ConstNode->hasOneUse() &&
6050 ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
6051 (Imm - 8).isPowerOf2() || (Imm - 16).isPowerOf2()))
6052 return true;
6053 // Break (MUL x, imm) into (ADD (SLLI x, s0), (SLLI x, s1)),
6054 // in which the immediate has two set bits. Or break (MUL x, imm)
6055 // into (SUB (SLLI x, s0), (SLLI x, s1)), in which the immediate
6056 // equals (1 << s0) - (1 << s1).
6057 if (ConstNode->hasOneUse() && !(Imm.sge(-2048) && Imm.sle(4095))) {
6058 unsigned Shifts = Imm.countr_zero();
6059 // Reject immediates which can be composed via a single LUI.
6060 if (Shifts >= 12)
6061 return false;
6062 // Reject multiplications which can be optimized to
6063 // (SLLI (ALSL x, x, 1/2/3/4), s).
6064 APInt ImmPop = Imm.ashr(Shifts);
6065 if (ImmPop == 3 || ImmPop == 5 || ImmPop == 9 || ImmPop == 17)
6066 return false;
6067 // We do not consider the case `(-Imm - ImmSmall).isPowerOf2()`,
6068 // since it needs one more instruction than the other 3 cases.
6069 APInt ImmSmall = APInt(Imm.getBitWidth(), 1ULL << Shifts, true);
6070 if ((Imm - ImmSmall).isPowerOf2() || (Imm + ImmSmall).isPowerOf2() ||
6071 (ImmSmall - Imm).isPowerOf2())
6072 return true;
6073 }
6074 }
6075
6076 return false;
6077}
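The decompositions targeted above follow simple shift-add identities. A few worked cases (purely illustrative, not taken from this file or its tests):

// Illustrative identities behind decomposeMulByConstant:
//   x * 17     == (x << 4) + x              (Imm - 1 is a power of two)
//   x * 6      == (x << 2) + (x << 1)       (Imm - 2 is a power of two)
//   x * 0x4004 == (x << 14) + (x << 2)      (two set bits, outside the
//                                            12-bit immediate range)
static_assert(7 * 17 == (7 << 4) + 7, "x * 17");
static_assert(7 * 6 == (7 << 2) + (7 << 1), "x * 6");
static_assert(7 * 0x4004 == (7 << 14) + (7 << 2), "x * 0x4004");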
6078
6080 const AddrMode &AM,
6081 Type *Ty, unsigned AS,
6082 Instruction *I) const {
6083 // LoongArch has four basic addressing modes:
6084 // 1. reg
6085 // 2. reg + 12-bit signed offset
6086 // 3. reg + 14-bit signed offset left-shifted by 2
6087 // 4. reg1 + reg2
6088 // TODO: Add more checks after the vector extension is supported.
6089
6090 // No global is ever allowed as a base.
6091 if (AM.BaseGV)
6092 return false;
6093
6094 // Require a 12-bit signed offset, or a 14-bit signed offset left-shifted
6095 // by 2 when the `UAL` feature is available.
6096 if (!isInt<12>(AM.BaseOffs) &&
6097 !(isShiftedInt<14, 2>(AM.BaseOffs) && Subtarget.hasUAL()))
6098 return false;
6099
6100 switch (AM.Scale) {
6101 case 0:
6102 // "r+i" or just "i", depending on HasBaseReg.
6103 break;
6104 case 1:
6105 // "r+r+i" is not allowed.
6106 if (AM.HasBaseReg && AM.BaseOffs)
6107 return false;
6108 // Otherwise we have "r+r" or "r+i".
6109 break;
6110 case 2:
6111 // "2*r+r" or "2*r+i" is not allowed.
6112 if (AM.HasBaseReg || AM.BaseOffs)
6113 return false;
6114 // Allow "2*r" as "r+r".
6115 break;
6116 default:
6117 return false;
6118 }
6119
6120 return true;
6121}
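In concrete numbers, the offset test above accepts byte offsets in [-2048, 2047], plus multiples of 4 in [-32768, 32764] when the UAL feature is present. A host-side sketch of the same predicate (written for illustration; it mirrors llvm::isInt<12> and llvm::isShiftedInt<14, 2> rather than calling them):

// Illustrative restatement of the base-offset check in isLegalAddressingMode.
static bool isLegalBaseOffset(long long Offs, bool HasUAL) {
  bool FitsSImm12 = Offs >= -2048 && Offs <= 2047;
  bool FitsSImm14Lsl2 = (Offs & 3) == 0 && Offs >= -32768 && Offs <= 32764;
  return FitsSImm12 || (FitsSImm14Lsl2 && HasUAL);
}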
6122
6124 return isInt<12>(Imm);
6125}
6126
6128 return isInt<12>(Imm);
6129}
6130
6132 // Zexts are free if they can be combined with a load.
6133 // Don't advertise i32->i64 zextload as being free for LA64. It interacts
6134 // poorly with type legalization of compares preferring sext.
6135 if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
6136 EVT MemVT = LD->getMemoryVT();
6137 if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
6138 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
6139 LD->getExtensionType() == ISD::ZEXTLOAD))
6140 return true;
6141 }
6142
6143 return TargetLowering::isZExtFree(Val, VT2);
6144}
6145
6147 EVT DstVT) const {
6148 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
6149}
6150
6152 return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
6153}
6154
6156 // TODO: Support vectors.
6157 if (Y.getValueType().isVector())
6158 return false;
6159
6160 return !isa<ConstantSDNode>(Y);
6161}
6162
6164 // TODO: LAMCAS will use amcas{_DB,}.[bhwd] which does not require extension.
6165 return ISD::SIGN_EXTEND;
6166}
6167
6169 EVT Type, bool IsSigned) const {
6170 if (Subtarget.is64Bit() && Type == MVT::i32)
6171 return true;
6172
6173 return IsSigned;
6174}
6175
6177 // Return false to suppress the unnecessary extensions if the LibCall
6178 // arguments or return value is a float narrower than GRLEN on a soft FP ABI.
6179 if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
6180 Type.getSizeInBits() < Subtarget.getGRLen()))
6181 return false;
6182 return true;
6183}
6184
6185// memcpy, and other memory intrinsics, typically try to use wider load/store
6186// if the source/dest is aligned and the copy size is large enough. We therefore
6187// want to align such objects passed to memory intrinsics.
6189 unsigned &MinSize,
6190 Align &PrefAlign) const {
6191 if (!isa<MemIntrinsic>(CI))
6192 return false;
6193
6194 if (Subtarget.is64Bit()) {
6195 MinSize = 8;
6196 PrefAlign = Align(8);
6197 } else {
6198 MinSize = 4;
6199 PrefAlign = Align(4);
6200 }
6201
6202 return true;
6203}
unsigned const MachineRegisterInfo * MRI
static MCRegister MatchRegisterName(StringRef Name)
static bool checkValueWidth(SDValue V, unsigned width, ISD::LoadExtType &ExtType)
static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget, const AArch64TargetLowering &TLI)
static SDValue performANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue performSETCCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
#define NODE_NAME_CASE(node)
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static MCRegister MatchRegisterAltName(StringRef Name)
Maps from the set of all alternative registernames to a register number.
Function Alias Analysis Results
static uint64_t getConstant(const Value *IndexValue)
static SDValue getTargetNode(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG, unsigned Flags)
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static SDValue unpackFromRegLoc(const CSKYSubtarget &Subtarget, SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
uint64_t Addr
std::string Name
uint64_t Size
bool End
Definition: ELF_riscv.cpp:480
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define RegName(no)
static SDValue performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_VREPLVEI(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VREPLVEI (if possible).
const MCPhysReg ArgFPR32s[]
static SDValue lowerVECTOR_SHUFFLE_VSHUF4I(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VSHUF4I (if possible).
const MCPhysReg ArgVRs[]
static SDValue lowerVECTOR_SHUFFLE_VPICKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPICKEV (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVPICKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPICKOD (if possible).
static bool fitsRegularPattern(typename SmallVectorImpl< ValType >::const_iterator Begin, unsigned CheckStride, typename SmallVectorImpl< ValType >::const_iterator End, ValType ExpectedIndex, unsigned ExpectedIndexStride)
Determine whether a range fits a regular pattern of values.
static void canonicalizeShuffleVectorByLane(const SDLoc &DL, MutableArrayRef< int > Mask, MVT VT, SDValue &V1, SDValue &V2, SelectionDAG &DAG)
Shuffle vectors by lane to generate more optimized instructions.
static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg, SelectionDAG &DAG)
static cl::opt< bool > ZeroDivCheck("loongarch-check-zero-division", cl::Hidden, cl::desc("Trap on integer division by zero."), cl::init(false))
static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Dispatching routine to lower various 256-bit LoongArch vector shuffles.
static void emitErrorAndReplaceIntrinsicResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, StringRef ErrorMsg, bool WithChain=true)
static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp, SelectionDAG &DAG, bool IsSigned=false)
static SDValue lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVSHUF4I (if possible).
static SDValue lowerVECTOR_SHUFFLE_VILVH(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VILVH (if possible).
static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG)
static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI, unsigned ValNo, MVT ValVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, bool IsRet, Type *OrigTy)
static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue lowerVectorBitSetImm(SDNode *Node, SelectionDAG &DAG)
static SDValue lowerVECTOR_SHUFFLE_XVPACKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPACKOD (if possible).
#define CRC_CASE_EXT_BINARYOP(NAME, NODE)
static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG)
static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG)
static MachineBasicBlock * insertDivByZeroTrap(MachineInstr &MI, MachineBasicBlock *MBB)
static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG)
static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG)
static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
static SDValue lowerVECTOR_SHUFFLE_VPACKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPACKEV (if possible).
static void replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, unsigned ResOp)
static SDValue legalizeIntrinsicImmArg(SDNode *Node, unsigned ImmOp, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, bool IsSigned=false)
static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op, StringRef ErrorMsg, SelectionDAG &DAG)
const MCPhysReg ArgXRs[]
static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State, CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2, MVT ValVT2, MVT LocVT2, ISD::ArgFlagsTy ArgFlags2)
const MCPhysReg ArgFPR64s[]
#define IOCSRWR_CASE(NAME, NODE)
#define CRC_CASE_EXT_UNARYOP(NAME, NODE)
static SDValue lowerVECTOR_SHUFFLE_VPACKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPACKOD (if possible).
static MachineBasicBlock * emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp, SelectionDAG &DAG, bool IsSigned=false)
const MCPhysReg ArgGPRs[]
static SDValue lowerVECTOR_SHUFFLE_XVILVL(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVILVL (if possible).
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp, unsigned ExtOpc=ISD::ANY_EXTEND)
static void replaceVecCondBranchResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, unsigned ResOp)
#define ASRT_LE_GT_CASE(NAME)
static bool isConstantOrUndef(const SDValue Op)
static SDValue lowerVECTOR_SHUFFLE_XVPACKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPACKEV (if possible).
static MachineBasicBlock * emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_XVILVH(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVILVH (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVSHUF (if possible).
static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
#define IOCSRRD_CASE(NAME, NODE)
static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Dispatching routine to lower various 128-bit LoongArch vector shuffles.
static SDValue lowerVECTOR_SHUFFLE_XVPICKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPICKEV (if possible).
static SDValue lowerVECTOR_SHUFFLE_VILVL(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VILVL (if possible).
static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG)
static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode *Op)
static void replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVREPLVEI (if possible).
#define CSR_CASE(ID)
static SDValue lowerVECTOR_SHUFFLE_VPICKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPICKOD (if possible).
static Intrinsic::ID getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen, AtomicRMWInst::BinOp BinOp)
static SDValue lowerVECTOR_SHUFFLE_VSHUF(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VSHUF.
static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode)
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
unsigned const TargetRegisterInfo * TRI
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
static CodeModel::Model getCodeModel(const PPCSubtarget &S, const TargetMachine &TM, const MachineOperand &MO)
const char LLVMTargetMachineRef TM
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
This file contains some functions that are useful when dealing with strings.
Class for arbitrary precision integers.
Definition: APInt.h:78
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition: APInt.h:1237
This class represents an incoming formal argument to a Function.
Definition: Argument.h:31
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
An instruction that atomically checks whether a specified value is in a memory location,...
Definition: Instructions.h:495
AtomicOrdering getFailureOrdering() const
Returns the failure ordering constraint of this cmpxchg instruction.
Definition: Instructions.h:586
an instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:696
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
Definition: Instructions.h:809
BinOp
This enumeration lists the possible modifications atomicrmw can make.
Definition: Instructions.h:708
@ Add
*p = old + v
Definition: Instructions.h:712
@ Min
*p = old <signed v ? old : v
Definition: Instructions.h:726
@ Or
*p = old | v
Definition: Instructions.h:720
@ Sub
*p = old - v
Definition: Instructions.h:714
@ And
*p = old & v
Definition: Instructions.h:716
@ UIncWrap
Increment one up to a maximum value.
Definition: Instructions.h:748
@ Max
*p = old >signed v ? old : v
Definition: Instructions.h:724
@ UMin
*p = old <unsigned v ? old : v
Definition: Instructions.h:730
@ UMax
*p = old >unsigned v ? old : v
Definition: Instructions.h:728
@ UDecWrap
Decrement one until a minimum value or zero.
Definition: Instructions.h:752
@ Nand
*p = ~(old & v)
Definition: Instructions.h:718
bool isFloatingPointOperation() const
Definition: Instructions.h:864
BinOp getOperation() const
Definition: Instructions.h:787
Value * getValOperand()
Definition: Instructions.h:856
AtomicOrdering getOrdering() const
Returns the ordering constraint of this rmw instruction.
Definition: Instructions.h:829
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
bool test(unsigned Idx) const
Definition: BitVector.h:461
A "pseudo-class" with methods for operating on BUILD_VECTORs.
CCState - This class holds information needed while lowering arguments and return values.
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
SmallVectorImpl< ISD::ArgFlagsTy > & getPendingArgFlags()
MCRegister AllocateReg(MCPhysReg Reg)
AllocateReg - Attempt to allocate one register.
int64_t AllocateStack(unsigned Size, Align Alignment)
AllocateStack - Allocate a chunk of stack space with the specified size and alignment.
void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeCallOperands - Analyze the outgoing arguments to a call, incorporating info about the passed v...
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
SmallVectorImpl< CCValAssign > & getPendingLocs()
void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
void addLoc(const CCValAssign &V)
CCValAssign - Represent assignment of one arg/retval to a location.
bool isRegLoc() const
static CCValAssign getPending(unsigned ValNo, MVT ValVT, MVT LocVT, LocInfo HTP, unsigned ExtraInfo=0)
Register getLocReg() const
LocInfo getLocInfo() const
static CCValAssign getMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP, bool IsCustom=false)
static CCValAssign getReg(unsigned ValNo, MVT ValVT, unsigned RegNo, MVT LocVT, LocInfo HTP, bool IsCustom=false)
bool isMemLoc() const
int64_t getLocMemOffset() const
unsigned getValNo() const
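As a rough sketch of how CCState and CCValAssign cooperate during argument lowering (CC_SomeTarget is a placeholder CCAssignFn, and the loop bodies are elided):

SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
CCInfo.AnalyzeFormalArguments(Ins, CC_SomeTarget);
for (const CCValAssign &VA : ArgLocs) {
  if (VA.isRegLoc()) {
    // Argument arrives in VA.getLocReg(); add it as a live-in and copy it out.
  } else {
    assert(VA.isMemLoc() && "argument must be in a register or on the stack");
    // Load it from a fixed stack object at VA.getLocMemOffset().
  }
}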
bool isMustTailCall() const
Tests if this call site must be tail call optimized.
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
This is the shared class of boolean and integer constants.
Definition: Constants.h:81
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition: Constants.h:218
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition: Constants.h:206
uint64_t getZExtValue() const
int64_t getSExtValue() const
This is an important base class in LLVM.
Definition: Constant.h:42
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
unsigned getPointerSizeInBits(unsigned AS=0) const
Layout pointer size, in bits. FIXME: The defaults need to be removed once all of the backends/clients ...
Definition: DataLayout.h:368
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:865
A debug info location.
Definition: DebugLoc.h:33
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:214
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:281
Argument * getArg(unsigned i) const
Definition: Function.h:884
bool isDSOLocal() const
Definition: GlobalValue.h:305
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:91
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2053
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Definition: IRBuilder.h:523
IntegerType * getInt64Ty()
Fetch the type representing a 64-bit integer.
Definition: IRBuilder.h:528
Value * CreateNot(Value *V, const Twine &Name="")
Definition: IRBuilder.h:1766
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1361
ConstantInt * getIntN(unsigned N, uint64_t C)
Get a constant N-bit value, zero extended or truncated from a 64-bit value.
Definition: IRBuilder.h:494
AtomicRMWInst * CreateAtomicRMW(AtomicRMWInst::BinOp Op, Value *Ptr, Value *Val, MaybeAlign Align, AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Definition: IRBuilder.h:1871
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition: IRBuilder.h:2027
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args=std::nullopt, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2432
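A hedged sketch of how these IRBuilder helpers combine when emitting a masked atomic intrinsic; the specific intrinsic ID and operand order are shown only as an example of the pattern.

static Value *emitMaskedAdd(IRBuilderBase &Builder, Module *M,
                            Value *AlignedAddr, Value *Incr, Value *Mask,
                            Value *Ordering) {
  // Declare (or find) the target intrinsic in the module, then call it.
  Function *Fn = Intrinsic::getDeclaration(
      M, Intrinsic::loongarch_masked_atomicrmw_add_i64);
  return Builder.CreateCall(Fn, {AlignedAddr, Incr, Mask, Ordering});
}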
const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not...
Definition: Instruction.cpp:66
const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Definition: Instruction.cpp:74
Class to represent integer types.
Definition: DerivedTypes.h:40
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
void emitError(uint64_t LocCookie, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location inf...
This class is used to represent ISD::LOAD nodes.
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
LoongArchMachineFunctionInfo - This class is derived from MachineFunctionInfo and contains private Lo...
const LoongArchRegisterInfo * getRegisterInfo() const override
const LoongArchInstrInfo * getInstrInfo() const override
unsigned getMaxBytesForAlignment() const
Align getPrefFunctionAlignment() const
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override
Return true if result of the specified node is used by a return node only.
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
bool isLegalICmpImmediate(int64_t Imm) const override
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *CI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
Return the ValueType of the result of SETCC operations.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
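Illustrative only: one shape such an override can take. MyTargetLowering is a stand-in class name and the size test is a simplification, not this file's actual policy.

TargetLowering::AtomicExpansionKind
MyTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
  // Only narrow integer atomics get the masked LL/SC expansion here.
  if (!AI->getType()->isIntegerTy())
    return AtomicExpansionKind::None;
  unsigned Size = AI->getType()->getIntegerBitWidth();
  return (Size == 8 || Size == 16) ? AtomicExpansionKind::MaskedIntrinsic
                                   : AtomicExpansionKind::None;
}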
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Determine if the target supports unaligned memory accesses.
bool isCheapToSpeculateCtlz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic ctlz.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
bool shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize, Align &PrefAlign) const override
Return true if the pointer arguments to CI should be aligned by aligning the object whose address is ...
bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const override
Returns true if arguments should be sign-extended in lib calls.
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
bool signExtendConstant(const ConstantInt *CI) const override
Return true if this constant should be sign extended when promoting to a larger type.
const char * getTargetNodeName(unsigned Opcode) const override
This method returns the name of a target specific DAG node.
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
bool isLegalAddImmediate(int64_t Imm) const override
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
bool isCheapToSpeculateCttz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic cttz.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
bool isFPImmVLDILegal(const APFloat &Imm, EVT VT) const
bool shouldExtendTypeInLibCall(EVT Type) const override
Returns true if arguments should be extended in lib calls.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
bool hasAndNot(SDValue Y) const override
Return true if the target has a bitwise and-not operation: X = ~A & B This can be used to simplify se...
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context) const override
This hook should be implemented to check whether the return values described by the Outs array can fi...
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
ISD::NodeType getExtendForAtomicCmpSwapArg() const override
Returns how the platform's atomic compare and swap expects its comparison value to be extended (ZERO_...
LoongArchTargetLowering(const TargetMachine &TM, const LoongArchSubtarget &STI)
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
bool hasAndNotCompare(SDValue Y) const override
Return true if the target should transform: (X & Y) == Y -> (~X & Y) == 0, (X & Y) !...
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array,...
bool mayBeEmittedAsTailCall(const CallInst *CI) const override
Return true if the target may be able emit the call instruction as a tail call.
bool hasFeature(unsigned Feature) const
Machine Value Type.
bool is128BitVector() const
Return true if this is a 128-bit vector type.
SimpleValueType SimpleTy
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
static auto fixedlen_vector_valuetypes()
bool is256BitVector() const
Return true if this is a 256-bit vector type.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
MVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
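As a sketch of the usual custom-inserter idiom built from these members; MI is assumed to be the pseudo instruction being expanded and MBB its parent block, and nothing here is specific to this file.

MachineFunction *MF = MBB->getParent();
MachineBasicBlock *TailMBB = MF->CreateMachineBasicBlock(MBB->getBasicBlock());
MF->insert(std::next(MBB->getIterator()), TailMBB);
// Move the instructions after MI into the new block, then fix the CFG and PHIs.
TailMBB->splice(TailMBB->begin(), MBB, std::next(MI.getIterator()), MBB->end());
TailMBB->transferSuccessorsAndUpdatePHIs(MBB);
MBB->addSuccessor(TailMBB);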
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
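A small hedged example of pairing a stack object with a MachineMemOperand during DAG lowering; the size, alignment and flags are arbitrary illustrations.

MachineFunction &MF = DAG.getMachineFunction();
int FI = MF.getFrameInfo().CreateStackObject(/*Size=*/8, Align(8),
                                             /*isSpillSlot=*/false);
MachineMemOperand *MMO = MF.getMachineMemOperand(
    MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore,
    LLT::scalar(64), Align(8));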
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
Representation of each machine instruction.
Definition: MachineInstr.h:69
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:579
A description of a memory reference used in the backend.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
void setIsKill(bool Val=true)
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
void addLiveIn(MCRegister Reg, Register vreg=Register())
addLiveIn - Add the specified register as a live-in.
EVT getMemoryVT() const
Return the type of the in-memory value.
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:307
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
size_t use_size() const
Return the number of uses of this node.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
const SDValue & getOperand(unsigned i) const
uint64_t getScalarValueSizeInBits() const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:226
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:733
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), AAResults *AA=nullptr)
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:493
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
Definition: SelectionDAG.h:743
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:839
bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:487
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
SDValue getRegister(unsigned Reg, EVT VT)
SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:488
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, unsigned Reg, SDValue N)
Definition: SelectionDAG.h:784
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
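For flavour, a minimal custom-lowering helper built from getNode and getConstant; the transform (negation as 0 - x) is just an example, not one of this file's lowerings.

static SDValue lowerNegate(SDValue Op, SelectionDAG &DAG) {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();
  // Rewrite the operation as 0 - x using target-independent nodes.
  SDValue Zero = DAG.getConstant(0, DL, VT);
  return DAG.getNode(ISD::SUB, DL, VT, Zero, Op.getOperand(0));
}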
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:687
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:779
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:482
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT)
Definition: SelectionDAG.h:810
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getRegisterMask(const uint32_t *RegMask)
LLVMContext * getContext() const
Definition: SelectionDAG.h:500
SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:750
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:570
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
ArrayRef< int > getMask() const
bool empty() const
Definition: SmallVector.h:95
size_t size() const
Definition: SmallVector.h:92
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:587
typename SuperClass::const_iterator const_iterator
Definition: SmallVector.h:592
void push_back(const T &Elt)
Definition: SmallVector.h:427
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1210
StackOffset holds a fixed and a scalable offset in bytes.
Definition: TypeSize.h:33
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition: StringRef.h:685
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:250
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:137
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:44
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:69
R Default(T Value)
Definition: StringSwitch.h:182
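The StringSwitch idiom in one small example; the register names and the mapping are invented for illustration, not copied from this file.

static Register lookupRegByName(StringRef Name) {
  return StringSwitch<Register>(Name)
      .Case("$r3", LoongArch::R3)   // stack pointer
      .Case("$r22", LoongArch::R22) // frame pointer
      .Default(Register());         // no such register
}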
TargetInstrInfo - Interface to description of machine instruction set.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
const TargetMachine & getTargetMachine() const
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
void setMaxBytesForAlignment(unsigned MaxBytes)
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
void setMinCmpXchgSizeInBits(unsigned SizeInBits)
Sets the minimum cmpxchg or ll/sc size supported by the backend.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
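These hooks are normally called together from a target's TargetLowering constructor; the snippet below is a hedged illustration of what such calls look like, not a summary of this file's setup.

// Inside the target's TargetLowering constructor:
setOperationAction(ISD::BR_JT, MVT::i32, Expand);            // expand jump tables
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);  // expand extending f32->f64 loads
setTruncStoreAction(MVT::f64, MVT::f32, Expand);             // expand truncating f64->f32 stores
setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);            // expand this FP condition code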
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
std::vector< ArgListEntry > ArgListTy
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
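A rough sketch of the libcall fallback pattern, combining RTLIB::getFPTOSINT with makeLibCall; names are generic and error handling is omitted.

static SDValue lowerFPToSIntViaLibcall(SDValue Op, SelectionDAG &DAG,
                                       const TargetLowering &TLI) {
  SDLoc DL(Op);
  SDValue Src = Op.getOperand(0);
  RTLIB::Libcall LC = RTLIB::getFPTOSINT(Src.getValueType(), Op.getValueType());
  TargetLowering::MakeLibCallOptions CallOptions;
  // makeLibCall returns {result, chain}; only the result is used here.
  return TLI.makeLibCall(DAG, LC, Op.getValueType(), Src, CallOptions, DL).first;
}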
virtual InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
bool useTLSDESC() const
Returns true if this target uses TLS Descriptors.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
bool shouldAssumeDSOLocal(const GlobalValue *GV) const
CodeModel::Model getCodeModel() const
Returns the code model.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetInstrInfo * getInstrInfo() const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
unsigned getIntegerBitWidth() const
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:224
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
This class is used to represent EVT's, which are used to parameterize some operations.
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
self_iterator getIterator()
Definition: ilist_node.h:132
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition: CallingConv.h:50
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:40
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:779
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain.
Definition: ISDOpcodes.h:1194
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
Definition: ISDOpcodes.h:1190
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition: ISDOpcodes.h:490
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:257
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:743
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
Definition: ISDOpcodes.h:1223
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:246
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1099
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:813
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:497
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:205
@ GlobalAddress
Definition: ISDOpcodes.h:78
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:840
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:397
@ MEMBARRIER
MEMBARRIER - Compiler barrier only; generate a no-op.
Definition: ISDOpcodes.h:1296
@ ATOMIC_FENCE
OUTCHAIN = ATOMIC_FENCE(INCHAIN, ordering, scope) This corresponds to the fence instruction.
Definition: ISDOpcodes.h:1301
@ STRICT_FSETCCS
Definition: ISDOpcodes.h:491
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
Definition: ISDOpcodes.h:963
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:953
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
Definition: ISDOpcodes.h:1480
@ GlobalTLSAddress
Definition: ISDOpcodes.h:79
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:804
@ WRITE_REGISTER
Definition: ISDOpcodes.h:125
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
Definition: ISDOpcodes.h:1056
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:980
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:1145
@ BR_JT
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:1124
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition: ISDOpcodes.h:521
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:756
@ UNDEF
UNDEF - An undefined node.
Definition: ISDOpcodes.h:218
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
Definition: ISDOpcodes.h:1219
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:673
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:734
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:614
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimumNumber or maximumNumber on two values,...
Definition: ISDOpcodes.h:1041
@ READ_REGISTER
READ_REGISTER, WRITE_REGISTER - This node represents llvm.register on the DAG, which implements the n...
Definition: ISDOpcodes.h:124
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:549
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition: ISDOpcodes.h:209
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:810
@ DEBUGTRAP
DEBUGTRAP - Trap intended to get the attention of a debugger.
Definition: ISDOpcodes.h:1279
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:771
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
Definition: ISDOpcodes.h:1109
@ ConstantPool
Definition: ISDOpcodes.h:82
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:848
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition: ISDOpcodes.h:696
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:765
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition: ISDOpcodes.h:135
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition: ISDOpcodes.h:100
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:886
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:708
@ TRAP
TRAP - Trapping instruction.
Definition: ISDOpcodes.h:1276
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:190
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:538
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:816
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
Definition: ISDOpcodes.h:1214
@ BlockAddress
Definition: ISDOpcodes.h:84
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition: ISDOpcodes.h:793
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:61
@ AssertZext
Definition: ISDOpcodes.h:62
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:198
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:529
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1603
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1583
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1539
ABI getTargetABI(StringRef ABIName)
Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition: LLVMContext.h:54
@ GeneralDynamic
Definition: CodeGen.h:46
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:480
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
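A minimal, hedged BuildMI example in the shape of a custom-inserter body; MBB, MI and MRI are assumed to be the current block, the pseudo being replaced, and the function's MachineRegisterInfo, and SrcReg stands for whatever register holds the value being copied.

const TargetInstrInfo &TII = *MBB.getParent()->getSubtarget().getInstrInfo();
Register Dst = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
// OR Dst, Src, $zero acts as a plain register copy on LoongArch.
BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(LoongArch::OR), Dst)
    .addReg(SrcReg, getKillRegState(true))
    .addReg(LoongArch::R0);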
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
Definition: MathExtras.h:285
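Two tiny worked cases of the semantics described above, written as compile-time checks:

static_assert(isShiftedMask_64(0xF0), "0b1111 shifted left by 4 is a shifted mask");
static_assert(!isShiftedMask_64(0xF0F), "the set bits are not contiguous");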
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition: MathExtras.h:193
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Other
Any other memory.
unsigned getKillRegState(bool B)
DWARFExpression::Operation Op
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Extended Value Type.
Definition: ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:94
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:381
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:137
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:359
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:371
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:307
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition: ValueTypes.h:204
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:367
static EVT getFloatingPointVT(unsigned BitWidth)
Returns the EVT that represents a floating-point type with the given number of bits.
Definition: ValueTypes.h:59
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:168
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:314
bool is256BitVector() const
Return true if this is a 256-bit vector type.
Definition: ValueTypes.h:209
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:204
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:319
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:157
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:327
Align getNonZeroOrigAlign() const
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
Register getFrameRegister(const MachineFunction &MF) const override
BitVector getReservedRegs(const MachineFunction &MF) const override
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
SmallVector< ISD::OutputArg, 32 > Outs
SmallVector< SDValue, 32 > OutVals
This structure is used to pass arguments to makeLibCall function.
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT, bool Value=true)