1//=- LoongArchISelLowering.cpp - LoongArch DAG Lowering Implementation ---===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that LoongArch uses to lower LLVM code into
10// a selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
15#include "LoongArch.h"
18#include "LoongArchSubtarget.h"
22#include "llvm/ADT/Statistic.h"
27#include "llvm/IR/IRBuilder.h"
28#include "llvm/IR/IntrinsicsLoongArch.h"
30#include "llvm/Support/Debug.h"
34
35using namespace llvm;
36
37#define DEBUG_TYPE "loongarch-isel-lowering"
38
39STATISTIC(NumTailCalls, "Number of tail calls");
40
41static cl::opt<bool> ZeroDivCheck("loongarch-check-zero-division", cl::Hidden,
42 cl::desc("Trap on integer division by zero."),
43 cl::init(false));
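// Editor's note (not part of the original source): this cl::opt can be
// toggled when invoking the code generator directly, for example a
// hypothetical invocation such as
//   llc -mtriple=loongarch64 -loongarch-check-zero-division foo.ll
// or, from a front end, via -mllvm -loongarch-check-zero-division, causing
// integer division lowering to emit an explicit trap on a zero divisor.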
44
46 const LoongArchSubtarget &STI)
47 : TargetLowering(TM), Subtarget(STI) {
48
49 MVT GRLenVT = Subtarget.getGRLenVT();
50
51 // Set up the register classes.
52
53 addRegisterClass(GRLenVT, &LoongArch::GPRRegClass);
54 if (Subtarget.hasBasicF())
55 addRegisterClass(MVT::f32, &LoongArch::FPR32RegClass);
56 if (Subtarget.hasBasicD())
57 addRegisterClass(MVT::f64, &LoongArch::FPR64RegClass);
58
59 static const MVT::SimpleValueType LSXVTs[] = {
60 MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32, MVT::v2f64};
61 static const MVT::SimpleValueType LASXVTs[] = {
62 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, MVT::v8f32, MVT::v4f64};
63
64 if (Subtarget.hasExtLSX())
65 for (MVT VT : LSXVTs)
66 addRegisterClass(VT, &LoongArch::LSX128RegClass);
67
68 if (Subtarget.hasExtLASX())
69 for (MVT VT : LASXVTs)
70 addRegisterClass(VT, &LoongArch::LASX256RegClass);
71
72 // Set operations for LA32 and LA64.
73
75 MVT::i1, Promote);
76
83
86 GRLenVT, Custom);
87
89
94
97
101
102 // Expand bitreverse.i16 with native-width bitrev and shift for now, until
103 // we know which of sll and revb.2h is faster.
106
107 // LA32 does not have REVB.2W and REVB.D due to the 64-bit operands, and
108 // the narrower REVB.W does not exist. But LA32 does have REVB.2H, so i16
109 // and i32 could still be byte-swapped relatively cheaply.
111
117
120
121 // Set operations for LA64 only.
122
123 if (Subtarget.is64Bit()) {
141
145 }
146
147 // Set operations for LA32 only.
148
149 if (!Subtarget.is64Bit()) {
155 }
156
158
159 static const ISD::CondCode FPCCToExpand[] = {
162
163 // Set operations for 'F' feature.
164
165 if (Subtarget.hasBasicF()) {
166 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
167 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
168 setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
169
185
186 if (Subtarget.is64Bit())
188
189 if (!Subtarget.hasBasicD()) {
191 if (Subtarget.is64Bit()) {
194 }
195 }
196 }
197
198 // Set operations for 'D' feature.
199
200 if (Subtarget.hasBasicD()) {
201 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
202 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
203 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
204 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
205 setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
206
222
223 if (Subtarget.is64Bit())
225 }
226
227 // Set operations for 'LSX' feature.
228
229 if (Subtarget.hasExtLSX()) {
231 // Expand all truncating stores and extending loads.
232 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
233 setTruncStoreAction(VT, InnerVT, Expand);
236 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
237 }
238 // By default everything must be expanded. Then we will selectively turn
239 // on ones that can be effectively codegen'd.
240 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
242 }
243
244 for (MVT VT : LSXVTs) {
248
252
256 }
257 for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
260 Legal);
262 VT, Legal);
269 Expand);
270 }
271 for (MVT VT : {MVT::v4i32, MVT::v2i64}) {
274 }
275 for (MVT VT : {MVT::v4f32, MVT::v2f64}) {
283 VT, Expand);
284 }
285 }
286
287 // Set operations for 'LASX' feature.
288
289 if (Subtarget.hasExtLASX()) {
290 for (MVT VT : LASXVTs) {
294
298
302 }
303 for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) {
306 Legal);
308 VT, Legal);
315 Expand);
316 }
317 for (MVT VT : {MVT::v8i32, MVT::v4i32, MVT::v4i64}) {
320 }
321 for (MVT VT : {MVT::v8f32, MVT::v4f64}) {
329 VT, Expand);
330 }
331 }
332
333 // Set DAG combine for LA32 and LA64.
334
339
340 // Set DAG combine for 'LSX' feature.
341
342 if (Subtarget.hasExtLSX())
344
345 // Compute derived properties from the register classes.
347
349
352
354
356
357 // Function alignments.
359 // Set preferred alignments.
363}
364
366 const GlobalAddressSDNode *GA) const {
367 // In order to maximise the opportunity for common subexpression elimination,
368 // keep a separate ADD node for the global address offset instead of folding
369 // it in the global address node. Later peephole optimisations may choose to
370 // fold it back in when profitable.
371 return false;
372}
373
375 SelectionDAG &DAG) const {
376 switch (Op.getOpcode()) {
378 return lowerATOMIC_FENCE(Op, DAG);
380 return lowerEH_DWARF_CFA(Op, DAG);
382 return lowerGlobalAddress(Op, DAG);
384 return lowerGlobalTLSAddress(Op, DAG);
386 return lowerINTRINSIC_WO_CHAIN(Op, DAG);
388 return lowerINTRINSIC_W_CHAIN(Op, DAG);
390 return lowerINTRINSIC_VOID(Op, DAG);
392 return lowerBlockAddress(Op, DAG);
393 case ISD::JumpTable:
394 return lowerJumpTable(Op, DAG);
395 case ISD::SHL_PARTS:
396 return lowerShiftLeftParts(Op, DAG);
397 case ISD::SRA_PARTS:
398 return lowerShiftRightParts(Op, DAG, true);
399 case ISD::SRL_PARTS:
400 return lowerShiftRightParts(Op, DAG, false);
402 return lowerConstantPool(Op, DAG);
403 case ISD::FP_TO_SINT:
404 return lowerFP_TO_SINT(Op, DAG);
405 case ISD::BITCAST:
406 return lowerBITCAST(Op, DAG);
407 case ISD::UINT_TO_FP:
408 return lowerUINT_TO_FP(Op, DAG);
409 case ISD::SINT_TO_FP:
410 return lowerSINT_TO_FP(Op, DAG);
411 case ISD::VASTART:
412 return lowerVASTART(Op, DAG);
413 case ISD::FRAMEADDR:
414 return lowerFRAMEADDR(Op, DAG);
415 case ISD::RETURNADDR:
416 return lowerRETURNADDR(Op, DAG);
418 return lowerWRITE_REGISTER(Op, DAG);
420 return lowerINSERT_VECTOR_ELT(Op, DAG);
422 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
424 return lowerBUILD_VECTOR(Op, DAG);
426 return lowerVECTOR_SHUFFLE(Op, DAG);
427 }
428 return SDValue();
429}
430
431/// Determine whether a range fits a regular pattern of values.
432/// This function accounts for the possibility of jumping over the End iterator.
433template <typename ValType>
434static bool
436 unsigned CheckStride,
438 ValType ExpectedIndex, unsigned ExpectedIndexStride) {
439 auto &I = Begin;
440
441 while (I != End) {
442 if (*I != -1 && *I != ExpectedIndex)
443 return false;
444 ExpectedIndex += ExpectedIndexStride;
445
446 // Incrementing past End is undefined behaviour so we must increment one
447 // step at a time and check for End at each step.
448 for (unsigned n = 0; n < CheckStride && I != End; ++n, ++I)
449 ; // Empty loop body.
450 }
451 return true;
452}
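// Illustrative sketch (editor's addition, not in the original source): for a
// shuffle mask <0, ?, 2, ?, 4, ?> stored as {0, -1, 2, -1, 4, -1},
//   fitsRegularPattern<int>(Mask.begin(), /*CheckStride=*/2, Mask.end(),
//                           /*ExpectedIndex=*/0, /*ExpectedIndexStride=*/2)
// returns true: every second element is either -1 (undef) or matches the
// expected sequence 0, 2, 4.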
453
454/// Lower VECTOR_SHUFFLE into VREPLVEI (if possible).
455///
456/// VREPLVEI performs vector broadcast based on an element specified by an
457/// integer immediate, with its mask being similar to:
458/// <x, x, x, ...>
459/// where x is any valid index.
460///
461/// When undefs appear in the mask they are treated as if they were whatever
462/// value is necessary in order to fit the above form.
464 MVT VT, SDValue V1, SDValue V2,
465 SelectionDAG &DAG) {
466 int SplatIndex = -1;
467 for (const auto &M : Mask) {
468 if (M != -1) {
469 SplatIndex = M;
470 break;
471 }
472 }
473
474 if (SplatIndex == -1)
475 return DAG.getUNDEF(VT);
476
477 assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
478 if (fitsRegularPattern<int>(Mask.begin(), 1, Mask.end(), SplatIndex, 0)) {
479 APInt Imm(64, SplatIndex);
480 return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
481 DAG.getConstant(Imm, DL, MVT::i64));
482 }
483
484 return SDValue();
485}
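// Worked example (editor's addition, assuming a v4i32 shuffle and the LSX
// vreplvei.w instruction): the IR
//   %r = shufflevector <4 x i32> %a, <4 x i32> undef,
//                      <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// has SplatIndex = 1 and fits the pattern above, so it is lowered to a
// VREPLVEI node with immediate 1, roughly "vreplvei.w $vd, $vj, 1".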
486
487/// Lower VECTOR_SHUFFLE into VSHUF4I (if possible).
488///
489/// VSHUF4I splits the vector into blocks of four elements, then shuffles these
490/// elements according to a <4 x i2> constant (encoded as an integer immediate).
491///
492/// It is therefore possible to lower into VSHUF4I when the mask takes the form:
493/// <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...>
494/// When undefs appear they are treated as if they were whatever value is
495/// necessary in order to fit the above forms.
496///
497/// For example:
498/// %2 = shufflevector <8 x i16> %0, <8 x i16> undef,
499/// <8 x i32> <i32 3, i32 2, i32 1, i32 0,
500/// i32 7, i32 6, i32 5, i32 4>
501/// is lowered to:
502/// (VSHUF4I_H $v0, $v1, 27)
503/// where the 27 comes from:
504/// 3 + (2 << 2) + (1 << 4) + (0 << 6)
506 MVT VT, SDValue V1, SDValue V2,
507 SelectionDAG &DAG) {
508
509 // When the size is less than 4, lower-cost instructions may be used.
510 if (Mask.size() < 4)
511 return SDValue();
512
513 int SubMask[4] = {-1, -1, -1, -1};
514 for (unsigned i = 0; i < 4; ++i) {
515 for (unsigned j = i; j < Mask.size(); j += 4) {
516 int Idx = Mask[j];
517
518 // Convert from vector index to 4-element subvector index
519 // If an index refers to an element outside of the subvector then give up
520 if (Idx != -1) {
521 Idx -= 4 * (j / 4);
522 if (Idx < 0 || Idx >= 4)
523 return SDValue();
524 }
525
526 // If the mask has an undef, replace it with the current index.
527 // Note that it might still be undef if the current index is also undef
528 if (SubMask[i] == -1)
529 SubMask[i] = Idx;
530 // Check that non-undef values are the same as in the mask. If they
531 // aren't then give up
532 else if (Idx != -1 && Idx != SubMask[i])
533 return SDValue();
534 }
535 }
536
537 // Calculate the immediate. Replace any remaining undefs with zero
538 APInt Imm(64, 0);
539 for (int i = 3; i >= 0; --i) {
540 int Idx = SubMask[i];
541
542 if (Idx == -1)
543 Idx = 0;
544
545 Imm <<= 2;
546 Imm |= Idx & 0x3;
547 }
548
549 return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT, V1,
550 DAG.getConstant(Imm, DL, MVT::i64));
551}
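// Worked example (editor's addition): undefs are tolerated as long as the
// defined indices agree across the 4-element blocks. For the v8i16 mask
//   <3, undef, 1, 0, 7, 6, undef, 4>
// the sub-mask still resolves to <3, 2, 1, 0> (each undef entry is filled
// from the other block), giving the same immediate 27 as the example in the
// header comment above; any sub-mask entry that remains undef is encoded as 0.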
552
553/// Lower VECTOR_SHUFFLE into VPACKEV (if possible).
554///
555/// VPACKEV interleaves the even elements from each vector.
556///
557/// It is possible to lower into VPACKEV when the mask consists of two of the
558/// following forms interleaved:
559/// <0, 2, 4, ...>
560/// <n, n+2, n+4, ...>
561/// where n is the number of elements in the vector.
562/// For example:
563/// <0, 0, 2, 2, 4, 4, ...>
564/// <0, n, 2, n+2, 4, n+4, ...>
565///
566/// When undefs appear in the mask they are treated as if they were whatever
567/// value is necessary in order to fit the above forms.
569 MVT VT, SDValue V1, SDValue V2,
570 SelectionDAG &DAG) {
571
572 const auto &Begin = Mask.begin();
573 const auto &End = Mask.end();
574 SDValue OriV1 = V1, OriV2 = V2;
575
576 if (fitsRegularPattern<int>(Begin, 2, End, 0, 2))
577 V1 = OriV1;
578 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 2))
579 V1 = OriV2;
580 else
581 return SDValue();
582
583 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 2))
584 V2 = OriV1;
585 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 2))
586 V2 = OriV2;
587 else
588 return SDValue();
589
590 return DAG.getNode(LoongArchISD::VPACKEV, DL, VT, V2, V1);
591}
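// Worked example (editor's addition, assuming v4i32 inputs %a and %b): the
// mask <0, 4, 2, 6> takes the even result positions from %a (mask values 0
// and 2) and the odd result positions from %b (mask values 4 and 6, i.e. n
// and n+2 with n = 4), so the node is built as VPACKEV(V2 = %b, V1 = %a);
// note the operand swap in the getNode call above.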
592
593/// Lower VECTOR_SHUFFLE into VPACKOD (if possible).
594///
595/// VPACKOD interleaves the odd elements from each vector.
596///
597/// It is possible to lower into VPACKOD when the mask consists of two of the
598/// following forms interleaved:
599/// <1, 3, 5, ...>
600/// <n+1, n+3, n+5, ...>
601/// where n is the number of elements in the vector.
602/// For example:
603/// <1, 1, 3, 3, 5, 5, ...>
604/// <1, n+1, 3, n+3, 5, n+5, ...>
605///
606/// When undefs appear in the mask they are treated as if they were whatever
607/// value is necessary in order to fit the above forms.
609 MVT VT, SDValue V1, SDValue V2,
610 SelectionDAG &DAG) {
611
612 const auto &Begin = Mask.begin();
613 const auto &End = Mask.end();
614 SDValue OriV1 = V1, OriV2 = V2;
615
616 if (fitsRegularPattern<int>(Begin, 2, End, 1, 2))
617 V1 = OriV1;
618 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + 1, 2))
619 V1 = OriV2;
620 else
621 return SDValue();
622
623 if (fitsRegularPattern<int>(Begin + 1, 2, End, 1, 2))
624 V2 = OriV1;
625 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + 1, 2))
626 V2 = OriV2;
627 else
628 return SDValue();
629
630 return DAG.getNode(LoongArchISD::VPACKOD, DL, VT, V2, V1);
631}
632
633/// Lower VECTOR_SHUFFLE into VILVH (if possible).
634///
635/// VILVH interleaves consecutive elements from the left (highest-indexed) half
636/// of each vector.
637///
638/// It is possible to lower into VILVH when the mask consists of two of the
639/// following forms interleaved:
640/// <x, x+1, x+2, ...>
641/// <n+x, n+x+1, n+x+2, ...>
642/// where n is the number of elements in the vector and x is half n.
643/// For example:
644/// <x, x, x+1, x+1, x+2, x+2, ...>
645/// <x, n+x, x+1, n+x+1, x+2, n+x+2, ...>
646///
647/// When undefs appear in the mask they are treated as if they were whatever
648/// value is necessary in order to fit the above forms.
650 MVT VT, SDValue V1, SDValue V2,
651 SelectionDAG &DAG) {
652
653 const auto &Begin = Mask.begin();
654 const auto &End = Mask.end();
655 unsigned HalfSize = Mask.size() / 2;
656 SDValue OriV1 = V1, OriV2 = V2;
657
658 if (fitsRegularPattern<int>(Begin, 2, End, HalfSize, 1))
659 V1 = OriV1;
660 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + HalfSize, 1))
661 V1 = OriV2;
662 else
663 return SDValue();
664
665 if (fitsRegularPattern<int>(Begin + 1, 2, End, HalfSize, 1))
666 V2 = OriV1;
667 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + HalfSize,
668 1))
669 V2 = OriV2;
670 else
671 return SDValue();
672
673 return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
674}
675
676/// Lower VECTOR_SHUFFLE into VILVL (if possible).
677///
678/// VILVL interleaves consecutive elements from the right (lowest-indexed) half
679/// of each vector.
680///
681/// It is possible to lower into VILVL when the mask consists of two of the
682/// following forms interleaved:
683/// <0, 1, 2, ...>
684/// <n, n+1, n+2, ...>
685/// where n is the number of elements in the vector.
686/// For example:
687/// <0, 0, 1, 1, 2, 2, ...>
688/// <0, n, 1, n+1, 2, n+2, ...>
689///
690/// When undefs appear in the mask they are treated as if they were whatever
691/// value is necessary in order to fit the above forms.
693 MVT VT, SDValue V1, SDValue V2,
694 SelectionDAG &DAG) {
695
696 const auto &Begin = Mask.begin();
697 const auto &End = Mask.end();
698 SDValue OriV1 = V1, OriV2 = V2;
699
700 if (fitsRegularPattern<int>(Begin, 2, End, 0, 1))
701 V1 = OriV1;
702 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 1))
703 V1 = OriV2;
704 else
705 return SDValue();
706
707 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 1))
708 V2 = OriV1;
709 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 1))
710 V2 = OriV2;
711 else
712 return SDValue();
713
714 return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
715}
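// Worked example (editor's addition, assuming v4i32 inputs %a and %b): the
// interleave-low mask <0, 4, 1, 5> matches <0, 1, ...> at the even positions
// and <n, n+1, ...> (n = 4) at the odd positions, so the low halves of %a
// and %b are interleaved and the node is built as VILVL(V2 = %b, V1 = %a).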
716
717/// Lower VECTOR_SHUFFLE into VPICKEV (if possible).
718///
719/// VPICKEV copies the even elements of each vector into the result vector.
720///
721/// It is possible to lower into VPICKEV when the mask consists of two of the
722/// following forms concatenated:
723/// <0, 2, 4, ...>
724/// <n, n+2, n+4, ...>
725/// where n is the number of elements in the vector.
726/// For example:
727/// <0, 2, 4, ..., 0, 2, 4, ...>
728/// <0, 2, 4, ..., n, n+2, n+4, ...>
729///
730/// When undefs appear in the mask they are treated as if they were whatever
731/// value is necessary in order to fit the above forms.
733 MVT VT, SDValue V1, SDValue V2,
734 SelectionDAG &DAG) {
735
736 const auto &Begin = Mask.begin();
737 const auto &Mid = Mask.begin() + Mask.size() / 2;
738 const auto &End = Mask.end();
739 SDValue OriV1 = V1, OriV2 = V2;
740
741 if (fitsRegularPattern<int>(Begin, 1, Mid, 0, 2))
742 V1 = OriV1;
743 else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size(), 2))
744 V1 = OriV2;
745 else
746 return SDValue();
747
748 if (fitsRegularPattern<int>(Mid, 1, End, 0, 2))
749 V2 = OriV1;
750 else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size(), 2))
751 V2 = OriV2;
752
753 else
754 return SDValue();
755
756 return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
757}
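// Worked example (editor's addition, assuming v4i32 inputs %a and %b): the
// mask <0, 2, 4, 6> is the concatenation of <0, 2> (even elements of %a) and
// <n, n+2> with n = 4 (even elements of %b), so it lowers to
// VPICKEV(V2 = %b, V1 = %a), producing <a0, a2, b0, b2>.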
758
759/// Lower VECTOR_SHUFFLE into VPICKOD (if possible).
760///
761/// VPICKOD copies the odd elements of each vector into the result vector.
762///
763/// It is possible to lower into VPICKOD when the mask consists of two of the
764/// following forms concatenated:
765/// <1, 3, 5, ...>
766/// <n+1, n+3, n+5, ...>
767/// where n is the number of elements in the vector.
768/// For example:
769/// <1, 3, 5, ..., 1, 3, 5, ...>
770/// <1, 3, 5, ..., n+1, n+3, n+5, ...>
771///
772/// When undefs appear in the mask they are treated as if they were whatever
773/// value is necessary in order to fit the above forms.
775 MVT VT, SDValue V1, SDValue V2,
776 SelectionDAG &DAG) {
777
778 const auto &Begin = Mask.begin();
779 const auto &Mid = Mask.begin() + Mask.size() / 2;
780 const auto &End = Mask.end();
781 SDValue OriV1 = V1, OriV2 = V2;
782
783 if (fitsRegularPattern<int>(Begin, 1, Mid, 1, 2))
784 V1 = OriV1;
785 else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size() + 1, 2))
786 V1 = OriV2;
787 else
788 return SDValue();
789
790 if (fitsRegularPattern<int>(Mid, 1, End, 1, 2))
791 V2 = OriV1;
792 else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size() + 1, 2))
793 V2 = OriV2;
794 else
795 return SDValue();
796
797 return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
798}
799
800/// Lower VECTOR_SHUFFLE into VSHUF.
801///
802/// This mostly consists of converting the shuffle mask into a BUILD_VECTOR and
803/// adding it as an operand to the resulting VSHUF.
805 MVT VT, SDValue V1, SDValue V2,
806 SelectionDAG &DAG) {
807
809 for (auto M : Mask)
810 Ops.push_back(DAG.getConstant(M, DL, MVT::i64));
811
812 EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
813 SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, Ops);
814
815 // VECTOR_SHUFFLE concatenates the vectors in a vectorwise fashion.
816 // <0b00, 0b01> + <0b10, 0b11> -> <0b00, 0b01, 0b10, 0b11>
817 // VSHF concatenates the vectors in a bitwise fashion:
818 // <0b00, 0b01> + <0b10, 0b11> ->
819 // 0b0100 + 0b1110 -> 0b01001110
820 // <0b10, 0b11, 0b00, 0b01>
821 // We must therefore swap the operands to get the correct result.
822 return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
823}
824
825/// Dispatching routine to lower various 128-bit LoongArch vector shuffles.
826///
827/// This routine breaks down the specific type of 128-bit shuffle and
828/// dispatches to the lowering routines accordingly.
830 SDValue V1, SDValue V2, SelectionDAG &DAG) {
831 assert((VT.SimpleTy == MVT::v16i8 || VT.SimpleTy == MVT::v8i16 ||
832 VT.SimpleTy == MVT::v4i32 || VT.SimpleTy == MVT::v2i64 ||
833 VT.SimpleTy == MVT::v4f32 || VT.SimpleTy == MVT::v2f64) &&
834 "Vector type is unsupported for lsx!");
835 assert(V1.getSimpleValueType() == V2.getSimpleValueType() &&
836 "Two operands have different types!");
837 assert(VT.getVectorNumElements() == Mask.size() &&
838 "Unexpected mask size for shuffle!");
839 assert(Mask.size() % 2 == 0 && "Expected even mask size.");
840
841 SDValue Result;
842 // TODO: Add more comparison patterns.
843 if (V2.isUndef()) {
844 if ((Result = lowerVECTOR_SHUFFLE_VREPLVEI(DL, Mask, VT, V1, V2, DAG)))
845 return Result;
846 if ((Result = lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG)))
847 return Result;
848
849 // TODO: The commented-out assignment below may be enabled in the future to
850 // better match the pattern for instruction selection.
851 /* V2 = V1; */
852 }
853
854 // It is recommended not to change the pattern comparison order, as this
855 // order is chosen for better performance.
856 if ((Result = lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG)))
857 return Result;
858 if ((Result = lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG)))
859 return Result;
860 if ((Result = lowerVECTOR_SHUFFLE_VILVH(DL, Mask, VT, V1, V2, DAG)))
861 return Result;
862 if ((Result = lowerVECTOR_SHUFFLE_VILVL(DL, Mask, VT, V1, V2, DAG)))
863 return Result;
864 if ((Result = lowerVECTOR_SHUFFLE_VPICKEV(DL, Mask, VT, V1, V2, DAG)))
865 return Result;
866 if ((Result = lowerVECTOR_SHUFFLE_VPICKOD(DL, Mask, VT, V1, V2, DAG)))
867 return Result;
868 if ((Result = lowerVECTOR_SHUFFLE_VSHUF(DL, Mask, VT, V1, V2, DAG)))
869 return Result;
870
871 return SDValue();
872}
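// End-to-end illustration (editor's addition): a two-operand shuffle such as
//   %r = shufflevector <4 x i32> %a, <4 x i32> %b,
//                      <4 x i32> <i32 0, i32 4, i32 1, i32 5>
// falls through the VPACKEV, VPACKOD and VILVH checks and is matched by the
// VILVL pattern, so a single interleave node is emitted instead of the
// generic VSHUF fallback at the end of the chain.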
873
874/// Lower VECTOR_SHUFFLE into XVREPLVEI (if possible).
875///
876/// It is an XVREPLVEI when the mask is:
877/// <x, x, x, ..., x+n, x+n, x+n, ...>
878/// where x is repeated n times and n is half the length of the vector.
879///
880/// When undefs appear in the mask they are treated as if they were whatever
881/// value is necessary in order to fit the above form.
883 ArrayRef<int> Mask, MVT VT,
884 SDValue V1, SDValue V2,
885 SelectionDAG &DAG) {
886 int SplatIndex = -1;
887 for (const auto &M : Mask) {
888 if (M != -1) {
889 SplatIndex = M;
890 break;
891 }
892 }
893
894 if (SplatIndex == -1)
895 return DAG.getUNDEF(VT);
896
897 const auto &Begin = Mask.begin();
898 const auto &End = Mask.end();
899 unsigned HalfSize = Mask.size() / 2;
900
901 assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
902 if (fitsRegularPattern<int>(Begin, 1, End - HalfSize, SplatIndex, 0) &&
903 fitsRegularPattern<int>(Begin + HalfSize, 1, End, SplatIndex + HalfSize,
904 0)) {
905 APInt Imm(64, SplatIndex);
906 return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
907 DAG.getConstant(Imm, DL, MVT::i64));
908 }
909
910 return SDValue();
911}
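// Worked example (editor's addition, assuming v8i32): the 256-bit mask
// <1, 1, 1, 1, 5, 5, 5, 5> splats element 1 within each 128-bit half
// (5 == SplatIndex + HalfSize), so it is lowered to a VREPLVEI node with
// immediate 1, presumably selected to xvrepl128vei.w, which replicates an
// element within each 128-bit lane on LASX.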
912
913/// Lower VECTOR_SHUFFLE into XVSHUF4I (if possible).
915 MVT VT, SDValue V1, SDValue V2,
916 SelectionDAG &DAG) {
917 // When the size is less than or equal to 4, lower-cost instructions may be
918 // used.
919 if (Mask.size() <= 4)
920 return SDValue();
921 return lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG);
922}
923
924/// Lower VECTOR_SHUFFLE into XVPACKEV (if possible).
926 MVT VT, SDValue V1, SDValue V2,
927 SelectionDAG &DAG) {
928 return lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG);
929}
930
931/// Lower VECTOR_SHUFFLE into XVPACKOD (if possible).
933 MVT VT, SDValue V1, SDValue V2,
934 SelectionDAG &DAG) {
935 return lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG);
936}
937
938/// Lower VECTOR_SHUFFLE into XVILVH (if possible).
940 MVT VT, SDValue V1, SDValue V2,
941 SelectionDAG &DAG) {
942
943 const auto &Begin = Mask.begin();
944 const auto &End = Mask.end();
945 unsigned HalfSize = Mask.size() / 2;
946 unsigned LeftSize = HalfSize / 2;
947 SDValue OriV1 = V1, OriV2 = V2;
948
949 if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, HalfSize - LeftSize,
950 1) &&
951 fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize + LeftSize, 1))
952 V1 = OriV1;
953 else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize,
954 Mask.size() + HalfSize - LeftSize, 1) &&
955 fitsRegularPattern<int>(Begin + HalfSize, 2, End,
956 Mask.size() + HalfSize + LeftSize, 1))
957 V1 = OriV2;
958 else
959 return SDValue();
960
961 if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, HalfSize - LeftSize,
962 1) &&
963 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize + LeftSize,
964 1))
965 V2 = OriV1;
966 else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize,
967 Mask.size() + HalfSize - LeftSize, 1) &&
968 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
969 Mask.size() + HalfSize + LeftSize, 1))
970 V2 = OriV2;
971 else
972 return SDValue();
973
974 return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
975}
976
977/// Lower VECTOR_SHUFFLE into XVILVL (if possible).
979 MVT VT, SDValue V1, SDValue V2,
980 SelectionDAG &DAG) {
981
982 const auto &Begin = Mask.begin();
983 const auto &End = Mask.end();
984 unsigned HalfSize = Mask.size() / 2;
985 SDValue OriV1 = V1, OriV2 = V2;
986
987 if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, 0, 1) &&
988 fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize, 1))
989 V1 = OriV1;
990 else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, Mask.size(), 1) &&
991 fitsRegularPattern<int>(Begin + HalfSize, 2, End,
992 Mask.size() + HalfSize, 1))
993 V1 = OriV2;
994 else
995 return SDValue();
996
997 if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, 0, 1) &&
998 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize, 1))
999 V2 = OriV1;
1000 else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, Mask.size(),
1001 1) &&
1002 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
1003 Mask.size() + HalfSize, 1))
1004 V2 = OriV2;
1005 else
1006 return SDValue();
1007
1008 return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
1009}
1010
1011/// Lower VECTOR_SHUFFLE into XVPICKEV (if possible).
1013 MVT VT, SDValue V1, SDValue V2,
1014 SelectionDAG &DAG) {
1015
1016 const auto &Begin = Mask.begin();
1017 const auto &LeftMid = Mask.begin() + Mask.size() / 4;
1018 const auto &Mid = Mask.begin() + Mask.size() / 2;
1019 const auto &RightMid = Mask.end() - Mask.size() / 4;
1020 const auto &End = Mask.end();
1021 unsigned HalfSize = Mask.size() / 2;
1022 SDValue OriV1 = V1, OriV2 = V2;
1023
1024 if (fitsRegularPattern<int>(Begin, 1, LeftMid, 0, 2) &&
1025 fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize, 2))
1026 V1 = OriV1;
1027 else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size(), 2) &&
1028 fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize, 2))
1029 V1 = OriV2;
1030 else
1031 return SDValue();
1032
1033 if (fitsRegularPattern<int>(LeftMid, 1, Mid, 0, 2) &&
1034 fitsRegularPattern<int>(RightMid, 1, End, HalfSize, 2))
1035 V2 = OriV1;
1036 else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size(), 2) &&
1037 fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize, 2))
1038 V2 = OriV2;
1039
1040 else
1041 return SDValue();
1042
1043 return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
1044}
1045
1046/// Lower VECTOR_SHUFFLE into XVPICKOD (if possible).
1048 MVT VT, SDValue V1, SDValue V2,
1049 SelectionDAG &DAG) {
1050
1051 const auto &Begin = Mask.begin();
1052 const auto &LeftMid = Mask.begin() + Mask.size() / 4;
1053 const auto &Mid = Mask.begin() + Mask.size() / 2;
1054 const auto &RightMid = Mask.end() - Mask.size() / 4;
1055 const auto &End = Mask.end();
1056 unsigned HalfSize = Mask.size() / 2;
1057 SDValue OriV1 = V1, OriV2 = V2;
1058
1059 if (fitsRegularPattern<int>(Begin, 1, LeftMid, 1, 2) &&
1060 fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize + 1, 2))
1061 V1 = OriV1;
1062 else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size() + 1, 2) &&
1063 fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize + 1,
1064 2))
1065 V1 = OriV2;
1066 else
1067 return SDValue();
1068
1069 if (fitsRegularPattern<int>(LeftMid, 1, Mid, 1, 2) &&
1070 fitsRegularPattern<int>(RightMid, 1, End, HalfSize + 1, 2))
1071 V2 = OriV1;
1072 else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size() + 1, 2) &&
1073 fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize + 1,
1074 2))
1075 V2 = OriV2;
1076 else
1077 return SDValue();
1078
1079 return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
1080}
1081
1082/// Lower VECTOR_SHUFFLE into XVSHUF (if possible).
1084 MVT VT, SDValue V1, SDValue V2,
1085 SelectionDAG &DAG) {
1086
1087 int MaskSize = Mask.size();
1088 int HalfSize = Mask.size() / 2;
1089 const auto &Begin = Mask.begin();
1090 const auto &Mid = Mask.begin() + HalfSize;
1091 const auto &End = Mask.end();
1092
1093 // VECTOR_SHUFFLE concatenates the vectors:
1094 // <0, 1, 2, 3, 4, 5, 6, 7> + <8, 9, 10, 11, 12, 13, 14, 15>
1095 // shuffling ->
1096 // <0, 1, 2, 3, 8, 9, 10, 11> <4, 5, 6, 7, 12, 13, 14, 15>
1097 //
1098 // XVSHUF concatenates the vectors:
1099 // <a0, a1, a2, a3, b0, b1, b2, b3> + <a4, a5, a6, a7, b4, b5, b6, b7>
1100 // shuffling ->
1101 // <a0, a1, a2, a3, a4, a5, a6, a7> + <b0, b1, b2, b3, b4, b5, b6, b7>
1102 SmallVector<SDValue, 8> MaskAlloc;
1103 for (auto it = Begin; it < Mid; it++) {
1104 if (*it < 0) // UNDEF
1105 MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
1106 else if ((*it >= 0 && *it < HalfSize) ||
1107 (*it >= MaskSize && *it <= MaskSize + HalfSize)) {
1108 int M = *it < HalfSize ? *it : *it - HalfSize;
1109 MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
1110 } else
1111 return SDValue();
1112 }
1113 assert((int)MaskAlloc.size() == HalfSize && "xvshuf convert failed!");
1114
1115 for (auto it = Mid; it < End; it++) {
1116 if (*it < 0) // UNDEF
1117 MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
1118 else if ((*it >= HalfSize && *it < MaskSize) ||
1119 (*it >= MaskSize + HalfSize && *it < MaskSize * 2)) {
1120 int M = *it < MaskSize ? *it - HalfSize : *it - MaskSize;
1121 MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
1122 } else
1123 return SDValue();
1124 }
1125 assert((int)MaskAlloc.size() == MaskSize && "xvshuf convert failed!");
1126
1127 EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
1128 SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, MaskAlloc);
1129 return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
1130}
1131
1132/// Shuffle vectors by lane to generate more optimized instructions.
1133/// 256-bit shuffles are always considered as 2-lane 128-bit shuffles.
1134///
1135/// Therefore, except for the following four cases, other cases are regarded
1136/// as cross-lane shuffles, where optimization is relatively limited.
1137///
1138/// - Shuffle high, low lanes of the two input vectors
1139/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 3, 6>
1140/// - Shuffle low, high lanes of the two input vectors
1141/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 0, 5>
1142/// - Shuffle low, low lanes of the two input vectors
1143/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 3, 6>
1144/// - Shuffle high, high lanes of the two input vectors
1145/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 0, 5>
1146///
1147/// The first case is the closest to LoongArch instructions and the other
1148/// cases need to be converted to it for processing.
1149///
1150/// This function may modify V1, V2 and Mask
1152 MutableArrayRef<int> Mask, MVT VT,
1153 SDValue &V1, SDValue &V2,
1154 SelectionDAG &DAG) {
1155
1156 enum HalfMaskType { HighLaneTy, LowLaneTy, None };
1157
1158 int MaskSize = Mask.size();
1159 int HalfSize = Mask.size() / 2;
1160
1161 HalfMaskType preMask = None, postMask = None;
1162
1163 if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
1164 return M < 0 || (M >= 0 && M < HalfSize) ||
1165 (M >= MaskSize && M < MaskSize + HalfSize);
1166 }))
1167 preMask = HighLaneTy;
1168 else if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
1169 return M < 0 || (M >= HalfSize && M < MaskSize) ||
1170 (M >= MaskSize + HalfSize && M < MaskSize * 2);
1171 }))
1172 preMask = LowLaneTy;
1173
1174 if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
1175 return M < 0 || (M >= 0 && M < HalfSize) ||
1176 (M >= MaskSize && M < MaskSize + HalfSize);
1177 }))
1178 postMask = HighLaneTy;
1179 else if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
1180 return M < 0 || (M >= HalfSize && M < MaskSize) ||
1181 (M >= MaskSize + HalfSize && M < MaskSize * 2);
1182 }))
1183 postMask = LowLaneTy;
1184
1185 // The pre-half of mask is high lane type, and the post-half of mask
1186 // is low lane type, which is closest to the LoongArch instructions.
1187 //
1188 // Note: In the LoongArch architecture, the high lane of the mask corresponds
1189 // to the lower 128 bits of the vector register, and the low lane of the mask
1190 // corresponds to the higher 128 bits of the vector register.
1191 if (preMask == HighLaneTy && postMask == LowLaneTy) {
1192 return;
1193 }
1194 if (preMask == LowLaneTy && postMask == HighLaneTy) {
1195 V1 = DAG.getBitcast(MVT::v4i64, V1);
1196 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
1197 DAG.getConstant(0b01001110, DL, MVT::i64));
1198 V1 = DAG.getBitcast(VT, V1);
1199
1200 if (!V2.isUndef()) {
1201 V2 = DAG.getBitcast(MVT::v4i64, V2);
1202 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
1203 DAG.getConstant(0b01001110, DL, MVT::i64));
1204 V2 = DAG.getBitcast(VT, V2);
1205 }
1206
1207 for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
1208 *it = *it < 0 ? *it : *it - HalfSize;
1209 }
1210 for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
1211 *it = *it < 0 ? *it : *it + HalfSize;
1212 }
1213 } else if (preMask == LowLaneTy && postMask == LowLaneTy) {
1214 V1 = DAG.getBitcast(MVT::v4i64, V1);
1215 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
1216 DAG.getConstant(0b11101110, DL, MVT::i64));
1217 V1 = DAG.getBitcast(VT, V1);
1218
1219 if (!V2.isUndef()) {
1220 V2 = DAG.getBitcast(MVT::v4i64, V2);
1221 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
1222 DAG.getConstant(0b11101110, DL, MVT::i64));
1223 V2 = DAG.getBitcast(VT, V2);
1224 }
1225
1226 for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
1227 *it = *it < 0 ? *it : *it - HalfSize;
1228 }
1229 } else if (preMask == HighLaneTy && postMask == HighLaneTy) {
1230 V1 = DAG.getBitcast(MVT::v4i64, V1);
1231 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
1232 DAG.getConstant(0b01000100, DL, MVT::i64));
1233 V1 = DAG.getBitcast(VT, V1);
1234
1235 if (!V2.isUndef()) {
1236 V2 = DAG.getBitcast(MVT::v4i64, V2);
1237 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
1238 DAG.getConstant(0b01000100, DL, MVT::i64));
1239 V2 = DAG.getBitcast(VT, V2);
1240 }
1241
1242 for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
1243 *it = *it < 0 ? *it : *it + HalfSize;
1244 }
1245 } else { // cross-lane
1246 return;
1247 }
1248}
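// Editor's note on the XVPERMI immediates used above (a sketch, assuming the
// usual xvpermi.d encoding where each 2-bit field of the immediate selects a
// 64-bit element of the source):
//   0b01001110 selects elements <2, 3, 0, 1>, i.e. it swaps the two 128-bit
//              lanes (the low/high case);
//   0b11101110 selects <2, 3, 2, 3>, duplicating the high 128-bit lane;
//   0b01000100 selects <0, 1, 0, 1>, duplicating the low 128-bit lane.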
1249
1250/// Dispatching routine to lower various 256-bit LoongArch vector shuffles.
1251///
1252/// This routine breaks down the specific type of 256-bit shuffle and
1253/// dispatches to the lowering routines accordingly.
1255 SDValue V1, SDValue V2, SelectionDAG &DAG) {
1256 assert((VT.SimpleTy == MVT::v32i8 || VT.SimpleTy == MVT::v16i16 ||
1257 VT.SimpleTy == MVT::v8i32 || VT.SimpleTy == MVT::v4i64 ||
1258 VT.SimpleTy == MVT::v8f32 || VT.SimpleTy == MVT::v4f64) &&
1259 "Vector type is unsupported for lasx!");
1260 assert(V1.getSimpleValueType() == V2.getSimpleValueType() &&
1261 "Two operands have different types!");
1262 assert(VT.getVectorNumElements() == Mask.size() &&
1263 "Unexpected mask size for shuffle!");
1264 assert(Mask.size() % 2 == 0 && "Expected even mask size.");
1265 assert(Mask.size() >= 4 && "Mask size is less than 4.");
1266
1267 // Canonicalize non-cross-lane shuffle vectors.
1268 SmallVector<int> NewMask(Mask);
1269 canonicalizeShuffleVectorByLane(DL, NewMask, VT, V1, V2, DAG);
1270
1271 SDValue Result;
1272 // TODO: Add more comparison patterns.
1273 if (V2.isUndef()) {
1274 if ((Result = lowerVECTOR_SHUFFLE_XVREPLVEI(DL, NewMask, VT, V1, V2, DAG)))
1275 return Result;
1276 if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, NewMask, VT, V1, V2, DAG)))
1277 return Result;
1278
1279 // TODO: The commented-out assignment below may be enabled in the future to
1280 // better match the pattern for instruction selection.
1281 /* V2 = V1; */
1282 }
1283
1284 // It is recommended not to change the pattern comparison order, as this
1285 // order is chosen for better performance.
1286 if ((Result = lowerVECTOR_SHUFFLE_XVPACKEV(DL, NewMask, VT, V1, V2, DAG)))
1287 return Result;
1288 if ((Result = lowerVECTOR_SHUFFLE_XVPACKOD(DL, NewMask, VT, V1, V2, DAG)))
1289 return Result;
1290 if ((Result = lowerVECTOR_SHUFFLE_XVILVH(DL, NewMask, VT, V1, V2, DAG)))
1291 return Result;
1292 if ((Result = lowerVECTOR_SHUFFLE_XVILVL(DL, NewMask, VT, V1, V2, DAG)))
1293 return Result;
1294 if ((Result = lowerVECTOR_SHUFFLE_XVPICKEV(DL, NewMask, VT, V1, V2, DAG)))
1295 return Result;
1296 if ((Result = lowerVECTOR_SHUFFLE_XVPICKOD(DL, NewMask, VT, V1, V2, DAG)))
1297 return Result;
1298 if ((Result = lowerVECTOR_SHUFFLE_XVSHUF(DL, NewMask, VT, V1, V2, DAG)))
1299 return Result;
1300
1301 return SDValue();
1302}
1303
1304SDValue LoongArchTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
1305 SelectionDAG &DAG) const {
1306 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
1307 ArrayRef<int> OrigMask = SVOp->getMask();
1308 SDValue V1 = Op.getOperand(0);
1309 SDValue V2 = Op.getOperand(1);
1310 MVT VT = Op.getSimpleValueType();
1311 int NumElements = VT.getVectorNumElements();
1312 SDLoc DL(Op);
1313
1314 bool V1IsUndef = V1.isUndef();
1315 bool V2IsUndef = V2.isUndef();
1316 if (V1IsUndef && V2IsUndef)
1317 return DAG.getUNDEF(VT);
1318
1319 // When we create a shuffle node we put the UNDEF node as the second operand,
1320 // but in some cases the first operand may be transformed to UNDEF.
1321 // In this case we should just commute the node.
1322 if (V1IsUndef)
1323 return DAG.getCommutedVectorShuffle(*SVOp);
1324
1325 // Check for non-undef masks pointing at an undef vector and make the masks
1326 // undef as well. This makes it easier to match the shuffle based solely on
1327 // the mask.
1328 if (V2IsUndef &&
1329 any_of(OrigMask, [NumElements](int M) { return M >= NumElements; })) {
1330 SmallVector<int, 8> NewMask(OrigMask);
1331 for (int &M : NewMask)
1332 if (M >= NumElements)
1333 M = -1;
1334 return DAG.getVectorShuffle(VT, DL, V1, V2, NewMask);
1335 }
1336
1337 // Check for illegal shuffle mask element index values.
1338 int MaskUpperLimit = OrigMask.size() * (V2IsUndef ? 1 : 2);
1339 (void)MaskUpperLimit;
1340 assert(llvm::all_of(OrigMask,
1341 [&](int M) { return -1 <= M && M < MaskUpperLimit; }) &&
1342 "Out of bounds shuffle index");
1343
1344 // For each vector width, delegate to a specialized lowering routine.
1345 if (VT.is128BitVector())
1346 return lower128BitShuffle(DL, OrigMask, VT, V1, V2, DAG);
1347
1348 if (VT.is256BitVector())
1349 return lower256BitShuffle(DL, OrigMask, VT, V1, V2, DAG);
1350
1351 return SDValue();
1352}
1353
1354static bool isConstantOrUndef(const SDValue Op) {
1355 if (Op->isUndef())
1356 return true;
1357 if (isa<ConstantSDNode>(Op))
1358 return true;
1359 if (isa<ConstantFPSDNode>(Op))
1360 return true;
1361 return false;
1362}
1363
1365 for (unsigned i = 0; i < Op->getNumOperands(); ++i)
1366 if (isConstantOrUndef(Op->getOperand(i)))
1367 return true;
1368 return false;
1369}
1370
1371SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
1372 SelectionDAG &DAG) const {
1373 BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op);
1374 EVT ResTy = Op->getValueType(0);
1375 SDLoc DL(Op);
1376 APInt SplatValue, SplatUndef;
1377 unsigned SplatBitSize;
1378 bool HasAnyUndefs;
1379 bool Is128Vec = ResTy.is128BitVector();
1380 bool Is256Vec = ResTy.is256BitVector();
1381
1382 if ((!Subtarget.hasExtLSX() || !Is128Vec) &&
1383 (!Subtarget.hasExtLASX() || !Is256Vec))
1384 return SDValue();
1385
1386 if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
1387 /*MinSplatBits=*/8) &&
1388 SplatBitSize <= 64) {
1389 // We can only cope with 8, 16, 32, or 64-bit elements.
1390 if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 &&
1391 SplatBitSize != 64)
1392 return SDValue();
1393
1394 EVT ViaVecTy;
1395
1396 switch (SplatBitSize) {
1397 default:
1398 return SDValue();
1399 case 8:
1400 ViaVecTy = Is128Vec ? MVT::v16i8 : MVT::v32i8;
1401 break;
1402 case 16:
1403 ViaVecTy = Is128Vec ? MVT::v8i16 : MVT::v16i16;
1404 break;
1405 case 32:
1406 ViaVecTy = Is128Vec ? MVT::v4i32 : MVT::v8i32;
1407 break;
1408 case 64:
1409 ViaVecTy = Is128Vec ? MVT::v2i64 : MVT::v4i64;
1410 break;
1411 }
1412
1413 // SelectionDAG::getConstant will promote SplatValue appropriately.
1414 SDValue Result = DAG.getConstant(SplatValue, DL, ViaVecTy);
1415
1416 // Bitcast to the type we originally wanted.
1417 if (ViaVecTy != ResTy)
1418 Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result);
1419
1420 return Result;
1421 }
1422
1423 if (DAG.isSplatValue(Op, /*AllowUndefs=*/false))
1424 return Op;
1425
1427 // Use INSERT_VECTOR_ELT operations rather than expanding to stores.
1428 // The resulting code is the same length as the expansion, but it doesn't
1429 // use memory operations.
1430 EVT ResTy = Node->getValueType(0);
1431
1432 assert(ResTy.isVector());
1433
1434 unsigned NumElts = ResTy.getVectorNumElements();
1435 SDValue Vector = DAG.getUNDEF(ResTy);
1436 for (unsigned i = 0; i < NumElts; ++i) {
1438 Node->getOperand(i),
1439 DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
1440 }
1441 return Vector;
1442 }
1443
1444 return SDValue();
1445}
1446
1447SDValue
1448LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
1449 SelectionDAG &DAG) const {
1450 EVT VecTy = Op->getOperand(0)->getValueType(0);
1451 SDValue Idx = Op->getOperand(1);
1452 EVT EltTy = VecTy.getVectorElementType();
1453 unsigned NumElts = VecTy.getVectorNumElements();
1454
1455 if (isa<ConstantSDNode>(Idx) &&
1456 (EltTy == MVT::i32 || EltTy == MVT::i64 || EltTy == MVT::f32 ||
1457 EltTy == MVT::f64 || Idx->getAsZExtVal() < NumElts / 2))
1458 return Op;
1459
1460 return SDValue();
1461}
1462
1463SDValue
1464LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
1465 SelectionDAG &DAG) const {
1466 if (isa<ConstantSDNode>(Op->getOperand(2)))
1467 return Op;
1468 return SDValue();
1469}
1470
1471SDValue LoongArchTargetLowering::lowerATOMIC_FENCE(SDValue Op,
1472 SelectionDAG &DAG) const {
1473 SDLoc DL(Op);
1474 SyncScope::ID FenceSSID =
1475 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
1476
1477 // singlethread fences only synchronize with signal handlers on the same
1478 // thread and thus only need to preserve instruction order, not actually
1479 // enforce memory ordering.
1480 if (FenceSSID == SyncScope::SingleThread)
1481 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
1482 return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
1483
1484 return Op;
1485}
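// Illustration (editor's addition): an IR fence such as
//   fence syncscope("singlethread") seq_cst
// takes the branch above and becomes ISD::MEMBARRIER, a compiler-only
// barrier that emits no instruction, while a cross-thread fence falls
// through to the target's normal fence selection (a dbar barrier on
// LoongArch).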
1486
1487SDValue LoongArchTargetLowering::lowerWRITE_REGISTER(SDValue Op,
1488 SelectionDAG &DAG) const {
1489
1490 if (Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i32) {
1491 DAG.getContext()->emitError(
1492 "On LA64, only 64-bit registers can be written.");
1493 return Op.getOperand(0);
1494 }
1495
1496 if (!Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i64) {
1497 DAG.getContext()->emitError(
1498 "On LA32, only 32-bit registers can be written.");
1499 return Op.getOperand(0);
1500 }
1501
1502 return Op;
1503}
1504
1505SDValue LoongArchTargetLowering::lowerFRAMEADDR(SDValue Op,
1506 SelectionDAG &DAG) const {
1507 if (!isa<ConstantSDNode>(Op.getOperand(0))) {
1508 DAG.getContext()->emitError("argument to '__builtin_frame_address' must "
1509 "be a constant integer");
1510 return SDValue();
1511 }
1512
1515 Register FrameReg = Subtarget.getRegisterInfo()->getFrameRegister(MF);
1516 EVT VT = Op.getValueType();
1517 SDLoc DL(Op);
1518 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
1519 unsigned Depth = Op.getConstantOperandVal(0);
1520 int GRLenInBytes = Subtarget.getGRLen() / 8;
1521
1522 while (Depth--) {
1523 int Offset = -(GRLenInBytes * 2);
1524 SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
1526 FrameAddr =
1527 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
1528 }
1529 return FrameAddr;
1530}
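// Worked example (editor's addition, assuming LA64 where GRLenInBytes is 8):
// for __builtin_frame_address(1), Depth is 1, so one load is issued from
// FrameAddr + (-2 * 8) = FrameAddr - 16, i.e. the slot holding the parent
// frame's saved frame pointer relative to the current frame pointer.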
1531
1532SDValue LoongArchTargetLowering::lowerRETURNADDR(SDValue Op,
1533 SelectionDAG &DAG) const {
1535 return SDValue();
1536
1537 // Currently we only support lowering the return address for the current frame.
1538 if (Op.getConstantOperandVal(0) != 0) {
1539 DAG.getContext()->emitError(
1540 "return address can only be determined for the current frame");
1541 return SDValue();
1542 }
1543
1546 MVT GRLenVT = Subtarget.getGRLenVT();
1547
1548 // Return the value of the return address register, marking it an implicit
1549 // live-in.
1550 Register Reg = MF.addLiveIn(Subtarget.getRegisterInfo()->getRARegister(),
1551 getRegClassFor(GRLenVT));
1552 return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), Reg, GRLenVT);
1553}
1554
1555SDValue LoongArchTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
1556 SelectionDAG &DAG) const {
1558 auto Size = Subtarget.getGRLen() / 8;
1559 auto FI = MF.getFrameInfo().CreateFixedObject(Size, 0, false);
1560 return DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
1561}
1562
1563SDValue LoongArchTargetLowering::lowerVASTART(SDValue Op,
1564 SelectionDAG &DAG) const {
1566 auto *FuncInfo = MF.getInfo<LoongArchMachineFunctionInfo>();
1567
1568 SDLoc DL(Op);
1569 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
1571
1572 // vastart just stores the address of the VarArgsFrameIndex slot into the
1573 // memory location argument.
1574 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
1575 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
1576 MachinePointerInfo(SV));
1577}
1578
1579SDValue LoongArchTargetLowering::lowerUINT_TO_FP(SDValue Op,
1580 SelectionDAG &DAG) const {
1581 assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
1582 !Subtarget.hasBasicD() && "unexpected target features");
1583
1584 SDLoc DL(Op);
1585 SDValue Op0 = Op.getOperand(0);
1586 if (Op0->getOpcode() == ISD::AND) {
1587 auto *C = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
1588 if (C && C->getZExtValue() < UINT64_C(0xFFFFFFFF))
1589 return Op;
1590 }
1591
1592 if (Op0->getOpcode() == LoongArchISD::BSTRPICK &&
1593 Op0.getConstantOperandVal(1) < UINT64_C(0X1F) &&
1594 Op0.getConstantOperandVal(2) == UINT64_C(0))
1595 return Op;
1596
1597 if (Op0.getOpcode() == ISD::AssertZext &&
1598 dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLT(MVT::i32))
1599 return Op;
1600
1601 EVT OpVT = Op0.getValueType();
1602 EVT RetVT = Op.getValueType();
1603 RTLIB::Libcall LC = RTLIB::getUINTTOFP(OpVT, RetVT);
1604 MakeLibCallOptions CallOptions;
1605 CallOptions.setTypeListBeforeSoften(OpVT, RetVT, true);
1606 SDValue Chain = SDValue();
1608 std::tie(Result, Chain) =
1609 makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
1610 return Result;
1611}
1612
1613SDValue LoongArchTargetLowering::lowerSINT_TO_FP(SDValue Op,
1614 SelectionDAG &DAG) const {
1615 assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
1616 !Subtarget.hasBasicD() && "unexpected target features");
1617
1618 SDLoc DL(Op);
1619 SDValue Op0 = Op.getOperand(0);
1620
1621 if ((Op0.getOpcode() == ISD::AssertSext ||
1623 dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLE(MVT::i32))
1624 return Op;
1625
1626 EVT OpVT = Op0.getValueType();
1627 EVT RetVT = Op.getValueType();
1628 RTLIB::Libcall LC = RTLIB::getSINTTOFP(OpVT, RetVT);
1629 MakeLibCallOptions CallOptions;
1630 CallOptions.setTypeListBeforeSoften(OpVT, RetVT, true);
1631 SDValue Chain = SDValue();
1633 std::tie(Result, Chain) =
1634 makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
1635 return Result;
1636}
1637
1638SDValue LoongArchTargetLowering::lowerBITCAST(SDValue Op,
1639 SelectionDAG &DAG) const {
1640
1641 SDLoc DL(Op);
1642 SDValue Op0 = Op.getOperand(0);
1643
1644 if (Op.getValueType() == MVT::f32 && Op0.getValueType() == MVT::i32 &&
1645 Subtarget.is64Bit() && Subtarget.hasBasicF()) {
1646 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
1647 return DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, NewOp0);
1648 }
1649 return Op;
1650}
1651
1652SDValue LoongArchTargetLowering::lowerFP_TO_SINT(SDValue Op,
1653 SelectionDAG &DAG) const {
1654
1655 SDLoc DL(Op);
1656
1657 if (Op.getValueSizeInBits() > 32 && Subtarget.hasBasicF() &&
1658 !Subtarget.hasBasicD()) {
1659 SDValue Dst =
1660 DAG.getNode(LoongArchISD::FTINT, DL, MVT::f32, Op.getOperand(0));
1661 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Dst);
1662 }
1663
1664 EVT FPTy = EVT::getFloatingPointVT(Op.getValueSizeInBits());
1665 SDValue Trunc = DAG.getNode(LoongArchISD::FTINT, DL, FPTy, Op.getOperand(0));
1666 return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Trunc);
1667}
1668
1670 SelectionDAG &DAG, unsigned Flags) {
1671 return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
1672}
1673
1675 SelectionDAG &DAG, unsigned Flags) {
1676 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
1677 Flags);
1678}
1679
1681 SelectionDAG &DAG, unsigned Flags) {
1682 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
1683 N->getOffset(), Flags);
1684}
1685
1687 SelectionDAG &DAG, unsigned Flags) {
1688 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
1689}
1690
1691template <class NodeTy>
1692SDValue LoongArchTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
1694 bool IsLocal) const {
1695 SDLoc DL(N);
1696 EVT Ty = getPointerTy(DAG.getDataLayout());
1697 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
1698 SDValue Load;
1699
1700 switch (M) {
1701 default:
1702 report_fatal_error("Unsupported code model");
1703
1704 case CodeModel::Large: {
1705 assert(Subtarget.is64Bit() && "Large code model requires LA64");
1706
1707 // This is not actually used, but is necessary for successfully matching
1708 // the PseudoLA_*_LARGE nodes.
1709 SDValue Tmp = DAG.getConstant(0, DL, Ty);
1710 if (IsLocal) {
1711 // This generates the pattern (PseudoLA_PCREL_LARGE tmp sym), that
1712 // eventually becomes the desired 5-insn code sequence.
1713 Load = SDValue(DAG.getMachineNode(LoongArch::PseudoLA_PCREL_LARGE, DL, Ty,
1714 Tmp, Addr),
1715 0);
1716 } else {
1717 // This generates the pattern (PseudoLA_GOT_LARGE tmp sym), that
1718 // eventually becomes the desired 5-insn code sequence.
1719 Load = SDValue(
1720 DAG.getMachineNode(LoongArch::PseudoLA_GOT_LARGE, DL, Ty, Tmp, Addr),
1721 0);
1722 }
1723 break;
1724 }
1725
1726 case CodeModel::Small:
1727 case CodeModel::Medium:
1728 if (IsLocal) {
1729 // This generates the pattern (PseudoLA_PCREL sym), which expands to
1730 // (addi.w/d (pcalau12i %pc_hi20(sym)) %pc_lo12(sym)).
1731 Load = SDValue(
1732 DAG.getMachineNode(LoongArch::PseudoLA_PCREL, DL, Ty, Addr), 0);
1733 } else {
1734 // This generates the pattern (PseudoLA_GOT sym), which expands to (ld.w/d
1735 // (pcalau12i %got_pc_hi20(sym)) %got_pc_lo12(sym)).
1736 Load =
1737 SDValue(DAG.getMachineNode(LoongArch::PseudoLA_GOT, DL, Ty, Addr), 0);
1738 }
1739 }
1740
1741 if (!IsLocal) {
1742 // Mark the load instruction as invariant to enable hoisting in MachineLICM.
1748 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
1749 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
1750 }
1751
1752 return Load;
1753}
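// Illustration (editor's addition): under the small/medium code model a
// DSO-local symbol goes through PseudoLA_PCREL, which later expands to
// roughly
//   pcalau12i $rd, %pc_hi20(sym)
//   addi.d    $rd, $rd, %pc_lo12(sym)   # addi.w on LA32
// while a non-local symbol uses PseudoLA_GOT and loads the address from the
// GOT instead, as the comments above describe.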
1754
1755SDValue LoongArchTargetLowering::lowerBlockAddress(SDValue Op,
1756 SelectionDAG &DAG) const {
1757 return getAddr(cast<BlockAddressSDNode>(Op), DAG,
1758 DAG.getTarget().getCodeModel());
1759}
1760
1761SDValue LoongArchTargetLowering::lowerJumpTable(SDValue Op,
1762 SelectionDAG &DAG) const {
1763 return getAddr(cast<JumpTableSDNode>(Op), DAG,
1764 DAG.getTarget().getCodeModel());
1765}
1766
1767SDValue LoongArchTargetLowering::lowerConstantPool(SDValue Op,
1768 SelectionDAG &DAG) const {
1769 return getAddr(cast<ConstantPoolSDNode>(Op), DAG,
1770 DAG.getTarget().getCodeModel());
1771}
1772
1773SDValue LoongArchTargetLowering::lowerGlobalAddress(SDValue Op,
1774 SelectionDAG &DAG) const {
1775 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
1776 assert(N->getOffset() == 0 && "unexpected offset in global node");
1777 auto CM = DAG.getTarget().getCodeModel();
1778 const GlobalValue *GV = N->getGlobal();
1779
1780 if (GV->isDSOLocal() && isa<GlobalVariable>(GV)) {
1781 if (auto GCM = dyn_cast<GlobalVariable>(GV)->getCodeModel())
1782 CM = *GCM;
1783 }
1784
1785 return getAddr(N, DAG, CM, GV->isDSOLocal());
1786}
1787
1788SDValue LoongArchTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
1789 SelectionDAG &DAG,
1790 unsigned Opc, bool UseGOT,
1791 bool Large) const {
1792 SDLoc DL(N);
1793 EVT Ty = getPointerTy(DAG.getDataLayout());
1794 MVT GRLenVT = Subtarget.getGRLenVT();
1795
1796 // This is not actually used, but is necessary for successfully matching the
1797 // PseudoLA_*_LARGE nodes.
1798 SDValue Tmp = DAG.getConstant(0, DL, Ty);
1799 SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
1801 ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
1802 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
1803 if (UseGOT) {
1804 // Mark the load instruction as invariant to enable hoisting in MachineLICM.
1810 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
1811 DAG.setNodeMemRefs(cast<MachineSDNode>(Offset.getNode()), {MemOp});
1812 }
1813
1814 // Add the thread pointer.
1815 return DAG.getNode(ISD::ADD, DL, Ty, Offset,
1816 DAG.getRegister(LoongArch::R2, GRLenVT));
1817}
1818
1819SDValue LoongArchTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
1820 SelectionDAG &DAG,
1821 unsigned Opc,
1822 bool Large) const {
1823 SDLoc DL(N);
1824 EVT Ty = getPointerTy(DAG.getDataLayout());
1825 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
1826
1827 // This is not actually used, but is necessary for successfully matching the
1828 // PseudoLA_*_LARGE nodes.
1829 SDValue Tmp = DAG.getConstant(0, DL, Ty);
1830
1831 // Use a PC-relative addressing mode to access the dynamic GOT address.
1832 SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
1833 SDValue Load = Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
1834 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
1835
1836 // Prepare argument list to generate call.
1838 ArgListEntry Entry;
1839 Entry.Node = Load;
1840 Entry.Ty = CallTy;
1841 Args.push_back(Entry);
1842
1843 // Setup call to __tls_get_addr.
1845 CLI.setDebugLoc(DL)
1846 .setChain(DAG.getEntryNode())
1847 .setLibCallee(CallingConv::C, CallTy,
1848 DAG.getExternalSymbol("__tls_get_addr", Ty),
1849 std::move(Args));
1850
1851 return LowerCallTo(CLI).first;
1852}
1853
1854SDValue LoongArchTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
1855 SelectionDAG &DAG, unsigned Opc,
1856 bool Large) const {
1857 SDLoc DL(N);
1858 EVT Ty = getPointerTy(DAG.getDataLayout());
1859 const GlobalValue *GV = N->getGlobal();
1860
1861 // This is not actually used, but is necessary for successfully matching the
1862 // PseudoLA_*_LARGE nodes.
1863 SDValue Tmp = DAG.getConstant(0, DL, Ty);
1864
1865 // Use a PC-relative addressing mode to access the global dynamic GOT address.
1866 // This generates the pattern (PseudoLA_TLS_DESC_PC{,LARGE} sym).
1867 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
1868 return Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
1869 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
1870}
1871
1872SDValue
1873LoongArchTargetLowering::lowerGlobalTLSAddress(SDValue Op,
1874 SelectionDAG &DAG) const {
1877 report_fatal_error("In GHC calling convention TLS is not supported");
1878
1879 bool Large = DAG.getTarget().getCodeModel() == CodeModel::Large;
1880 assert((!Large || Subtarget.is64Bit()) && "Large code model requires LA64");
1881
1882 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
1883 assert(N->getOffset() == 0 && "unexpected offset in global node");
1884
1885 if (DAG.getTarget().useEmulatedTLS())
1886 report_fatal_error("the emulated TLS is prohibited",
1887 /*GenCrashDiag=*/false);
1888
1889 bool IsDesc = DAG.getTarget().useTLSDESC();
1890
1891 switch (getTargetMachine().getTLSModel(N->getGlobal())) {
1893 // In this model, application code calls the dynamic linker function
1894 // __tls_get_addr to locate TLS offsets into the dynamic thread vector at
1895 // runtime.
1896 if (!IsDesc)
1897 return getDynamicTLSAddr(N, DAG,
1898 Large ? LoongArch::PseudoLA_TLS_GD_LARGE
1899 : LoongArch::PseudoLA_TLS_GD,
1900 Large);
1901 break;
1903 // Same as GeneralDynamic, except for assembly modifiers and relocation
1904 // records.
1905 if (!IsDesc)
1906 return getDynamicTLSAddr(N, DAG,
1907 Large ? LoongArch::PseudoLA_TLS_LD_LARGE
1908 : LoongArch::PseudoLA_TLS_LD,
1909 Large);
1910 break;
1912 // This model uses the GOT to resolve TLS offsets.
1913 return getStaticTLSAddr(N, DAG,
1914 Large ? LoongArch::PseudoLA_TLS_IE_LARGE
1915 : LoongArch::PseudoLA_TLS_IE,
1916 /*UseGOT=*/true, Large);
1918 // This model is used for static linking, as the TLS offsets are resolved
1919 // during program linking.
1920 //
1921 // This node doesn't need an extra argument for the large code model.
1922 return getStaticTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_LE,
1923 /*UseGOT=*/false);
1924 }
1925
1926 return getTLSDescAddr(N, DAG,
1927 Large ? LoongArch::PseudoLA_TLS_DESC_PC_LARGE
1928 : LoongArch::PseudoLA_TLS_DESC_PC,
1929 Large);
1930}
1931
1932template <unsigned N>
1933 static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp,
1934 SelectionDAG &DAG, bool IsSigned = false) {
1935 auto *CImm = cast<ConstantSDNode>(Op->getOperand(ImmOp));
1936 // Check the ImmArg.
1937 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
1938 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
1939 DAG.getContext()->emitError(Op->getOperationName(0) +
1940 ": argument out of range.");
1941 return DAG.getNode(ISD::UNDEF, SDLoc(Op), Op.getValueType());
1942 }
1943 return SDValue();
1944}
1945
1946SDValue
1947LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
1948 SelectionDAG &DAG) const {
1949 SDLoc DL(Op);
1950 switch (Op.getConstantOperandVal(0)) {
1951 default:
1952 return SDValue(); // Don't custom lower most intrinsics.
1953 case Intrinsic::thread_pointer: {
1954 EVT PtrVT = getPointerTy(DAG.getDataLayout());
1955 return DAG.getRegister(LoongArch::R2, PtrVT);
1956 }
1957 case Intrinsic::loongarch_lsx_vpickve2gr_d:
1958 case Intrinsic::loongarch_lsx_vpickve2gr_du:
1959 case Intrinsic::loongarch_lsx_vreplvei_d:
1960 case Intrinsic::loongarch_lasx_xvrepl128vei_d:
1961 return checkIntrinsicImmArg<1>(Op, 2, DAG);
1962 case Intrinsic::loongarch_lsx_vreplvei_w:
1963 case Intrinsic::loongarch_lasx_xvrepl128vei_w:
1964 case Intrinsic::loongarch_lasx_xvpickve2gr_d:
1965 case Intrinsic::loongarch_lasx_xvpickve2gr_du:
1966 case Intrinsic::loongarch_lasx_xvpickve_d:
1967 case Intrinsic::loongarch_lasx_xvpickve_d_f:
1968 return checkIntrinsicImmArg<2>(Op, 2, DAG);
1969 case Intrinsic::loongarch_lasx_xvinsve0_d:
1970 return checkIntrinsicImmArg<2>(Op, 3, DAG);
1971 case Intrinsic::loongarch_lsx_vsat_b:
1972 case Intrinsic::loongarch_lsx_vsat_bu:
1973 case Intrinsic::loongarch_lsx_vrotri_b:
1974 case Intrinsic::loongarch_lsx_vsllwil_h_b:
1975 case Intrinsic::loongarch_lsx_vsllwil_hu_bu:
1976 case Intrinsic::loongarch_lsx_vsrlri_b:
1977 case Intrinsic::loongarch_lsx_vsrari_b:
1978 case Intrinsic::loongarch_lsx_vreplvei_h:
1979 case Intrinsic::loongarch_lasx_xvsat_b:
1980 case Intrinsic::loongarch_lasx_xvsat_bu:
1981 case Intrinsic::loongarch_lasx_xvrotri_b:
1982 case Intrinsic::loongarch_lasx_xvsllwil_h_b:
1983 case Intrinsic::loongarch_lasx_xvsllwil_hu_bu:
1984 case Intrinsic::loongarch_lasx_xvsrlri_b:
1985 case Intrinsic::loongarch_lasx_xvsrari_b:
1986 case Intrinsic::loongarch_lasx_xvrepl128vei_h:
1987 case Intrinsic::loongarch_lasx_xvpickve_w:
1988 case Intrinsic::loongarch_lasx_xvpickve_w_f:
1989 return checkIntrinsicImmArg<3>(Op, 2, DAG);
1990 case Intrinsic::loongarch_lasx_xvinsve0_w:
1991 return checkIntrinsicImmArg<3>(Op, 3, DAG);
1992 case Intrinsic::loongarch_lsx_vsat_h:
1993 case Intrinsic::loongarch_lsx_vsat_hu:
1994 case Intrinsic::loongarch_lsx_vrotri_h:
1995 case Intrinsic::loongarch_lsx_vsllwil_w_h:
1996 case Intrinsic::loongarch_lsx_vsllwil_wu_hu:
1997 case Intrinsic::loongarch_lsx_vsrlri_h:
1998 case Intrinsic::loongarch_lsx_vsrari_h:
1999 case Intrinsic::loongarch_lsx_vreplvei_b:
2000 case Intrinsic::loongarch_lasx_xvsat_h:
2001 case Intrinsic::loongarch_lasx_xvsat_hu:
2002 case Intrinsic::loongarch_lasx_xvrotri_h:
2003 case Intrinsic::loongarch_lasx_xvsllwil_w_h:
2004 case Intrinsic::loongarch_lasx_xvsllwil_wu_hu:
2005 case Intrinsic::loongarch_lasx_xvsrlri_h:
2006 case Intrinsic::loongarch_lasx_xvsrari_h:
2007 case Intrinsic::loongarch_lasx_xvrepl128vei_b:
2008 return checkIntrinsicImmArg<4>(Op, 2, DAG);
2009 case Intrinsic::loongarch_lsx_vsrlni_b_h:
2010 case Intrinsic::loongarch_lsx_vsrani_b_h:
2011 case Intrinsic::loongarch_lsx_vsrlrni_b_h:
2012 case Intrinsic::loongarch_lsx_vsrarni_b_h:
2013 case Intrinsic::loongarch_lsx_vssrlni_b_h:
2014 case Intrinsic::loongarch_lsx_vssrani_b_h:
2015 case Intrinsic::loongarch_lsx_vssrlni_bu_h:
2016 case Intrinsic::loongarch_lsx_vssrani_bu_h:
2017 case Intrinsic::loongarch_lsx_vssrlrni_b_h:
2018 case Intrinsic::loongarch_lsx_vssrarni_b_h:
2019 case Intrinsic::loongarch_lsx_vssrlrni_bu_h:
2020 case Intrinsic::loongarch_lsx_vssrarni_bu_h:
2021 case Intrinsic::loongarch_lasx_xvsrlni_b_h:
2022 case Intrinsic::loongarch_lasx_xvsrani_b_h:
2023 case Intrinsic::loongarch_lasx_xvsrlrni_b_h:
2024 case Intrinsic::loongarch_lasx_xvsrarni_b_h:
2025 case Intrinsic::loongarch_lasx_xvssrlni_b_h:
2026 case Intrinsic::loongarch_lasx_xvssrani_b_h:
2027 case Intrinsic::loongarch_lasx_xvssrlni_bu_h:
2028 case Intrinsic::loongarch_lasx_xvssrani_bu_h:
2029 case Intrinsic::loongarch_lasx_xvssrlrni_b_h:
2030 case Intrinsic::loongarch_lasx_xvssrarni_b_h:
2031 case Intrinsic::loongarch_lasx_xvssrlrni_bu_h:
2032 case Intrinsic::loongarch_lasx_xvssrarni_bu_h:
2033 return checkIntrinsicImmArg<4>(Op, 3, DAG);
2034 case Intrinsic::loongarch_lsx_vsat_w:
2035 case Intrinsic::loongarch_lsx_vsat_wu:
2036 case Intrinsic::loongarch_lsx_vrotri_w:
2037 case Intrinsic::loongarch_lsx_vsllwil_d_w:
2038 case Intrinsic::loongarch_lsx_vsllwil_du_wu:
2039 case Intrinsic::loongarch_lsx_vsrlri_w:
2040 case Intrinsic::loongarch_lsx_vsrari_w:
2041 case Intrinsic::loongarch_lsx_vslei_bu:
2042 case Intrinsic::loongarch_lsx_vslei_hu:
2043 case Intrinsic::loongarch_lsx_vslei_wu:
2044 case Intrinsic::loongarch_lsx_vslei_du:
2045 case Intrinsic::loongarch_lsx_vslti_bu:
2046 case Intrinsic::loongarch_lsx_vslti_hu:
2047 case Intrinsic::loongarch_lsx_vslti_wu:
2048 case Intrinsic::loongarch_lsx_vslti_du:
2049 case Intrinsic::loongarch_lsx_vbsll_v:
2050 case Intrinsic::loongarch_lsx_vbsrl_v:
2051 case Intrinsic::loongarch_lasx_xvsat_w:
2052 case Intrinsic::loongarch_lasx_xvsat_wu:
2053 case Intrinsic::loongarch_lasx_xvrotri_w:
2054 case Intrinsic::loongarch_lasx_xvsllwil_d_w:
2055 case Intrinsic::loongarch_lasx_xvsllwil_du_wu:
2056 case Intrinsic::loongarch_lasx_xvsrlri_w:
2057 case Intrinsic::loongarch_lasx_xvsrari_w:
2058 case Intrinsic::loongarch_lasx_xvslei_bu:
2059 case Intrinsic::loongarch_lasx_xvslei_hu:
2060 case Intrinsic::loongarch_lasx_xvslei_wu:
2061 case Intrinsic::loongarch_lasx_xvslei_du:
2062 case Intrinsic::loongarch_lasx_xvslti_bu:
2063 case Intrinsic::loongarch_lasx_xvslti_hu:
2064 case Intrinsic::loongarch_lasx_xvslti_wu:
2065 case Intrinsic::loongarch_lasx_xvslti_du:
2066 case Intrinsic::loongarch_lasx_xvbsll_v:
2067 case Intrinsic::loongarch_lasx_xvbsrl_v:
2068 return checkIntrinsicImmArg<5>(Op, 2, DAG);
2069 case Intrinsic::loongarch_lsx_vseqi_b:
2070 case Intrinsic::loongarch_lsx_vseqi_h:
2071 case Intrinsic::loongarch_lsx_vseqi_w:
2072 case Intrinsic::loongarch_lsx_vseqi_d:
2073 case Intrinsic::loongarch_lsx_vslei_b:
2074 case Intrinsic::loongarch_lsx_vslei_h:
2075 case Intrinsic::loongarch_lsx_vslei_w:
2076 case Intrinsic::loongarch_lsx_vslei_d:
2077 case Intrinsic::loongarch_lsx_vslti_b:
2078 case Intrinsic::loongarch_lsx_vslti_h:
2079 case Intrinsic::loongarch_lsx_vslti_w:
2080 case Intrinsic::loongarch_lsx_vslti_d:
2081 case Intrinsic::loongarch_lasx_xvseqi_b:
2082 case Intrinsic::loongarch_lasx_xvseqi_h:
2083 case Intrinsic::loongarch_lasx_xvseqi_w:
2084 case Intrinsic::loongarch_lasx_xvseqi_d:
2085 case Intrinsic::loongarch_lasx_xvslei_b:
2086 case Intrinsic::loongarch_lasx_xvslei_h:
2087 case Intrinsic::loongarch_lasx_xvslei_w:
2088 case Intrinsic::loongarch_lasx_xvslei_d:
2089 case Intrinsic::loongarch_lasx_xvslti_b:
2090 case Intrinsic::loongarch_lasx_xvslti_h:
2091 case Intrinsic::loongarch_lasx_xvslti_w:
2092 case Intrinsic::loongarch_lasx_xvslti_d:
2093 return checkIntrinsicImmArg<5>(Op, 2, DAG, /*IsSigned=*/true);
2094 case Intrinsic::loongarch_lsx_vsrlni_h_w:
2095 case Intrinsic::loongarch_lsx_vsrani_h_w:
2096 case Intrinsic::loongarch_lsx_vsrlrni_h_w:
2097 case Intrinsic::loongarch_lsx_vsrarni_h_w:
2098 case Intrinsic::loongarch_lsx_vssrlni_h_w:
2099 case Intrinsic::loongarch_lsx_vssrani_h_w:
2100 case Intrinsic::loongarch_lsx_vssrlni_hu_w:
2101 case Intrinsic::loongarch_lsx_vssrani_hu_w:
2102 case Intrinsic::loongarch_lsx_vssrlrni_h_w:
2103 case Intrinsic::loongarch_lsx_vssrarni_h_w:
2104 case Intrinsic::loongarch_lsx_vssrlrni_hu_w:
2105 case Intrinsic::loongarch_lsx_vssrarni_hu_w:
2106 case Intrinsic::loongarch_lsx_vfrstpi_b:
2107 case Intrinsic::loongarch_lsx_vfrstpi_h:
2108 case Intrinsic::loongarch_lasx_xvsrlni_h_w:
2109 case Intrinsic::loongarch_lasx_xvsrani_h_w:
2110 case Intrinsic::loongarch_lasx_xvsrlrni_h_w:
2111 case Intrinsic::loongarch_lasx_xvsrarni_h_w:
2112 case Intrinsic::loongarch_lasx_xvssrlni_h_w:
2113 case Intrinsic::loongarch_lasx_xvssrani_h_w:
2114 case Intrinsic::loongarch_lasx_xvssrlni_hu_w:
2115 case Intrinsic::loongarch_lasx_xvssrani_hu_w:
2116 case Intrinsic::loongarch_lasx_xvssrlrni_h_w:
2117 case Intrinsic::loongarch_lasx_xvssrarni_h_w:
2118 case Intrinsic::loongarch_lasx_xvssrlrni_hu_w:
2119 case Intrinsic::loongarch_lasx_xvssrarni_hu_w:
2120 case Intrinsic::loongarch_lasx_xvfrstpi_b:
2121 case Intrinsic::loongarch_lasx_xvfrstpi_h:
2122 return checkIntrinsicImmArg<5>(Op, 3, DAG);
2123 case Intrinsic::loongarch_lsx_vsat_d:
2124 case Intrinsic::loongarch_lsx_vsat_du:
2125 case Intrinsic::loongarch_lsx_vrotri_d:
2126 case Intrinsic::loongarch_lsx_vsrlri_d:
2127 case Intrinsic::loongarch_lsx_vsrari_d:
2128 case Intrinsic::loongarch_lasx_xvsat_d:
2129 case Intrinsic::loongarch_lasx_xvsat_du:
2130 case Intrinsic::loongarch_lasx_xvrotri_d:
2131 case Intrinsic::loongarch_lasx_xvsrlri_d:
2132 case Intrinsic::loongarch_lasx_xvsrari_d:
2133 return checkIntrinsicImmArg<6>(Op, 2, DAG);
2134 case Intrinsic::loongarch_lsx_vsrlni_w_d:
2135 case Intrinsic::loongarch_lsx_vsrani_w_d:
2136 case Intrinsic::loongarch_lsx_vsrlrni_w_d:
2137 case Intrinsic::loongarch_lsx_vsrarni_w_d:
2138 case Intrinsic::loongarch_lsx_vssrlni_w_d:
2139 case Intrinsic::loongarch_lsx_vssrani_w_d:
2140 case Intrinsic::loongarch_lsx_vssrlni_wu_d:
2141 case Intrinsic::loongarch_lsx_vssrani_wu_d:
2142 case Intrinsic::loongarch_lsx_vssrlrni_w_d:
2143 case Intrinsic::loongarch_lsx_vssrarni_w_d:
2144 case Intrinsic::loongarch_lsx_vssrlrni_wu_d:
2145 case Intrinsic::loongarch_lsx_vssrarni_wu_d:
2146 case Intrinsic::loongarch_lasx_xvsrlni_w_d:
2147 case Intrinsic::loongarch_lasx_xvsrani_w_d:
2148 case Intrinsic::loongarch_lasx_xvsrlrni_w_d:
2149 case Intrinsic::loongarch_lasx_xvsrarni_w_d:
2150 case Intrinsic::loongarch_lasx_xvssrlni_w_d:
2151 case Intrinsic::loongarch_lasx_xvssrani_w_d:
2152 case Intrinsic::loongarch_lasx_xvssrlni_wu_d:
2153 case Intrinsic::loongarch_lasx_xvssrani_wu_d:
2154 case Intrinsic::loongarch_lasx_xvssrlrni_w_d:
2155 case Intrinsic::loongarch_lasx_xvssrarni_w_d:
2156 case Intrinsic::loongarch_lasx_xvssrlrni_wu_d:
2157 case Intrinsic::loongarch_lasx_xvssrarni_wu_d:
2158 return checkIntrinsicImmArg<6>(Op, 3, DAG);
2159 case Intrinsic::loongarch_lsx_vsrlni_d_q:
2160 case Intrinsic::loongarch_lsx_vsrani_d_q:
2161 case Intrinsic::loongarch_lsx_vsrlrni_d_q:
2162 case Intrinsic::loongarch_lsx_vsrarni_d_q:
2163 case Intrinsic::loongarch_lsx_vssrlni_d_q:
2164 case Intrinsic::loongarch_lsx_vssrani_d_q:
2165 case Intrinsic::loongarch_lsx_vssrlni_du_q:
2166 case Intrinsic::loongarch_lsx_vssrani_du_q:
2167 case Intrinsic::loongarch_lsx_vssrlrni_d_q:
2168 case Intrinsic::loongarch_lsx_vssrarni_d_q:
2169 case Intrinsic::loongarch_lsx_vssrlrni_du_q:
2170 case Intrinsic::loongarch_lsx_vssrarni_du_q:
2171 case Intrinsic::loongarch_lasx_xvsrlni_d_q:
2172 case Intrinsic::loongarch_lasx_xvsrani_d_q:
2173 case Intrinsic::loongarch_lasx_xvsrlrni_d_q:
2174 case Intrinsic::loongarch_lasx_xvsrarni_d_q:
2175 case Intrinsic::loongarch_lasx_xvssrlni_d_q:
2176 case Intrinsic::loongarch_lasx_xvssrani_d_q:
2177 case Intrinsic::loongarch_lasx_xvssrlni_du_q:
2178 case Intrinsic::loongarch_lasx_xvssrani_du_q:
2179 case Intrinsic::loongarch_lasx_xvssrlrni_d_q:
2180 case Intrinsic::loongarch_lasx_xvssrarni_d_q:
2181 case Intrinsic::loongarch_lasx_xvssrlrni_du_q:
2182 case Intrinsic::loongarch_lasx_xvssrarni_du_q:
2183 return checkIntrinsicImmArg<7>(Op, 3, DAG);
2184 case Intrinsic::loongarch_lsx_vnori_b:
2185 case Intrinsic::loongarch_lsx_vshuf4i_b:
2186 case Intrinsic::loongarch_lsx_vshuf4i_h:
2187 case Intrinsic::loongarch_lsx_vshuf4i_w:
2188 case Intrinsic::loongarch_lasx_xvnori_b:
2189 case Intrinsic::loongarch_lasx_xvshuf4i_b:
2190 case Intrinsic::loongarch_lasx_xvshuf4i_h:
2191 case Intrinsic::loongarch_lasx_xvshuf4i_w:
2192 case Intrinsic::loongarch_lasx_xvpermi_d:
2193 return checkIntrinsicImmArg<8>(Op, 2, DAG);
2194 case Intrinsic::loongarch_lsx_vshuf4i_d:
2195 case Intrinsic::loongarch_lsx_vpermi_w:
2196 case Intrinsic::loongarch_lsx_vbitseli_b:
2197 case Intrinsic::loongarch_lsx_vextrins_b:
2198 case Intrinsic::loongarch_lsx_vextrins_h:
2199 case Intrinsic::loongarch_lsx_vextrins_w:
2200 case Intrinsic::loongarch_lsx_vextrins_d:
2201 case Intrinsic::loongarch_lasx_xvshuf4i_d:
2202 case Intrinsic::loongarch_lasx_xvpermi_w:
2203 case Intrinsic::loongarch_lasx_xvpermi_q:
2204 case Intrinsic::loongarch_lasx_xvbitseli_b:
2205 case Intrinsic::loongarch_lasx_xvextrins_b:
2206 case Intrinsic::loongarch_lasx_xvextrins_h:
2207 case Intrinsic::loongarch_lasx_xvextrins_w:
2208 case Intrinsic::loongarch_lasx_xvextrins_d:
2209 return checkIntrinsicImmArg<8>(Op, 3, DAG);
2210 case Intrinsic::loongarch_lsx_vrepli_b:
2211 case Intrinsic::loongarch_lsx_vrepli_h:
2212 case Intrinsic::loongarch_lsx_vrepli_w:
2213 case Intrinsic::loongarch_lsx_vrepli_d:
2214 case Intrinsic::loongarch_lasx_xvrepli_b:
2215 case Intrinsic::loongarch_lasx_xvrepli_h:
2216 case Intrinsic::loongarch_lasx_xvrepli_w:
2217 case Intrinsic::loongarch_lasx_xvrepli_d:
2218 return checkIntrinsicImmArg<10>(Op, 1, DAG, /*IsSigned=*/true);
2219 case Intrinsic::loongarch_lsx_vldi:
2220 case Intrinsic::loongarch_lasx_xvldi:
2221 return checkIntrinsicImmArg<13>(Op, 1, DAG, /*IsSigned=*/true);
2222 }
2223}
2224
2225 // Helper function that emits an error message for intrinsics with a chain and
2226 // returns the merged values of an UNDEF and the chain.
2227 static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op,
2228 StringRef ErrorMsg,
2229 SelectionDAG &DAG) {
2230 DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
2231 return DAG.getMergeValues({DAG.getUNDEF(Op.getValueType()), Op.getOperand(0)},
2232 SDLoc(Op));
2233}
2234
2235SDValue
2236LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
2237 SelectionDAG &DAG) const {
2238 SDLoc DL(Op);
2239 MVT GRLenVT = Subtarget.getGRLenVT();
2240 EVT VT = Op.getValueType();
2241 SDValue Chain = Op.getOperand(0);
2242 const StringRef ErrorMsgOOR = "argument out of range";
2243 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
2244 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
2245
2246 switch (Op.getConstantOperandVal(1)) {
2247 default:
2248 return Op;
2249 case Intrinsic::loongarch_crc_w_b_w:
2250 case Intrinsic::loongarch_crc_w_h_w:
2251 case Intrinsic::loongarch_crc_w_w_w:
2252 case Intrinsic::loongarch_crc_w_d_w:
2253 case Intrinsic::loongarch_crcc_w_b_w:
2254 case Intrinsic::loongarch_crcc_w_h_w:
2255 case Intrinsic::loongarch_crcc_w_w_w:
2256 case Intrinsic::loongarch_crcc_w_d_w:
2257 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqLA64, DAG);
2258 case Intrinsic::loongarch_csrrd_w:
2259 case Intrinsic::loongarch_csrrd_d: {
2260 unsigned Imm = Op.getConstantOperandVal(2);
2261 return !isUInt<14>(Imm)
2262 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
2263 : DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
2264 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
2265 }
2266 case Intrinsic::loongarch_csrwr_w:
2267 case Intrinsic::loongarch_csrwr_d: {
2268 unsigned Imm = Op.getConstantOperandVal(3);
2269 return !isUInt<14>(Imm)
2270 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
2271 : DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
2272 {Chain, Op.getOperand(2),
2273 DAG.getConstant(Imm, DL, GRLenVT)});
2274 }
2275 case Intrinsic::loongarch_csrxchg_w:
2276 case Intrinsic::loongarch_csrxchg_d: {
2277 unsigned Imm = Op.getConstantOperandVal(4);
2278 return !isUInt<14>(Imm)
2279 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
2280 : DAG.getNode(LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
2281 {Chain, Op.getOperand(2), Op.getOperand(3),
2282 DAG.getConstant(Imm, DL, GRLenVT)});
2283 }
2284 case Intrinsic::loongarch_iocsrrd_d: {
2285 return DAG.getNode(
2286 LoongArchISD::IOCSRRD_D, DL, {GRLenVT, MVT::Other},
2287 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2))});
2288 }
2289#define IOCSRRD_CASE(NAME, NODE) \
2290 case Intrinsic::loongarch_##NAME: { \
2291 return DAG.getNode(LoongArchISD::NODE, DL, {GRLenVT, MVT::Other}, \
2292 {Chain, Op.getOperand(2)}); \
2293 }
2294 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
2295 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
2296 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
2297#undef IOCSRRD_CASE
2298 case Intrinsic::loongarch_cpucfg: {
2299 return DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
2300 {Chain, Op.getOperand(2)});
2301 }
2302 case Intrinsic::loongarch_lddir_d: {
2303 unsigned Imm = Op.getConstantOperandVal(3);
2304 return !isUInt<8>(Imm)
2305 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
2306 : Op;
2307 }
2308 case Intrinsic::loongarch_movfcsr2gr: {
2309 if (!Subtarget.hasBasicF())
2310 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqF, DAG);
2311 unsigned Imm = Op.getConstantOperandVal(2);
2312 return !isUInt<2>(Imm)
2313 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
2314 : DAG.getNode(LoongArchISD::MOVFCSR2GR, DL, {VT, MVT::Other},
2315 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
2316 }
2317 case Intrinsic::loongarch_lsx_vld:
2318 case Intrinsic::loongarch_lsx_vldrepl_b:
2319 case Intrinsic::loongarch_lasx_xvld:
2320 case Intrinsic::loongarch_lasx_xvldrepl_b:
2321 return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
2322 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
2323 : SDValue();
2324 case Intrinsic::loongarch_lsx_vldrepl_h:
2325 case Intrinsic::loongarch_lasx_xvldrepl_h:
2326 return !isShiftedInt<11, 1>(
2327 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
2328 ? emitIntrinsicWithChainErrorMessage(
2329 Op, "argument out of range or not a multiple of 2", DAG)
2330 : SDValue();
2331 case Intrinsic::loongarch_lsx_vldrepl_w:
2332 case Intrinsic::loongarch_lasx_xvldrepl_w:
2333 return !isShiftedInt<10, 2>(
2334 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
2335 ? emitIntrinsicWithChainErrorMessage(
2336 Op, "argument out of range or not a multiple of 4", DAG)
2337 : SDValue();
2338 case Intrinsic::loongarch_lsx_vldrepl_d:
2339 case Intrinsic::loongarch_lasx_xvldrepl_d:
2340 return !isShiftedInt<9, 3>(
2341 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
2342 ? emitIntrinsicWithChainErrorMessage(
2343 Op, "argument out of range or not a multiple of 8", DAG)
2344 : SDValue();
2345 }
2346}
2347
2348 // Helper function that emits an error message for intrinsics with a void return
2349 // value and returns the chain.
2350 static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg,
2351 SelectionDAG &DAG) {
2352
2353 DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
2354 return Op.getOperand(0);
2355}
2356
2357SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op,
2358 SelectionDAG &DAG) const {
2359 SDLoc DL(Op);
2360 MVT GRLenVT = Subtarget.getGRLenVT();
2361 SDValue Chain = Op.getOperand(0);
2362 uint64_t IntrinsicEnum = Op.getConstantOperandVal(1);
2363 SDValue Op2 = Op.getOperand(2);
2364 const StringRef ErrorMsgOOR = "argument out of range";
2365 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
2366 const StringRef ErrorMsgReqLA32 = "requires loongarch32";
2367 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
2368
2369 switch (IntrinsicEnum) {
2370 default:
2371 // TODO: Add more Intrinsics.
2372 return SDValue();
2373 case Intrinsic::loongarch_cacop_d:
2374 case Intrinsic::loongarch_cacop_w: {
2375 if (IntrinsicEnum == Intrinsic::loongarch_cacop_d && !Subtarget.is64Bit())
2376 return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG);
2377 if (IntrinsicEnum == Intrinsic::loongarch_cacop_w && Subtarget.is64Bit())
2378 return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA32, DAG);
2379 // call void @llvm.loongarch.cacop.[d/w](uimm5, rj, simm12)
2380 unsigned Imm1 = Op2->getAsZExtVal();
2381 int Imm2 = cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue();
2382 if (!isUInt<5>(Imm1) || !isInt<12>(Imm2))
2383 return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
2384 return Op;
2385 }
2386 case Intrinsic::loongarch_dbar: {
2387 unsigned Imm = Op2->getAsZExtVal();
2388 return !isUInt<15>(Imm)
2389 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2390 : DAG.getNode(LoongArchISD::DBAR, DL, MVT::Other, Chain,
2391 DAG.getConstant(Imm, DL, GRLenVT));
2392 }
2393 case Intrinsic::loongarch_ibar: {
2394 unsigned Imm = Op2->getAsZExtVal();
2395 return !isUInt<15>(Imm)
2396 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2397 : DAG.getNode(LoongArchISD::IBAR, DL, MVT::Other, Chain,
2398 DAG.getConstant(Imm, DL, GRLenVT));
2399 }
2400 case Intrinsic::loongarch_break: {
2401 unsigned Imm = Op2->getAsZExtVal();
2402 return !isUInt<15>(Imm)
2403 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2404 : DAG.getNode(LoongArchISD::BREAK, DL, MVT::Other, Chain,
2405 DAG.getConstant(Imm, DL, GRLenVT));
2406 }
2407 case Intrinsic::loongarch_movgr2fcsr: {
2408 if (!Subtarget.hasBasicF())
2409 return emitIntrinsicErrorMessage(Op, ErrorMsgReqF, DAG);
2410 unsigned Imm = Op2->getAsZExtVal();
2411 return !isUInt<2>(Imm)
2412 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2413 : DAG.getNode(LoongArchISD::MOVGR2FCSR, DL, MVT::Other, Chain,
2414 DAG.getConstant(Imm, DL, GRLenVT),
2415 DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT,
2416 Op.getOperand(3)));
2417 }
2418 case Intrinsic::loongarch_syscall: {
2419 unsigned Imm = Op2->getAsZExtVal();
2420 return !isUInt<15>(Imm)
2421 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2422 : DAG.getNode(LoongArchISD::SYSCALL, DL, MVT::Other, Chain,
2423 DAG.getConstant(Imm, DL, GRLenVT));
2424 }
2425#define IOCSRWR_CASE(NAME, NODE) \
2426 case Intrinsic::loongarch_##NAME: { \
2427 SDValue Op3 = Op.getOperand(3); \
2428 return Subtarget.is64Bit() \
2429 ? DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, \
2430 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
2431 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op3)) \
2432 : DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, Op2, \
2433 Op3); \
2434 }
2435 IOCSRWR_CASE(iocsrwr_b, IOCSRWR_B);
2436 IOCSRWR_CASE(iocsrwr_h, IOCSRWR_H);
2437 IOCSRWR_CASE(iocsrwr_w, IOCSRWR_W);
2438#undef IOCSRWR_CASE
2439 case Intrinsic::loongarch_iocsrwr_d: {
2440 return !Subtarget.is64Bit()
2441 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
2442 : DAG.getNode(LoongArchISD::IOCSRWR_D, DL, MVT::Other, Chain,
2443 Op2,
2444 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
2445 Op.getOperand(3)));
2446 }
2447#define ASRT_LE_GT_CASE(NAME) \
2448 case Intrinsic::loongarch_##NAME: { \
2449 return !Subtarget.is64Bit() \
2450 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG) \
2451 : Op; \
2452 }
2453 ASRT_LE_GT_CASE(asrtle_d)
2454 ASRT_LE_GT_CASE(asrtgt_d)
2455#undef ASRT_LE_GT_CASE
2456 case Intrinsic::loongarch_ldpte_d: {
2457 unsigned Imm = Op.getConstantOperandVal(3);
2458 return !Subtarget.is64Bit()
2459 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
2460 : !isUInt<8>(Imm) ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2461 : Op;
2462 }
2463 case Intrinsic::loongarch_lsx_vst:
2464 case Intrinsic::loongarch_lasx_xvst:
2465 return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue())
2466 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2467 : SDValue();
2468 case Intrinsic::loongarch_lasx_xvstelm_b:
2469 return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2470 !isUInt<5>(Op.getConstantOperandVal(5)))
2471 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2472 : SDValue();
2473 case Intrinsic::loongarch_lsx_vstelm_b:
2474 return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2475 !isUInt<4>(Op.getConstantOperandVal(5)))
2476 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2477 : SDValue();
2478 case Intrinsic::loongarch_lasx_xvstelm_h:
2479 return (!isShiftedInt<8, 1>(
2480 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2481 !isUInt<4>(Op.getConstantOperandVal(5)))
2482 ? emitIntrinsicErrorMessage(
2483 Op, "argument out of range or not a multiple of 2", DAG)
2484 : SDValue();
2485 case Intrinsic::loongarch_lsx_vstelm_h:
2486 return (!isShiftedInt<8, 1>(
2487 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2488 !isUInt<3>(Op.getConstantOperandVal(5)))
2489 ? emitIntrinsicErrorMessage(
2490 Op, "argument out of range or not a multiple of 2", DAG)
2491 : SDValue();
2492 case Intrinsic::loongarch_lasx_xvstelm_w:
2493 return (!isShiftedInt<8, 2>(
2494 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2495 !isUInt<3>(Op.getConstantOperandVal(5)))
2496 ? emitIntrinsicErrorMessage(
2497 Op, "argument out of range or not a multiple of 4", DAG)
2498 : SDValue();
2499 case Intrinsic::loongarch_lsx_vstelm_w:
2500 return (!isShiftedInt<8, 2>(
2501 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2502 !isUInt<2>(Op.getConstantOperandVal(5)))
2503 ? emitIntrinsicErrorMessage(
2504 Op, "argument out of range or not a multiple of 4", DAG)
2505 : SDValue();
2506 case Intrinsic::loongarch_lasx_xvstelm_d:
2507 return (!isShiftedInt<8, 3>(
2508 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2509 !isUInt<2>(Op.getConstantOperandVal(5)))
2510 ? emitIntrinsicErrorMessage(
2511 Op, "argument out of range or not a multiple of 8", DAG)
2512 : SDValue();
2513 case Intrinsic::loongarch_lsx_vstelm_d:
2514 return (!isShiftedInt<8, 3>(
2515 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2516 !isUInt<1>(Op.getConstantOperandVal(5)))
2517 ? emitIntrinsicErrorMessage(
2518 Op, "argument out of range or not a multiple of 8", DAG)
2519 : SDValue();
2520 }
2521}
2522
2523SDValue LoongArchTargetLowering::lowerShiftLeftParts(SDValue Op,
2524 SelectionDAG &DAG) const {
2525 SDLoc DL(Op);
2526 SDValue Lo = Op.getOperand(0);
2527 SDValue Hi = Op.getOperand(1);
2528 SDValue Shamt = Op.getOperand(2);
2529 EVT VT = Lo.getValueType();
2530
2531 // if Shamt-GRLen < 0: // Shamt < GRLen
2532 // Lo = Lo << Shamt
2533 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (GRLen-1 ^ Shamt))
2534 // else:
2535 // Lo = 0
2536 // Hi = Lo << (Shamt-GRLen)
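 // A worked example (illustrative): with GRLen == 32 and Shamt == 40,
 // Shamt-GRLen == 8 >= 0, so the else branch applies and the result is
 // Lo = 0 and Hi = (original Lo) << 8.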
2537
2538 SDValue Zero = DAG.getConstant(0, DL, VT);
2539 SDValue One = DAG.getConstant(1, DL, VT);
2540 SDValue MinusGRLen = DAG.getConstant(-(int)Subtarget.getGRLen(), DL, VT);
2541 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
2542 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
2543 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
2544
2545 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
2546 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
2547 SDValue ShiftRightLo =
2548 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, GRLenMinus1Shamt);
2549 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
2550 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
2551 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusGRLen);
2552
2553 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
2554
2555 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
2556 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
2557
2558 SDValue Parts[2] = {Lo, Hi};
2559 return DAG.getMergeValues(Parts, DL);
2560}
2561
2562SDValue LoongArchTargetLowering::lowerShiftRightParts(SDValue Op,
2563 SelectionDAG &DAG,
2564 bool IsSRA) const {
2565 SDLoc DL(Op);
2566 SDValue Lo = Op.getOperand(0);
2567 SDValue Hi = Op.getOperand(1);
2568 SDValue Shamt = Op.getOperand(2);
2569 EVT VT = Lo.getValueType();
2570
2571 // SRA expansion:
2572 // if Shamt-GRLen < 0: // Shamt < GRLen
2573 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
2574 // Hi = Hi >>s Shamt
2575 // else:
2576 // Lo = Hi >>s (Shamt-GRLen);
2577 // Hi = Hi >>s (GRLen-1)
2578 //
2579 // SRL expansion:
2580 // if Shamt-GRLen < 0: // Shamt < GRLen
2581 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
2582 // Hi = Hi >>u Shamt
2583 // else:
2584 // Lo = Hi >>u (Shamt-GRLen);
2585 // Hi = 0;
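 // A worked example (illustrative): with GRLen == 32 and Shamt == 40,
 // Shamt-GRLen == 8 >= 0, so SRL yields Lo = Hi >>u 8, Hi = 0, while SRA
 // yields Lo = Hi >>s 8, Hi = Hi >>s 31.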
2586
2587 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
2588
2589 SDValue Zero = DAG.getConstant(0, DL, VT);
2590 SDValue One = DAG.getConstant(1, DL, VT);
2591 SDValue MinusGRLen = DAG.getConstant(-(int)Subtarget.getGRLen(), DL, VT);
2592 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
2593 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
2594 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
2595
2596 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
2597 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
2598 SDValue ShiftLeftHi =
2599 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, GRLenMinus1Shamt);
2600 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
2601 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
2602 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusGRLen);
2603 SDValue HiFalse =
2604 IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, GRLenMinus1) : Zero;
2605
2606 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
2607
2608 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
2609 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
2610
2611 SDValue Parts[2] = {Lo, Hi};
2612 return DAG.getMergeValues(Parts, DL);
2613}
2614
2615// Returns the opcode of the target-specific SDNode that implements the 32-bit
2616// form of the given Opcode.
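// For instance, ISD::SHL maps to LoongArchISD::SLL_W and ISD::CTLZ maps to
// LoongArchISD::CLZ_W, as listed in the switch below.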
2617 static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode) {
2618 switch (Opcode) {
2619 default:
2620 llvm_unreachable("Unexpected opcode");
2621 case ISD::UDIV:
2622 return LoongArchISD::DIV_WU;
2623 case ISD::UREM:
2624 return LoongArchISD::MOD_WU;
2625 case ISD::SHL:
2626 return LoongArchISD::SLL_W;
2627 case ISD::SRA:
2628 return LoongArchISD::SRA_W;
2629 case ISD::SRL:
2630 return LoongArchISD::SRL_W;
2631 case ISD::ROTL:
2632 case ISD::ROTR:
2633 return LoongArchISD::ROTR_W;
2634 case ISD::CTTZ:
2635 return LoongArchISD::CTZ_W;
2636 case ISD::CTLZ:
2637 return LoongArchISD::CLZ_W;
2638 }
2639}
2640
2641// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
2642// node. Because i8/i16/i32 isn't a legal type for LA64, these operations would
2643// otherwise be promoted to i64, making it difficult to select the
2644 // SLL_W/.../*W node later, because the fact that the operation was originally of
2645// type i8/i16/i32 is lost.
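// Illustrative sketch: on LA64 an (i32 (shl x, y)) is rebuilt roughly as
//   (trunc (LoongArchISD::SLL_W (any_extend x), (any_extend y)))
// so that the 32-bit W-form instruction can still be selected.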
2646 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp,
2647 unsigned ExtOpc = ISD::ANY_EXTEND) {
2648 SDLoc DL(N);
2649 LoongArchISD::NodeType WOpcode = getLoongArchWOpcode(N->getOpcode());
2650 SDValue NewOp0, NewRes;
2651
2652 switch (NumOp) {
2653 default:
2654 llvm_unreachable("Unexpected NumOp");
2655 case 1: {
2656 NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
2657 NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0);
2658 break;
2659 }
2660 case 2: {
2661 NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
2662 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
2663 if (N->getOpcode() == ISD::ROTL) {
2664 SDValue TmpOp = DAG.getConstant(32, DL, MVT::i64);
2665 NewOp1 = DAG.getNode(ISD::SUB, DL, MVT::i64, TmpOp, NewOp1);
2666 }
2667 NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
2668 break;
2669 }
2670 // TODO: Handle more NumOp.
2671 }
2672
2673 // ReplaceNodeResults requires we maintain the same type for the return
2674 // value.
2675 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
2676}
2677
2678 // Converts the given 32-bit operation to an i64 operation with sign-extension
2679 // semantics, in order to reduce the number of sign-extension instructions.
2680 static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
2681 SDLoc DL(N);
2682 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
2683 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
2684 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
2685 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
2686 DAG.getValueType(MVT::i32));
2687 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
2688}
2689
2690 // Helper function that emits an error message for intrinsics with or without a
2691 // chain and returns an UNDEF value plus, when WithChain is set, the chain.
2692 static void emitErrorAndReplaceIntrinsicResults(
2693 SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG,
2694 StringRef ErrorMsg, bool WithChain = true) {
2695 DAG.getContext()->emitError(N->getOperationName(0) + ": " + ErrorMsg + ".");
2696 Results.push_back(DAG.getUNDEF(N->getValueType(0)));
2697 if (!WithChain)
2698 return;
2699 Results.push_back(N->getOperand(0));
2700}
2701
2702template <unsigned N>
2703static void
2704 replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl<SDValue> &Results,
2705 SelectionDAG &DAG, const LoongArchSubtarget &Subtarget,
2706 unsigned ResOp) {
2707 const StringRef ErrorMsgOOR = "argument out of range";
2708 unsigned Imm = Node->getConstantOperandVal(2);
2709 if (!isUInt<N>(Imm)) {
2710 emitErrorAndReplaceIntrinsicResults(Node, Results, DAG, ErrorMsgOOR,
2711 /*WithChain=*/false);
2712 return;
2713 }
2714 SDLoc DL(Node);
2715 SDValue Vec = Node->getOperand(1);
2716
2717 SDValue PickElt =
2718 DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec,
2719 DAG.getConstant(Imm, DL, Subtarget.getGRLenVT()),
2720 DAG.getValueType(Vec.getValueType().getVectorElementType()));
2721 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, Node->getValueType(0),
2722 PickElt.getValue(0)));
2723}
2724
2725 static void replaceVecCondBranchResults(SDNode *N,
2726 SmallVectorImpl<SDValue> &Results,
2727 SelectionDAG &DAG,
2728 const LoongArchSubtarget &Subtarget,
2729 unsigned ResOp) {
2730 SDLoc DL(N);
2731 SDValue Vec = N->getOperand(1);
2732
2733 SDValue CB = DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec);
2734 Results.push_back(
2735 DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), CB.getValue(0)));
2736}
2737
2738static void
2739 replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
2740 SelectionDAG &DAG,
2741 const LoongArchSubtarget &Subtarget) {
2742 switch (N->getConstantOperandVal(0)) {
2743 default:
2744 llvm_unreachable("Unexpected Intrinsic.");
2745 case Intrinsic::loongarch_lsx_vpickve2gr_b:
2746 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
2747 LoongArchISD::VPICK_SEXT_ELT);
2748 break;
2749 case Intrinsic::loongarch_lsx_vpickve2gr_h:
2750 case Intrinsic::loongarch_lasx_xvpickve2gr_w:
2751 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
2752 LoongArchISD::VPICK_SEXT_ELT);
2753 break;
2754 case Intrinsic::loongarch_lsx_vpickve2gr_w:
2755 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
2756 LoongArchISD::VPICK_SEXT_ELT);
2757 break;
2758 case Intrinsic::loongarch_lsx_vpickve2gr_bu:
2759 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
2760 LoongArchISD::VPICK_ZEXT_ELT);
2761 break;
2762 case Intrinsic::loongarch_lsx_vpickve2gr_hu:
2763 case Intrinsic::loongarch_lasx_xvpickve2gr_wu:
2764 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
2765 LoongArchISD::VPICK_ZEXT_ELT);
2766 break;
2767 case Intrinsic::loongarch_lsx_vpickve2gr_wu:
2768 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
2769 LoongArchISD::VPICK_ZEXT_ELT);
2770 break;
2771 case Intrinsic::loongarch_lsx_bz_b:
2772 case Intrinsic::loongarch_lsx_bz_h:
2773 case Intrinsic::loongarch_lsx_bz_w:
2774 case Intrinsic::loongarch_lsx_bz_d:
2775 case Intrinsic::loongarch_lasx_xbz_b:
2776 case Intrinsic::loongarch_lasx_xbz_h:
2777 case Intrinsic::loongarch_lasx_xbz_w:
2778 case Intrinsic::loongarch_lasx_xbz_d:
2779 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
2780 LoongArchISD::VANY_ZERO);
2781 break;
2782 case Intrinsic::loongarch_lsx_bz_v:
2783 case Intrinsic::loongarch_lasx_xbz_v:
2784 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
2785 LoongArchISD::VALL_ZERO);
2786 break;
2787 case Intrinsic::loongarch_lsx_bnz_b:
2788 case Intrinsic::loongarch_lsx_bnz_h:
2789 case Intrinsic::loongarch_lsx_bnz_w:
2790 case Intrinsic::loongarch_lsx_bnz_d:
2791 case Intrinsic::loongarch_lasx_xbnz_b:
2792 case Intrinsic::loongarch_lasx_xbnz_h:
2793 case Intrinsic::loongarch_lasx_xbnz_w:
2794 case Intrinsic::loongarch_lasx_xbnz_d:
2795 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
2796 LoongArchISD::VALL_NONZERO);
2797 break;
2798 case Intrinsic::loongarch_lsx_bnz_v:
2799 case Intrinsic::loongarch_lasx_xbnz_v:
2800 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
2801 LoongArchISD::VANY_NONZERO);
2802 break;
2803 }
2804}
2805
2806 void LoongArchTargetLowering::ReplaceNodeResults(
2807 SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
2808 SDLoc DL(N);
2809 EVT VT = N->getValueType(0);
2810 switch (N->getOpcode()) {
2811 default:
2812 llvm_unreachable("Don't know how to legalize this operation");
2813 case ISD::ADD:
2814 case ISD::SUB:
2815 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
2816 "Unexpected custom legalisation");
2817 Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
2818 break;
2819 case ISD::UDIV:
2820 case ISD::UREM:
2821 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
2822 "Unexpected custom legalisation");
2823 Results.push_back(customLegalizeToWOp(N, DAG, 2, ISD::SIGN_EXTEND));
2824 break;
2825 case ISD::SHL:
2826 case ISD::SRA:
2827 case ISD::SRL:
2828 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
2829 "Unexpected custom legalisation");
2830 if (N->getOperand(1).getOpcode() != ISD::Constant) {
2831 Results.push_back(customLegalizeToWOp(N, DAG, 2));
2832 break;
2833 }
2834 break;
2835 case ISD::ROTL:
2836 case ISD::ROTR:
2837 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
2838 "Unexpected custom legalisation");
2839 Results.push_back(customLegalizeToWOp(N, DAG, 2));
2840 break;
2841 case ISD::FP_TO_SINT: {
2842 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
2843 "Unexpected custom legalisation");
2844 SDValue Src = N->getOperand(0);
2845 EVT FVT = EVT::getFloatingPointVT(N->getValueSizeInBits(0));
2846 if (getTypeAction(*DAG.getContext(), Src.getValueType()) !=
2847 TargetLowering::TypeSoftenFloat) {
2848 SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, FVT, Src);
2849 Results.push_back(DAG.getNode(ISD::BITCAST, DL, VT, Dst));
2850 return;
2851 }
2852 // If the FP type needs to be softened, emit a library call using the 'si'
2853 // version. If we left it to default legalization we'd end up with 'di'.
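 // For example (illustrative), an f32 -> i32 conversion uses a call such as
 // __fixsfsi here rather than the wider __fixsfdi variant.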
2854 RTLIB::Libcall LC;
2855 LC = RTLIB::getFPTOSINT(Src.getValueType(), VT);
2856 MakeLibCallOptions CallOptions;
2857 EVT OpVT = Src.getValueType();
2858 CallOptions.setTypeListBeforeSoften(OpVT, VT, true);
2859 SDValue Chain = SDValue();
2860 SDValue Result;
2861 std::tie(Result, Chain) =
2862 makeLibCall(DAG, LC, VT, Src, CallOptions, DL, Chain);
2863 Results.push_back(Result);
2864 break;
2865 }
2866 case ISD::BITCAST: {
2867 SDValue Src = N->getOperand(0);
2868 EVT SrcVT = Src.getValueType();
2869 if (VT == MVT::i32 && SrcVT == MVT::f32 && Subtarget.is64Bit() &&
2870 Subtarget.hasBasicF()) {
2871 SDValue Dst =
2872 DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Src);
2873 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Dst));
2874 }
2875 break;
2876 }
2877 case ISD::FP_TO_UINT: {
2878 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
2879 "Unexpected custom legalisation");
2880 auto &TLI = DAG.getTargetLoweringInfo();
2881 SDValue Tmp1, Tmp2;
2882 TLI.expandFP_TO_UINT(N, Tmp1, Tmp2, DAG);
2883 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Tmp1));
2884 break;
2885 }
2886 case ISD::BSWAP: {
2887 SDValue Src = N->getOperand(0);
2888 assert((VT == MVT::i16 || VT == MVT::i32) &&
2889 "Unexpected custom legalization");
2890 MVT GRLenVT = Subtarget.getGRLenVT();
2891 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
2892 SDValue Tmp;
2893 switch (VT.getSizeInBits()) {
2894 default:
2895 llvm_unreachable("Unexpected operand width");
2896 case 16:
2897 Tmp = DAG.getNode(LoongArchISD::REVB_2H, DL, GRLenVT, NewSrc);
2898 break;
2899 case 32:
2900 // Only LA64 will get to here due to the size mismatch between VT and
2901 // GRLenVT; LA32 lowering is defined directly in LoongArchInstrInfo.
2902 Tmp = DAG.getNode(LoongArchISD::REVB_2W, DL, GRLenVT, NewSrc);
2903 break;
2904 }
2905 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
2906 break;
2907 }
2908 case ISD::BITREVERSE: {
2909 SDValue Src = N->getOperand(0);
2910 assert((VT == MVT::i8 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
2911 "Unexpected custom legalization");
2912 MVT GRLenVT = Subtarget.getGRLenVT();
2913 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
2914 SDValue Tmp;
2915 switch (VT.getSizeInBits()) {
2916 default:
2917 llvm_unreachable("Unexpected operand width");
2918 case 8:
2919 Tmp = DAG.getNode(LoongArchISD::BITREV_4B, DL, GRLenVT, NewSrc);
2920 break;
2921 case 32:
2922 Tmp = DAG.getNode(LoongArchISD::BITREV_W, DL, GRLenVT, NewSrc);
2923 break;
2924 }
2925 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
2926 break;
2927 }
2928 case ISD::CTLZ:
2929 case ISD::CTTZ: {
2930 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
2931 "Unexpected custom legalisation");
2932 Results.push_back(customLegalizeToWOp(N, DAG, 1));
2933 break;
2934 }
2935 case ISD::INTRINSIC_W_CHAIN: {
2936 SDValue Chain = N->getOperand(0);
2937 SDValue Op2 = N->getOperand(2);
2938 MVT GRLenVT = Subtarget.getGRLenVT();
2939 const StringRef ErrorMsgOOR = "argument out of range";
2940 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
2941 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
2942
2943 switch (N->getConstantOperandVal(1)) {
2944 default:
2945 llvm_unreachable("Unexpected Intrinsic.");
2946 case Intrinsic::loongarch_movfcsr2gr: {
2947 if (!Subtarget.hasBasicF()) {
2948 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqF);
2949 return;
2950 }
2951 unsigned Imm = Op2->getAsZExtVal();
2952 if (!isUInt<2>(Imm)) {
2953 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
2954 return;
2955 }
2956 SDValue MOVFCSR2GRResults = DAG.getNode(
2957 LoongArchISD::MOVFCSR2GR, SDLoc(N), {MVT::i64, MVT::Other},
2958 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
2959 Results.push_back(
2960 DAG.getNode(ISD::TRUNCATE, DL, VT, MOVFCSR2GRResults.getValue(0)));
2961 Results.push_back(MOVFCSR2GRResults.getValue(1));
2962 break;
2963 }
2964#define CRC_CASE_EXT_BINARYOP(NAME, NODE) \
2965 case Intrinsic::loongarch_##NAME: { \
2966 SDValue NODE = DAG.getNode( \
2967 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
2968 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
2969 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
2970 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
2971 Results.push_back(NODE.getValue(1)); \
2972 break; \
2973 }
2974 CRC_CASE_EXT_BINARYOP(crc_w_b_w, CRC_W_B_W)
2975 CRC_CASE_EXT_BINARYOP(crc_w_h_w, CRC_W_H_W)
2976 CRC_CASE_EXT_BINARYOP(crc_w_w_w, CRC_W_W_W)
2977 CRC_CASE_EXT_BINARYOP(crcc_w_b_w, CRCC_W_B_W)
2978 CRC_CASE_EXT_BINARYOP(crcc_w_h_w, CRCC_W_H_W)
2979 CRC_CASE_EXT_BINARYOP(crcc_w_w_w, CRCC_W_W_W)
2980#undef CRC_CASE_EXT_BINARYOP
2981
2982#define CRC_CASE_EXT_UNARYOP(NAME, NODE) \
2983 case Intrinsic::loongarch_##NAME: { \
2984 SDValue NODE = DAG.getNode( \
2985 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
2986 {Chain, Op2, \
2987 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
2988 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
2989 Results.push_back(NODE.getValue(1)); \
2990 break; \
2991 }
2992 CRC_CASE_EXT_UNARYOP(crc_w_d_w, CRC_W_D_W)
2993 CRC_CASE_EXT_UNARYOP(crcc_w_d_w, CRCC_W_D_W)
2994#undef CRC_CASE_EXT_UNARYOP
2995#define CSR_CASE(ID) \
2996 case Intrinsic::loongarch_##ID: { \
2997 if (!Subtarget.is64Bit()) \
2998 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64); \
2999 break; \
3000 }
3001 CSR_CASE(csrrd_d);
3002 CSR_CASE(csrwr_d);
3003 CSR_CASE(csrxchg_d);
3004 CSR_CASE(iocsrrd_d);
3005#undef CSR_CASE
3006 case Intrinsic::loongarch_csrrd_w: {
3007 unsigned Imm = Op2->getAsZExtVal();
3008 if (!isUInt<14>(Imm)) {
3009 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
3010 return;
3011 }
3012 SDValue CSRRDResults =
3013 DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
3014 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
3015 Results.push_back(
3016 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRRDResults.getValue(0)));
3017 Results.push_back(CSRRDResults.getValue(1));
3018 break;
3019 }
3020 case Intrinsic::loongarch_csrwr_w: {
3021 unsigned Imm = N->getConstantOperandVal(3);
3022 if (!isUInt<14>(Imm)) {
3023 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
3024 return;
3025 }
3026 SDValue CSRWRResults =
3027 DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
3028 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
3029 DAG.getConstant(Imm, DL, GRLenVT)});
3030 Results.push_back(
3031 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRWRResults.getValue(0)));
3032 Results.push_back(CSRWRResults.getValue(1));
3033 break;
3034 }
3035 case Intrinsic::loongarch_csrxchg_w: {
3036 unsigned Imm = N->getConstantOperandVal(4);
3037 if (!isUInt<14>(Imm)) {
3038 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
3039 return;
3040 }
3041 SDValue CSRXCHGResults = DAG.getNode(
3042 LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
3043 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
3044 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3)),
3045 DAG.getConstant(Imm, DL, GRLenVT)});
3046 Results.push_back(
3047 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRXCHGResults.getValue(0)));
3048 Results.push_back(CSRXCHGResults.getValue(1));
3049 break;
3050 }
3051#define IOCSRRD_CASE(NAME, NODE) \
3052 case Intrinsic::loongarch_##NAME: { \
3053 SDValue IOCSRRDResults = \
3054 DAG.getNode(LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
3055 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)}); \
3056 Results.push_back( \
3057 DAG.getNode(ISD::TRUNCATE, DL, VT, IOCSRRDResults.getValue(0))); \
3058 Results.push_back(IOCSRRDResults.getValue(1)); \
3059 break; \
3060 }
3061 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
3062 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
3063 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
3064#undef IOCSRRD_CASE
3065 case Intrinsic::loongarch_cpucfg: {
3066 SDValue CPUCFGResults =
3067 DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
3068 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)});
3069 Results.push_back(
3070 DAG.getNode(ISD::TRUNCATE, DL, VT, CPUCFGResults.getValue(0)));
3071 Results.push_back(CPUCFGResults.getValue(1));
3072 break;
3073 }
3074 case Intrinsic::loongarch_lddir_d: {
3075 if (!Subtarget.is64Bit()) {
3076 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64);
3077 return;
3078 }
3079 break;
3080 }
3081 }
3082 break;
3083 }
3084 case ISD::READ_REGISTER: {
3085 if (Subtarget.is64Bit())
3086 DAG.getContext()->emitError(
3087 "On LA64, only 64-bit registers can be read.");
3088 else
3089 DAG.getContext()->emitError(
3090 "On LA32, only 32-bit registers can be read.");
3091 Results.push_back(DAG.getUNDEF(VT));
3092 Results.push_back(N->getOperand(0));
3093 break;
3094 }
3095 case ISD::INTRINSIC_WO_CHAIN: {
3096 replaceINTRINSIC_WO_CHAINResults(N, Results, DAG, Subtarget);
3097 break;
3098 }
3099 }
3100}
3101
3102 static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
3103 TargetLowering::DAGCombinerInfo &DCI,
3104 const LoongArchSubtarget &Subtarget) {
3105 if (DCI.isBeforeLegalizeOps())
3106 return SDValue();
3107
3108 SDValue FirstOperand = N->getOperand(0);
3109 SDValue SecondOperand = N->getOperand(1);
3110 unsigned FirstOperandOpc = FirstOperand.getOpcode();
3111 EVT ValTy = N->getValueType(0);
3112 SDLoc DL(N);
3113 uint64_t lsb, msb;
3114 unsigned SMIdx, SMLen;
3115 ConstantSDNode *CN;
3116 SDValue NewOperand;
3117 MVT GRLenVT = Subtarget.getGRLenVT();
3118
3119 // Op's second operand must be a shifted mask.
3120 if (!(CN = dyn_cast<ConstantSDNode>(SecondOperand)) ||
3121 !isShiftedMask_64(CN->getZExtValue(), SMIdx, SMLen))
3122 return SDValue();
3123
3124 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL) {
3125 // Pattern match BSTRPICK.
3126 // $dst = and ((sra or srl) $src , lsb), (2**len - 1)
3127 // => BSTRPICK $dst, $src, msb, lsb
3128 // where msb = lsb + len - 1
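 // For example (illustrative): (and (srl $src, 4), 0xff) becomes
 // BSTRPICK $dst, $src, 11, 4 (lsb = 4, len = 8, msb = 11).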
3129
3130 // The second operand of the shift must be an immediate.
3131 if (!(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))))
3132 return SDValue();
3133
3134 lsb = CN->getZExtValue();
3135
3136 // Return if the shifted mask does not start at bit 0 or the sum of its
3137 // length and lsb exceeds the word's size.
3138 if (SMIdx != 0 || lsb + SMLen > ValTy.getSizeInBits())
3139 return SDValue();
3140
3141 NewOperand = FirstOperand.getOperand(0);
3142 } else {
3143 // Pattern match BSTRPICK.
3144 // $dst = and $src, (2**len - 1), if len > 12
3145 // => BSTRPICK $dst, $src, msb, lsb
3146 // where lsb = 0 and msb = len - 1
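 // For example (illustrative): (and $src, 0xffff) becomes
 // BSTRPICK $dst, $src, 15, 0, since the mask does not fit in a 12-bit andi.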
3147
3148 // If the mask is <= 0xfff, andi can be used instead.
3149 if (CN->getZExtValue() <= 0xfff)
3150 return SDValue();
3151
3152 // Return if the shifted mask's MSB exceeds the value's bit width.
3153 if (SMIdx + SMLen > ValTy.getSizeInBits())
3154 return SDValue();
3155
3156 if (SMIdx > 0) {
3157 // Omit if the constant has more than 2 uses. This is a conservative
3158 // decision. Whether it is a win depends on the HW microarchitecture.
3159 // However it should always be better for 1 and 2 uses.
3160 if (CN->use_size() > 2)
3161 return SDValue();
3162 // Return if the constant can be composed by a single LU12I.W.
3163 if ((CN->getZExtValue() & 0xfff) == 0)
3164 return SDValue();
3165 // Return if the constant can be composed by a single ADDI with
3166 // the zero register.
3167 if (CN->getSExtValue() >= -2048 && CN->getSExtValue() < 0)
3168 return SDValue();
3169 }
3170
3171 lsb = SMIdx;
3172 NewOperand = FirstOperand;
3173 }
3174
3175 msb = lsb + SMLen - 1;
3176 SDValue NR0 = DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy, NewOperand,
3177 DAG.getConstant(msb, DL, GRLenVT),
3178 DAG.getConstant(lsb, DL, GRLenVT));
3179 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL || lsb == 0)
3180 return NR0;
3181 // Try to optimize to
3182 // bstrpick $Rd, $Rs, msb, lsb
3183 // slli $Rd, $Rd, lsb
3184 return DAG.getNode(ISD::SHL, DL, ValTy, NR0,
3185 DAG.getConstant(lsb, DL, GRLenVT));
3186}
3187
3188 static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG,
3189 TargetLowering::DAGCombinerInfo &DCI,
3190 const LoongArchSubtarget &Subtarget) {
3191 if (DCI.isBeforeLegalizeOps())
3192 return SDValue();
3193
3194 // $dst = srl (and $src, Mask), Shamt
3195 // =>
3196 // BSTRPICK $dst, $src, MaskIdx+MaskLen-1, Shamt
3197 // when Mask is a shifted mask, and MaskIdx <= Shamt <= MaskIdx+MaskLen-1
3198 //
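 // For example (illustrative): (srl (and $src, 0xff00), 8) becomes
 // BSTRPICK $dst, $src, 15, 8 (MaskIdx = 8, MaskLen = 8, Shamt = 8).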
3199
3200 SDValue FirstOperand = N->getOperand(0);
3201 ConstantSDNode *CN;
3202 EVT ValTy = N->getValueType(0);
3203 SDLoc DL(N);
3204 MVT GRLenVT = Subtarget.getGRLenVT();
3205 unsigned MaskIdx, MaskLen;
3206 uint64_t Shamt;
3207
3208 // The first operand must be an AND and the second operand of the AND must be
3209 // a shifted mask.
3210 if (FirstOperand.getOpcode() != ISD::AND ||
3211 !(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))) ||
3212 !isShiftedMask_64(CN->getZExtValue(), MaskIdx, MaskLen))
3213 return SDValue();
3214
3215 // The second operand (shift amount) must be an immediate.
3216 if (!(CN = dyn_cast<ConstantSDNode>(N->getOperand(1))))
3217 return SDValue();
3218
3219 Shamt = CN->getZExtValue();
3220 if (MaskIdx <= Shamt && Shamt <= MaskIdx + MaskLen - 1)
3221 return DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy,
3222 FirstOperand->getOperand(0),
3223 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
3224 DAG.getConstant(Shamt, DL, GRLenVT));
3225
3226 return SDValue();
3227}
3228
3229 static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
3230 TargetLowering::DAGCombinerInfo &DCI,
3231 const LoongArchSubtarget &Subtarget) {
3232 MVT GRLenVT = Subtarget.getGRLenVT();
3233 EVT ValTy = N->getValueType(0);
3234 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
3235 ConstantSDNode *CN0, *CN1;
3236 SDLoc DL(N);
3237 unsigned ValBits = ValTy.getSizeInBits();
3238 unsigned MaskIdx0, MaskLen0, MaskIdx1, MaskLen1;
3239 unsigned Shamt;
3240 bool SwapAndRetried = false;
3241
3242 if (DCI.isBeforeLegalizeOps())
3243 return SDValue();
3244
3245 if (ValBits != 32 && ValBits != 64)
3246 return SDValue();
3247
3248Retry:
3249 // 1st pattern to match BSTRINS:
3250 // R = or (and X, mask0), (and (shl Y, lsb), mask1)
3251 // where mask1 = (2**size - 1) << lsb, mask0 = ~mask1
3252 // =>
3253 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
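 // A worked example (illustrative), with size == 8 and lsb == 4:
 //   R = or (and X, ~0xff0), (and (shl Y, 4), 0xff0)
 //   => R = BSTRINS X, Y, 11, 4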
3254 if (N0.getOpcode() == ISD::AND &&
3255 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
3256 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
3257 N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL &&
3258 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
3259 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
3260 MaskIdx0 == MaskIdx1 && MaskLen0 == MaskLen1 &&
3261 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
3262 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
3263 (MaskIdx0 + MaskLen0 <= ValBits)) {
3264 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 1\n");
3265 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
3266 N1.getOperand(0).getOperand(0),
3267 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
3268 DAG.getConstant(MaskIdx0, DL, GRLenVT));
3269 }
3270
3271 // 2nd pattern to match BSTRINS:
3272 // R = or (and X, mask0), (shl (and Y, mask1), lsb)
3273 // where mask1 = (2**size - 1), mask0 = ~(mask1 << lsb)
3274 // =>
3275 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
3276 if (N0.getOpcode() == ISD::AND &&
3277 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
3278 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
3279 N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
3280 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
3281 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
3282 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
3283 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
3284 MaskLen0 == MaskLen1 && MaskIdx1 == 0 &&
3285 (MaskIdx0 + MaskLen0 <= ValBits)) {
3286 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 2\n");
3287 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
3288 N1.getOperand(0).getOperand(0),
3289 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
3290 DAG.getConstant(MaskIdx0, DL, GRLenVT));
3291 }
3292
3293 // 3rd pattern to match BSTRINS:
3294 // R = or (and X, mask0), (and Y, mask1)
3295 // where ~mask0 = (2**size - 1) << lsb, mask0 & mask1 = 0
3296 // =>
3297 // R = BSTRINS X, (shr (and Y, mask1), lsb), msb, lsb
3298 // where msb = lsb + size - 1
3299 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
3300 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
3301 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
3302 (MaskIdx0 + MaskLen0 <= 64) &&
3303 (CN1 = dyn_cast<ConstantSDNode>(N1->getOperand(1))) &&
3304 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
3305 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 3\n");
3306 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
3307 DAG.getNode(ISD::SRL, DL, N1->getValueType(0), N1,
3308 DAG.getConstant(MaskIdx0, DL, GRLenVT)),
3309 DAG.getConstant(ValBits == 32
3310 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
3311 : (MaskIdx0 + MaskLen0 - 1),
3312 DL, GRLenVT),
3313 DAG.getConstant(MaskIdx0, DL, GRLenVT));
3314 }
3315
3316 // 4th pattern to match BSTRINS:
3317 // R = or (and X, mask), (shl Y, shamt)
3318 // where mask = (2**shamt - 1)
3319 // =>
3320 // R = BSTRINS X, Y, ValBits - 1, shamt
3321 // where ValBits = 32 or 64
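 // A worked example (illustrative), for a 32-bit value with shamt == 16:
 //   R = or (and X, 0xffff), (shl Y, 16)  =>  R = BSTRINS X, Y, 31, 16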
3322 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::SHL &&
3323 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
3324 isShiftedMask_64(CN0->getZExtValue(), MaskIdx0, MaskLen0) &&
3325 MaskIdx0 == 0 && (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
3326 (Shamt = CN1->getZExtValue()) == MaskLen0 &&
3327 (MaskIdx0 + MaskLen0 <= ValBits)) {
3328 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 4\n");
3329 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
3330 N1.getOperand(0),
3331 DAG.getConstant((ValBits - 1), DL, GRLenVT),
3332 DAG.getConstant(Shamt, DL, GRLenVT));
3333 }
3334
3335 // 5th pattern to match BSTRINS:
3336 // R = or (and X, mask), const
3337 // where ~mask = (2**size - 1) << lsb, mask & const = 0
3338 // =>
3339 // R = BSTRINS X, (const >> lsb), msb, lsb
3340 // where msb = lsb + size - 1
3341 if (N0.getOpcode() == ISD::AND &&
3342 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
3343 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
3344 (CN1 = dyn_cast<ConstantSDNode>(N1)) &&
3345 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
3346 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 5\n");
3347 return DAG.getNode(
3348 LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
3349 DAG.getConstant(CN1->getSExtValue() >> MaskIdx0, DL, ValTy),
3350 DAG.getConstant(ValBits == 32 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
3351 : (MaskIdx0 + MaskLen0 - 1),
3352 DL, GRLenVT),
3353 DAG.getConstant(MaskIdx0, DL, GRLenVT));
3354 }
3355
3356 // 6th pattern.
3357 // a = b | ((c & mask) << shamt), where all positions in b to be overwritten
3358 // by the incoming bits are known to be zero.
3359 // =>
3360 // a = BSTRINS b, c, shamt + MaskLen - 1, shamt
3361 //
3362 // Note that the 1st pattern is a special case of the 6th, i.e. the 6th
3363 // pattern is more common than the 1st. So we put the 1st before the 6th in
3364 // order to match as many nodes as possible.
3365 ConstantSDNode *CNMask, *CNShamt;
3366 unsigned MaskIdx, MaskLen;
3367 if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
3368 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
3369 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
3370 MaskIdx == 0 && (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
3371 CNShamt->getZExtValue() + MaskLen <= ValBits) {
3372 Shamt = CNShamt->getZExtValue();
3373 APInt ShMask(ValBits, CNMask->getZExtValue() << Shamt);
3374 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
3375 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 6\n");
3376 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
3377 N1.getOperand(0).getOperand(0),
3378 DAG.getConstant(Shamt + MaskLen - 1, DL, GRLenVT),
3379 DAG.getConstant(Shamt, DL, GRLenVT));
3380 }
3381 }
3382
3383 // 7th pattern.
3384 // a = b | ((c << shamt) & shifted_mask), where all positions in b to be
3385 // overwritten by the incoming bits are known to be zero.
3386 // =>
3387 // a = BSTRINS b, c, MaskIdx + MaskLen - 1, MaskIdx
3388 //
3389 // Similarly, the 7th pattern is more common than the 2nd. So we put the 2nd
3390 // before the 7th in order to match as many nodes as possible.
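// For illustration, one instance of the 7th pattern (constants chosen
// arbitrarily), assuming bits 15:8 of b are known to be zero:
//   a = b | ((c << 8) & 0xff00)
//   => a = BSTRINS b, c, 15, 8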
3391 if (N1.getOpcode() == ISD::AND &&
3392 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
3393 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
3394 N1.getOperand(0).getOpcode() == ISD::SHL &&
3395 (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
3396 CNShamt->getZExtValue() == MaskIdx) {
3397 APInt ShMask(ValBits, CNMask->getZExtValue());
3398 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
3399 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 7\n");
3400 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
3401 N1.getOperand(0).getOperand(0),
3402 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
3403 DAG.getConstant(MaskIdx, DL, GRLenVT));
3404 }
3405 }
3406
3407 // (or a, b) and (or b, a) are equivalent, so swap the operands and retry.
3408 if (!SwapAndRetried) {
3409 std::swap(N0, N1);
3410 SwapAndRetried = true;
3411 goto Retry;
3412 }
3413
3414 SwapAndRetried = false;
3415Retry2:
3416 // 8th pattern.
3417 // a = b | (c & shifted_mask), where all positions in b to be overwritten by
3418 // the incoming bits are known to be zero.
3419 // =>
3420 // a = BSTRINS b, c >> MaskIdx, MaskIdx + MaskLen - 1, MaskIdx
3421 //
3422 // Similarly, the 8th pattern is more common than the 4th and 5th patterns. So
3423 // we put it here in order to match as many nodes as possible or generate
3424 // fewer instructions.
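// For illustration, one instance of the 8th pattern (constants chosen
// arbitrarily), assuming bits 11:4 of b are known to be zero:
//   a = b | (c & 0x0ff0)
//   => a = BSTRINS b, (srl c, 4), 11, 4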
3425 if (N1.getOpcode() == ISD::AND &&
3426 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
3427 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen)) {
3428 APInt ShMask(ValBits, CNMask->getZExtValue());
3429 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
3430 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 8\n");
3431 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
3432 DAG.getNode(ISD::SRL, DL, N1->getValueType(0),
3433 N1->getOperand(0),
3434 DAG.getConstant(MaskIdx, DL, GRLenVT)),
3435 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
3436 DAG.getConstant(MaskIdx, DL, GRLenVT));
3437 }
3438 }
3439 // Swap N0/N1 and retry.
3440 if (!SwapAndRetried) {
3441 std::swap(N0, N1);
3442 SwapAndRetried = true;
3443 goto Retry2;
3444 }
3445
3446 return SDValue();
3447}
3448
3449static bool checkValueWidth(SDValue V, ISD::LoadExtType &ExtType) {
3450 ExtType = ISD::NON_EXTLOAD;
3451
3452 switch (V.getNode()->getOpcode()) {
3453 case ISD::LOAD: {
3454 LoadSDNode *LoadNode = cast<LoadSDNode>(V.getNode());
3455 if ((LoadNode->getMemoryVT() == MVT::i8) ||
3456 (LoadNode->getMemoryVT() == MVT::i16)) {
3457 ExtType = LoadNode->getExtensionType();
3458 return true;
3459 }
3460 return false;
3461 }
3462 case ISD::AssertSext: {
3463 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
3464 if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
3465 ExtType = ISD::SEXTLOAD;
3466 return true;
3467 }
3468 return false;
3469 }
3470 case ISD::AssertZext: {
3471 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
3472 if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
3473 ExtType = ISD::ZEXTLOAD;
3474 return true;
3475 }
3476 return false;
3477 }
3478 default:
3479 return false;
3480 }
3481
3482 return false;
3483}
3484
3485// Eliminate redundant truncation and zero-extension nodes.
3486// * Case 1:
3487// +------------+ +------------+ +------------+
3488// | Input1 | | Input2 | | CC |
3489// +------------+ +------------+ +------------+
3490// | | |
3491// V V +----+
3492// +------------+ +------------+ |
3493// | TRUNCATE | | TRUNCATE | |
3494// +------------+ +------------+ |
3495// | | |
3496// V V |
3497// +------------+ +------------+ |
3498// | ZERO_EXT | | ZERO_EXT | |
3499// +------------+ +------------+ |
3500// | | |
3501// | +-------------+ |
3502// V V | |
3503// +----------------+ | |
3504// | AND | | |
3505// +----------------+ | |
3506// | | |
3507// +---------------+ | |
3508// | | |
3509// V V V
3510// +-------------+
3511// | CMP |
3512// +-------------+
3513// * Case 2:
3514// +------------+ +------------+ +-------------+ +------------+ +------------+
3515// | Input1 | | Input2 | | Constant -1 | | Constant 0 | | CC |
3516// +------------+ +------------+ +-------------+ +------------+ +------------+
3517// | | | | |
3518// V | | | |
3519// +------------+ | | | |
3520// | XOR |<---------------------+ | |
3521// +------------+ | | |
3522// | | | |
3523// V V +---------------+ |
3524// +------------+ +------------+ | |
3525// | TRUNCATE | | TRUNCATE | | +-------------------------+
3526// +------------+ +------------+ | |
3527// | | | |
3528// V V | |
3529// +------------+ +------------+ | |
3530// | ZERO_EXT | | ZERO_EXT | | |
3531// +------------+ +------------+ | |
3532// | | | |
3533// V V | |
3534// +----------------+ | |
3535// | AND | | |
3536// +----------------+ | |
3537// | | |
3538// +---------------+ | |
3539// | | |
3540// V V V
3541// +-------------+
3542// | CMP |
3543// +-------------+
3544 static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG,
3545 TargetLowering::DAGCombinerInfo &DCI,
3546 const LoongArchSubtarget &Subtarget) {
3547 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
3548
3549 SDNode *AndNode = N->getOperand(0).getNode();
3550 if (AndNode->getOpcode() != ISD::AND)
3551 return SDValue();
3552
3553 SDValue AndInputValue2 = AndNode->getOperand(1);
3554 if (AndInputValue2.getOpcode() != ISD::ZERO_EXTEND)
3555 return SDValue();
3556
3557 SDValue CmpInputValue = N->getOperand(1);
3558 SDValue AndInputValue1 = AndNode->getOperand(0);
3559 if (AndInputValue1.getOpcode() == ISD::XOR) {
3560 if (CC != ISD::SETEQ && CC != ISD::SETNE)
3561 return SDValue();
3562 ConstantSDNode *CN = dyn_cast<ConstantSDNode>(AndInputValue1.getOperand(1));
3563 if (!CN || CN->getSExtValue() != -1)
3564 return SDValue();
3565 CN = dyn_cast<ConstantSDNode>(CmpInputValue);
3566 if (!CN || CN->getSExtValue() != 0)
3567 return SDValue();
3568 AndInputValue1 = AndInputValue1.getOperand(0);
3569 if (AndInputValue1.getOpcode() != ISD::ZERO_EXTEND)
3570 return SDValue();
3571 } else if (AndInputValue1.getOpcode() == ISD::ZERO_EXTEND) {
3572 if (AndInputValue2 != CmpInputValue)
3573 return SDValue();
3574 } else {
3575 return SDValue();
3576 }
3577
3578 SDValue TruncValue1 = AndInputValue1.getNode()->getOperand(0);
3579 if (TruncValue1.getOpcode() != ISD::TRUNCATE)
3580 return SDValue();
3581
3582 SDValue TruncValue2 = AndInputValue2.getNode()->getOperand(0);
3583 if (TruncValue2.getOpcode() != ISD::TRUNCATE)
3584 return SDValue();
3585
3586 SDValue TruncInputValue1 = TruncValue1.getNode()->getOperand(0);
3587 SDValue TruncInputValue2 = TruncValue2.getNode()->getOperand(0);
3588 ISD::LoadExtType ExtType1;
3589 ISD::LoadExtType ExtType2;
3590
3591 if (!checkValueWidth(TruncInputValue1, ExtType1) ||
3592 !checkValueWidth(TruncInputValue2, ExtType2))
3593 return SDValue();
3594
3595 if (TruncInputValue1->getValueType(0) != TruncInputValue2->getValueType(0) ||
3596 AndNode->getValueType(0) != TruncInputValue1->getValueType(0))
3597 return SDValue();
3598
3599 if ((ExtType2 != ISD::ZEXTLOAD) &&
3600 ((ExtType2 != ISD::SEXTLOAD) && (ExtType1 != ISD::SEXTLOAD)))
3601 return SDValue();
3602
3603 // These truncation and zero-extension nodes are not necessary, remove them.
3604 SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N), AndNode->getValueType(0),
3605 TruncInputValue1, TruncInputValue2);
3606 SDValue NewSetCC =
3607 DAG.getSetCC(SDLoc(N), N->getValueType(0), NewAnd, TruncInputValue2, CC);
3608 DAG.ReplaceAllUsesWith(N, NewSetCC.getNode());
3609 return SDValue(N, 0);
3610}
3611
3612// Combine (loongarch_bitrev_w (loongarch_revb_2w X)) to loongarch_bitrev_4b.
3613 static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG,
3614 TargetLowering::DAGCombinerInfo &DCI,
3615 const LoongArchSubtarget &Subtarget) {
3616 if (DCI.isBeforeLegalizeOps())
3617 return SDValue();
3618
3619 SDValue Src = N->getOperand(0);
3620 if (Src.getOpcode() != LoongArchISD::REVB_2W)
3621 return SDValue();
3622
3623 return DAG.getNode(LoongArchISD::BITREV_4B, SDLoc(N), N->getValueType(0),
3624 Src.getOperand(0));
3625}
3626
3627template <unsigned N>
3628 static SDValue legalizeIntrinsicImmArg(SDNode *Node, unsigned ImmOp,
3629 SelectionDAG &DAG,
3630 const LoongArchSubtarget &Subtarget,
3631 bool IsSigned = false) {
3632 SDLoc DL(Node);
3633 auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
3634 // Check the ImmArg.
3635 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
3636 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
3637 DAG.getContext()->emitError(Node->getOperationName(0) +
3638 ": argument out of range.");
3639 return DAG.getNode(ISD::UNDEF, DL, Subtarget.getGRLenVT());
3640 }
3641 return DAG.getConstant(CImm->getZExtValue(), DL, Subtarget.getGRLenVT());
3642}
3643
3644template <unsigned N>
3645static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp,
3646 SelectionDAG &DAG, bool IsSigned = false) {
3647 SDLoc DL(Node);
3648 EVT ResTy = Node->getValueType(0);
3649 auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
3650
3651 // Check the ImmArg.
3652 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
3653 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
3654 DAG.getContext()->emitError(Node->getOperationName(0) +
3655 ": argument out of range.");
3656 return DAG.getNode(ISD::UNDEF, DL, ResTy);
3657 }
3658 return DAG.getConstant(
3659 APInt(ResTy.getScalarType().getSizeInBits(),
3660 IsSigned ? CImm->getSExtValue() : CImm->getZExtValue(), IsSigned),
3661 DL, ResTy);
3662}
3663
3664 static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG) {
3665 SDLoc DL(Node);
3666 EVT ResTy = Node->getValueType(0);
3667 SDValue Vec = Node->getOperand(2);
3668 SDValue Mask = DAG.getConstant(Vec.getScalarValueSizeInBits() - 1, DL, ResTy);
3669 return DAG.getNode(ISD::AND, DL, ResTy, Vec, Mask);
3670}
3671
3672 static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG) {
3673 SDLoc DL(Node);
3674 EVT ResTy = Node->getValueType(0);
3675 SDValue One = DAG.getConstant(1, DL, ResTy);
3676 SDValue Bit =
3677 DAG.getNode(ISD::SHL, DL, ResTy, One, truncateVecElts(Node, DAG));
3678
3679 return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1),
3680 DAG.getNOT(DL, Bit, ResTy));
3681}
3682
3683template <unsigned N>
3684 static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG) {
3685 SDLoc DL(Node);
3686 EVT ResTy = Node->getValueType(0);
3687 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
3688 // Check the unsigned ImmArg.
3689 if (!isUInt<N>(CImm->getZExtValue())) {
3690 DAG.getContext()->emitError(Node->getOperationName(0) +
3691 ": argument out of range.");
3692 return DAG.getNode(ISD::UNDEF, DL, ResTy);
3693 }
3694
3695 APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
3696 SDValue Mask = DAG.getConstant(~BitImm, DL, ResTy);
3697
3698 return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1), Mask);
3699}
3700
3701template <unsigned N>
3702 static SDValue lowerVectorBitSetImm(SDNode *Node, SelectionDAG &DAG) {
3703 SDLoc DL(Node);
3704 EVT ResTy = Node->getValueType(0);
3705 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
3706 // Check the unsigned ImmArg.
3707 if (!isUInt<N>(CImm->getZExtValue())) {
3708 DAG.getContext()->emitError(Node->getOperationName(0) +
3709 ": argument out of range.");
3710 return DAG.getNode(ISD::UNDEF, DL, ResTy);
3711 }
3712
3713 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
3714 SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
3715 return DAG.getNode(ISD::OR, DL, ResTy, Node->getOperand(1), BitImm);
3716}
3717
3718template <unsigned N>
3719 static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG) {
3720 SDLoc DL(Node);
3721 EVT ResTy = Node->getValueType(0);
3722 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
3723 // Check the unsigned ImmArg.
3724 if (!isUInt<N>(CImm->getZExtValue())) {
3725 DAG.getContext()->emitError(Node->getOperationName(0) +
3726 ": argument out of range.");
3727 return DAG.getNode(ISD::UNDEF, DL, ResTy);
3728 }
3729
3730 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
3731 SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
3732 return DAG.getNode(ISD::XOR, DL, ResTy, Node->getOperand(1), BitImm);
3733}
3734
3735static SDValue
3736 performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG,
3737 TargetLowering::DAGCombinerInfo &DCI,
3738 const LoongArchSubtarget &Subtarget) {
3739 SDLoc DL(N);
3740 switch (N->getConstantOperandVal(0)) {
3741 default:
3742 break;
3743 case Intrinsic::loongarch_lsx_vadd_b:
3744 case Intrinsic::loongarch_lsx_vadd_h:
3745 case Intrinsic::loongarch_lsx_vadd_w:
3746 case Intrinsic::loongarch_lsx_vadd_d:
3747 case Intrinsic::loongarch_lasx_xvadd_b:
3748 case Intrinsic::loongarch_lasx_xvadd_h:
3749 case Intrinsic::loongarch_lasx_xvadd_w:
3750 case Intrinsic::loongarch_lasx_xvadd_d:
3751 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
3752 N->getOperand(2));
3753 case Intrinsic::loongarch_lsx_vaddi_bu:
3754 case Intrinsic::loongarch_lsx_vaddi_hu:
3755 case Intrinsic::loongarch_lsx_vaddi_wu:
3756 case Intrinsic::loongarch_lsx_vaddi_du:
3757 case Intrinsic::loongarch_lasx_xvaddi_bu:
3758 case Intrinsic::loongarch_lasx_xvaddi_hu:
3759 case Intrinsic::loongarch_lasx_xvaddi_wu:
3760 case Intrinsic::loongarch_lasx_xvaddi_du:
3761 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
3762 lowerVectorSplatImm<5>(N, 2, DAG));
3763 case Intrinsic::loongarch_lsx_vsub_b:
3764 case Intrinsic::loongarch_lsx_vsub_h:
3765 case Intrinsic::loongarch_lsx_vsub_w:
3766 case Intrinsic::loongarch_lsx_vsub_d:
3767 case Intrinsic::loongarch_lasx_xvsub_b:
3768 case Intrinsic::loongarch_lasx_xvsub_h:
3769 case Intrinsic::loongarch_lasx_xvsub_w:
3770 case Intrinsic::loongarch_lasx_xvsub_d:
3771 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
3772 N->getOperand(2));
3773 case Intrinsic::loongarch_lsx_vsubi_bu:
3774 case Intrinsic::loongarch_lsx_vsubi_hu:
3775 case Intrinsic::loongarch_lsx_vsubi_wu:
3776 case Intrinsic::loongarch_lsx_vsubi_du:
3777 case Intrinsic::loongarch_lasx_xvsubi_bu:
3778 case Intrinsic::loongarch_lasx_xvsubi_hu:
3779 case Intrinsic::loongarch_lasx_xvsubi_wu:
3780 case Intrinsic::loongarch_lasx_xvsubi_du:
3781 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
3782 lowerVectorSplatImm<5>(N, 2, DAG));
3783 case Intrinsic::loongarch_lsx_vneg_b:
3784 case Intrinsic::loongarch_lsx_vneg_h:
3785 case Intrinsic::loongarch_lsx_vneg_w:
3786 case Intrinsic::loongarch_lsx_vneg_d:
3787 case Intrinsic::loongarch_lasx_xvneg_b:
3788 case Intrinsic::loongarch_lasx_xvneg_h:
3789 case Intrinsic::loongarch_lasx_xvneg_w:
3790 case Intrinsic::loongarch_lasx_xvneg_d:
3791 return DAG.getNode(
3792 ISD::SUB, DL, N->getValueType(0),
3793 DAG.getConstant(
3794 APInt(N->getValueType(0).getScalarType().getSizeInBits(), 0,
3795 /*isSigned=*/true),
3796 SDLoc(N), N->getValueType(0)),
3797 N->getOperand(1));
3798 case Intrinsic::loongarch_lsx_vmax_b:
3799 case Intrinsic::loongarch_lsx_vmax_h:
3800 case Intrinsic::loongarch_lsx_vmax_w:
3801 case Intrinsic::loongarch_lsx_vmax_d:
3802 case Intrinsic::loongarch_lasx_xvmax_b:
3803 case Intrinsic::loongarch_lasx_xvmax_h:
3804 case Intrinsic::loongarch_lasx_xvmax_w:
3805 case Intrinsic::loongarch_lasx_xvmax_d:
3806 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
3807 N->getOperand(2));
3808 case Intrinsic::loongarch_lsx_vmax_bu:
3809 case Intrinsic::loongarch_lsx_vmax_hu:
3810 case Intrinsic::loongarch_lsx_vmax_wu:
3811 case Intrinsic::loongarch_lsx_vmax_du:
3812 case Intrinsic::loongarch_lasx_xvmax_bu:
3813 case Intrinsic::loongarch_lasx_xvmax_hu:
3814 case Intrinsic::loongarch_lasx_xvmax_wu:
3815 case Intrinsic::loongarch_lasx_xvmax_du:
3816 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
3817 N->getOperand(2));
3818 case Intrinsic::loongarch_lsx_vmaxi_b:
3819 case Intrinsic::loongarch_lsx_vmaxi_h:
3820 case Intrinsic::loongarch_lsx_vmaxi_w:
3821 case Intrinsic::loongarch_lsx_vmaxi_d:
3822 case Intrinsic::loongarch_lasx_xvmaxi_b:
3823 case Intrinsic::loongarch_lasx_xvmaxi_h:
3824 case Intrinsic::loongarch_lasx_xvmaxi_w:
3825 case Intrinsic::loongarch_lasx_xvmaxi_d:
3826 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
3827 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
3828 case Intrinsic::loongarch_lsx_vmaxi_bu:
3829 case Intrinsic::loongarch_lsx_vmaxi_hu:
3830 case Intrinsic::loongarch_lsx_vmaxi_wu:
3831 case Intrinsic::loongarch_lsx_vmaxi_du:
3832 case Intrinsic::loongarch_lasx_xvmaxi_bu:
3833 case Intrinsic::loongarch_lasx_xvmaxi_hu:
3834 case Intrinsic::loongarch_lasx_xvmaxi_wu:
3835 case Intrinsic::loongarch_lasx_xvmaxi_du:
3836 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
3837 lowerVectorSplatImm<5>(N, 2, DAG));
3838 case Intrinsic::loongarch_lsx_vmin_b:
3839 case Intrinsic::loongarch_lsx_vmin_h:
3840 case Intrinsic::loongarch_lsx_vmin_w:
3841 case Intrinsic::loongarch_lsx_vmin_d:
3842 case Intrinsic::loongarch_lasx_xvmin_b:
3843 case Intrinsic::loongarch_lasx_xvmin_h:
3844 case Intrinsic::loongarch_lasx_xvmin_w:
3845 case Intrinsic::loongarch_lasx_xvmin_d:
3846 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
3847 N->getOperand(2));
3848 case Intrinsic::loongarch_lsx_vmin_bu:
3849 case Intrinsic::loongarch_lsx_vmin_hu:
3850 case Intrinsic::loongarch_lsx_vmin_wu:
3851 case Intrinsic::loongarch_lsx_vmin_du:
3852 case Intrinsic::loongarch_lasx_xvmin_bu:
3853 case Intrinsic::loongarch_lasx_xvmin_hu:
3854 case Intrinsic::loongarch_lasx_xvmin_wu:
3855 case Intrinsic::loongarch_lasx_xvmin_du:
3856 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
3857 N->getOperand(2));
3858 case Intrinsic::loongarch_lsx_vmini_b:
3859 case Intrinsic::loongarch_lsx_vmini_h:
3860 case Intrinsic::loongarch_lsx_vmini_w:
3861 case Intrinsic::loongarch_lsx_vmini_d:
3862 case Intrinsic::loongarch_lasx_xvmini_b:
3863 case Intrinsic::loongarch_lasx_xvmini_h:
3864 case Intrinsic::loongarch_lasx_xvmini_w:
3865 case Intrinsic::loongarch_lasx_xvmini_d:
3866 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
3867 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
3868 case Intrinsic::loongarch_lsx_vmini_bu:
3869 case Intrinsic::loongarch_lsx_vmini_hu:
3870 case Intrinsic::loongarch_lsx_vmini_wu:
3871 case Intrinsic::loongarch_lsx_vmini_du:
3872 case Intrinsic::loongarch_lasx_xvmini_bu:
3873 case Intrinsic::loongarch_lasx_xvmini_hu:
3874 case Intrinsic::loongarch_lasx_xvmini_wu:
3875 case Intrinsic::loongarch_lasx_xvmini_du:
3876 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
3877 lowerVectorSplatImm<5>(N, 2, DAG));
3878 case Intrinsic::loongarch_lsx_vmul_b:
3879 case Intrinsic::loongarch_lsx_vmul_h:
3880 case Intrinsic::loongarch_lsx_vmul_w:
3881 case Intrinsic::loongarch_lsx_vmul_d:
3882 case Intrinsic::loongarch_lasx_xvmul_b:
3883 case Intrinsic::loongarch_lasx_xvmul_h:
3884 case Intrinsic::loongarch_lasx_xvmul_w:
3885 case Intrinsic::loongarch_lasx_xvmul_d:
3886 return DAG.getNode(ISD::MUL, DL, N->getValueType(0), N->getOperand(1),
3887 N->getOperand(2));
3888 case Intrinsic::loongarch_lsx_vmadd_b:
3889 case Intrinsic::loongarch_lsx_vmadd_h:
3890 case Intrinsic::loongarch_lsx_vmadd_w:
3891 case Intrinsic::loongarch_lsx_vmadd_d:
3892 case Intrinsic::loongarch_lasx_xvmadd_b:
3893 case Intrinsic::loongarch_lasx_xvmadd_h:
3894 case Intrinsic::loongarch_lasx_xvmadd_w:
3895 case Intrinsic::loongarch_lasx_xvmadd_d: {
3896 EVT ResTy = N->getValueType(0);
3897 return DAG.getNode(ISD::ADD, SDLoc(N), ResTy, N->getOperand(1),
3898 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
3899 N->getOperand(3)));
3900 }
3901 case Intrinsic::loongarch_lsx_vmsub_b:
3902 case Intrinsic::loongarch_lsx_vmsub_h:
3903 case Intrinsic::loongarch_lsx_vmsub_w:
3904 case Intrinsic::loongarch_lsx_vmsub_d:
3905 case Intrinsic::loongarch_lasx_xvmsub_b:
3906 case Intrinsic::loongarch_lasx_xvmsub_h:
3907 case Intrinsic::loongarch_lasx_xvmsub_w:
3908 case Intrinsic::loongarch_lasx_xvmsub_d: {
3909 EVT ResTy = N->getValueType(0);
3910 return DAG.getNode(ISD::SUB, SDLoc(N), ResTy, N->getOperand(1),
3911 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
3912 N->getOperand(3)));
3913 }
3914 case Intrinsic::loongarch_lsx_vdiv_b:
3915 case Intrinsic::loongarch_lsx_vdiv_h:
3916 case Intrinsic::loongarch_lsx_vdiv_w:
3917 case Intrinsic::loongarch_lsx_vdiv_d:
3918 case Intrinsic::loongarch_lasx_xvdiv_b:
3919 case Intrinsic::loongarch_lasx_xvdiv_h:
3920 case Intrinsic::loongarch_lasx_xvdiv_w:
3921 case Intrinsic::loongarch_lasx_xvdiv_d:
3922 return DAG.getNode(ISD::SDIV, DL, N->getValueType(0), N->getOperand(1),
3923 N->getOperand(2));
3924 case Intrinsic::loongarch_lsx_vdiv_bu:
3925 case Intrinsic::loongarch_lsx_vdiv_hu:
3926 case Intrinsic::loongarch_lsx_vdiv_wu:
3927 case Intrinsic::loongarch_lsx_vdiv_du:
3928 case Intrinsic::loongarch_lasx_xvdiv_bu:
3929 case Intrinsic::loongarch_lasx_xvdiv_hu:
3930 case Intrinsic::loongarch_lasx_xvdiv_wu:
3931 case Intrinsic::loongarch_lasx_xvdiv_du:
3932 return DAG.getNode(ISD::UDIV, DL, N->getValueType(0), N->getOperand(1),
3933 N->getOperand(2));
3934 case Intrinsic::loongarch_lsx_vmod_b:
3935 case Intrinsic::loongarch_lsx_vmod_h:
3936 case Intrinsic::loongarch_lsx_vmod_w:
3937 case Intrinsic::loongarch_lsx_vmod_d:
3938 case Intrinsic::loongarch_lasx_xvmod_b:
3939 case Intrinsic::loongarch_lasx_xvmod_h:
3940 case Intrinsic::loongarch_lasx_xvmod_w:
3941 case Intrinsic::loongarch_lasx_xvmod_d:
3942 return DAG.getNode(ISD::SREM, DL, N->getValueType(0), N->getOperand(1),
3943 N->getOperand(2));
3944 case Intrinsic::loongarch_lsx_vmod_bu:
3945 case Intrinsic::loongarch_lsx_vmod_hu:
3946 case Intrinsic::loongarch_lsx_vmod_wu:
3947 case Intrinsic::loongarch_lsx_vmod_du:
3948 case Intrinsic::loongarch_lasx_xvmod_bu:
3949 case Intrinsic::loongarch_lasx_xvmod_hu:
3950 case Intrinsic::loongarch_lasx_xvmod_wu:
3951 case Intrinsic::loongarch_lasx_xvmod_du:
3952 return DAG.getNode(ISD::UREM, DL, N->getValueType(0), N->getOperand(1),
3953 N->getOperand(2));
3954 case Intrinsic::loongarch_lsx_vand_v:
3955 case Intrinsic::loongarch_lasx_xvand_v:
3956 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
3957 N->getOperand(2));
3958 case Intrinsic::loongarch_lsx_vor_v:
3959 case Intrinsic::loongarch_lasx_xvor_v:
3960 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
3961 N->getOperand(2));
3962 case Intrinsic::loongarch_lsx_vxor_v:
3963 case Intrinsic::loongarch_lasx_xvxor_v:
3964 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
3965 N->getOperand(2));
3966 case Intrinsic::loongarch_lsx_vnor_v:
3967 case Intrinsic::loongarch_lasx_xvnor_v: {
3968 SDValue Res = DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
3969 N->getOperand(2));
3970 return DAG.getNOT(DL, Res, Res->getValueType(0));
3971 }
3972 case Intrinsic::loongarch_lsx_vandi_b:
3973 case Intrinsic::loongarch_lasx_xvandi_b:
3974 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
3975 lowerVectorSplatImm<8>(N, 2, DAG));
3976 case Intrinsic::loongarch_lsx_vori_b:
3977 case Intrinsic::loongarch_lasx_xvori_b:
3978 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
3979 lowerVectorSplatImm<8>(N, 2, DAG));
3980 case Intrinsic::loongarch_lsx_vxori_b:
3981 case Intrinsic::loongarch_lasx_xvxori_b:
3982 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
3983 lowerVectorSplatImm<8>(N, 2, DAG));
3984 case Intrinsic::loongarch_lsx_vsll_b:
3985 case Intrinsic::loongarch_lsx_vsll_h:
3986 case Intrinsic::loongarch_lsx_vsll_w:
3987 case Intrinsic::loongarch_lsx_vsll_d:
3988 case Intrinsic::loongarch_lasx_xvsll_b:
3989 case Intrinsic::loongarch_lasx_xvsll_h:
3990 case Intrinsic::loongarch_lasx_xvsll_w:
3991 case Intrinsic::loongarch_lasx_xvsll_d:
3992 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
3993 truncateVecElts(N, DAG));
3994 case Intrinsic::loongarch_lsx_vslli_b:
3995 case Intrinsic::loongarch_lasx_xvslli_b:
3996 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
3997 lowerVectorSplatImm<3>(N, 2, DAG));
3998 case Intrinsic::loongarch_lsx_vslli_h:
3999 case Intrinsic::loongarch_lasx_xvslli_h:
4000 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
4001 lowerVectorSplatImm<4>(N, 2, DAG));
4002 case Intrinsic::loongarch_lsx_vslli_w:
4003 case Intrinsic::loongarch_lasx_xvslli_w:
4004 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
4005 lowerVectorSplatImm<5>(N, 2, DAG));
4006 case Intrinsic::loongarch_lsx_vslli_d:
4007 case Intrinsic::loongarch_lasx_xvslli_d:
4008 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
4009 lowerVectorSplatImm<6>(N, 2, DAG));
4010 case Intrinsic::loongarch_lsx_vsrl_b:
4011 case Intrinsic::loongarch_lsx_vsrl_h:
4012 case Intrinsic::loongarch_lsx_vsrl_w:
4013 case Intrinsic::loongarch_lsx_vsrl_d:
4014 case Intrinsic::loongarch_lasx_xvsrl_b:
4015 case Intrinsic::loongarch_lasx_xvsrl_h:
4016 case Intrinsic::loongarch_lasx_xvsrl_w:
4017 case Intrinsic::loongarch_lasx_xvsrl_d:
4018 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
4019 truncateVecElts(N, DAG));
4020 case Intrinsic::loongarch_lsx_vsrli_b:
4021 case Intrinsic::loongarch_lasx_xvsrli_b:
4022 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
4023 lowerVectorSplatImm<3>(N, 2, DAG));
4024 case Intrinsic::loongarch_lsx_vsrli_h:
4025 case Intrinsic::loongarch_lasx_xvsrli_h:
4026 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
4027 lowerVectorSplatImm<4>(N, 2, DAG));
4028 case Intrinsic::loongarch_lsx_vsrli_w:
4029 case Intrinsic::loongarch_lasx_xvsrli_w:
4030 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
4031 lowerVectorSplatImm<5>(N, 2, DAG));
4032 case Intrinsic::loongarch_lsx_vsrli_d:
4033 case Intrinsic::loongarch_lasx_xvsrli_d:
4034 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
4035 lowerVectorSplatImm<6>(N, 2, DAG));
4036 case Intrinsic::loongarch_lsx_vsra_b:
4037 case Intrinsic::loongarch_lsx_vsra_h:
4038 case Intrinsic::loongarch_lsx_vsra_w:
4039 case Intrinsic::loongarch_lsx_vsra_d:
4040 case Intrinsic::loongarch_lasx_xvsra_b:
4041 case Intrinsic::loongarch_lasx_xvsra_h:
4042 case Intrinsic::loongarch_lasx_xvsra_w:
4043 case Intrinsic::loongarch_lasx_xvsra_d:
4044 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
4045 truncateVecElts(N, DAG));
4046 case Intrinsic::loongarch_lsx_vsrai_b:
4047 case Intrinsic::loongarch_lasx_xvsrai_b:
4048 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
4049 lowerVectorSplatImm<3>(N, 2, DAG));
4050 case Intrinsic::loongarch_lsx_vsrai_h:
4051 case Intrinsic::loongarch_lasx_xvsrai_h:
4052 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
4053 lowerVectorSplatImm<4>(N, 2, DAG));
4054 case Intrinsic::loongarch_lsx_vsrai_w:
4055 case Intrinsic::loongarch_lasx_xvsrai_w:
4056 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
4057 lowerVectorSplatImm<5>(N, 2, DAG));
4058 case Intrinsic::loongarch_lsx_vsrai_d:
4059 case Intrinsic::loongarch_lasx_xvsrai_d:
4060 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
4061 lowerVectorSplatImm<6>(N, 2, DAG));
4062 case Intrinsic::loongarch_lsx_vclz_b:
4063 case Intrinsic::loongarch_lsx_vclz_h:
4064 case Intrinsic::loongarch_lsx_vclz_w:
4065 case Intrinsic::loongarch_lsx_vclz_d:
4066 case Intrinsic::loongarch_lasx_xvclz_b:
4067 case Intrinsic::loongarch_lasx_xvclz_h:
4068 case Intrinsic::loongarch_lasx_xvclz_w:
4069 case Intrinsic::loongarch_lasx_xvclz_d:
4070 return DAG.getNode(ISD::CTLZ, DL, N->getValueType(0), N->getOperand(1));
4071 case Intrinsic::loongarch_lsx_vpcnt_b:
4072 case Intrinsic::loongarch_lsx_vpcnt_h:
4073 case Intrinsic::loongarch_lsx_vpcnt_w:
4074 case Intrinsic::loongarch_lsx_vpcnt_d:
4075 case Intrinsic::loongarch_lasx_xvpcnt_b:
4076 case Intrinsic::loongarch_lasx_xvpcnt_h:
4077 case Intrinsic::loongarch_lasx_xvpcnt_w:
4078 case Intrinsic::loongarch_lasx_xvpcnt_d:
4079 return DAG.getNode(ISD::CTPOP, DL, N->getValueType(0), N->getOperand(1));
4080 case Intrinsic::loongarch_lsx_vbitclr_b:
4081 case Intrinsic::loongarch_lsx_vbitclr_h:
4082 case Intrinsic::loongarch_lsx_vbitclr_w:
4083 case Intrinsic::loongarch_lsx_vbitclr_d:
4084 case Intrinsic::loongarch_lasx_xvbitclr_b:
4085 case Intrinsic::loongarch_lasx_xvbitclr_h:
4086 case Intrinsic::loongarch_lasx_xvbitclr_w:
4087 case Intrinsic::loongarch_lasx_xvbitclr_d:
4088 return lowerVectorBitClear(N, DAG);
4089 case Intrinsic::loongarch_lsx_vbitclri_b:
4090 case Intrinsic::loongarch_lasx_xvbitclri_b:
4091 return lowerVectorBitClearImm<3>(N, DAG);
4092 case Intrinsic::loongarch_lsx_vbitclri_h:
4093 case Intrinsic::loongarch_lasx_xvbitclri_h:
4094 return lowerVectorBitClearImm<4>(N, DAG);
4095 case Intrinsic::loongarch_lsx_vbitclri_w:
4096 case Intrinsic::loongarch_lasx_xvbitclri_w:
4097 return lowerVectorBitClearImm<5>(N, DAG);
4098 case Intrinsic::loongarch_lsx_vbitclri_d:
4099 case Intrinsic::loongarch_lasx_xvbitclri_d:
4100 return lowerVectorBitClearImm<6>(N, DAG);
4101 case Intrinsic::loongarch_lsx_vbitset_b:
4102 case Intrinsic::loongarch_lsx_vbitset_h:
4103 case Intrinsic::loongarch_lsx_vbitset_w:
4104 case Intrinsic::loongarch_lsx_vbitset_d:
4105 case Intrinsic::loongarch_lasx_xvbitset_b:
4106 case Intrinsic::loongarch_lasx_xvbitset_h:
4107 case Intrinsic::loongarch_lasx_xvbitset_w:
4108 case Intrinsic::loongarch_lasx_xvbitset_d: {
4109 EVT VecTy = N->getValueType(0);
4110 SDValue One = DAG.getConstant(1, DL, VecTy);
4111 return DAG.getNode(
4112 ISD::OR, DL, VecTy, N->getOperand(1),
4113 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
4114 }
4115 case Intrinsic::loongarch_lsx_vbitseti_b:
4116 case Intrinsic::loongarch_lasx_xvbitseti_b:
4117 return lowerVectorBitSetImm<3>(N, DAG);
4118 case Intrinsic::loongarch_lsx_vbitseti_h:
4119 case Intrinsic::loongarch_lasx_xvbitseti_h:
4120 return lowerVectorBitSetImm<4>(N, DAG);
4121 case Intrinsic::loongarch_lsx_vbitseti_w:
4122 case Intrinsic::loongarch_lasx_xvbitseti_w:
4123 return lowerVectorBitSetImm<5>(N, DAG);
4124 case Intrinsic::loongarch_lsx_vbitseti_d:
4125 case Intrinsic::loongarch_lasx_xvbitseti_d:
4126 return lowerVectorBitSetImm<6>(N, DAG);
4127 case Intrinsic::loongarch_lsx_vbitrev_b:
4128 case Intrinsic::loongarch_lsx_vbitrev_h:
4129 case Intrinsic::loongarch_lsx_vbitrev_w:
4130 case Intrinsic::loongarch_lsx_vbitrev_d:
4131 case Intrinsic::loongarch_lasx_xvbitrev_b:
4132 case Intrinsic::loongarch_lasx_xvbitrev_h:
4133 case Intrinsic::loongarch_lasx_xvbitrev_w:
4134 case Intrinsic::loongarch_lasx_xvbitrev_d: {
4135 EVT VecTy = N->getValueType(0);
4136 SDValue One = DAG.getConstant(1, DL, VecTy);
4137 return DAG.getNode(
4138 ISD::XOR, DL, VecTy, N->getOperand(1),
4139 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
4140 }
4141 case Intrinsic::loongarch_lsx_vbitrevi_b:
4142 case Intrinsic::loongarch_lasx_xvbitrevi_b:
4143 return lowerVectorBitRevImm<3>(N, DAG);
4144 case Intrinsic::loongarch_lsx_vbitrevi_h:
4145 case Intrinsic::loongarch_lasx_xvbitrevi_h:
4146 return lowerVectorBitRevImm<4>(N, DAG);
4147 case Intrinsic::loongarch_lsx_vbitrevi_w:
4148 case Intrinsic::loongarch_lasx_xvbitrevi_w:
4149 return lowerVectorBitRevImm<5>(N, DAG);
4150 case Intrinsic::loongarch_lsx_vbitrevi_d:
4151 case Intrinsic::loongarch_lasx_xvbitrevi_d:
4152 return lowerVectorBitRevImm<6>(N, DAG);
4153 case Intrinsic::loongarch_lsx_vfadd_s:
4154 case Intrinsic::loongarch_lsx_vfadd_d:
4155 case Intrinsic::loongarch_lasx_xvfadd_s:
4156 case Intrinsic::loongarch_lasx_xvfadd_d:
4157 return DAG.getNode(ISD::FADD, DL, N->getValueType(0), N->getOperand(1),
4158 N->getOperand(2));
4159 case Intrinsic::loongarch_lsx_vfsub_s:
4160 case Intrinsic::loongarch_lsx_vfsub_d:
4161 case Intrinsic::loongarch_lasx_xvfsub_s:
4162 case Intrinsic::loongarch_lasx_xvfsub_d:
4163 return DAG.getNode(ISD::FSUB, DL, N->getValueType(0), N->getOperand(1),
4164 N->getOperand(2));
4165 case Intrinsic::loongarch_lsx_vfmul_s:
4166 case Intrinsic::loongarch_lsx_vfmul_d:
4167 case Intrinsic::loongarch_lasx_xvfmul_s:
4168 case Intrinsic::loongarch_lasx_xvfmul_d:
4169 return DAG.getNode(ISD::FMUL, DL, N->getValueType(0), N->getOperand(1),
4170 N->getOperand(2));
4171 case Intrinsic::loongarch_lsx_vfdiv_s:
4172 case Intrinsic::loongarch_lsx_vfdiv_d:
4173 case Intrinsic::loongarch_lasx_xvfdiv_s:
4174 case Intrinsic::loongarch_lasx_xvfdiv_d:
4175 return DAG.getNode(ISD::FDIV, DL, N->getValueType(0), N->getOperand(1),
4176 N->getOperand(2));
4177 case Intrinsic::loongarch_lsx_vfmadd_s:
4178 case Intrinsic::loongarch_lsx_vfmadd_d:
4179 case Intrinsic::loongarch_lasx_xvfmadd_s:
4180 case Intrinsic::loongarch_lasx_xvfmadd_d:
4181 return DAG.getNode(ISD::FMA, DL, N->getValueType(0), N->getOperand(1),
4182 N->getOperand(2), N->getOperand(3));
4183 case Intrinsic::loongarch_lsx_vinsgr2vr_b:
4184 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
4185 N->getOperand(1), N->getOperand(2),
4186 legalizeIntrinsicImmArg<4>(N, 3, DAG, Subtarget));
4187 case Intrinsic::loongarch_lsx_vinsgr2vr_h:
4188 case Intrinsic::loongarch_lasx_xvinsgr2vr_w:
4189 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
4190 N->getOperand(1), N->getOperand(2),
4191 legalizeIntrinsicImmArg<3>(N, 3, DAG, Subtarget));
4192 case Intrinsic::loongarch_lsx_vinsgr2vr_w:
4193 case Intrinsic::loongarch_lasx_xvinsgr2vr_d:
4194 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
4195 N->getOperand(1), N->getOperand(2),
4196 legalizeIntrinsicImmArg<2>(N, 3, DAG, Subtarget));
4197 case Intrinsic::loongarch_lsx_vinsgr2vr_d:
4198 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
4199 N->getOperand(1), N->getOperand(2),
4200 legalizeIntrinsicImmArg<1>(N, 3, DAG, Subtarget));
4201 case Intrinsic::loongarch_lsx_vreplgr2vr_b:
4202 case Intrinsic::loongarch_lsx_vreplgr2vr_h:
4203 case Intrinsic::loongarch_lsx_vreplgr2vr_w:
4204 case Intrinsic::loongarch_lsx_vreplgr2vr_d:
4205 case Intrinsic::loongarch_lasx_xvreplgr2vr_b:
4206 case Intrinsic::loongarch_lasx_xvreplgr2vr_h:
4207 case Intrinsic::loongarch_lasx_xvreplgr2vr_w:
4208 case Intrinsic::loongarch_lasx_xvreplgr2vr_d: {
4209 EVT ResTy = N->getValueType(0);
4210 SmallVector<SDValue> Ops(ResTy.getVectorNumElements(), N->getOperand(1));
4211 return DAG.getBuildVector(ResTy, DL, Ops);
4212 }
4213 case Intrinsic::loongarch_lsx_vreplve_b:
4214 case Intrinsic::loongarch_lsx_vreplve_h:
4215 case Intrinsic::loongarch_lsx_vreplve_w:
4216 case Intrinsic::loongarch_lsx_vreplve_d:
4217 case Intrinsic::loongarch_lasx_xvreplve_b:
4218 case Intrinsic::loongarch_lasx_xvreplve_h:
4219 case Intrinsic::loongarch_lasx_xvreplve_w:
4220 case Intrinsic::loongarch_lasx_xvreplve_d:
4221 return DAG.getNode(LoongArchISD::VREPLVE, DL, N->getValueType(0),
4222 N->getOperand(1),
4223 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
4224 N->getOperand(2)));
4225 }
4226 return SDValue();
4227}
4228
4229 SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N,
4230 DAGCombinerInfo &DCI) const {
4231 SelectionDAG &DAG = DCI.DAG;
4232 switch (N->getOpcode()) {
4233 default:
4234 break;
4235 case ISD::AND:
4236 return performANDCombine(N, DAG, DCI, Subtarget);
4237 case ISD::OR:
4238 return performORCombine(N, DAG, DCI, Subtarget);
4239 case ISD::SETCC:
4240 return performSETCCCombine(N, DAG, DCI, Subtarget);
4241 case ISD::SRL:
4242 return performSRLCombine(N, DAG, DCI, Subtarget);
4243 case LoongArchISD::BITREV_W:
4244 return performBITREV_WCombine(N, DAG, DCI, Subtarget);
4245 case ISD::INTRINSIC_WO_CHAIN:
4246 return performINTRINSIC_WO_CHAINCombine(N, DAG, DCI, Subtarget);
4247 }
4248 return SDValue();
4249}
4250
4251 static MachineBasicBlock *insertDivByZeroTrap(MachineInstr &MI,
4252 MachineBasicBlock *MBB) {
4253 if (!ZeroDivCheck)
4254 return MBB;
4255
4256 // Build instructions:
4257 // MBB:
4258 // div(or mod) $dst, $dividend, $divisor
4259 // bnez $divisor, SinkMBB
4260 // BreakMBB:
4261 // break 7 // BRK_DIVZERO
4262 // SinkMBB:
4263 // fallthrough
4264 const BasicBlock *LLVM_BB = MBB->getBasicBlock();
4265 MachineFunction::iterator It = ++MBB->getIterator();
4266 MachineFunction *MF = MBB->getParent();
4267 auto BreakMBB = MF->CreateMachineBasicBlock(LLVM_BB);
4268 auto SinkMBB = MF->CreateMachineBasicBlock(LLVM_BB);
4269 MF->insert(It, BreakMBB);
4270 MF->insert(It, SinkMBB);
4271
4272 // Transfer the remainder of MBB and its successor edges to SinkMBB.
4273 SinkMBB->splice(SinkMBB->end(), MBB, std::next(MI.getIterator()), MBB->end());
4274 SinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
4275
4276 const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
4277 DebugLoc DL = MI.getDebugLoc();
4278 MachineOperand &Divisor = MI.getOperand(2);
4279 Register DivisorReg = Divisor.getReg();
4280
4281 // MBB:
4282 BuildMI(MBB, DL, TII.get(LoongArch::BNEZ))
4283 .addReg(DivisorReg, getKillRegState(Divisor.isKill()))
4284 .addMBB(SinkMBB);
4285 MBB->addSuccessor(BreakMBB);
4286 MBB->addSuccessor(SinkMBB);
4287
4288 // BreakMBB:
4289 // See linux header file arch/loongarch/include/uapi/asm/break.h for the
4290 // definition of BRK_DIVZERO.
4291 BuildMI(BreakMBB, DL, TII.get(LoongArch::BREAK)).addImm(7 /*BRK_DIVZERO*/);
4292 BreakMBB->addSuccessor(SinkMBB);
4293
4294 // Clear Divisor's kill flag.
4295 Divisor.setIsKill(false);
4296
4297 return SinkMBB;
4298}
4299
4300static MachineBasicBlock *
4302 const LoongArchSubtarget &Subtarget) {
4303 unsigned CondOpc;
4304 switch (MI.getOpcode()) {
4305 default:
4306 llvm_unreachable("Unexpected opcode");
4307 case LoongArch::PseudoVBZ:
4308 CondOpc = LoongArch::VSETEQZ_V;
4309 break;
4310 case LoongArch::PseudoVBZ_B:
4311 CondOpc = LoongArch::VSETANYEQZ_B;
4312 break;
4313 case LoongArch::PseudoVBZ_H:
4314 CondOpc = LoongArch::VSETANYEQZ_H;
4315 break;
4316 case LoongArch::PseudoVBZ_W:
4317 CondOpc = LoongArch::VSETANYEQZ_W;
4318 break;
4319 case LoongArch::PseudoVBZ_D:
4320 CondOpc = LoongArch::VSETANYEQZ_D;
4321 break;
4322 case LoongArch::PseudoVBNZ:
4323 CondOpc = LoongArch::VSETNEZ_V;
4324 break;
4325 case LoongArch::PseudoVBNZ_B:
4326 CondOpc = LoongArch::VSETALLNEZ_B;
4327 break;
4328 case LoongArch::PseudoVBNZ_H:
4329 CondOpc = LoongArch::VSETALLNEZ_H;
4330 break;
4331 case LoongArch::PseudoVBNZ_W:
4332 CondOpc = LoongArch::VSETALLNEZ_W;
4333 break;
4334 case LoongArch::PseudoVBNZ_D:
4335 CondOpc = LoongArch::VSETALLNEZ_D;
4336 break;
4337 case LoongArch::PseudoXVBZ:
4338 CondOpc = LoongArch::XVSETEQZ_V;
4339 break;
4340 case LoongArch::PseudoXVBZ_B:
4341 CondOpc = LoongArch::XVSETANYEQZ_B;
4342 break;
4343 case LoongArch::PseudoXVBZ_H:
4344 CondOpc = LoongArch::XVSETANYEQZ_H;
4345 break;
4346 case LoongArch::PseudoXVBZ_W:
4347 CondOpc = LoongArch::XVSETANYEQZ_W;
4348 break;
4349 case LoongArch::PseudoXVBZ_D:
4350 CondOpc = LoongArch::XVSETANYEQZ_D;
4351 break;
4352 case LoongArch::PseudoXVBNZ:
4353 CondOpc = LoongArch::XVSETNEZ_V;
4354 break;
4355 case LoongArch::PseudoXVBNZ_B:
4356 CondOpc = LoongArch::XVSETALLNEZ_B;
4357 break;
4358 case LoongArch::PseudoXVBNZ_H:
4359 CondOpc = LoongArch::XVSETALLNEZ_H;
4360 break;
4361 case LoongArch::PseudoXVBNZ_W:
4362 CondOpc = LoongArch::XVSETALLNEZ_W;
4363 break;
4364 case LoongArch::PseudoXVBNZ_D:
4365 CondOpc = LoongArch::XVSETALLNEZ_D;
4366 break;
4367 }
4368
4369 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
4370 const BasicBlock *LLVM_BB = BB->getBasicBlock();
4371 DebugLoc DL = MI.getDebugLoc();
4372 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
4373 MachineFunction::iterator It = ++BB->getIterator();
4374
4375 MachineFunction *F = BB->getParent();
4376 MachineBasicBlock *FalseBB = F->CreateMachineBasicBlock(LLVM_BB);
4377 MachineBasicBlock *TrueBB = F->CreateMachineBasicBlock(LLVM_BB);
4378 MachineBasicBlock *SinkBB = F->CreateMachineBasicBlock(LLVM_BB);
4379
4380 F->insert(It, FalseBB);
4381 F->insert(It, TrueBB);
4382 F->insert(It, SinkBB);
4383
4384 // Transfer the remainder of MBB and its successor edges to Sink.
4385 SinkBB->splice(SinkBB->end(), BB, std::next(MI.getIterator()), BB->end());
4386 SinkBB->transferSuccessorsAndUpdatePHIs(BB);
4387
4388 // Insert the real instruction to BB.
4389 Register FCC = MRI.createVirtualRegister(&LoongArch::CFRRegClass);
4390 BuildMI(BB, DL, TII->get(CondOpc), FCC).addReg(MI.getOperand(1).getReg());
4391
4392 // Insert branch.
4393 BuildMI(BB, DL, TII->get(LoongArch::BCNEZ)).addReg(FCC).addMBB(TrueBB);
4394 BB->addSuccessor(FalseBB);
4395 BB->addSuccessor(TrueBB);
4396
4397 // FalseBB.
4398 Register RD1 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
4399 BuildMI(FalseBB, DL, TII->get(LoongArch::ADDI_W), RD1)
4400 .addReg(LoongArch::R0)
4401 .addImm(0);
4402 BuildMI(FalseBB, DL, TII->get(LoongArch::PseudoBR)).addMBB(SinkBB);
4403 FalseBB->addSuccessor(SinkBB);
4404
4405 // TrueBB.
4406 Register RD2 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
4407 BuildMI(TrueBB, DL, TII->get(LoongArch::ADDI_W), RD2)
4408 .addReg(LoongArch::R0)
4409 .addImm(1);
4410 TrueBB->addSuccessor(SinkBB);
4411
4412 // SinkBB: merge the results.
4413 BuildMI(*SinkBB, SinkBB->begin(), DL, TII->get(LoongArch::PHI),
4414 MI.getOperand(0).getReg())
4415 .addReg(RD1)
4416 .addMBB(FalseBB)
4417 .addReg(RD2)
4418 .addMBB(TrueBB);
4419
4420 // The pseudo instruction is gone now.
4421 MI.eraseFromParent();
4422 return SinkBB;
4423}
4424
4425static MachineBasicBlock *
4426 emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB,
4427 const LoongArchSubtarget &Subtarget) {
4428 unsigned InsOp;
4429 unsigned HalfSize;
4430 switch (MI.getOpcode()) {
4431 default:
4432 llvm_unreachable("Unexpected opcode");
4433 case LoongArch::PseudoXVINSGR2VR_B:
4434 HalfSize = 16;
4435 InsOp = LoongArch::VINSGR2VR_B;
4436 break;
4437 case LoongArch::PseudoXVINSGR2VR_H:
4438 HalfSize = 8;
4439 InsOp = LoongArch::VINSGR2VR_H;
4440 break;
4441 }
4442 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
4443 const TargetRegisterClass *RC = &LoongArch::LASX256RegClass;
4444 const TargetRegisterClass *SubRC = &LoongArch::LSX128RegClass;
4445 DebugLoc DL = MI.getDebugLoc();
4446 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
4447 // XDst = vector_insert XSrc, Elt, Idx
4448 Register XDst = MI.getOperand(0).getReg();
4449 Register XSrc = MI.getOperand(1).getReg();
4450 Register Elt = MI.getOperand(2).getReg();
4451 unsigned Idx = MI.getOperand(3).getImm();
4452
4453 Register ScratchReg1 = XSrc;
4454 if (Idx >= HalfSize) {
4455 ScratchReg1 = MRI.createVirtualRegister(RC);
4456 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), ScratchReg1)
4457 .addReg(XSrc)
4458 .addReg(XSrc)
4459 .addImm(1);
4460 }
4461
4462 Register ScratchSubReg1 = MRI.createVirtualRegister(SubRC);
4463 Register ScratchSubReg2 = MRI.createVirtualRegister(SubRC);
4464 BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), ScratchSubReg1)
4465 .addReg(ScratchReg1, 0, LoongArch::sub_128);
4466 BuildMI(*BB, MI, DL, TII->get(InsOp), ScratchSubReg2)
4467 .addReg(ScratchSubReg1)
4468 .addReg(Elt)
4469 .addImm(Idx >= HalfSize ? Idx - HalfSize : Idx);
4470
4471 Register ScratchReg2 = XDst;
4472 if (Idx >= HalfSize)
4473 ScratchReg2 = MRI.createVirtualRegister(RC);
4474
4475 BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), ScratchReg2)
4476 .addImm(0)
4477 .addReg(ScratchSubReg2)
4478 .addImm(LoongArch::sub_128);
4479
4480 if (Idx >= HalfSize)
4481 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), XDst)
4482 .addReg(XSrc)
4483 .addReg(ScratchReg2)
4484 .addImm(2);
4485
4486 MI.eraseFromParent();
4487 return BB;
4488}
4489
4490MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter(
4491 MachineInstr &MI, MachineBasicBlock *BB) const {
4492 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
4493 DebugLoc DL = MI.getDebugLoc();
4494
4495 switch (MI.getOpcode()) {
4496 default:
4497 llvm_unreachable("Unexpected instr type to insert");
4498 case LoongArch::DIV_W:
4499 case LoongArch::DIV_WU:
4500 case LoongArch::MOD_W:
4501 case LoongArch::MOD_WU:
4502 case LoongArch::DIV_D:
4503 case LoongArch::DIV_DU:
4504 case LoongArch::MOD_D:
4505 case LoongArch::MOD_DU:
4506 return insertDivByZeroTrap(MI, BB);
4507 break;
4508 case LoongArch::WRFCSR: {
4509 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVGR2FCSR),
4510 LoongArch::FCSR0 + MI.getOperand(0).getImm())
4511 .addReg(MI.getOperand(1).getReg());
4512 MI.eraseFromParent();
4513 return BB;
4514 }
4515 case LoongArch::RDFCSR: {
4516 MachineInstr *ReadFCSR =
4517 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVFCSR2GR),
4518 MI.getOperand(0).getReg())
4519 .addReg(LoongArch::FCSR0 + MI.getOperand(1).getImm());
4520 ReadFCSR->getOperand(1).setIsUndef();
4521 MI.eraseFromParent();
4522 return BB;
4523 }
4524 case LoongArch::PseudoVBZ:
4525 case LoongArch::PseudoVBZ_B:
4526 case LoongArch::PseudoVBZ_H:
4527 case LoongArch::PseudoVBZ_W:
4528 case LoongArch::PseudoVBZ_D:
4529 case LoongArch::PseudoVBNZ:
4530 case LoongArch::PseudoVBNZ_B:
4531 case LoongArch::PseudoVBNZ_H:
4532 case LoongArch::PseudoVBNZ_W:
4533 case LoongArch::PseudoVBNZ_D:
4534 case LoongArch::PseudoXVBZ:
4535 case LoongArch::PseudoXVBZ_B:
4536 case LoongArch::PseudoXVBZ_H:
4537 case LoongArch::PseudoXVBZ_W:
4538 case LoongArch::PseudoXVBZ_D:
4539 case LoongArch::PseudoXVBNZ:
4540 case LoongArch::PseudoXVBNZ_B:
4541 case LoongArch::PseudoXVBNZ_H:
4542 case LoongArch::PseudoXVBNZ_W:
4543 case LoongArch::PseudoXVBNZ_D:
4544 return emitVecCondBranchPseudo(MI, BB, Subtarget);
4545 case LoongArch::PseudoXVINSGR2VR_B:
4546 case LoongArch::PseudoXVINSGR2VR_H:
4547 return emitPseudoXVINSGR2VR(MI, BB, Subtarget);
4548 }
4549}
4550
4551 bool LoongArchTargetLowering::allowsMisalignedMemoryAccesses(
4552 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
4553 unsigned *Fast) const {
4554 if (!Subtarget.hasUAL())
4555 return false;
4556
4557 // TODO: set reasonable speed number.
4558 if (Fast)
4559 *Fast = 1;
4560 return true;
4561}
4562
4563const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
4564 switch ((LoongArchISD::NodeType)Opcode) {
4565 case LoongArchISD::FIRST_NUMBER:
4566 break;
4567
4568#define NODE_NAME_CASE(node) \
4569 case LoongArchISD::node: \
4570 return "LoongArchISD::" #node;
4571
4572 // TODO: Add more target-dependent nodes later.
4573 NODE_NAME_CASE(CALL)
4574 NODE_NAME_CASE(CALL_MEDIUM)
4575 NODE_NAME_CASE(CALL_LARGE)
4576 NODE_NAME_CASE(RET)
4577 NODE_NAME_CASE(TAIL)
4578 NODE_NAME_CASE(TAIL_MEDIUM)
4579 NODE_NAME_CASE(TAIL_LARGE)
4580 NODE_NAME_CASE(SLL_W)
4581 NODE_NAME_CASE(SRA_W)
4582 NODE_NAME_CASE(SRL_W)
4583 NODE_NAME_CASE(BSTRINS)
4584 NODE_NAME_CASE(BSTRPICK)
4585 NODE_NAME_CASE(MOVGR2FR_W_LA64)
4586 NODE_NAME_CASE(MOVFR2GR_S_LA64)
4587 NODE_NAME_CASE(FTINT)
4588 NODE_NAME_CASE(REVB_2H)
4589 NODE_NAME_CASE(REVB_2W)
4590 NODE_NAME_CASE(BITREV_4B)
4591 NODE_NAME_CASE(BITREV_W)
4592 NODE_NAME_CASE(ROTR_W)
4593 NODE_NAME_CASE(ROTL_W)
4594 NODE_NAME_CASE(DIV_WU)
4595 NODE_NAME_CASE(MOD_WU)
4596 NODE_NAME_CASE(CLZ_W)
4597 NODE_NAME_CASE(CTZ_W)
4598 NODE_NAME_CASE(DBAR)
4599 NODE_NAME_CASE(IBAR)
4600 NODE_NAME_CASE(BREAK)
4601 NODE_NAME_CASE(SYSCALL)
4602 NODE_NAME_CASE(CRC_W_B_W)
4603 NODE_NAME_CASE(CRC_W_H_W)
4604 NODE_NAME_CASE(CRC_W_W_W)
4605 NODE_NAME_CASE(CRC_W_D_W)
4606 NODE_NAME_CASE(CRCC_W_B_W)
4607 NODE_NAME_CASE(CRCC_W_H_W)
4608 NODE_NAME_CASE(CRCC_W_W_W)
4609 NODE_NAME_CASE(CRCC_W_D_W)
4610 NODE_NAME_CASE(CSRRD)
4611 NODE_NAME_CASE(CSRWR)
4612 NODE_NAME_CASE(CSRXCHG)
4613 NODE_NAME_CASE(IOCSRRD_B)
4614 NODE_NAME_CASE(IOCSRRD_H)
4615 NODE_NAME_CASE(IOCSRRD_W)
4616 NODE_NAME_CASE(IOCSRRD_D)
4617 NODE_NAME_CASE(IOCSRWR_B)
4618 NODE_NAME_CASE(IOCSRWR_H)
4619 NODE_NAME_CASE(IOCSRWR_W)
4620 NODE_NAME_CASE(IOCSRWR_D)
4621 NODE_NAME_CASE(CPUCFG)
4622 NODE_NAME_CASE(MOVGR2FCSR)
4623 NODE_NAME_CASE(MOVFCSR2GR)
4624 NODE_NAME_CASE(CACOP_D)
4625 NODE_NAME_CASE(CACOP_W)
4626 NODE_NAME_CASE(VSHUF)
4627 NODE_NAME_CASE(VPICKEV)
4628 NODE_NAME_CASE(VPICKOD)
4629 NODE_NAME_CASE(VPACKEV)
4630 NODE_NAME_CASE(VPACKOD)
4631 NODE_NAME_CASE(VILVL)
4632 NODE_NAME_CASE(VILVH)
4633 NODE_NAME_CASE(VSHUF4I)
4634 NODE_NAME_CASE(VREPLVEI)
4635 NODE_NAME_CASE(XVPERMI)
4636 NODE_NAME_CASE(VPICK_SEXT_ELT)
4637 NODE_NAME_CASE(VPICK_ZEXT_ELT)
4638 NODE_NAME_CASE(VREPLVE)
4639 NODE_NAME_CASE(VALL_ZERO)
4640 NODE_NAME_CASE(VANY_ZERO)
4641 NODE_NAME_CASE(VALL_NONZERO)
4642 NODE_NAME_CASE(VANY_NONZERO)
4643 }
4644#undef NODE_NAME_CASE
4645 return nullptr;
4646}
4647
4648//===----------------------------------------------------------------------===//
4649// Calling Convention Implementation
4650//===----------------------------------------------------------------------===//
4651
4652 // Eight general-purpose registers a0-a7 are used for passing integer
4653 // arguments, with a0-a1 reused to return values. Generally, the GPRs are used
4654 // to pass fixed-point arguments, and floating-point arguments when no FPR is
4655 // available or when the soft-float ABI is in use.
4656const MCPhysReg ArgGPRs[] = {LoongArch::R4, LoongArch::R5, LoongArch::R6,
4657 LoongArch::R7, LoongArch::R8, LoongArch::R9,
4658 LoongArch::R10, LoongArch::R11};
4659 // Eight floating-point registers fa0-fa7 are used for passing floating-point
4660// arguments, and fa0-fa1 are also used to return values.
4661const MCPhysReg ArgFPR32s[] = {LoongArch::F0, LoongArch::F1, LoongArch::F2,
4662 LoongArch::F3, LoongArch::F4, LoongArch::F5,
4663 LoongArch::F6, LoongArch::F7};
4664// FPR32 and FPR64 alias each other.
4665 const MCPhysReg ArgFPR64s[] = {
4666 LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, LoongArch::F3_64,
4667 LoongArch::F4_64, LoongArch::F5_64, LoongArch::F6_64, LoongArch::F7_64};
4668
4669const MCPhysReg ArgVRs[] = {LoongArch::VR0, LoongArch::VR1, LoongArch::VR2,
4670 LoongArch::VR3, LoongArch::VR4, LoongArch::VR5,
4671 LoongArch::VR6, LoongArch::VR7};
4672
4673const MCPhysReg ArgXRs[] = {LoongArch::XR0, LoongArch::XR1, LoongArch::XR2,
4674 LoongArch::XR3, LoongArch::XR4, LoongArch::XR5,
4675 LoongArch::XR6, LoongArch::XR7};
4676
4677// Pass a 2*GRLen argument that has been split into two GRLen values through
4678// registers or the stack as necessary.
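// For example, on LA64 an i128 argument is split into two i64 halves;
// depending on how many argument GPRs remain, both halves may go in registers,
// the first in a register and the second on the stack, or both on the stack.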
4679static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State,
4680 CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1,
4681 unsigned ValNo2, MVT ValVT2, MVT LocVT2,
4682 ISD::ArgFlagsTy ArgFlags2) {
4683 unsigned GRLenInBytes = GRLen / 8;
4684 if (Register Reg = State.AllocateReg(ArgGPRs)) {
4685 // At least one half can be passed via register.
4686 State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
4687 VA1.getLocVT(), CCValAssign::Full));
4688 } else {
4689 // Both halves must be passed on the stack, with proper alignment.
4690 Align StackAlign =
4691 std::max(Align(GRLenInBytes), ArgFlags1.getNonZeroOrigAlign());
4692 State.addLoc(
4693 CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),
4694 State.AllocateStack(GRLenInBytes, StackAlign),
4695 VA1.getLocVT(), CCValAssign::Full));
4696 State.addLoc(CCValAssign::getMem(
4697 ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
4698 LocVT2, CCValAssign::Full));
4699 return false;
4700 }
4701 if (Register Reg = State.AllocateReg(ArgGPRs)) {
4702 // The second half can also be passed via register.
4703 State.addLoc(
4704 CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
4705 } else {
4706 // The second half is passed via the stack, without additional alignment.
4707 State.addLoc(CCValAssign::getMem(
4708 ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
4709 LocVT2, CCValAssign::Full));
4710 }
4711 return false;
4712}
4713
4714// Implements the LoongArch calling convention. Returns true upon failure.
4715 static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI,
4716 unsigned ValNo, MVT ValVT,
4717 CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
4718 CCState &State, bool IsFixed, bool IsRet,
4719 Type *OrigTy) {
4720 unsigned GRLen = DL.getLargestLegalIntTypeSizeInBits();
4721 assert((GRLen == 32 || GRLen == 64) && "Unsupported GRLen");
4722 MVT GRLenVT = GRLen == 32 ? MVT::i32 : MVT::i64;
4723 MVT LocVT = ValVT;
4724
4725 // Any return value split into more than two values can't be returned
4726 // directly.
4727 if (IsRet && ValNo > 1)
4728 return true;
4729
4730 // Use a GPR for floating point if passing a variadic argument or if no FPR is available.
4731 bool UseGPRForFloat = true;
4732
4733 switch (ABI) {
4734 default:
4735 llvm_unreachable("Unexpected ABI");
4736 break;
4737 case LoongArchABI::ABI_ILP32F:
4738 case LoongArchABI::ABI_LP64F:
4739 case LoongArchABI::ABI_ILP32D:
4740 case LoongArchABI::ABI_LP64D:
4741 UseGPRForFloat = !IsFixed;
4742 break;
4743 case LoongArchABI::ABI_ILP32S:
4744 case LoongArchABI::ABI_LP64S:
4745 break;
4746 }
4747
4748 // FPR32 and FPR64 alias each other.
4749 if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s))
4750 UseGPRForFloat = true;
4751
4752 if (UseGPRForFloat && ValVT == MVT::f32) {
4753 LocVT = GRLenVT;
4754 LocInfo = CCValAssign::BCvt;
4755 } else if (UseGPRForFloat && GRLen == 64 && ValVT == MVT::f64) {
4756 LocVT = MVT::i64;
4757 LocInfo = CCValAssign::BCvt;
4758 } else if (UseGPRForFloat && GRLen == 32 && ValVT == MVT::f64) {
4759 // TODO: Handle passing f64 on LA32 with D feature.
4760 report_fatal_error("Passing f64 with GPR on LA32 is undefined");
4761 }
4762
4763 // If this is a variadic argument, the LoongArch calling convention requires
4764 // that it is assigned an 'even' or 'aligned' register if it has (2*GRLen)/8
4765 // byte alignment. An aligned register should be used regardless of whether
4766 // the original argument was split during legalisation or not. The argument
4767 // will not be passed by registers if the original type is larger than
4768 // 2*GRLen, so the register alignment rule does not apply.
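// For example, on LA64 a variadic argument with 16-byte size and alignment
// must start at an even-numbered GPR (a0/a2/a4/a6), so an odd argument
// register may be skipped below.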
4769 unsigned TwoGRLenInBytes = (2 * GRLen) / 8;
4770 if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoGRLenInBytes &&
4771 DL.getTypeAllocSize(OrigTy) == TwoGRLenInBytes) {
4772 unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
4773 // Skip 'odd' register if necessary.
4774 if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1)
4775 State.AllocateReg(ArgGPRs);
4776 }
4777
4778 SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
4779 SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
4780 State.getPendingArgFlags();
4781
4782 assert(PendingLocs.size() == PendingArgFlags.size() &&
4783 "PendingLocs and PendingArgFlags out of sync");
4784
4785 // Split arguments might be passed indirectly, so keep track of the pending
4786 // values.
4787 if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
4788 LocVT = GRLenVT;
4789 LocInfo = CCValAssign::Indirect;
4790 PendingLocs.push_back(
4791 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
4792 PendingArgFlags.push_back(ArgFlags);
4793 if (!ArgFlags.isSplitEnd()) {
4794 return false;
4795 }
4796 }
4797
4798 // If the split argument only had two elements, it should be passed directly
4799 // in registers or on the stack.
4800 if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&
4801 PendingLocs.size() <= 2) {
4802 assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
4803 // Apply the normal calling convention rules to the first half of the
4804 // split argument.
4805 CCValAssign VA = PendingLocs[0];
4806 ISD::ArgFlagsTy AF = PendingArgFlags[0];
4807 PendingLocs.clear();
4808 PendingArgFlags.clear();
4809 return CC_LoongArchAssign2GRLen(GRLen, State, VA, AF, ValNo, ValVT, LocVT,
4810 ArgFlags);
4811 }
4812
4813 // Allocate to a register if possible, or else a stack slot.
4814 Register Reg;
4815 unsigned StoreSizeBytes = GRLen / 8;
4816 Align StackAlign = Align(GRLen / 8);
4817
4818 if (ValVT == MVT::f32 && !UseGPRForFloat)
4819 Reg = State.AllocateReg(ArgFPR32s);
4820 else if (ValVT == MVT::f64 && !UseGPRForFloat)
4821 Reg = State.AllocateReg(ArgFPR64s);
4822 else if (ValVT.is128BitVector())
4823 Reg = State.AllocateReg(ArgVRs);
4824 else if (ValVT.is256BitVector())
4825 Reg = State.AllocateReg(ArgXRs);
4826 else
4827 Reg = State.AllocateReg(ArgGPRs);
4828
4829 unsigned StackOffset =
4830 Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign);
4831
4832 // If we reach this point and PendingLocs is non-empty, we must be at the
4833 // end of a split argument that must be passed indirectly.
4834 if (!PendingLocs.empty()) {
4835 assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
4836 assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
4837 for (auto &It : PendingLocs) {
4838 if (Reg)
4839 It.convertToReg(Reg);
4840 else
4841 It.convertToMem(StackOffset);
4842 State.addLoc(It);
4843 }
4844 PendingLocs.clear();
4845 PendingArgFlags.clear();
4846 return false;
4847 }
4848 assert((!UseGPRForFloat || LocVT == GRLenVT) &&
4849 "Expected a GRLenVT at this stage");
4850
4851 if (Reg) {
4852 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
4853 return false;
4854 }
4855
4856 // When a floating-point value is passed on the stack, no bit-cast is needed.
4857 if (ValVT.isFloatingPoint()) {
4858 LocVT = ValVT;
4859 LocInfo = CCValAssign::Full;
4860 }
4861
4862 State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
4863 return false;
4864}
4865
4866void LoongArchTargetLowering::analyzeInputArgs(
4867 MachineFunction &MF, CCState &CCInfo,
4868 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
4869 LoongArchCCAssignFn Fn) const {
4870 FunctionType *FType = MF.getFunction().getFunctionType();
4871 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
4872 MVT ArgVT = Ins[i].VT;
4873 Type *ArgTy = nullptr;
4874 if (IsRet)
4875 ArgTy = FType->getReturnType();
4876 else if (Ins[i].isOrigArg())
4877 ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
4878 LoongArchABI::ABI ABI =
4879 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
4880 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Ins[i].Flags,
4881 CCInfo, /*IsFixed=*/true, IsRet, ArgTy)) {
4882 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " << ArgVT
4883 << '\n');
4884 llvm_unreachable("");
4885 }
4886 }
4887}
4888
4889void LoongArchTargetLowering::analyzeOutputArgs(
4890 MachineFunction &MF, CCState &CCInfo,
4891 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
4892 CallLoweringInfo *CLI, LoongArchCCAssignFn Fn) const {
4893 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
4894 MVT ArgVT = Outs[i].VT;
4895 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
4896 LoongArchABI::ABI ABI =
4897 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
4898 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Outs[i].Flags,
4899 CCInfo, Outs[i].IsFixed, IsRet, OrigTy)) {
4900 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " << ArgVT
4901 << "\n");
4902 llvm_unreachable("");
4903 }
4904 }
4905}
4906
4907// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
4908// values.
4910 const CCValAssign &VA, const SDLoc &DL) {
4911 switch (VA.getLocInfo()) {
4912 default:
4913 llvm_unreachable("Unexpected CCValAssign::LocInfo");
4914 case CCValAssign::Full:
4916 break;
4917 case CCValAssign::BCvt:
4918 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
4919 Val = DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Val);
4920 else
4921 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
4922 break;
4923 }
4924 return Val;
4925}
4926
4928 const CCValAssign &VA, const SDLoc &DL,
4929 const ISD::InputArg &In,
4930 const LoongArchTargetLowering &TLI) {
4933 EVT LocVT = VA.getLocVT();
4934 SDValue Val;
4935 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
4936 Register VReg = RegInfo.createVirtualRegister(RC);
4937 RegInfo.addLiveIn(VA.getLocReg(), VReg);
4938 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
4939
4940 // If input is sign extended from 32 bits, note it for the OptW pass.
4941 if (In.isOrigArg()) {
4942 Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex());
4943 if (OrigArg->getType()->isIntegerTy()) {
4944 unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
4945 // An input zero extended from i31 can also be considered sign extended.
4946 if ((BitWidth <= 32 && In.Flags.isSExt()) ||
4947 (BitWidth < 32 && In.Flags.isZExt())) {
4950 LAFI->addSExt32Register(VReg);
4951 }
4952 }
4953 }
4954
4955 return convertLocVTToValVT(DAG, Val, VA, DL);
4956}
4957
4958// The caller is responsible for loading the full value if the argument is
4959// passed with CCValAssign::Indirect.
4961 const CCValAssign &VA, const SDLoc &DL) {
4963 MachineFrameInfo &MFI = MF.getFrameInfo();
4964 EVT ValVT = VA.getValVT();
4965 int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
4966 /*IsImmutable=*/true);
4967 SDValue FIN = DAG.getFrameIndex(
4969
4970 ISD::LoadExtType ExtType;
4971 switch (VA.getLocInfo()) {
4972 default:
4973 llvm_unreachable("Unexpected CCValAssign::LocInfo");
4974 case CCValAssign::Full:
4976 case CCValAssign::BCvt:
4977 ExtType = ISD::NON_EXTLOAD;
4978 break;
4979 }
4980 return DAG.getExtLoad(
4981 ExtType, DL, VA.getLocVT(), Chain, FIN,
4983}
4984
4986 const CCValAssign &VA, const SDLoc &DL) {
4987 EVT LocVT = VA.getLocVT();
4988
4989 switch (VA.getLocInfo()) {
4990 default:
4991 llvm_unreachable("Unexpected CCValAssign::LocInfo");
4992 case CCValAssign::Full:
4993 break;
4994 case CCValAssign::BCvt:
4995 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
4996 Val = DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Val);
4997 else
4998 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
4999 break;
5000 }
5001 return Val;
5002}
5003
5004static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
5005 CCValAssign::LocInfo LocInfo,
5006 ISD::ArgFlagsTy ArgFlags, CCState &State) {
5007 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
5008 // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, SpLim
5009 // s0 s1 s2 s3 s4 s5 s6 s7 s8
5010 static const MCPhysReg GPRList[] = {
5011 LoongArch::R23, LoongArch::R24, LoongArch::R25,
5012 LoongArch::R26, LoongArch::R27, LoongArch::R28,
5013 LoongArch::R29, LoongArch::R30, LoongArch::R31};
5014 if (unsigned Reg = State.AllocateReg(GPRList)) {
5015 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
5016 return false;
5017 }
5018 }
5019
5020 if (LocVT == MVT::f32) {
5021 // Pass in STG registers: F1, F2, F3, F4
5022 // fs0,fs1,fs2,fs3
5023 static const MCPhysReg FPR32List[] = {LoongArch::F24, LoongArch::F25,
5024 LoongArch::F26, LoongArch::F27};
5025 if (unsigned Reg = State.AllocateReg(FPR32List)) {
5026 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
5027 return false;
5028 }
5029 }
5030
5031 if (LocVT == MVT::f64) {
5032 // Pass in STG registers: D1, D2, D3, D4
5033 // fs4,fs5,fs6,fs7
5034 static const MCPhysReg FPR64List[] = {LoongArch::F28_64, LoongArch::F29_64,
5035 LoongArch::F30_64, LoongArch::F31_64};
5036 if (unsigned Reg = State.AllocateReg(FPR64List)) {
5037 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
5038 return false;
5039 }
5040 }
5041
5042 report_fatal_error("No registers left in GHC calling convention");
5043 return true;
5044}
5045
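For quick reference, the register lists used by CC_LoongArch_GHC above amount to the following STG-to-LoongArch mapping. The sketch below only prints that table; the names are taken directly from the comments above, and the program itself is illustrative rather than part of LLVM.

#include <cstdio>

// Mapping from STG registers to LoongArch ABI register names, as described
// in the comments of CC_LoongArch_GHC above. Purely illustrative.
int main() {
  static const struct { const char *STG; const char *Reg; } Map[] = {
      {"Base", "s0 ($r23)"}, {"Sp", "s1 ($r24)"},  {"Hp", "s2 ($r25)"},
      {"R1", "s3 ($r26)"},   {"R2", "s4 ($r27)"},  {"R3", "s5 ($r28)"},
      {"R4", "s6 ($r29)"},   {"R5", "s7 ($r30)"},  {"SpLim", "s8 ($r31)"},
      {"F1", "fs0 ($f24)"},  {"F2", "fs1 ($f25)"}, {"F3", "fs2 ($f26)"},
      {"F4", "fs3 ($f27)"},  {"D1", "fs4 ($f28)"}, {"D2", "fs5 ($f29)"},
      {"D3", "fs6 ($f30)"},  {"D4", "fs7 ($f31)"}};
  for (const auto &Entry : Map)
    std::printf("%-5s -> %s\n", Entry.STG, Entry.Reg);
  return 0;
}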
5046// Transform physical registers into virtual registers.
5048 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
5049 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
5050 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
5051
5053
5054 switch (CallConv) {
5055 default:
5056 llvm_unreachable("Unsupported calling convention");
5057 case CallingConv::C:
5058 case CallingConv::Fast:
5059 break;
5060 case CallingConv::GHC:
5061 if (!MF.getSubtarget().hasFeature(LoongArch::FeatureBasicF) ||
5062 !MF.getSubtarget().hasFeature(LoongArch::FeatureBasicD))
5064 "GHC calling convention requires the F and D extensions");
5065 }
5066
5067 EVT PtrVT = getPointerTy(DAG.getDataLayout());
5068 MVT GRLenVT = Subtarget.getGRLenVT();
5069 unsigned GRLenInBytes = Subtarget.getGRLen() / 8;
5070 // Used with varargs to accumulate store chains.
5071 std::vector<SDValue> OutChains;
5072
5073 // Assign locations to all of the incoming arguments.
5075 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
5076
5077 if (CallConv == CallingConv::GHC)
5079 else
5080 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false, CC_LoongArch);
5081
5082 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
5083 CCValAssign &VA = ArgLocs[i];
5084 SDValue ArgValue;
5085 if (VA.isRegLoc())
5086 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[i], *this);
5087 else
5088 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
5089 if (VA.getLocInfo() == CCValAssign::Indirect) {
5090 // If the original argument was split and passed by reference, we need to
5091 // load all parts of it here (using the same address).
5092 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
5094 unsigned ArgIndex = Ins[i].OrigArgIndex;
5095 unsigned ArgPartOffset = Ins[i].PartOffset;
5096 assert(ArgPartOffset == 0);
5097 while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) {
5098 CCValAssign &PartVA = ArgLocs[i + 1];
5099 unsigned PartOffset = Ins[i + 1].PartOffset - ArgPartOffset;
5100 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
5101 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
5102 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
5104 ++i;
5105 }
5106 continue;
5107 }
5108 InVals.push_back(ArgValue);
5109 }
5110
5111 if (IsVarArg) {
5113 unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
5114 const TargetRegisterClass *RC = &LoongArch::GPRRegClass;
5115 MachineFrameInfo &MFI = MF.getFrameInfo();
5116 MachineRegisterInfo &RegInfo = MF.getRegInfo();
5117 auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();
5118
5119 // Offset of the first variable argument from stack pointer, and size of
5120 // the vararg save area. For now, the varargs save area is either zero or
5121 // large enough to hold a0-a7.
5122 int VaArgOffset, VarArgsSaveSize;
5123
5124 // If all registers are allocated, then all varargs must be passed on the
5125 // stack and we don't need to save any argregs.
5126 if (ArgRegs.size() == Idx) {
5127 VaArgOffset = CCInfo.getStackSize();
5128 VarArgsSaveSize = 0;
5129 } else {
5130 VarArgsSaveSize = GRLenInBytes * (ArgRegs.size() - Idx);
5131 VaArgOffset = -VarArgsSaveSize;
5132 }
5133
5134 // Record the frame index of the first variable argument
5135 // which is needed by VASTART.
5136 int FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
5137 LoongArchFI->setVarArgsFrameIndex(FI);
5138
5139 // If saving an odd number of registers then create an extra stack slot to
5140 // ensure that the frame pointer is 2*GRLen-aligned, which in turn ensures
5141 // offsets to even-numbered registers remain 2*GRLen-aligned.
5142 if (Idx % 2) {
5143 MFI.CreateFixedObject(GRLenInBytes, VaArgOffset - (int)GRLenInBytes,
5144 true);
5145 VarArgsSaveSize += GRLenInBytes;
5146 }
5147
5148 // Copy the integer registers that may have been used for passing varargs
5149 // to the vararg save area.
5150 for (unsigned I = Idx; I < ArgRegs.size();
5151 ++I, VaArgOffset += GRLenInBytes) {
5152 const Register Reg = RegInfo.createVirtualRegister(RC);
5153 RegInfo.addLiveIn(ArgRegs[I], Reg);
5154 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, GRLenVT);
5155 FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
5156 SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
5157 SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
5159 cast<StoreSDNode>(Store.getNode())
5160 ->getMemOperand()
5161 ->setValue((Value *)nullptr);
5162 OutChains.push_back(Store);
5163 }
5164 LoongArchFI->setVarArgsSaveSize(VarArgsSaveSize);
5165 }
5166
5167 // All stores are grouped in one node to allow the matching between
5168 // the size of Ins and InVals. This only happens for vararg functions.
5169 if (!OutChains.empty()) {
5170 OutChains.push_back(Chain);
5171 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
5172 }
5173
5174 return Chain;
5175}
5176
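To make the vararg bookkeeping above concrete, the standalone sketch below (assuming LA64 with eight argument GPRs, and simplifying CCInfo.getStackSize() to zero) recomputes VarArgsSaveSize and VaArgOffset for each possible first-unallocated-register index, including the extra padding slot added when an odd number of registers is saved.

#include <cstdio>

int main() {
  const int GRLenInBytes = 8; // LA64; 4 on LA32.
  const int NumArgGPRs = 8;   // a0-a7.
  // Idx is the first unallocated argument GPR, as returned by
  // CCState::getFirstUnallocated in the code above.
  for (int Idx = 0; Idx <= NumArgGPRs; ++Idx) {
    int VaArgOffset, VarArgsSaveSize;
    if (Idx == NumArgGPRs) {
      // All argument registers used: no save area; the first vararg lives at
      // the current stack size (taken as 0 here for simplicity).
      VaArgOffset = 0;
      VarArgsSaveSize = 0;
    } else {
      VarArgsSaveSize = GRLenInBytes * (NumArgGPRs - Idx);
      VaArgOffset = -VarArgsSaveSize;
    }
    // Extra slot so the frame stays 2*GRLen-aligned when an odd number of
    // registers is saved.
    if (Idx % 2)
      VarArgsSaveSize += GRLenInBytes;
    std::printf("Idx=%d  VaArgOffset=%4d  VarArgsSaveSize=%3d\n", Idx,
                VaArgOffset, VarArgsSaveSize);
  }
  return 0;
}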
5178 return CI->isTailCall();
5179}
5180
5181 // Check if the return value is used only as a return value, as otherwise
5182 // we can't perform a tail call.
5184 SDValue &Chain) const {
5185 if (N->getNumValues() != 1)
5186 return false;
5187 if (!N->hasNUsesOfValue(1, 0))
5188 return false;
5189
5190 SDNode *Copy = *N->use_begin();
5191 if (Copy->getOpcode() != ISD::CopyToReg)
5192 return false;
5193
5194 // If the ISD::CopyToReg has a glue operand, we conservatively assume it
5195 // isn't safe to perform a tail call.
5196 if (Copy->getGluedNode())
5197 return false;
5198
5199 // The copy must be used by a LoongArchISD::RET, and nothing else.
5200 bool HasRet = false;
5201 for (SDNode *Node : Copy->uses()) {
5202 if (Node->getOpcode() != LoongArchISD::RET)
5203 return false;
5204 HasRet = true;
5205 }
5206
5207 if (!HasRet)
5208 return false;
5209
5210 Chain = Copy->getOperand(0);
5211 return true;
5212}
5213
5214// Check whether the call is eligible for tail call optimization.
5215bool LoongArchTargetLowering::isEligibleForTailCallOptimization(
5216 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
5217 const SmallVectorImpl<CCValAssign> &ArgLocs) const {
5218
5219 auto CalleeCC = CLI.CallConv;
5220 auto &Outs = CLI.Outs;
5221 auto &Caller = MF.getFunction();
5222 auto CallerCC = Caller.getCallingConv();
5223
5224 // Do not tail call opt if the stack is used to pass parameters.
5225 if (CCInfo.getStackSize() != 0)
5226 return false;
5227
5228 // Do not tail call opt if any parameters need to be passed indirectly.
5229 for (auto &VA : ArgLocs)
5230 if (VA.getLocInfo() == CCValAssign::Indirect)
5231 return false;
5232
5233 // Do not tail call opt if either caller or callee uses struct return
5234 // semantics.
5235 auto IsCallerStructRet = Caller.hasStructRetAttr();
5236 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
5237 if (IsCallerStructRet || IsCalleeStructRet)
5238 return false;
5239
5240 // Do not tail call opt if either the callee or caller has a byval argument.
5241 for (auto &Arg : Outs)
5242 if (Arg.Flags.isByVal())
5243 return false;
5244
5245 // The callee has to preserve all registers the caller needs to preserve.
5246 const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
5247 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
5248 if (CalleeCC != CallerCC) {
5249 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
5250 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
5251 return false;
5252 }
5253 return true;
5254}
5255
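The eligibility test above is a conjunction of independent conditions. The sketch below restates it with hypothetical boolean inputs standing in for the real CCState/CLI/MachineFunction queries; it is a summary for readers, not the actual implementation.

#include <cstdio>

// Hypothetical summary of the tail-call eligibility checks above.
// Each flag stands in for the corresponding query on CCState / CLI / MF.
struct TailCallQuery {
  bool StackUsedForArgs;      // CCInfo.getStackSize() != 0
  bool AnyIndirectArg;        // any VA with CCValAssign::Indirect
  bool CallerOrCalleeSRet;    // struct-return on either side
  bool AnyByValArg;           // any outgoing byval argument
  bool CalleePreservesCaller; // same CC, or callee regmask covers caller's
};

static bool isEligibleForTailCall(const TailCallQuery &Q) {
  return !Q.StackUsedForArgs && !Q.AnyIndirectArg && !Q.CallerOrCalleeSRet &&
         !Q.AnyByValArg && Q.CalleePreservesCaller;
}

int main() {
  TailCallQuery Q{false, false, false, false, true};
  std::printf("eligible: %s\n", isEligibleForTailCall(Q) ? "yes" : "no");
  return 0;
}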
5257 return DAG.getDataLayout().getPrefTypeAlign(
5258 VT.getTypeForEVT(*DAG.getContext()));
5259}
5260
5261// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
5262// and output parameter nodes.
5263SDValue
5265 SmallVectorImpl<SDValue> &InVals) const {
5266 SelectionDAG &DAG = CLI.DAG;
5267 SDLoc &DL = CLI.DL;
5269 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
5271 SDValue Chain = CLI.Chain;
5272 SDValue Callee = CLI.Callee;
5273 CallingConv::ID CallConv = CLI.CallConv;
5274 bool IsVarArg = CLI.IsVarArg;
5275 EVT PtrVT = getPointerTy(DAG.getDataLayout());
5276 MVT GRLenVT = Subtarget.getGRLenVT();
5277 bool &IsTailCall = CLI.IsTailCall;
5278
5280
5281 // Analyze the operands of the call, assigning locations to each operand.
5283 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
5284
5285 if (CallConv == CallingConv::GHC)
5286 ArgCCInfo.AnalyzeCallOperands(Outs, CC_LoongArch_GHC);
5287 else
5288 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI, CC_LoongArch);
5289
5290 // Check if it's really possible to do a tail call.
5291 if (IsTailCall)
5292 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
5293
5294 if (IsTailCall)
5295 ++NumTailCalls;
5296 else if (CLI.CB && CLI.CB->isMustTailCall())
5297 report_fatal_error("failed to perform tail call elimination on a call "
5298 "site marked musttail");
5299
5300 // Get a count of how many bytes are to be pushed on the stack.
5301 unsigned NumBytes = ArgCCInfo.getStackSize();
5302
5303 // Create local copies for byval args.
5304 SmallVector<SDValue> ByValArgs;
5305 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
5306 ISD::ArgFlagsTy Flags = Outs[i].Flags;
5307 if (!Flags.isByVal())
5308 continue;
5309
5310 SDValue Arg = OutVals[i];
5311 unsigned Size = Flags.getByValSize();
5312 Align Alignment = Flags.getNonZeroByValAlign();
5313
5314 int FI =
5315 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
5316 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
5317 SDValue SizeNode = DAG.getConstant(Size, DL, GRLenVT);
5318
5319 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
5320 /*IsVolatile=*/false,
5321 /*AlwaysInline=*/false, /*CI=*/nullptr, std::nullopt,
5323 ByValArgs.push_back(FIPtr);
5324 }
5325
5326 if (!IsTailCall)
5327 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
5328
5329 // Copy argument values to their designated locations.
5331 SmallVector<SDValue> MemOpChains;
5332 SDValue StackPtr;
5333 for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) {
5334 CCValAssign &VA = ArgLocs[i];
5335 SDValue ArgValue = OutVals[i];
5336 ISD::ArgFlagsTy Flags = Outs[i].Flags;
5337
5338 // Promote the value if needed.
5339 // For now, only handle fully promoted and indirect arguments.
5340 if (VA.getLocInfo() == CCValAssign::Indirect) {
5341 // Store the argument in a stack slot and pass its address.
5342 Align StackAlign =
5343 std::max(getPrefTypeAlign(Outs[i].ArgVT, DAG),
5344 getPrefTypeAlign(ArgValue.getValueType(), DAG));
5345 TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
5346 // If the original argument was split and passed by reference, we need to
5347 // store the required parts of it here (and pass just one address).
5348 unsigned ArgIndex = Outs[i].OrigArgIndex;
5349 unsigned ArgPartOffset = Outs[i].PartOffset;
5350 assert(ArgPartOffset == 0);
5351 // Calculate the total size to store. We don't have access to what we're
5352 // actually storing other than performing the loop and collecting the
5353 // info.
5355 while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) {
5356 SDValue PartValue = OutVals[i + 1];
5357 unsigned PartOffset = Outs[i + 1].PartOffset - ArgPartOffset;
5358 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
5359 EVT PartVT = PartValue.getValueType();
5360
5361 StoredSize += PartVT.getStoreSize();
5362 StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
5363 Parts.push_back(std::make_pair(PartValue, Offset));
5364 ++i;
5365 }
5366 SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
5367 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
5368 MemOpChains.push_back(
5369 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
5371 for (const auto &Part : Parts) {
5372 SDValue PartValue = Part.first;
5373 SDValue PartOffset = Part.second;
5375 DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
5376 MemOpChains.push_back(
5377 DAG.getStore(Chain, DL, PartValue, Address,
5379 }
5380 ArgValue = SpillSlot;
5381 } else {
5382 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL);
5383 }
5384
5385 // Use local copy if it is a byval arg.
5386 if (Flags.isByVal())
5387 ArgValue = ByValArgs[j++];
5388
5389 if (VA.isRegLoc()) {
5390 // Queue up the argument copies and emit them at the end.
5391 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
5392 } else {
5393 assert(VA.isMemLoc() && "Argument not register or memory");
5394 assert(!IsTailCall && "Tail call not allowed if stack is used "
5395 "for passing parameters");
5396
5397 // Work out the address of the stack slot.
5398 if (!StackPtr.getNode())
5399 StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT);
5401 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
5403
5404 // Emit the store.
5405 MemOpChains.push_back(
5406 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
5407 }
5408 }
5409
5410 // Join the stores, which are independent of one another.
5411 if (!MemOpChains.empty())
5412 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
5413
5414 SDValue Glue;
5415
5416 // Build a sequence of copy-to-reg nodes, chained and glued together.
5417 for (auto &Reg : RegsToPass) {
5418 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
5419 Glue = Chain.getValue(1);
5420 }
5421
5422 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
5423 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
5424 // split it, and then the direct call can be matched by PseudoCALL.
5425 if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
5426 const GlobalValue *GV = S->getGlobal();
5427 unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(GV)
5430 Callee = DAG.getTargetGlobalAddress(S->getGlobal(), DL, PtrVT, 0, OpFlags);
5431 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
5432 unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(nullptr)
5435 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);
5436 }
5437
5438 // The first call operand is the chain and the second is the target address.
5440 Ops.push_back(Chain);
5441 Ops.push_back(Callee);
5442
5443 // Add argument registers to the end of the list so that they are
5444 // known live into the call.
5445 for (auto &Reg : RegsToPass)
5446 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
5447
5448 if (!IsTailCall) {
5449 // Add a register mask operand representing the call-preserved registers.
5450 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
5451 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
5452 assert(Mask && "Missing call preserved mask for calling convention");
5453 Ops.push_back(DAG.getRegisterMask(Mask));
5454 }
5455
5456 // Glue the call to the argument copies, if any.
5457 if (Glue.getNode())
5458 Ops.push_back(Glue);
5459
5460 // Emit the call.
5461 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
5462 unsigned Op;
5463 switch (DAG.getTarget().getCodeModel()) {
5464 default:
5465 report_fatal_error("Unsupported code model");
5466 case CodeModel::Small:
5467 Op = IsTailCall ? LoongArchISD::TAIL : LoongArchISD::CALL;
5468 break;
5469 case CodeModel::Medium:
5470 assert(Subtarget.is64Bit() && "Medium code model requires LA64");
5472 break;
5473 case CodeModel::Large:
5474 assert(Subtarget.is64Bit() && "Large code model requires LA64");
5476 break;
5477 }
5478
5479 if (IsTailCall) {
5481 SDValue Ret = DAG.getNode(Op, DL, NodeTys, Ops);
5482 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
5483 return Ret;
5484 }
5485
5486 Chain = DAG.getNode(Op, DL, NodeTys, Ops);
5487 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
5488 Glue = Chain.getValue(1);
5489
5490 // Mark the end of the call, which is glued to the call itself.
5491 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
5492 Glue = Chain.getValue(1);
5493
5494 // Assign locations to each value returned by this call.
5496 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
5497 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_LoongArch);
5498
5499 // Copy all of the result registers out of their specified physreg.
5500 for (auto &VA : RVLocs) {
5501 // Copy the value out.
5502 SDValue RetValue =
5503 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
5504 // Glue the RetValue to the end of the call sequence.
5505 Chain = RetValue.getValue(1);
5506 Glue = RetValue.getValue(2);
5507
5508 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL);
5509
5510 InVals.push_back(RetValue);
5511 }
5512
5513 return Chain;
5514}
5515
5517 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
5518 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
5520 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
5521
5522 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
5523 LoongArchABI::ABI ABI =
5524 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
5525 if (CC_LoongArch(MF.getDataLayout(), ABI, i, Outs[i].VT, CCValAssign::Full,
5526 Outs[i].Flags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true,
5527 nullptr))
5528 return false;
5529 }
5530 return true;
5531}
5532
5534 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
5536 const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
5537 SelectionDAG &DAG) const {
5538 // Stores the assignment of the return value to a location.
5540
5541 // Info about the registers and stack slot.
5542 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
5543 *DAG.getContext());
5544
5545 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
5546 nullptr, CC_LoongArch);
5547 if (CallConv == CallingConv::GHC && !RVLocs.empty())
5548 report_fatal_error("GHC functions return void only");
5549 SDValue Glue;
5550 SmallVector<SDValue, 4> RetOps(1, Chain);
5551
5552 // Copy the result values into the output registers.
5553 for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) {
5554 CCValAssign &VA = RVLocs[i];
5555 assert(VA.isRegLoc() && "Can only return in registers!");
5556
5557 // Handle a 'normal' return.
5558 SDValue Val = convertValVTToLocVT(DAG, OutVals[i], VA, DL);
5559 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
5560
5561 // Guarantee that all emitted copies are stuck together.
5562 Glue = Chain.getValue(1);
5563 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
5564 }
5565
5566 RetOps[0] = Chain; // Update chain.
5567
5568 // Add the glue node if we have it.
5569 if (Glue.getNode())
5570 RetOps.push_back(Glue);
5571
5572 return DAG.getNode(LoongArchISD::RET, DL, MVT::Other, RetOps);
5573}
5574
5575bool LoongArchTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
5576 bool ForCodeSize) const {
5577 // TODO: Maybe need more checks here after vector extension is supported.
5578 if (VT == MVT::f32 && !Subtarget.hasBasicF())
5579 return false;
5580 if (VT == MVT::f64 && !Subtarget.hasBasicD())
5581 return false;
5582 return (Imm.isZero() || Imm.isExactlyValue(+1.0));
5583}
5584
5586 return true;
5587}
5588
5590 return true;
5591}
5592
5593bool LoongArchTargetLowering::shouldInsertFencesForAtomic(
5594 const Instruction *I) const {
5595 if (!Subtarget.is64Bit())
5596 return isa<LoadInst>(I) || isa<StoreInst>(I);
5597
5598 if (isa<LoadInst>(I))
5599 return true;
5600
5601 // On LA64, atomic store operations with IntegerBitWidth of 32 and 64 do not
5602 // require fences because we can use amswap_db.[w/d].
5603 if (isa<StoreInst>(I)) {
5604 unsigned Size = I->getOperand(0)->getType()->getIntegerBitWidth();
5605 return (Size == 8 || Size == 16);
5606 }
5607
5608 return false;
5609}
5610
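The fence policy implemented above can be restated as a small predicate. The following standalone sketch (illustrative only; needsFences is not an LLVM API) captures it: LA32 fences every atomic load and store, while LA64 fences loads and only 8/16-bit stores, since 32/64-bit stores can be lowered with amswap_db.[w/d].

#include <cstdio>

enum class AtomicMemOp { Load, Store };

// Standalone restatement of the decision made by shouldInsertFencesForAtomic.
static bool needsFences(bool IsLA64, AtomicMemOp Op, unsigned StoreBitWidth) {
  if (!IsLA64)
    return true; // LA32: fence all atomic loads and stores.
  if (Op == AtomicMemOp::Load)
    return true; // LA64: loads still need fences.
  // LA64 stores: only sub-word (8/16-bit) stores need fences; 32/64-bit
  // stores use amswap_db.[w/d] instead.
  return StoreBitWidth == 8 || StoreBitWidth == 16;
}

int main() {
  std::printf("LA64 i32 store: %d\n", needsFences(true, AtomicMemOp::Store, 32));
  std::printf("LA64 i16 store: %d\n", needsFences(true, AtomicMemOp::Store, 16));
  std::printf("LA32 i32 load : %d\n", needsFences(false, AtomicMemOp::Load, 32));
  return 0;
}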
5612 LLVMContext &Context,
5613 EVT VT) const {
5614 if (!VT.isVector())
5615 return getPointerTy(DL);
5617}
5618
5620 // TODO: Support vectors.
5621 return Y.getValueType().isScalarInteger() && !isa<ConstantSDNode>(Y);
5622}
5623
5625 const CallInst &I,
5626 MachineFunction &MF,
5627 unsigned Intrinsic) const {
5628 switch (Intrinsic) {
5629 default:
5630 return false;
5631 case Intrinsic::loongarch_masked_atomicrmw_xchg_i32:
5632 case Intrinsic::loongarch_masked_atomicrmw_add_i32:
5633 case Intrinsic::loongarch_masked_atomicrmw_sub_i32:
5634 case Intrinsic::loongarch_masked_atomicrmw_nand_i32:
5636 Info.memVT = MVT::i32;
5637 Info.ptrVal = I.getArgOperand(0);
5638 Info.offset = 0;
5639 Info.align = Align(4);
5642 return true;
5643 // TODO: Add more Intrinsics later.
5644 }
5645}
5646
5649 // TODO: Add more AtomicRMWInst that needs to be extended.
5650
5651 // Since floating-point operations require a non-trivial set of data
5652 // operations, use CmpXChg to expand.
5653 if (AI->isFloatingPointOperation() ||
5657
5658 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
5659 if (Size == 8 || Size == 16)
5662}
5663
5664static Intrinsic::ID
5666 AtomicRMWInst::BinOp BinOp) {
5667 if (GRLen == 64) {
5668 switch (BinOp) {
5669 default:
5670 llvm_unreachable("Unexpected AtomicRMW BinOp");
5672 return Intrinsic::loongarch_masked_atomicrmw_xchg_i64;
5673 case AtomicRMWInst::Add:
5674 return Intrinsic::loongarch_masked_atomicrmw_add_i64;
5675 case AtomicRMWInst::Sub:
5676 return Intrinsic::loongarch_masked_atomicrmw_sub_i64;
5678 return Intrinsic::loongarch_masked_atomicrmw_nand_i64;
5680 return Intrinsic::loongarch_masked_atomicrmw_umax_i64;
5682 return Intrinsic::loongarch_masked_atomicrmw_umin_i64;
5683 case AtomicRMWInst::Max:
5684 return Intrinsic::loongarch_masked_atomicrmw_max_i64;
5685 case AtomicRMWInst::Min:
5686 return Intrinsic::loongarch_masked_atomicrmw_min_i64;
5687 // TODO: support other AtomicRMWInst.
5688 }
5689 }
5690
5691 if (GRLen == 32) {
5692 switch (BinOp) {
5693 default:
5694 llvm_unreachable("Unexpected AtomicRMW BinOp");
5696 return Intrinsic::loongarch_masked_atomicrmw_xchg_i32;
5697 case AtomicRMWInst::Add:
5698 return Intrinsic::loongarch_masked_atomicrmw_add_i32;
5699 case AtomicRMWInst::Sub:
5700 return Intrinsic::loongarch_masked_atomicrmw_sub_i32;
5702 return Intrinsic::loongarch_masked_atomicrmw_nand_i32;
5703 // TODO: support other AtomicRMWInst.
5704 }
5705 }
5706
5707 llvm_unreachable("Unexpected GRLen\n");
5708}
5709
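The intrinsic IDs returned above follow a single naming scheme; assuming LLVM's usual intrinsic-name mangling, Intrinsic::loongarch_masked_atomicrmw_add_i64 corresponds to llvm.loongarch.masked.atomicrmw.add.i64, and so on. The sketch below only formats those names as strings and does not consult LLVM's intrinsic tables.

#include <cstdio>
#include <string>

// Sketch of the naming convention behind getIntrinsicForMaskedAtomicRMWBinOp.
// Only the operations handled above are listed; GRLen == 32 supports the
// first four.
static std::string maskedAtomicRMWIntrinsicName(unsigned GRLen,
                                                const std::string &Op) {
  return "llvm.loongarch.masked.atomicrmw." + Op + ".i" +
         std::to_string(GRLen);
}

int main() {
  for (const char *Op : {"xchg", "add", "sub", "nand"})
    std::printf("%s\n", maskedAtomicRMWIntrinsicName(32, Op).c_str());
  for (const char *Op : {"umax", "umin", "max", "min"})
    std::printf("%s\n", maskedAtomicRMWIntrinsicName(64, Op).c_str());
  return 0;
}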
5712 AtomicCmpXchgInst *CI) const {
5714 if (Size == 8 || Size == 16)
5717}
5718
5720 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
5721 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
5722 AtomicOrdering FailOrd = CI->getFailureOrdering();
5723 Value *FailureOrdering =
5724 Builder.getIntN(Subtarget.getGRLen(), static_cast<uint64_t>(FailOrd));
5725
5726 // TODO: Support cmpxchg on LA32.
5727 Intrinsic::ID CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i64;
5728 CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
5729 NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
5730 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
5731 Type *Tys[] = {AlignedAddr->getType()};
5732 Function *MaskedCmpXchg =
5733 Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys);
5734 Value *Result = Builder.CreateCall(
5735 MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, FailureOrdering});
5736 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
5737 return Result;
5738}
5739
5741 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
5742 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
5743 // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
5744 // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
5745 // mask, as this produces better code than the LL/SC loop emitted by
5746 // int_loongarch_masked_atomicrmw_xchg.
5747 if (AI->getOperation() == AtomicRMWInst::Xchg &&
5748 isa<ConstantInt>(AI->getValOperand())) {
5749 ConstantInt *CVal = cast<ConstantInt>(AI->getValOperand());
5750 if (CVal->isZero())
5751 return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,
5752 Builder.CreateNot(Mask, "Inv_Mask"),
5753 AI->getAlign(), Ord);
5754 if (CVal->isMinusOne())
5755 return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,
5756 AI->getAlign(), Ord);
5757 }
5758
5759 unsigned GRLen = Subtarget.getGRLen();
5760 Value *Ordering =
5761 Builder.getIntN(GRLen, static_cast<uint64_t>(AI->getOrdering()));
5762 Type *Tys[] = {AlignedAddr->getType()};
5763 Function *LlwOpScwLoop = Intrinsic::getDeclaration(
5764 AI->getModule(),
5766
5767 if (GRLen == 64) {
5768 Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
5769 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
5770 ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
5771 }
5772
5773 Value *Result;
5774
5775 // Must pass the shift amount needed to sign extend the loaded value prior
5776 // to performing a signed comparison for min/max. ShiftAmt is the number of
5777 // bits to shift the value into position. Pass GRLen-ShiftAmt-ValWidth, which
5778 // is the number of bits to left+right shift the value in order to
5779 // sign-extend.
5780 if (AI->getOperation() == AtomicRMWInst::Min ||
5782 const DataLayout &DL = AI->getDataLayout();
5783 unsigned ValWidth =
5784 DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
5785 Value *SextShamt =
5786 Builder.CreateSub(Builder.getIntN(GRLen, GRLen - ValWidth), ShiftAmt);
5787 Result = Builder.CreateCall(LlwOpScwLoop,
5788 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
5789 } else {
5790 Result =
5791 Builder.CreateCall(LlwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
5792 }
5793
5794 if (GRLen == 64)
5795 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
5796 return Result;
5797}
5798
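The min/max path above passes (GRLen - ValWidth) - ShiftAmt so the expansion can shift the loaded word left and then arithmetically right, sign-extending the masked field in place before the signed comparison. Below is a small worked example under the assumptions GRLen = 64, ValWidth = 16, ShiftAmt = 16; the numbers are illustrative, not taken from the LLVM expansion itself.

#include <cstdint>
#include <cstdio>

int main() {
  const unsigned GRLen = 64;    // LA64.
  const unsigned ValWidth = 16; // e.g. an i16 atomicrmw min/max.
  const unsigned ShiftAmt = 16; // The i16 field sits at bits [16, 32).
  const unsigned SextShamt = GRLen - ValWidth - ShiftAmt; // 32 here.

  // Loaded aligned word whose field holds -1 (0xFFFF) at bit 16.
  uint64_t Loaded = 0x00000000FFFF0000ULL;
  // Shifting left then arithmetically right by SextShamt sign-extends the
  // field in place, so a plain signed comparison against the (equally
  // positioned) increment is valid.
  int64_t SignExtended =
      static_cast<int64_t>(Loaded << SextShamt) >> SextShamt;
  std::printf("SextShamt=%u  sign-extended=0x%016llx\n", SextShamt,
              (unsigned long long)SignExtended);
  return 0;
}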
5800 const MachineFunction &MF, EVT VT) const {
5801 VT = VT.getScalarType();
5802
5803 if (!VT.isSimple())
5804 return false;
5805
5806 switch (VT.getSimpleVT().SimpleTy) {
5807 case MVT::f32:
5808 case MVT::f64:
5809 return true;
5810 default:
5811 break;
5812 }
5813
5814 return false;
5815}
5816
5818 const Constant *PersonalityFn) const {
5819 return LoongArch::R4;
5820}
5821
5823 const Constant *PersonalityFn) const {
5824 return LoongArch::R5;
5825}
5826
5827//===----------------------------------------------------------------------===//
5828// LoongArch Inline Assembly Support
5829//===----------------------------------------------------------------------===//
5830
5832LoongArchTargetLowering::getConstraintType(StringRef Constraint) const {
5833 // LoongArch specific constraints in GCC: config/loongarch/constraints.md
5834 //
5835 // 'f': A floating-point register (if available).
5836 // 'k': A memory operand whose address is formed by a base register and
5837 // (optionally scaled) index register.
5838 // 'l': A signed 16-bit constant.
5839 // 'm': A memory operand whose address is formed by a base register and
5840 // offset that is suitable for use in instructions with the same
5841 // addressing mode as st.w and ld.w.
5842 // 'I': A signed 12-bit constant (for arithmetic instructions).
5843 // 'J': Integer zero.
5844 // 'K': An unsigned 12-bit constant (for logic instructions).
5845 // "ZB": An address that is held in a general-purpose register. The offset is
5846 // zero.
5847 // "ZC": A memory operand whose address is formed by a base register and
5848 // offset that is suitable for use in instructions with the same
5849 // addressing mode as ll.w and sc.w.
5850 if (Constraint.size() == 1) {
5851 switch (Constraint[0]) {
5852 default:
5853 break;
5854 case 'f':
5855 return C_RegisterClass;
5856 case 'l':
5857 case 'I':
5858 case 'J':
5859 case 'K':
5860 return C_Immediate;
5861 case 'k':
5862 return C_Memory;
5863 }
5864 }
5865
5866 if (Constraint == "ZC" || Constraint == "ZB")
5867 return C_Memory;
5868
5869 // 'm' is handled here.
5870 return TargetLowering::getConstraintType(Constraint);
5871}
5872
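These constraints surface in user code through GCC-style inline assembly. The snippet below is a hypothetical LoongArch64-only example (it will not build for other targets): 'I' feeds the signed 12-bit immediate of addi.d, 'K' feeds the unsigned 12-bit immediate of andi, and 'f' requests a floating-point register operand.

#include <cstdio>

// Illustrative only: assumes a LoongArch64 target, where the constraints
// below map onto the cases handled by getConstraintType and
// LowerAsmOperandForConstraint in this file.
long addi_and_mask(long X) {
  long Sum, Masked;
  asm("addi.d %0, %1, %2" : "=r"(Sum) : "r"(X), "I"(100));       // 'I': si12
  asm("andi   %0, %1, %2" : "=r"(Masked) : "r"(Sum), "K"(0xff)); // 'K': ui12
  return Masked;
}

float fneg_via_fpr(float F) {
  float R;
  asm("fneg.s %0, %1" : "=f"(R) : "f"(F)); // 'f': FPR operand
  return R;
}

int main() {
  std::printf("%ld\n", addi_and_mask(42));
  std::printf("%f\n", fneg_via_fpr(1.5f));
  return 0;
}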
5873InlineAsm::ConstraintCode LoongArchTargetLowering::getInlineAsmMemConstraint(
5874 StringRef ConstraintCode) const {
5875 return StringSwitch<InlineAsm::ConstraintCode>(ConstraintCode)
5880}
5881
5882std::pair<unsigned, const TargetRegisterClass *>
5883LoongArchTargetLowering::getRegForInlineAsmConstraint(
5884 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
5885 // First, see if this is a constraint that directly corresponds to a LoongArch
5886 // register class.
5887 if (Constraint.size() == 1) {
5888 switch (Constraint[0]) {
5889 case 'r':
5890 // TODO: Support fixed vectors up to GRLen?
5891 if (VT.isVector())
5892 break;
5893 return std::make_pair(0U, &LoongArch::GPRRegClass);
5894 case 'f':
5895 if (Subtarget.hasBasicF() && VT == MVT::f32)
5896 return std::make_pair(0U, &LoongArch::FPR32RegClass);
5897 if (Subtarget.hasBasicD() && VT == MVT::f64)
5898 return std::make_pair(0U, &LoongArch::FPR64RegClass);
5899 if (Subtarget.hasExtLSX() &&
5900 TRI->isTypeLegalForClass(LoongArch::LSX128RegClass, VT))
5901 return std::make_pair(0U, &LoongArch::LSX128RegClass);
5902 if (Subtarget.hasExtLASX() &&
5903 TRI->isTypeLegalForClass(LoongArch::LASX256RegClass, VT))
5904 return std::make_pair(0U, &LoongArch::LASX256RegClass);
5905 break;
5906 default:
5907 break;
5908 }
5909 }
5910
5911 // TargetLowering::getRegForInlineAsmConstraint uses the name of the TableGen
5912 // record (e.g. the "R0" in `def R0`) to choose registers for InlineAsm
5913 // constraints while the official register name is prefixed with a '$'. So we
5914 // clip the '$' from the original constraint string (e.g. {$r0} to {r0})
5915 // before it is parsed. And TargetLowering::getRegForInlineAsmConstraint is
5916 // case insensitive, so no need to convert the constraint to upper case here.
5917 //
5918 // For now, no need to support ABI names (e.g. `$a0`) as clang will correctly
5919 // decode the usage of register name aliases into their official names. And
5920 // AFAIK, the not yet upstreamed `rustc` for LoongArch will always use
5921 // official register names.
5922 if (Constraint.starts_with("{$r") || Constraint.starts_with("{$f") ||
5923 Constraint.starts_with("{$vr") || Constraint.starts_with("{$xr")) {
5924 bool IsFP = Constraint[2] == 'f';
5925 std::pair<StringRef, StringRef> Temp = Constraint.split('$');
5926 std::pair<unsigned, const TargetRegisterClass *> R;
5928 TRI, join_items("", Temp.first, Temp.second), VT);
5929 // Match those names to the widest floating point register type available.
5930 if (IsFP) {
5931 unsigned RegNo = R.first;
5932 if (LoongArch::F0 <= RegNo && RegNo <= LoongArch::F31) {
5933 if (Subtarget.hasBasicD() && (VT == MVT::f64 || VT == MVT::Other)) {
5934 unsigned DReg = RegNo - LoongArch::F0 + LoongArch::F0_64;
5935 return std::make_pair(DReg, &LoongArch::FPR64RegClass);
5936 }
5937 }
5938 }
5939 return R;
5940 }
5941
5942 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
5943}
5944
5945void LoongArchTargetLowering::LowerAsmOperandForConstraint(
5946 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
5947 SelectionDAG &DAG) const {
5948 // Currently only support length 1 constraints.
5949 if (Constraint.size() == 1) {
5950 switch (Constraint[0]) {
5951 case 'l':
5952 // Validate & create a 16-bit signed immediate operand.
5953 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
5954 uint64_t CVal = C->getSExtValue();
5955 if (isInt<16>(CVal))
5956 Ops.push_back(
5957 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT()));
5958 }
5959 return;
5960 case 'I':
5961 // Validate & create a 12-bit signed immediate operand.
5962 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
5963 uint64_t CVal = C->getSExtValue();
5964 if (isInt<12>(CVal))
5965 Ops.push_back(
5966 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT()));
5967 }
5968 return;
5969 case 'J':
5970 // Validate & create an integer zero operand.
5971 if (auto *C = dyn_cast<ConstantSDNode>(Op))
5972 if (C->getZExtValue() == 0)
5973 Ops.push_back(
5974 DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getGRLenVT()));
5975 return;
5976 case 'K':
5977 // Validate & create a 12-bit unsigned immediate operand.
5978 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
5979 uint64_t CVal = C->getZExtValue();
5980 if (isUInt<12>(CVal))
5981 Ops.push_back(
5982 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT()));
5983 }
5984 return;
5985 default:
5986 break;
5987 }
5988 }
5989 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
5990}
5991
5992#define GET_REGISTER_MATCHER
5993#include "LoongArchGenAsmMatcher.inc"
5994
5997 const MachineFunction &MF) const {
5998 std::pair<StringRef, StringRef> Name = StringRef(RegName).split('$');
5999 std::string NewRegName = Name.second.str();
6000 Register Reg = MatchRegisterAltName(NewRegName);
6001 if (Reg == LoongArch::NoRegister)
6002 Reg = MatchRegisterName(NewRegName);
6003 if (Reg == LoongArch::NoRegister)
6005 Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
6006 BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
6007 if (!ReservedRegs.test(Reg))
6008 report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
6009 StringRef(RegName) + "\"."));
6010 return Reg;
6011}
6012
6014 EVT VT, SDValue C) const {
6015 // TODO: Support vectors.
6016 if (!VT.isScalarInteger())
6017 return false;
6018
6019 // Omit the optimization if the data size exceeds GRLen.
6020 if (VT.getSizeInBits() > Subtarget.getGRLen())
6021 return false;
6022
6023 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
6024 const APInt &Imm = ConstNode->getAPIntValue();
6025 // Break MUL into (SLLI + ADD/SUB) or ALSL.
6026 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
6027 (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
6028 return true;
6029 // Break MUL into (ALSL x, (SLLI x, imm0), imm1).
6030 if (ConstNode->hasOneUse() &&
6031 ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
6032 (Imm - 8).isPowerOf2() || (Imm - 16).isPowerOf2()))
6033 return true;
6034 // Break (MUL x, imm) into (ADD (SLLI x, s0), (SLLI x, s1)),
6035 // in which the immediate has two set bits, or break (MUL x, imm)
6036 // into (SUB (SLLI x, s0), (SLLI x, s1)), in which the immediate
6037 // equals (1 << s0) - (1 << s1).
6038 if (ConstNode->hasOneUse() && !(Imm.sge(-2048) && Imm.sle(4095))) {
6039 unsigned Shifts = Imm.countr_zero();
6040 // Reject immediates which can be composed via a single LUI.
6041 if (Shifts >= 12)
6042 return false;
6043 // Reject multiplications that can be optimized to
6044 // (SLLI (ALSL x, x, 1/2/3/4), s).
6045 APInt ImmPop = Imm.ashr(Shifts);
6046 if (ImmPop == 3 || ImmPop == 5 || ImmPop == 9 || ImmPop == 17)
6047 return false;
6048 // We do not consider the case `(-Imm - ImmSmall).isPowerOf2()`,
6049 // since it needs one more instruction than the other 3 cases.
6050 APInt ImmSmall = APInt(Imm.getBitWidth(), 1ULL << Shifts, true);
6051 if ((Imm - ImmSmall).isPowerOf2() || (Imm + ImmSmall).isPowerOf2() ||
6052 (ImmSmall - Imm).isPowerOf2())
6053 return true;
6054 }
6055 }
6056
6057 return false;
6058}
6059
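A standalone restatement of the first two groups of checks in decomposeMulByConstant (using plain 64-bit arithmetic instead of APInt, and ignoring the range and one-use conditions) shows which multipliers are considered cheap to rewrite as shift-and-add/ALSL sequences. looksDecomposable and isPowerOf2 are illustrative helpers, not LLVM functions.

#include <cstdint>
#include <cstdio>

static bool isPowerOf2(int64_t V) { return V > 0 && (V & (V - 1)) == 0; }

// Mirrors the first two groups of checks in decomposeMulByConstant above.
static bool looksDecomposable(int64_t Imm) {
  // MUL -> (SLLI + ADD/SUB) or ALSL: Imm is 2^k +/- 1, 1 - 2^k, or -1 - 2^k.
  if (isPowerOf2(Imm + 1) || isPowerOf2(Imm - 1) || isPowerOf2(1 - Imm) ||
      isPowerOf2(-1 - Imm))
    return true;
  // MUL -> (ALSL x, (SLLI x, imm0), imm1): Imm is 2^k + 2/4/8/16.
  return isPowerOf2(Imm - 2) || isPowerOf2(Imm - 4) || isPowerOf2(Imm - 8) ||
         isPowerOf2(Imm - 16);
}

int main() {
  for (int64_t Imm : {3, 5, 6, 7, 9, 11, 17, 20, 100, 127})
    std::printf("x * %lld: %s\n", (long long)Imm,
                looksDecomposable(Imm) ? "decompose" : "use mul/other");
  return 0;
}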
6061 const AddrMode &AM,
6062 Type *Ty, unsigned AS,
6063 Instruction *I) const {
6064 // LoongArch has four basic addressing modes:
6065 // 1. reg
6066 // 2. reg + 12-bit signed offset
6067 // 3. reg + 14-bit signed offset left-shifted by 2
6068 // 4. reg1 + reg2
6069 // TODO: Add more checks after the vector extension is supported.
6070
6071 // No global is ever allowed as a base.
6072 if (AM.BaseGV)
6073 return false;
6074
6075 // Require a 12-bit signed offset or 14-bit signed offset left-shifted by 2
6076 // with `UAL` feature.
6077 if (!isInt<12>(AM.BaseOffs) &&
6078 !(isShiftedInt<14, 2>(AM.BaseOffs) && Subtarget.hasUAL()))
6079 return false;
6080
6081 switch (AM.Scale) {
6082 case 0:
6083 // "r+i" or just "i", depending on HasBaseReg.
6084 break;
6085 case 1:
6086 // "r+r+i" is not allowed.
6087 if (AM.HasBaseReg && AM.BaseOffs)
6088 return false;
6089 // Otherwise we have "r+r" or "r+i".
6090 break;
6091 case 2:
6092 // "2*r+r" or "2*r+i" is not allowed.
6093 if (AM.HasBaseReg || AM.BaseOffs)
6094 return false;
6095 // Allow "2*r" as "r+r".
6096 break;
6097 default:
6098 return false;
6099 }
6100
6101 return true;
6102}
6103
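The offset checks above correspond to the si12 base+offset form (e.g. ld.d) and, when UAL is available, the 14-bit offset scaled by 4 (the ldptr/stptr-style encoding). The sketch below reimplements the two range predicates standalone; fitsSInt12 and fitsSInt14Shifted2 are illustrative stand-ins for llvm::isInt<12> and llvm::isShiftedInt<14, 2>.

#include <cstdint>
#include <cstdio>

// Standalone equivalents of isInt<12>(Offs) and isShiftedInt<14, 2>(Offs).
static bool fitsSInt12(int64_t Offs) {
  return Offs >= -2048 && Offs <= 2047;
}
static bool fitsSInt14Shifted2(int64_t Offs) {
  return (Offs % 4 == 0) && Offs >= -(1 << 15) && Offs <= (1 << 15) - 4;
}

int main() {
  const bool HasUAL = true; // Matches the Subtarget.hasUAL() check above.
  for (int64_t Offs : {0, 4, 2047, 2048, 8000, 32764, 32768, -32768}) {
    bool OK = fitsSInt12(Offs) || (HasUAL && fitsSInt14Shifted2(Offs));
    std::printf("offset %6lld: %s\n", (long long)Offs,
                OK ? "legal base+offset" : "needs extra address math");
  }
  return 0;
}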
6105 return isInt<12>(Imm);
6106}
6107
6109 return isInt<12>(Imm);
6110}
6111
6113 // Zexts are free if they can be combined with a load.
6114 // Don't advertise i32->i64 zextload as being free for LA64. It interacts
6115 // poorly with type legalization of compares preferring sext.
6116 if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
6117 EVT MemVT = LD->getMemoryVT();
6118 if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
6119 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
6120 LD->getExtensionType() == ISD::ZEXTLOAD))
6121 return true;
6122 }
6123
6124 return TargetLowering::isZExtFree(Val, VT2);
6125}
6126
6128 EVT DstVT) const {
6129 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
6130}
6131
6133 return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
6134}
6135
6137 // TODO: Support vectors.
6138 if (Y.getValueType().isVector())
6139 return false;
6140
6141 return !isa<ConstantSDNode>(Y);
6142}
6143
6145 // TODO: LAMCAS will use amcas{_DB,}.[bhwd] which does not require extension.
6146 return ISD::SIGN_EXTEND;
6147}
6148
6150 EVT Type, bool IsSigned) const {
6151 if (Subtarget.is64Bit() && Type == MVT::i32)
6152 return true;
6153
6154 return IsSigned;
6155}
6156
6158 // Return false to suppress the unnecessary extensions if the LibCall
6159 // arguments or return value is a float narrower than GRLen on a soft FP ABI.
6160 if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
6161 Type.getSizeInBits() < Subtarget.getGRLen()))
6162 return false;
6163 return true;
6164}
unsigned const MachineRegisterInfo * MRI
static MCRegister MatchRegisterName(StringRef Name)
static bool checkValueWidth(SDValue V, unsigned width, ISD::LoadExtType &ExtType)
static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget, const AArch64TargetLowering &TLI)
static SDValue performANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue performSETCCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
#define NODE_NAME_CASE(node)
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static MCRegister MatchRegisterAltName(StringRef Name)
Maps from the set of all alternative registernames to a register number.
Function Alias Analysis Results
static uint64_t getConstant(const Value *IndexValue)
static SDValue getTargetNode(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG, unsigned Flags)
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static SDValue unpackFromRegLoc(const CSKYSubtarget &Subtarget, SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
uint64_t Addr
std::string Name
uint64_t Size
bool End
Definition: ELF_riscv.cpp:480
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define RegName(no)
static SDValue performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_VREPLVEI(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VREPLVEI (if possible).
const MCPhysReg ArgFPR32s[]
static SDValue lowerVECTOR_SHUFFLE_VSHUF4I(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VSHUF4I (if possible).
const MCPhysReg ArgVRs[]
static SDValue lowerVECTOR_SHUFFLE_VPICKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPICKEV (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVPICKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPICKOD (if possible).
static bool fitsRegularPattern(typename SmallVectorImpl< ValType >::const_iterator Begin, unsigned CheckStride, typename SmallVectorImpl< ValType >::const_iterator End, ValType ExpectedIndex, unsigned ExpectedIndexStride)
Determine whether a range fits a regular pattern of values.
static void canonicalizeShuffleVectorByLane(const SDLoc &DL, MutableArrayRef< int > Mask, MVT VT, SDValue &V1, SDValue &V2, SelectionDAG &DAG)
Shuffle vectors by lane to generate more optimized instructions.
static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg, SelectionDAG &DAG)
static cl::opt< bool > ZeroDivCheck("loongarch-check-zero-division", cl::Hidden, cl::desc("Trap on integer division by zero."), cl::init(false))
static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Dispatching routine to lower various 256-bit LoongArch vector shuffles.
static void emitErrorAndReplaceIntrinsicResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, StringRef ErrorMsg, bool WithChain=true)
static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp, SelectionDAG &DAG, bool IsSigned=false)
static SDValue lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVSHUF4I (if possible).
static SDValue lowerVECTOR_SHUFFLE_VILVH(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VILVH (if possible).
static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG)
static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI, unsigned ValNo, MVT ValVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, bool IsRet, Type *OrigTy)
static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue lowerVectorBitSetImm(SDNode *Node, SelectionDAG &DAG)
static SDValue lowerVECTOR_SHUFFLE_XVPACKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPACKOD (if possible).
#define CRC_CASE_EXT_BINARYOP(NAME, NODE)
static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG)
static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG)
static MachineBasicBlock * insertDivByZeroTrap(MachineInstr &MI, MachineBasicBlock *MBB)
static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG)
static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG)
static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
static SDValue lowerVECTOR_SHUFFLE_VPACKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPACKEV (if possible).
static void replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, unsigned ResOp)
static SDValue legalizeIntrinsicImmArg(SDNode *Node, unsigned ImmOp, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, bool IsSigned=false)
static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op, StringRef ErrorMsg, SelectionDAG &DAG)
const MCPhysReg ArgXRs[]
static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State, CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2, MVT ValVT2, MVT LocVT2, ISD::ArgFlagsTy ArgFlags2)
const MCPhysReg ArgFPR64s[]
#define IOCSRWR_CASE(NAME, NODE)
#define CRC_CASE_EXT_UNARYOP(NAME, NODE)
static SDValue lowerVECTOR_SHUFFLE_VPACKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPACKOD (if possible).
static MachineBasicBlock * emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp, SelectionDAG &DAG, bool IsSigned=false)
const MCPhysReg ArgGPRs[]
static SDValue lowerVECTOR_SHUFFLE_XVILVL(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVILVL (if possible).
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp, unsigned ExtOpc=ISD::ANY_EXTEND)
static void replaceVecCondBranchResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, unsigned ResOp)
#define ASRT_LE_GT_CASE(NAME)
static bool isConstantOrUndef(const SDValue Op)
static SDValue lowerVECTOR_SHUFFLE_XVPACKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPACKEV (if possible).
static MachineBasicBlock * emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_XVILVH(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVILVH (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVSHUF (if possible).
static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
#define IOCSRRD_CASE(NAME, NODE)
static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Dispatching routine to lower various 128-bit LoongArch vector shuffles.
static SDValue lowerVECTOR_SHUFFLE_XVPICKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPICKEV (if possible).
static SDValue lowerVECTOR_SHUFFLE_VILVL(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VILVL (if possible).
static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG)
static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode *Op)
static void replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVREPLVEI (if possible).
#define CSR_CASE(ID)
static SDValue lowerVECTOR_SHUFFLE_VPICKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPICKOD (if possible).
static Intrinsic::ID getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen, AtomicRMWInst::BinOp BinOp)
static SDValue lowerVECTOR_SHUFFLE_VSHUF(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VSHUF.
static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode)
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
unsigned const TargetRegisterInfo * TRI
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
static CodeModel::Model getCodeModel(const PPCSubtarget &S, const TargetMachine &TM, const MachineOperand &MO)
const char LLVMTargetMachineRef TM
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
This file contains some functions that are useful when dealing with strings.
Class for arbitrary precision integers.
Definition: APInt.h:78
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition: APInt.h:1237
This class represents an incoming formal argument to a Function.
Definition: Argument.h:31
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
An instruction that atomically checks whether a specified value is in a memory location,...
Definition: Instructions.h:495
AtomicOrdering getFailureOrdering() const
Returns the failure ordering constraint of this cmpxchg instruction.
Definition: Instructions.h:586
an instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:696
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
Definition: Instructions.h:809
BinOp
This enumeration lists the possible modifications atomicrmw can make.
Definition: Instructions.h:708
@ Add
*p = old + v
Definition: Instructions.h:712
@ Min
*p = old <signed v ? old : v
Definition: Instructions.h:726
@ Or
*p = old | v
Definition: Instructions.h:720
@ Sub
*p = old - v
Definition: Instructions.h:714
@ And
*p = old & v
Definition: Instructions.h:716
@ UIncWrap
Increment one up to a maximum value.
Definition: Instructions.h:748
@ Max
*p = old >signed v ? old : v
Definition: Instructions.h:724
@ UMin
*p = old <unsigned v ? old : v
Definition: Instructions.h:730
@ UMax
*p = old >unsigned v ? old : v
Definition: Instructions.h:728
@ UDecWrap
Decrement one until a minimum value or zero.
Definition: Instructions.h:752
@ Nand
*p = ~(old & v)
Definition: Instructions.h:718
bool isFloatingPointOperation() const
Definition: Instructions.h:864
BinOp getOperation() const
Definition: Instructions.h:787
Value * getValOperand()
Definition: Instructions.h:856
AtomicOrdering getOrdering() const
Returns the ordering constraint of this rmw instruction.
Definition: Instructions.h:829
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
bool test(unsigned Idx) const
Definition: BitVector.h:461
A "pseudo-class" with methods for operating on BUILD_VECTORs.
CCState - This class holds information needed while lowering arguments and return values.
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
SmallVectorImpl< ISD::ArgFlagsTy > & getPendingArgFlags()
MCRegister AllocateReg(MCPhysReg Reg)
AllocateReg - Attempt to allocate one register.
int64_t AllocateStack(unsigned Size, Align Alignment)
AllocateStack - Allocate a chunk of stack space with the specified size and alignment.
void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeCallOperands - Analyze the outgoing arguments to a call, incorporating info about the passed v...
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
SmallVectorImpl< CCValAssign > & getPendingLocs()
void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
void addLoc(const CCValAssign &V)
CCValAssign - Represent assignment of one arg/retval to a location.
bool isRegLoc() const
static CCValAssign getPending(unsigned ValNo, MVT ValVT, MVT LocVT, LocInfo HTP, unsigned ExtraInfo=0)
Register getLocReg() const
LocInfo getLocInfo() const
static CCValAssign getMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP, bool IsCustom=false)
static CCValAssign getReg(unsigned ValNo, MVT ValVT, unsigned RegNo, MVT LocVT, LocInfo HTP, bool IsCustom=false)
bool isMemLoc() const
int64_t getLocMemOffset() const
unsigned getValNo() const
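The CCState/CCValAssign entries above are the core of argument lowering. The following is a minimal, hedged sketch of the usual pattern (not the code from this file); the assign function CC_MyTarget is a hypothetical placeholder for a target's generated calling-convention function.
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/IR/Function.h"
#include <cassert>
using namespace llvm;
// Sketch only: walk the formal arguments and classify each one as either a
// register location or a stack location. CC_MyTarget is hypothetical.
static void classifyFormalArgs(MachineFunction &MF, CallingConv::ID CallConv,
                               bool IsVarArg,
                               const SmallVectorImpl<ISD::InputArg> &Ins,
                               CCAssignFn *CC_MyTarget) {
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs,
                 MF.getFunction().getContext());
  // Populate ArgLocs with one CCValAssign per incoming argument.
  CCInfo.AnalyzeFormalArguments(Ins, CC_MyTarget);
  for (const CCValAssign &VA : ArgLocs) {
    if (VA.isRegLoc()) {
      // The argument arrives in VA.getLocReg(); a backend would copy it into
      // a virtual register here.
      (void)VA.getLocReg();
    } else {
      assert(VA.isMemLoc() && "expected a stack-passed argument");
      // The argument lives at VA.getLocMemOffset() relative to the incoming
      // stack pointer; a backend would create a fixed frame object and load it.
      (void)VA.getLocMemOffset();
    }
  }
}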
bool isMustTailCall() const
Tests if this call site must be tail call optimized.
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
This is the shared class of boolean and integer constants.
Definition: Constants.h:81
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition: Constants.h:218
bool isZero() const
This is just a convenience method to make client code smaller for a common case.
Definition: Constants.h:206
uint64_t getZExtValue() const
int64_t getSExtValue() const
This is an important base class in LLVM.
Definition: Constant.h:42
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
unsigned getPointerSizeInBits(unsigned AS=0) const
Layout pointer size, in bits. FIXME: The defaults need to be removed once all of the backends/clients ...
Definition: DataLayout.h:410
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:874
A debug info location.
Definition: DebugLoc.h:33
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:207
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:274
Argument * getArg(unsigned i) const
Definition: Function.h:849
bool isDSOLocal() const
Definition: GlobalValue.h:305
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:91
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2038
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Definition: IRBuilder.h:523
IntegerType * getInt64Ty()
Fetch the type representing a 64-bit integer.
Definition: IRBuilder.h:528
Value * CreateNot(Value *V, const Twine &Name="")
Definition: IRBuilder.h:1754
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1349
ConstantInt * getIntN(unsigned N, uint64_t C)
Get a constant N-bit value, zero extended or truncated from a 64-bit value.
Definition: IRBuilder.h:494
AtomicRMWInst * CreateAtomicRMW(AtomicRMWInst::BinOp Op, Value *Ptr, Value *Val, MaybeAlign Align, AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Definition: IRBuilder.h:1859
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition: IRBuilder.h:2012
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args=std::nullopt, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2417
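As a rough illustration of the IRBuilder helpers listed above (a hedged sketch under assumed types, not code from this file): widen a value, perform an atomicrmw add, and truncate the old value back.
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Support/Alignment.h"
using namespace llvm;
// Sketch: Ptr is assumed to point at an i64 slot and Incr is assumed to be an
// i32 value; both assumptions are for illustration only.
static Value *atomicAddExample(IRBuilderBase &B, Value *Ptr, Value *Incr) {
  Value *Wide = B.CreateSExt(Incr, B.getInt64Ty());            // i32 -> i64
  AtomicRMWInst *Old = B.CreateAtomicRMW(AtomicRMWInst::Add, Ptr, Wide,
                                         Align(8),
                                         AtomicOrdering::Monotonic);
  return B.CreateTrunc(Old, B.getInt32Ty());                   // old value, i32
}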
const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not...
Definition: Instruction.cpp:66
const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Definition: Instruction.cpp:74
Class to represent integer types.
Definition: DerivedTypes.h:40
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
void emitError(uint64_t LocCookie, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location inf...
This class is used to represent ISD::LOAD nodes.
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
LoongArchMachineFunctionInfo - This class is derived from MachineFunctionInfo and contains private Lo...
const LoongArchRegisterInfo * getRegisterInfo() const override
const LoongArchInstrInfo * getInstrInfo() const override
unsigned getMaxBytesForAlignment() const
Align getPrefFunctionAlignment() const
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override
Return true if result of the specified node is used by a return node only.
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
bool isLegalICmpImmediate(int64_t Imm) const override
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *CI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
Return the ValueType of the result of SETCC operations.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Determine if the target supports unaligned memory accesses.
bool isCheapToSpeculateCtlz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic ctlz.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const override
Returns true if arguments should be sign-extended in lib calls.
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
bool signExtendConstant(const ConstantInt *CI) const override
Return true if this constant should be sign extended when promoting to a larger type.
const char * getTargetNodeName(unsigned Opcode) const override
This method returns the name of a target specific DAG node.
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
bool isLegalAddImmediate(int64_t Imm) const override
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
bool isCheapToSpeculateCttz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic cttz.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
bool shouldExtendTypeInLibCall(EVT Type) const override
Returns true if arguments should be extended in lib calls.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
bool hasAndNot(SDValue Y) const override
Return true if the target has a bitwise and-not operation: X = ~A & B This can be used to simplify se...
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context) const override
This hook should be implemented to check whether the return values described by the Outs array can fi...
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
ISD::NodeType getExtendForAtomicCmpSwapArg() const override
Returns how the platform's atomic compare and swap expects its comparison value to be extended (ZERO_...
LoongArchTargetLowering(const TargetMachine &TM, const LoongArchSubtarget &STI)
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
bool hasAndNotCompare(SDValue Y) const override
Return true if the target should transform: (X & Y) == Y ---> (~X & Y) == 0 (X & Y) !...
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array,...
bool mayBeEmittedAsTailCall(const CallInst *CI) const override
Return true if the target may be able to emit the call instruction as a tail call.
bool hasFeature(unsigned Feature) const
Machine Value Type.
bool is128BitVector() const
Return true if this is a 128-bit vector type.
SimpleValueType SimpleTy
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
static auto fixedlen_vector_valuetypes()
bool is256BitVector() const
Return true if this is a 256-bit vector type.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
MVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
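A short hedged sketch of the BuildMI/addReg/addImm chain referenced above; the opcode is passed in as a plain unsigned because any concrete instruction name here would be a placeholder.
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
using namespace llvm;
// Sketch: emit "Dst = <AddImmOpcode> Src, Imm" before iterator I in MBB.
static void emitRegImmOp(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                         const DebugLoc &DL, const TargetInstrInfo &TII,
                         Register Dst, Register Src, int64_t Imm,
                         unsigned AddImmOpcode /* placeholder opcode */) {
  BuildMI(MBB, I, DL, TII.get(AddImmOpcode), Dst)
      .addReg(Src)
      .addImm(Imm);
}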
Representation of each machine instruction.
Definition: MachineInstr.h:69
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:579
A description of a memory reference used in the backend.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
void setIsKill(bool Val=true)
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
void addLiveIn(MCRegister Reg, Register vreg=Register())
addLiveIn - Add the specified register as a live-in.
EVT getMemoryVT() const
Return the type of the in-memory value.
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:307
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
size_t use_size() const
Return the number of uses of this node.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
const SDValue & getOperand(unsigned i) const
uint64_t getScalarValueSizeInBits() const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:227
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:737
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), AAResults *AA=nullptr)
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:494
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
Definition: SelectionDAG.h:747
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:843
bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:488
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
SDValue getRegister(unsigned Reg, EVT VT)
SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:489
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, unsigned Reg, SDValue N)
Definition: SelectionDAG.h:788
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:691
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:783
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:483
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT)
Definition: SelectionDAG.h:814
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getRegisterMask(const uint32_t *RegMask)
LLVMContext * getContext() const
Definition: SelectionDAG.h:501
SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:754
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:571
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
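To connect the SelectionDAG builder calls above, here is a hedged, self-contained sketch (not this file's lowering code) that builds select(x == 0, y, x) out of getConstant, getSetCC and getNode.
#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;
// Sketch: MVT::i1 is used as the compare result type for brevity; a real
// target would ask getSetCCResultType() instead.
static SDValue selectIfZero(SelectionDAG &DAG, const SDLoc &DL, SDValue X,
                            SDValue Y) {
  EVT VT = X.getValueType();
  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue IsZero = DAG.getSetCC(DL, MVT::i1, X, Zero, ISD::SETEQ);
  return DAG.getNode(ISD::SELECT, DL, VT, IsZero, Y, X);
}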
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
ArrayRef< int > getMask() const
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
typename SuperClass::const_iterator const_iterator
Definition: SmallVector.h:591
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
StackOffset holds a fixed and a scalable offset in bytes.
Definition: TypeSize.h:33
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition: StringRef.h:685
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:250
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:137
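A tiny hedged example of the StringRef helpers above (starts_with and split); the "cpu=" option syntax is purely illustrative.
#include "llvm/ADT/StringRef.h"
using namespace llvm;
// Sketch: recognise a hypothetical "cpu=<name>" option and return the value.
static bool parseCpuOption(StringRef Opt, StringRef &CpuOut) {
  if (!Opt.starts_with("cpu="))
    return false;
  CpuOut = Opt.split('=').second; // {"cpu", "<name>"} -> keep the value part
  return true;
}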
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:44
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:69
R Default(T Value)
Definition: StringSwitch.h:182
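StringSwitch is typically chained as below; a minimal hedged sketch, with register names and numbers chosen only for illustration.
#include "llvm/ADT/StringSwitch.h"
using namespace llvm;
// Sketch: map a register name to a number, or -1 if unknown.
static int regNumberForName(StringRef Name) {
  return StringSwitch<int>(Name)
      .Case("zero", 0)
      .Case("ra", 1)
      .Case("sp", 3)
      .Default(-1);
}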
TargetInstrInfo - Interface to description of machine instruction set.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
const TargetMachine & getTargetMachine() const
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
void setMaxBytesForAlignment(unsigned MaxBytes)
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
void setMinCmpXchgSizeInBits(unsigned SizeInBits)
Sets the minimum cmpxchg or ll/sc size supported by the backend.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
std::vector< ArgListEntry > ArgListTy
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
virtual InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
bool useTLSDESC() const
Returns true if this target uses TLS Descriptors.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
bool shouldAssumeDSOLocal(const GlobalValue *GV) const
CodeModel::Model getCodeModel() const
Returns the code model.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetInstrInfo * getInstrInfo() const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
unsigned getIntegerBitWidth() const
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:228
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
This class is used to represent EVT's, which are used to parameterize some operations.
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
self_iterator getIterator()
Definition: ilist_node.h:132
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Entry
Definition: COFF.h:811
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition: CallingConv.h:50
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:40
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:779
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain.
Definition: ISDOpcodes.h:1169
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
Definition: ISDOpcodes.h:1165
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition: ISDOpcodes.h:490
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:257
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:743
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
Definition: ISDOpcodes.h:1198
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:246
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1074
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:813
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:497
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:205
@ GlobalAddress
Definition: ISDOpcodes.h:78
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:820
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:397
@ MEMBARRIER
MEMBARRIER - Compiler barrier only; generate a no-op.
Definition: ISDOpcodes.h:1271
@ ATOMIC_FENCE
OUTCHAIN = ATOMIC_FENCE(INCHAIN, ordering, scope) This corresponds to the fence instruction.
Definition: ISDOpcodes.h:1276
@ STRICT_FSETCCS
Definition: ISDOpcodes.h:491
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
Definition: ISDOpcodes.h:943
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:933
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
Definition: ISDOpcodes.h:1455
@ GlobalTLSAddress
Definition: ISDOpcodes.h:79
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:804
@ WRITE_REGISTER
Definition: ISDOpcodes.h:125
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
Definition: ISDOpcodes.h:1031
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:960
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:1120
@ BR_JT
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:1099
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition: ISDOpcodes.h:521
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:756
@ UNDEF
UNDEF - An undefined node.
Definition: ISDOpcodes.h:218
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
Definition: ISDOpcodes.h:1194
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:673
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:734
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:614
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimumNumber or maximumNumber on two values,...
Definition: ISDOpcodes.h:1021
@ READ_REGISTER
READ_REGISTER, WRITE_REGISTER - This node represents llvm.register on the DAG, which implements the n...
Definition: ISDOpcodes.h:124
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:549
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition: ISDOpcodes.h:209
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:810
@ DEBUGTRAP
DEBUGTRAP - Trap intended to get the attention of a debugger.
Definition: ISDOpcodes.h:1254
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:771
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
Definition: ISDOpcodes.h:1084
@ ConstantPool
Definition: ISDOpcodes.h:82
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:828
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition: ISDOpcodes.h:696
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:765
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition: ISDOpcodes.h:135
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition: ISDOpcodes.h:100
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:866
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:708
@ TRAP
TRAP - Trapping instruction.
Definition: ISDOpcodes.h:1251
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:190
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:538
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:816
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
Definition: ISDOpcodes.h:1189
@ BlockAddress
Definition: ISDOpcodes.h:84
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition: ISDOpcodes.h:793
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:61
@ AssertZext
Definition: ISDOpcodes.h:62
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:198
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:529
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1578
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1558
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1513
ABI getTargetABI(StringRef ABIName)
Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition: LLVMContext.h:54
@ GeneralDynamic
Definition: CodeGen.h:46
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:480
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
Definition: MathExtras.h:285
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition: MathExtras.h:193
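For the two MathExtras predicates above, a couple of hedged sanity-check examples:
#include "llvm/Support/MathExtras.h"
#include <cassert>
using namespace llvm;
static void mathExtrasExamples() {
  assert(isShiftedMask_64(0x0ff0) && "one contiguous run of set bits");
  assert(!isShiftedMask_64(0x0f0f) && "two separate runs are not a shifted mask");
  assert(isUInt<12>(4095) && !isUInt<12>(4096) && "12-bit unsigned range check");
}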
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Other
Any other memory.
unsigned getKillRegState(bool B)
DWARFExpression::Operation Op
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Extended Value Type.
Definition: ValueTypes.h:34
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:93
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:380
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:136
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:358
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:370
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:306
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition: ValueTypes.h:203
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:366
static EVT getFloatingPointVT(unsigned BitWidth)
Returns the EVT that represents a floating-point type with the given number of bits.
Definition: ValueTypes.h:58
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:167
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:313
bool is256BitVector() const
Return true if this is a 256-bit vector type.
Definition: ValueTypes.h:208
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:203
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:318
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:156
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:326
Align getNonZeroOrigAlign() const
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
Register getFrameRegister(const MachineFunction &MF) const override
BitVector getReservedRegs(const MachineFunction &MF) const override
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
SmallVector< ISD::OutputArg, 32 > Outs
SmallVector< SDValue, 32 > OutVals
This structure is used to pass arguments to makeLibCall function.
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT, bool Value=true)