1//=- LoongArchISelLowering.cpp - LoongArch DAG Lowering Implementation ---===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that LoongArch uses to lower LLVM code into
10// a selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
15#include "LoongArch.h"
18#include "LoongArchSubtarget.h"
22#include "llvm/ADT/SmallSet.h"
23#include "llvm/ADT/Statistic.h"
29#include "llvm/IR/IRBuilder.h"
31#include "llvm/IR/IntrinsicsLoongArch.h"
33#include "llvm/Support/Debug.h"
38
39using namespace llvm;
40
41#define DEBUG_TYPE "loongarch-isel-lowering"
42
43STATISTIC(NumTailCalls, "Number of tail calls");
44
53
55 "loongarch-materialize-float-imm", cl::Hidden,
56 cl::desc("Maximum number of instructions used (including code sequence "
57 "to generate the value and moving the value to FPR) when "
58 "materializing floating-point immediates (default = 3)"),
60 cl::values(clEnumValN(NoMaterializeFPImm, "0", "Use constant pool"),
62 "Materialize FP immediate within 2 instructions"),
64 "Materialize FP immediate within 3 instructions"),
66 "Materialize FP immediate within 4 instructions"),
68 "Materialize FP immediate within 5 instructions"),
70 "Materialize FP immediate within 6 instructions "
71 "(behaves same as 5 on loongarch64)")));
72
73static cl::opt<bool> ZeroDivCheck("loongarch-check-zero-division", cl::Hidden,
74 cl::desc("Trap on integer division by zero."),
75 cl::init(false));
76
77LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
78                                                 const LoongArchSubtarget &STI)
79 : TargetLowering(TM), Subtarget(STI) {
80
81 MVT GRLenVT = Subtarget.getGRLenVT();
82
83 // Set up the register classes.
84
85 addRegisterClass(GRLenVT, &LoongArch::GPRRegClass);
86 if (Subtarget.hasBasicF())
87 addRegisterClass(MVT::f32, &LoongArch::FPR32RegClass);
88 if (Subtarget.hasBasicD())
89 addRegisterClass(MVT::f64, &LoongArch::FPR64RegClass);
90
91 static const MVT::SimpleValueType LSXVTs[] = {
92 MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32, MVT::v2f64};
93 static const MVT::SimpleValueType LASXVTs[] = {
94 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, MVT::v8f32, MVT::v4f64};
95
96 if (Subtarget.hasExtLSX())
97 for (MVT VT : LSXVTs)
98 addRegisterClass(VT, &LoongArch::LSX128RegClass);
99
100 if (Subtarget.hasExtLASX())
101 for (MVT VT : LASXVTs)
102 addRegisterClass(VT, &LoongArch::LASX256RegClass);
103
104 // Set operations for LA32 and LA64.
105
107 MVT::i1, Promote);
108
115
118 GRLenVT, Custom);
119
121
122 setOperationAction(ISD::DYNAMIC_STACKALLOC, GRLenVT, Expand);
123 setOperationAction({ISD::STACKSAVE, ISD::STACKRESTORE}, MVT::Other, Expand);
124 setOperationAction(ISD::VASTART, MVT::Other, Custom);
125 setOperationAction({ISD::VAARG, ISD::VACOPY, ISD::VAEND}, MVT::Other, Expand);
126
127 setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
128 setOperationAction(ISD::TRAP, MVT::Other, Legal);
129
133
134 setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
135
136 // BITREV/REVB requires the 32S feature.
137 if (STI.has32S()) {
138    // Expand bitreverse.i16 with native-width bitrev and shift for now, until
139    // we know whether sll or revb.2h is faster.
142
143 // LA32 does not have REVB.2W and REVB.D due to the 64-bit operands, and
144 // the narrower REVB.W does not exist. But LA32 does have REVB.2H, so i16
145 // and i32 could still be byte-swapped relatively cheaply.
147 } else {
155 }
156
157 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
158 setOperationAction(ISD::BR_CC, GRLenVT, Expand);
159 setOperationAction(ISD::BRCOND, MVT::Other, Custom);
163
166
167 // Set operations for LA64 only.
168
169 if (Subtarget.is64Bit()) {
176 setOperationAction(ISD::BITCAST, MVT::i32, Custom);
187
191 Custom);
192 setOperationAction(ISD::LROUND, MVT::i32, Custom);
193 }
194
195 // Set operations for LA32 only.
196
197 if (!Subtarget.is64Bit()) {
203 if (Subtarget.hasBasicD())
204 setOperationAction(ISD::BITCAST, MVT::i64, Custom);
205 }
206
207 setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
208
209 static const ISD::CondCode FPCCToExpand[] = {
212
213 // Set operations for 'F' feature.
214
215 if (Subtarget.hasBasicF()) {
216 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
217 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
218 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
219 setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
220 setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
221
224 setOperationAction(ISD::BR_CC, MVT::f32, Expand);
226 setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal);
227 setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
228 setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal);
229 setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
234 setOperationAction(ISD::FSIN, MVT::f32, Expand);
235 setOperationAction(ISD::FCOS, MVT::f32, Expand);
236 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
237 setOperationAction(ISD::FPOW, MVT::f32, Expand);
239 setOperationAction(ISD::FP16_TO_FP, MVT::f32,
240 Subtarget.isSoftFPABI() ? LibCall : Custom);
241 setOperationAction(ISD::FP_TO_FP16, MVT::f32,
242 Subtarget.isSoftFPABI() ? LibCall : Custom);
243 setOperationAction(ISD::BF16_TO_FP, MVT::f32, Custom);
244 setOperationAction(ISD::FP_TO_BF16, MVT::f32,
245 Subtarget.isSoftFPABI() ? LibCall : Custom);
246
247 if (Subtarget.is64Bit()) {
248 setOperationAction(ISD::FRINT, MVT::f32, Legal);
249 setOperationAction(ISD::FLOG2, MVT::f32, Legal);
250 }
251
252 if (!Subtarget.hasBasicD()) {
254 if (Subtarget.is64Bit()) {
257 }
258 }
259 }
260
261 // Set operations for 'D' feature.
262
263 if (Subtarget.hasBasicD()) {
264 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
265 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
266 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
267 setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
268 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
269 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
270 setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
271
274 setOperationAction(ISD::BR_CC, MVT::f64, Expand);
278 setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal);
279 setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
280 setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal);
282 setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
284 setOperationAction(ISD::FSIN, MVT::f64, Expand);
285 setOperationAction(ISD::FCOS, MVT::f64, Expand);
286 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
287 setOperationAction(ISD::FPOW, MVT::f64, Expand);
289 setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
290 setOperationAction(ISD::FP_TO_FP16, MVT::f64,
291 Subtarget.isSoftFPABI() ? LibCall : Custom);
292 setOperationAction(ISD::BF16_TO_FP, MVT::f64, Custom);
293 setOperationAction(ISD::FP_TO_BF16, MVT::f64,
294 Subtarget.isSoftFPABI() ? LibCall : Custom);
295
296 if (Subtarget.is64Bit()) {
297 setOperationAction(ISD::FRINT, MVT::f64, Legal);
298 setOperationAction(ISD::FLOG2, MVT::f64, Legal);
299 }
300 }
301
302 // Set operations for 'LSX' feature.
303
304 if (Subtarget.hasExtLSX()) {
306 // Expand all truncating stores and extending loads.
307 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
308 setTruncStoreAction(VT, InnerVT, Expand);
311 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
312 }
313 // By default everything must be expanded. Then we will selectively turn
314 // on ones that can be effectively codegen'd.
315 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
317 }
318
319 for (MVT VT : LSXVTs) {
320 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Legal);
321 setOperationAction(ISD::BITCAST, VT, Legal);
323
327
332 }
333 for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
336 Legal);
338 VT, Legal);
345 Expand);
354 }
355 for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
357 for (MVT VT : {MVT::v8i16, MVT::v4i32, MVT::v2i64})
359 for (MVT VT : {MVT::v4i32, MVT::v2i64}) {
362 }
363 for (MVT VT : {MVT::v4f32, MVT::v2f64}) {
367 setOperationAction(ISD::FSQRT, VT, Legal);
368 setOperationAction(ISD::FNEG, VT, Legal);
369 setOperationAction(ISD::FLOG2, VT, Legal);
372 VT, Expand);
374 setOperationAction(ISD::FCEIL, VT, Legal);
375 setOperationAction(ISD::FFLOOR, VT, Legal);
376 setOperationAction(ISD::FTRUNC, VT, Legal);
377 setOperationAction(ISD::FROUNDEVEN, VT, Legal);
378 setOperationAction(ISD::FMINNUM, VT, Legal);
379 setOperationAction(ISD::FMAXNUM, VT, Legal);
380 }
382 setOperationAction(ISD::FCEIL, {MVT::f32, MVT::f64}, Legal);
383 setOperationAction(ISD::FFLOOR, {MVT::f32, MVT::f64}, Legal);
384 setOperationAction(ISD::FTRUNC, {MVT::f32, MVT::f64}, Legal);
385 setOperationAction(ISD::FROUNDEVEN, {MVT::f32, MVT::f64}, Legal);
386
387 for (MVT VT :
388 {MVT::v16i8, MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v8i16, MVT::v4i16,
389 MVT::v2i16, MVT::v4i32, MVT::v2i32, MVT::v2i64}) {
391 setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
392 setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
393 setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
394 setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
395 setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
396 setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
397 setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
398 setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
399 }
400 }
401
402 // Set operations for 'LASX' feature.
403
404 if (Subtarget.hasExtLASX()) {
405 for (MVT VT : LASXVTs) {
406 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Legal);
407 setOperationAction(ISD::BITCAST, VT, Legal);
409
415
419 }
420 for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) {
423 Legal);
425 VT, Legal);
432 Expand);
441 setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
442 }
443 for (MVT VT : {MVT::v32i8, MVT::v16i16, MVT::v8i32})
445 for (MVT VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64})
447 for (MVT VT : {MVT::v8i32, MVT::v4i32, MVT::v4i64}) {
450 }
451 for (MVT VT : {MVT::v8f32, MVT::v4f64}) {
455 setOperationAction(ISD::FSQRT, VT, Legal);
456 setOperationAction(ISD::FNEG, VT, Legal);
457 setOperationAction(ISD::FLOG2, VT, Legal);
460 VT, Expand);
462 setOperationAction(ISD::FCEIL, VT, Legal);
463 setOperationAction(ISD::FFLOOR, VT, Legal);
464 setOperationAction(ISD::FTRUNC, VT, Legal);
465 setOperationAction(ISD::FROUNDEVEN, VT, Legal);
466 setOperationAction(ISD::FMINNUM, VT, Legal);
467 setOperationAction(ISD::FMAXNUM, VT, Legal);
468 }
469 }
470
471 // Set DAG combine for LA32 and LA64.
472
477
478 // Set DAG combine for 'LSX' feature.
479
480 if (Subtarget.hasExtLSX()) {
482 setTargetDAGCombine(ISD::BITCAST);
483 }
484
485 // Set DAG combine for 'LASX' feature.
486
487 if (Subtarget.hasExtLASX())
489
490 // Compute derived properties from the register classes.
491 computeRegisterProperties(Subtarget.getRegisterInfo());
492
494
497
498 setMaxAtomicSizeInBitsSupported(Subtarget.getGRLen());
499
501
502 // Function alignments.
504 // Set preferred alignments.
505 setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment());
506 setPrefLoopAlignment(Subtarget.getPrefLoopAlignment());
507 setMaxBytesForAlignment(Subtarget.getMaxBytesForAlignment());
508
509 // cmpxchg sizes down to 8 bits become legal if LAMCAS is available.
510 if (Subtarget.hasLAMCAS())
512
513 if (Subtarget.hasSCQ()) {
515 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, Custom);
516 }
517}
518
519bool LoongArchTargetLowering::isOffsetFoldingLegal(
520    const GlobalAddressSDNode *GA) const {
521 // In order to maximise the opportunity for common subexpression elimination,
522 // keep a separate ADD node for the global address offset instead of folding
523 // it in the global address node. Later peephole optimisations may choose to
524 // fold it back in when profitable.
525 return false;
526}
527
528SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
529                                                SelectionDAG &DAG) const {
530 switch (Op.getOpcode()) {
531 case ISD::ATOMIC_FENCE:
532 return lowerATOMIC_FENCE(Op, DAG);
533  case ISD::EH_DWARF_CFA:
534    return lowerEH_DWARF_CFA(Op, DAG);
535  case ISD::GlobalAddress:
536    return lowerGlobalAddress(Op, DAG);
537  case ISD::GlobalTLSAddress:
538    return lowerGlobalTLSAddress(Op, DAG);
539  case ISD::INTRINSIC_WO_CHAIN:
540    return lowerINTRINSIC_WO_CHAIN(Op, DAG);
541  case ISD::INTRINSIC_W_CHAIN:
542    return lowerINTRINSIC_W_CHAIN(Op, DAG);
543  case ISD::INTRINSIC_VOID:
544    return lowerINTRINSIC_VOID(Op, DAG);
545  case ISD::BlockAddress:
546    return lowerBlockAddress(Op, DAG);
547 case ISD::JumpTable:
548 return lowerJumpTable(Op, DAG);
549 case ISD::SHL_PARTS:
550 return lowerShiftLeftParts(Op, DAG);
551 case ISD::SRA_PARTS:
552 return lowerShiftRightParts(Op, DAG, true);
553 case ISD::SRL_PARTS:
554 return lowerShiftRightParts(Op, DAG, false);
555  case ISD::ConstantPool:
556    return lowerConstantPool(Op, DAG);
557 case ISD::FP_TO_SINT:
558 return lowerFP_TO_SINT(Op, DAG);
559 case ISD::BITCAST:
560 return lowerBITCAST(Op, DAG);
561 case ISD::UINT_TO_FP:
562 return lowerUINT_TO_FP(Op, DAG);
563 case ISD::SINT_TO_FP:
564 return lowerSINT_TO_FP(Op, DAG);
565 case ISD::VASTART:
566 return lowerVASTART(Op, DAG);
567 case ISD::FRAMEADDR:
568 return lowerFRAMEADDR(Op, DAG);
569 case ISD::RETURNADDR:
570 return lowerRETURNADDR(Op, DAG);
571  case ISD::WRITE_REGISTER:
572    return lowerWRITE_REGISTER(Op, DAG);
573  case ISD::INSERT_VECTOR_ELT:
574    return lowerINSERT_VECTOR_ELT(Op, DAG);
575  case ISD::EXTRACT_VECTOR_ELT:
576    return lowerEXTRACT_VECTOR_ELT(Op, DAG);
577  case ISD::BUILD_VECTOR:
578    return lowerBUILD_VECTOR(Op, DAG);
579  case ISD::CONCAT_VECTORS:
580    return lowerCONCAT_VECTORS(Op, DAG);
581  case ISD::VECTOR_SHUFFLE:
582    return lowerVECTOR_SHUFFLE(Op, DAG);
583 case ISD::BITREVERSE:
584 return lowerBITREVERSE(Op, DAG);
585  case ISD::SCALAR_TO_VECTOR:
586    return lowerSCALAR_TO_VECTOR(Op, DAG);
587 case ISD::PREFETCH:
588 return lowerPREFETCH(Op, DAG);
589 case ISD::SELECT:
590 return lowerSELECT(Op, DAG);
591 case ISD::BRCOND:
592 return lowerBRCOND(Op, DAG);
593 case ISD::FP_TO_FP16:
594 return lowerFP_TO_FP16(Op, DAG);
595 case ISD::FP16_TO_FP:
596 return lowerFP16_TO_FP(Op, DAG);
597 case ISD::FP_TO_BF16:
598 return lowerFP_TO_BF16(Op, DAG);
599 case ISD::BF16_TO_FP:
600 return lowerBF16_TO_FP(Op, DAG);
601 case ISD::VECREDUCE_ADD:
602 return lowerVECREDUCE_ADD(Op, DAG);
603 case ISD::VECREDUCE_AND:
604 case ISD::VECREDUCE_OR:
605 case ISD::VECREDUCE_XOR:
606 case ISD::VECREDUCE_SMAX:
607 case ISD::VECREDUCE_SMIN:
608 case ISD::VECREDUCE_UMAX:
609 case ISD::VECREDUCE_UMIN:
610 return lowerVECREDUCE(Op, DAG);
611 case ISD::ConstantFP:
612 return lowerConstantFP(Op, DAG);
613 }
614 return SDValue();
615}
616
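// Lower ConstantFP by materializing the bit pattern in GPR(s) and moving it
// to an FPR (MOVGR2FR). Returning SDValue() falls back to the default
// handling; this is done for +/-0.0, for immediates encodable by 'vldi', and
// for values whose materialization sequence would exceed the
// MaterializeFPImmInsNum limit.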
617SDValue LoongArchTargetLowering::lowerConstantFP(SDValue Op,
618 SelectionDAG &DAG) const {
619 EVT VT = Op.getValueType();
620  auto *CFP = cast<ConstantFPSDNode>(Op);
621  const APFloat &FPVal = CFP->getValueAPF();
622 SDLoc DL(CFP);
623
624 assert((VT == MVT::f32 && Subtarget.hasBasicF()) ||
625 (VT == MVT::f64 && Subtarget.hasBasicD()));
626
627  // If the value is 0.0 or -0.0, just let the default lowering handle it.
628 if (FPVal.isZero())
629 return SDValue();
630
631  // If LSX is enabled, use the cheaper 'vldi' instruction when possible.
632 if (isFPImmVLDILegal(FPVal, VT))
633 return SDValue();
634
635  // Otherwise, construct the value as an integer and move it to a float register.
636 APInt INTVal = FPVal.bitcastToAPInt();
637
638  // If more than MaterializeFPImmInsNum instructions would be needed to
639  // generate INTVal and move it to the float register, fall back to a
640  // floating-point load from the constant pool.
642 int InsNum = Seq.size() + ((VT == MVT::f64 && !Subtarget.is64Bit()) ? 2 : 1);
643 if (InsNum > MaterializeFPImmInsNum && !FPVal.isExactlyValue(+1.0))
644 return SDValue();
645
646 switch (VT.getSimpleVT().SimpleTy) {
647 default:
648 llvm_unreachable("Unexpected floating point type!");
649 break;
650 case MVT::f32: {
651 SDValue NewVal = DAG.getConstant(INTVal, DL, MVT::i32);
652 if (Subtarget.is64Bit())
653 NewVal = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, NewVal);
654 return DAG.getNode(Subtarget.is64Bit() ? LoongArchISD::MOVGR2FR_W_LA64
656 DL, VT, NewVal);
657 }
658 case MVT::f64: {
659 if (Subtarget.is64Bit()) {
660 SDValue NewVal = DAG.getConstant(INTVal, DL, MVT::i64);
661 return DAG.getNode(LoongArchISD::MOVGR2FR_D, DL, VT, NewVal);
662 }
663 SDValue Lo = DAG.getConstant(INTVal.trunc(32), DL, MVT::i32);
664 SDValue Hi = DAG.getConstant(INTVal.lshr(32).trunc(32), DL, MVT::i32);
665 return DAG.getNode(LoongArchISD::MOVGR2FR_D_LO_HI, DL, VT, Lo, Hi);
666 }
667 }
668
669 return SDValue();
670}
671
672// Lower vecreduce_add using vhaddw instructions.
673// For Example:
674// call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a)
675// can be lowered to:
676// VHADDW_D_W vr0, vr0, vr0
677// VHADDW_Q_D vr0, vr0, vr0
678// VPICKVE2GR_D a0, vr0, 0
679// ADDI_W a0, a0, 0
680SDValue LoongArchTargetLowering::lowerVECREDUCE_ADD(SDValue Op,
681 SelectionDAG &DAG) const {
682
683 SDLoc DL(Op);
684 MVT OpVT = Op.getSimpleValueType();
685 SDValue Val = Op.getOperand(0);
686
687 unsigned NumEles = Val.getSimpleValueType().getVectorNumElements();
688 unsigned EleBits = Val.getSimpleValueType().getScalarSizeInBits();
689 unsigned ResBits = OpVT.getScalarSizeInBits();
690
691 unsigned LegalVecSize = 128;
692 bool isLASX256Vector =
693 Subtarget.hasExtLASX() && Val.getValueSizeInBits() == 256;
694
695  // Widen the operand until its type is legal.
696 while (!isTypeLegal(Val.getSimpleValueType())) {
697 Val = DAG.WidenVector(Val, DL);
698 }
699
700  // NumEles determines the iteration count; v4i32 for LSX and v8i32 for
701  // LASX should iterate the same number of times.
702 if (isLASX256Vector) {
703 NumEles /= 2;
704 LegalVecSize = 256;
705 }
706
707 for (unsigned i = 1; i < NumEles; i *= 2, EleBits *= 2) {
708 MVT IntTy = MVT::getIntegerVT(EleBits);
709 MVT VecTy = MVT::getVectorVT(IntTy, LegalVecSize / EleBits);
710 Val = DAG.getNode(LoongArchISD::VHADDW, DL, VecTy, Val, Val);
711 }
712
713 if (isLASX256Vector) {
714 SDValue Tmp = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, Val,
715 DAG.getConstant(2, DL, Subtarget.getGRLenVT()));
716 Val = DAG.getNode(ISD::ADD, DL, MVT::v4i64, Tmp, Val);
717 }
718
719 Val = DAG.getBitcast(MVT::getVectorVT(OpVT, LegalVecSize / ResBits), Val);
720 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Val,
721 DAG.getConstant(0, DL, Subtarget.getGRLenVT()));
722}
723
724// Lower vecreduce_and/or/xor/[s/u]max/[s/u]min.
725// For Example:
726// call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %a)
727// can be lowered to:
728// VBSRL_V vr1, vr0, 8
729// VMAX_W vr0, vr1, vr0
730// VBSRL_V vr1, vr0, 4
731// VMAX_W vr0, vr1, vr0
732// VPICKVE2GR_W a0, vr0, 0
733// A 256-bit vector is illegal here; by default it is split into two
734// 128-bit vectors, which are then handled by this lowering.
735SDValue LoongArchTargetLowering::lowerVECREDUCE(SDValue Op,
736 SelectionDAG &DAG) const {
737 SDLoc DL(Op);
738
739 MVT OpVT = Op.getSimpleValueType();
740 SDValue Val = Op.getOperand(0);
741
742 unsigned NumEles = Val.getSimpleValueType().getVectorNumElements();
743 unsigned EleBits = Val.getSimpleValueType().getScalarSizeInBits();
744
745  // Widen the operand until its type is legal.
746 while (!isTypeLegal(Val.getSimpleValueType())) {
747 Val = DAG.WidenVector(Val, DL);
748 }
749
750 unsigned Opcode = ISD::getVecReduceBaseOpcode(Op.getOpcode());
751 MVT VecTy = Val.getSimpleValueType();
752 MVT GRLenVT = Subtarget.getGRLenVT();
753
754 for (int i = NumEles; i > 1; i /= 2) {
755 SDValue ShiftAmt = DAG.getConstant(i * EleBits / 16, DL, GRLenVT);
756 SDValue Tmp = DAG.getNode(LoongArchISD::VBSRL, DL, VecTy, Val, ShiftAmt);
757 Val = DAG.getNode(Opcode, DL, VecTy, Tmp, Val);
758 }
759
760 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Val,
761 DAG.getConstant(0, DL, GRLenVT));
762}
763
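// Lower PREFETCH. Only data prefetches are supported; for an instruction
// prefetch hint the operation is dropped and just the chain is returned.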
764SDValue LoongArchTargetLowering::lowerPREFETCH(SDValue Op,
765 SelectionDAG &DAG) const {
766 unsigned IsData = Op.getConstantOperandVal(4);
767
768 // We don't support non-data prefetch.
769 // Just preserve the chain.
770 if (!IsData)
771 return Op.getOperand(0);
772
773 return Op;
774}
775
776// Return true if Val is equal to (setcc LHS, RHS, CC).
777// Return false if Val is the inverse of (setcc LHS, RHS, CC).
778// Otherwise, return std::nullopt.
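// For example, if Val is (setcc a, b, setlt), then:
//   matchSetCC(a, b, SETLT, Val) == true
//   matchSetCC(a, b, SETGE, Val) == false  (SETGE is the inverse of SETLT)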
779static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS,
780 ISD::CondCode CC, SDValue Val) {
781 assert(Val->getOpcode() == ISD::SETCC);
782 SDValue LHS2 = Val.getOperand(0);
783 SDValue RHS2 = Val.getOperand(1);
784 ISD::CondCode CC2 = cast<CondCodeSDNode>(Val.getOperand(2))->get();
785
786 if (LHS == LHS2 && RHS == RHS2) {
787 if (CC == CC2)
788 return true;
789 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
790 return false;
791 } else if (LHS == RHS2 && RHS == LHS2) {
792    CC2 = ISD::getSetCCSwappedOperands(CC2);
793    if (CC == CC2)
794 return true;
795 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
796 return false;
797 }
798
799 return std::nullopt;
800}
801
803 const LoongArchSubtarget &Subtarget) {
804 SDValue CondV = N->getOperand(0);
805 SDValue TrueV = N->getOperand(1);
806 SDValue FalseV = N->getOperand(2);
807 MVT VT = N->getSimpleValueType(0);
808 SDLoc DL(N);
809
810 // (select c, -1, y) -> -c | y
811 if (isAllOnesConstant(TrueV)) {
812 SDValue Neg = DAG.getNegative(CondV, DL, VT);
813 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(FalseV));
814 }
815 // (select c, y, -1) -> (c-1) | y
816 if (isAllOnesConstant(FalseV)) {
817 SDValue Neg =
818 DAG.getNode(ISD::ADD, DL, VT, CondV, DAG.getAllOnesConstant(DL, VT));
819 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(TrueV));
820 }
821
822 // (select c, 0, y) -> (c-1) & y
823 if (isNullConstant(TrueV)) {
824 SDValue Neg =
825 DAG.getNode(ISD::ADD, DL, VT, CondV, DAG.getAllOnesConstant(DL, VT));
826 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(FalseV));
827 }
828 // (select c, y, 0) -> -c & y
829 if (isNullConstant(FalseV)) {
830 SDValue Neg = DAG.getNegative(CondV, DL, VT);
831 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(TrueV));
832 }
833
834 // select c, ~x, x --> xor -c, x
835 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
836 const APInt &TrueVal = TrueV->getAsAPIntVal();
837 const APInt &FalseVal = FalseV->getAsAPIntVal();
838 if (~TrueVal == FalseVal) {
839 SDValue Neg = DAG.getNegative(CondV, DL, VT);
840 return DAG.getNode(ISD::XOR, DL, VT, Neg, FalseV);
841 }
842 }
843
844 // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops
845 // when both truev and falsev are also setcc.
846 if (CondV.getOpcode() == ISD::SETCC && TrueV.getOpcode() == ISD::SETCC &&
847 FalseV.getOpcode() == ISD::SETCC) {
848 SDValue LHS = CondV.getOperand(0);
849 SDValue RHS = CondV.getOperand(1);
850 ISD::CondCode CC = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
851
852 // (select x, x, y) -> x | y
853 // (select !x, x, y) -> x & y
854 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, TrueV)) {
855 return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, TrueV,
856 DAG.getFreeze(FalseV));
857 }
858 // (select x, y, x) -> x & y
859 // (select !x, y, x) -> x | y
860 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, FalseV)) {
861 return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT,
862 DAG.getFreeze(TrueV), FalseV);
863 }
864 }
865
866 return SDValue();
867}
868
869// Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants
870// into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable.
871// For now we only consider transformation profitable if `binOp(c0, c1)` ends up
872// being `0` or `-1`. In such cases we can replace `select` with `and`.
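// For example:
//   (add (select cond, x, 3), -3)
// is rewritten as
//   (select cond, (add x, -3), 0)
// which a later combine can turn into an AND with the condition.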
873// TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize
874// than `c0`?
875static SDValue
876foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG,
877                                const LoongArchSubtarget &Subtarget) {
878 unsigned SelOpNo = 0;
879 SDValue Sel = BO->getOperand(0);
880 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
881 SelOpNo = 1;
882 Sel = BO->getOperand(1);
883 }
884
885 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
886 return SDValue();
887
888 unsigned ConstSelOpNo = 1;
889 unsigned OtherSelOpNo = 2;
890 if (!isa<ConstantSDNode>(Sel->getOperand(ConstSelOpNo))) {
891 ConstSelOpNo = 2;
892 OtherSelOpNo = 1;
893 }
894 SDValue ConstSelOp = Sel->getOperand(ConstSelOpNo);
895 ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(ConstSelOp);
896 if (!ConstSelOpNode || ConstSelOpNode->isOpaque())
897 return SDValue();
898
899 SDValue ConstBinOp = BO->getOperand(SelOpNo ^ 1);
900 ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(ConstBinOp);
901 if (!ConstBinOpNode || ConstBinOpNode->isOpaque())
902 return SDValue();
903
904 SDLoc DL(Sel);
905 EVT VT = BO->getValueType(0);
906
907 SDValue NewConstOps[2] = {ConstSelOp, ConstBinOp};
908 if (SelOpNo == 1)
909 std::swap(NewConstOps[0], NewConstOps[1]);
910
911 SDValue NewConstOp =
912 DAG.FoldConstantArithmetic(BO->getOpcode(), DL, VT, NewConstOps);
913 if (!NewConstOp)
914 return SDValue();
915
916 const APInt &NewConstAPInt = NewConstOp->getAsAPIntVal();
917 if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes())
918 return SDValue();
919
920 SDValue OtherSelOp = Sel->getOperand(OtherSelOpNo);
921 SDValue NewNonConstOps[2] = {OtherSelOp, ConstBinOp};
922 if (SelOpNo == 1)
923 std::swap(NewNonConstOps[0], NewNonConstOps[1]);
924 SDValue NewNonConstOp = DAG.getNode(BO->getOpcode(), DL, VT, NewNonConstOps);
925
926 SDValue NewT = (ConstSelOpNo == 1) ? NewConstOp : NewNonConstOp;
927 SDValue NewF = (ConstSelOpNo == 1) ? NewNonConstOp : NewConstOp;
928 return DAG.getSelect(DL, VT, Sel.getOperand(0), NewT, NewF);
929}
930
931// Changes the condition code and swaps operands if necessary, so the SetCC
932// operation matches one of the comparisons supported directly by branches
933// in the LoongArch ISA. May adjust compares to favor compare with 0 over
934// compare with 1/-1.
935static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
936                                    ISD::CondCode &CC, SelectionDAG &DAG) {
937 // If this is a single bit test that can't be handled by ANDI, shift the
938 // bit to be tested to the MSB and perform a signed compare with 0.
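  // For example, (and x, 0x800) == 0 becomes (x << (GRLen - 12)) >= 0,
  // moving bit 11 into the sign bit.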
939 if (isIntEqualitySetCC(CC) && isNullConstant(RHS) &&
940 LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
941 isa<ConstantSDNode>(LHS.getOperand(1))) {
942 uint64_t Mask = LHS.getConstantOperandVal(1);
943 if ((isPowerOf2_64(Mask) || isMask_64(Mask)) && !isInt<12>(Mask)) {
944 unsigned ShAmt = 0;
945 if (isPowerOf2_64(Mask)) {
946 CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
947 ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);
948 } else {
949 ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Mask);
950 }
951
952 LHS = LHS.getOperand(0);
953 if (ShAmt != 0)
954 LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS,
955 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
956 return;
957 }
958 }
959
960 if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
961 int64_t C = RHSC->getSExtValue();
962 switch (CC) {
963 default:
964 break;
965 case ISD::SETGT:
966 // Convert X > -1 to X >= 0.
967 if (C == -1) {
968 RHS = DAG.getConstant(0, DL, RHS.getValueType());
969 CC = ISD::SETGE;
970 return;
971 }
972 break;
973 case ISD::SETLT:
974 // Convert X < 1 to 0 >= X.
975 if (C == 1) {
976 RHS = LHS;
977 LHS = DAG.getConstant(0, DL, RHS.getValueType());
978 CC = ISD::SETGE;
979 return;
980 }
981 break;
982 }
983 }
984
985 switch (CC) {
986 default:
987 break;
988 case ISD::SETGT:
989 case ISD::SETLE:
990 case ISD::SETUGT:
991 case ISD::SETULE:
993 std::swap(LHS, RHS);
994 break;
995 }
996}
997
998SDValue LoongArchTargetLowering::lowerSELECT(SDValue Op,
999 SelectionDAG &DAG) const {
1000 SDValue CondV = Op.getOperand(0);
1001 SDValue TrueV = Op.getOperand(1);
1002 SDValue FalseV = Op.getOperand(2);
1003 SDLoc DL(Op);
1004 MVT VT = Op.getSimpleValueType();
1005 MVT GRLenVT = Subtarget.getGRLenVT();
1006
1007 if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
1008 return V;
1009
1010 if (Op.hasOneUse()) {
1011 unsigned UseOpc = Op->user_begin()->getOpcode();
1012 if (isBinOp(UseOpc) && DAG.isSafeToSpeculativelyExecute(UseOpc)) {
1013 SDNode *BinOp = *Op->user_begin();
1014 if (SDValue NewSel = foldBinOpIntoSelectIfProfitable(*Op->user_begin(),
1015 DAG, Subtarget)) {
1016 DAG.ReplaceAllUsesWith(BinOp, &NewSel);
1017        // The opcode check is necessary because foldBinOpIntoSelectIfProfitable
1018        // may return a constant node, which would crash lowerSELECT.
1019 if (NewSel.getOpcode() == ISD::SELECT)
1020 return lowerSELECT(NewSel, DAG);
1021 return NewSel;
1022 }
1023 }
1024 }
1025
1026 // If the condition is not an integer SETCC which operates on GRLenVT, we need
1027 // to emit a LoongArchISD::SELECT_CC comparing the condition to zero. i.e.:
1028 // (select condv, truev, falsev)
1029 // -> (loongarchisd::select_cc condv, zero, setne, truev, falsev)
1030 if (CondV.getOpcode() != ISD::SETCC ||
1031 CondV.getOperand(0).getSimpleValueType() != GRLenVT) {
1032 SDValue Zero = DAG.getConstant(0, DL, GRLenVT);
1033 SDValue SetNE = DAG.getCondCode(ISD::SETNE);
1034
1035 SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
1036
1037 return DAG.getNode(LoongArchISD::SELECT_CC, DL, VT, Ops);
1038 }
1039
1040 // If the CondV is the output of a SETCC node which operates on GRLenVT
1041 // inputs, then merge the SETCC node into the lowered LoongArchISD::SELECT_CC
1042 // to take advantage of the integer compare+branch instructions. i.e.: (select
1043 // (setcc lhs, rhs, cc), truev, falsev)
1044 // -> (loongarchisd::select_cc lhs, rhs, cc, truev, falsev)
1045 SDValue LHS = CondV.getOperand(0);
1046 SDValue RHS = CondV.getOperand(1);
1047 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
1048
1049 // Special case for a select of 2 constants that have a difference of 1.
1050 // Normally this is done by DAGCombine, but if the select is introduced by
1051 // type legalization or op legalization, we miss it. Restricting to SETLT
1052 // case for now because that is what signed saturating add/sub need.
1053 // FIXME: We don't need the condition to be SETLT or even a SETCC,
1054 // but we would probably want to swap the true/false values if the condition
1055 // is SETGE/SETLE to avoid an XORI.
1056 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
1057 CCVal == ISD::SETLT) {
1058 const APInt &TrueVal = TrueV->getAsAPIntVal();
1059 const APInt &FalseVal = FalseV->getAsAPIntVal();
1060 if (TrueVal - 1 == FalseVal)
1061 return DAG.getNode(ISD::ADD, DL, VT, CondV, FalseV);
1062 if (TrueVal + 1 == FalseVal)
1063 return DAG.getNode(ISD::SUB, DL, VT, FalseV, CondV);
1064 }
1065
1066 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
1067 // 1 < x ? x : 1 -> 0 < x ? x : 1
1068 if (isOneConstant(LHS) && (CCVal == ISD::SETLT || CCVal == ISD::SETULT) &&
1069 RHS == TrueV && LHS == FalseV) {
1070 LHS = DAG.getConstant(0, DL, VT);
1071 // 0 <u x is the same as x != 0.
1072 if (CCVal == ISD::SETULT) {
1073 std::swap(LHS, RHS);
1074 CCVal = ISD::SETNE;
1075 }
1076 }
1077
1078 // x <s -1 ? x : -1 -> x <s 0 ? x : -1
1079 if (isAllOnesConstant(RHS) && CCVal == ISD::SETLT && LHS == TrueV &&
1080 RHS == FalseV) {
1081 RHS = DAG.getConstant(0, DL, VT);
1082 }
1083
1084 SDValue TargetCC = DAG.getCondCode(CCVal);
1085
1086 if (isa<ConstantSDNode>(TrueV) && !isa<ConstantSDNode>(FalseV)) {
1087 // (select (setcc lhs, rhs, CC), constant, falsev)
1088 // -> (select (setcc lhs, rhs, InverseCC), falsev, constant)
1089 std::swap(TrueV, FalseV);
1090 TargetCC = DAG.getCondCode(ISD::getSetCCInverse(CCVal, LHS.getValueType()));
1091 }
1092
1093 SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
1094 return DAG.getNode(LoongArchISD::SELECT_CC, DL, VT, Ops);
1095}
1096
1097SDValue LoongArchTargetLowering::lowerBRCOND(SDValue Op,
1098 SelectionDAG &DAG) const {
1099 SDValue CondV = Op.getOperand(1);
1100 SDLoc DL(Op);
1101 MVT GRLenVT = Subtarget.getGRLenVT();
1102
1103 if (CondV.getOpcode() == ISD::SETCC) {
1104 if (CondV.getOperand(0).getValueType() == GRLenVT) {
1105 SDValue LHS = CondV.getOperand(0);
1106 SDValue RHS = CondV.getOperand(1);
1107 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
1108
1109 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
1110
1111 SDValue TargetCC = DAG.getCondCode(CCVal);
1112 return DAG.getNode(LoongArchISD::BR_CC, DL, Op.getValueType(),
1113 Op.getOperand(0), LHS, RHS, TargetCC,
1114 Op.getOperand(2));
1115 } else if (CondV.getOperand(0).getValueType().isFloatingPoint()) {
1116 return DAG.getNode(LoongArchISD::BRCOND, DL, Op.getValueType(),
1117 Op.getOperand(0), CondV, Op.getOperand(2));
1118 }
1119 }
1120
1121 return DAG.getNode(LoongArchISD::BR_CC, DL, Op.getValueType(),
1122 Op.getOperand(0), CondV, DAG.getConstant(0, DL, GRLenVT),
1123 DAG.getCondCode(ISD::SETNE), Op.getOperand(2));
1124}
1125
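// Lower SCALAR_TO_VECTOR by inserting the scalar operand into element 0 of
// an undef vector via INSERT_VECTOR_ELT.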
1126SDValue
1127LoongArchTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
1128 SelectionDAG &DAG) const {
1129 SDLoc DL(Op);
1130 MVT OpVT = Op.getSimpleValueType();
1131
1132 SDValue Vector = DAG.getUNDEF(OpVT);
1133 SDValue Val = Op.getOperand(0);
1134 SDValue Idx = DAG.getConstant(0, DL, Subtarget.getGRLenVT());
1135
1136 return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, OpVT, Vector, Val, Idx);
1137}
1138
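// Lower vector BITREVERSE by extracting each 64-bit chunk, bit-reversing it
// on the scalar side (BITREV_8B for byte vectors, a full 64-bit BITREVERSE
// otherwise), rebuilding the vector, and then shuffling the elements back
// into their original positions for i16/i32 element types.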
1139SDValue LoongArchTargetLowering::lowerBITREVERSE(SDValue Op,
1140 SelectionDAG &DAG) const {
1141 EVT ResTy = Op->getValueType(0);
1142 SDValue Src = Op->getOperand(0);
1143 SDLoc DL(Op);
1144
1145 // LoongArchISD::BITREV_8B is not supported on LA32.
1146 if (!Subtarget.is64Bit() && (ResTy == MVT::v16i8 || ResTy == MVT::v32i8))
1147 return SDValue();
1148
1149 EVT NewVT = ResTy.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
1150 unsigned int OrigEltNum = ResTy.getVectorNumElements();
1151 unsigned int NewEltNum = NewVT.getVectorNumElements();
1152
1153 SDValue NewSrc = DAG.getNode(ISD::BITCAST, DL, NewVT, Src);
1154
1156 for (unsigned int i = 0; i < NewEltNum; i++) {
1157 SDValue Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, NewSrc,
1158 DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
1159 unsigned RevOp = (ResTy == MVT::v16i8 || ResTy == MVT::v32i8)
1160 ? (unsigned)LoongArchISD::BITREV_8B
1161 : (unsigned)ISD::BITREVERSE;
1162 Ops.push_back(DAG.getNode(RevOp, DL, MVT::i64, Op));
1163 }
1164 SDValue Res =
1165 DAG.getNode(ISD::BITCAST, DL, ResTy, DAG.getBuildVector(NewVT, DL, Ops));
1166
1167 switch (ResTy.getSimpleVT().SimpleTy) {
1168 default:
1169 return SDValue();
1170 case MVT::v16i8:
1171 case MVT::v32i8:
1172 return Res;
1173 case MVT::v8i16:
1174 case MVT::v16i16:
1175 case MVT::v4i32:
1176 case MVT::v8i32: {
1178 for (unsigned int i = 0; i < NewEltNum; i++)
1179 for (int j = OrigEltNum / NewEltNum - 1; j >= 0; j--)
1180 Mask.push_back(j + (OrigEltNum / NewEltNum) * i);
1181 return DAG.getVectorShuffle(ResTy, DL, Res, DAG.getUNDEF(ResTy), Mask);
1182 }
1183 }
1184}
1185
1186// Widen element type to get a new mask value (if possible).
1187// For example:
1188// shufflevector <4 x i32> %a, <4 x i32> %b,
1189// <4 x i32> <i32 6, i32 7, i32 2, i32 3>
1190// is equivalent to:
1191// shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
1192// can be lowered to:
1193// VPACKOD_D vr0, vr0, vr1
1195 SDValue V1, SDValue V2, SelectionDAG &DAG) {
1196 unsigned EltBits = VT.getScalarSizeInBits();
1197
1198 if (EltBits > 32 || EltBits == 1)
1199 return SDValue();
1200
1201 SmallVector<int, 8> NewMask;
1202 if (widenShuffleMaskElts(Mask, NewMask)) {
1203 MVT NewEltVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(EltBits * 2)
1204 : MVT::getIntegerVT(EltBits * 2);
1205 MVT NewVT = MVT::getVectorVT(NewEltVT, VT.getVectorNumElements() / 2);
1206 if (DAG.getTargetLoweringInfo().isTypeLegal(NewVT)) {
1207 SDValue NewV1 = DAG.getBitcast(NewVT, V1);
1208 SDValue NewV2 = DAG.getBitcast(NewVT, V2);
1209 return DAG.getBitcast(
1210 VT, DAG.getVectorShuffle(NewVT, DL, NewV1, NewV2, NewMask));
1211 }
1212 }
1213
1214 return SDValue();
1215}
1216
1217/// Attempts to match a shuffle mask against the VBSLL, VBSRL, VSLLI and VSRLI
1218/// instructions.
1219// The function matches elements from one of the input vectors shuffled to the
1220// left or right with zeroable elements 'shifted in'. It handles both the
1221// strictly bit-wise element shifts and the byte shift across an entire 128-bit
1222// lane.
1223// Mostly copied from X86.
1224static int matchShuffleAsShift(MVT &ShiftVT, unsigned &Opcode,
1225 unsigned ScalarSizeInBits, ArrayRef<int> Mask,
1226 int MaskOffset, const APInt &Zeroable) {
1227 int Size = Mask.size();
1228 unsigned SizeInBits = Size * ScalarSizeInBits;
1229
1230 auto CheckZeros = [&](int Shift, int Scale, bool Left) {
1231 for (int i = 0; i < Size; i += Scale)
1232 for (int j = 0; j < Shift; ++j)
1233 if (!Zeroable[i + j + (Left ? 0 : (Scale - Shift))])
1234 return false;
1235
1236 return true;
1237 };
1238
1239 auto isSequentialOrUndefInRange = [&](unsigned Pos, unsigned Size, int Low,
1240 int Step = 1) {
1241 for (unsigned i = Pos, e = Pos + Size; i != e; ++i, Low += Step)
1242 if (!(Mask[i] == -1 || Mask[i] == Low))
1243 return false;
1244 return true;
1245 };
1246
1247 auto MatchShift = [&](int Shift, int Scale, bool Left) {
1248 for (int i = 0; i != Size; i += Scale) {
1249 unsigned Pos = Left ? i + Shift : i;
1250 unsigned Low = Left ? i : i + Shift;
1251 unsigned Len = Scale - Shift;
1252 if (!isSequentialOrUndefInRange(Pos, Len, Low + MaskOffset))
1253 return -1;
1254 }
1255
1256 int ShiftEltBits = ScalarSizeInBits * Scale;
1257 bool ByteShift = ShiftEltBits > 64;
1258 Opcode = Left ? (ByteShift ? LoongArchISD::VBSLL : LoongArchISD::VSLLI)
1259 : (ByteShift ? LoongArchISD::VBSRL : LoongArchISD::VSRLI);
1260 int ShiftAmt = Shift * ScalarSizeInBits / (ByteShift ? 8 : 1);
1261
1262 // Normalize the scale for byte shifts to still produce an i64 element
1263 // type.
1264 Scale = ByteShift ? Scale / 2 : Scale;
1265
1266 // We need to round trip through the appropriate type for the shift.
1267 MVT ShiftSVT = MVT::getIntegerVT(ScalarSizeInBits * Scale);
1268 ShiftVT = ByteShift ? MVT::getVectorVT(MVT::i8, SizeInBits / 8)
1269 : MVT::getVectorVT(ShiftSVT, Size / Scale);
1270 return (int)ShiftAmt;
1271 };
1272
1273 unsigned MaxWidth = 128;
1274 for (int Scale = 2; Scale * ScalarSizeInBits <= MaxWidth; Scale *= 2)
1275 for (int Shift = 1; Shift != Scale; ++Shift)
1276 for (bool Left : {true, false})
1277 if (CheckZeros(Shift, Scale, Left)) {
1278 int ShiftAmt = MatchShift(Shift, Scale, Left);
1279 if (0 < ShiftAmt)
1280 return ShiftAmt;
1281 }
1282
1283 // no match
1284 return -1;
1285}
1286
1287/// Lower VECTOR_SHUFFLE as shift (if possible).
1288///
1289/// For example:
1290/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
1291/// <4 x i32> <i32 4, i32 0, i32 1, i32 2>
1292/// is lowered to:
1293/// (VBSLL_V $v0, $v0, 4)
1294///
1295/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
1296/// <4 x i32> <i32 4, i32 0, i32 4, i32 2>
1297/// is lowered to:
1298/// (VSLLI_D $v0, $v0, 32)
1300 MVT VT, SDValue V1, SDValue V2,
1301 SelectionDAG &DAG,
1302 const LoongArchSubtarget &Subtarget,
1303 const APInt &Zeroable) {
1304 int Size = Mask.size();
1305 assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");
1306
1307 MVT ShiftVT;
1308 SDValue V = V1;
1309 unsigned Opcode;
1310
1311 // Try to match shuffle against V1 shift.
1312 int ShiftAmt = matchShuffleAsShift(ShiftVT, Opcode, VT.getScalarSizeInBits(),
1313 Mask, 0, Zeroable);
1314
1315 // If V1 failed, try to match shuffle against V2 shift.
1316 if (ShiftAmt < 0) {
1317 ShiftAmt = matchShuffleAsShift(ShiftVT, Opcode, VT.getScalarSizeInBits(),
1318 Mask, Size, Zeroable);
1319 V = V2;
1320 }
1321
1322 if (ShiftAmt < 0)
1323 return SDValue();
1324
1325 assert(DAG.getTargetLoweringInfo().isTypeLegal(ShiftVT) &&
1326 "Illegal integer vector type");
1327 V = DAG.getBitcast(ShiftVT, V);
1328 V = DAG.getNode(Opcode, DL, ShiftVT, V,
1329 DAG.getConstant(ShiftAmt, DL, Subtarget.getGRLenVT()));
1330 return DAG.getBitcast(VT, V);
1331}
1332
1333/// Determine whether a range fits a regular pattern of values.
1334/// This function accounts for the possibility of jumping over the End iterator.
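/// For example, fitsRegularPattern<int>(Begin, 2, End, 0, 2) checks that the
/// mask elements at positions 0, 2, 4, ... are undef or equal to 0, 2, 4, ...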
1335template <typename ValType>
1336static bool
1337fitsRegularPattern(typename SmallVectorImpl<ValType>::const_iterator Begin,
1338                   unsigned CheckStride,
1339                   typename SmallVectorImpl<ValType>::const_iterator End,
1340                   ValType ExpectedIndex, unsigned ExpectedIndexStride) {
1341 auto &I = Begin;
1342
1343 while (I != End) {
1344 if (*I != -1 && *I != ExpectedIndex)
1345 return false;
1346 ExpectedIndex += ExpectedIndexStride;
1347
1348 // Incrementing past End is undefined behaviour so we must increment one
1349 // step at a time and check for End at each step.
1350 for (unsigned n = 0; n < CheckStride && I != End; ++n, ++I)
1351 ; // Empty loop body.
1352 }
1353 return true;
1354}
1355
1356/// Compute whether each element of a shuffle is zeroable.
1357///
1358/// A "zeroable" vector shuffle element is one which can be lowered to zero.
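/// Mask entries that are undef are recorded in \p KnownUndef; entries that
/// select an element of an all-zeros build vector are recorded in \p KnownZero.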
1359static void computeZeroableShuffleElements(ArrayRef<int> Mask, SDValue V1,
1360                                           SDValue V2, APInt &KnownUndef,
1361 APInt &KnownZero) {
1362 int Size = Mask.size();
1363 KnownUndef = KnownZero = APInt::getZero(Size);
1364
1365 V1 = peekThroughBitcasts(V1);
1366 V2 = peekThroughBitcasts(V2);
1367
1368 bool V1IsZero = ISD::isBuildVectorAllZeros(V1.getNode());
1369 bool V2IsZero = ISD::isBuildVectorAllZeros(V2.getNode());
1370
1371 int VectorSizeInBits = V1.getValueSizeInBits();
1372 int ScalarSizeInBits = VectorSizeInBits / Size;
1373 assert(!(VectorSizeInBits % ScalarSizeInBits) && "Illegal shuffle mask size");
1374 (void)ScalarSizeInBits;
1375
1376 for (int i = 0; i < Size; ++i) {
1377 int M = Mask[i];
1378 if (M < 0) {
1379 KnownUndef.setBit(i);
1380 continue;
1381 }
1382 if ((M >= 0 && M < Size && V1IsZero) || (M >= Size && V2IsZero)) {
1383 KnownZero.setBit(i);
1384 continue;
1385 }
1386 }
1387}
1388
1389/// Test whether a shuffle mask is equivalent within each sub-lane.
1390///
1391/// The specific repeated shuffle mask is populated in \p RepeatedMask, as it is
1392/// non-trivial to compute in the face of undef lanes. The representation is
1393/// suitable for use with existing 128-bit shuffles as entries from the second
1394/// vector have been remapped to [LaneSize, 2*LaneSize).
1395static bool isRepeatedShuffleMask(unsigned LaneSizeInBits, MVT VT,
1396 ArrayRef<int> Mask,
1397 SmallVectorImpl<int> &RepeatedMask) {
1398 auto LaneSize = LaneSizeInBits / VT.getScalarSizeInBits();
1399 RepeatedMask.assign(LaneSize, -1);
1400 int Size = Mask.size();
1401 for (int i = 0; i < Size; ++i) {
1402 assert(Mask[i] == -1 || Mask[i] >= 0);
1403 if (Mask[i] < 0)
1404 continue;
1405 if ((Mask[i] % Size) / LaneSize != i / LaneSize)
1406 // This entry crosses lanes, so there is no way to model this shuffle.
1407 return false;
1408
1409 // Ok, handle the in-lane shuffles by detecting if and when they repeat.
1410 // Adjust second vector indices to start at LaneSize instead of Size.
1411 int LocalM =
1412 Mask[i] < Size ? Mask[i] % LaneSize : Mask[i] % LaneSize + LaneSize;
1413 if (RepeatedMask[i % LaneSize] < 0)
1414 // This is the first non-undef entry in this slot of a 128-bit lane.
1415 RepeatedMask[i % LaneSize] = LocalM;
1416 else if (RepeatedMask[i % LaneSize] != LocalM)
1417 // Found a mismatch with the repeated mask.
1418 return false;
1419 }
1420 return true;
1421}
1422
1423/// Attempts to match vector shuffle as byte rotation.
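/// Returns the byte rotation amount, updating \p V1 and \p V2 to the low and
/// high input vectors, or -1 if the mask cannot be matched as a rotation.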
1424static int matchShuffleAsByteRotate(MVT VT, SDValue &V1, SDValue &V2,
1425                                    ArrayRef<int> Mask) {
1426
1427 SDValue Lo, Hi;
1428 SmallVector<int, 16> RepeatedMask;
1429
1430 if (!isRepeatedShuffleMask(128, VT, Mask, RepeatedMask))
1431 return -1;
1432
1433 int NumElts = RepeatedMask.size();
1434 int Rotation = 0;
1435 int Scale = 16 / NumElts;
1436
1437 for (int i = 0; i < NumElts; ++i) {
1438 int M = RepeatedMask[i];
1439 assert((M == -1 || (0 <= M && M < (2 * NumElts))) &&
1440 "Unexpected mask index.");
1441 if (M < 0)
1442 continue;
1443
1444 // Determine where a rotated vector would have started.
1445 int StartIdx = i - (M % NumElts);
1446 if (StartIdx == 0)
1447 return -1;
1448
1449 // If we found the tail of a vector the rotation must be the missing
1450 // front. If we found the head of a vector, it must be how much of the
1451 // head.
1452 int CandidateRotation = StartIdx < 0 ? -StartIdx : NumElts - StartIdx;
1453
1454 if (Rotation == 0)
1455 Rotation = CandidateRotation;
1456 else if (Rotation != CandidateRotation)
1457 return -1;
1458
1459 // Compute which value this mask is pointing at.
1460 SDValue MaskV = M < NumElts ? V1 : V2;
1461
1462 // Compute which of the two target values this index should be assigned
1463 // to. This reflects whether the high elements are remaining or the low
1464 // elements are remaining.
1465 SDValue &TargetV = StartIdx < 0 ? Hi : Lo;
1466
1467 // Either set up this value if we've not encountered it before, or check
1468 // that it remains consistent.
1469 if (!TargetV)
1470 TargetV = MaskV;
1471 else if (TargetV != MaskV)
1472 return -1;
1473 }
1474
1475 // Check that we successfully analyzed the mask, and normalize the results.
1476 assert(Rotation != 0 && "Failed to locate a viable rotation!");
1477 assert((Lo || Hi) && "Failed to find a rotated input vector!");
1478 if (!Lo)
1479 Lo = Hi;
1480 else if (!Hi)
1481 Hi = Lo;
1482
1483 V1 = Lo;
1484 V2 = Hi;
1485
1486 return Rotation * Scale;
1487}
1488
1489/// Lower VECTOR_SHUFFLE as byte rotate (if possible).
1490///
1491/// For example:
1492/// %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b,
1493/// <2 x i32> <i32 3, i32 0>
1494/// is lowered to:
1495/// (VBSRL_V $v1, $v1, 8)
1496/// (VBSLL_V $v0, $v0, 8)
1497/// (VOR_V $v0, $V0, $v1)
1498static SDValue
1500 SDValue V1, SDValue V2, SelectionDAG &DAG,
1501 const LoongArchSubtarget &Subtarget) {
1502
1503 SDValue Lo = V1, Hi = V2;
1504 int ByteRotation = matchShuffleAsByteRotate(VT, Lo, Hi, Mask);
1505 if (ByteRotation <= 0)
1506 return SDValue();
1507
1508 MVT ByteVT = MVT::getVectorVT(MVT::i8, VT.getSizeInBits() / 8);
1509 Lo = DAG.getBitcast(ByteVT, Lo);
1510 Hi = DAG.getBitcast(ByteVT, Hi);
1511
1512 int LoByteShift = 16 - ByteRotation;
1513 int HiByteShift = ByteRotation;
1514 MVT GRLenVT = Subtarget.getGRLenVT();
1515
1516 SDValue LoShift = DAG.getNode(LoongArchISD::VBSLL, DL, ByteVT, Lo,
1517 DAG.getConstant(LoByteShift, DL, GRLenVT));
1518 SDValue HiShift = DAG.getNode(LoongArchISD::VBSRL, DL, ByteVT, Hi,
1519 DAG.getConstant(HiByteShift, DL, GRLenVT));
1520 return DAG.getBitcast(VT, DAG.getNode(ISD::OR, DL, ByteVT, LoShift, HiShift));
1521}
1522
1523/// Lower VECTOR_SHUFFLE as ZERO_EXTEND Or ANY_EXTEND (if possible).
1524///
1525/// For example:
1526/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
1527/// <4 x i32> <i32 0, i32 4, i32 1, i32 4>
1528/// %3 = bitcast <4 x i32> %2 to <2 x i64>
1529/// is lowered to:
1530/// (VREPLI $v1, 0)
1531/// (VILVL $v0, $v1, $v0)
1533 ArrayRef<int> Mask, MVT VT,
1534 SDValue V1, SDValue V2,
1535 SelectionDAG &DAG,
1536 const APInt &Zeroable) {
1537 int Bits = VT.getSizeInBits();
1538 int EltBits = VT.getScalarSizeInBits();
1539 int NumElements = VT.getVectorNumElements();
1540
1541 if (Zeroable.isAllOnes())
1542 return DAG.getConstant(0, DL, VT);
1543
1544 // Define a helper function to check a particular ext-scale and lower to it if
1545 // valid.
1546 auto Lower = [&](int Scale) -> SDValue {
1547 SDValue InputV;
1548 bool AnyExt = true;
1549 int Offset = 0;
1550 for (int i = 0; i < NumElements; i++) {
1551 int M = Mask[i];
1552 if (M < 0)
1553 continue;
1554 if (i % Scale != 0) {
1555 // Each of the extended elements need to be zeroable.
1556 if (!Zeroable[i])
1557 return SDValue();
1558
1559 AnyExt = false;
1560 continue;
1561 }
1562
1563 // Each of the base elements needs to be consecutive indices into the
1564 // same input vector.
1565 SDValue V = M < NumElements ? V1 : V2;
1566 M = M % NumElements;
1567 if (!InputV) {
1568 InputV = V;
1569 Offset = M - (i / Scale);
1570
1571      // These offsets can't be handled.
1572 if (Offset % (NumElements / Scale))
1573 return SDValue();
1574 } else if (InputV != V)
1575 return SDValue();
1576
1577 if (M != (Offset + (i / Scale)))
1578 return SDValue(); // Non-consecutive strided elements.
1579 }
1580
1581 // If we fail to find an input, we have a zero-shuffle which should always
1582 // have already been handled.
1583 if (!InputV)
1584 return SDValue();
1585
1586 do {
1587 unsigned VilVLoHi = LoongArchISD::VILVL;
1588 if (Offset >= (NumElements / 2)) {
1589 VilVLoHi = LoongArchISD::VILVH;
1590 Offset -= (NumElements / 2);
1591 }
1592
1593 MVT InputVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits), NumElements);
1594 SDValue Ext =
1595 AnyExt ? DAG.getFreeze(InputV) : DAG.getConstant(0, DL, InputVT);
1596 InputV = DAG.getBitcast(InputVT, InputV);
1597 InputV = DAG.getNode(VilVLoHi, DL, InputVT, Ext, InputV);
1598 Scale /= 2;
1599 EltBits *= 2;
1600 NumElements /= 2;
1601 } while (Scale > 1);
1602 return DAG.getBitcast(VT, InputV);
1603 };
1604
1605 // Each iteration, try extending the elements half as much, but into twice as
1606 // many elements.
1607 for (int NumExtElements = Bits / 64; NumExtElements < NumElements;
1608 NumExtElements *= 2) {
1609 if (SDValue V = Lower(NumElements / NumExtElements))
1610 return V;
1611 }
1612 return SDValue();
1613}
1614
1615/// Lower VECTOR_SHUFFLE into VREPLVEI (if possible).
1616///
1617/// VREPLVEI performs vector broadcast based on an element specified by an
1618/// integer immediate, with its mask being similar to:
1619/// <x, x, x, ...>
1620/// where x is any valid index.
1621///
1622/// When undef's appear in the mask they are treated as if they were whatever
1623/// value is necessary in order to fit the above form.
1624static SDValue
1626 SDValue V1, SelectionDAG &DAG,
1627 const LoongArchSubtarget &Subtarget) {
1628 int SplatIndex = -1;
1629 for (const auto &M : Mask) {
1630 if (M != -1) {
1631 SplatIndex = M;
1632 break;
1633 }
1634 }
1635
1636 if (SplatIndex == -1)
1637 return DAG.getUNDEF(VT);
1638
1639 assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
1640 if (fitsRegularPattern<int>(Mask.begin(), 1, Mask.end(), SplatIndex, 0)) {
1641 return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
1642 DAG.getConstant(SplatIndex, DL, Subtarget.getGRLenVT()));
1643 }
1644
1645 return SDValue();
1646}
1647
1648/// Lower VECTOR_SHUFFLE into VSHUF4I (if possible).
1649///
1650/// VSHUF4I splits the vector into blocks of four elements, then shuffles these
1651/// elements according to a <4 x i2> constant (encoded as an integer immediate).
1652///
1653/// It is therefore possible to lower into VSHUF4I when the mask takes the form:
1654/// <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...>
1655/// When undef's appear they are treated as if they were whatever value is
1656/// necessary in order to fit the above forms.
1657///
1658/// For example:
1659/// %2 = shufflevector <8 x i16> %0, <8 x i16> undef,
1660/// <8 x i32> <i32 3, i32 2, i32 1, i32 0,
1661/// i32 7, i32 6, i32 5, i32 4>
1662/// is lowered to:
1663/// (VSHUF4I_H $v0, $v1, 27)
1664/// where the 27 comes from:
1665/// 3 + (2 << 2) + (1 << 4) + (0 << 6)
1666static SDValue
1668 SDValue V1, SDValue V2, SelectionDAG &DAG,
1669 const LoongArchSubtarget &Subtarget) {
1670
1671 unsigned SubVecSize = 4;
1672 if (VT == MVT::v2f64 || VT == MVT::v2i64)
1673 SubVecSize = 2;
1674
1675 int SubMask[4] = {-1, -1, -1, -1};
1676 for (unsigned i = 0; i < SubVecSize; ++i) {
1677 for (unsigned j = i; j < Mask.size(); j += SubVecSize) {
1678 int M = Mask[j];
1679
1680 // Convert from vector index to 4-element subvector index
1681 // If an index refers to an element outside of the subvector then give up
1682 if (M != -1) {
1683 M -= 4 * (j / SubVecSize);
1684 if (M < 0 || M >= 4)
1685 return SDValue();
1686 }
1687
1688 // If the mask has an undef, replace it with the current index.
1689 // Note that it might still be undef if the current index is also undef
1690 if (SubMask[i] == -1)
1691 SubMask[i] = M;
1692 // Check that non-undef values are the same as in the mask. If they
1693 // aren't then give up
1694 else if (M != -1 && M != SubMask[i])
1695 return SDValue();
1696 }
1697 }
1698
1699 // Calculate the immediate. Replace any remaining undefs with zero
1700 int Imm = 0;
1701 for (int i = SubVecSize - 1; i >= 0; --i) {
1702 int M = SubMask[i];
1703
1704 if (M == -1)
1705 M = 0;
1706
1707 Imm <<= 2;
1708 Imm |= M & 0x3;
1709 }
1710
1711 MVT GRLenVT = Subtarget.getGRLenVT();
1712
1713 // Return vshuf4i.d
1714 if (VT == MVT::v2f64 || VT == MVT::v2i64)
1715 return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT, V1, V2,
1716 DAG.getConstant(Imm, DL, GRLenVT));
1717
1718 return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT, V1,
1719 DAG.getConstant(Imm, DL, GRLenVT));
1720}
1721
1722/// Lower VECTOR_SHUFFLE whose result is the reversed source vector.
1723///
1724/// It is possible to optimize a VECTOR_SHUFFLE performing a vector reverse
1725/// whose mask looks like:
1726/// <7, 6, 5, 4, 3, 2, 1, 0>
1727///
1728/// When undef's appear in the mask they are treated as if they were whatever
1729/// value is necessary in order to fit the above forms.
1730static SDValue
1732 SDValue V1, SelectionDAG &DAG,
1733 const LoongArchSubtarget &Subtarget) {
1734  // Only vectors with i8/i16 elements, which cannot match other patterns
1735  // directly, need to do this.
1736 if (VT != MVT::v16i8 && VT != MVT::v8i16 && VT != MVT::v32i8 &&
1737 VT != MVT::v16i16)
1738 return SDValue();
1739
1740 if (!ShuffleVectorInst::isReverseMask(Mask, Mask.size()))
1741 return SDValue();
1742
1743 int WidenNumElts = VT.getVectorNumElements() / 4;
1744 SmallVector<int, 16> WidenMask(WidenNumElts, -1);
1745 for (int i = 0; i < WidenNumElts; ++i)
1746 WidenMask[i] = WidenNumElts - 1 - i;
1747
1748 MVT WidenVT = MVT::getVectorVT(
1749 VT.getVectorElementType() == MVT::i8 ? MVT::i32 : MVT::i64, WidenNumElts);
1750 SDValue NewV1 = DAG.getBitcast(WidenVT, V1);
1751 SDValue WidenRev = DAG.getVectorShuffle(WidenVT, DL, NewV1,
1752 DAG.getUNDEF(WidenVT), WidenMask);
1753
1754 return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT,
1755 DAG.getBitcast(VT, WidenRev),
1756 DAG.getConstant(27, DL, Subtarget.getGRLenVT()));
1757}
1758
1759/// Lower VECTOR_SHUFFLE into VPACKEV (if possible).
1760///
1761/// VPACKEV interleaves the even elements from each vector.
1762///
1763/// It is possible to lower into VPACKEV when the mask consists of two of the
1764/// following forms interleaved:
1765/// <0, 2, 4, ...>
1766/// <n, n+2, n+4, ...>
1767/// where n is the number of elements in the vector.
1768/// For example:
1769/// <0, 0, 2, 2, 4, 4, ...>
1770/// <0, n, 2, n+2, 4, n+4, ...>
1771///
1772/// When undef's appear in the mask they are treated as if they were whatever
1773/// value is necessary in order to fit the above forms.
1775 MVT VT, SDValue V1, SDValue V2,
1776 SelectionDAG &DAG) {
1777
1778 const auto &Begin = Mask.begin();
1779 const auto &End = Mask.end();
1780 SDValue OriV1 = V1, OriV2 = V2;
1781
1782 if (fitsRegularPattern<int>(Begin, 2, End, 0, 2))
1783 V1 = OriV1;
1784 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 2))
1785 V1 = OriV2;
1786 else
1787 return SDValue();
1788
1789 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 2))
1790 V2 = OriV1;
1791 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 2))
1792 V2 = OriV2;
1793 else
1794 return SDValue();
1795
1796 return DAG.getNode(LoongArchISD::VPACKEV, DL, VT, V2, V1);
1797}
1798
1799/// Lower VECTOR_SHUFFLE into VPACKOD (if possible).
1800///
1801/// VPACKOD interleaves the odd elements from each vector.
1802///
1803/// It is possible to lower into VPACKOD when the mask consists of two of the
1804/// following forms interleaved:
1805/// <1, 3, 5, ...>
1806/// <n+1, n+3, n+5, ...>
1807/// where n is the number of elements in the vector.
1808/// For example:
1809/// <1, 1, 3, 3, 5, 5, ...>
1810/// <1, n+1, 3, n+3, 5, n+5, ...>
1811///
1812/// When undef's appear in the mask they are treated as if they were whatever
1813/// value is necessary in order to fit the above forms.
1815 MVT VT, SDValue V1, SDValue V2,
1816 SelectionDAG &DAG) {
1817
1818 const auto &Begin = Mask.begin();
1819 const auto &End = Mask.end();
1820 SDValue OriV1 = V1, OriV2 = V2;
1821
1822 if (fitsRegularPattern<int>(Begin, 2, End, 1, 2))
1823 V1 = OriV1;
1824 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + 1, 2))
1825 V1 = OriV2;
1826 else
1827 return SDValue();
1828
1829 if (fitsRegularPattern<int>(Begin + 1, 2, End, 1, 2))
1830 V2 = OriV1;
1831 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + 1, 2))
1832 V2 = OriV2;
1833 else
1834 return SDValue();
1835
1836 return DAG.getNode(LoongArchISD::VPACKOD, DL, VT, V2, V1);
1837}
1838
1839/// Lower VECTOR_SHUFFLE into VILVH (if possible).
1840///
1841/// VILVH interleaves consecutive elements from the left (highest-indexed) half
1842/// of each vector.
1843///
1844/// It is possible to lower into VILVH when the mask consists of two of the
1845/// following forms interleaved:
1846/// <x, x+1, x+2, ...>
1847/// <n+x, n+x+1, n+x+2, ...>
1848/// where n is the number of elements in the vector and x is half n.
1849/// For example:
1850/// <x, x, x+1, x+1, x+2, x+2, ...>
1851/// <x, n+x, x+1, n+x+1, x+2, n+x+2, ...>
1852///
1853/// When undef's appear in the mask they are treated as if they were whatever
1854/// value is necessary in order to fit the above forms.
1856 MVT VT, SDValue V1, SDValue V2,
1857 SelectionDAG &DAG) {
1858
1859 const auto &Begin = Mask.begin();
1860 const auto &End = Mask.end();
1861 unsigned HalfSize = Mask.size() / 2;
1862 SDValue OriV1 = V1, OriV2 = V2;
1863
1864 if (fitsRegularPattern<int>(Begin, 2, End, HalfSize, 1))
1865 V1 = OriV1;
1866 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + HalfSize, 1))
1867 V1 = OriV2;
1868 else
1869 return SDValue();
1870
1871 if (fitsRegularPattern<int>(Begin + 1, 2, End, HalfSize, 1))
1872 V2 = OriV1;
1873 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + HalfSize,
1874 1))
1875 V2 = OriV2;
1876 else
1877 return SDValue();
1878
1879 return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
1880}
1881
1882/// Lower VECTOR_SHUFFLE into VILVL (if possible).
1883///
1884/// VILVL interleaves consecutive elements from the right (lowest-indexed) half
1885/// of each vector.
1886///
1887/// It is possible to lower into VILVL when the mask consists of two of the
1888/// following forms interleaved:
1889/// <0, 1, 2, ...>
1890/// <n, n+1, n+2, ...>
1891/// where n is the number of elements in the vector.
1892/// For example:
1893/// <0, 0, 1, 1, 2, 2, ...>
1894/// <0, n, 1, n+1, 2, n+2, ...>
1895///
1896/// When undef's appear in the mask they are treated as if they were whatever
1897/// value is necessary in order to fit the above forms.
1898 static SDValue lowerVECTOR_SHUFFLE_VILVL(const SDLoc &DL, ArrayRef<int> Mask,
1899 MVT VT, SDValue V1, SDValue V2,
1900 SelectionDAG &DAG) {
1901
1902 const auto &Begin = Mask.begin();
1903 const auto &End = Mask.end();
1904 SDValue OriV1 = V1, OriV2 = V2;
1905
1906 if (fitsRegularPattern<int>(Begin, 2, End, 0, 1))
1907 V1 = OriV1;
1908 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 1))
1909 V1 = OriV2;
1910 else
1911 return SDValue();
1912
1913 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 1))
1914 V2 = OriV1;
1915 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 1))
1916 V2 = OriV2;
1917 else
1918 return SDValue();
1919
1920 return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
1921}
1922
1923/// Lower VECTOR_SHUFFLE into VPICKEV (if possible).
1924///
1925/// VPICKEV copies the even elements of each vector into the result vector.
1926///
1927/// It is possible to lower into VPICKEV when the mask consists of two of the
1928/// following forms concatenated:
1929/// <0, 2, 4, ...>
1930/// <n, n+2, n+4, ...>
1931/// where n is the number of elements in the vector.
1932/// For example:
1933/// <0, 2, 4, ..., 0, 2, 4, ...>
1934/// <0, 2, 4, ..., n, n+2, n+4, ...>
1935///
1936/// When undef's appear in the mask they are treated as if they were whatever
1937/// value is necessary in order to fit the above forms.
1938 static SDValue lowerVECTOR_SHUFFLE_VPICKEV(const SDLoc &DL, ArrayRef<int> Mask,
1939 MVT VT, SDValue V1, SDValue V2,
1940 SelectionDAG &DAG) {
1941
1942 const auto &Begin = Mask.begin();
1943 const auto &Mid = Mask.begin() + Mask.size() / 2;
1944 const auto &End = Mask.end();
1945 SDValue OriV1 = V1, OriV2 = V2;
1946
1947 if (fitsRegularPattern<int>(Begin, 1, Mid, 0, 2))
1948 V1 = OriV1;
1949 else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size(), 2))
1950 V1 = OriV2;
1951 else
1952 return SDValue();
1953
1954 if (fitsRegularPattern<int>(Mid, 1, End, 0, 2))
1955 V2 = OriV1;
1956 else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size(), 2))
1957 V2 = OriV2;
1958
1959 else
1960 return SDValue();
1961
1962 return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
1963}
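
// Illustrative sketch (not part of the original source): the concatenated
// even-pick form described above, checked half-by-half for a fixed 8-element
// mask; the helper name is hypothetical and -1 entries are ignored as undef.
static bool fitsPickEvHalfIllustrative(const int Mask[8], int Begin, int End,
                                       int Base) {
  for (int I = Begin, Expected = Base; I < End; ++I, Expected += 2)
    if (Mask[I] != -1 && Mask[I] != Expected)
      return false;
  return true;
}
// Example: <0, 2, 4, 6, 8, 10, 12, 14> passes with Base 0 for the low half and
// Base 8 for the high half, i.e. the even elements of V1 then the even
// elements of V2, which is exactly the VPICKEV result.
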
1964
1965/// Lower VECTOR_SHUFFLE into VPICKOD (if possible).
1966///
1967/// VPICKOD copies the odd elements of each vector into the result vector.
1968///
1969/// It is possible to lower into VPICKOD when the mask consists of two of the
1970/// following forms concatenated:
1971/// <1, 3, 5, ...>
1972/// <n+1, n+3, n+5, ...>
1973/// where n is the number of elements in the vector.
1974/// For example:
1975/// <1, 3, 5, ..., 1, 3, 5, ...>
1976/// <1, 3, 5, ..., n+1, n+3, n+5, ...>
1977///
1978/// When undef's appear in the mask they are treated as if they were whatever
1979/// value is necessary in order to fit the above forms.
1980 static SDValue lowerVECTOR_SHUFFLE_VPICKOD(const SDLoc &DL, ArrayRef<int> Mask,
1981 MVT VT, SDValue V1, SDValue V2,
1982 SelectionDAG &DAG) {
1983
1984 const auto &Begin = Mask.begin();
1985 const auto &Mid = Mask.begin() + Mask.size() / 2;
1986 const auto &End = Mask.end();
1987 SDValue OriV1 = V1, OriV2 = V2;
1988
1989 if (fitsRegularPattern<int>(Begin, 1, Mid, 1, 2))
1990 V1 = OriV1;
1991 else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size() + 1, 2))
1992 V1 = OriV2;
1993 else
1994 return SDValue();
1995
1996 if (fitsRegularPattern<int>(Mid, 1, End, 1, 2))
1997 V2 = OriV1;
1998 else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size() + 1, 2))
1999 V2 = OriV2;
2000 else
2001 return SDValue();
2002
2003 return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
2004}
2005
2006/// Lower VECTOR_SHUFFLE into VSHUF.
2007///
2008/// This mostly consists of converting the shuffle mask into a BUILD_VECTOR and
2009/// adding it as an operand to the resulting VSHUF.
2010 static SDValue lowerVECTOR_SHUFFLE_VSHUF(const SDLoc &DL, ArrayRef<int> Mask,
2011 MVT VT, SDValue V1, SDValue V2,
2012 SelectionDAG &DAG,
2013 const LoongArchSubtarget &Subtarget) {
2014
2015 SmallVector<SDValue> Ops;
2016 for (auto M : Mask)
2017 Ops.push_back(DAG.getSignedConstant(M, DL, Subtarget.getGRLenVT()));
2018
2019 EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
2020 SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, Ops);
2021
2022 // VECTOR_SHUFFLE concatenates the vectors in a vectorwise fashion.
2023 // <0b00, 0b01> + <0b10, 0b11> -> <0b00, 0b01, 0b10, 0b11>
2024 // VSHUF concatenates the vectors in a bitwise fashion:
2025 // <0b00, 0b01> + <0b10, 0b11> ->
2026 // 0b0100 + 0b1110 -> 0b01001110
2027 // <0b10, 0b11, 0b00, 0b01>
2028 // We must therefore swap the operands to get the correct result.
2029 return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
2030}
2031
2032/// Dispatching routine to lower various 128-bit LoongArch vector shuffles.
2033///
2034/// This routine breaks down the specific type of 128-bit shuffle and
2035/// dispatches to the lowering routines accordingly.
2036 static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2037 SDValue V1, SDValue V2, SelectionDAG &DAG,
2038 const LoongArchSubtarget &Subtarget) {
2039 assert((VT.SimpleTy == MVT::v16i8 || VT.SimpleTy == MVT::v8i16 ||
2040 VT.SimpleTy == MVT::v4i32 || VT.SimpleTy == MVT::v2i64 ||
2041 VT.SimpleTy == MVT::v4f32 || VT.SimpleTy == MVT::v2f64) &&
2042 "Vector type is unsupported for lsx!");
2043 assert(V1.getSimpleValueType() == V2.getSimpleValueType() &&
2044 "Two operands have different types!");
2045 assert(VT.getVectorNumElements() == Mask.size() &&
2046 "Unexpected mask size for shuffle!");
2047 assert(Mask.size() % 2 == 0 && "Expected even mask size.");
2048
2049 APInt KnownUndef, KnownZero;
2050 computeZeroableShuffleElements(Mask, V1, V2, KnownUndef, KnownZero);
2051 APInt Zeroable = KnownUndef | KnownZero;
2052
2053 SDValue Result;
2054 // TODO: Add more comparison patterns.
2055 if (V2.isUndef()) {
2056 if ((Result =
2057 lowerVECTOR_SHUFFLE_VREPLVEI(DL, Mask, VT, V1, DAG, Subtarget)))
2058 return Result;
2059 if ((Result =
2060 lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2061 return Result;
2062 if ((Result =
2063 lowerVECTOR_SHUFFLE_IsReverse(DL, Mask, VT, V1, DAG, Subtarget)))
2064 return Result;
2065
2066 // TODO: The commented-out assignment below may be enabled in the future
2067 // to better match the pattern for instruction selection.
2068 /* V2 = V1; */
2069 }
2070
2071 // It is recommended not to change the pattern comparison order for better
2072 // performance.
2073 if ((Result = lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG)))
2074 return Result;
2075 if ((Result = lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG)))
2076 return Result;
2077 if ((Result = lowerVECTOR_SHUFFLE_VILVH(DL, Mask, VT, V1, V2, DAG)))
2078 return Result;
2079 if ((Result = lowerVECTOR_SHUFFLE_VILVL(DL, Mask, VT, V1, V2, DAG)))
2080 return Result;
2081 if ((Result = lowerVECTOR_SHUFFLE_VPICKEV(DL, Mask, VT, V1, V2, DAG)))
2082 return Result;
2083 if ((Result = lowerVECTOR_SHUFFLE_VPICKOD(DL, Mask, VT, V1, V2, DAG)))
2084 return Result;
2085 if ((VT.SimpleTy == MVT::v2i64 || VT.SimpleTy == MVT::v2f64) &&
2086 (Result =
2087 lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2088 return Result;
2089 if ((Result = lowerVECTOR_SHUFFLEAsZeroOrAnyExtend(DL, Mask, VT, V1, V2, DAG,
2090 Zeroable)))
2091 return Result;
2092 if ((Result = lowerVECTOR_SHUFFLEAsShift(DL, Mask, VT, V1, V2, DAG, Subtarget,
2093 Zeroable)))
2094 return Result;
2095 if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, Mask, VT, V1, V2, DAG,
2096 Subtarget)))
2097 return Result;
2098 if (SDValue NewShuffle = widenShuffleMask(DL, Mask, VT, V1, V2, DAG))
2099 return NewShuffle;
2100 if ((Result =
2101 lowerVECTOR_SHUFFLE_VSHUF(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2102 return Result;
2103 return SDValue();
2104}
2105
2106/// Lower VECTOR_SHUFFLE into XVREPLVEI (if possible).
2107///
2108/// It is an XVREPLVEI when the mask is:
2109/// <x, x, x, ..., x+n, x+n, x+n, ...>
2110/// where x appears n times and n is half the length of the vector.
2111///
2112/// When undef's appear in the mask they are treated as if they were whatever
2113/// value is necessary in order to fit the above form.
2114static SDValue
2115 lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2116 SDValue V1, SelectionDAG &DAG,
2117 const LoongArchSubtarget &Subtarget) {
2118 int SplatIndex = -1;
2119 for (const auto &M : Mask) {
2120 if (M != -1) {
2121 SplatIndex = M;
2122 break;
2123 }
2124 }
2125
2126 if (SplatIndex == -1)
2127 return DAG.getUNDEF(VT);
2128
2129 const auto &Begin = Mask.begin();
2130 const auto &End = Mask.end();
2131 int HalfSize = Mask.size() / 2;
2132
2133 if (SplatIndex >= HalfSize)
2134 return SDValue();
2135
2136 assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
2137 if (fitsRegularPattern<int>(Begin, 1, End - HalfSize, SplatIndex, 0) &&
2138 fitsRegularPattern<int>(Begin + HalfSize, 1, End, SplatIndex + HalfSize,
2139 0)) {
2140 return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
2141 DAG.getConstant(SplatIndex, DL, Subtarget.getGRLenVT()));
2142 }
2143
2144 return SDValue();
2145}
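
// Illustrative sketch (not part of the original source): the per-half splat
// form accepted above, written with a hypothetical helper name; -1 entries
// count as undef.
#include <vector>
static bool isPerHalfSplatIllustrative(const std::vector<int> &Mask,
                                       int SplatIndex) {
  int Half = static_cast<int>(Mask.size()) / 2;
  for (int I = 0, E = static_cast<int>(Mask.size()); I < E; ++I) {
    int Expected = I < Half ? SplatIndex : SplatIndex + Half;
    if (Mask[I] != -1 && Mask[I] != Expected)
      return false;
  }
  return true;
}
// Example: for v8i32, <1, 1, 1, 1, 5, 5, 5, 5> splats element 1 of each
// 128-bit half and is accepted with SplatIndex == 1.
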
2146
2147/// Lower VECTOR_SHUFFLE into XVSHUF4I (if possible).
2148static SDValue
2149 lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2150 SDValue V1, SDValue V2, SelectionDAG &DAG,
2151 const LoongArchSubtarget &Subtarget) {
2152 // When the size is less than or equal to 4, lower cost instructions may be
2153 // used.
2154 if (Mask.size() <= 4)
2155 return SDValue();
2156 return lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget);
2157}
2158
2159/// Lower VECTOR_SHUFFLE into XVPERMI (if possible).
2160static SDValue
2161 lowerVECTOR_SHUFFLE_XVPERMI(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2162 SDValue V1, SelectionDAG &DAG,
2163 const LoongArchSubtarget &Subtarget) {
2164 // Only consider XVPERMI_D.
2165 if (Mask.size() != 4 || (VT != MVT::v4i64 && VT != MVT::v4f64))
2166 return SDValue();
2167
2168 unsigned MaskImm = 0;
2169 for (unsigned i = 0; i < Mask.size(); ++i) {
2170 if (Mask[i] == -1)
2171 continue;
2172 MaskImm |= Mask[i] << (i * 2);
2173 }
2174
2175 return DAG.getNode(LoongArchISD::XVPERMI, DL, VT, V1,
2176 DAG.getConstant(MaskImm, DL, Subtarget.getGRLenVT()));
2177}
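
// Illustrative sketch (not part of the original source): how the 8-bit
// XVPERMI immediate above is packed, two bits per mask element; an undef (-1)
// element leaves its field as zero. The helper name is hypothetical.
#include <cstdint>
static uint8_t packXvpermiImmIllustrative(const int Mask[4]) {
  uint8_t Imm = 0;
  for (unsigned I = 0; I < 4; ++I)
    if (Mask[I] != -1)
      Imm |= static_cast<uint8_t>(Mask[I]) << (I * 2);
  return Imm;
}
// Example: the v4i64 mask <0, 3, 2, 1> packs to 0b01101100 (108).
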
2178
2179/// Lower VECTOR_SHUFFLE into XVPERM (if possible).
2180 static SDValue lowerVECTOR_SHUFFLE_XVPERM(const SDLoc &DL, ArrayRef<int> Mask,
2181 MVT VT, SDValue V1, SelectionDAG &DAG,
2182 const LoongArchSubtarget &Subtarget) {
2183 // LoongArch LASX only have XVPERM_W.
2184 if (Mask.size() != 8 || (VT != MVT::v8i32 && VT != MVT::v8f32))
2185 return SDValue();
2186
2187 unsigned NumElts = VT.getVectorNumElements();
2188 unsigned HalfSize = NumElts / 2;
2189 bool FrontLo = true, FrontHi = true;
2190 bool BackLo = true, BackHi = true;
2191
2192 auto inRange = [](int val, int low, int high) {
2193 return (val == -1) || (val >= low && val < high);
2194 };
2195
2196 for (unsigned i = 0; i < HalfSize; ++i) {
2197 int Fronti = Mask[i];
2198 int Backi = Mask[i + HalfSize];
2199
2200 FrontLo &= inRange(Fronti, 0, HalfSize);
2201 FrontHi &= inRange(Fronti, HalfSize, NumElts);
2202 BackLo &= inRange(Backi, 0, HalfSize);
2203 BackHi &= inRange(Backi, HalfSize, NumElts);
2204 }
2205
2206 // If both the lower and upper 128-bit parts access only one half of the
2207 // vector (either lower or upper), avoid using xvperm.w. The latency of
2208 // xvperm.w(3) is higher than using xvshuf(1) and xvori(1).
2209 if ((FrontLo || FrontHi) && (BackLo || BackHi))
2210 return SDValue();
2211
2212 SmallVector<SDValue> Masks;
2213 MVT GRLenVT = Subtarget.getGRLenVT();
2214 for (unsigned i = 0; i < NumElts; ++i)
2215 Masks.push_back(Mask[i] == -1 ? DAG.getUNDEF(GRLenVT)
2216 : DAG.getConstant(Mask[i], DL, GRLenVT));
2217 SDValue MaskVec = DAG.getBuildVector(MVT::v8i32, DL, Masks);
2218
2219 return DAG.getNode(LoongArchISD::XVPERM, DL, VT, V1, MaskVec);
2220}
2221
2222/// Lower VECTOR_SHUFFLE into XVPACKEV (if possible).
2223 static SDValue lowerVECTOR_SHUFFLE_XVPACKEV(const SDLoc &DL, ArrayRef<int> Mask,
2224 MVT VT, SDValue V1, SDValue V2,
2225 SelectionDAG &DAG) {
2226 return lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG);
2227}
2228
2229/// Lower VECTOR_SHUFFLE into XVPACKOD (if possible).
2230 static SDValue lowerVECTOR_SHUFFLE_XVPACKOD(const SDLoc &DL, ArrayRef<int> Mask,
2231 MVT VT, SDValue V1, SDValue V2,
2232 SelectionDAG &DAG) {
2233 return lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG);
2234}
2235
2236/// Lower VECTOR_SHUFFLE into XVILVH (if possible).
2237 static SDValue lowerVECTOR_SHUFFLE_XVILVH(const SDLoc &DL, ArrayRef<int> Mask,
2238 MVT VT, SDValue V1, SDValue V2,
2239 SelectionDAG &DAG) {
2240
2241 const auto &Begin = Mask.begin();
2242 const auto &End = Mask.end();
2243 unsigned HalfSize = Mask.size() / 2;
2244 unsigned LeftSize = HalfSize / 2;
2245 SDValue OriV1 = V1, OriV2 = V2;
2246
2247 if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, HalfSize - LeftSize,
2248 1) &&
2249 fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize + LeftSize, 1))
2250 V1 = OriV1;
2251 else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize,
2252 Mask.size() + HalfSize - LeftSize, 1) &&
2253 fitsRegularPattern<int>(Begin + HalfSize, 2, End,
2254 Mask.size() + HalfSize + LeftSize, 1))
2255 V1 = OriV2;
2256 else
2257 return SDValue();
2258
2259 if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, HalfSize - LeftSize,
2260 1) &&
2261 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize + LeftSize,
2262 1))
2263 V2 = OriV1;
2264 else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize,
2265 Mask.size() + HalfSize - LeftSize, 1) &&
2266 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
2267 Mask.size() + HalfSize + LeftSize, 1))
2268 V2 = OriV2;
2269 else
2270 return SDValue();
2271
2272 return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
2273}
2274
2275/// Lower VECTOR_SHUFFLE into XVILVL (if possible).
2276 static SDValue lowerVECTOR_SHUFFLE_XVILVL(const SDLoc &DL, ArrayRef<int> Mask,
2277 MVT VT, SDValue V1, SDValue V2,
2278 SelectionDAG &DAG) {
2279
2280 const auto &Begin = Mask.begin();
2281 const auto &End = Mask.end();
2282 unsigned HalfSize = Mask.size() / 2;
2283 SDValue OriV1 = V1, OriV2 = V2;
2284
2285 if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, 0, 1) &&
2286 fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize, 1))
2287 V1 = OriV1;
2288 else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, Mask.size(), 1) &&
2289 fitsRegularPattern<int>(Begin + HalfSize, 2, End,
2290 Mask.size() + HalfSize, 1))
2291 V1 = OriV2;
2292 else
2293 return SDValue();
2294
2295 if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, 0, 1) &&
2296 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize, 1))
2297 V2 = OriV1;
2298 else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, Mask.size(),
2299 1) &&
2300 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
2301 Mask.size() + HalfSize, 1))
2302 V2 = OriV2;
2303 else
2304 return SDValue();
2305
2306 return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
2307}
2308
2309/// Lower VECTOR_SHUFFLE into XVPICKEV (if possible).
2310 static SDValue lowerVECTOR_SHUFFLE_XVPICKEV(const SDLoc &DL, ArrayRef<int> Mask,
2311 MVT VT, SDValue V1, SDValue V2,
2312 SelectionDAG &DAG) {
2313
2314 const auto &Begin = Mask.begin();
2315 const auto &LeftMid = Mask.begin() + Mask.size() / 4;
2316 const auto &Mid = Mask.begin() + Mask.size() / 2;
2317 const auto &RightMid = Mask.end() - Mask.size() / 4;
2318 const auto &End = Mask.end();
2319 unsigned HalfSize = Mask.size() / 2;
2320 SDValue OriV1 = V1, OriV2 = V2;
2321
2322 if (fitsRegularPattern<int>(Begin, 1, LeftMid, 0, 2) &&
2323 fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize, 2))
2324 V1 = OriV1;
2325 else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size(), 2) &&
2326 fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize, 2))
2327 V1 = OriV2;
2328 else
2329 return SDValue();
2330
2331 if (fitsRegularPattern<int>(LeftMid, 1, Mid, 0, 2) &&
2332 fitsRegularPattern<int>(RightMid, 1, End, HalfSize, 2))
2333 V2 = OriV1;
2334 else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size(), 2) &&
2335 fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize, 2))
2336 V2 = OriV2;
2337
2338 else
2339 return SDValue();
2340
2341 return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
2342}
2343
2344/// Lower VECTOR_SHUFFLE into XVPICKOD (if possible).
2345 static SDValue lowerVECTOR_SHUFFLE_XVPICKOD(const SDLoc &DL, ArrayRef<int> Mask,
2346 MVT VT, SDValue V1, SDValue V2,
2347 SelectionDAG &DAG) {
2348
2349 const auto &Begin = Mask.begin();
2350 const auto &LeftMid = Mask.begin() + Mask.size() / 4;
2351 const auto &Mid = Mask.begin() + Mask.size() / 2;
2352 const auto &RightMid = Mask.end() - Mask.size() / 4;
2353 const auto &End = Mask.end();
2354 unsigned HalfSize = Mask.size() / 2;
2355 SDValue OriV1 = V1, OriV2 = V2;
2356
2357 if (fitsRegularPattern<int>(Begin, 1, LeftMid, 1, 2) &&
2358 fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize + 1, 2))
2359 V1 = OriV1;
2360 else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size() + 1, 2) &&
2361 fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize + 1,
2362 2))
2363 V1 = OriV2;
2364 else
2365 return SDValue();
2366
2367 if (fitsRegularPattern<int>(LeftMid, 1, Mid, 1, 2) &&
2368 fitsRegularPattern<int>(RightMid, 1, End, HalfSize + 1, 2))
2369 V2 = OriV1;
2370 else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size() + 1, 2) &&
2371 fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize + 1,
2372 2))
2373 V2 = OriV2;
2374 else
2375 return SDValue();
2376
2377 return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
2378}
2379
2380/// Lower VECTOR_SHUFFLE into XVINSVE0 (if possible).
2381static SDValue
2382 lowerVECTOR_SHUFFLE_XVINSVE0(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2383 SDValue V1, SDValue V2, SelectionDAG &DAG,
2384 const LoongArchSubtarget &Subtarget) {
2385 // LoongArch LASX only supports xvinsve0.{w/d}.
2386 if (VT != MVT::v8i32 && VT != MVT::v8f32 && VT != MVT::v4i64 &&
2387 VT != MVT::v4f64)
2388 return SDValue();
2389
2390 MVT GRLenVT = Subtarget.getGRLenVT();
2391 int MaskSize = Mask.size();
2392 assert(MaskSize == (int)VT.getVectorNumElements() && "Unexpected mask size");
2393
2394 // Check if exactly one element of the Mask is replaced by 'Replaced', while
2395 // all other elements are either 'Base + i' or undef (-1). On success, return
2396 // the index of the replaced element. Otherwise, just return -1.
2397 auto checkReplaceOne = [&](int Base, int Replaced) -> int {
2398 int Idx = -1;
2399 for (int i = 0; i < MaskSize; ++i) {
2400 if (Mask[i] == Base + i || Mask[i] == -1)
2401 continue;
2402 if (Mask[i] != Replaced)
2403 return -1;
2404 if (Idx == -1)
2405 Idx = i;
2406 else
2407 return -1;
2408 }
2409 return Idx;
2410 };
2411
2412 // Case 1: the lowest element of V2 replaces one element in V1.
2413 int Idx = checkReplaceOne(0, MaskSize);
2414 if (Idx != -1)
2415 return DAG.getNode(LoongArchISD::XVINSVE0, DL, VT, V1, V2,
2416 DAG.getConstant(Idx, DL, GRLenVT));
2417
2418 // Case 2: the lowest element of V1 replaces one element in V2.
2419 Idx = checkReplaceOne(MaskSize, 0);
2420 if (Idx != -1)
2421 return DAG.getNode(LoongArchISD::XVINSVE0, DL, VT, V2, V1,
2422 DAG.getConstant(Idx, DL, GRLenVT));
2423
2424 return SDValue();
2425}
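
// Illustrative sketch (not part of the original source): the "exactly one
// element replaced" test used above, for a fixed 8-element mask. The helper
// name is hypothetical; it returns the replaced position or -1.
static int findSingleReplacementIllustrative(const int Mask[8], int Base,
                                             int Replaced) {
  int Idx = -1;
  for (int I = 0; I < 8; ++I) {
    if (Mask[I] == Base + I || Mask[I] == -1)
      continue;
    if (Mask[I] != Replaced || Idx != -1)
      return -1;
    Idx = I;
  }
  return Idx;
}
// Example: <0, 1, 2, 8, 4, 5, 6, 7> with (Base, Replaced) = (0, 8) yields 3:
// lane 3 of V1 is replaced by element 0 of V2, matching XVINSVE0 with
// immediate 3.
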
2426
2427/// Lower VECTOR_SHUFFLE into XVSHUF (if possible).
2428 static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, ArrayRef<int> Mask,
2429 MVT VT, SDValue V1, SDValue V2,
2430 SelectionDAG &DAG) {
2431
2432 int MaskSize = Mask.size();
2433 int HalfSize = Mask.size() / 2;
2434 const auto &Begin = Mask.begin();
2435 const auto &Mid = Mask.begin() + HalfSize;
2436 const auto &End = Mask.end();
2437
2438 // VECTOR_SHUFFLE concatenates the vectors:
2439 // <0, 1, 2, 3, 4, 5, 6, 7> + <8, 9, 10, 11, 12, 13, 14, 15>
2440 // shuffling ->
2441 // <0, 1, 2, 3, 8, 9, 10, 11> <4, 5, 6, 7, 12, 13, 14, 15>
2442 //
2443 // XVSHUF concatenates the vectors:
2444 // <a0, a1, a2, a3, b0, b1, b2, b3> + <a4, a5, a6, a7, b4, b5, b6, b7>
2445 // shuffling ->
2446 // <a0, a1, a2, a3, a4, a5, a6, a7> + <b0, b1, b2, b3, b4, b5, b6, b7>
2447 SmallVector<SDValue, 8> MaskAlloc;
2448 for (auto it = Begin; it < Mid; it++) {
2449 if (*it < 0) // UNDEF
2450 MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
2451 else if ((*it >= 0 && *it < HalfSize) ||
2452 (*it >= MaskSize && *it < MaskSize + HalfSize)) {
2453 int M = *it < HalfSize ? *it : *it - HalfSize;
2454 MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
2455 } else
2456 return SDValue();
2457 }
2458 assert((int)MaskAlloc.size() == HalfSize && "xvshuf convert failed!");
2459
2460 for (auto it = Mid; it < End; it++) {
2461 if (*it < 0) // UNDEF
2462 MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
2463 else if ((*it >= HalfSize && *it < MaskSize) ||
2464 (*it >= MaskSize + HalfSize && *it < MaskSize * 2)) {
2465 int M = *it < MaskSize ? *it - HalfSize : *it - MaskSize;
2466 MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
2467 } else
2468 return SDValue();
2469 }
2470 assert((int)MaskAlloc.size() == MaskSize && "xvshuf convert failed!");
2471
2472 EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
2473 SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, MaskAlloc);
2474 return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
2475}
2476
2477/// Shuffle vectors by lane to generate more optimized instructions.
2478/// 256-bit shuffles are always considered as 2-lane 128-bit shuffles.
2479///
2480/// Therefore, except for the following four cases, other cases are regarded
2481/// as cross-lane shuffles, where optimization is relatively limited.
2482///
2483/// - Shuffle high, low lanes of two input vectors
2484/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 3, 6>
2485/// - Shuffle low, high lanes of two input vectors
2486/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 0, 5>
2487/// - Shuffle low, low lanes of two input vectors
2488/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 3, 6>
2489/// - Shuffle high, high lanes of two input vectors
2490/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 0, 5>
2491///
2492/// The first case is the closest to LoongArch instructions and the other
2493/// cases need to be converted to it for processing.
2494///
2495/// This function will return true for the last three cases above and will
2496/// modify V1, V2 and Mask. Otherwise, return false for the first case and
2497/// cross-lane shuffle cases.
2498 static bool canonicalizeShuffleVectorByLane(
2499 const SDLoc &DL, MutableArrayRef<int> Mask, MVT VT, SDValue &V1,
2500 SDValue &V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget) {
2501
2502 enum HalfMaskType { HighLaneTy, LowLaneTy, None };
2503
2504 int MaskSize = Mask.size();
2505 int HalfSize = Mask.size() / 2;
2506 MVT GRLenVT = Subtarget.getGRLenVT();
2507
2508 HalfMaskType preMask = None, postMask = None;
2509
2510 if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
2511 return M < 0 || (M >= 0 && M < HalfSize) ||
2512 (M >= MaskSize && M < MaskSize + HalfSize);
2513 }))
2514 preMask = HighLaneTy;
2515 else if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
2516 return M < 0 || (M >= HalfSize && M < MaskSize) ||
2517 (M >= MaskSize + HalfSize && M < MaskSize * 2);
2518 }))
2519 preMask = LowLaneTy;
2520
2521 if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
2522 return M < 0 || (M >= HalfSize && M < MaskSize) ||
2523 (M >= MaskSize + HalfSize && M < MaskSize * 2);
2524 }))
2525 postMask = LowLaneTy;
2526 else if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
2527 return M < 0 || (M >= 0 && M < HalfSize) ||
2528 (M >= MaskSize && M < MaskSize + HalfSize);
2529 }))
2530 postMask = HighLaneTy;
2531
2532 // The pre-half of mask is high lane type, and the post-half of mask
2533 // is low lane type, which is closest to the LoongArch instructions.
2534 //
2535 // Note: In the LoongArch architecture, the high lane of mask corresponds
2536 // to the lower 128 bits of the vector register, and the low lane of mask
2537 // corresponds to the higher 128 bits of the vector register.
2538 if (preMask == HighLaneTy && postMask == LowLaneTy) {
2539 return false;
2540 }
2541 if (preMask == LowLaneTy && postMask == HighLaneTy) {
2542 V1 = DAG.getBitcast(MVT::v4i64, V1);
2543 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
2544 DAG.getConstant(0b01001110, DL, GRLenVT));
2545 V1 = DAG.getBitcast(VT, V1);
2546
2547 if (!V2.isUndef()) {
2548 V2 = DAG.getBitcast(MVT::v4i64, V2);
2549 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
2550 DAG.getConstant(0b01001110, DL, GRLenVT));
2551 V2 = DAG.getBitcast(VT, V2);
2552 }
2553
2554 for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
2555 *it = *it < 0 ? *it : *it - HalfSize;
2556 }
2557 for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
2558 *it = *it < 0 ? *it : *it + HalfSize;
2559 }
2560 } else if (preMask == LowLaneTy && postMask == LowLaneTy) {
2561 V1 = DAG.getBitcast(MVT::v4i64, V1);
2562 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
2563 DAG.getConstant(0b11101110, DL, GRLenVT));
2564 V1 = DAG.getBitcast(VT, V1);
2565
2566 if (!V2.isUndef()) {
2567 V2 = DAG.getBitcast(MVT::v4i64, V2);
2568 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
2569 DAG.getConstant(0b11101110, DL, GRLenVT));
2570 V2 = DAG.getBitcast(VT, V2);
2571 }
2572
2573 for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
2574 *it = *it < 0 ? *it : *it - HalfSize;
2575 }
2576 } else if (preMask == HighLaneTy && postMask == HighLaneTy) {
2577 V1 = DAG.getBitcast(MVT::v4i64, V1);
2578 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
2579 DAG.getConstant(0b01000100, DL, GRLenVT));
2580 V1 = DAG.getBitcast(VT, V1);
2581
2582 if (!V2.isUndef()) {
2583 V2 = DAG.getBitcast(MVT::v4i64, V2);
2584 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
2585 DAG.getConstant(0b01000100, DL, GRLenVT));
2586 V2 = DAG.getBitcast(VT, V2);
2587 }
2588
2589 for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
2590 *it = *it < 0 ? *it : *it + HalfSize;
2591 }
2592 } else { // cross-lane
2593 return false;
2594 }
2595
2596 return true;
2597}
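
// Illustrative sketch (not part of the original source): the half-mask
// classification performed above, written as a standalone helper with
// hypothetical names. MaskSize is the full element count; an element belongs
// to the "high" lane when it reads the low 128 bits of either source, per the
// note above.
#include <vector>
enum class LaneKindIllustrative { High, Low, Cross };
static LaneKindIllustrative
classifyHalfIllustrative(const std::vector<int> &Half, int MaskSize) {
  int HalfSize = MaskSize / 2;
  bool AllHigh = true, AllLow = true;
  for (int M : Half) {
    if (M < 0)
      continue; // undef fits either form
    bool InHigh = (M < HalfSize) || (M >= MaskSize && M < MaskSize + HalfSize);
    AllHigh = AllHigh && InHigh;
    AllLow = AllLow && !InHigh;
  }
  if (AllHigh)
    return LaneKindIllustrative::High;
  if (AllLow)
    return LaneKindIllustrative::Low;
  return LaneKindIllustrative::Cross;
}
// Example: for v8i32 (MaskSize 8), the half-mask <0, 9, 1, 8> is High and
// <4, 13, 5, 12> is Low; mixing the two index ranges yields Cross.
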
2598
2599/// Lower VECTOR_SHUFFLE as lane permute and then shuffle (if possible).
2600/// Only for 256-bit vector.
2601///
2602/// For example:
2603/// %2 = shufflevector <4 x i64> %0, <4 x i64> poison,
2604/// <4 x i32> <i32 0, i32 3, i32 2, i32 0>
2605/// is lowered to:
2606/// (XVPERMI $xr2, $xr0, 78)
2607/// (XVSHUF $xr1, $xr2, $xr0)
2608/// (XVORI $xr0, $xr1, 0)
2609 static SDValue lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(const SDLoc &DL,
2610 ArrayRef<int> Mask,
2611 MVT VT, SDValue V1,
2612 SDValue V2,
2613 SelectionDAG &DAG) {
2614 assert(VT.is256BitVector() && "Only for 256-bit vector shuffles!");
2615 int Size = Mask.size();
2616 int LaneSize = Size / 2;
2617
2618 bool LaneCrossing[2] = {false, false};
2619 for (int i = 0; i < Size; ++i)
2620 if (Mask[i] >= 0 && ((Mask[i] % Size) / LaneSize) != (i / LaneSize))
2621 LaneCrossing[(Mask[i] % Size) / LaneSize] = true;
2622
2623 // Bail out unless at least one element actually crosses lanes.
2624 if (!LaneCrossing[0] && !LaneCrossing[1])
2625 return SDValue();
2626
2627 SmallVector<int> InLaneMask;
2628 InLaneMask.assign(Mask.begin(), Mask.end());
2629 for (int i = 0; i < Size; ++i) {
2630 int &M = InLaneMask[i];
2631 if (M < 0)
2632 continue;
2633 if (((M % Size) / LaneSize) != (i / LaneSize))
2634 M = (M % LaneSize) + ((i / LaneSize) * LaneSize) + Size;
2635 }
2636
2637 SDValue Flipped = DAG.getBitcast(MVT::v4i64, V1);
2638 Flipped = DAG.getVectorShuffle(MVT::v4i64, DL, Flipped,
2639 DAG.getUNDEF(MVT::v4i64), {2, 3, 0, 1});
2640 Flipped = DAG.getBitcast(VT, Flipped);
2641 return DAG.getVectorShuffle(VT, DL, V1, Flipped, InLaneMask);
2642}
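
// Illustrative sketch (not part of the original source): the in-lane mask
// rewrite above for v4i64 (Size = 4, LaneSize = 2), with a hypothetical helper
// name. Cross-lane indices are redirected to the lane-swapped copy, which is
// the second operand of the new shuffle, hence the "+ Size" offset.
#include <array>
static std::array<int, 4> buildInLaneMaskIllustrative(std::array<int, 4> Mask) {
  const int Size = 4, LaneSize = 2;
  for (int I = 0; I < Size; ++I) {
    int &M = Mask[I];
    if (M < 0)
      continue;
    if (((M % Size) / LaneSize) != (I / LaneSize))
      M = (M % LaneSize) + (I / LaneSize) * LaneSize + Size;
  }
  return Mask;
}
// Example: <0, 3, 2, 0> becomes <0, 5, 2, 6>; positions 1 and 3 now read from
// the flipped copy (element order <2, 3, 0, 1>), so every result position only
// reads within its own 128-bit lane.
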
2643
2644/// Dispatching routine to lower various 256-bit LoongArch vector shuffles.
2645///
2646/// This routine breaks down the specific type of 256-bit shuffle and
2647/// dispatches to the lowering routines accordingly.
2648 static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2649 SDValue V1, SDValue V2, SelectionDAG &DAG,
2650 const LoongArchSubtarget &Subtarget) {
2651 assert((VT.SimpleTy == MVT::v32i8 || VT.SimpleTy == MVT::v16i16 ||
2652 VT.SimpleTy == MVT::v8i32 || VT.SimpleTy == MVT::v4i64 ||
2653 VT.SimpleTy == MVT::v8f32 || VT.SimpleTy == MVT::v4f64) &&
2654 "Vector type is unsupported for lasx!");
2655 assert(V1.getSimpleValueType() == V2.getSimpleValueType() &&
2656 "Two operands have different types!");
2657 assert(VT.getVectorNumElements() == Mask.size() &&
2658 "Unexpected mask size for shuffle!");
2659 assert(Mask.size() % 2 == 0 && "Expected even mask size.");
2660 assert(Mask.size() >= 4 && "Mask size is less than 4.");
2661
2662 APInt KnownUndef, KnownZero;
2663 computeZeroableShuffleElements(Mask, V1, V2, KnownUndef, KnownZero);
2664 APInt Zeroable = KnownUndef | KnownZero;
2665
2666 SDValue Result;
2667 // TODO: Add more comparison patterns.
2668 if (V2.isUndef()) {
2669 if ((Result =
2670 lowerVECTOR_SHUFFLE_XVREPLVEI(DL, Mask, VT, V1, DAG, Subtarget)))
2671 return Result;
2672 if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, Mask, VT, V1, V2, DAG,
2673 Subtarget)))
2674 return Result;
2675 // Try to widen vectors to gain more optimization opportunities.
2676 if (SDValue NewShuffle = widenShuffleMask(DL, Mask, VT, V1, V2, DAG))
2677 return NewShuffle;
2678 if ((Result =
2679 lowerVECTOR_SHUFFLE_XVPERMI(DL, Mask, VT, V1, DAG, Subtarget)))
2680 return Result;
2681 if ((Result = lowerVECTOR_SHUFFLE_XVPERM(DL, Mask, VT, V1, DAG, Subtarget)))
2682 return Result;
2683 if ((Result =
2684 lowerVECTOR_SHUFFLE_IsReverse(DL, Mask, VT, V1, DAG, Subtarget)))
2685 return Result;
2686
2687 // TODO: The commented-out assignment below may be enabled in the future
2688 // to better match the pattern for instruction selection.
2689 /* V2 = V1; */
2690 }
2691
2692 // It is recommended not to change the pattern comparison order for better
2693 // performance.
2694 if ((Result = lowerVECTOR_SHUFFLE_XVPACKEV(DL, Mask, VT, V1, V2, DAG)))
2695 return Result;
2696 if ((Result = lowerVECTOR_SHUFFLE_XVPACKOD(DL, Mask, VT, V1, V2, DAG)))
2697 return Result;
2698 if ((Result = lowerVECTOR_SHUFFLE_XVILVH(DL, Mask, VT, V1, V2, DAG)))
2699 return Result;
2700 if ((Result = lowerVECTOR_SHUFFLE_XVILVL(DL, Mask, VT, V1, V2, DAG)))
2701 return Result;
2702 if ((Result = lowerVECTOR_SHUFFLE_XVPICKEV(DL, Mask, VT, V1, V2, DAG)))
2703 return Result;
2704 if ((Result = lowerVECTOR_SHUFFLE_XVPICKOD(DL, Mask, VT, V1, V2, DAG)))
2705 return Result;
2706 if ((Result = lowerVECTOR_SHUFFLEAsShift(DL, Mask, VT, V1, V2, DAG, Subtarget,
2707 Zeroable)))
2708 return Result;
2709 if ((Result =
2710 lowerVECTOR_SHUFFLE_XVINSVE0(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2711 return Result;
2712 if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, Mask, VT, V1, V2, DAG,
2713 Subtarget)))
2714 return Result;
2715
2716 // Canonicalize non-cross-lane shuffle vectors.
2717 SmallVector<int> NewMask(Mask);
2718 if (canonicalizeShuffleVectorByLane(DL, NewMask, VT, V1, V2, DAG, Subtarget))
2719 return lower256BitShuffle(DL, NewMask, VT, V1, V2, DAG, Subtarget);
2720
2721 // FIXME: Handling the remaining cases earlier can degrade performance
2722 // in some situations. Further analysis is required to enable more
2723 // effective optimizations.
2724 if (V2.isUndef()) {
2725 if ((Result = lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(DL, NewMask, VT,
2726 V1, V2, DAG)))
2727 return Result;
2728 }
2729
2730 if (SDValue NewShuffle = widenShuffleMask(DL, NewMask, VT, V1, V2, DAG))
2731 return NewShuffle;
2732 if ((Result = lowerVECTOR_SHUFFLE_XVSHUF(DL, NewMask, VT, V1, V2, DAG)))
2733 return Result;
2734
2735 return SDValue();
2736}
2737
2738SDValue LoongArchTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
2739 SelectionDAG &DAG) const {
2740 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
2741 ArrayRef<int> OrigMask = SVOp->getMask();
2742 SDValue V1 = Op.getOperand(0);
2743 SDValue V2 = Op.getOperand(1);
2744 MVT VT = Op.getSimpleValueType();
2745 int NumElements = VT.getVectorNumElements();
2746 SDLoc DL(Op);
2747
2748 bool V1IsUndef = V1.isUndef();
2749 bool V2IsUndef = V2.isUndef();
2750 if (V1IsUndef && V2IsUndef)
2751 return DAG.getUNDEF(VT);
2752
2753 // When we create a shuffle node we put the UNDEF node as the second operand,
2754 // but in some cases the first operand may be transformed to UNDEF.
2755 // In this case we should just commute the node.
2756 if (V1IsUndef)
2757 return DAG.getCommutedVectorShuffle(*SVOp);
2758
2759 // Check for non-undef masks pointing at an undef vector and make the masks
2760 // undef as well. This makes it easier to match the shuffle based solely on
2761 // the mask.
2762 if (V2IsUndef &&
2763 any_of(OrigMask, [NumElements](int M) { return M >= NumElements; })) {
2764 SmallVector<int, 8> NewMask(OrigMask);
2765 for (int &M : NewMask)
2766 if (M >= NumElements)
2767 M = -1;
2768 return DAG.getVectorShuffle(VT, DL, V1, V2, NewMask);
2769 }
2770
2771 // Check for illegal shuffle mask element index values.
2772 int MaskUpperLimit = OrigMask.size() * (V2IsUndef ? 1 : 2);
2773 (void)MaskUpperLimit;
2774 assert(llvm::all_of(OrigMask,
2775 [&](int M) { return -1 <= M && M < MaskUpperLimit; }) &&
2776 "Out of bounds shuffle index");
2777
2778 // For each vector width, delegate to a specialized lowering routine.
2779 if (VT.is128BitVector())
2780 return lower128BitShuffle(DL, OrigMask, VT, V1, V2, DAG, Subtarget);
2781
2782 if (VT.is256BitVector())
2783 return lower256BitShuffle(DL, OrigMask, VT, V1, V2, DAG, Subtarget);
2784
2785 return SDValue();
2786}
2787
2788SDValue LoongArchTargetLowering::lowerFP_TO_FP16(SDValue Op,
2789 SelectionDAG &DAG) const {
2790 // Custom lower to ensure the libcall return is passed in an FPR on hard
2791 // float ABIs.
2792 SDLoc DL(Op);
2793 MakeLibCallOptions CallOptions;
2794 SDValue Op0 = Op.getOperand(0);
2795 SDValue Chain = SDValue();
2796 RTLIB::Libcall LC = RTLIB::getFPROUND(Op0.getValueType(), MVT::f16);
2797 SDValue Res;
2798 std::tie(Res, Chain) =
2799 makeLibCall(DAG, LC, MVT::f32, Op0, CallOptions, DL, Chain);
2800 if (Subtarget.is64Bit())
2801 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Res);
2802 return DAG.getBitcast(MVT::i32, Res);
2803}
2804
2805SDValue LoongArchTargetLowering::lowerFP16_TO_FP(SDValue Op,
2806 SelectionDAG &DAG) const {
2807 // Custom lower to ensure the libcall argument is passed in an FPR on hard
2808 // float ABIs.
2809 SDLoc DL(Op);
2810 MakeLibCallOptions CallOptions;
2811 SDValue Op0 = Op.getOperand(0);
2812 SDValue Chain = SDValue();
2813 SDValue Arg = Subtarget.is64Bit() ? DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64,
2814 DL, MVT::f32, Op0)
2815 : DAG.getBitcast(MVT::f32, Op0);
2816 SDValue Res;
2817 std::tie(Res, Chain) = makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Arg,
2818 CallOptions, DL, Chain);
2819 return Res;
2820}
2821
2822SDValue LoongArchTargetLowering::lowerFP_TO_BF16(SDValue Op,
2823 SelectionDAG &DAG) const {
2824 assert(Subtarget.hasBasicF() && "Unexpected custom legalization");
2825 SDLoc DL(Op);
2826 MakeLibCallOptions CallOptions;
2827 RTLIB::Libcall LC =
2828 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16);
2829 SDValue Res =
2830 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
2831 if (Subtarget.is64Bit())
2832 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Res);
2833 return DAG.getBitcast(MVT::i32, Res);
2834}
2835
2836SDValue LoongArchTargetLowering::lowerBF16_TO_FP(SDValue Op,
2837 SelectionDAG &DAG) const {
2838 assert(Subtarget.hasBasicF() && "Unexpected custom legalization");
2839 MVT VT = Op.getSimpleValueType();
2840 SDLoc DL(Op);
2841 Op = DAG.getNode(
2842 ISD::SHL, DL, Op.getOperand(0).getValueType(), Op.getOperand(0),
2843 DAG.getShiftAmountConstant(16, Op.getOperand(0).getValueType(), DL));
2844 SDValue Res = Subtarget.is64Bit() ? DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64,
2845 DL, MVT::f32, Op)
2846 : DAG.getBitcast(MVT::f32, Op);
2847 if (VT != MVT::f32)
2848 return DAG.getNode(ISD::FP_EXTEND, DL, VT, Res);
2849 return Res;
2850}
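
// Illustrative sketch (not part of the original source): the shift-by-16 trick
// above, shown on plain integers. A bf16 value shares the sign, exponent and
// top mantissa bits of an IEEE f32, so placing its bits in the upper half of a
// 32-bit word (with a zero low half) yields the widened float. The helper name
// is hypothetical.
#include <cstdint>
#include <cstring>
static float bf16BitsToFloatIllustrative(uint16_t Bits) {
  uint32_t Widened = static_cast<uint32_t>(Bits) << 16;
  float F;
  std::memcpy(&F, &Widened, sizeof(F));
  return F;
}
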
2851
2852// Lower BUILD_VECTOR as broadcast load (if possible).
2853// For example:
2854// %a = load i8, ptr %ptr
2855// %b = build_vector %a, %a, %a, %a
2856// is lowered to:
2857// (VLDREPL_B $a0, 0)
2858 static SDValue lowerBUILD_VECTORAsBroadCastLoad(BuildVectorSDNode *BVOp,
2859 const SDLoc &DL,
2860 SelectionDAG &DAG) {
2861 MVT VT = BVOp->getSimpleValueType(0);
2862 int NumOps = BVOp->getNumOperands();
2863
2864 assert((VT.is128BitVector() || VT.is256BitVector()) &&
2865 "Unsupported vector type for broadcast.");
2866
2867 SDValue IdentitySrc;
2868 bool IsIdentity = true;
2869
2870 for (int i = 0; i != NumOps; i++) {
2871 SDValue Op = BVOp->getOperand(i);
2872 if (Op.getOpcode() != ISD::LOAD || (IdentitySrc && Op != IdentitySrc)) {
2873 IsIdentity = false;
2874 break;
2875 }
2876 IdentitySrc = BVOp->getOperand(0);
2877 }
2878
2879 // Make sure that this load is valid and only has one user.
2880 if (!IsIdentity || !IdentitySrc || !BVOp->isOnlyUserOf(IdentitySrc.getNode()))
2881 return SDValue();
2882
2883 auto *LN = cast<LoadSDNode>(IdentitySrc);
2884 auto ExtType = LN->getExtensionType();
2885
2886 if ((ExtType == ISD::EXTLOAD || ExtType == ISD::NON_EXTLOAD) &&
2887 VT.getScalarSizeInBits() == LN->getMemoryVT().getScalarSizeInBits()) {
2888 SDVTList Tys =
2889 LN->isIndexed()
2890 ? DAG.getVTList(VT, LN->getBasePtr().getValueType(), MVT::Other)
2891 : DAG.getVTList(VT, MVT::Other);
2892 SDValue Ops[] = {LN->getChain(), LN->getBasePtr(), LN->getOffset()};
2893 SDValue BCast = DAG.getNode(LoongArchISD::VLDREPL, DL, Tys, Ops);
2894 DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BCast.getValue(1));
2895 return BCast;
2896 }
2897 return SDValue();
2898}
2899
2900// Sequentially insert elements from Ops into Vector, from low to high indices.
2901// Note: Ops can have fewer elements than Vector.
2902 static void fillVector(ArrayRef<SDValue> Ops, SelectionDAG &DAG, SDLoc DL,
2903 const LoongArchSubtarget &Subtarget, SDValue &Vector,
2904 EVT ResTy) {
2905 assert(Ops.size() <= ResTy.getVectorNumElements());
2906
2907 SDValue Op0 = Ops[0];
2908 if (!Op0.isUndef())
2909 Vector = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, ResTy, Op0);
2910 for (unsigned i = 1; i < Ops.size(); ++i) {
2911 SDValue Opi = Ops[i];
2912 if (Opi.isUndef())
2913 continue;
2914 Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector, Opi,
2915 DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
2916 }
2917}
2918
2919// Build a ResTy subvector from Node, taking NumElts elements starting at index
2920// 'first'.
2921 static SDValue fillSubVectorFromBuildVector(BuildVectorSDNode *Node,
2922 SelectionDAG &DAG, SDLoc DL,
2923 const LoongArchSubtarget &Subtarget,
2924 EVT ResTy, unsigned first) {
2925 unsigned NumElts = ResTy.getVectorNumElements();
2926
2927 assert(first + NumElts <= Node->getSimpleValueType(0).getVectorNumElements());
2928
2929 SmallVector<SDValue, 16> Ops(Node->op_begin() + first,
2930 Node->op_begin() + first + NumElts);
2931 SDValue Vector = DAG.getUNDEF(ResTy);
2932 fillVector(Ops, DAG, DL, Subtarget, Vector, ResTy);
2933 return Vector;
2934}
2935
2936SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
2937 SelectionDAG &DAG) const {
2938 BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op);
2939 MVT VT = Node->getSimpleValueType(0);
2940 EVT ResTy = Op->getValueType(0);
2941 unsigned NumElts = ResTy.getVectorNumElements();
2942 SDLoc DL(Op);
2943 APInt SplatValue, SplatUndef;
2944 unsigned SplatBitSize;
2945 bool HasAnyUndefs;
2946 bool IsConstant = false;
2947 bool UseSameConstant = true;
2948 SDValue ConstantValue;
2949 bool Is128Vec = ResTy.is128BitVector();
2950 bool Is256Vec = ResTy.is256BitVector();
2951
2952 if ((!Subtarget.hasExtLSX() || !Is128Vec) &&
2953 (!Subtarget.hasExtLASX() || !Is256Vec))
2954 return SDValue();
2955
2956 if (SDValue Result = lowerBUILD_VECTORAsBroadCastLoad(Node, DL, DAG))
2957 return Result;
2958
2959 if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
2960 /*MinSplatBits=*/8) &&
2961 SplatBitSize <= 64) {
2962 // We can only cope with 8, 16, 32, or 64-bit elements.
2963 if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 &&
2964 SplatBitSize != 64)
2965 return SDValue();
2966
2967 if (SplatBitSize == 64 && !Subtarget.is64Bit()) {
2968 // We can only handle 64-bit elements that are within
2969 // the signed 10-bit range or match vldi patterns on 32-bit targets.
2970 // See the BUILD_VECTOR case in LoongArchDAGToDAGISel::Select().
2971 if (!SplatValue.isSignedIntN(10) &&
2972 !isImmVLDILegalForMode1(SplatValue, SplatBitSize).first)
2973 return SDValue();
2974 if ((Is128Vec && ResTy == MVT::v4i32) ||
2975 (Is256Vec && ResTy == MVT::v8i32))
2976 return Op;
2977 }
2978
2979 EVT ViaVecTy;
2980
2981 switch (SplatBitSize) {
2982 default:
2983 return SDValue();
2984 case 8:
2985 ViaVecTy = Is128Vec ? MVT::v16i8 : MVT::v32i8;
2986 break;
2987 case 16:
2988 ViaVecTy = Is128Vec ? MVT::v8i16 : MVT::v16i16;
2989 break;
2990 case 32:
2991 ViaVecTy = Is128Vec ? MVT::v4i32 : MVT::v8i32;
2992 break;
2993 case 64:
2994 ViaVecTy = Is128Vec ? MVT::v2i64 : MVT::v4i64;
2995 break;
2996 }
2997
2998 // SelectionDAG::getConstant will promote SplatValue appropriately.
2999 SDValue Result = DAG.getConstant(SplatValue, DL, ViaVecTy);
3000
3001 // Bitcast to the type we originally wanted.
3002 if (ViaVecTy != ResTy)
3003 Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result);
3004
3005 return Result;
3006 }
3007
3008 if (DAG.isSplatValue(Op, /*AllowUndefs=*/false))
3009 return Op;
3010
3011 for (unsigned i = 0; i < NumElts; ++i) {
3012 SDValue Opi = Node->getOperand(i);
3013 if (isIntOrFPConstant(Opi)) {
3014 IsConstant = true;
3015 if (!ConstantValue.getNode())
3016 ConstantValue = Opi;
3017 else if (ConstantValue != Opi)
3018 UseSameConstant = false;
3019 }
3020 }
3021
3022 // If the type of BUILD_VECTOR is v2f64, custom legalizing it has no benefits.
3023 if (IsConstant && UseSameConstant && ResTy != MVT::v2f64) {
3024 SDValue Result = DAG.getSplatBuildVector(ResTy, DL, ConstantValue);
3025 for (unsigned i = 0; i < NumElts; ++i) {
3026 SDValue Opi = Node->getOperand(i);
3027 if (!isIntOrFPConstant(Opi))
3028 Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Result, Opi,
3029 DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
3030 }
3031 return Result;
3032 }
3033
3034 if (!IsConstant) {
3035 // If the BUILD_VECTOR has a repeated pattern, use INSERT_VECTOR_ELT to fill
3036 // the sub-sequence of the vector and then broadcast the sub-sequence.
3037 //
3038 // TODO: If the BUILD_VECTOR contains undef elements, consider falling
3039 // back to use INSERT_VECTOR_ELT to materialize the vector, because it
3040 // generates worse code in some cases. This could be further optimized
3041 // with more consideration.
3042 SmallVector<SDValue> Sequence;
3043 BitVector UndefElements;
3044 if (Node->getRepeatedSequence(Sequence, &UndefElements) &&
3045 UndefElements.count() == 0) {
3046 // Use LSX instructions to fill the sub-sequence of a 256-bit vector,
3047 // because the high part can be simply treated as undef.
3048 SDValue Vector = DAG.getUNDEF(ResTy);
3049 EVT FillTy = Is256Vec
3051 : ResTy;
3052 SDValue FillVec =
3053 Is256Vec ? DAG.getExtractSubvector(DL, FillTy, Vector, 0) : Vector;
3054
3055 fillVector(Sequence, DAG, DL, Subtarget, FillVec, FillTy);
3056
3057 unsigned SeqLen = Sequence.size();
3058 unsigned SplatLen = NumElts / SeqLen;
3059 MVT SplatEltTy = MVT::getIntegerVT(VT.getScalarSizeInBits() * SeqLen);
3060 MVT SplatTy = MVT::getVectorVT(SplatEltTy, SplatLen);
3061
3062 // If the size of the sub-sequence is half of a 256-bit vector, bitcast the
3063 // vector to v4i64 type in order to match the pattern of XVREPLVE0Q.
3064 if (SplatEltTy == MVT::i128)
3065 SplatTy = MVT::v4i64;
3066
3067 SDValue SplatVec;
3068 SDValue SrcVec = DAG.getBitcast(
3069 SplatTy,
3070 Is256Vec ? DAG.getInsertSubvector(DL, Vector, FillVec, 0) : FillVec);
3071 if (Is256Vec) {
3072 SplatVec =
3073 DAG.getNode((SplatEltTy == MVT::i128) ? LoongArchISD::XVREPLVE0Q
3074 : LoongArchISD::XVREPLVE0,
3075 DL, SplatTy, SrcVec);
3076 } else {
3077 SplatVec = DAG.getNode(LoongArchISD::VREPLVEI, DL, SplatTy, SrcVec,
3078 DAG.getConstant(0, DL, Subtarget.getGRLenVT()));
3079 }
3080
3081 return DAG.getBitcast(ResTy, SplatVec);
3082 }
3083
3084 // Use INSERT_VECTOR_ELT operations rather than expand to stores, because
3085 // using memory operations is much more expensive.
3086 //
3087 // For 256-bit vectors, normally split into two halves and concatenate.
3088 // Special case: for v8i32/v8f32/v4i64/v4f64, if the upper half has only
3089 // one non-undef element, skip splitting to avoid a worse result.
3090 if (ResTy == MVT::v8i32 || ResTy == MVT::v8f32 || ResTy == MVT::v4i64 ||
3091 ResTy == MVT::v4f64) {
3092 unsigned NonUndefCount = 0;
3093 for (unsigned i = NumElts / 2; i < NumElts; ++i) {
3094 if (!Node->getOperand(i).isUndef()) {
3095 ++NonUndefCount;
3096 if (NonUndefCount > 1)
3097 break;
3098 }
3099 }
3100 if (NonUndefCount == 1)
3101 return fillSubVectorFromBuildVector(Node, DAG, DL, Subtarget, ResTy, 0);
3102 }
3103
3104 EVT VecTy =
3105 Is256Vec ? ResTy.getHalfNumVectorElementsVT(*DAG.getContext()) : ResTy;
3106 SDValue Vector =
3107 fillSubVectorFromBuildVector(Node, DAG, DL, Subtarget, VecTy, 0);
3108
3109 if (Is128Vec)
3110 return Vector;
3111
3112 SDValue VectorHi = fillSubVectorFromBuildVector(Node, DAG, DL, Subtarget,
3113 VecTy, NumElts / 2);
3114
3115 return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResTy, Vector, VectorHi);
3116 }
3117
3118 return SDValue();
3119}
3120
3121SDValue LoongArchTargetLowering::lowerCONCAT_VECTORS(SDValue Op,
3122 SelectionDAG &DAG) const {
3123 SDLoc DL(Op);
3124 MVT ResVT = Op.getSimpleValueType();
3125 assert(ResVT.is256BitVector() && Op.getNumOperands() == 2);
3126
3127 unsigned NumOperands = Op.getNumOperands();
3128 unsigned NumFreezeUndef = 0;
3129 unsigned NumZero = 0;
3130 unsigned NumNonZero = 0;
3131 unsigned NonZeros = 0;
3132 SmallSet<SDValue, 4> Undefs;
3133 for (unsigned i = 0; i != NumOperands; ++i) {
3134 SDValue SubVec = Op.getOperand(i);
3135 if (SubVec.isUndef())
3136 continue;
3137 if (ISD::isFreezeUndef(SubVec.getNode())) {
3138 // If the freeze(undef) has multiple uses then we must fold to zero.
3139 if (SubVec.hasOneUse()) {
3140 ++NumFreezeUndef;
3141 } else {
3142 ++NumZero;
3143 Undefs.insert(SubVec);
3144 }
3145 } else if (ISD::isBuildVectorAllZeros(SubVec.getNode()))
3146 ++NumZero;
3147 else {
3148 assert(i < sizeof(NonZeros) * CHAR_BIT); // Ensure the shift is in range.
3149 NonZeros |= 1 << i;
3150 ++NumNonZero;
3151 }
3152 }
3153
3154 // If we have more than 2 non-zeros, build each half separately.
3155 if (NumNonZero > 2) {
3156 MVT HalfVT = ResVT.getHalfNumVectorElementsVT();
3157 ArrayRef<SDUse> Ops = Op->ops();
3158 SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
3159 Ops.slice(0, NumOperands / 2));
3160 SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
3161 Ops.slice(NumOperands / 2));
3162 return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi);
3163 }
3164
3165 // Otherwise, build it up through insert_subvectors.
3166 SDValue Vec = NumZero ? DAG.getConstant(0, DL, ResVT)
3167 : (NumFreezeUndef ? DAG.getFreeze(DAG.getUNDEF(ResVT))
3168 : DAG.getUNDEF(ResVT));
3169
3170 // Replace Undef operands with ZeroVector.
3171 for (SDValue U : Undefs)
3172 DAG.ReplaceAllUsesWith(U, DAG.getConstant(0, DL, U.getSimpleValueType()));
3173
3174 MVT SubVT = Op.getOperand(0).getSimpleValueType();
3175 unsigned NumSubElems = SubVT.getVectorNumElements();
3176 for (unsigned i = 0; i != NumOperands; ++i) {
3177 if ((NonZeros & (1 << i)) == 0)
3178 continue;
3179
3180 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ResVT, Vec, Op.getOperand(i),
3181 DAG.getVectorIdxConstant(i * NumSubElems, DL));
3182 }
3183
3184 return Vec;
3185}
3186
3187SDValue
3188LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
3189 SelectionDAG &DAG) const {
3190 MVT EltVT = Op.getSimpleValueType();
3191 SDValue Vec = Op->getOperand(0);
3192 EVT VecTy = Vec->getValueType(0);
3193 SDValue Idx = Op->getOperand(1);
3194 SDLoc DL(Op);
3195 MVT GRLenVT = Subtarget.getGRLenVT();
3196
3197 assert(VecTy.is256BitVector() && "Unexpected EXTRACT_VECTOR_ELT vector type");
3198
3199 if (isa<ConstantSDNode>(Idx))
3200 return Op;
3201
3202 switch (VecTy.getSimpleVT().SimpleTy) {
3203 default:
3204 llvm_unreachable("Unexpected type");
3205 case MVT::v32i8:
3206 case MVT::v16i16:
3207 case MVT::v4i64:
3208 case MVT::v4f64: {
3209 // Extract the high half subvector and place it in the low half of a new
3210 // vector. It doesn't matter what the high half of the new vector is.
3211 EVT HalfTy = VecTy.getHalfNumVectorElementsVT(*DAG.getContext());
3212 SDValue VecHi =
3213 DAG.getExtractSubvector(DL, HalfTy, Vec, HalfTy.getVectorNumElements());
3214 SDValue TmpVec =
3215 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecTy, DAG.getUNDEF(VecTy),
3216 VecHi, DAG.getConstant(0, DL, GRLenVT));
3217
3218 // Shuffle the original Vec and the TmpVec using MaskVec; the lowest element
3219 // of MaskVec is Idx, the rest do not matter. ResVec[0] will hold the
3220 // desired element.
3221 SDValue IdxCp =
3222 Subtarget.is64Bit()
3223 ? DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Idx)
3224 : DAG.getBitcast(MVT::f32, Idx);
3225 SDValue IdxVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v8f32, IdxCp);
3226 SDValue MaskVec =
3227 DAG.getBitcast((VecTy == MVT::v4f64) ? MVT::v4i64 : VecTy, IdxVec);
3228 SDValue ResVec =
3229 DAG.getNode(LoongArchISD::VSHUF, DL, VecTy, MaskVec, TmpVec, Vec);
3230
3231 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, ResVec,
3232 DAG.getConstant(0, DL, GRLenVT));
3233 }
3234 case MVT::v8i32:
3235 case MVT::v8f32: {
3236 SDValue SplatIdx = DAG.getSplatBuildVector(MVT::v8i32, DL, Idx);
3237 SDValue SplatValue =
3238 DAG.getNode(LoongArchISD::XVPERM, DL, VecTy, Vec, SplatIdx);
3239
3240 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, SplatValue,
3241 DAG.getConstant(0, DL, GRLenVT));
3242 }
3243 }
3244}
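
// Illustrative sketch (not part of the original source): a scalar model of the
// v8i32/v8f32 path above, under the assumption that XVPERM selects each result
// lane by its per-lane index from the source vector. Splatting the runtime
// index moves the wanted element into lane 0, which a constant-index extract
// then reads. The helper name is hypothetical.
#include <array>
static int extractDynamicIllustrative(const std::array<int, 8> &Vec,
                                      unsigned Idx) {
  std::array<int, 8> Permuted{};
  for (unsigned I = 0; I < 8; ++I)
    Permuted[I] = Vec[Idx % 8]; // every lane selects element Idx
  return Permuted[0];
}
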
3245
3246SDValue
3247LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
3248 SelectionDAG &DAG) const {
3249 MVT VT = Op.getSimpleValueType();
3250 MVT EltVT = VT.getVectorElementType();
3251 unsigned NumElts = VT.getVectorNumElements();
3252 unsigned EltSizeInBits = EltVT.getScalarSizeInBits();
3253 SDLoc DL(Op);
3254 SDValue Op0 = Op.getOperand(0);
3255 SDValue Op1 = Op.getOperand(1);
3256 SDValue Op2 = Op.getOperand(2);
3257
3258 if (isa<ConstantSDNode>(Op2))
3259 return Op;
3260
3261 MVT IdxTy = MVT::getIntegerVT(EltSizeInBits);
3262 MVT IdxVTy = MVT::getVectorVT(IdxTy, NumElts);
3263
3264 if (!isTypeLegal(VT) || !isTypeLegal(IdxVTy))
3265 return SDValue();
3266
3267 SDValue SplatElt = DAG.getSplatBuildVector(VT, DL, Op1);
3268 SmallVector<SDValue, 32> RawIndices;
3269 SDValue SplatIdx;
3270 SDValue Indices;
3271
3272 if (!Subtarget.is64Bit() && IdxTy == MVT::i64) {
3273 MVT PairVTy = MVT::getVectorVT(MVT::i32, NumElts * 2);
3274 for (unsigned i = 0; i < NumElts; ++i) {
3275 RawIndices.push_back(Op2);
3276 RawIndices.push_back(DAG.getConstant(0, DL, MVT::i32));
3277 }
3278 SplatIdx = DAG.getBuildVector(PairVTy, DL, RawIndices);
3279 SplatIdx = DAG.getBitcast(IdxVTy, SplatIdx);
3280
3281 RawIndices.clear();
3282 for (unsigned i = 0; i < NumElts; ++i) {
3283 RawIndices.push_back(DAG.getConstant(i, DL, MVT::i32));
3284 RawIndices.push_back(DAG.getConstant(0, DL, MVT::i32));
3285 }
3286 Indices = DAG.getBuildVector(PairVTy, DL, RawIndices);
3287 Indices = DAG.getBitcast(IdxVTy, Indices);
3288 } else {
3289 SplatIdx = DAG.getSplatBuildVector(IdxVTy, DL, Op2);
3290
3291 for (unsigned i = 0; i < NumElts; ++i)
3292 RawIndices.push_back(DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
3293 Indices = DAG.getBuildVector(IdxVTy, DL, RawIndices);
3294 }
3295
3296 // insert vec, elt, idx
3297 // =>
3298 // select (splatidx == {0,1,2...}) ? splatelt : vec
3299 SDValue SelectCC =
3300 DAG.getSetCC(DL, IdxVTy, SplatIdx, Indices, ISD::CondCode::SETEQ);
3301 return DAG.getNode(ISD::VSELECT, DL, VT, SelectCC, SplatElt, Op0);
3302}
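
// Illustrative sketch (not part of the original source): a scalar model of the
// select-based insertion above for a 4-element vector with a runtime index;
// the helper name is hypothetical.
#include <array>
static std::array<int, 4> insertEltIllustrative(std::array<int, 4> Vec, int Elt,
                                                unsigned Idx) {
  for (unsigned I = 0; I < 4; ++I)
    // VSELECT lane I: (splat(Idx) == {0,1,2,3})[I] ? splat(Elt)[I] : Vec[I]
    Vec[I] = (I == Idx) ? Elt : Vec[I];
  return Vec;
}
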
3303
3304SDValue LoongArchTargetLowering::lowerATOMIC_FENCE(SDValue Op,
3305 SelectionDAG &DAG) const {
3306 SDLoc DL(Op);
3307 SyncScope::ID FenceSSID =
3308 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
3309
3310 // singlethread fences only synchronize with signal handlers on the same
3311 // thread and thus only need to preserve instruction order, not actually
3312 // enforce memory ordering.
3313 if (FenceSSID == SyncScope::SingleThread)
3314 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
3315 return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
3316
3317 return Op;
3318}
3319
3320SDValue LoongArchTargetLowering::lowerWRITE_REGISTER(SDValue Op,
3321 SelectionDAG &DAG) const {
3322
3323 if (Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i32) {
3324 DAG.getContext()->emitError(
3325 "On LA64, only 64-bit registers can be written.");
3326 return Op.getOperand(0);
3327 }
3328
3329 if (!Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i64) {
3330 DAG.getContext()->emitError(
3331 "On LA32, only 32-bit registers can be written.");
3332 return Op.getOperand(0);
3333 }
3334
3335 return Op;
3336}
3337
3338SDValue LoongArchTargetLowering::lowerFRAMEADDR(SDValue Op,
3339 SelectionDAG &DAG) const {
3340 if (!isa<ConstantSDNode>(Op.getOperand(0))) {
3341 DAG.getContext()->emitError("argument to '__builtin_frame_address' must "
3342 "be a constant integer");
3343 return SDValue();
3344 }
3345
3346 MachineFunction &MF = DAG.getMachineFunction();
3347 MF.getFrameInfo().setFrameAddressIsTaken(true);
3348 Register FrameReg = Subtarget.getRegisterInfo()->getFrameRegister(MF);
3349 EVT VT = Op.getValueType();
3350 SDLoc DL(Op);
3351 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
3352 unsigned Depth = Op.getConstantOperandVal(0);
3353 int GRLenInBytes = Subtarget.getGRLen() / 8;
3354
3355 while (Depth--) {
3356 int Offset = -(GRLenInBytes * 2);
3357 SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
3358 DAG.getSignedConstant(Offset, DL, VT));
3359 FrameAddr =
3360 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
3361 }
3362 return FrameAddr;
3363}
3364
3365SDValue LoongArchTargetLowering::lowerRETURNADDR(SDValue Op,
3366 SelectionDAG &DAG) const {
3367 // Currently only support lowering return address for current frame.
3368 if (Op.getConstantOperandVal(0) != 0) {
3369 DAG.getContext()->emitError(
3370 "return address can only be determined for the current frame");
3371 return SDValue();
3372 }
3373
3374 MachineFunction &MF = DAG.getMachineFunction();
3375 MF.getFrameInfo().setReturnAddressIsTaken(true);
3376 MVT GRLenVT = Subtarget.getGRLenVT();
3377
3378 // Return the value of the return address register, marking it an implicit
3379 // live-in.
3380 Register Reg = MF.addLiveIn(Subtarget.getRegisterInfo()->getRARegister(),
3381 getRegClassFor(GRLenVT));
3382 return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), Reg, GRLenVT);
3383}
3384
3385SDValue LoongArchTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
3386 SelectionDAG &DAG) const {
3387 MachineFunction &MF = DAG.getMachineFunction();
3388 auto Size = Subtarget.getGRLen() / 8;
3389 auto FI = MF.getFrameInfo().CreateFixedObject(Size, 0, false);
3390 return DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
3391}
3392
3393SDValue LoongArchTargetLowering::lowerVASTART(SDValue Op,
3394 SelectionDAG &DAG) const {
3395 MachineFunction &MF = DAG.getMachineFunction();
3396 auto *FuncInfo = MF.getInfo<LoongArchMachineFunctionInfo>();
3397
3398 SDLoc DL(Op);
3399 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
3400 getPointerTy(DAG.getDataLayout()));
3401
3402 // vastart just stores the address of the VarArgsFrameIndex slot into the
3403 // memory location argument.
3404 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3405 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
3406 MachinePointerInfo(SV));
3407}
3408
3409SDValue LoongArchTargetLowering::lowerUINT_TO_FP(SDValue Op,
3410 SelectionDAG &DAG) const {
3411 assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
3412 !Subtarget.hasBasicD() && "unexpected target features");
3413
3414 SDLoc DL(Op);
3415 SDValue Op0 = Op.getOperand(0);
3416 if (Op0->getOpcode() == ISD::AND) {
3417 auto *C = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
3418 if (C && C->getZExtValue() < UINT64_C(0xFFFFFFFF))
3419 return Op;
3420 }
3421
3422 if (Op0->getOpcode() == LoongArchISD::BSTRPICK &&
3423 Op0.getConstantOperandVal(1) < UINT64_C(0X1F) &&
3424 Op0.getConstantOperandVal(2) == UINT64_C(0))
3425 return Op;
3426
3427 if (Op0.getOpcode() == ISD::AssertZext &&
3428 dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLT(MVT::i32))
3429 return Op;
3430
3431 EVT OpVT = Op0.getValueType();
3432 EVT RetVT = Op.getValueType();
3433 RTLIB::Libcall LC = RTLIB::getUINTTOFP(OpVT, RetVT);
3434 MakeLibCallOptions CallOptions;
3435 CallOptions.setTypeListBeforeSoften(OpVT, RetVT);
3436 SDValue Chain = SDValue();
3437 SDValue Result;
3438 std::tie(Result, Chain) =
3439 makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
3440 return Result;
3441}
3442
3443SDValue LoongArchTargetLowering::lowerSINT_TO_FP(SDValue Op,
3444 SelectionDAG &DAG) const {
3445 assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
3446 !Subtarget.hasBasicD() && "unexpected target features");
3447
3448 SDLoc DL(Op);
3449 SDValue Op0 = Op.getOperand(0);
3450
3451 if ((Op0.getOpcode() == ISD::AssertSext ||
3452 Op0.getOpcode() == ISD::AssertZext) &&
3453 dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLE(MVT::i32))
3454 return Op;
3455
3456 EVT OpVT = Op0.getValueType();
3457 EVT RetVT = Op.getValueType();
3458 RTLIB::Libcall LC = RTLIB::getSINTTOFP(OpVT, RetVT);
3459 MakeLibCallOptions CallOptions;
3460 CallOptions.setTypeListBeforeSoften(OpVT, RetVT);
3461 SDValue Chain = SDValue();
3462 SDValue Result;
3463 std::tie(Result, Chain) =
3464 makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
3465 return Result;
3466}
3467
3468SDValue LoongArchTargetLowering::lowerBITCAST(SDValue Op,
3469 SelectionDAG &DAG) const {
3470
3471 SDLoc DL(Op);
3472 EVT VT = Op.getValueType();
3473 SDValue Op0 = Op.getOperand(0);
3474 EVT Op0VT = Op0.getValueType();
3475
3476 if (Op.getValueType() == MVT::f32 && Op0VT == MVT::i32 &&
3477 Subtarget.is64Bit() && Subtarget.hasBasicF()) {
3478 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
3479 return DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, NewOp0);
3480 }
3481 if (VT == MVT::f64 && Op0VT == MVT::i64 && !Subtarget.is64Bit()) {
3482 SDValue Lo, Hi;
3483 std::tie(Lo, Hi) = DAG.SplitScalar(Op0, DL, MVT::i32, MVT::i32);
3484 return DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64, Lo, Hi);
3485 }
3486 return Op;
3487}
3488
3489SDValue LoongArchTargetLowering::lowerFP_TO_SINT(SDValue Op,
3490 SelectionDAG &DAG) const {
3491
3492 SDLoc DL(Op);
3493 SDValue Op0 = Op.getOperand(0);
3494
3495 if (Op0.getValueType() == MVT::f16)
3496 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
3497
3498 if (Op.getValueSizeInBits() > 32 && Subtarget.hasBasicF() &&
3499 !Subtarget.hasBasicD()) {
3500 SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, MVT::f32, Op0);
3501 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Dst);
3502 }
3503
3504 EVT FPTy = EVT::getFloatingPointVT(Op.getValueSizeInBits());
3505 SDValue Trunc = DAG.getNode(LoongArchISD::FTINT, DL, FPTy, Op0);
3506 return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Trunc);
3507}
3508
3509static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty,
3510 SelectionDAG &DAG, unsigned Flags) {
3511 return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
3512}
3513
3514static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty,
3515 SelectionDAG &DAG, unsigned Flags) {
3516 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
3517 Flags);
3518}
3519
3520static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty,
3521 SelectionDAG &DAG, unsigned Flags) {
3522 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
3523 N->getOffset(), Flags);
3524}
3525
3526static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty,
3527 SelectionDAG &DAG, unsigned Flags) {
3528 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
3529}
3530
3531template <class NodeTy>
3532SDValue LoongArchTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
3533 CodeModel::Model M,
3534 bool IsLocal) const {
3535 SDLoc DL(N);
3536 EVT Ty = getPointerTy(DAG.getDataLayout());
3537 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
3538 SDValue Load;
3539
3540 switch (M) {
3541 default:
3542 report_fatal_error("Unsupported code model");
3543
3544 case CodeModel::Large: {
3545 assert(Subtarget.is64Bit() && "Large code model requires LA64");
3546
3547 // This is not actually used, but is necessary for successfully matching
3548 // the PseudoLA_*_LARGE nodes.
3549 SDValue Tmp = DAG.getConstant(0, DL, Ty);
3550 if (IsLocal) {
3551 // This generates the pattern (PseudoLA_PCREL_LARGE tmp sym), that
3552 // eventually becomes the desired 5-insn code sequence.
3553 Load = SDValue(DAG.getMachineNode(LoongArch::PseudoLA_PCREL_LARGE, DL, Ty,
3554 Tmp, Addr),
3555 0);
3556 } else {
3557 // This generates the pattern (PseudoLA_GOT_LARGE tmp sym), that
3558 // eventually becomes the desired 5-insn code sequence.
3559 Load = SDValue(
3560 DAG.getMachineNode(LoongArch::PseudoLA_GOT_LARGE, DL, Ty, Tmp, Addr),
3561 0);
3562 }
3563 break;
3564 }
3565
3566 case CodeModel::Small:
3567 case CodeModel::Medium:
3568 if (IsLocal) {
3569 // This generates the pattern (PseudoLA_PCREL sym), which expands to
3570 // (addi.w/d (pcalau12i %pc_hi20(sym)) %pc_lo12(sym)).
3571 Load = SDValue(
3572 DAG.getMachineNode(LoongArch::PseudoLA_PCREL, DL, Ty, Addr), 0);
3573 } else {
3574 // This generates the pattern (PseudoLA_GOT sym), which expands to (ld.w/d
3575 // (pcalau12i %got_pc_hi20(sym)) %got_pc_lo12(sym)).
3576 Load =
3577 SDValue(DAG.getMachineNode(LoongArch::PseudoLA_GOT, DL, Ty, Addr), 0);
3578 }
3579 }
3580
3581 if (!IsLocal) {
3582 // Mark the load instruction as invariant to enable hoisting in MachineLICM.
3583 MachineFunction &MF = DAG.getMachineFunction();
3584 MachineMemOperand *MemOp = MF.getMachineMemOperand(
3585 MachinePointerInfo::getGOT(MF),
3586 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
3587 MachineMemOperand::MOInvariant,
3588 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
3589 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
3590 }
3591
3592 return Load;
3593}
3594
3595SDValue LoongArchTargetLowering::lowerBlockAddress(SDValue Op,
3596 SelectionDAG &DAG) const {
3597 return getAddr(cast<BlockAddressSDNode>(Op), DAG,
3598 DAG.getTarget().getCodeModel());
3599}
3600
3601SDValue LoongArchTargetLowering::lowerJumpTable(SDValue Op,
3602 SelectionDAG &DAG) const {
3603 return getAddr(cast<JumpTableSDNode>(Op), DAG,
3604 DAG.getTarget().getCodeModel());
3605}
3606
3607SDValue LoongArchTargetLowering::lowerConstantPool(SDValue Op,
3608 SelectionDAG &DAG) const {
3609 return getAddr(cast<ConstantPoolSDNode>(Op), DAG,
3610 DAG.getTarget().getCodeModel());
3611}
3612
3613SDValue LoongArchTargetLowering::lowerGlobalAddress(SDValue Op,
3614 SelectionDAG &DAG) const {
3615 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
3616 assert(N->getOffset() == 0 && "unexpected offset in global node");
3617 auto CM = DAG.getTarget().getCodeModel();
3618 const GlobalValue *GV = N->getGlobal();
3619
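 // A dso-local global variable may carry its own code model attribute; if so,
 // it overrides the target's code model when forming the address below.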
3620 if (GV->isDSOLocal() && isa<GlobalVariable>(GV)) {
3621 if (auto GCM = dyn_cast<GlobalVariable>(GV)->getCodeModel())
3622 CM = *GCM;
3623 }
3624
3625 return getAddr(N, DAG, CM, GV->isDSOLocal());
3626}
3627
3628SDValue LoongArchTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
3629 SelectionDAG &DAG,
3630 unsigned Opc, bool UseGOT,
3631 bool Large) const {
3632 SDLoc DL(N);
3633 EVT Ty = getPointerTy(DAG.getDataLayout());
3634 MVT GRLenVT = Subtarget.getGRLenVT();
3635
3636 // This is not actually used, but is necessary for successfully matching the
3637 // PseudoLA_*_LARGE nodes.
3638 SDValue Tmp = DAG.getConstant(0, DL, Ty);
3639 SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
3640
3641 // Only IE needs an extra argument for large code model.
3642 SDValue Offset = Opc == LoongArch::PseudoLA_TLS_IE_LARGE
3643 ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
3644 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
3645
3646 // If it is LE for normal/medium code model, the add tp operation will occur
3647 // during the pseudo-instruction expansion.
3648 if (Opc == LoongArch::PseudoLA_TLS_LE && !Large)
3649 return Offset;
3650
3651 if (UseGOT) {
3652 // Mark the load instruction as invariant to enable hoisting in MachineLICM.
3653 MachineFunction &MF = DAG.getMachineFunction();
3654 MachineMemOperand *MemOp = MF.getMachineMemOperand(
3655 MachinePointerInfo::getGOT(MF),
3656 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
3657 MachineMemOperand::MOInvariant,
3658 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
3659 DAG.setNodeMemRefs(cast<MachineSDNode>(Offset.getNode()), {MemOp});
3660 }
3661
3662 // Add the thread pointer.
3663 return DAG.getNode(ISD::ADD, DL, Ty, Offset,
3664 DAG.getRegister(LoongArch::R2, GRLenVT));
3665}
3666
3667SDValue LoongArchTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
3668 SelectionDAG &DAG,
3669 unsigned Opc,
3670 bool Large) const {
3671 SDLoc DL(N);
3672 EVT Ty = getPointerTy(DAG.getDataLayout());
3673 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
3674
3675 // This is not actually used, but is necessary for successfully matching the
3676 // PseudoLA_*_LARGE nodes.
3677 SDValue Tmp = DAG.getConstant(0, DL, Ty);
3678
3679 // Use a PC-relative addressing mode to access the dynamic GOT address.
3680 SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
3681 SDValue Load = Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
3682 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
3683
3684 // Prepare argument list to generate call.
3685 TargetLowering::ArgListTy Args;
3686 Args.emplace_back(Load, CallTy);
3687
3688 // Setup call to __tls_get_addr.
3689 TargetLowering::CallLoweringInfo CLI(DAG);
3690 CLI.setDebugLoc(DL)
3691 .setChain(DAG.getEntryNode())
3692 .setLibCallee(CallingConv::C, CallTy,
3693 DAG.getExternalSymbol("__tls_get_addr", Ty),
3694 std::move(Args));
3695
3696 return LowerCallTo(CLI).first;
3697}
3698
3699SDValue LoongArchTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
3700 SelectionDAG &DAG, unsigned Opc,
3701 bool Large) const {
3702 SDLoc DL(N);
3703 EVT Ty = getPointerTy(DAG.getDataLayout());
3704 const GlobalValue *GV = N->getGlobal();
3705
3706 // This is not actually used, but is necessary for successfully matching the
3707 // PseudoLA_*_LARGE nodes.
3708 SDValue Tmp = DAG.getConstant(0, DL, Ty);
3709
3710 // Use a PC-relative addressing mode to access the global dynamic GOT address.
3711 // This generates the pattern (PseudoLA_TLS_DESC_PC{,LARGE} sym).
3712 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
3713 return Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
3714 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
3715}
3716
3717SDValue
3718LoongArchTargetLowering::lowerGlobalTLSAddress(SDValue Op,
3719 SelectionDAG &DAG) const {
3720 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
3721 CallingConv::GHC)
3722 report_fatal_error("In GHC calling convention TLS is not supported");
3723
3724 bool Large = DAG.getTarget().getCodeModel() == CodeModel::Large;
3725 assert((!Large || Subtarget.is64Bit()) && "Large code model requires LA64");
3726
3727 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
3728 assert(N->getOffset() == 0 && "unexpected offset in global node");
3729
3730 if (DAG.getTarget().useEmulatedTLS())
3731 reportFatalUsageError("the emulated TLS is prohibited");
3732
3733 bool IsDesc = DAG.getTarget().useTLSDESC();
3734
3735 switch (getTargetMachine().getTLSModel(N->getGlobal())) {
3736 case TLSModel::GeneralDynamic:
3737 // In this model, application code calls the dynamic linker function
3738 // __tls_get_addr to locate TLS offsets into the dynamic thread vector at
3739 // runtime.
3740 if (!IsDesc)
3741 return getDynamicTLSAddr(N, DAG,
3742 Large ? LoongArch::PseudoLA_TLS_GD_LARGE
3743 : LoongArch::PseudoLA_TLS_GD,
3744 Large);
3745 break;
3746 case TLSModel::LocalDynamic:
3747 // Same as GeneralDynamic, except for assembly modifiers and relocation
3748 // records.
3749 if (!IsDesc)
3750 return getDynamicTLSAddr(N, DAG,
3751 Large ? LoongArch::PseudoLA_TLS_LD_LARGE
3752 : LoongArch::PseudoLA_TLS_LD,
3753 Large);
3754 break;
3755 case TLSModel::InitialExec:
3756 // This model uses the GOT to resolve TLS offsets.
3757 return getStaticTLSAddr(N, DAG,
3758 Large ? LoongArch::PseudoLA_TLS_IE_LARGE
3759 : LoongArch::PseudoLA_TLS_IE,
3760 /*UseGOT=*/true, Large);
3761 case TLSModel::LocalExec:
3762 // This model is used when static linking as the TLS offsets are resolved
3763 // during program linking.
3764 //
3765 // This node doesn't need an extra argument for the large code model.
3766 return getStaticTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_LE,
3767 /*UseGOT=*/false, Large);
3768 }
3769
3770 return getTLSDescAddr(N, DAG,
3771 Large ? LoongArch::PseudoLA_TLS_DESC_LARGE
3772 : LoongArch::PseudoLA_TLS_DESC,
3773 Large);
3774}
3775
3776template <unsigned N>
3777static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp,
3778 SelectionDAG &DAG, bool IsSigned = false) {
3779 auto *CImm = cast<ConstantSDNode>(Op->getOperand(ImmOp));
3780 // Check the ImmArg.
3781 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
3782 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
3783 DAG.getContext()->emitError(Op->getOperationName(0) +
3784 ": argument out of range.");
3785 return DAG.getNode(ISD::UNDEF, SDLoc(Op), Op.getValueType());
3786 }
3787 return SDValue();
3788}
3789
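// For the LSX/LASX cases below, lowering only validates the immediate operand:
// the template argument of checkIntrinsicImmArg is the bit width of the
// instruction's immediate field, and out-of-range values are diagnosed and
// replaced with UNDEF.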
3790SDValue
3791LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
3792 SelectionDAG &DAG) const {
3793 switch (Op.getConstantOperandVal(0)) {
3794 default:
3795 return SDValue(); // Don't custom lower most intrinsics.
3796 case Intrinsic::thread_pointer: {
3797 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3798 return DAG.getRegister(LoongArch::R2, PtrVT);
3799 }
3800 case Intrinsic::loongarch_lsx_vpickve2gr_d:
3801 case Intrinsic::loongarch_lsx_vpickve2gr_du:
3802 case Intrinsic::loongarch_lsx_vreplvei_d:
3803 case Intrinsic::loongarch_lasx_xvrepl128vei_d:
3804 return checkIntrinsicImmArg<1>(Op, 2, DAG);
3805 case Intrinsic::loongarch_lsx_vreplvei_w:
3806 case Intrinsic::loongarch_lasx_xvrepl128vei_w:
3807 case Intrinsic::loongarch_lasx_xvpickve2gr_d:
3808 case Intrinsic::loongarch_lasx_xvpickve2gr_du:
3809 case Intrinsic::loongarch_lasx_xvpickve_d:
3810 case Intrinsic::loongarch_lasx_xvpickve_d_f:
3811 return checkIntrinsicImmArg<2>(Op, 2, DAG);
3812 case Intrinsic::loongarch_lasx_xvinsve0_d:
3813 return checkIntrinsicImmArg<2>(Op, 3, DAG);
3814 case Intrinsic::loongarch_lsx_vsat_b:
3815 case Intrinsic::loongarch_lsx_vsat_bu:
3816 case Intrinsic::loongarch_lsx_vrotri_b:
3817 case Intrinsic::loongarch_lsx_vsllwil_h_b:
3818 case Intrinsic::loongarch_lsx_vsllwil_hu_bu:
3819 case Intrinsic::loongarch_lsx_vsrlri_b:
3820 case Intrinsic::loongarch_lsx_vsrari_b:
3821 case Intrinsic::loongarch_lsx_vreplvei_h:
3822 case Intrinsic::loongarch_lasx_xvsat_b:
3823 case Intrinsic::loongarch_lasx_xvsat_bu:
3824 case Intrinsic::loongarch_lasx_xvrotri_b:
3825 case Intrinsic::loongarch_lasx_xvsllwil_h_b:
3826 case Intrinsic::loongarch_lasx_xvsllwil_hu_bu:
3827 case Intrinsic::loongarch_lasx_xvsrlri_b:
3828 case Intrinsic::loongarch_lasx_xvsrari_b:
3829 case Intrinsic::loongarch_lasx_xvrepl128vei_h:
3830 case Intrinsic::loongarch_lasx_xvpickve_w:
3831 case Intrinsic::loongarch_lasx_xvpickve_w_f:
3832 return checkIntrinsicImmArg<3>(Op, 2, DAG);
3833 case Intrinsic::loongarch_lasx_xvinsve0_w:
3834 return checkIntrinsicImmArg<3>(Op, 3, DAG);
3835 case Intrinsic::loongarch_lsx_vsat_h:
3836 case Intrinsic::loongarch_lsx_vsat_hu:
3837 case Intrinsic::loongarch_lsx_vrotri_h:
3838 case Intrinsic::loongarch_lsx_vsllwil_w_h:
3839 case Intrinsic::loongarch_lsx_vsllwil_wu_hu:
3840 case Intrinsic::loongarch_lsx_vsrlri_h:
3841 case Intrinsic::loongarch_lsx_vsrari_h:
3842 case Intrinsic::loongarch_lsx_vreplvei_b:
3843 case Intrinsic::loongarch_lasx_xvsat_h:
3844 case Intrinsic::loongarch_lasx_xvsat_hu:
3845 case Intrinsic::loongarch_lasx_xvrotri_h:
3846 case Intrinsic::loongarch_lasx_xvsllwil_w_h:
3847 case Intrinsic::loongarch_lasx_xvsllwil_wu_hu:
3848 case Intrinsic::loongarch_lasx_xvsrlri_h:
3849 case Intrinsic::loongarch_lasx_xvsrari_h:
3850 case Intrinsic::loongarch_lasx_xvrepl128vei_b:
3851 return checkIntrinsicImmArg<4>(Op, 2, DAG);
3852 case Intrinsic::loongarch_lsx_vsrlni_b_h:
3853 case Intrinsic::loongarch_lsx_vsrani_b_h:
3854 case Intrinsic::loongarch_lsx_vsrlrni_b_h:
3855 case Intrinsic::loongarch_lsx_vsrarni_b_h:
3856 case Intrinsic::loongarch_lsx_vssrlni_b_h:
3857 case Intrinsic::loongarch_lsx_vssrani_b_h:
3858 case Intrinsic::loongarch_lsx_vssrlni_bu_h:
3859 case Intrinsic::loongarch_lsx_vssrani_bu_h:
3860 case Intrinsic::loongarch_lsx_vssrlrni_b_h:
3861 case Intrinsic::loongarch_lsx_vssrarni_b_h:
3862 case Intrinsic::loongarch_lsx_vssrlrni_bu_h:
3863 case Intrinsic::loongarch_lsx_vssrarni_bu_h:
3864 case Intrinsic::loongarch_lasx_xvsrlni_b_h:
3865 case Intrinsic::loongarch_lasx_xvsrani_b_h:
3866 case Intrinsic::loongarch_lasx_xvsrlrni_b_h:
3867 case Intrinsic::loongarch_lasx_xvsrarni_b_h:
3868 case Intrinsic::loongarch_lasx_xvssrlni_b_h:
3869 case Intrinsic::loongarch_lasx_xvssrani_b_h:
3870 case Intrinsic::loongarch_lasx_xvssrlni_bu_h:
3871 case Intrinsic::loongarch_lasx_xvssrani_bu_h:
3872 case Intrinsic::loongarch_lasx_xvssrlrni_b_h:
3873 case Intrinsic::loongarch_lasx_xvssrarni_b_h:
3874 case Intrinsic::loongarch_lasx_xvssrlrni_bu_h:
3875 case Intrinsic::loongarch_lasx_xvssrarni_bu_h:
3876 return checkIntrinsicImmArg<4>(Op, 3, DAG);
3877 case Intrinsic::loongarch_lsx_vsat_w:
3878 case Intrinsic::loongarch_lsx_vsat_wu:
3879 case Intrinsic::loongarch_lsx_vrotri_w:
3880 case Intrinsic::loongarch_lsx_vsllwil_d_w:
3881 case Intrinsic::loongarch_lsx_vsllwil_du_wu:
3882 case Intrinsic::loongarch_lsx_vsrlri_w:
3883 case Intrinsic::loongarch_lsx_vsrari_w:
3884 case Intrinsic::loongarch_lsx_vslei_bu:
3885 case Intrinsic::loongarch_lsx_vslei_hu:
3886 case Intrinsic::loongarch_lsx_vslei_wu:
3887 case Intrinsic::loongarch_lsx_vslei_du:
3888 case Intrinsic::loongarch_lsx_vslti_bu:
3889 case Intrinsic::loongarch_lsx_vslti_hu:
3890 case Intrinsic::loongarch_lsx_vslti_wu:
3891 case Intrinsic::loongarch_lsx_vslti_du:
3892 case Intrinsic::loongarch_lsx_vbsll_v:
3893 case Intrinsic::loongarch_lsx_vbsrl_v:
3894 case Intrinsic::loongarch_lasx_xvsat_w:
3895 case Intrinsic::loongarch_lasx_xvsat_wu:
3896 case Intrinsic::loongarch_lasx_xvrotri_w:
3897 case Intrinsic::loongarch_lasx_xvsllwil_d_w:
3898 case Intrinsic::loongarch_lasx_xvsllwil_du_wu:
3899 case Intrinsic::loongarch_lasx_xvsrlri_w:
3900 case Intrinsic::loongarch_lasx_xvsrari_w:
3901 case Intrinsic::loongarch_lasx_xvslei_bu:
3902 case Intrinsic::loongarch_lasx_xvslei_hu:
3903 case Intrinsic::loongarch_lasx_xvslei_wu:
3904 case Intrinsic::loongarch_lasx_xvslei_du:
3905 case Intrinsic::loongarch_lasx_xvslti_bu:
3906 case Intrinsic::loongarch_lasx_xvslti_hu:
3907 case Intrinsic::loongarch_lasx_xvslti_wu:
3908 case Intrinsic::loongarch_lasx_xvslti_du:
3909 case Intrinsic::loongarch_lasx_xvbsll_v:
3910 case Intrinsic::loongarch_lasx_xvbsrl_v:
3911 return checkIntrinsicImmArg<5>(Op, 2, DAG);
3912 case Intrinsic::loongarch_lsx_vseqi_b:
3913 case Intrinsic::loongarch_lsx_vseqi_h:
3914 case Intrinsic::loongarch_lsx_vseqi_w:
3915 case Intrinsic::loongarch_lsx_vseqi_d:
3916 case Intrinsic::loongarch_lsx_vslei_b:
3917 case Intrinsic::loongarch_lsx_vslei_h:
3918 case Intrinsic::loongarch_lsx_vslei_w:
3919 case Intrinsic::loongarch_lsx_vslei_d:
3920 case Intrinsic::loongarch_lsx_vslti_b:
3921 case Intrinsic::loongarch_lsx_vslti_h:
3922 case Intrinsic::loongarch_lsx_vslti_w:
3923 case Intrinsic::loongarch_lsx_vslti_d:
3924 case Intrinsic::loongarch_lasx_xvseqi_b:
3925 case Intrinsic::loongarch_lasx_xvseqi_h:
3926 case Intrinsic::loongarch_lasx_xvseqi_w:
3927 case Intrinsic::loongarch_lasx_xvseqi_d:
3928 case Intrinsic::loongarch_lasx_xvslei_b:
3929 case Intrinsic::loongarch_lasx_xvslei_h:
3930 case Intrinsic::loongarch_lasx_xvslei_w:
3931 case Intrinsic::loongarch_lasx_xvslei_d:
3932 case Intrinsic::loongarch_lasx_xvslti_b:
3933 case Intrinsic::loongarch_lasx_xvslti_h:
3934 case Intrinsic::loongarch_lasx_xvslti_w:
3935 case Intrinsic::loongarch_lasx_xvslti_d:
3936 return checkIntrinsicImmArg<5>(Op, 2, DAG, /*IsSigned=*/true);
3937 case Intrinsic::loongarch_lsx_vsrlni_h_w:
3938 case Intrinsic::loongarch_lsx_vsrani_h_w:
3939 case Intrinsic::loongarch_lsx_vsrlrni_h_w:
3940 case Intrinsic::loongarch_lsx_vsrarni_h_w:
3941 case Intrinsic::loongarch_lsx_vssrlni_h_w:
3942 case Intrinsic::loongarch_lsx_vssrani_h_w:
3943 case Intrinsic::loongarch_lsx_vssrlni_hu_w:
3944 case Intrinsic::loongarch_lsx_vssrani_hu_w:
3945 case Intrinsic::loongarch_lsx_vssrlrni_h_w:
3946 case Intrinsic::loongarch_lsx_vssrarni_h_w:
3947 case Intrinsic::loongarch_lsx_vssrlrni_hu_w:
3948 case Intrinsic::loongarch_lsx_vssrarni_hu_w:
3949 case Intrinsic::loongarch_lsx_vfrstpi_b:
3950 case Intrinsic::loongarch_lsx_vfrstpi_h:
3951 case Intrinsic::loongarch_lasx_xvsrlni_h_w:
3952 case Intrinsic::loongarch_lasx_xvsrani_h_w:
3953 case Intrinsic::loongarch_lasx_xvsrlrni_h_w:
3954 case Intrinsic::loongarch_lasx_xvsrarni_h_w:
3955 case Intrinsic::loongarch_lasx_xvssrlni_h_w:
3956 case Intrinsic::loongarch_lasx_xvssrani_h_w:
3957 case Intrinsic::loongarch_lasx_xvssrlni_hu_w:
3958 case Intrinsic::loongarch_lasx_xvssrani_hu_w:
3959 case Intrinsic::loongarch_lasx_xvssrlrni_h_w:
3960 case Intrinsic::loongarch_lasx_xvssrarni_h_w:
3961 case Intrinsic::loongarch_lasx_xvssrlrni_hu_w:
3962 case Intrinsic::loongarch_lasx_xvssrarni_hu_w:
3963 case Intrinsic::loongarch_lasx_xvfrstpi_b:
3964 case Intrinsic::loongarch_lasx_xvfrstpi_h:
3965 return checkIntrinsicImmArg<5>(Op, 3, DAG);
3966 case Intrinsic::loongarch_lsx_vsat_d:
3967 case Intrinsic::loongarch_lsx_vsat_du:
3968 case Intrinsic::loongarch_lsx_vrotri_d:
3969 case Intrinsic::loongarch_lsx_vsrlri_d:
3970 case Intrinsic::loongarch_lsx_vsrari_d:
3971 case Intrinsic::loongarch_lasx_xvsat_d:
3972 case Intrinsic::loongarch_lasx_xvsat_du:
3973 case Intrinsic::loongarch_lasx_xvrotri_d:
3974 case Intrinsic::loongarch_lasx_xvsrlri_d:
3975 case Intrinsic::loongarch_lasx_xvsrari_d:
3976 return checkIntrinsicImmArg<6>(Op, 2, DAG);
3977 case Intrinsic::loongarch_lsx_vsrlni_w_d:
3978 case Intrinsic::loongarch_lsx_vsrani_w_d:
3979 case Intrinsic::loongarch_lsx_vsrlrni_w_d:
3980 case Intrinsic::loongarch_lsx_vsrarni_w_d:
3981 case Intrinsic::loongarch_lsx_vssrlni_w_d:
3982 case Intrinsic::loongarch_lsx_vssrani_w_d:
3983 case Intrinsic::loongarch_lsx_vssrlni_wu_d:
3984 case Intrinsic::loongarch_lsx_vssrani_wu_d:
3985 case Intrinsic::loongarch_lsx_vssrlrni_w_d:
3986 case Intrinsic::loongarch_lsx_vssrarni_w_d:
3987 case Intrinsic::loongarch_lsx_vssrlrni_wu_d:
3988 case Intrinsic::loongarch_lsx_vssrarni_wu_d:
3989 case Intrinsic::loongarch_lasx_xvsrlni_w_d:
3990 case Intrinsic::loongarch_lasx_xvsrani_w_d:
3991 case Intrinsic::loongarch_lasx_xvsrlrni_w_d:
3992 case Intrinsic::loongarch_lasx_xvsrarni_w_d:
3993 case Intrinsic::loongarch_lasx_xvssrlni_w_d:
3994 case Intrinsic::loongarch_lasx_xvssrani_w_d:
3995 case Intrinsic::loongarch_lasx_xvssrlni_wu_d:
3996 case Intrinsic::loongarch_lasx_xvssrani_wu_d:
3997 case Intrinsic::loongarch_lasx_xvssrlrni_w_d:
3998 case Intrinsic::loongarch_lasx_xvssrarni_w_d:
3999 case Intrinsic::loongarch_lasx_xvssrlrni_wu_d:
4000 case Intrinsic::loongarch_lasx_xvssrarni_wu_d:
4001 return checkIntrinsicImmArg<6>(Op, 3, DAG);
4002 case Intrinsic::loongarch_lsx_vsrlni_d_q:
4003 case Intrinsic::loongarch_lsx_vsrani_d_q:
4004 case Intrinsic::loongarch_lsx_vsrlrni_d_q:
4005 case Intrinsic::loongarch_lsx_vsrarni_d_q:
4006 case Intrinsic::loongarch_lsx_vssrlni_d_q:
4007 case Intrinsic::loongarch_lsx_vssrani_d_q:
4008 case Intrinsic::loongarch_lsx_vssrlni_du_q:
4009 case Intrinsic::loongarch_lsx_vssrani_du_q:
4010 case Intrinsic::loongarch_lsx_vssrlrni_d_q:
4011 case Intrinsic::loongarch_lsx_vssrarni_d_q:
4012 case Intrinsic::loongarch_lsx_vssrlrni_du_q:
4013 case Intrinsic::loongarch_lsx_vssrarni_du_q:
4014 case Intrinsic::loongarch_lasx_xvsrlni_d_q:
4015 case Intrinsic::loongarch_lasx_xvsrani_d_q:
4016 case Intrinsic::loongarch_lasx_xvsrlrni_d_q:
4017 case Intrinsic::loongarch_lasx_xvsrarni_d_q:
4018 case Intrinsic::loongarch_lasx_xvssrlni_d_q:
4019 case Intrinsic::loongarch_lasx_xvssrani_d_q:
4020 case Intrinsic::loongarch_lasx_xvssrlni_du_q:
4021 case Intrinsic::loongarch_lasx_xvssrani_du_q:
4022 case Intrinsic::loongarch_lasx_xvssrlrni_d_q:
4023 case Intrinsic::loongarch_lasx_xvssrarni_d_q:
4024 case Intrinsic::loongarch_lasx_xvssrlrni_du_q:
4025 case Intrinsic::loongarch_lasx_xvssrarni_du_q:
4026 return checkIntrinsicImmArg<7>(Op, 3, DAG);
4027 case Intrinsic::loongarch_lsx_vnori_b:
4028 case Intrinsic::loongarch_lsx_vshuf4i_b:
4029 case Intrinsic::loongarch_lsx_vshuf4i_h:
4030 case Intrinsic::loongarch_lsx_vshuf4i_w:
4031 case Intrinsic::loongarch_lasx_xvnori_b:
4032 case Intrinsic::loongarch_lasx_xvshuf4i_b:
4033 case Intrinsic::loongarch_lasx_xvshuf4i_h:
4034 case Intrinsic::loongarch_lasx_xvshuf4i_w:
4035 case Intrinsic::loongarch_lasx_xvpermi_d:
4036 return checkIntrinsicImmArg<8>(Op, 2, DAG);
4037 case Intrinsic::loongarch_lsx_vshuf4i_d:
4038 case Intrinsic::loongarch_lsx_vpermi_w:
4039 case Intrinsic::loongarch_lsx_vbitseli_b:
4040 case Intrinsic::loongarch_lsx_vextrins_b:
4041 case Intrinsic::loongarch_lsx_vextrins_h:
4042 case Intrinsic::loongarch_lsx_vextrins_w:
4043 case Intrinsic::loongarch_lsx_vextrins_d:
4044 case Intrinsic::loongarch_lasx_xvshuf4i_d:
4045 case Intrinsic::loongarch_lasx_xvpermi_w:
4046 case Intrinsic::loongarch_lasx_xvpermi_q:
4047 case Intrinsic::loongarch_lasx_xvbitseli_b:
4048 case Intrinsic::loongarch_lasx_xvextrins_b:
4049 case Intrinsic::loongarch_lasx_xvextrins_h:
4050 case Intrinsic::loongarch_lasx_xvextrins_w:
4051 case Intrinsic::loongarch_lasx_xvextrins_d:
4052 return checkIntrinsicImmArg<8>(Op, 3, DAG);
4053 case Intrinsic::loongarch_lsx_vrepli_b:
4054 case Intrinsic::loongarch_lsx_vrepli_h:
4055 case Intrinsic::loongarch_lsx_vrepli_w:
4056 case Intrinsic::loongarch_lsx_vrepli_d:
4057 case Intrinsic::loongarch_lasx_xvrepli_b:
4058 case Intrinsic::loongarch_lasx_xvrepli_h:
4059 case Intrinsic::loongarch_lasx_xvrepli_w:
4060 case Intrinsic::loongarch_lasx_xvrepli_d:
4061 return checkIntrinsicImmArg<10>(Op, 1, DAG, /*IsSigned=*/true);
4062 case Intrinsic::loongarch_lsx_vldi:
4063 case Intrinsic::loongarch_lasx_xvldi:
4064 return checkIntrinsicImmArg<13>(Op, 1, DAG, /*IsSigned=*/true);
4065 }
4066}
4067
4068 // Helper function that emits an error message for intrinsics with a chain and
4069 // returns the merge values of a UNDEF and the chain.
4070static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op,
4071 StringRef ErrorMsg,
4072 SelectionDAG &DAG) {
4073 DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
4074 return DAG.getMergeValues({DAG.getUNDEF(Op.getValueType()), Op.getOperand(0)},
4075 SDLoc(Op));
4076}
4077
4078SDValue
4079LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
4080 SelectionDAG &DAG) const {
4081 SDLoc DL(Op);
4082 MVT GRLenVT = Subtarget.getGRLenVT();
4083 EVT VT = Op.getValueType();
4084 SDValue Chain = Op.getOperand(0);
4085 const StringRef ErrorMsgOOR = "argument out of range";
4086 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
4087 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
4088
4089 switch (Op.getConstantOperandVal(1)) {
4090 default:
4091 return Op;
4092 case Intrinsic::loongarch_crc_w_b_w:
4093 case Intrinsic::loongarch_crc_w_h_w:
4094 case Intrinsic::loongarch_crc_w_w_w:
4095 case Intrinsic::loongarch_crc_w_d_w:
4096 case Intrinsic::loongarch_crcc_w_b_w:
4097 case Intrinsic::loongarch_crcc_w_h_w:
4098 case Intrinsic::loongarch_crcc_w_w_w:
4099 case Intrinsic::loongarch_crcc_w_d_w:
4100 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqLA64, DAG);
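 // The CSR instructions encode a 14-bit CSR index, hence the isUInt<14>
 // checks on the immediate operand below.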
4101 case Intrinsic::loongarch_csrrd_w:
4102 case Intrinsic::loongarch_csrrd_d: {
4103 unsigned Imm = Op.getConstantOperandVal(2);
4104 return !isUInt<14>(Imm)
4105 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4106 : DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
4107 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
4108 }
4109 case Intrinsic::loongarch_csrwr_w:
4110 case Intrinsic::loongarch_csrwr_d: {
4111 unsigned Imm = Op.getConstantOperandVal(3);
4112 return !isUInt<14>(Imm)
4113 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4114 : DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
4115 {Chain, Op.getOperand(2),
4116 DAG.getConstant(Imm, DL, GRLenVT)});
4117 }
4118 case Intrinsic::loongarch_csrxchg_w:
4119 case Intrinsic::loongarch_csrxchg_d: {
4120 unsigned Imm = Op.getConstantOperandVal(4);
4121 return !isUInt<14>(Imm)
4122 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4123 : DAG.getNode(LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
4124 {Chain, Op.getOperand(2), Op.getOperand(3),
4125 DAG.getConstant(Imm, DL, GRLenVT)});
4126 }
4127 case Intrinsic::loongarch_iocsrrd_d: {
4128 return DAG.getNode(
4129 LoongArchISD::IOCSRRD_D, DL, {GRLenVT, MVT::Other},
4130 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2))});
4131 }
4132#define IOCSRRD_CASE(NAME, NODE) \
4133 case Intrinsic::loongarch_##NAME: { \
4134 return DAG.getNode(LoongArchISD::NODE, DL, {GRLenVT, MVT::Other}, \
4135 {Chain, Op.getOperand(2)}); \
4136 }
4137 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
4138 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
4139 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
4140#undef IOCSRRD_CASE
4141 case Intrinsic::loongarch_cpucfg: {
4142 return DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
4143 {Chain, Op.getOperand(2)});
4144 }
4145 case Intrinsic::loongarch_lddir_d: {
4146 unsigned Imm = Op.getConstantOperandVal(3);
4147 return !isUInt<8>(Imm)
4148 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4149 : Op;
4150 }
4151 case Intrinsic::loongarch_movfcsr2gr: {
4152 if (!Subtarget.hasBasicF())
4153 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqF, DAG);
4154 unsigned Imm = Op.getConstantOperandVal(2);
4155 return !isUInt<2>(Imm)
4156 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4157 : DAG.getNode(LoongArchISD::MOVFCSR2GR, DL, {VT, MVT::Other},
4158 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
4159 }
4160 case Intrinsic::loongarch_lsx_vld:
4161 case Intrinsic::loongarch_lsx_vldrepl_b:
4162 case Intrinsic::loongarch_lasx_xvld:
4163 case Intrinsic::loongarch_lasx_xvldrepl_b:
4164 return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
4165 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4166 : SDValue();
4167 case Intrinsic::loongarch_lsx_vldrepl_h:
4168 case Intrinsic::loongarch_lasx_xvldrepl_h:
4169 return !isShiftedInt<11, 1>(
4170 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
4172 Op, "argument out of range or not a multiple of 2", DAG)
4173 : SDValue();
4174 case Intrinsic::loongarch_lsx_vldrepl_w:
4175 case Intrinsic::loongarch_lasx_xvldrepl_w:
4176 return !isShiftedInt<10, 2>(
4177 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
4178 ? emitIntrinsicWithChainErrorMessage(
4179 Op, "argument out of range or not a multiple of 4", DAG)
4180 : SDValue();
4181 case Intrinsic::loongarch_lsx_vldrepl_d:
4182 case Intrinsic::loongarch_lasx_xvldrepl_d:
4183 return !isShiftedInt<9, 3>(
4184 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
4185 ? emitIntrinsicWithChainErrorMessage(
4186 Op, "argument out of range or not a multiple of 8", DAG)
4187 : SDValue();
4188 }
4189}
4190
4191 // Helper function that emits an error message for intrinsics with a void
4192 // return value and returns the chain.
4193static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg,
4194 SelectionDAG &DAG) {
4195
4196 DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
4197 return Op.getOperand(0);
4198}
4199
4200SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op,
4201 SelectionDAG &DAG) const {
4202 SDLoc DL(Op);
4203 MVT GRLenVT = Subtarget.getGRLenVT();
4204 SDValue Chain = Op.getOperand(0);
4205 uint64_t IntrinsicEnum = Op.getConstantOperandVal(1);
4206 SDValue Op2 = Op.getOperand(2);
4207 const StringRef ErrorMsgOOR = "argument out of range";
4208 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
4209 const StringRef ErrorMsgReqLA32 = "requires loongarch32";
4210 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
4211
4212 switch (IntrinsicEnum) {
4213 default:
4214 // TODO: Add more Intrinsics.
4215 return SDValue();
4216 case Intrinsic::loongarch_cacop_d:
4217 case Intrinsic::loongarch_cacop_w: {
4218 if (IntrinsicEnum == Intrinsic::loongarch_cacop_d && !Subtarget.is64Bit())
4219 return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG);
4220 if (IntrinsicEnum == Intrinsic::loongarch_cacop_w && Subtarget.is64Bit())
4221 return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA32, DAG);
4222 // call void @llvm.loongarch.cacop.[d/w](uimm5, rj, simm12)
4223 unsigned Imm1 = Op2->getAsZExtVal();
4224 int Imm2 = cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue();
4225 if (!isUInt<5>(Imm1) || !isInt<12>(Imm2))
4226 return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
4227 return Op;
4228 }
4229 case Intrinsic::loongarch_dbar: {
4230 unsigned Imm = Op2->getAsZExtVal();
4231 return !isUInt<15>(Imm)
4232 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4233 : DAG.getNode(LoongArchISD::DBAR, DL, MVT::Other, Chain,
4234 DAG.getConstant(Imm, DL, GRLenVT));
4235 }
4236 case Intrinsic::loongarch_ibar: {
4237 unsigned Imm = Op2->getAsZExtVal();
4238 return !isUInt<15>(Imm)
4239 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4240 : DAG.getNode(LoongArchISD::IBAR, DL, MVT::Other, Chain,
4241 DAG.getConstant(Imm, DL, GRLenVT));
4242 }
4243 case Intrinsic::loongarch_break: {
4244 unsigned Imm = Op2->getAsZExtVal();
4245 return !isUInt<15>(Imm)
4246 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4247 : DAG.getNode(LoongArchISD::BREAK, DL, MVT::Other, Chain,
4248 DAG.getConstant(Imm, DL, GRLenVT));
4249 }
4250 case Intrinsic::loongarch_movgr2fcsr: {
4251 if (!Subtarget.hasBasicF())
4252 return emitIntrinsicErrorMessage(Op, ErrorMsgReqF, DAG);
4253 unsigned Imm = Op2->getAsZExtVal();
4254 return !isUInt<2>(Imm)
4255 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4256 : DAG.getNode(LoongArchISD::MOVGR2FCSR, DL, MVT::Other, Chain,
4257 DAG.getConstant(Imm, DL, GRLenVT),
4258 DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT,
4259 Op.getOperand(3)));
4260 }
4261 case Intrinsic::loongarch_syscall: {
4262 unsigned Imm = Op2->getAsZExtVal();
4263 return !isUInt<15>(Imm)
4264 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4265 : DAG.getNode(LoongArchISD::SYSCALL, DL, MVT::Other, Chain,
4266 DAG.getConstant(Imm, DL, GRLenVT));
4267 }
4268#define IOCSRWR_CASE(NAME, NODE) \
4269 case Intrinsic::loongarch_##NAME: { \
4270 SDValue Op3 = Op.getOperand(3); \
4271 return Subtarget.is64Bit() \
4272 ? DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, \
4273 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
4274 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op3)) \
4275 : DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, Op2, \
4276 Op3); \
4277 }
4278 IOCSRWR_CASE(iocsrwr_b, IOCSRWR_B);
4279 IOCSRWR_CASE(iocsrwr_h, IOCSRWR_H);
4280 IOCSRWR_CASE(iocsrwr_w, IOCSRWR_W);
4281#undef IOCSRWR_CASE
4282 case Intrinsic::loongarch_iocsrwr_d: {
4283 return !Subtarget.is64Bit()
4284 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
4285 : DAG.getNode(LoongArchISD::IOCSRWR_D, DL, MVT::Other, Chain,
4286 Op2,
4287 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
4288 Op.getOperand(3)));
4289 }
4290#define ASRT_LE_GT_CASE(NAME) \
4291 case Intrinsic::loongarch_##NAME: { \
4292 return !Subtarget.is64Bit() \
4293 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG) \
4294 : Op; \
4295 }
4296 ASRT_LE_GT_CASE(asrtle_d)
4297 ASRT_LE_GT_CASE(asrtgt_d)
4298#undef ASRT_LE_GT_CASE
4299 case Intrinsic::loongarch_ldpte_d: {
4300 unsigned Imm = Op.getConstantOperandVal(3);
4301 return !Subtarget.is64Bit()
4302 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
4303 : !isUInt<8>(Imm) ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4304 : Op;
4305 }
4306 case Intrinsic::loongarch_lsx_vst:
4307 case Intrinsic::loongarch_lasx_xvst:
4308 return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue())
4309 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4310 : SDValue();
4311 case Intrinsic::loongarch_lasx_xvstelm_b:
4312 return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4313 !isUInt<5>(Op.getConstantOperandVal(5)))
4314 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4315 : SDValue();
4316 case Intrinsic::loongarch_lsx_vstelm_b:
4317 return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4318 !isUInt<4>(Op.getConstantOperandVal(5)))
4319 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4320 : SDValue();
4321 case Intrinsic::loongarch_lasx_xvstelm_h:
4322 return (!isShiftedInt<8, 1>(
4323 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4324 !isUInt<4>(Op.getConstantOperandVal(5)))
4325 ? emitIntrinsicErrorMessage(
4326 Op, "argument out of range or not a multiple of 2", DAG)
4327 : SDValue();
4328 case Intrinsic::loongarch_lsx_vstelm_h:
4329 return (!isShiftedInt<8, 1>(
4330 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4331 !isUInt<3>(Op.getConstantOperandVal(5)))
4332 ? emitIntrinsicErrorMessage(
4333 Op, "argument out of range or not a multiple of 2", DAG)
4334 : SDValue();
4335 case Intrinsic::loongarch_lasx_xvstelm_w:
4336 return (!isShiftedInt<8, 2>(
4337 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4338 !isUInt<3>(Op.getConstantOperandVal(5)))
4339 ? emitIntrinsicErrorMessage(
4340 Op, "argument out of range or not a multiple of 4", DAG)
4341 : SDValue();
4342 case Intrinsic::loongarch_lsx_vstelm_w:
4343 return (!isShiftedInt<8, 2>(
4344 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4345 !isUInt<2>(Op.getConstantOperandVal(5)))
4346 ? emitIntrinsicErrorMessage(
4347 Op, "argument out of range or not a multiple of 4", DAG)
4348 : SDValue();
4349 case Intrinsic::loongarch_lasx_xvstelm_d:
4350 return (!isShiftedInt<8, 3>(
4351 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4352 !isUInt<2>(Op.getConstantOperandVal(5)))
4353 ? emitIntrinsicErrorMessage(
4354 Op, "argument out of range or not a multiple of 8", DAG)
4355 : SDValue();
4356 case Intrinsic::loongarch_lsx_vstelm_d:
4357 return (!isShiftedInt<8, 3>(
4358 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4359 !isUInt<1>(Op.getConstantOperandVal(5)))
4360 ? emitIntrinsicErrorMessage(
4361 Op, "argument out of range or not a multiple of 8", DAG)
4362 : SDValue();
4363 }
4364}
4365
4366SDValue LoongArchTargetLowering::lowerShiftLeftParts(SDValue Op,
4367 SelectionDAG &DAG) const {
4368 SDLoc DL(Op);
4369 SDValue Lo = Op.getOperand(0);
4370 SDValue Hi = Op.getOperand(1);
4371 SDValue Shamt = Op.getOperand(2);
4372 EVT VT = Lo.getValueType();
4373
4374 // if Shamt-GRLen < 0: // Shamt < GRLen
4375 // Lo = Lo << Shamt
4376 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (GRLen-1 ^ Shamt))
4377 // else:
4378 // Lo = 0
4379 // Hi = Lo << (Shamt-GRLen)
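 // For example, with GRLen == 32 and Shamt == 8:
 //   Lo = Lo << 8, Hi = (Hi << 8) | (Lo >> 24)
 // and with Shamt == 40 (note HiFalse uses the original Lo):
 //   Lo = 0, Hi = Lo << 8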
4380
4381 SDValue Zero = DAG.getConstant(0, DL, VT);
4382 SDValue One = DAG.getConstant(1, DL, VT);
4383 SDValue MinusGRLen =
4384 DAG.getSignedConstant(-(int)Subtarget.getGRLen(), DL, VT);
4385 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
4386 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
4387 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
4388
4389 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
4390 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
4391 SDValue ShiftRightLo =
4392 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, GRLenMinus1Shamt);
4393 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
4394 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
4395 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusGRLen);
4396
4397 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
4398
4399 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
4400 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
4401
4402 SDValue Parts[2] = {Lo, Hi};
4403 return DAG.getMergeValues(Parts, DL);
4404}
4405
4406SDValue LoongArchTargetLowering::lowerShiftRightParts(SDValue Op,
4407 SelectionDAG &DAG,
4408 bool IsSRA) const {
4409 SDLoc DL(Op);
4410 SDValue Lo = Op.getOperand(0);
4411 SDValue Hi = Op.getOperand(1);
4412 SDValue Shamt = Op.getOperand(2);
4413 EVT VT = Lo.getValueType();
4414
4415 // SRA expansion:
4416 // if Shamt-GRLen < 0: // Shamt < GRLen
4417 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
4418 // Hi = Hi >>s Shamt
4419 // else:
4420 // Lo = Hi >>s (Shamt-GRLen);
4421 // Hi = Hi >>s (GRLen-1)
4422 //
4423 // SRL expansion:
4424 // if Shamt-GRLen < 0: // Shamt < GRLen
4425 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
4426 // Hi = Hi >>u Shamt
4427 // else:
4428 // Lo = Hi >>u (Shamt-GRLen);
4429 // Hi = 0;
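 // For example, an SRL with GRLen == 32 and Shamt == 8 yields
 //   Lo = (Lo >>u 8) | (Hi << 24), Hi = Hi >>u 8
 // and with Shamt == 40:
 //   Lo = Hi >>u 8, Hi = 0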
4430
4431 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
4432
4433 SDValue Zero = DAG.getConstant(0, DL, VT);
4434 SDValue One = DAG.getConstant(1, DL, VT);
4435 SDValue MinusGRLen =
4436 DAG.getSignedConstant(-(int)Subtarget.getGRLen(), DL, VT);
4437 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
4438 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
4439 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
4440
4441 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
4442 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
4443 SDValue ShiftLeftHi =
4444 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, GRLenMinus1Shamt);
4445 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
4446 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
4447 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusGRLen);
4448 SDValue HiFalse =
4449 IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, GRLenMinus1) : Zero;
4450
4451 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
4452
4453 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
4454 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
4455
4456 SDValue Parts[2] = {Lo, Hi};
4457 return DAG.getMergeValues(Parts, DL);
4458}
4459
4460// Returns the opcode of the target-specific SDNode that implements the 32-bit
4461// form of the given Opcode.
4462static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode) {
4463 switch (Opcode) {
4464 default:
4465 llvm_unreachable("Unexpected opcode");
4466 case ISD::SDIV:
4467 return LoongArchISD::DIV_W;
4468 case ISD::UDIV:
4469 return LoongArchISD::DIV_WU;
4470 case ISD::SREM:
4471 return LoongArchISD::MOD_W;
4472 case ISD::UREM:
4473 return LoongArchISD::MOD_WU;
4474 case ISD::SHL:
4475 return LoongArchISD::SLL_W;
4476 case ISD::SRA:
4477 return LoongArchISD::SRA_W;
4478 case ISD::SRL:
4479 return LoongArchISD::SRL_W;
4480 case ISD::ROTL:
4481 case ISD::ROTR:
4482 return LoongArchISD::ROTR_W;
4483 case ISD::CTTZ:
4484 return LoongArchISD::CTZ_W;
4485 case ISD::CTLZ:
4486 return LoongArchISD::CLZ_W;
4487 }
4488}
4489
4490 // Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
4491 // node. Because i8/i16/i32 isn't a legal type for LA64, these operations would
4492 // otherwise be promoted to i64, making it difficult to select the
4493 // SLL_W/.../*W instructions later, because the fact that the operation was
4494 // originally of type i8/i16/i32 is lost.
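// For example, on LA64 an (srl i32 %a, %b) is rewritten here as
//   trunc i32 (LoongArchISD::SRL_W (any_extend i64 %a), (any_extend i64 %b))
// so that srl.w can be selected for it later.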
4495static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp,
4496 unsigned ExtOpc = ISD::ANY_EXTEND) {
4497 SDLoc DL(N);
4498 LoongArchISD::NodeType WOpcode = getLoongArchWOpcode(N->getOpcode());
4499 SDValue NewOp0, NewRes;
4500
4501 switch (NumOp) {
4502 default:
4503 llvm_unreachable("Unexpected NumOp");
4504 case 1: {
4505 NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
4506 NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0);
4507 break;
4508 }
4509 case 2: {
4510 NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
4511 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
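 // There is no 32-bit rotate-left instruction, so ROTL is lowered as a
 // rotate-right by (32 - amount) via ROTR_W.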
4512 if (N->getOpcode() == ISD::ROTL) {
4513 SDValue TmpOp = DAG.getConstant(32, DL, MVT::i64);
4514 NewOp1 = DAG.getNode(ISD::SUB, DL, MVT::i64, TmpOp, NewOp1);
4515 }
4516 NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
4517 break;
4518 }
4519 // TODO: Handle more NumOp.
4520 }
4521
4522 // ReplaceNodeResults requires we maintain the same type for the return
4523 // value.
4524 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
4525}
4526
4527 // Converts the given 32-bit operation to an i64 operation with sign-extension
4528 // semantics to reduce the number of sign-extension instructions.
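// For example, (add i32 %a, %b) becomes
//   trunc i32 (sext_inreg (add i64 (any_extend %a), (any_extend %b)), i32)
// which matches add.w, whose 32-bit result is already sign-extended on LA64.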
4529static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
4530 SDLoc DL(N);
4531 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
4532 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
4533 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
4534 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
4535 DAG.getValueType(MVT::i32));
4536 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
4537}
4538
4539 // Helper function that emits an error message for intrinsics with/without a chain
4540 // and returns a UNDEF and, when WithChain is set, the chain as the results.
4541static void emitErrorAndReplaceIntrinsicResults(
4542 SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG,
4543 StringRef ErrorMsg, bool WithChain = true) {
4544 DAG.getContext()->emitError(N->getOperationName(0) + ": " + ErrorMsg + ".");
4545 Results.push_back(DAG.getUNDEF(N->getValueType(0)));
4546 if (!WithChain)
4547 return;
4548 Results.push_back(N->getOperand(0));
4549}
4550
4551template <unsigned N>
4552static void
4553replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl<SDValue> &Results,
4554 SelectionDAG &DAG, const LoongArchSubtarget &Subtarget,
4555 unsigned ResOp) {
4556 const StringRef ErrorMsgOOR = "argument out of range";
4557 unsigned Imm = Node->getConstantOperandVal(2);
4558 if (!isUInt<N>(Imm)) {
4559 emitErrorAndReplaceIntrinsicResults(Node, Results, DAG, ErrorMsgOOR,
4560 /*WithChain=*/false);
4561 return;
4562 }
4563 SDLoc DL(Node);
4564 SDValue Vec = Node->getOperand(1);
4565
4566 SDValue PickElt =
4567 DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec,
4568 DAG.getConstant(Imm, DL, Subtarget.getGRLenVT()),
4569 DAG.getValueType(Vec.getValueType().getVectorElementType()));
4570 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, Node->getValueType(0),
4571 PickElt.getValue(0)));
4572}
4573
4574static void replaceVecCondBranchResults(SDNode *N,
4575 SmallVectorImpl<SDValue> &Results,
4576 SelectionDAG &DAG,
4577 const LoongArchSubtarget &Subtarget,
4578 unsigned ResOp) {
4579 SDLoc DL(N);
4580 SDValue Vec = N->getOperand(1);
4581
4582 SDValue CB = DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec);
4583 Results.push_back(
4584 DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), CB.getValue(0)));
4585}
4586
4587static void
4588replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
4589 SelectionDAG &DAG,
4590 const LoongArchSubtarget &Subtarget) {
4591 switch (N->getConstantOperandVal(0)) {
4592 default:
4593 llvm_unreachable("Unexpected Intrinsic.");
4594 case Intrinsic::loongarch_lsx_vpickve2gr_b:
4595 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
4596 LoongArchISD::VPICK_SEXT_ELT);
4597 break;
4598 case Intrinsic::loongarch_lsx_vpickve2gr_h:
4599 case Intrinsic::loongarch_lasx_xvpickve2gr_w:
4600 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
4601 LoongArchISD::VPICK_SEXT_ELT);
4602 break;
4603 case Intrinsic::loongarch_lsx_vpickve2gr_w:
4604 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
4605 LoongArchISD::VPICK_SEXT_ELT);
4606 break;
4607 case Intrinsic::loongarch_lsx_vpickve2gr_bu:
4608 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
4609 LoongArchISD::VPICK_ZEXT_ELT);
4610 break;
4611 case Intrinsic::loongarch_lsx_vpickve2gr_hu:
4612 case Intrinsic::loongarch_lasx_xvpickve2gr_wu:
4613 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
4614 LoongArchISD::VPICK_ZEXT_ELT);
4615 break;
4616 case Intrinsic::loongarch_lsx_vpickve2gr_wu:
4617 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
4618 LoongArchISD::VPICK_ZEXT_ELT);
4619 break;
4620 case Intrinsic::loongarch_lsx_bz_b:
4621 case Intrinsic::loongarch_lsx_bz_h:
4622 case Intrinsic::loongarch_lsx_bz_w:
4623 case Intrinsic::loongarch_lsx_bz_d:
4624 case Intrinsic::loongarch_lasx_xbz_b:
4625 case Intrinsic::loongarch_lasx_xbz_h:
4626 case Intrinsic::loongarch_lasx_xbz_w:
4627 case Intrinsic::loongarch_lasx_xbz_d:
4628 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
4630 break;
4631 case Intrinsic::loongarch_lsx_bz_v:
4632 case Intrinsic::loongarch_lasx_xbz_v:
4633 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
4635 break;
4636 case Intrinsic::loongarch_lsx_bnz_b:
4637 case Intrinsic::loongarch_lsx_bnz_h:
4638 case Intrinsic::loongarch_lsx_bnz_w:
4639 case Intrinsic::loongarch_lsx_bnz_d:
4640 case Intrinsic::loongarch_lasx_xbnz_b:
4641 case Intrinsic::loongarch_lasx_xbnz_h:
4642 case Intrinsic::loongarch_lasx_xbnz_w:
4643 case Intrinsic::loongarch_lasx_xbnz_d:
4644 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
4646 break;
4647 case Intrinsic::loongarch_lsx_bnz_v:
4648 case Intrinsic::loongarch_lasx_xbnz_v:
4649 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
4651 break;
4652 }
4653}
4654
4655static void replaceCMP_XCHG_128Results(SDNode *N,
4656 SmallVectorImpl<SDValue> &Results,
4657 SelectionDAG &DAG) {
4658 assert(N->getValueType(0) == MVT::i128 &&
4659 "AtomicCmpSwap on types less than 128 should be legal");
4660 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
4661
4662 unsigned Opcode;
4663 switch (MemOp->getMergedOrdering()) {
4664 case AtomicOrdering::Acquire:
4665 case AtomicOrdering::AcquireRelease:
4666 case AtomicOrdering::SequentiallyConsistent:
4667 Opcode = LoongArch::PseudoCmpXchg128Acquire;
4668 break;
4669 case AtomicOrdering::Monotonic:
4670 case AtomicOrdering::Release:
4671 Opcode = LoongArch::PseudoCmpXchg128;
4672 break;
4673 default:
4674 llvm_unreachable("Unexpected ordering!");
4675 }
4676
4677 SDLoc DL(N);
4678 auto CmpVal = DAG.SplitScalar(N->getOperand(2), DL, MVT::i64, MVT::i64);
4679 auto NewVal = DAG.SplitScalar(N->getOperand(3), DL, MVT::i64, MVT::i64);
4680 SDValue Ops[] = {N->getOperand(1), CmpVal.first, CmpVal.second,
4681 NewVal.first, NewVal.second, N->getOperand(0)};
4682
4683 SDNode *CmpSwap = DAG.getMachineNode(
4684 Opcode, SDLoc(N), DAG.getVTList(MVT::i64, MVT::i64, MVT::i64, MVT::Other),
4685 Ops);
4686 DAG.setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
4687 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128,
4688 SDValue(CmpSwap, 0), SDValue(CmpSwap, 1)));
4689 Results.push_back(SDValue(CmpSwap, 3));
4690}
4691
4692void LoongArchTargetLowering::ReplaceNodeResults(
4693 SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
4694 SDLoc DL(N);
4695 EVT VT = N->getValueType(0);
4696 switch (N->getOpcode()) {
4697 default:
4698 llvm_unreachable("Don't know how to legalize this operation");
4699 case ISD::ADD:
4700 case ISD::SUB:
4701 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
4702 "Unexpected custom legalisation");
4703 Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
4704 break;
4705 case ISD::SDIV:
4706 case ISD::UDIV:
4707 case ISD::SREM:
4708 case ISD::UREM:
4709 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4710 "Unexpected custom legalisation");
4711 Results.push_back(customLegalizeToWOp(N, DAG, 2,
4712 Subtarget.hasDiv32() && VT == MVT::i32
4713 ? ISD::ANY_EXTEND
4714 : ISD::SIGN_EXTEND));
4715 break;
4716 case ISD::SHL:
4717 case ISD::SRA:
4718 case ISD::SRL:
4719 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4720 "Unexpected custom legalisation");
4721 if (N->getOperand(1).getOpcode() != ISD::Constant) {
4722 Results.push_back(customLegalizeToWOp(N, DAG, 2));
4723 break;
4724 }
4725 break;
4726 case ISD::ROTL:
4727 case ISD::ROTR:
4728 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4729 "Unexpected custom legalisation");
4730 Results.push_back(customLegalizeToWOp(N, DAG, 2));
4731 break;
4732 case ISD::FP_TO_SINT: {
4733 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4734 "Unexpected custom legalisation");
4735 SDValue Src = N->getOperand(0);
4736 EVT FVT = EVT::getFloatingPointVT(N->getValueSizeInBits(0));
4737 if (getTypeAction(*DAG.getContext(), Src.getValueType()) !=
4738 TargetLowering::TypeSoftenFloat) {
4739 if (!isTypeLegal(Src.getValueType()))
4740 return;
4741 if (Src.getValueType() == MVT::f16)
4742 Src = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Src);
4743 SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, FVT, Src);
4744 Results.push_back(DAG.getNode(ISD::BITCAST, DL, VT, Dst));
4745 return;
4746 }
4747 // If the FP type needs to be softened, emit a library call using the 'si'
4748 // version. If we left it to default legalization we'd end up with 'di'.
4749 RTLIB::Libcall LC;
4750 LC = RTLIB::getFPTOSINT(Src.getValueType(), VT);
4751 MakeLibCallOptions CallOptions;
4752 EVT OpVT = Src.getValueType();
4753 CallOptions.setTypeListBeforeSoften(OpVT, VT);
4754 SDValue Chain = SDValue();
4755 SDValue Result;
4756 std::tie(Result, Chain) =
4757 makeLibCall(DAG, LC, VT, Src, CallOptions, DL, Chain);
4758 Results.push_back(Result);
4759 break;
4760 }
4761 case ISD::BITCAST: {
4762 SDValue Src = N->getOperand(0);
4763 EVT SrcVT = Src.getValueType();
4764 if (VT == MVT::i32 && SrcVT == MVT::f32 && Subtarget.is64Bit() &&
4765 Subtarget.hasBasicF()) {
4766 SDValue Dst =
4767 DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Src);
4768 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Dst));
4769 } else if (VT == MVT::i64 && SrcVT == MVT::f64 && !Subtarget.is64Bit()) {
4770 SDValue NewReg = DAG.getNode(LoongArchISD::SPLIT_PAIR_F64, DL,
4771 DAG.getVTList(MVT::i32, MVT::i32), Src);
4772 SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,
4773 NewReg.getValue(0), NewReg.getValue(1));
4774 Results.push_back(RetReg);
4775 }
4776 break;
4777 }
4778 case ISD::FP_TO_UINT: {
4779 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4780 "Unexpected custom legalisation");
4781 auto &TLI = DAG.getTargetLoweringInfo();
4782 SDValue Tmp1, Tmp2;
4783 TLI.expandFP_TO_UINT(N, Tmp1, Tmp2, DAG);
4784 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Tmp1));
4785 break;
4786 }
4787 case ISD::BSWAP: {
4788 SDValue Src = N->getOperand(0);
4789 assert((VT == MVT::i16 || VT == MVT::i32) &&
4790 "Unexpected custom legalization");
4791 MVT GRLenVT = Subtarget.getGRLenVT();
4792 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
4793 SDValue Tmp;
4794 switch (VT.getSizeInBits()) {
4795 default:
4796 llvm_unreachable("Unexpected operand width");
4797 case 16:
4798 Tmp = DAG.getNode(LoongArchISD::REVB_2H, DL, GRLenVT, NewSrc);
4799 break;
4800 case 32:
4801 // Only LA64 will get here due to the size mismatch between VT and
4802 // GRLenVT; LA32 lowering is defined directly in LoongArchInstrInfo.
4803 Tmp = DAG.getNode(LoongArchISD::REVB_2W, DL, GRLenVT, NewSrc);
4804 break;
4805 }
4806 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
4807 break;
4808 }
4809 case ISD::BITREVERSE: {
4810 SDValue Src = N->getOperand(0);
4811 assert((VT == MVT::i8 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
4812 "Unexpected custom legalization");
4813 MVT GRLenVT = Subtarget.getGRLenVT();
4814 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
4815 SDValue Tmp;
4816 switch (VT.getSizeInBits()) {
4817 default:
4818 llvm_unreachable("Unexpected operand width");
4819 case 8:
4820 Tmp = DAG.getNode(LoongArchISD::BITREV_4B, DL, GRLenVT, NewSrc);
4821 break;
4822 case 32:
4823 Tmp = DAG.getNode(LoongArchISD::BITREV_W, DL, GRLenVT, NewSrc);
4824 break;
4825 }
4826 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
4827 break;
4828 }
4829 case ISD::CTLZ:
4830 case ISD::CTTZ: {
4831 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4832 "Unexpected custom legalisation");
4833 Results.push_back(customLegalizeToWOp(N, DAG, 1));
4834 break;
4835 }
4836 case ISD::INTRINSIC_W_CHAIN: {
4837 SDValue Chain = N->getOperand(0);
4838 SDValue Op2 = N->getOperand(2);
4839 MVT GRLenVT = Subtarget.getGRLenVT();
4840 const StringRef ErrorMsgOOR = "argument out of range";
4841 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
4842 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
4843
4844 switch (N->getConstantOperandVal(1)) {
4845 default:
4846 llvm_unreachable("Unexpected Intrinsic.");
4847 case Intrinsic::loongarch_movfcsr2gr: {
4848 if (!Subtarget.hasBasicF()) {
4849 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqF);
4850 return;
4851 }
4852 unsigned Imm = Op2->getAsZExtVal();
4853 if (!isUInt<2>(Imm)) {
4854 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
4855 return;
4856 }
4857 SDValue MOVFCSR2GRResults = DAG.getNode(
4858 LoongArchISD::MOVFCSR2GR, SDLoc(N), {MVT::i64, MVT::Other},
4859 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
4860 Results.push_back(
4861 DAG.getNode(ISD::TRUNCATE, DL, VT, MOVFCSR2GRResults.getValue(0)));
4862 Results.push_back(MOVFCSR2GRResults.getValue(1));
4863 break;
4864 }
4865#define CRC_CASE_EXT_BINARYOP(NAME, NODE) \
4866 case Intrinsic::loongarch_##NAME: { \
4867 SDValue NODE = DAG.getNode( \
4868 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
4869 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
4870 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
4871 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
4872 Results.push_back(NODE.getValue(1)); \
4873 break; \
4874 }
4875 CRC_CASE_EXT_BINARYOP(crc_w_b_w, CRC_W_B_W)
4876 CRC_CASE_EXT_BINARYOP(crc_w_h_w, CRC_W_H_W)
4877 CRC_CASE_EXT_BINARYOP(crc_w_w_w, CRC_W_W_W)
4878 CRC_CASE_EXT_BINARYOP(crcc_w_b_w, CRCC_W_B_W)
4879 CRC_CASE_EXT_BINARYOP(crcc_w_h_w, CRCC_W_H_W)
4880 CRC_CASE_EXT_BINARYOP(crcc_w_w_w, CRCC_W_W_W)
4881#undef CRC_CASE_EXT_BINARYOP
4882
4883#define CRC_CASE_EXT_UNARYOP(NAME, NODE) \
4884 case Intrinsic::loongarch_##NAME: { \
4885 SDValue NODE = DAG.getNode( \
4886 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
4887 {Chain, Op2, \
4888 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
4889 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
4890 Results.push_back(NODE.getValue(1)); \
4891 break; \
4892 }
4893 CRC_CASE_EXT_UNARYOP(crc_w_d_w, CRC_W_D_W)
4894 CRC_CASE_EXT_UNARYOP(crcc_w_d_w, CRCC_W_D_W)
4895#undef CRC_CASE_EXT_UNARYOP
4896#define CSR_CASE(ID) \
4897 case Intrinsic::loongarch_##ID: { \
4898 if (!Subtarget.is64Bit()) \
4899 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64); \
4900 break; \
4901 }
4902 CSR_CASE(csrrd_d);
4903 CSR_CASE(csrwr_d);
4904 CSR_CASE(csrxchg_d);
4905 CSR_CASE(iocsrrd_d);
4906#undef CSR_CASE
4907 case Intrinsic::loongarch_csrrd_w: {
4908 unsigned Imm = Op2->getAsZExtVal();
4909 if (!isUInt<14>(Imm)) {
4910 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
4911 return;
4912 }
4913 SDValue CSRRDResults =
4914 DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
4915 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
4916 Results.push_back(
4917 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRRDResults.getValue(0)));
4918 Results.push_back(CSRRDResults.getValue(1));
4919 break;
4920 }
4921 case Intrinsic::loongarch_csrwr_w: {
4922 unsigned Imm = N->getConstantOperandVal(3);
4923 if (!isUInt<14>(Imm)) {
4924 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
4925 return;
4926 }
4927 SDValue CSRWRResults =
4928 DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
4929 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
4930 DAG.getConstant(Imm, DL, GRLenVT)});
4931 Results.push_back(
4932 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRWRResults.getValue(0)));
4933 Results.push_back(CSRWRResults.getValue(1));
4934 break;
4935 }
4936 case Intrinsic::loongarch_csrxchg_w: {
4937 unsigned Imm = N->getConstantOperandVal(4);
4938 if (!isUInt<14>(Imm)) {
4939 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
4940 return;
4941 }
4942 SDValue CSRXCHGResults = DAG.getNode(
4943 LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
4944 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
4945 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3)),
4946 DAG.getConstant(Imm, DL, GRLenVT)});
4947 Results.push_back(
4948 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRXCHGResults.getValue(0)));
4949 Results.push_back(CSRXCHGResults.getValue(1));
4950 break;
4951 }
4952#define IOCSRRD_CASE(NAME, NODE) \
4953 case Intrinsic::loongarch_##NAME: { \
4954 SDValue IOCSRRDResults = \
4955 DAG.getNode(LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
4956 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)}); \
4957 Results.push_back( \
4958 DAG.getNode(ISD::TRUNCATE, DL, VT, IOCSRRDResults.getValue(0))); \
4959 Results.push_back(IOCSRRDResults.getValue(1)); \
4960 break; \
4961 }
4962 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
4963 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
4964 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
4965#undef IOCSRRD_CASE
4966 case Intrinsic::loongarch_cpucfg: {
4967 SDValue CPUCFGResults =
4968 DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
4969 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)});
4970 Results.push_back(
4971 DAG.getNode(ISD::TRUNCATE, DL, VT, CPUCFGResults.getValue(0)));
4972 Results.push_back(CPUCFGResults.getValue(1));
4973 break;
4974 }
4975 case Intrinsic::loongarch_lddir_d: {
4976 if (!Subtarget.is64Bit()) {
4977 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64);
4978 return;
4979 }
4980 break;
4981 }
4982 }
4983 break;
4984 }
4985 case ISD::READ_REGISTER: {
4986 if (Subtarget.is64Bit())
4987 DAG.getContext()->emitError(
4988 "On LA64, only 64-bit registers can be read.");
4989 else
4990 DAG.getContext()->emitError(
4991 "On LA32, only 32-bit registers can be read.");
4992 Results.push_back(DAG.getUNDEF(VT));
4993 Results.push_back(N->getOperand(0));
4994 break;
4995 }
4996 case ISD::INTRINSIC_WO_CHAIN: {
4997 replaceINTRINSIC_WO_CHAINResults(N, Results, DAG, Subtarget);
4998 break;
4999 }
5000 case ISD::LROUND: {
5001 SDValue Op0 = N->getOperand(0);
5002 EVT OpVT = Op0.getValueType();
5003 RTLIB::Libcall LC =
5004 OpVT == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
5005 MakeLibCallOptions CallOptions;
5006 CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64);
5007 SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first;
5008 Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
5009 Results.push_back(Result);
5010 break;
5011 }
5012 case ISD::ATOMIC_CMP_SWAP: {
5014 break;
5015 }
5016 case ISD::TRUNCATE: {
5017 MVT VT = N->getSimpleValueType(0);
5018 if (getTypeAction(*DAG.getContext(), VT) != TypeWidenVector)
5019 return;
5020
5021 MVT WidenVT = getTypeToTransformTo(*DAG.getContext(), VT).getSimpleVT();
5022 SDValue In = N->getOperand(0);
5023 EVT InVT = In.getValueType();
5024 EVT InEltVT = InVT.getVectorElementType();
5025 EVT EltVT = VT.getVectorElementType();
5026 unsigned MinElts = VT.getVectorNumElements();
5027 unsigned WidenNumElts = WidenVT.getVectorNumElements();
5028 unsigned InBits = InVT.getSizeInBits();
5029
5030 if ((128 % InBits) == 0 && WidenVT.is128BitVector()) {
5031 if ((InEltVT.getSizeInBits() % EltVT.getSizeInBits()) == 0) {
5032 int Scale = InEltVT.getSizeInBits() / EltVT.getSizeInBits();
5033 SmallVector<int, 16> TruncMask(WidenNumElts, -1);
5034 for (unsigned I = 0; I < MinElts; ++I)
5035 TruncMask[I] = Scale * I;
5036
5037 unsigned WidenNumElts = 128 / In.getScalarValueSizeInBits();
5038 MVT SVT = In.getSimpleValueType().getScalarType();
5039 MVT VT = MVT::getVectorVT(SVT, WidenNumElts);
5040 SDValue WidenIn =
5041 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), In,
5042 DAG.getVectorIdxConstant(0, DL));
5043 assert(isTypeLegal(WidenVT) && isTypeLegal(WidenIn.getValueType()) &&
5044 "Illegal vector type in truncation");
5045 WidenIn = DAG.getBitcast(WidenVT, WidenIn);
5046 Results.push_back(
5047 DAG.getVectorShuffle(WidenVT, DL, WidenIn, WidenIn, TruncMask));
5048 return;
5049 }
5050 }
5051
5052 break;
5053 }
5054 }
5055}
5056
5057 static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
5058 TargetLowering::DAGCombinerInfo &DCI,
5059 const LoongArchSubtarget &Subtarget) {
5060 if (DCI.isBeforeLegalizeOps())
5061 return SDValue();
5062
5063 SDValue FirstOperand = N->getOperand(0);
5064 SDValue SecondOperand = N->getOperand(1);
5065 unsigned FirstOperandOpc = FirstOperand.getOpcode();
5066 EVT ValTy = N->getValueType(0);
5067 SDLoc DL(N);
5068 uint64_t lsb, msb;
5069 unsigned SMIdx, SMLen;
5070 ConstantSDNode *CN;
5071 SDValue NewOperand;
5072 MVT GRLenVT = Subtarget.getGRLenVT();
5073
5074 // BSTRPICK requires the 32S feature.
5075 if (!Subtarget.has32S())
5076 return SDValue();
5077
5078 // Op's second operand must be a shifted mask.
5079 if (!(CN = dyn_cast<ConstantSDNode>(SecondOperand)) ||
5080 !isShiftedMask_64(CN->getZExtValue(), SMIdx, SMLen))
5081 return SDValue();
5082
5083 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL) {
5084 // Pattern match BSTRPICK.
5085 // $dst = and ((sra or srl) $src , lsb), (2**len - 1)
5086 // => BSTRPICK $dst, $src, msb, lsb
5087 // where msb = lsb + len - 1
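 // For example (illustrative values), with lsb = 8 and len = 8 (mask 0xff),
 // "and (srl $src, 8), 0xff" becomes "BSTRPICK $dst, $src, 15, 8"
 // since msb = 8 + 8 - 1 = 15.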
5088
5089 // The second operand of the shift must be an immediate.
5090 if (!(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))))
5091 return SDValue();
5092
5093 lsb = CN->getZExtValue();
5094
5095 // Return if the shifted mask does not start at bit 0 or the sum of its
5096 // length and lsb exceeds the word's size.
5097 if (SMIdx != 0 || lsb + SMLen > ValTy.getSizeInBits())
5098 return SDValue();
5099
5100 NewOperand = FirstOperand.getOperand(0);
5101 } else {
5102 // Pattern match BSTRPICK.
5103 // $dst = and $src, (2**len - 1), if len > 12
5104 // => BSTRPICK $dst, $src, msb, lsb
5105 // where lsb = 0 and msb = len - 1
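 // For example (illustrative values), "and $src, 0xffff" (len = 16 > 12)
 // becomes "BSTRPICK $dst, $src, 15, 0".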
5106
5107 // If the mask is <= 0xfff, andi can be used instead.
5108 if (CN->getZExtValue() <= 0xfff)
5109 return SDValue();
5110
5111 // Return if the MSB exceeds the width of the value.
5112 if (SMIdx + SMLen > ValTy.getSizeInBits())
5113 return SDValue();
5114
5115 if (SMIdx > 0) {
5116 // Omit if the constant has more than 2 uses. This is a conservative
5117 // decision. Whether it is a win depends on the HW microarchitecture.
5118 // However it should always be better for 1 and 2 uses.
5119 if (CN->use_size() > 2)
5120 return SDValue();
5121 // Return if the constant can be composed by a single LU12I.W.
5122 if ((CN->getZExtValue() & 0xfff) == 0)
5123 return SDValue();
5124 // Return if the constant can be composed by a single ADDI with
5125 // the zero register.
5126 if (CN->getSExtValue() >= -2048 && CN->getSExtValue() < 0)
5127 return SDValue();
5128 }
5129
5130 lsb = SMIdx;
5131 NewOperand = FirstOperand;
5132 }
5133
5134 msb = lsb + SMLen - 1;
5135 SDValue NR0 = DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy, NewOperand,
5136 DAG.getConstant(msb, DL, GRLenVT),
5137 DAG.getConstant(lsb, DL, GRLenVT));
5138 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL || lsb == 0)
5139 return NR0;
5140 // Try to optimize to
5141 // bstrpick $Rd, $Rs, msb, lsb
5142 // slli $Rd, $Rd, lsb
5143 return DAG.getNode(ISD::SHL, DL, ValTy, NR0,
5144 DAG.getConstant(lsb, DL, GRLenVT));
5145}
5146
5147 static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG,
5148 TargetLowering::DAGCombinerInfo &DCI,
5149 const LoongArchSubtarget &Subtarget) {
5150 // BSTRPICK requires the 32S feature.
5151 if (!Subtarget.has32S())
5152 return SDValue();
5153
5154 if (DCI.isBeforeLegalizeOps())
5155 return SDValue();
5156
5157 // $dst = srl (and $src, Mask), Shamt
5158 // =>
5159 // BSTRPICK $dst, $src, MaskIdx+MaskLen-1, Shamt
5160 // when Mask is a shifted mask, and MaskIdx <= Shamt <= MaskIdx+MaskLen-1
5161 //
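 // For example (illustrative values), with Mask = 0xff00 (MaskIdx = 8,
 // MaskLen = 8) and Shamt = 10, "srl (and $src, 0xff00), 10" becomes
 // "BSTRPICK $dst, $src, 15, 10".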
5162
5163 SDValue FirstOperand = N->getOperand(0);
5164 ConstantSDNode *CN;
5165 EVT ValTy = N->getValueType(0);
5166 SDLoc DL(N);
5167 MVT GRLenVT = Subtarget.getGRLenVT();
5168 unsigned MaskIdx, MaskLen;
5169 uint64_t Shamt;
5170
5171 // The first operand must be an AND and the second operand of the AND must be
5172 // a shifted mask.
5173 if (FirstOperand.getOpcode() != ISD::AND ||
5174 !(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))) ||
5175 !isShiftedMask_64(CN->getZExtValue(), MaskIdx, MaskLen))
5176 return SDValue();
5177
5178 // The second operand (shift amount) must be an immediate.
5179 if (!(CN = dyn_cast<ConstantSDNode>(N->getOperand(1))))
5180 return SDValue();
5181
5182 Shamt = CN->getZExtValue();
5183 if (MaskIdx <= Shamt && Shamt <= MaskIdx + MaskLen - 1)
5184 return DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy,
5185 FirstOperand->getOperand(0),
5186 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
5187 DAG.getConstant(Shamt, DL, GRLenVT));
5188
5189 return SDValue();
5190}
5191
5192// Helper to peek through bitops/trunc/setcc to determine size of source vector.
5193// Allows BITCASTCombine to determine what size vector generated a <X x i1>.
5194static bool checkBitcastSrcVectorSize(SDValue Src, unsigned Size,
5195 unsigned Depth) {
5196 // Limit recursion.
5197 if (Depth >= SelectionDAG::MaxRecursionDepth)
5198 return false;
5199 switch (Src.getOpcode()) {
5200 case ISD::SETCC:
5201 case ISD::TRUNCATE:
5202 return Src.getOperand(0).getValueSizeInBits() == Size;
5203 case ISD::FREEZE:
5204 return checkBitcastSrcVectorSize(Src.getOperand(0), Size, Depth + 1);
5205 case ISD::AND:
5206 case ISD::XOR:
5207 case ISD::OR:
5208 return checkBitcastSrcVectorSize(Src.getOperand(0), Size, Depth + 1) &&
5209 checkBitcastSrcVectorSize(Src.getOperand(1), Size, Depth + 1);
5210 case ISD::SELECT:
5211 case ISD::VSELECT:
5212 return Src.getOperand(0).getScalarValueSizeInBits() == 1 &&
5213 checkBitcastSrcVectorSize(Src.getOperand(1), Size, Depth + 1) &&
5214 checkBitcastSrcVectorSize(Src.getOperand(2), Size, Depth + 1);
5215 case ISD::BUILD_VECTOR:
5216 return ISD::isBuildVectorAllZeros(Src.getNode()) ||
5217 ISD::isBuildVectorAllOnes(Src.getNode());
5218 }
5219 return false;
5220}
5221
5222// Helper to push sign extension of vXi1 SETCC result through bitops.
5223 static SDValue signExtendBitcastSrcVector(SelectionDAG &DAG, EVT SExtVT,
5224 SDValue Src, const SDLoc &DL) {
5225 switch (Src.getOpcode()) {
5226 case ISD::SETCC:
5227 case ISD::FREEZE:
5228 case ISD::TRUNCATE:
5229 case ISD::BUILD_VECTOR:
5230 return DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, Src);
5231 case ISD::AND:
5232 case ISD::XOR:
5233 case ISD::OR:
5234 return DAG.getNode(
5235 Src.getOpcode(), DL, SExtVT,
5236 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(0), DL),
5237 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(1), DL));
5238 case ISD::SELECT:
5239 case ISD::VSELECT:
5240 return DAG.getSelect(
5241 DL, SExtVT, Src.getOperand(0),
5242 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(1), DL),
5243 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(2), DL));
5244 }
5245 llvm_unreachable("Unexpected node type for vXi1 sign extension");
5246}
5247
5248static SDValue
5249 performSETCC_BITCASTCombine(SDNode *N, SelectionDAG &DAG,
5250 TargetLowering::DAGCombinerInfo &DCI,
5251 const LoongArchSubtarget &Subtarget) {
5252 SDLoc DL(N);
5253 EVT VT = N->getValueType(0);
5254 SDValue Src = N->getOperand(0);
5255 EVT SrcVT = Src.getValueType();
5256
5257 if (Src.getOpcode() != ISD::SETCC || !Src.hasOneUse())
5258 return SDValue();
5259
5260 bool UseLASX;
5261 unsigned Opc = ISD::DELETED_NODE;
5262 EVT CmpVT = Src.getOperand(0).getValueType();
5263 EVT EltVT = CmpVT.getVectorElementType();
5264
5265 if (Subtarget.hasExtLSX() && CmpVT.getSizeInBits() == 128)
5266 UseLASX = false;
5267 else if (Subtarget.has32S() && Subtarget.hasExtLASX() &&
5268 CmpVT.getSizeInBits() == 256)
5269 UseLASX = true;
5270 else
5271 return SDValue();
5272
5273 SDValue SrcN1 = Src.getOperand(1);
5274 switch (cast<CondCodeSDNode>(Src.getOperand(2))->get()) {
5275 default:
5276 break;
5277 case ISD::SETEQ:
5278 // x == 0 => not (vmsknez.b x)
5279 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
5281 break;
5282 case ISD::SETGT:
5283 // x > -1 => vmskgez.b x
5284 if (ISD::isBuildVectorAllOnes(SrcN1.getNode()) && EltVT == MVT::i8)
5286 break;
5287 case ISD::SETGE:
5288 // x >= 0 => vmskgez.b x
5289 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
5291 break;
5292 case ISD::SETLT:
5293 // x < 0 => vmskltz.{b,h,w,d} x
5294 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) &&
5295 (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
5296 EltVT == MVT::i64))
5298 break;
5299 case ISD::SETLE:
5300 // x <= -1 => vmskltz.{b,h,w,d} x
5301 if (ISD::isBuildVectorAllOnes(SrcN1.getNode()) &&
5302 (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
5303 EltVT == MVT::i64))
5305 break;
5306 case ISD::SETNE:
5307 // x != 0 => vmsknez.b x
5308 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
5310 break;
5311 }
5312
5313 if (Opc == ISD::DELETED_NODE)
5314 return SDValue();
5315
5316 SDValue V = DAG.getNode(Opc, DL, Subtarget.getGRLenVT(), Src.getOperand(0));
5318 V = DAG.getZExtOrTrunc(V, DL, T);
5319 return DAG.getBitcast(VT, V);
5320}
5321
5322 static SDValue performBITCASTCombine(SDNode *N, SelectionDAG &DAG,
5323 TargetLowering::DAGCombinerInfo &DCI,
5324 const LoongArchSubtarget &Subtarget) {
5325 SDLoc DL(N);
5326 EVT VT = N->getValueType(0);
5327 SDValue Src = N->getOperand(0);
5328 EVT SrcVT = Src.getValueType();
5329 MVT GRLenVT = Subtarget.getGRLenVT();
5330
5331 if (!DCI.isBeforeLegalizeOps())
5332 return SDValue();
5333
5334 if (!SrcVT.isSimple() || SrcVT.getScalarType() != MVT::i1)
5335 return SDValue();
5336
5337 // Combine SETCC and BITCAST into [X]VMSK{LT,GE,NE} when possible
5338 SDValue Res = performSETCC_BITCASTCombine(N, DAG, DCI, Subtarget);
5339 if (Res)
5340 return Res;
5341
5342 // Generate vXi1 using [X]VMSKLTZ
5343 MVT SExtVT;
5344 unsigned Opc;
5345 bool UseLASX = false;
5346 bool PropagateSExt = false;
5347
5348 if (Src.getOpcode() == ISD::SETCC && Src.hasOneUse()) {
5349 EVT CmpVT = Src.getOperand(0).getValueType();
5350 if (CmpVT.getSizeInBits() > 256)
5351 return SDValue();
5352 }
5353
5354 switch (SrcVT.getSimpleVT().SimpleTy) {
5355 default:
5356 return SDValue();
5357 case MVT::v2i1:
5358 SExtVT = MVT::v2i64;
5359 break;
5360 case MVT::v4i1:
5361 SExtVT = MVT::v4i32;
5362 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
5363 SExtVT = MVT::v4i64;
5364 UseLASX = true;
5365 PropagateSExt = true;
5366 }
5367 break;
5368 case MVT::v8i1:
5369 SExtVT = MVT::v8i16;
5370 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
5371 SExtVT = MVT::v8i32;
5372 UseLASX = true;
5373 PropagateSExt = true;
5374 }
5375 break;
5376 case MVT::v16i1:
5377 SExtVT = MVT::v16i8;
5378 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
5379 SExtVT = MVT::v16i16;
5380 UseLASX = true;
5381 PropagateSExt = true;
5382 }
5383 break;
5384 case MVT::v32i1:
5385 SExtVT = MVT::v32i8;
5386 UseLASX = true;
5387 break;
5388 };
5389 Src = PropagateSExt ? signExtendBitcastSrcVector(DAG, SExtVT, Src, DL)
5390 : DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, Src);
5391
5392 SDValue V;
5393 if (!Subtarget.has32S() || !Subtarget.hasExtLASX()) {
5394 if (Src.getSimpleValueType() == MVT::v32i8) {
5395 SDValue Lo, Hi;
5396 std::tie(Lo, Hi) = DAG.SplitVector(Src, DL);
5397 Lo = DAG.getNode(LoongArchISD::VMSKLTZ, DL, GRLenVT, Lo);
5398 Hi = DAG.getNode(LoongArchISD::VMSKLTZ, DL, GRLenVT, Hi);
5399 Hi = DAG.getNode(ISD::SHL, DL, GRLenVT, Hi,
5400 DAG.getConstant(16, DL, MVT::i8));
5401 V = DAG.getNode(ISD::OR, DL, GRLenVT, Lo, Hi);
5402 } else if (UseLASX) {
5403 return SDValue();
5404 }
5405 }
5406
5407 if (!V) {
5409 V = DAG.getNode(Opc, DL, GRLenVT, Src);
5410 }
5411
5413 V = DAG.getZExtOrTrunc(V, DL, T);
5414 return DAG.getBitcast(VT, V);
5415}
5416
5417 static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
5418 TargetLowering::DAGCombinerInfo &DCI,
5419 const LoongArchSubtarget &Subtarget) {
5420 MVT GRLenVT = Subtarget.getGRLenVT();
5421 EVT ValTy = N->getValueType(0);
5422 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
5423 ConstantSDNode *CN0, *CN1;
5424 SDLoc DL(N);
5425 unsigned ValBits = ValTy.getSizeInBits();
5426 unsigned MaskIdx0, MaskLen0, MaskIdx1, MaskLen1;
5427 unsigned Shamt;
5428 bool SwapAndRetried = false;
5429
5430 // BSTRPICK requires the 32S feature.
5431 if (!Subtarget.has32S())
5432 return SDValue();
5433
5434 if (DCI.isBeforeLegalizeOps())
5435 return SDValue();
5436
5437 if (ValBits != 32 && ValBits != 64)
5438 return SDValue();
5439
5440Retry:
5441 // 1st pattern to match BSTRINS:
5442 // R = or (and X, mask0), (and (shl Y, lsb), mask1)
5443 // where mask1 = (2**size - 1) << lsb, mask0 = ~mask1
5444 // =>
5445 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
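 // For example (illustrative values), with size = 8 and lsb = 4 (mask1 = 0xff0,
 // mask0 = ~0xff0), "or (and X, ~0xff0), (and (shl Y, 4), 0xff0)" becomes
 // "BSTRINS X, Y, 11, 4".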
5446 if (N0.getOpcode() == ISD::AND &&
5447 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5448 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
5449 N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL &&
5450 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5451 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
5452 MaskIdx0 == MaskIdx1 && MaskLen0 == MaskLen1 &&
5453 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
5454 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
5455 (MaskIdx0 + MaskLen0 <= ValBits)) {
5456 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 1\n");
5457 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5458 N1.getOperand(0).getOperand(0),
5459 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
5460 DAG.getConstant(MaskIdx0, DL, GRLenVT));
5461 }
5462
5463 // 2nd pattern to match BSTRINS:
5464 // R = or (and X, mask0), (shl (and Y, mask1), lsb)
5465 // where mask1 = (2**size - 1), mask0 = ~(mask1 << lsb)
5466 // =>
5467 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
5468 if (N0.getOpcode() == ISD::AND &&
5469 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5470 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
5471 N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
5472 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5473 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
5474 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
5475 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
5476 MaskLen0 == MaskLen1 && MaskIdx1 == 0 &&
5477 (MaskIdx0 + MaskLen0 <= ValBits)) {
5478 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 2\n");
5479 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5480 N1.getOperand(0).getOperand(0),
5481 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
5482 DAG.getConstant(MaskIdx0, DL, GRLenVT));
5483 }
5484
5485 // 3rd pattern to match BSTRINS:
5486 // R = or (and X, mask0), (and Y, mask1)
5487 // where ~mask0 = (2**size - 1) << lsb, mask0 & mask1 = 0
5488 // =>
5489 // R = BSTRINS X, (shr (and Y, mask1), lsb), msb, lsb
5490 // where msb = lsb + size - 1
5491 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
5492 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5493 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
5494 (MaskIdx0 + MaskLen0 <= 64) &&
5495 (CN1 = dyn_cast<ConstantSDNode>(N1->getOperand(1))) &&
5496 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
5497 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 3\n");
5498 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5499 DAG.getNode(ISD::SRL, DL, N1->getValueType(0), N1,
5500 DAG.getConstant(MaskIdx0, DL, GRLenVT)),
5501 DAG.getConstant(ValBits == 32
5502 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
5503 : (MaskIdx0 + MaskLen0 - 1),
5504 DL, GRLenVT),
5505 DAG.getConstant(MaskIdx0, DL, GRLenVT));
5506 }
5507
5508 // 4th pattern to match BSTRINS:
5509 // R = or (and X, mask), (shl Y, shamt)
5510 // where mask = (2**shamt - 1)
5511 // =>
5512 // R = BSTRINS X, Y, ValBits - 1, shamt
5513 // where ValBits = 32 or 64
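 // For example (illustrative values), "or (and X, 0xfff), (shl Y, 12)"
 // (shamt = 12) becomes "BSTRINS X, Y, 63, 12" on LA64 and
 // "BSTRINS X, Y, 31, 12" on LA32.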
5514 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::SHL &&
5515 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5516 isShiftedMask_64(CN0->getZExtValue(), MaskIdx0, MaskLen0) &&
5517 MaskIdx0 == 0 && (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5518 (Shamt = CN1->getZExtValue()) == MaskLen0 &&
5519 (MaskIdx0 + MaskLen0 <= ValBits)) {
5520 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 4\n");
5521 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5522 N1.getOperand(0),
5523 DAG.getConstant((ValBits - 1), DL, GRLenVT),
5524 DAG.getConstant(Shamt, DL, GRLenVT));
5525 }
5526
5527 // 5th pattern to match BSTRINS:
5528 // R = or (and X, mask), const
5529 // where ~mask = (2**size - 1) << lsb, mask & const = 0
5530 // =>
5531 // R = BSTRINS X, (const >> lsb), msb, lsb
5532 // where msb = lsb + size - 1
5533 if (N0.getOpcode() == ISD::AND &&
5534 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5535 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
5536 (CN1 = dyn_cast<ConstantSDNode>(N1)) &&
5537 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
5538 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 5\n");
5539 return DAG.getNode(
5540 LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5541 DAG.getSignedConstant(CN1->getSExtValue() >> MaskIdx0, DL, ValTy),
5542 DAG.getConstant(ValBits == 32 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
5543 : (MaskIdx0 + MaskLen0 - 1),
5544 DL, GRLenVT),
5545 DAG.getConstant(MaskIdx0, DL, GRLenVT));
5546 }
5547
5548 // 6th pattern.
5549 // a = b | ((c & mask) << shamt), where all positions in b to be overwritten
5550 // by the incoming bits are known to be zero.
5551 // =>
5552 // a = BSTRINS b, c, shamt + MaskLen - 1, shamt
5553 //
5554 // Note that the 1st pattern is a special situation of the 6th, i.e. the 6th
5555 // pattern is more common than the 1st. So we put the 1st before the 6th in
5556 // order to match as many nodes as possible.
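 // For example (illustrative values), if bits 23..16 of b are known to be
 // zero, "b | ((c & 0xff) << 16)" becomes "BSTRINS b, c, 23, 16".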
5557 ConstantSDNode *CNMask, *CNShamt;
5558 unsigned MaskIdx, MaskLen;
5559 if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
5560 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
5561 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
5562 MaskIdx == 0 && (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5563 CNShamt->getZExtValue() + MaskLen <= ValBits) {
5564 Shamt = CNShamt->getZExtValue();
5565 APInt ShMask(ValBits, CNMask->getZExtValue() << Shamt);
5566 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
5567 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 6\n");
5568 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
5569 N1.getOperand(0).getOperand(0),
5570 DAG.getConstant(Shamt + MaskLen - 1, DL, GRLenVT),
5571 DAG.getConstant(Shamt, DL, GRLenVT));
5572 }
5573 }
5574
5575 // 7th pattern.
5576 // a = b | ((c << shamt) & shifted_mask), where all positions in b to be
5577 // overwritten by the incoming bits are known to be zero.
5578 // =>
5579 // a = BSTRINS b, c, MaskIdx + MaskLen - 1, MaskIdx
5580 //
5581 // Similarly, the 7th pattern is more common than the 2nd. So we put the 2nd
5582 // before the 7th in order to match as many nodes as possible.
5583 if (N1.getOpcode() == ISD::AND &&
5584 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5585 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
5586 N1.getOperand(0).getOpcode() == ISD::SHL &&
5587 (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
5588 CNShamt->getZExtValue() == MaskIdx) {
5589 APInt ShMask(ValBits, CNMask->getZExtValue());
5590 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
5591 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 7\n");
5592 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
5593 N1.getOperand(0).getOperand(0),
5594 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
5595 DAG.getConstant(MaskIdx, DL, GRLenVT));
5596 }
5597 }
5598
5599 // (or a, b) and (or b, a) are equivalent, so swap the operands and retry.
5600 if (!SwapAndRetried) {
5601 std::swap(N0, N1);
5602 SwapAndRetried = true;
5603 goto Retry;
5604 }
5605
5606 SwapAndRetried = false;
5607Retry2:
5608 // 8th pattern.
5609 // a = b | (c & shifted_mask), where all positions in b to be overwritten by
5610 // the incoming bits are known to be zero.
5611 // =>
5612 // a = BSTRINS b, c >> MaskIdx, MaskIdx + MaskLen - 1, MaskIdx
5613 //
5614 // Similarly, the 8th pattern is more common than the 4th and 5th patterns. So
5615 // we put it here in order to match as many nodes as possible or generate fewer
5616 // instructions.
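 // For example (illustrative values), if bits 15..8 of b are known to be
 // zero, "b | (c & 0xff00)" becomes "BSTRINS b, c >> 8, 15, 8".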
5617 if (N1.getOpcode() == ISD::AND &&
5618 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5619 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen)) {
5620 APInt ShMask(ValBits, CNMask->getZExtValue());
5621 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
5622 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 8\n");
5623 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
5624 DAG.getNode(ISD::SRL, DL, N1->getValueType(0),
5625 N1->getOperand(0),
5626 DAG.getConstant(MaskIdx, DL, GRLenVT)),
5627 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
5628 DAG.getConstant(MaskIdx, DL, GRLenVT));
5629 }
5630 }
5631 // Swap N0/N1 and retry.
5632 if (!SwapAndRetried) {
5633 std::swap(N0, N1);
5634 SwapAndRetried = true;
5635 goto Retry2;
5636 }
5637
5638 return SDValue();
5639}
5640
5641static bool checkValueWidth(SDValue V, ISD::LoadExtType &ExtType) {
5642 ExtType = ISD::NON_EXTLOAD;
5643
5644 switch (V.getNode()->getOpcode()) {
5645 case ISD::LOAD: {
5646 LoadSDNode *LoadNode = cast<LoadSDNode>(V.getNode());
5647 if ((LoadNode->getMemoryVT() == MVT::i8) ||
5648 (LoadNode->getMemoryVT() == MVT::i16)) {
5649 ExtType = LoadNode->getExtensionType();
5650 return true;
5651 }
5652 return false;
5653 }
5654 case ISD::AssertSext: {
5655 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
5656 if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
5657 ExtType = ISD::SEXTLOAD;
5658 return true;
5659 }
5660 return false;
5661 }
5662 case ISD::AssertZext: {
5663 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
5664 if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
5665 ExtType = ISD::ZEXTLOAD;
5666 return true;
5667 }
5668 return false;
5669 }
5670 default:
5671 return false;
5672 }
5673
5674 return false;
5675}
5676
5677// Eliminate redundant truncation and zero-extension nodes.
5678// * Case 1:
5679// +------------+ +------------+ +------------+
5680// | Input1 | | Input2 | | CC |
5681// +------------+ +------------+ +------------+
5682// | | |
5683// V V +----+
5684// +------------+ +------------+ |
5685// | TRUNCATE | | TRUNCATE | |
5686// +------------+ +------------+ |
5687// | | |
5688// V V |
5689// +------------+ +------------+ |
5690// | ZERO_EXT | | ZERO_EXT | |
5691// +------------+ +------------+ |
5692// | | |
5693// | +-------------+ |
5694// V V | |
5695// +----------------+ | |
5696// | AND | | |
5697// +----------------+ | |
5698// | | |
5699// +---------------+ | |
5700// | | |
5701// V V V
5702// +-------------+
5703// | CMP |
5704// +-------------+
5705// * Case 2:
5706// +------------+ +------------+ +-------------+ +------------+ +------------+
5707// | Input1 | | Input2 | | Constant -1 | | Constant 0 | | CC |
5708// +------------+ +------------+ +-------------+ +------------+ +------------+
5709// | | | | |
5710// V | | | |
5711// +------------+ | | | |
5712// | XOR |<---------------------+ | |
5713// +------------+ | | |
5714// | | | |
5715// V V +---------------+ |
5716// +------------+ +------------+ | |
5717// | TRUNCATE | | TRUNCATE | | +-------------------------+
5718// +------------+ +------------+ | |
5719// | | | |
5720// V V | |
5721// +------------+ +------------+ | |
5722// | ZERO_EXT | | ZERO_EXT | | |
5723// +------------+ +------------+ | |
5724// | | | |
5725// V V | |
5726// +----------------+ | |
5727// | AND | | |
5728// +----------------+ | |
5729// | | |
5730// +---------------+ | |
5731// | | |
5732// V V V
5733// +-------------+
5734// | CMP |
5735// +-------------+
5736 static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG,
5737 TargetLowering::DAGCombinerInfo &DCI,
5738 const LoongArchSubtarget &Subtarget) {
5739 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
5740
5741 SDNode *AndNode = N->getOperand(0).getNode();
5742 if (AndNode->getOpcode() != ISD::AND)
5743 return SDValue();
5744
5745 SDValue AndInputValue2 = AndNode->getOperand(1);
5746 if (AndInputValue2.getOpcode() != ISD::ZERO_EXTEND)
5747 return SDValue();
5748
5749 SDValue CmpInputValue = N->getOperand(1);
5750 SDValue AndInputValue1 = AndNode->getOperand(0);
5751 if (AndInputValue1.getOpcode() == ISD::XOR) {
5752 if (CC != ISD::SETEQ && CC != ISD::SETNE)
5753 return SDValue();
5754 ConstantSDNode *CN = dyn_cast<ConstantSDNode>(AndInputValue1.getOperand(1));
5755 if (!CN || CN->getSExtValue() != -1)
5756 return SDValue();
5757 CN = dyn_cast<ConstantSDNode>(CmpInputValue);
5758 if (!CN || CN->getSExtValue() != 0)
5759 return SDValue();
5760 AndInputValue1 = AndInputValue1.getOperand(0);
5761 if (AndInputValue1.getOpcode() != ISD::ZERO_EXTEND)
5762 return SDValue();
5763 } else if (AndInputValue1.getOpcode() == ISD::ZERO_EXTEND) {
5764 if (AndInputValue2 != CmpInputValue)
5765 return SDValue();
5766 } else {
5767 return SDValue();
5768 }
5769
5770 SDValue TruncValue1 = AndInputValue1.getNode()->getOperand(0);
5771 if (TruncValue1.getOpcode() != ISD::TRUNCATE)
5772 return SDValue();
5773
5774 SDValue TruncValue2 = AndInputValue2.getNode()->getOperand(0);
5775 if (TruncValue2.getOpcode() != ISD::TRUNCATE)
5776 return SDValue();
5777
5778 SDValue TruncInputValue1 = TruncValue1.getNode()->getOperand(0);
5779 SDValue TruncInputValue2 = TruncValue2.getNode()->getOperand(0);
5780 ISD::LoadExtType ExtType1;
5781 ISD::LoadExtType ExtType2;
5782
5783 if (!checkValueWidth(TruncInputValue1, ExtType1) ||
5784 !checkValueWidth(TruncInputValue2, ExtType2))
5785 return SDValue();
5786
5787 if (TruncInputValue1->getValueType(0) != TruncInputValue2->getValueType(0) ||
5788 AndNode->getValueType(0) != TruncInputValue1->getValueType(0))
5789 return SDValue();
5790
5791 if ((ExtType2 != ISD::ZEXTLOAD) &&
5792 ((ExtType2 != ISD::SEXTLOAD) && (ExtType1 != ISD::SEXTLOAD)))
5793 return SDValue();
5794
5795 // These truncation and zero-extension nodes are not necessary, remove them.
5796 SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N), AndNode->getValueType(0),
5797 TruncInputValue1, TruncInputValue2);
5798 SDValue NewSetCC =
5799 DAG.getSetCC(SDLoc(N), N->getValueType(0), NewAnd, TruncInputValue2, CC);
5800 DAG.ReplaceAllUsesWith(N, NewSetCC.getNode());
5801 return SDValue(N, 0);
5802}
5803
5804// Combine (loongarch_bitrev_w (loongarch_revb_2w X)) to loongarch_bitrev_4b.
5805 static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG,
5806 TargetLowering::DAGCombinerInfo &DCI,
5807 const LoongArchSubtarget &Subtarget) {
5808 if (DCI.isBeforeLegalizeOps())
5809 return SDValue();
5810
5811 SDValue Src = N->getOperand(0);
5812 if (Src.getOpcode() != LoongArchISD::REVB_2W)
5813 return SDValue();
5814
5815 return DAG.getNode(LoongArchISD::BITREV_4B, SDLoc(N), N->getValueType(0),
5816 Src.getOperand(0));
5817}
5818
5819// Perform common combines for BR_CC and SELECT_CC conditions.
5820static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL,
5821 SelectionDAG &DAG, const LoongArchSubtarget &Subtarget) {
5822 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
5823
5824 // Since an arithmetic right shift always preserves the sign bit,
5825 // the shift can be omitted.
5826 // Fold setlt (sra X, N), 0 -> setlt X, 0 and
5827 // setge (sra X, N), 0 -> setge X, 0
5828 if (isNullConstant(RHS) && (CCVal == ISD::SETGE || CCVal == ISD::SETLT) &&
5829 LHS.getOpcode() == ISD::SRA) {
5830 LHS = LHS.getOperand(0);
5831 return true;
5832 }
5833
5834 if (!ISD::isIntEqualitySetCC(CCVal))
5835 return false;
5836
5837 // Fold ((setlt X, Y), 0, ne) -> (X, Y, lt)
5838 // Sometimes the setcc is introduced after br_cc/select_cc has been formed.
5839 if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
5840 LHS.getOperand(0).getValueType() == Subtarget.getGRLenVT()) {
5841 // If we're looking for eq 0 instead of ne 0, we need to invert the
5842 // condition.
5843 bool Invert = CCVal == ISD::SETEQ;
5844 CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
5845 if (Invert)
5846 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
5847
5848 RHS = LHS.getOperand(1);
5849 LHS = LHS.getOperand(0);
5850 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
5851
5852 CC = DAG.getCondCode(CCVal);
5853 return true;
5854 }
5855
5856 // Fold ((srl (and X, 1<<C), C), 0, eq/ne) -> ((shl X, GRLen-1-C), 0, ge/lt)
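 // For example (illustrative values), on LA64 with C = 3:
 // "((X & 8) >> 3) == 0" becomes "(X << 60) >= 0", since the left shift
 // moves bit 3 into the sign bit.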
5857 if (isNullConstant(RHS) && LHS.getOpcode() == ISD::SRL && LHS.hasOneUse() &&
5858 LHS.getOperand(1).getOpcode() == ISD::Constant) {
5859 SDValue LHS0 = LHS.getOperand(0);
5860 if (LHS0.getOpcode() == ISD::AND &&
5861 LHS0.getOperand(1).getOpcode() == ISD::Constant) {
5862 uint64_t Mask = LHS0.getConstantOperandVal(1);
5863 uint64_t ShAmt = LHS.getConstantOperandVal(1);
5864 if (isPowerOf2_64(Mask) && Log2_64(Mask) == ShAmt) {
5865 CCVal = CCVal == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
5866 CC = DAG.getCondCode(CCVal);
5867
5868 ShAmt = LHS.getValueSizeInBits() - 1 - ShAmt;
5869 LHS = LHS0.getOperand(0);
5870 if (ShAmt != 0)
5871 LHS =
5872 DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS0.getOperand(0),
5873 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
5874 return true;
5875 }
5876 }
5877 }
5878
5879 // (X, 1, setne) -> (X, 0, seteq) if we can prove X is 0/1.
5880 // This can occur when legalizing some floating point comparisons.
5881 APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
5882 if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
5883 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
5884 CC = DAG.getCondCode(CCVal);
5885 RHS = DAG.getConstant(0, DL, LHS.getValueType());
5886 return true;
5887 }
5888
5889 return false;
5890}
5891
5892 static SDValue performBR_CCCombine(SDNode *N, SelectionDAG &DAG,
5893 TargetLowering::DAGCombinerInfo &DCI,
5894 const LoongArchSubtarget &Subtarget) {
5895 SDValue LHS = N->getOperand(1);
5896 SDValue RHS = N->getOperand(2);
5897 SDValue CC = N->getOperand(3);
5898 SDLoc DL(N);
5899
5900 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
5901 return DAG.getNode(LoongArchISD::BR_CC, DL, N->getValueType(0),
5902 N->getOperand(0), LHS, RHS, CC, N->getOperand(4));
5903
5904 return SDValue();
5905}
5906
5907 static SDValue performSELECT_CCCombine(SDNode *N, SelectionDAG &DAG,
5908 TargetLowering::DAGCombinerInfo &DCI,
5909 const LoongArchSubtarget &Subtarget) {
5910 // Transform
5911 SDValue LHS = N->getOperand(0);
5912 SDValue RHS = N->getOperand(1);
5913 SDValue CC = N->getOperand(2);
5914 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
5915 SDValue TrueV = N->getOperand(3);
5916 SDValue FalseV = N->getOperand(4);
5917 SDLoc DL(N);
5918 EVT VT = N->getValueType(0);
5919
5920 // If the True and False values are the same, we don't need a select_cc.
5921 if (TrueV == FalseV)
5922 return TrueV;
5923
5924 // (select (x < 0), y, z) -> x >> (GRLEN - 1) & (y - z) + z
5925 // (select (x >= 0), y, z) -> x >> (GRLEN - 1) & (z - y) + y
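 // For example (illustrative values), on LA64 with y = 5 and z = 2:
 // "x >> 63" is 0 or -1, so "(x >> 63) & (5 - 2) + 2" yields 2 when x >= 0
 // and 5 when x < 0.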
5926 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
5927 isNullConstant(RHS) &&
5928 (CCVal == ISD::CondCode::SETLT || CCVal == ISD::CondCode::SETGE)) {
5929 if (CCVal == ISD::CondCode::SETGE)
5930 std::swap(TrueV, FalseV);
5931
5932 int64_t TrueSImm = cast<ConstantSDNode>(TrueV)->getSExtValue();
5933 int64_t FalseSImm = cast<ConstantSDNode>(FalseV)->getSExtValue();
5934 // Only handle simm12; if the constant is not in this range, it can be
5935 // treated as a register operand instead.
5936 if (isInt<12>(TrueSImm) && isInt<12>(FalseSImm) &&
5937 isInt<12>(TrueSImm - FalseSImm)) {
5938 SDValue SRA =
5939 DAG.getNode(ISD::SRA, DL, VT, LHS,
5940 DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT));
5941 SDValue AND =
5942 DAG.getNode(ISD::AND, DL, VT, SRA,
5943 DAG.getSignedConstant(TrueSImm - FalseSImm, DL, VT));
5944 return DAG.getNode(ISD::ADD, DL, VT, AND, FalseV);
5945 }
5946
5947 if (CCVal == ISD::CondCode::SETGE)
5948 std::swap(TrueV, FalseV);
5949 }
5950
5951 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
5952 return DAG.getNode(LoongArchISD::SELECT_CC, DL, N->getValueType(0),
5953 {LHS, RHS, CC, TrueV, FalseV});
5954
5955 return SDValue();
5956}
5957
5958template <unsigned N>
5960 SelectionDAG &DAG,
5961 const LoongArchSubtarget &Subtarget,
5962 bool IsSigned = false) {
5963 SDLoc DL(Node);
5964 auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
5965 // Check the ImmArg.
5966 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
5967 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
5968 DAG.getContext()->emitError(Node->getOperationName(0) +
5969 ": argument out of range.");
5970 return DAG.getNode(ISD::UNDEF, DL, Subtarget.getGRLenVT());
5971 }
5972 return DAG.getConstant(CImm->getZExtValue(), DL, Subtarget.getGRLenVT());
5973}
5974
5975template <unsigned N>
5976static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp,
5977 SelectionDAG &DAG, bool IsSigned = false) {
5978 SDLoc DL(Node);
5979 EVT ResTy = Node->getValueType(0);
5980 auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
5981
5982 // Check the ImmArg.
5983 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
5984 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
5985 DAG.getContext()->emitError(Node->getOperationName(0) +
5986 ": argument out of range.");
5987 return DAG.getNode(ISD::UNDEF, DL, ResTy);
5988 }
5989 return DAG.getConstant(
5991 IsSigned ? CImm->getSExtValue() : CImm->getZExtValue(), IsSigned),
5992 DL, ResTy);
5993}
5994
5995 static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG) {
5996 SDLoc DL(Node);
5997 EVT ResTy = Node->getValueType(0);
5998 SDValue Vec = Node->getOperand(2);
5999 SDValue Mask = DAG.getConstant(Vec.getScalarValueSizeInBits() - 1, DL, ResTy);
6000 return DAG.getNode(ISD::AND, DL, ResTy, Vec, Mask);
6001}
6002
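 // Clears, in each element of operand 1, the bit selected by the corresponding
 // element of operand 2: result = vj & ~(1 << (elt & (EltBits - 1))).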
6003 static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG) {
6004 SDLoc DL(Node);
6005 EVT ResTy = Node->getValueType(0);
6006 SDValue One = DAG.getConstant(1, DL, ResTy);
6007 SDValue Bit =
6008 DAG.getNode(ISD::SHL, DL, ResTy, One, truncateVecElts(Node, DAG));
6009
6010 return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1),
6011 DAG.getNOT(DL, Bit, ResTy));
6012}
6013
6014template <unsigned N>
6015 static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG) {
6016 SDLoc DL(Node);
6017 EVT ResTy = Node->getValueType(0);
6018 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
6019 // Check the unsigned ImmArg.
6020 if (!isUInt<N>(CImm->getZExtValue())) {
6021 DAG.getContext()->emitError(Node->getOperationName(0) +
6022 ": argument out of range.");
6023 return DAG.getNode(ISD::UNDEF, DL, ResTy);
6024 }
6025
6026 APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
6027 SDValue Mask = DAG.getConstant(~BitImm, DL, ResTy);
6028
6029 return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1), Mask);
6030}
6031
6032template <unsigned N>
6033 static SDValue lowerVectorBitSetImm(SDNode *Node, SelectionDAG &DAG) {
6034 SDLoc DL(Node);
6035 EVT ResTy = Node->getValueType(0);
6036 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
6037 // Check the unsigned ImmArg.
6038 if (!isUInt<N>(CImm->getZExtValue())) {
6039 DAG.getContext()->emitError(Node->getOperationName(0) +
6040 ": argument out of range.");
6041 return DAG.getNode(ISD::UNDEF, DL, ResTy);
6042 }
6043
6044 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
6045 SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
6046 return DAG.getNode(ISD::OR, DL, ResTy, Node->getOperand(1), BitImm);
6047}
6048
6049template <unsigned N>
6050 static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG) {
6051 SDLoc DL(Node);
6052 EVT ResTy = Node->getValueType(0);
6053 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
6054 // Check the unsigned ImmArg.
6055 if (!isUInt<N>(CImm->getZExtValue())) {
6056 DAG.getContext()->emitError(Node->getOperationName(0) +
6057 ": argument out of range.");
6058 return DAG.getNode(ISD::UNDEF, DL, ResTy);
6059 }
6060
6061 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
6062 SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
6063 return DAG.getNode(ISD::XOR, DL, ResTy, Node->getOperand(1), BitImm);
6064}
6065
6066template <unsigned W>
6068 unsigned ResOp) {
6069 unsigned Imm = N->getConstantOperandVal(2);
6070 if (!isUInt<W>(Imm)) {
6071 const StringRef ErrorMsg = "argument out of range";
6072 DAG.getContext()->emitError(N->getOperationName(0) + ": " + ErrorMsg + ".");
6073 return DAG.getUNDEF(N->getValueType(0));
6074 }
6075 SDLoc DL(N);
6076 SDValue Vec = N->getOperand(1);
6077 SDValue Idx = DAG.getConstant(Imm, DL, MVT::i32);
6079 return DAG.getNode(ResOp, DL, N->getValueType(0), Vec, Idx, EltVT);
6080}
6081
6082static SDValue
6083 performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG,
6084 TargetLowering::DAGCombinerInfo &DCI,
6085 const LoongArchSubtarget &Subtarget) {
6086 SDLoc DL(N);
6087 switch (N->getConstantOperandVal(0)) {
6088 default:
6089 break;
6090 case Intrinsic::loongarch_lsx_vadd_b:
6091 case Intrinsic::loongarch_lsx_vadd_h:
6092 case Intrinsic::loongarch_lsx_vadd_w:
6093 case Intrinsic::loongarch_lsx_vadd_d:
6094 case Intrinsic::loongarch_lasx_xvadd_b:
6095 case Intrinsic::loongarch_lasx_xvadd_h:
6096 case Intrinsic::loongarch_lasx_xvadd_w:
6097 case Intrinsic::loongarch_lasx_xvadd_d:
6098 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
6099 N->getOperand(2));
6100 case Intrinsic::loongarch_lsx_vaddi_bu:
6101 case Intrinsic::loongarch_lsx_vaddi_hu:
6102 case Intrinsic::loongarch_lsx_vaddi_wu:
6103 case Intrinsic::loongarch_lsx_vaddi_du:
6104 case Intrinsic::loongarch_lasx_xvaddi_bu:
6105 case Intrinsic::loongarch_lasx_xvaddi_hu:
6106 case Intrinsic::loongarch_lasx_xvaddi_wu:
6107 case Intrinsic::loongarch_lasx_xvaddi_du:
6108 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
6109 lowerVectorSplatImm<5>(N, 2, DAG));
6110 case Intrinsic::loongarch_lsx_vsub_b:
6111 case Intrinsic::loongarch_lsx_vsub_h:
6112 case Intrinsic::loongarch_lsx_vsub_w:
6113 case Intrinsic::loongarch_lsx_vsub_d:
6114 case Intrinsic::loongarch_lasx_xvsub_b:
6115 case Intrinsic::loongarch_lasx_xvsub_h:
6116 case Intrinsic::loongarch_lasx_xvsub_w:
6117 case Intrinsic::loongarch_lasx_xvsub_d:
6118 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
6119 N->getOperand(2));
6120 case Intrinsic::loongarch_lsx_vsubi_bu:
6121 case Intrinsic::loongarch_lsx_vsubi_hu:
6122 case Intrinsic::loongarch_lsx_vsubi_wu:
6123 case Intrinsic::loongarch_lsx_vsubi_du:
6124 case Intrinsic::loongarch_lasx_xvsubi_bu:
6125 case Intrinsic::loongarch_lasx_xvsubi_hu:
6126 case Intrinsic::loongarch_lasx_xvsubi_wu:
6127 case Intrinsic::loongarch_lasx_xvsubi_du:
6128 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
6129 lowerVectorSplatImm<5>(N, 2, DAG));
6130 case Intrinsic::loongarch_lsx_vneg_b:
6131 case Intrinsic::loongarch_lsx_vneg_h:
6132 case Intrinsic::loongarch_lsx_vneg_w:
6133 case Intrinsic::loongarch_lsx_vneg_d:
6134 case Intrinsic::loongarch_lasx_xvneg_b:
6135 case Intrinsic::loongarch_lasx_xvneg_h:
6136 case Intrinsic::loongarch_lasx_xvneg_w:
6137 case Intrinsic::loongarch_lasx_xvneg_d:
6138 return DAG.getNode(
6139 ISD::SUB, DL, N->getValueType(0),
6140 DAG.getConstant(
6141 APInt(N->getValueType(0).getScalarType().getSizeInBits(), 0,
6142 /*isSigned=*/true),
6143 SDLoc(N), N->getValueType(0)),
6144 N->getOperand(1));
6145 case Intrinsic::loongarch_lsx_vmax_b:
6146 case Intrinsic::loongarch_lsx_vmax_h:
6147 case Intrinsic::loongarch_lsx_vmax_w:
6148 case Intrinsic::loongarch_lsx_vmax_d:
6149 case Intrinsic::loongarch_lasx_xvmax_b:
6150 case Intrinsic::loongarch_lasx_xvmax_h:
6151 case Intrinsic::loongarch_lasx_xvmax_w:
6152 case Intrinsic::loongarch_lasx_xvmax_d:
6153 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
6154 N->getOperand(2));
6155 case Intrinsic::loongarch_lsx_vmax_bu:
6156 case Intrinsic::loongarch_lsx_vmax_hu:
6157 case Intrinsic::loongarch_lsx_vmax_wu:
6158 case Intrinsic::loongarch_lsx_vmax_du:
6159 case Intrinsic::loongarch_lasx_xvmax_bu:
6160 case Intrinsic::loongarch_lasx_xvmax_hu:
6161 case Intrinsic::loongarch_lasx_xvmax_wu:
6162 case Intrinsic::loongarch_lasx_xvmax_du:
6163 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
6164 N->getOperand(2));
6165 case Intrinsic::loongarch_lsx_vmaxi_b:
6166 case Intrinsic::loongarch_lsx_vmaxi_h:
6167 case Intrinsic::loongarch_lsx_vmaxi_w:
6168 case Intrinsic::loongarch_lsx_vmaxi_d:
6169 case Intrinsic::loongarch_lasx_xvmaxi_b:
6170 case Intrinsic::loongarch_lasx_xvmaxi_h:
6171 case Intrinsic::loongarch_lasx_xvmaxi_w:
6172 case Intrinsic::loongarch_lasx_xvmaxi_d:
6173 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
6174 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
6175 case Intrinsic::loongarch_lsx_vmaxi_bu:
6176 case Intrinsic::loongarch_lsx_vmaxi_hu:
6177 case Intrinsic::loongarch_lsx_vmaxi_wu:
6178 case Intrinsic::loongarch_lsx_vmaxi_du:
6179 case Intrinsic::loongarch_lasx_xvmaxi_bu:
6180 case Intrinsic::loongarch_lasx_xvmaxi_hu:
6181 case Intrinsic::loongarch_lasx_xvmaxi_wu:
6182 case Intrinsic::loongarch_lasx_xvmaxi_du:
6183 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
6184 lowerVectorSplatImm<5>(N, 2, DAG));
6185 case Intrinsic::loongarch_lsx_vmin_b:
6186 case Intrinsic::loongarch_lsx_vmin_h:
6187 case Intrinsic::loongarch_lsx_vmin_w:
6188 case Intrinsic::loongarch_lsx_vmin_d:
6189 case Intrinsic::loongarch_lasx_xvmin_b:
6190 case Intrinsic::loongarch_lasx_xvmin_h:
6191 case Intrinsic::loongarch_lasx_xvmin_w:
6192 case Intrinsic::loongarch_lasx_xvmin_d:
6193 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
6194 N->getOperand(2));
6195 case Intrinsic::loongarch_lsx_vmin_bu:
6196 case Intrinsic::loongarch_lsx_vmin_hu:
6197 case Intrinsic::loongarch_lsx_vmin_wu:
6198 case Intrinsic::loongarch_lsx_vmin_du:
6199 case Intrinsic::loongarch_lasx_xvmin_bu:
6200 case Intrinsic::loongarch_lasx_xvmin_hu:
6201 case Intrinsic::loongarch_lasx_xvmin_wu:
6202 case Intrinsic::loongarch_lasx_xvmin_du:
6203 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
6204 N->getOperand(2));
6205 case Intrinsic::loongarch_lsx_vmini_b:
6206 case Intrinsic::loongarch_lsx_vmini_h:
6207 case Intrinsic::loongarch_lsx_vmini_w:
6208 case Intrinsic::loongarch_lsx_vmini_d:
6209 case Intrinsic::loongarch_lasx_xvmini_b:
6210 case Intrinsic::loongarch_lasx_xvmini_h:
6211 case Intrinsic::loongarch_lasx_xvmini_w:
6212 case Intrinsic::loongarch_lasx_xvmini_d:
6213 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
6214 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
6215 case Intrinsic::loongarch_lsx_vmini_bu:
6216 case Intrinsic::loongarch_lsx_vmini_hu:
6217 case Intrinsic::loongarch_lsx_vmini_wu:
6218 case Intrinsic::loongarch_lsx_vmini_du:
6219 case Intrinsic::loongarch_lasx_xvmini_bu:
6220 case Intrinsic::loongarch_lasx_xvmini_hu:
6221 case Intrinsic::loongarch_lasx_xvmini_wu:
6222 case Intrinsic::loongarch_lasx_xvmini_du:
6223 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
6224 lowerVectorSplatImm<5>(N, 2, DAG));
6225 case Intrinsic::loongarch_lsx_vmul_b:
6226 case Intrinsic::loongarch_lsx_vmul_h:
6227 case Intrinsic::loongarch_lsx_vmul_w:
6228 case Intrinsic::loongarch_lsx_vmul_d:
6229 case Intrinsic::loongarch_lasx_xvmul_b:
6230 case Intrinsic::loongarch_lasx_xvmul_h:
6231 case Intrinsic::loongarch_lasx_xvmul_w:
6232 case Intrinsic::loongarch_lasx_xvmul_d:
6233 return DAG.getNode(ISD::MUL, DL, N->getValueType(0), N->getOperand(1),
6234 N->getOperand(2));
6235 case Intrinsic::loongarch_lsx_vmadd_b:
6236 case Intrinsic::loongarch_lsx_vmadd_h:
6237 case Intrinsic::loongarch_lsx_vmadd_w:
6238 case Intrinsic::loongarch_lsx_vmadd_d:
6239 case Intrinsic::loongarch_lasx_xvmadd_b:
6240 case Intrinsic::loongarch_lasx_xvmadd_h:
6241 case Intrinsic::loongarch_lasx_xvmadd_w:
6242 case Intrinsic::loongarch_lasx_xvmadd_d: {
6243 EVT ResTy = N->getValueType(0);
6244 return DAG.getNode(ISD::ADD, SDLoc(N), ResTy, N->getOperand(1),
6245 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
6246 N->getOperand(3)));
6247 }
6248 case Intrinsic::loongarch_lsx_vmsub_b:
6249 case Intrinsic::loongarch_lsx_vmsub_h:
6250 case Intrinsic::loongarch_lsx_vmsub_w:
6251 case Intrinsic::loongarch_lsx_vmsub_d:
6252 case Intrinsic::loongarch_lasx_xvmsub_b:
6253 case Intrinsic::loongarch_lasx_xvmsub_h:
6254 case Intrinsic::loongarch_lasx_xvmsub_w:
6255 case Intrinsic::loongarch_lasx_xvmsub_d: {
6256 EVT ResTy = N->getValueType(0);
6257 return DAG.getNode(ISD::SUB, SDLoc(N), ResTy, N->getOperand(1),
6258 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
6259 N->getOperand(3)));
6260 }
6261 case Intrinsic::loongarch_lsx_vdiv_b:
6262 case Intrinsic::loongarch_lsx_vdiv_h:
6263 case Intrinsic::loongarch_lsx_vdiv_w:
6264 case Intrinsic::loongarch_lsx_vdiv_d:
6265 case Intrinsic::loongarch_lasx_xvdiv_b:
6266 case Intrinsic::loongarch_lasx_xvdiv_h:
6267 case Intrinsic::loongarch_lasx_xvdiv_w:
6268 case Intrinsic::loongarch_lasx_xvdiv_d:
6269 return DAG.getNode(ISD::SDIV, DL, N->getValueType(0), N->getOperand(1),
6270 N->getOperand(2));
6271 case Intrinsic::loongarch_lsx_vdiv_bu:
6272 case Intrinsic::loongarch_lsx_vdiv_hu:
6273 case Intrinsic::loongarch_lsx_vdiv_wu:
6274 case Intrinsic::loongarch_lsx_vdiv_du:
6275 case Intrinsic::loongarch_lasx_xvdiv_bu:
6276 case Intrinsic::loongarch_lasx_xvdiv_hu:
6277 case Intrinsic::loongarch_lasx_xvdiv_wu:
6278 case Intrinsic::loongarch_lasx_xvdiv_du:
6279 return DAG.getNode(ISD::UDIV, DL, N->getValueType(0), N->getOperand(1),
6280 N->getOperand(2));
6281 case Intrinsic::loongarch_lsx_vmod_b:
6282 case Intrinsic::loongarch_lsx_vmod_h:
6283 case Intrinsic::loongarch_lsx_vmod_w:
6284 case Intrinsic::loongarch_lsx_vmod_d:
6285 case Intrinsic::loongarch_lasx_xvmod_b:
6286 case Intrinsic::loongarch_lasx_xvmod_h:
6287 case Intrinsic::loongarch_lasx_xvmod_w:
6288 case Intrinsic::loongarch_lasx_xvmod_d:
6289 return DAG.getNode(ISD::SREM, DL, N->getValueType(0), N->getOperand(1),
6290 N->getOperand(2));
6291 case Intrinsic::loongarch_lsx_vmod_bu:
6292 case Intrinsic::loongarch_lsx_vmod_hu:
6293 case Intrinsic::loongarch_lsx_vmod_wu:
6294 case Intrinsic::loongarch_lsx_vmod_du:
6295 case Intrinsic::loongarch_lasx_xvmod_bu:
6296 case Intrinsic::loongarch_lasx_xvmod_hu:
6297 case Intrinsic::loongarch_lasx_xvmod_wu:
6298 case Intrinsic::loongarch_lasx_xvmod_du:
6299 return DAG.getNode(ISD::UREM, DL, N->getValueType(0), N->getOperand(1),
6300 N->getOperand(2));
6301 case Intrinsic::loongarch_lsx_vand_v:
6302 case Intrinsic::loongarch_lasx_xvand_v:
6303 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
6304 N->getOperand(2));
6305 case Intrinsic::loongarch_lsx_vor_v:
6306 case Intrinsic::loongarch_lasx_xvor_v:
6307 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
6308 N->getOperand(2));
6309 case Intrinsic::loongarch_lsx_vxor_v:
6310 case Intrinsic::loongarch_lasx_xvxor_v:
6311 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
6312 N->getOperand(2));
6313 case Intrinsic::loongarch_lsx_vnor_v:
6314 case Intrinsic::loongarch_lasx_xvnor_v: {
6315 SDValue Res = DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
6316 N->getOperand(2));
6317 return DAG.getNOT(DL, Res, Res->getValueType(0));
6318 }
6319 case Intrinsic::loongarch_lsx_vandi_b:
6320 case Intrinsic::loongarch_lasx_xvandi_b:
6321 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
6322 lowerVectorSplatImm<8>(N, 2, DAG));
6323 case Intrinsic::loongarch_lsx_vori_b:
6324 case Intrinsic::loongarch_lasx_xvori_b:
6325 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
6326 lowerVectorSplatImm<8>(N, 2, DAG));
6327 case Intrinsic::loongarch_lsx_vxori_b:
6328 case Intrinsic::loongarch_lasx_xvxori_b:
6329 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
6330 lowerVectorSplatImm<8>(N, 2, DAG));
6331 case Intrinsic::loongarch_lsx_vsll_b:
6332 case Intrinsic::loongarch_lsx_vsll_h:
6333 case Intrinsic::loongarch_lsx_vsll_w:
6334 case Intrinsic::loongarch_lsx_vsll_d:
6335 case Intrinsic::loongarch_lasx_xvsll_b:
6336 case Intrinsic::loongarch_lasx_xvsll_h:
6337 case Intrinsic::loongarch_lasx_xvsll_w:
6338 case Intrinsic::loongarch_lasx_xvsll_d:
6339 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
6340 truncateVecElts(N, DAG));
6341 case Intrinsic::loongarch_lsx_vslli_b:
6342 case Intrinsic::loongarch_lasx_xvslli_b:
6343 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
6344 lowerVectorSplatImm<3>(N, 2, DAG));
6345 case Intrinsic::loongarch_lsx_vslli_h:
6346 case Intrinsic::loongarch_lasx_xvslli_h:
6347 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
6348 lowerVectorSplatImm<4>(N, 2, DAG));
6349 case Intrinsic::loongarch_lsx_vslli_w:
6350 case Intrinsic::loongarch_lasx_xvslli_w:
6351 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
6352 lowerVectorSplatImm<5>(N, 2, DAG));
6353 case Intrinsic::loongarch_lsx_vslli_d:
6354 case Intrinsic::loongarch_lasx_xvslli_d:
6355 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
6356 lowerVectorSplatImm<6>(N, 2, DAG));
6357 case Intrinsic::loongarch_lsx_vsrl_b:
6358 case Intrinsic::loongarch_lsx_vsrl_h:
6359 case Intrinsic::loongarch_lsx_vsrl_w:
6360 case Intrinsic::loongarch_lsx_vsrl_d:
6361 case Intrinsic::loongarch_lasx_xvsrl_b:
6362 case Intrinsic::loongarch_lasx_xvsrl_h:
6363 case Intrinsic::loongarch_lasx_xvsrl_w:
6364 case Intrinsic::loongarch_lasx_xvsrl_d:
6365 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
6366 truncateVecElts(N, DAG));
6367 case Intrinsic::loongarch_lsx_vsrli_b:
6368 case Intrinsic::loongarch_lasx_xvsrli_b:
6369 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
6370 lowerVectorSplatImm<3>(N, 2, DAG));
6371 case Intrinsic::loongarch_lsx_vsrli_h:
6372 case Intrinsic::loongarch_lasx_xvsrli_h:
6373 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
6374 lowerVectorSplatImm<4>(N, 2, DAG));
6375 case Intrinsic::loongarch_lsx_vsrli_w:
6376 case Intrinsic::loongarch_lasx_xvsrli_w:
6377 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
6378 lowerVectorSplatImm<5>(N, 2, DAG));
6379 case Intrinsic::loongarch_lsx_vsrli_d:
6380 case Intrinsic::loongarch_lasx_xvsrli_d:
6381 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
6382 lowerVectorSplatImm<6>(N, 2, DAG));
6383 case Intrinsic::loongarch_lsx_vsra_b:
6384 case Intrinsic::loongarch_lsx_vsra_h:
6385 case Intrinsic::loongarch_lsx_vsra_w:
6386 case Intrinsic::loongarch_lsx_vsra_d:
6387 case Intrinsic::loongarch_lasx_xvsra_b:
6388 case Intrinsic::loongarch_lasx_xvsra_h:
6389 case Intrinsic::loongarch_lasx_xvsra_w:
6390 case Intrinsic::loongarch_lasx_xvsra_d:
6391 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6392 truncateVecElts(N, DAG));
6393 case Intrinsic::loongarch_lsx_vsrai_b:
6394 case Intrinsic::loongarch_lasx_xvsrai_b:
6395 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6396 lowerVectorSplatImm<3>(N, 2, DAG));
6397 case Intrinsic::loongarch_lsx_vsrai_h:
6398 case Intrinsic::loongarch_lasx_xvsrai_h:
6399 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6400 lowerVectorSplatImm<4>(N, 2, DAG));
6401 case Intrinsic::loongarch_lsx_vsrai_w:
6402 case Intrinsic::loongarch_lasx_xvsrai_w:
6403 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6404 lowerVectorSplatImm<5>(N, 2, DAG));
6405 case Intrinsic::loongarch_lsx_vsrai_d:
6406 case Intrinsic::loongarch_lasx_xvsrai_d:
6407 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6408 lowerVectorSplatImm<6>(N, 2, DAG));
6409 case Intrinsic::loongarch_lsx_vclz_b:
6410 case Intrinsic::loongarch_lsx_vclz_h:
6411 case Intrinsic::loongarch_lsx_vclz_w:
6412 case Intrinsic::loongarch_lsx_vclz_d:
6413 case Intrinsic::loongarch_lasx_xvclz_b:
6414 case Intrinsic::loongarch_lasx_xvclz_h:
6415 case Intrinsic::loongarch_lasx_xvclz_w:
6416 case Intrinsic::loongarch_lasx_xvclz_d:
6417 return DAG.getNode(ISD::CTLZ, DL, N->getValueType(0), N->getOperand(1));
6418 case Intrinsic::loongarch_lsx_vpcnt_b:
6419 case Intrinsic::loongarch_lsx_vpcnt_h:
6420 case Intrinsic::loongarch_lsx_vpcnt_w:
6421 case Intrinsic::loongarch_lsx_vpcnt_d:
6422 case Intrinsic::loongarch_lasx_xvpcnt_b:
6423 case Intrinsic::loongarch_lasx_xvpcnt_h:
6424 case Intrinsic::loongarch_lasx_xvpcnt_w:
6425 case Intrinsic::loongarch_lasx_xvpcnt_d:
6426 return DAG.getNode(ISD::CTPOP, DL, N->getValueType(0), N->getOperand(1));
6427 case Intrinsic::loongarch_lsx_vbitclr_b:
6428 case Intrinsic::loongarch_lsx_vbitclr_h:
6429 case Intrinsic::loongarch_lsx_vbitclr_w:
6430 case Intrinsic::loongarch_lsx_vbitclr_d:
6431 case Intrinsic::loongarch_lasx_xvbitclr_b:
6432 case Intrinsic::loongarch_lasx_xvbitclr_h:
6433 case Intrinsic::loongarch_lasx_xvbitclr_w:
6434 case Intrinsic::loongarch_lasx_xvbitclr_d:
6435 return lowerVectorBitClear(N, DAG);
6436 case Intrinsic::loongarch_lsx_vbitclri_b:
6437 case Intrinsic::loongarch_lasx_xvbitclri_b:
6438 return lowerVectorBitClearImm<3>(N, DAG);
6439 case Intrinsic::loongarch_lsx_vbitclri_h:
6440 case Intrinsic::loongarch_lasx_xvbitclri_h:
6441 return lowerVectorBitClearImm<4>(N, DAG);
6442 case Intrinsic::loongarch_lsx_vbitclri_w:
6443 case Intrinsic::loongarch_lasx_xvbitclri_w:
6444 return lowerVectorBitClearImm<5>(N, DAG);
6445 case Intrinsic::loongarch_lsx_vbitclri_d:
6446 case Intrinsic::loongarch_lasx_xvbitclri_d:
6447 return lowerVectorBitClearImm<6>(N, DAG);
6448 case Intrinsic::loongarch_lsx_vbitset_b:
6449 case Intrinsic::loongarch_lsx_vbitset_h:
6450 case Intrinsic::loongarch_lsx_vbitset_w:
6451 case Intrinsic::loongarch_lsx_vbitset_d:
6452 case Intrinsic::loongarch_lasx_xvbitset_b:
6453 case Intrinsic::loongarch_lasx_xvbitset_h:
6454 case Intrinsic::loongarch_lasx_xvbitset_w:
6455 case Intrinsic::loongarch_lasx_xvbitset_d: {
6456 EVT VecTy = N->getValueType(0);
6457 SDValue One = DAG.getConstant(1, DL, VecTy);
6458 return DAG.getNode(
6459 ISD::OR, DL, VecTy, N->getOperand(1),
6460 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
6461 }
6462 case Intrinsic::loongarch_lsx_vbitseti_b:
6463 case Intrinsic::loongarch_lasx_xvbitseti_b:
6464 return lowerVectorBitSetImm<3>(N, DAG);
6465 case Intrinsic::loongarch_lsx_vbitseti_h:
6466 case Intrinsic::loongarch_lasx_xvbitseti_h:
6467 return lowerVectorBitSetImm<4>(N, DAG);
6468 case Intrinsic::loongarch_lsx_vbitseti_w:
6469 case Intrinsic::loongarch_lasx_xvbitseti_w:
6470 return lowerVectorBitSetImm<5>(N, DAG);
6471 case Intrinsic::loongarch_lsx_vbitseti_d:
6472 case Intrinsic::loongarch_lasx_xvbitseti_d:
6473 return lowerVectorBitSetImm<6>(N, DAG);
6474 case Intrinsic::loongarch_lsx_vbitrev_b:
6475 case Intrinsic::loongarch_lsx_vbitrev_h:
6476 case Intrinsic::loongarch_lsx_vbitrev_w:
6477 case Intrinsic::loongarch_lsx_vbitrev_d:
6478 case Intrinsic::loongarch_lasx_xvbitrev_b:
6479 case Intrinsic::loongarch_lasx_xvbitrev_h:
6480 case Intrinsic::loongarch_lasx_xvbitrev_w:
6481 case Intrinsic::loongarch_lasx_xvbitrev_d: {
6482 EVT VecTy = N->getValueType(0);
6483 SDValue One = DAG.getConstant(1, DL, VecTy);
6484 return DAG.getNode(
6485 ISD::XOR, DL, VecTy, N->getOperand(1),
6486 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
6487 }
6488 case Intrinsic::loongarch_lsx_vbitrevi_b:
6489 case Intrinsic::loongarch_lasx_xvbitrevi_b:
6490 return lowerVectorBitRevImm<3>(N, DAG);
6491 case Intrinsic::loongarch_lsx_vbitrevi_h:
6492 case Intrinsic::loongarch_lasx_xvbitrevi_h:
6493 return lowerVectorBitRevImm<4>(N, DAG);
6494 case Intrinsic::loongarch_lsx_vbitrevi_w:
6495 case Intrinsic::loongarch_lasx_xvbitrevi_w:
6496 return lowerVectorBitRevImm<5>(N, DAG);
6497 case Intrinsic::loongarch_lsx_vbitrevi_d:
6498 case Intrinsic::loongarch_lasx_xvbitrevi_d:
6499 return lowerVectorBitRevImm<6>(N, DAG);
6500 case Intrinsic::loongarch_lsx_vfadd_s:
6501 case Intrinsic::loongarch_lsx_vfadd_d:
6502 case Intrinsic::loongarch_lasx_xvfadd_s:
6503 case Intrinsic::loongarch_lasx_xvfadd_d:
6504 return DAG.getNode(ISD::FADD, DL, N->getValueType(0), N->getOperand(1),
6505 N->getOperand(2));
6506 case Intrinsic::loongarch_lsx_vfsub_s:
6507 case Intrinsic::loongarch_lsx_vfsub_d:
6508 case Intrinsic::loongarch_lasx_xvfsub_s:
6509 case Intrinsic::loongarch_lasx_xvfsub_d:
6510 return DAG.getNode(ISD::FSUB, DL, N->getValueType(0), N->getOperand(1),
6511 N->getOperand(2));
6512 case Intrinsic::loongarch_lsx_vfmul_s:
6513 case Intrinsic::loongarch_lsx_vfmul_d:
6514 case Intrinsic::loongarch_lasx_xvfmul_s:
6515 case Intrinsic::loongarch_lasx_xvfmul_d:
6516 return DAG.getNode(ISD::FMUL, DL, N->getValueType(0), N->getOperand(1),
6517 N->getOperand(2));
6518 case Intrinsic::loongarch_lsx_vfdiv_s:
6519 case Intrinsic::loongarch_lsx_vfdiv_d:
6520 case Intrinsic::loongarch_lasx_xvfdiv_s:
6521 case Intrinsic::loongarch_lasx_xvfdiv_d:
6522 return DAG.getNode(ISD::FDIV, DL, N->getValueType(0), N->getOperand(1),
6523 N->getOperand(2));
6524 case Intrinsic::loongarch_lsx_vfmadd_s:
6525 case Intrinsic::loongarch_lsx_vfmadd_d:
6526 case Intrinsic::loongarch_lasx_xvfmadd_s:
6527 case Intrinsic::loongarch_lasx_xvfmadd_d:
6528 return DAG.getNode(ISD::FMA, DL, N->getValueType(0), N->getOperand(1),
6529 N->getOperand(2), N->getOperand(3));
6530 case Intrinsic::loongarch_lsx_vinsgr2vr_b:
6531 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
6532 N->getOperand(1), N->getOperand(2),
6533 legalizeIntrinsicImmArg<4>(N, 3, DAG, Subtarget));
6534 case Intrinsic::loongarch_lsx_vinsgr2vr_h:
6535 case Intrinsic::loongarch_lasx_xvinsgr2vr_w:
6536 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
6537 N->getOperand(1), N->getOperand(2),
6538 legalizeIntrinsicImmArg<3>(N, 3, DAG, Subtarget));
6539 case Intrinsic::loongarch_lsx_vinsgr2vr_w:
6540 case Intrinsic::loongarch_lasx_xvinsgr2vr_d:
6541 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
6542 N->getOperand(1), N->getOperand(2),
6543 legalizeIntrinsicImmArg<2>(N, 3, DAG, Subtarget));
6544 case Intrinsic::loongarch_lsx_vinsgr2vr_d:
6545 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
6546 N->getOperand(1), N->getOperand(2),
6547 legalizeIntrinsicImmArg<1>(N, 3, DAG, Subtarget));
6548 case Intrinsic::loongarch_lsx_vreplgr2vr_b:
6549 case Intrinsic::loongarch_lsx_vreplgr2vr_h:
6550 case Intrinsic::loongarch_lsx_vreplgr2vr_w:
6551 case Intrinsic::loongarch_lsx_vreplgr2vr_d:
6552 case Intrinsic::loongarch_lasx_xvreplgr2vr_b:
6553 case Intrinsic::loongarch_lasx_xvreplgr2vr_h:
6554 case Intrinsic::loongarch_lasx_xvreplgr2vr_w:
6555 case Intrinsic::loongarch_lasx_xvreplgr2vr_d:
6556 return DAG.getNode(LoongArchISD::VREPLGR2VR, DL, N->getValueType(0),
6557 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
6558 N->getOperand(1)));
6559 case Intrinsic::loongarch_lsx_vreplve_b:
6560 case Intrinsic::loongarch_lsx_vreplve_h:
6561 case Intrinsic::loongarch_lsx_vreplve_w:
6562 case Intrinsic::loongarch_lsx_vreplve_d:
6563 case Intrinsic::loongarch_lasx_xvreplve_b:
6564 case Intrinsic::loongarch_lasx_xvreplve_h:
6565 case Intrinsic::loongarch_lasx_xvreplve_w:
6566 case Intrinsic::loongarch_lasx_xvreplve_d:
6567 return DAG.getNode(LoongArchISD::VREPLVE, DL, N->getValueType(0),
6568 N->getOperand(1),
6569 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
6570 N->getOperand(2)));
6571 case Intrinsic::loongarch_lsx_vpickve2gr_b:
6572 if (!Subtarget.is64Bit())
6574 break;
6575 case Intrinsic::loongarch_lsx_vpickve2gr_h:
6576 case Intrinsic::loongarch_lasx_xvpickve2gr_w:
6577 if (!Subtarget.is64Bit())
6579 break;
6580 case Intrinsic::loongarch_lsx_vpickve2gr_w:
6581 if (!Subtarget.is64Bit())
6583 break;
6584 case Intrinsic::loongarch_lsx_vpickve2gr_bu:
6585 if (!Subtarget.is64Bit())
6587 break;
6588 case Intrinsic::loongarch_lsx_vpickve2gr_hu:
6589 case Intrinsic::loongarch_lasx_xvpickve2gr_wu:
6590 if (!Subtarget.is64Bit())
6592 break;
6593 case Intrinsic::loongarch_lsx_vpickve2gr_wu:
6594 if (!Subtarget.is64Bit())
6596 break;
6597 case Intrinsic::loongarch_lsx_bz_b:
6598 case Intrinsic::loongarch_lsx_bz_h:
6599 case Intrinsic::loongarch_lsx_bz_w:
6600 case Intrinsic::loongarch_lsx_bz_d:
6601 case Intrinsic::loongarch_lasx_xbz_b:
6602 case Intrinsic::loongarch_lasx_xbz_h:
6603 case Intrinsic::loongarch_lasx_xbz_w:
6604 case Intrinsic::loongarch_lasx_xbz_d:
6605 if (!Subtarget.is64Bit())
6606 return DAG.getNode(LoongArchISD::VALL_ZERO, DL, N->getValueType(0),
6607 N->getOperand(1));
6608 break;
6609 case Intrinsic::loongarch_lsx_bz_v:
6610 case Intrinsic::loongarch_lasx_xbz_v:
6611 if (!Subtarget.is64Bit())
6612 return DAG.getNode(LoongArchISD::VANY_ZERO, DL, N->getValueType(0),
6613 N->getOperand(1));
6614 break;
6615 case Intrinsic::loongarch_lsx_bnz_b:
6616 case Intrinsic::loongarch_lsx_bnz_h:
6617 case Intrinsic::loongarch_lsx_bnz_w:
6618 case Intrinsic::loongarch_lsx_bnz_d:
6619 case Intrinsic::loongarch_lasx_xbnz_b:
6620 case Intrinsic::loongarch_lasx_xbnz_h:
6621 case Intrinsic::loongarch_lasx_xbnz_w:
6622 case Intrinsic::loongarch_lasx_xbnz_d:
6623 if (!Subtarget.is64Bit())
6624 return DAG.getNode(LoongArchISD::VALL_NONZERO, DL, N->getValueType(0),
6625 N->getOperand(1));
6626 break;
6627 case Intrinsic::loongarch_lsx_bnz_v:
6628 case Intrinsic::loongarch_lasx_xbnz_v:
6629 if (!Subtarget.is64Bit())
6630 return DAG.getNode(LoongArchISD::VANY_NONZERO, DL, N->getValueType(0),
6631 N->getOperand(1));
6632 break;
6633 }
6634 return SDValue();
6635}
6636
6639 const LoongArchSubtarget &Subtarget) {
6640 // If the input to MOVGR2FR_W_LA64 is just MOVFR2GR_S_LA64 then the
6641 // conversion is unnecessary and can be replaced with the
6642 // MOVFR2GR_S_LA64 operand.
6643 SDValue Op0 = N->getOperand(0);
6645 return Op0.getOperand(0);
6646 return SDValue();
6647}
6648
6651 const LoongArchSubtarget &Subtarget) {
6652 // If the input to MOVFR2GR_S_LA64 is just MOVGR2FR_W_LA64 then the
6653 // conversion is unnecessary and can be replaced with the MOVGR2FR_W_LA64
6654 // operand.
6655 SDValue Op0 = N->getOperand(0);
6657 assert(Op0.getOperand(0).getValueType() == N->getSimpleValueType(0) &&
6658 "Unexpected value type!");
6659 return Op0.getOperand(0);
6660 }
6661 return SDValue();
6662}
6663
6666 const LoongArchSubtarget &Subtarget) {
6667 MVT VT = N->getSimpleValueType(0);
6668 unsigned NumBits = VT.getScalarSizeInBits();
6669
6670 // Simplify the inputs.
6671 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6672 APInt DemandedMask(APInt::getAllOnes(NumBits));
6673 if (TLI.SimplifyDemandedBits(SDValue(N, 0), DemandedMask, DCI))
6674 return SDValue(N, 0);
6675
6676 return SDValue();
6677}
6678
6679static SDValue
6682 const LoongArchSubtarget &Subtarget) {
6683 SDValue Op0 = N->getOperand(0);
6684 SDLoc DL(N);
6685
6686 // If the input to SplitPairF64 is just BuildPairF64 then the operation is
6687 // redundant. Instead, use BuildPairF64's operands directly.
6689 return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
6690
6691 if (Op0->isUndef()) {
6692 SDValue Lo = DAG.getUNDEF(MVT::i32);
6693 SDValue Hi = DAG.getUNDEF(MVT::i32);
6694 return DCI.CombineTo(N, Lo, Hi);
6695 }
6696
6697 // It's cheaper to materialise two 32-bit integers than to load a double
6698 // from the constant pool and transfer it to integer registers through the
6699 // stack.
6701 APInt V = C->getValueAPF().bitcastToAPInt();
6702 SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
6703 SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
6704 return DCI.CombineTo(N, Lo, Hi);
6705 }
6706
6707 return SDValue();
6708}
6709
6710static SDValue
6713 const LoongArchSubtarget &Subtarget) {
6714 if (!DCI.isBeforeLegalize())
6715 return SDValue();
6716
6717 MVT EltVT = N->getSimpleValueType(0);
6718 SDValue Vec = N->getOperand(0);
6719 EVT VecTy = Vec->getValueType(0);
6720 SDValue Idx = N->getOperand(1);
6721 unsigned IdxOp = Idx.getOpcode();
6722 SDLoc DL(N);
6723
6724 if (!VecTy.is256BitVector() || isa<ConstantSDNode>(Idx))
6725 return SDValue();
6726
6727 // Combine:
6728 // t2 = truncate t1
6729 // t3 = {zero/sign/any}_extend t2
6730 // t4 = extract_vector_elt t0, t3
6731 // to:
6732 // t4 = extract_vector_elt t0, t1
6733 if (IdxOp == ISD::ZERO_EXTEND || IdxOp == ISD::SIGN_EXTEND ||
6734 IdxOp == ISD::ANY_EXTEND) {
6735 SDValue IdxOrig = Idx.getOperand(0);
6737 if (IdxOrig.getOpcode() != ISD::TRUNCATE)
6737 return SDValue();
6738
6739 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec,
6740 IdxOrig.getOperand(0));
6741 }
6742
6743 return SDValue();
6744}
6745
6747 DAGCombinerInfo &DCI) const {
6748 SelectionDAG &DAG = DCI.DAG;
6749 switch (N->getOpcode()) {
6750 default:
6751 break;
6752 case ISD::AND:
6753 return performANDCombine(N, DAG, DCI, Subtarget);
6754 case ISD::OR:
6755 return performORCombine(N, DAG, DCI, Subtarget);
6756 case ISD::SETCC:
6757 return performSETCCCombine(N, DAG, DCI, Subtarget);
6758 case ISD::SRL:
6759 return performSRLCombine(N, DAG, DCI, Subtarget);
6760 case ISD::BITCAST:
6761 return performBITCASTCombine(N, DAG, DCI, Subtarget);
6763 return performBITREV_WCombine(N, DAG, DCI, Subtarget);
6765 return performBR_CCCombine(N, DAG, DCI, Subtarget);
6767 return performSELECT_CCCombine(N, DAG, DCI, Subtarget);
6769 return performINTRINSIC_WO_CHAINCombine(N, DAG, DCI, Subtarget);
6771 return performMOVGR2FR_WCombine(N, DAG, DCI, Subtarget);
6773 return performMOVFR2GR_SCombine(N, DAG, DCI, Subtarget);
6776 return performVMSKLTZCombine(N, DAG, DCI, Subtarget);
6778 return performSPLIT_PAIR_F64Combine(N, DAG, DCI, Subtarget);
6780 return performEXTRACT_VECTOR_ELTCombine(N, DAG, DCI, Subtarget);
6781 }
6782 return SDValue();
6783}
6784
6787 if (!ZeroDivCheck)
6788 return MBB;
6789
6790 // Build instructions:
6791 // MBB:
6792 // div(or mod) $dst, $dividend, $divisor
6793 // bne $divisor, $zero, SinkMBB
6794 // BreakMBB:
6795 // break 7 // BRK_DIVZERO
6796 // SinkMBB:
6797 // fallthrough
6798 const BasicBlock *LLVM_BB = MBB->getBasicBlock();
6799 MachineFunction::iterator It = ++MBB->getIterator();
6800 MachineFunction *MF = MBB->getParent();
6801 auto BreakMBB = MF->CreateMachineBasicBlock(LLVM_BB);
6802 auto SinkMBB = MF->CreateMachineBasicBlock(LLVM_BB);
6803 MF->insert(It, BreakMBB);
6804 MF->insert(It, SinkMBB);
6805
6806 // Transfer the remainder of MBB and its successor edges to SinkMBB.
6807 SinkMBB->splice(SinkMBB->end(), MBB, std::next(MI.getIterator()), MBB->end());
6808 SinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
6809
6810 const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
6811 DebugLoc DL = MI.getDebugLoc();
6812 MachineOperand &Divisor = MI.getOperand(2);
6813 Register DivisorReg = Divisor.getReg();
6814
6815 // MBB:
6816 BuildMI(MBB, DL, TII.get(LoongArch::BNE))
6817 .addReg(DivisorReg, getKillRegState(Divisor.isKill()))
6818 .addReg(LoongArch::R0)
6819 .addMBB(SinkMBB);
6820 MBB->addSuccessor(BreakMBB);
6821 MBB->addSuccessor(SinkMBB);
6822
6823 // BreakMBB:
6824 // See linux header file arch/loongarch/include/uapi/asm/break.h for the
6825 // definition of BRK_DIVZERO.
6826 BuildMI(BreakMBB, DL, TII.get(LoongArch::BREAK)).addImm(7 /*BRK_DIVZERO*/);
6827 BreakMBB->addSuccessor(SinkMBB);
6828
6829 // Clear Divisor's kill flag.
6830 Divisor.setIsKill(false);
6831
6832 return SinkMBB;
6833}
6834
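// Expand a vector condition-branch pseudo (PseudoVBZ/PseudoVBNZ and their
// element-width variants): emit the matching [X]VSET* instruction to write a
// condition flag register, branch on it with BCNEZ, materialize 0 and 1 with
// ADDI_W in two new blocks, and merge the results with a PHI.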
6835static MachineBasicBlock *
6837 const LoongArchSubtarget &Subtarget) {
6838 unsigned CondOpc;
6839 switch (MI.getOpcode()) {
6840 default:
6841 llvm_unreachable("Unexpected opcode");
6842 case LoongArch::PseudoVBZ:
6843 CondOpc = LoongArch::VSETEQZ_V;
6844 break;
6845 case LoongArch::PseudoVBZ_B:
6846 CondOpc = LoongArch::VSETANYEQZ_B;
6847 break;
6848 case LoongArch::PseudoVBZ_H:
6849 CondOpc = LoongArch::VSETANYEQZ_H;
6850 break;
6851 case LoongArch::PseudoVBZ_W:
6852 CondOpc = LoongArch::VSETANYEQZ_W;
6853 break;
6854 case LoongArch::PseudoVBZ_D:
6855 CondOpc = LoongArch::VSETANYEQZ_D;
6856 break;
6857 case LoongArch::PseudoVBNZ:
6858 CondOpc = LoongArch::VSETNEZ_V;
6859 break;
6860 case LoongArch::PseudoVBNZ_B:
6861 CondOpc = LoongArch::VSETALLNEZ_B;
6862 break;
6863 case LoongArch::PseudoVBNZ_H:
6864 CondOpc = LoongArch::VSETALLNEZ_H;
6865 break;
6866 case LoongArch::PseudoVBNZ_W:
6867 CondOpc = LoongArch::VSETALLNEZ_W;
6868 break;
6869 case LoongArch::PseudoVBNZ_D:
6870 CondOpc = LoongArch::VSETALLNEZ_D;
6871 break;
6872 case LoongArch::PseudoXVBZ:
6873 CondOpc = LoongArch::XVSETEQZ_V;
6874 break;
6875 case LoongArch::PseudoXVBZ_B:
6876 CondOpc = LoongArch::XVSETANYEQZ_B;
6877 break;
6878 case LoongArch::PseudoXVBZ_H:
6879 CondOpc = LoongArch::XVSETANYEQZ_H;
6880 break;
6881 case LoongArch::PseudoXVBZ_W:
6882 CondOpc = LoongArch::XVSETANYEQZ_W;
6883 break;
6884 case LoongArch::PseudoXVBZ_D:
6885 CondOpc = LoongArch::XVSETANYEQZ_D;
6886 break;
6887 case LoongArch::PseudoXVBNZ:
6888 CondOpc = LoongArch::XVSETNEZ_V;
6889 break;
6890 case LoongArch::PseudoXVBNZ_B:
6891 CondOpc = LoongArch::XVSETALLNEZ_B;
6892 break;
6893 case LoongArch::PseudoXVBNZ_H:
6894 CondOpc = LoongArch::XVSETALLNEZ_H;
6895 break;
6896 case LoongArch::PseudoXVBNZ_W:
6897 CondOpc = LoongArch::XVSETALLNEZ_W;
6898 break;
6899 case LoongArch::PseudoXVBNZ_D:
6900 CondOpc = LoongArch::XVSETALLNEZ_D;
6901 break;
6902 }
6903
6904 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
6905 const BasicBlock *LLVM_BB = BB->getBasicBlock();
6906 DebugLoc DL = MI.getDebugLoc();
6909
6910 MachineFunction *F = BB->getParent();
6911 MachineBasicBlock *FalseBB = F->CreateMachineBasicBlock(LLVM_BB);
6912 MachineBasicBlock *TrueBB = F->CreateMachineBasicBlock(LLVM_BB);
6913 MachineBasicBlock *SinkBB = F->CreateMachineBasicBlock(LLVM_BB);
6914
6915 F->insert(It, FalseBB);
6916 F->insert(It, TrueBB);
6917 F->insert(It, SinkBB);
6918
6919 // Transfer the remainder of MBB and its successor edges to Sink.
6920 SinkBB->splice(SinkBB->end(), BB, std::next(MI.getIterator()), BB->end());
6922
6923 // Insert the real instruction into BB.
6924 Register FCC = MRI.createVirtualRegister(&LoongArch::CFRRegClass);
6925 BuildMI(BB, DL, TII->get(CondOpc), FCC).addReg(MI.getOperand(1).getReg());
6926
6927 // Insert branch.
6928 BuildMI(BB, DL, TII->get(LoongArch::BCNEZ)).addReg(FCC).addMBB(TrueBB);
6929 BB->addSuccessor(FalseBB);
6930 BB->addSuccessor(TrueBB);
6931
6932 // FalseBB.
6933 Register RD1 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
6934 BuildMI(FalseBB, DL, TII->get(LoongArch::ADDI_W), RD1)
6935 .addReg(LoongArch::R0)
6936 .addImm(0);
6937 BuildMI(FalseBB, DL, TII->get(LoongArch::PseudoBR)).addMBB(SinkBB);
6938 FalseBB->addSuccessor(SinkBB);
6939
6940 // TrueBB.
6941 Register RD2 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
6942 BuildMI(TrueBB, DL, TII->get(LoongArch::ADDI_W), RD2)
6943 .addReg(LoongArch::R0)
6944 .addImm(1);
6945 TrueBB->addSuccessor(SinkBB);
6946
6947 // SinkBB: merge the results.
6948 BuildMI(*SinkBB, SinkBB->begin(), DL, TII->get(LoongArch::PHI),
6949 MI.getOperand(0).getReg())
6950 .addReg(RD1)
6951 .addMBB(FalseBB)
6952 .addReg(RD2)
6953 .addMBB(TrueBB);
6954
6955 // The pseudo instruction is gone now.
6956 MI.eraseFromParent();
6957 return SinkBB;
6958}
6959
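// Expand PseudoXVINSGR2VR_{B,H}: if the 256-bit source is an IMPLICIT_DEF and
// the index falls in the low half, insert into the 128-bit subregister with
// VINSGR2VR and widen the result with SUBREG_TO_REG; otherwise broadcast the
// element, arrange the 128-bit halves with XVPERMI_Q and insert it with
// XVEXTRINS.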
6960static MachineBasicBlock *
6962 const LoongArchSubtarget &Subtarget) {
6963 unsigned InsOp;
6964 unsigned BroadcastOp;
6965 unsigned HalfSize;
6966 switch (MI.getOpcode()) {
6967 default:
6968 llvm_unreachable("Unexpected opcode");
6969 case LoongArch::PseudoXVINSGR2VR_B:
6970 HalfSize = 16;
6971 BroadcastOp = LoongArch::XVREPLGR2VR_B;
6972 InsOp = LoongArch::XVEXTRINS_B;
6973 break;
6974 case LoongArch::PseudoXVINSGR2VR_H:
6975 HalfSize = 8;
6976 BroadcastOp = LoongArch::XVREPLGR2VR_H;
6977 InsOp = LoongArch::XVEXTRINS_H;
6978 break;
6979 }
6980 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
6981 const TargetRegisterClass *RC = &LoongArch::LASX256RegClass;
6982 const TargetRegisterClass *SubRC = &LoongArch::LSX128RegClass;
6983 DebugLoc DL = MI.getDebugLoc();
6985 // XDst = vector_insert XSrc, Elt, Idx
6986 Register XDst = MI.getOperand(0).getReg();
6987 Register XSrc = MI.getOperand(1).getReg();
6988 Register Elt = MI.getOperand(2).getReg();
6989 unsigned Idx = MI.getOperand(3).getImm();
6990
6991 if (XSrc.isVirtual() && MRI.getVRegDef(XSrc)->isImplicitDef() &&
6992 Idx < HalfSize) {
6993 Register ScratchSubReg1 = MRI.createVirtualRegister(SubRC);
6994 Register ScratchSubReg2 = MRI.createVirtualRegister(SubRC);
6995
6996 BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), ScratchSubReg1)
6997 .addReg(XSrc, 0, LoongArch::sub_128);
6998 BuildMI(*BB, MI, DL,
6999 TII->get(HalfSize == 8 ? LoongArch::VINSGR2VR_H
7000 : LoongArch::VINSGR2VR_B),
7001 ScratchSubReg2)
7002 .addReg(ScratchSubReg1)
7003 .addReg(Elt)
7004 .addImm(Idx);
7005
7006 BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), XDst)
7007 .addImm(0)
7008 .addReg(ScratchSubReg2)
7009 .addImm(LoongArch::sub_128);
7010 } else {
7011 Register ScratchReg1 = MRI.createVirtualRegister(RC);
7012 Register ScratchReg2 = MRI.createVirtualRegister(RC);
7013
7014 BuildMI(*BB, MI, DL, TII->get(BroadcastOp), ScratchReg1).addReg(Elt);
7015
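    // The XVPERMI_Q immediate is chosen so that ScratchReg2 keeps XSrc's data
    // in the 128-bit half that is not being modified and the broadcast element
    // in the half that is. XVEXTRINS then rewrites element Idx (reduced modulo
    // HalfSize) in both halves, so the untouched half simply receives its own
    // value back. Idx * 17 places Idx in both the destination and source index
    // fields of the XVEXTRINS immediate.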
7016 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), ScratchReg2)
7017 .addReg(ScratchReg1)
7018 .addReg(XSrc)
7019 .addImm(Idx >= HalfSize ? 48 : 18);
7020
7021 BuildMI(*BB, MI, DL, TII->get(InsOp), XDst)
7022 .addReg(XSrc)
7023 .addReg(ScratchReg2)
7024 .addImm((Idx >= HalfSize ? Idx - HalfSize : Idx) * 17);
7025 }
7026
7027 MI.eraseFromParent();
7028 return BB;
7029}
7030
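// Expand PseudoCTPOP for scalar values by way of the vector unit: zero an LSX
// register with VLDI, insert the GPR operand into element 0, run VPCNT on it
// and copy the population count back to the destination GPR with VPICKVE2GR.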
7033 const LoongArchSubtarget &Subtarget) {
7034 assert(Subtarget.hasExtLSX());
7035 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
7036 const TargetRegisterClass *RC = &LoongArch::LSX128RegClass;
7037 DebugLoc DL = MI.getDebugLoc();
7039 Register Dst = MI.getOperand(0).getReg();
7040 Register Src = MI.getOperand(1).getReg();
7041 Register ScratchReg1 = MRI.createVirtualRegister(RC);
7042 Register ScratchReg2 = MRI.createVirtualRegister(RC);
7043 Register ScratchReg3 = MRI.createVirtualRegister(RC);
7044
7045 BuildMI(*BB, MI, DL, TII->get(LoongArch::VLDI), ScratchReg1).addImm(0);
7046 BuildMI(*BB, MI, DL,
7047 TII->get(Subtarget.is64Bit() ? LoongArch::VINSGR2VR_D
7048 : LoongArch::VINSGR2VR_W),
7049 ScratchReg2)
7050 .addReg(ScratchReg1)
7051 .addReg(Src)
7052 .addImm(0);
7053 BuildMI(
7054 *BB, MI, DL,
7055 TII->get(Subtarget.is64Bit() ? LoongArch::VPCNT_D : LoongArch::VPCNT_W),
7056 ScratchReg3)
7057 .addReg(ScratchReg2);
7058 BuildMI(*BB, MI, DL,
7059 TII->get(Subtarget.is64Bit() ? LoongArch::VPICKVE2GR_D
7060 : LoongArch::VPICKVE2GR_W),
7061 Dst)
7062 .addReg(ScratchReg3)
7063 .addImm(0);
7064
7065 MI.eraseFromParent();
7066 return BB;
7067}
7068
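// Expand the [X]VMSK* pseudos: compute a lane mask with the corresponding
// VMSK/XVMSK instruction (inverting it with [X]VNOR_V for the EQZ variants)
// and move it to a GPR. For 256-bit vectors the two 128-bit halves of the
// mask are read with XVPICKVE2GR_WU and combined with BSTRINS.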
7069static MachineBasicBlock *
7071 const LoongArchSubtarget &Subtarget) {
7072 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
7073 const TargetRegisterClass *RC = &LoongArch::LSX128RegClass;
7074 const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
7076 Register Dst = MI.getOperand(0).getReg();
7077 Register Src = MI.getOperand(1).getReg();
7078 DebugLoc DL = MI.getDebugLoc();
7079 unsigned EleBits = 8;
7080 unsigned NotOpc = 0;
7081 unsigned MskOpc;
7082
7083 switch (MI.getOpcode()) {
7084 default:
7085 llvm_unreachable("Unexpected opcode");
7086 case LoongArch::PseudoVMSKLTZ_B:
7087 MskOpc = LoongArch::VMSKLTZ_B;
7088 break;
7089 case LoongArch::PseudoVMSKLTZ_H:
7090 MskOpc = LoongArch::VMSKLTZ_H;
7091 EleBits = 16;
7092 break;
7093 case LoongArch::PseudoVMSKLTZ_W:
7094 MskOpc = LoongArch::VMSKLTZ_W;
7095 EleBits = 32;
7096 break;
7097 case LoongArch::PseudoVMSKLTZ_D:
7098 MskOpc = LoongArch::VMSKLTZ_D;
7099 EleBits = 64;
7100 break;
7101 case LoongArch::PseudoVMSKGEZ_B:
7102 MskOpc = LoongArch::VMSKGEZ_B;
7103 break;
7104 case LoongArch::PseudoVMSKEQZ_B:
7105 MskOpc = LoongArch::VMSKNZ_B;
7106 NotOpc = LoongArch::VNOR_V;
7107 break;
7108 case LoongArch::PseudoVMSKNEZ_B:
7109 MskOpc = LoongArch::VMSKNZ_B;
7110 break;
7111 case LoongArch::PseudoXVMSKLTZ_B:
7112 MskOpc = LoongArch::XVMSKLTZ_B;
7113 RC = &LoongArch::LASX256RegClass;
7114 break;
7115 case LoongArch::PseudoXVMSKLTZ_H:
7116 MskOpc = LoongArch::XVMSKLTZ_H;
7117 RC = &LoongArch::LASX256RegClass;
7118 EleBits = 16;
7119 break;
7120 case LoongArch::PseudoXVMSKLTZ_W:
7121 MskOpc = LoongArch::XVMSKLTZ_W;
7122 RC = &LoongArch::LASX256RegClass;
7123 EleBits = 32;
7124 break;
7125 case LoongArch::PseudoXVMSKLTZ_D:
7126 MskOpc = LoongArch::XVMSKLTZ_D;
7127 RC = &LoongArch::LASX256RegClass;
7128 EleBits = 64;
7129 break;
7130 case LoongArch::PseudoXVMSKGEZ_B:
7131 MskOpc = LoongArch::XVMSKGEZ_B;
7132 RC = &LoongArch::LASX256RegClass;
7133 break;
7134 case LoongArch::PseudoXVMSKEQZ_B:
7135 MskOpc = LoongArch::XVMSKNZ_B;
7136 NotOpc = LoongArch::XVNOR_V;
7137 RC = &LoongArch::LASX256RegClass;
7138 break;
7139 case LoongArch::PseudoXVMSKNEZ_B:
7140 MskOpc = LoongArch::XVMSKNZ_B;
7141 RC = &LoongArch::LASX256RegClass;
7142 break;
7143 }
7144
7145 Register Msk = MRI.createVirtualRegister(RC);
7146 if (NotOpc) {
7147 Register Tmp = MRI.createVirtualRegister(RC);
7148 BuildMI(*BB, MI, DL, TII->get(MskOpc), Tmp).addReg(Src);
7149 BuildMI(*BB, MI, DL, TII->get(NotOpc), Msk)
7150 .addReg(Tmp, RegState::Kill)
7151 .addReg(Tmp, RegState::Kill);
7152 } else {
7153 BuildMI(*BB, MI, DL, TII->get(MskOpc), Msk).addReg(Src);
7154 }
7155
7156 if (TRI->getRegSizeInBits(*RC) > 128) {
7157 Register Lo = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
7158 Register Hi = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
7159 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPICKVE2GR_WU), Lo)
7160 .addReg(Msk)
7161 .addImm(0);
7162 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPICKVE2GR_WU), Hi)
7163 .addReg(Msk, RegState::Kill)
7164 .addImm(4);
7165 BuildMI(*BB, MI, DL,
7166 TII->get(Subtarget.is64Bit() ? LoongArch::BSTRINS_D
7167 : LoongArch::BSTRINS_W),
7168 Dst)
7171 .addImm(256 / EleBits - 1)
7172 .addImm(128 / EleBits);
7173 } else {
7174 BuildMI(*BB, MI, DL, TII->get(LoongArch::VPICKVE2GR_HU), Dst)
7175 .addReg(Msk, RegState::Kill)
7176 .addImm(0);
7177 }
7178
7179 MI.eraseFromParent();
7180 return BB;
7181}
7182
7183static MachineBasicBlock *
7185 const LoongArchSubtarget &Subtarget) {
7186 assert(MI.getOpcode() == LoongArch::SplitPairF64Pseudo &&
7187 "Unexpected instruction");
7188
7189 MachineFunction &MF = *BB->getParent();
7190 DebugLoc DL = MI.getDebugLoc();
7192 Register LoReg = MI.getOperand(0).getReg();
7193 Register HiReg = MI.getOperand(1).getReg();
7194 Register SrcReg = MI.getOperand(2).getReg();
7195
7196 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVFR2GR_S_64), LoReg).addReg(SrcReg);
7197 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVFRH2GR_S), HiReg)
7198 .addReg(SrcReg, getKillRegState(MI.getOperand(2).isKill()));
7199 MI.eraseFromParent(); // The pseudo instruction is gone now.
7200 return BB;
7201}
7202
7203static MachineBasicBlock *
7205 const LoongArchSubtarget &Subtarget) {
7206 assert(MI.getOpcode() == LoongArch::BuildPairF64Pseudo &&
7207 "Unexpected instruction");
7208
7209 MachineFunction &MF = *BB->getParent();
7210 DebugLoc DL = MI.getDebugLoc();
7213 Register TmpReg = MRI.createVirtualRegister(&LoongArch::FPR64RegClass);
7214 Register DstReg = MI.getOperand(0).getReg();
7215 Register LoReg = MI.getOperand(1).getReg();
7216 Register HiReg = MI.getOperand(2).getReg();
7217
7218 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVGR2FR_W_64), TmpReg)
7219 .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()));
7220 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVGR2FRH_W), DstReg)
7221 .addReg(TmpReg, RegState::Kill)
7222 .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()));
7223 MI.eraseFromParent(); // The pseudo instruction is gone now.
7224 return BB;
7225}
7226
7228 switch (MI.getOpcode()) {
7229 default:
7230 return false;
7231 case LoongArch::Select_GPR_Using_CC_GPR:
7232 return true;
7233 }
7234}
7235
7236static MachineBasicBlock *
7238 const LoongArchSubtarget &Subtarget) {
7239 // To "insert" Select_* instructions, we actually have to insert the triangle
7240 // control-flow pattern. The incoming instructions know the destination vreg
7241 // to set, the condition code register to branch on, the true/false values to
7242 // select between, and the condcode to use to select the appropriate branch.
7243 //
7244 // We produce the following control flow:
7245 // HeadMBB
7246 // | \
7247 // | IfFalseMBB
7248 // | /
7249 // TailMBB
7250 //
7251 // When we find a sequence of selects we attempt to optimize their emission
7252 // by sharing the control flow. Currently we only handle cases where we have
7253 // multiple selects with the exact same condition (same LHS, RHS and CC).
7254 // The selects may be interleaved with other instructions if the other
7255 // instructions meet some requirements we deem safe:
7256 // - They are not pseudo instructions.
7257 // - They are debug instructions. Otherwise,
7258 // - They do not have side-effects, do not access memory and their inputs do
7259 // not depend on the results of the select pseudo-instructions.
7260 // The TrueV/FalseV operands of the selects cannot depend on the result of
7261 // previous selects in the sequence.
7262 // These conditions could be further relaxed. See the X86 target for a
7263 // related approach and more information.
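  // For illustration, two selects guarded by the same (LHS, RHS, CC) triple,
  // roughly
  //   %a = Select_GPR_Using_CC_GPR %x, %y, cc, %t1, %f1
  //   %b = Select_GPR_Using_CC_GPR %x, %y, cc, %t2, %f2
  // share one conditional branch and become two PHIs in TailMBB, each fed
  // from HeadMBB and IfFalseMBB.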
7264
7265 Register LHS = MI.getOperand(1).getReg();
7266 Register RHS;
7267 if (MI.getOperand(2).isReg())
7268 RHS = MI.getOperand(2).getReg();
7269 auto CC = static_cast<unsigned>(MI.getOperand(3).getImm());
7270
7271 SmallVector<MachineInstr *, 4> SelectDebugValues;
7272 SmallSet<Register, 4> SelectDests;
7273 SelectDests.insert(MI.getOperand(0).getReg());
7274
7275 MachineInstr *LastSelectPseudo = &MI;
7276 for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
7277 SequenceMBBI != E; ++SequenceMBBI) {
7278 if (SequenceMBBI->isDebugInstr())
7279 continue;
7280 if (isSelectPseudo(*SequenceMBBI)) {
7281 if (SequenceMBBI->getOperand(1).getReg() != LHS ||
7282 !SequenceMBBI->getOperand(2).isReg() ||
7283 SequenceMBBI->getOperand(2).getReg() != RHS ||
7284 SequenceMBBI->getOperand(3).getImm() != CC ||
7285 SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
7286 SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
7287 break;
7288 LastSelectPseudo = &*SequenceMBBI;
7289 SequenceMBBI->collectDebugValues(SelectDebugValues);
7290 SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
7291 continue;
7292 }
7293 if (SequenceMBBI->hasUnmodeledSideEffects() ||
7294 SequenceMBBI->mayLoadOrStore() ||
7295 SequenceMBBI->usesCustomInsertionHook())
7296 break;
7297 if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
7298 return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
7299 }))
7300 break;
7301 }
7302
7303 const LoongArchInstrInfo &TII = *Subtarget.getInstrInfo();
7304 const BasicBlock *LLVM_BB = BB->getBasicBlock();
7305 DebugLoc DL = MI.getDebugLoc();
7307
7308 MachineBasicBlock *HeadMBB = BB;
7309 MachineFunction *F = BB->getParent();
7310 MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
7311 MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
7312
7313 F->insert(I, IfFalseMBB);
7314 F->insert(I, TailMBB);
7315
7316 // Set the call frame size on entry to the new basic blocks.
7317 unsigned CallFrameSize = TII.getCallFrameSizeAt(*LastSelectPseudo);
7318 IfFalseMBB->setCallFrameSize(CallFrameSize);
7319 TailMBB->setCallFrameSize(CallFrameSize);
7320
7321 // Transfer debug instructions associated with the selects to TailMBB.
7322 for (MachineInstr *DebugInstr : SelectDebugValues) {
7323 TailMBB->push_back(DebugInstr->removeFromParent());
7324 }
7325
7326 // Move all instructions after the sequence to TailMBB.
7327 TailMBB->splice(TailMBB->end(), HeadMBB,
7328 std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
7329 // Update machine-CFG edges by transferring all successors of the current
7330 // block to the new block which will contain the Phi nodes for the selects.
7331 TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
7332 // Set the successors for HeadMBB.
7333 HeadMBB->addSuccessor(IfFalseMBB);
7334 HeadMBB->addSuccessor(TailMBB);
7335
7336 // Insert appropriate branch.
7337 if (MI.getOperand(2).isImm())
7338 BuildMI(HeadMBB, DL, TII.get(CC))
7339 .addReg(LHS)
7340 .addImm(MI.getOperand(2).getImm())
7341 .addMBB(TailMBB);
7342 else
7343 BuildMI(HeadMBB, DL, TII.get(CC)).addReg(LHS).addReg(RHS).addMBB(TailMBB);
7344
7345 // IfFalseMBB just falls through to TailMBB.
7346 IfFalseMBB->addSuccessor(TailMBB);
7347
7348 // Create PHIs for all of the select pseudo-instructions.
7349 auto SelectMBBI = MI.getIterator();
7350 auto SelectEnd = std::next(LastSelectPseudo->getIterator());
7351 auto InsertionPoint = TailMBB->begin();
7352 while (SelectMBBI != SelectEnd) {
7353 auto Next = std::next(SelectMBBI);
7354 if (isSelectPseudo(*SelectMBBI)) {
7355 // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
7356 BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
7357 TII.get(LoongArch::PHI), SelectMBBI->getOperand(0).getReg())
7358 .addReg(SelectMBBI->getOperand(4).getReg())
7359 .addMBB(HeadMBB)
7360 .addReg(SelectMBBI->getOperand(5).getReg())
7361 .addMBB(IfFalseMBB);
7362 SelectMBBI->eraseFromParent();
7363 }
7364 SelectMBBI = Next;
7365 }
7366
7367 F->getProperties().resetNoPHIs();
7368 return TailMBB;
7369}
7370
7371MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter(
7372 MachineInstr &MI, MachineBasicBlock *BB) const {
7373 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
7374 DebugLoc DL = MI.getDebugLoc();
7375
7376 switch (MI.getOpcode()) {
7377 default:
7378 llvm_unreachable("Unexpected instr type to insert");
7379 case LoongArch::DIV_W:
7380 case LoongArch::DIV_WU:
7381 case LoongArch::MOD_W:
7382 case LoongArch::MOD_WU:
7383 case LoongArch::DIV_D:
7384 case LoongArch::DIV_DU:
7385 case LoongArch::MOD_D:
7386 case LoongArch::MOD_DU:
7387 return insertDivByZeroTrap(MI, BB);
7388 break;
7389 case LoongArch::WRFCSR: {
7390 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVGR2FCSR),
7391 LoongArch::FCSR0 + MI.getOperand(0).getImm())
7392 .addReg(MI.getOperand(1).getReg());
7393 MI.eraseFromParent();
7394 return BB;
7395 }
7396 case LoongArch::RDFCSR: {
7397 MachineInstr *ReadFCSR =
7398 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVFCSR2GR),
7399 MI.getOperand(0).getReg())
7400 .addReg(LoongArch::FCSR0 + MI.getOperand(1).getImm());
7401 ReadFCSR->getOperand(1).setIsUndef();
7402 MI.eraseFromParent();
7403 return BB;
7404 }
7405 case LoongArch::Select_GPR_Using_CC_GPR:
7406 return emitSelectPseudo(MI, BB, Subtarget);
7407 case LoongArch::BuildPairF64Pseudo:
7408 return emitBuildPairF64Pseudo(MI, BB, Subtarget);
7409 case LoongArch::SplitPairF64Pseudo:
7410 return emitSplitPairF64Pseudo(MI, BB, Subtarget);
7411 case LoongArch::PseudoVBZ:
7412 case LoongArch::PseudoVBZ_B:
7413 case LoongArch::PseudoVBZ_H:
7414 case LoongArch::PseudoVBZ_W:
7415 case LoongArch::PseudoVBZ_D:
7416 case LoongArch::PseudoVBNZ:
7417 case LoongArch::PseudoVBNZ_B:
7418 case LoongArch::PseudoVBNZ_H:
7419 case LoongArch::PseudoVBNZ_W:
7420 case LoongArch::PseudoVBNZ_D:
7421 case LoongArch::PseudoXVBZ:
7422 case LoongArch::PseudoXVBZ_B:
7423 case LoongArch::PseudoXVBZ_H:
7424 case LoongArch::PseudoXVBZ_W:
7425 case LoongArch::PseudoXVBZ_D:
7426 case LoongArch::PseudoXVBNZ:
7427 case LoongArch::PseudoXVBNZ_B:
7428 case LoongArch::PseudoXVBNZ_H:
7429 case LoongArch::PseudoXVBNZ_W:
7430 case LoongArch::PseudoXVBNZ_D:
7431 return emitVecCondBranchPseudo(MI, BB, Subtarget);
7432 case LoongArch::PseudoXVINSGR2VR_B:
7433 case LoongArch::PseudoXVINSGR2VR_H:
7434 return emitPseudoXVINSGR2VR(MI, BB, Subtarget);
7435 case LoongArch::PseudoCTPOP:
7436 return emitPseudoCTPOP(MI, BB, Subtarget);
7437 case LoongArch::PseudoVMSKLTZ_B:
7438 case LoongArch::PseudoVMSKLTZ_H:
7439 case LoongArch::PseudoVMSKLTZ_W:
7440 case LoongArch::PseudoVMSKLTZ_D:
7441 case LoongArch::PseudoVMSKGEZ_B:
7442 case LoongArch::PseudoVMSKEQZ_B:
7443 case LoongArch::PseudoVMSKNEZ_B:
7444 case LoongArch::PseudoXVMSKLTZ_B:
7445 case LoongArch::PseudoXVMSKLTZ_H:
7446 case LoongArch::PseudoXVMSKLTZ_W:
7447 case LoongArch::PseudoXVMSKLTZ_D:
7448 case LoongArch::PseudoXVMSKGEZ_B:
7449 case LoongArch::PseudoXVMSKEQZ_B:
7450 case LoongArch::PseudoXVMSKNEZ_B:
7451 return emitPseudoVMSKCOND(MI, BB, Subtarget);
7452 case TargetOpcode::STATEPOINT:
7453 // STATEPOINT is a pseudo instruction which has no implicit defs/uses
7454 // while bl call instruction (where statepoint will be lowered at the
7455 // end) has implicit def. This def is early-clobber as it will be set at
7456 // the moment of the call and earlier than any use is read.
7457 // Add this implicit dead def here as a workaround.
7458 MI.addOperand(*MI.getMF(),
7460 LoongArch::R1, /*isDef*/ true,
7461 /*isImp*/ true, /*isKill*/ false, /*isDead*/ true,
7462 /*isUndef*/ false, /*isEarlyClobber*/ true));
7463 if (!Subtarget.is64Bit())
7464 report_fatal_error("STATEPOINT is only supported on 64-bit targets");
7465 return emitPatchPoint(MI, BB);
7466 }
7467}
7468
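// Misaligned memory accesses are allowed only when the subtarget provides
// hardware unaligned-access support (UAL).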
7470 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
7471 unsigned *Fast) const {
7472 if (!Subtarget.hasUAL())
7473 return false;
7474
7475 // TODO: set reasonable speed number.
7476 if (Fast)
7477 *Fast = 1;
7478 return true;
7479}
7480
7481const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
7482 switch ((LoongArchISD::NodeType)Opcode) {
7484 break;
7485
7486#define NODE_NAME_CASE(node) \
7487 case LoongArchISD::node: \
7488 return "LoongArchISD::" #node;
7489
7490 // TODO: Add more target-dependent nodes later.
7491 NODE_NAME_CASE(CALL)
7492 NODE_NAME_CASE(CALL_MEDIUM)
7493 NODE_NAME_CASE(CALL_LARGE)
7494 NODE_NAME_CASE(RET)
7495 NODE_NAME_CASE(TAIL)
7496 NODE_NAME_CASE(TAIL_MEDIUM)
7497 NODE_NAME_CASE(TAIL_LARGE)
7498 NODE_NAME_CASE(SELECT_CC)
7499 NODE_NAME_CASE(BR_CC)
7500 NODE_NAME_CASE(BRCOND)
7501 NODE_NAME_CASE(SLL_W)
7502 NODE_NAME_CASE(SRA_W)
7503 NODE_NAME_CASE(SRL_W)
7504 NODE_NAME_CASE(BSTRINS)
7505 NODE_NAME_CASE(BSTRPICK)
7506 NODE_NAME_CASE(MOVGR2FR_W)
7507 NODE_NAME_CASE(MOVGR2FR_W_LA64)
7508 NODE_NAME_CASE(MOVGR2FR_D)
7509 NODE_NAME_CASE(MOVGR2FR_D_LO_HI)
7510 NODE_NAME_CASE(MOVFR2GR_S_LA64)
7511 NODE_NAME_CASE(FTINT)
7512 NODE_NAME_CASE(BUILD_PAIR_F64)
7513 NODE_NAME_CASE(SPLIT_PAIR_F64)
7514 NODE_NAME_CASE(REVB_2H)
7515 NODE_NAME_CASE(REVB_2W)
7516 NODE_NAME_CASE(BITREV_4B)
7517 NODE_NAME_CASE(BITREV_8B)
7518 NODE_NAME_CASE(BITREV_W)
7519 NODE_NAME_CASE(ROTR_W)
7520 NODE_NAME_CASE(ROTL_W)
7521 NODE_NAME_CASE(DIV_W)
7522 NODE_NAME_CASE(DIV_WU)
7523 NODE_NAME_CASE(MOD_W)
7524 NODE_NAME_CASE(MOD_WU)
7525 NODE_NAME_CASE(CLZ_W)
7526 NODE_NAME_CASE(CTZ_W)
7527 NODE_NAME_CASE(DBAR)
7528 NODE_NAME_CASE(IBAR)
7529 NODE_NAME_CASE(BREAK)
7530 NODE_NAME_CASE(SYSCALL)
7531 NODE_NAME_CASE(CRC_W_B_W)
7532 NODE_NAME_CASE(CRC_W_H_W)
7533 NODE_NAME_CASE(CRC_W_W_W)
7534 NODE_NAME_CASE(CRC_W_D_W)
7535 NODE_NAME_CASE(CRCC_W_B_W)
7536 NODE_NAME_CASE(CRCC_W_H_W)
7537 NODE_NAME_CASE(CRCC_W_W_W)
7538 NODE_NAME_CASE(CRCC_W_D_W)
7539 NODE_NAME_CASE(CSRRD)
7540 NODE_NAME_CASE(CSRWR)
7541 NODE_NAME_CASE(CSRXCHG)
7542 NODE_NAME_CASE(IOCSRRD_B)
7543 NODE_NAME_CASE(IOCSRRD_H)
7544 NODE_NAME_CASE(IOCSRRD_W)
7545 NODE_NAME_CASE(IOCSRRD_D)
7546 NODE_NAME_CASE(IOCSRWR_B)
7547 NODE_NAME_CASE(IOCSRWR_H)
7548 NODE_NAME_CASE(IOCSRWR_W)
7549 NODE_NAME_CASE(IOCSRWR_D)
7550 NODE_NAME_CASE(CPUCFG)
7551 NODE_NAME_CASE(MOVGR2FCSR)
7552 NODE_NAME_CASE(MOVFCSR2GR)
7553 NODE_NAME_CASE(CACOP_D)
7554 NODE_NAME_CASE(CACOP_W)
7555 NODE_NAME_CASE(VSHUF)
7556 NODE_NAME_CASE(VPICKEV)
7557 NODE_NAME_CASE(VPICKOD)
7558 NODE_NAME_CASE(VPACKEV)
7559 NODE_NAME_CASE(VPACKOD)
7560 NODE_NAME_CASE(VILVL)
7561 NODE_NAME_CASE(VILVH)
7562 NODE_NAME_CASE(VSHUF4I)
7563 NODE_NAME_CASE(VREPLVEI)
7564 NODE_NAME_CASE(VREPLGR2VR)
7565 NODE_NAME_CASE(XVPERMI)
7566 NODE_NAME_CASE(XVPERM)
7567 NODE_NAME_CASE(XVREPLVE0)
7568 NODE_NAME_CASE(XVREPLVE0Q)
7569 NODE_NAME_CASE(XVINSVE0)
7570 NODE_NAME_CASE(VPICK_SEXT_ELT)
7571 NODE_NAME_CASE(VPICK_ZEXT_ELT)
7572 NODE_NAME_CASE(VREPLVE)
7573 NODE_NAME_CASE(VALL_ZERO)
7574 NODE_NAME_CASE(VANY_ZERO)
7575 NODE_NAME_CASE(VALL_NONZERO)
7576 NODE_NAME_CASE(VANY_NONZERO)
7577 NODE_NAME_CASE(FRECIPE)
7578 NODE_NAME_CASE(FRSQRTE)
7579 NODE_NAME_CASE(VSLLI)
7580 NODE_NAME_CASE(VSRLI)
7581 NODE_NAME_CASE(VBSLL)
7582 NODE_NAME_CASE(VBSRL)
7583 NODE_NAME_CASE(VLDREPL)
7584 NODE_NAME_CASE(VMSKLTZ)
7585 NODE_NAME_CASE(VMSKGEZ)
7586 NODE_NAME_CASE(VMSKEQZ)
7587 NODE_NAME_CASE(VMSKNEZ)
7588 NODE_NAME_CASE(XVMSKLTZ)
7589 NODE_NAME_CASE(XVMSKGEZ)
7590 NODE_NAME_CASE(XVMSKEQZ)
7591 NODE_NAME_CASE(XVMSKNEZ)
7592 NODE_NAME_CASE(VHADDW)
7593 }
7594#undef NODE_NAME_CASE
7595 return nullptr;
7596}
7597
7598//===----------------------------------------------------------------------===//
7599// Calling Convention Implementation
7600//===----------------------------------------------------------------------===//
7601
7602 // Eight general-purpose registers a0-a7 are used for passing integer arguments,
7603// with a0-a1 reused to return values. Generally, the GPRs are used to pass
7604// fixed-point arguments, and floating-point arguments when no FPR is available
7605// or with soft float ABI.
7606const MCPhysReg ArgGPRs[] = {LoongArch::R4, LoongArch::R5, LoongArch::R6,
7607 LoongArch::R7, LoongArch::R8, LoongArch::R9,
7608 LoongArch::R10, LoongArch::R11};
7609 // Eight floating-point registers fa0-fa7 are used for passing floating-point
7610// arguments, and fa0-fa1 are also used to return values.
7611const MCPhysReg ArgFPR32s[] = {LoongArch::F0, LoongArch::F1, LoongArch::F2,
7612 LoongArch::F3, LoongArch::F4, LoongArch::F5,
7613 LoongArch::F6, LoongArch::F7};
7614// FPR32 and FPR64 alias each other.
7616 LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, LoongArch::F3_64,
7617 LoongArch::F4_64, LoongArch::F5_64, LoongArch::F6_64, LoongArch::F7_64};
7618
7619const MCPhysReg ArgVRs[] = {LoongArch::VR0, LoongArch::VR1, LoongArch::VR2,
7620 LoongArch::VR3, LoongArch::VR4, LoongArch::VR5,
7621 LoongArch::VR6, LoongArch::VR7};
7622
7623const MCPhysReg ArgXRs[] = {LoongArch::XR0, LoongArch::XR1, LoongArch::XR2,
7624 LoongArch::XR3, LoongArch::XR4, LoongArch::XR5,
7625 LoongArch::XR6, LoongArch::XR7};
7626
7627// Pass a 2*GRLen argument that has been split into two GRLen values through
7628// registers or the stack as necessary.
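// For example, an i64 on LA32 or an i128 on LA64 is split by legalisation
// into two GRLen-sized halves that are assigned here.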
7629static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State,
7630 CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1,
7631 unsigned ValNo2, MVT ValVT2, MVT LocVT2,
7632 ISD::ArgFlagsTy ArgFlags2) {
7633 unsigned GRLenInBytes = GRLen / 8;
7634 if (Register Reg = State.AllocateReg(ArgGPRs)) {
7635 // At least one half can be passed via register.
7636 State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
7637 VA1.getLocVT(), CCValAssign::Full));
7638 } else {
7639 // Both halves must be passed on the stack, with proper alignment.
7640 Align StackAlign =
7641 std::max(Align(GRLenInBytes), ArgFlags1.getNonZeroOrigAlign());
7642 State.addLoc(
7644 State.AllocateStack(GRLenInBytes, StackAlign),
7645 VA1.getLocVT(), CCValAssign::Full));
7646 State.addLoc(CCValAssign::getMem(
7647 ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
7648 LocVT2, CCValAssign::Full));
7649 return false;
7650 }
7651 if (Register Reg = State.AllocateReg(ArgGPRs)) {
7652 // The second half can also be passed via register.
7653 State.addLoc(
7654 CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
7655 } else {
7656 // The second half is passed via the stack, without additional alignment.
7657 State.addLoc(CCValAssign::getMem(
7658 ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
7659 LocVT2, CCValAssign::Full));
7660 }
7661 return false;
7662}
7663
7664// Implements the LoongArch calling convention. Returns true upon failure.
7666 unsigned ValNo, MVT ValVT,
7667 CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
7668 CCState &State, bool IsRet, Type *OrigTy) {
7669 unsigned GRLen = DL.getLargestLegalIntTypeSizeInBits();
7670 assert((GRLen == 32 || GRLen == 64) && "Unsupported GRLen");
7671 MVT GRLenVT = GRLen == 32 ? MVT::i32 : MVT::i64;
7672 MVT LocVT = ValVT;
7673
7674 // Any return value split into more than two values can't be returned
7675 // directly.
7676 if (IsRet && ValNo > 1)
7677 return true;
7678
7679 // Use GPRs for floating-point values that are variadic arguments or when no FPR is available.
7680 bool UseGPRForFloat = true;
7681
7682 switch (ABI) {
7683 default:
7684 llvm_unreachable("Unexpected ABI");
7685 break;
7690 UseGPRForFloat = ArgFlags.isVarArg();
7691 break;
7694 break;
7695 }
7696
7697 // If this is a variadic argument, the LoongArch calling convention requires
7698 // that it is assigned an 'even' or 'aligned' register if it has (2*GRLen)/8
7699 // byte alignment. An aligned register should be used regardless of whether
7700 // the original argument was split during legalisation or not. The argument
7701 // will not be passed by registers if the original type is larger than
7702 // 2*GRLen, so the register alignment rule does not apply.
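// For example, on LA32 a variadic i64 (8 bytes, 8-byte aligned) passed in
// GPRs must start at an even-numbered argument register, so an odd register
// may be skipped below.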
7703 unsigned TwoGRLenInBytes = (2 * GRLen) / 8;
7704 if (ArgFlags.isVarArg() &&
7705 ArgFlags.getNonZeroOrigAlign() == TwoGRLenInBytes &&
7706 DL.getTypeAllocSize(OrigTy) == TwoGRLenInBytes) {
7707 unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
7708 // Skip 'odd' register if necessary.
7709 if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1)
7710 State.AllocateReg(ArgGPRs);
7711 }
7712
7713 SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
7714 SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
7715 State.getPendingArgFlags();
7716
7717 assert(PendingLocs.size() == PendingArgFlags.size() &&
7718 "PendingLocs and PendingArgFlags out of sync");
7719
7720 // FPR32 and FPR64 alias each other.
7721 if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s))
7722 UseGPRForFloat = true;
7723
7724 if (UseGPRForFloat && ValVT == MVT::f32) {
7725 LocVT = GRLenVT;
7726 LocInfo = CCValAssign::BCvt;
7727 } else if (UseGPRForFloat && GRLen == 64 && ValVT == MVT::f64) {
7728 LocVT = MVT::i64;
7729 LocInfo = CCValAssign::BCvt;
7730 } else if (UseGPRForFloat && GRLen == 32 && ValVT == MVT::f64) {
7731 // Handle passing f64 on LA32D with a soft float ABI or when floating point
7732 // registers are exhausted.
7733 assert(PendingLocs.empty() && "Can't lower f64 if it is split");
7734 // Depending on available argument GPRS, f64 may be passed in a pair of
7735 // GPRs, split between a GPR and the stack, or passed completely on the
7736 // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
7737 // cases.
7738 MCRegister Reg = State.AllocateReg(ArgGPRs);
7739 if (!Reg) {
7740 int64_t StackOffset = State.AllocateStack(8, Align(8));
7741 State.addLoc(
7742 CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
7743 return false;
7744 }
7745 LocVT = MVT::i32;
7746 State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
7747 MCRegister HiReg = State.AllocateReg(ArgGPRs);
7748 if (HiReg) {
7749 State.addLoc(
7750 CCValAssign::getCustomReg(ValNo, ValVT, HiReg, LocVT, LocInfo));
7751 } else {
7752 int64_t StackOffset = State.AllocateStack(4, Align(4));
7753 State.addLoc(
7754 CCValAssign::getCustomMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
7755 }
7756 return false;
7757 }
7758
7759 // Split arguments might be passed indirectly, so keep track of the pending
7760 // values.
7761 if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
7762 LocVT = GRLenVT;
7763 LocInfo = CCValAssign::Indirect;
7764 PendingLocs.push_back(
7765 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
7766 PendingArgFlags.push_back(ArgFlags);
7767 if (!ArgFlags.isSplitEnd()) {
7768 return false;
7769 }
7770 }
7771
7772 // If the split argument only had two elements, it should be passed directly
7773 // in registers or on the stack.
7774 if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&
7775 PendingLocs.size() <= 2) {
7776 assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
7777 // Apply the normal calling convention rules to the first half of the
7778 // split argument.
7779 CCValAssign VA = PendingLocs[0];
7780 ISD::ArgFlagsTy AF = PendingArgFlags[0];
7781 PendingLocs.clear();
7782 PendingArgFlags.clear();
7783 return CC_LoongArchAssign2GRLen(GRLen, State, VA, AF, ValNo, ValVT, LocVT,
7784 ArgFlags);
7785 }
7786
7787 // Allocate to a register if possible, or else a stack slot.
7788 Register Reg;
7789 unsigned StoreSizeBytes = GRLen / 8;
7790 Align StackAlign = Align(GRLen / 8);
7791
7792 if (ValVT == MVT::f32 && !UseGPRForFloat) {
7793 Reg = State.AllocateReg(ArgFPR32s);
7794 } else if (ValVT == MVT::f64 && !UseGPRForFloat) {
7795 Reg = State.AllocateReg(ArgFPR64s);
7796 } else if (ValVT.is128BitVector()) {
7797 Reg = State.AllocateReg(ArgVRs);
7798 UseGPRForFloat = false;
7799 StoreSizeBytes = 16;
7800 StackAlign = Align(16);
7801 } else if (ValVT.is256BitVector()) {
7802 Reg = State.AllocateReg(ArgXRs);
7803 UseGPRForFloat = false;
7804 StoreSizeBytes = 32;
7805 StackAlign = Align(32);
7806 } else {
7807 Reg = State.AllocateReg(ArgGPRs);
7808 }
7809
7810 unsigned StackOffset =
7811 Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign);
7812
7813 // If we reach this point and PendingLocs is non-empty, we must be at the
7814 // end of a split argument that must be passed indirectly.
7815 if (!PendingLocs.empty()) {
7816 assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
7817 assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
7818 for (auto &It : PendingLocs) {
7819 if (Reg)
7820 It.convertToReg(Reg);
7821 else
7822 It.convertToMem(StackOffset);
7823 State.addLoc(It);
7824 }
7825 PendingLocs.clear();
7826 PendingArgFlags.clear();
7827 return false;
7828 }
7829 assert((!UseGPRForFloat || LocVT == GRLenVT) &&
7830 "Expected a GRLenVT at this stage");
7831
7832 if (Reg) {
7833 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
7834 return false;
7835 }
7836
7837 // When a floating-point value is passed on the stack, no bit-cast is needed.
7838 if (ValVT.isFloatingPoint()) {
7839 LocVT = ValVT;
7840 LocInfo = CCValAssign::Full;
7841 }
7842
7843 State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
7844 return false;
7845}
7846
7847void LoongArchTargetLowering::analyzeInputArgs(
7848 MachineFunction &MF, CCState &CCInfo,
7849 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
7850 LoongArchCCAssignFn Fn) const {
7851 FunctionType *FType = MF.getFunction().getFunctionType();
7852 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
7853 MVT ArgVT = Ins[i].VT;
7854 Type *ArgTy = nullptr;
7855 if (IsRet)
7856 ArgTy = FType->getReturnType();
7857 else if (Ins[i].isOrigArg())
7858 ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
7860 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
7861 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Ins[i].Flags,
7862 CCInfo, IsRet, ArgTy)) {
7863 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " << ArgVT
7864 << '\n');
7865 llvm_unreachable("");
7866 }
7867 }
7868}
7869
7870void LoongArchTargetLowering::analyzeOutputArgs(
7871 MachineFunction &MF, CCState &CCInfo,
7872 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
7873 CallLoweringInfo *CLI, LoongArchCCAssignFn Fn) const {
7874 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
7875 MVT ArgVT = Outs[i].VT;
7876 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
7878 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
7879 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Outs[i].Flags,
7880 CCInfo, IsRet, OrigTy)) {
7881 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " << ArgVT
7882 << "\n");
7883 llvm_unreachable("");
7884 }
7885 }
7886}
7887
7888// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
7889// values.
7891 const CCValAssign &VA, const SDLoc &DL) {
7892 switch (VA.getLocInfo()) {
7893 default:
7894 llvm_unreachable("Unexpected CCValAssign::LocInfo");
7895 case CCValAssign::Full:
7897 break;
7898 case CCValAssign::BCvt:
7899 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
7900 Val = DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Val);
7901 else
7902 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
7903 break;
7904 }
7905 return Val;
7906}
7907
7909 const CCValAssign &VA, const SDLoc &DL,
7910 const ISD::InputArg &In,
7911 const LoongArchTargetLowering &TLI) {
7914 EVT LocVT = VA.getLocVT();
7915 SDValue Val;
7916 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
7917 Register VReg = RegInfo.createVirtualRegister(RC);
7918 RegInfo.addLiveIn(VA.getLocReg(), VReg);
7919 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
7920
7921 // If input is sign extended from 32 bits, note it for the OptW pass.
7922 if (In.isOrigArg()) {
7923 Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex());
7924 if (OrigArg->getType()->isIntegerTy()) {
7925 unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
7926 // An input zero extended from i31 can also be considered sign extended.
7927 if ((BitWidth <= 32 && In.Flags.isSExt()) ||
7928 (BitWidth < 32 && In.Flags.isZExt())) {
7931 LAFI->addSExt32Register(VReg);
7932 }
7933 }
7934 }
7935
7936 return convertLocVTToValVT(DAG, Val, VA, DL);
7937}
7938
7939// The caller is responsible for loading the full value if the argument is
7940// passed with CCValAssign::Indirect.
7942 const CCValAssign &VA, const SDLoc &DL) {
7944 MachineFrameInfo &MFI = MF.getFrameInfo();
7945 EVT ValVT = VA.getValVT();
7946 int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
7947 /*IsImmutable=*/true);
7948 SDValue FIN = DAG.getFrameIndex(
7950
7951 ISD::LoadExtType ExtType;
7952 switch (VA.getLocInfo()) {
7953 default:
7954 llvm_unreachable("Unexpected CCValAssign::LocInfo");
7955 case CCValAssign::Full:
7957 case CCValAssign::BCvt:
7958 ExtType = ISD::NON_EXTLOAD;
7959 break;
7960 }
7961 return DAG.getExtLoad(
7962 ExtType, DL, VA.getLocVT(), Chain, FIN,
7964}
7965
7967 const CCValAssign &VA,
7968 const CCValAssign &HiVA,
7969 const SDLoc &DL) {
7970 assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
7971 "Unexpected VA");
7973 MachineFrameInfo &MFI = MF.getFrameInfo();
7975
7976 assert(VA.isRegLoc() && "Expected register VA assignment");
7977
7978 Register LoVReg = RegInfo.createVirtualRegister(&LoongArch::GPRRegClass);
7979 RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
7980 SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
7981 SDValue Hi;
7982 if (HiVA.isMemLoc()) {
7983 // Second half of f64 is passed on the stack.
7984 int FI = MFI.CreateFixedObject(4, HiVA.getLocMemOffset(),
7985 /*IsImmutable=*/true);
7986 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
7987 Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
7989 } else {
7990 // Second half of f64 is passed in another GPR.
7991 Register HiVReg = RegInfo.createVirtualRegister(&LoongArch::GPRRegClass);
7992 RegInfo.addLiveIn(HiVA.getLocReg(), HiVReg);
7993 Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
7994 }
7995 return DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64, Lo, Hi);
7996}
7997
7999 const CCValAssign &VA, const SDLoc &DL) {
8000 EVT LocVT = VA.getLocVT();
8001
8002 switch (VA.getLocInfo()) {
8003 default:
8004 llvm_unreachable("Unexpected CCValAssign::LocInfo");
8005 case CCValAssign::Full:
8006 break;
8007 case CCValAssign::BCvt:
8008 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
8009 Val = DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Val);
8010 else
8011 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
8012 break;
8013 }
8014 return Val;
8015}
8016
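// Calling convention for GHC (Glasgow Haskell Compiler) calls: STG virtual
// registers are pinned to fixed callee-saved machine registers, and there is
// no stack fallback when the register lists are exhausted.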
8017static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
8018 CCValAssign::LocInfo LocInfo,
8019 ISD::ArgFlagsTy ArgFlags, Type *OrigTy,
8020 CCState &State) {
8021 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
8022 // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, SpLim
8023 // s0 s1 s2 s3 s4 s5 s6 s7 s8
8024 static const MCPhysReg GPRList[] = {
8025 LoongArch::R23, LoongArch::R24, LoongArch::R25,
8026 LoongArch::R26, LoongArch::R27, LoongArch::R28,
8027 LoongArch::R29, LoongArch::R30, LoongArch::R31};
8028 if (MCRegister Reg = State.AllocateReg(GPRList)) {
8029 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
8030 return false;
8031 }
8032 }
8033
8034 if (LocVT == MVT::f32) {
8035 // Pass in STG registers: F1, F2, F3, F4
8036 // fs0,fs1,fs2,fs3
8037 static const MCPhysReg FPR32List[] = {LoongArch::F24, LoongArch::F25,
8038 LoongArch::F26, LoongArch::F27};
8039 if (MCRegister Reg = State.AllocateReg(FPR32List)) {
8040 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
8041 return false;
8042 }
8043 }
8044
8045 if (LocVT == MVT::f64) {
8046 // Pass in STG registers: D1, D2, D3, D4
8047 // fs4,fs5,fs6,fs7
8048 static const MCPhysReg FPR64List[] = {LoongArch::F28_64, LoongArch::F29_64,
8049 LoongArch::F30_64, LoongArch::F31_64};
8050 if (MCRegister Reg = State.AllocateReg(FPR64List)) {
8051 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
8052 return false;
8053 }
8054 }
8055
8056 report_fatal_error("No registers left in GHC calling convention");
8057 return true;
8058}
8059
8060// Transform physical registers into virtual registers.
8062 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
8063 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
8064 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
8065
8067
8068 switch (CallConv) {
8069 default:
8070 llvm_unreachable("Unsupported calling convention");
8071 case CallingConv::C:
8072 case CallingConv::Fast:
8074 break;
8075 case CallingConv::GHC:
8076 if (!MF.getSubtarget().hasFeature(LoongArch::FeatureBasicF) ||
8077 !MF.getSubtarget().hasFeature(LoongArch::FeatureBasicD))
8079 "GHC calling convention requires the F and D extensions");
8080 }
8081
8082 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8083 MVT GRLenVT = Subtarget.getGRLenVT();
8084 unsigned GRLenInBytes = Subtarget.getGRLen() / 8;
8085 // Used with varargs to accumulate store chains.
8086 std::vector<SDValue> OutChains;
8087
8088 // Assign locations to all of the incoming arguments.
8090 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
8091
8092 if (CallConv == CallingConv::GHC)
8094 else
8095 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false, CC_LoongArch);
8096
8097 for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) {
8098 CCValAssign &VA = ArgLocs[i];
8099 SDValue ArgValue;
8100 // Passing f64 on LA32D with a soft float ABI must be handled as a special
8101 // case.
8102 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
8103 assert(VA.needsCustom());
8104 ArgValue = unpackF64OnLA32DSoftABI(DAG, Chain, VA, ArgLocs[++i], DL);
8105 } else if (VA.isRegLoc())
8106 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[InsIdx], *this);
8107 else
8108 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
8109 if (VA.getLocInfo() == CCValAssign::Indirect) {
8110 // If the original argument was split and passed by reference, we need to
8111 // load all parts of it here (using the same address).
8112 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
8114 unsigned ArgIndex = Ins[InsIdx].OrigArgIndex;
8115 unsigned ArgPartOffset = Ins[InsIdx].PartOffset;
8116 assert(ArgPartOffset == 0);
8117 while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) {
8118 CCValAssign &PartVA = ArgLocs[i + 1];
8119 unsigned PartOffset = Ins[InsIdx + 1].PartOffset - ArgPartOffset;
8120 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
8121 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
8122 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
8124 ++i;
8125 ++InsIdx;
8126 }
8127 continue;
8128 }
8129 InVals.push_back(ArgValue);
8130 }
8131
8132 if (IsVarArg) {
8134 unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
8135 const TargetRegisterClass *RC = &LoongArch::GPRRegClass;
8136 MachineFrameInfo &MFI = MF.getFrameInfo();
8137 MachineRegisterInfo &RegInfo = MF.getRegInfo();
8138 auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();
8139
8140 // Offset of the first variable argument from stack pointer, and size of
8141 // the vararg save area. For now, the varargs save area is either zero or
8142 // large enough to hold a0-a7.
8143 int VaArgOffset, VarArgsSaveSize;
8144
8145 // If all registers are allocated, then all varargs must be passed on the
8146 // stack and we don't need to save any argregs.
8147 if (ArgRegs.size() == Idx) {
8148 VaArgOffset = CCInfo.getStackSize();
8149 VarArgsSaveSize = 0;
8150 } else {
8151 VarArgsSaveSize = GRLenInBytes * (ArgRegs.size() - Idx);
8152 VaArgOffset = -VarArgsSaveSize;
8153 }
8154
8155 // Record the frame index of the first variable argument,
8156 // which is needed by VASTART.
8157 int FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
8158 LoongArchFI->setVarArgsFrameIndex(FI);
8159
8160 // If saving an odd number of registers then create an extra stack slot to
8161 // ensure that the frame pointer is 2*GRLen-aligned, which in turn ensures
8162 // offsets to even-numbered registers remain 2*GRLen-aligned.
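// For example, on LA64 with three named integer arguments, a0-a2 are used, so
// Idx = 3 and a3-a7 are saved: VarArgsSaveSize = 5 * 8 = 40 and VaArgOffset is
// -40. Since Idx is odd, the block below adds one more 8-byte slot at offset
// -48, keeping the save area 16-byte aligned and making VarArgsSaveSize 48.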
8163 if (Idx % 2) {
8164 MFI.CreateFixedObject(GRLenInBytes, VaArgOffset - (int)GRLenInBytes,
8165 true);
8166 VarArgsSaveSize += GRLenInBytes;
8167 }
8168
8169 // Copy the integer registers that may have been used for passing varargs
8170 // to the vararg save area.
8171 for (unsigned I = Idx; I < ArgRegs.size();
8172 ++I, VaArgOffset += GRLenInBytes) {
8173 const Register Reg = RegInfo.createVirtualRegister(RC);
8174 RegInfo.addLiveIn(ArgRegs[I], Reg);
8175 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, GRLenVT);
8176 FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
8177 SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
8178 SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
8180 cast<StoreSDNode>(Store.getNode())
8181 ->getMemOperand()
8182 ->setValue((Value *)nullptr);
8183 OutChains.push_back(Store);
8184 }
8185 LoongArchFI->setVarArgsSaveSize(VarArgsSaveSize);
8186 }
8187
8188 // All stores are grouped in one node to allow the matching between
8189 // the size of Ins and InVals. This only happens for vararg functions.
8190 if (!OutChains.empty()) {
8191 OutChains.push_back(Chain);
8192 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
8193 }
8194
8195 return Chain;
8196}
8197
8199 return CI->isTailCall();
8200}
8201
8202 // Check that the return value is used only as the return value, as otherwise
8203 // we can't perform a tail call.
8205 SDValue &Chain) const {
8206 if (N->getNumValues() != 1)
8207 return false;
8208 if (!N->hasNUsesOfValue(1, 0))
8209 return false;
8210
8211 SDNode *Copy = *N->user_begin();
8212 if (Copy->getOpcode() != ISD::CopyToReg)
8213 return false;
8214
8215 // If the ISD::CopyToReg has a glue operand, we conservatively assume it
8216 // isn't safe to perform a tail call.
8217 if (Copy->getGluedNode())
8218 return false;
8219
8220 // The copy must be used by a LoongArchISD::RET, and nothing else.
8221 bool HasRet = false;
8222 for (SDNode *Node : Copy->users()) {
8223 if (Node->getOpcode() != LoongArchISD::RET)
8224 return false;
8225 HasRet = true;
8226 }
8227
8228 if (!HasRet)
8229 return false;
8230
8231 Chain = Copy->getOperand(0);
8232 return true;
8233}
8234
8235// Check whether the call is eligible for tail call optimization.
8236bool LoongArchTargetLowering::isEligibleForTailCallOptimization(
8237 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
8238 const SmallVectorImpl<CCValAssign> &ArgLocs) const {
8239
8240 auto CalleeCC = CLI.CallConv;
8241 auto &Outs = CLI.Outs;
8242 auto &Caller = MF.getFunction();
8243 auto CallerCC = Caller.getCallingConv();
8244
8245 // Do not tail call opt if the stack is used to pass parameters.
8246 if (CCInfo.getStackSize() != 0)
8247 return false;
8248
8249 // Do not tail call opt if any parameters need to be passed indirectly.
8250 for (auto &VA : ArgLocs)
8251 if (VA.getLocInfo() == CCValAssign::Indirect)
8252 return false;
8253
8254 // Do not tail call opt if either caller or callee uses struct return
8255 // semantics.
8256 auto IsCallerStructRet = Caller.hasStructRetAttr();
8257 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
8258 if (IsCallerStructRet || IsCalleeStructRet)
8259 return false;
8260
8261 // Do not tail call opt if either the callee or caller has a byval argument.
8262 for (auto &Arg : Outs)
8263 if (Arg.Flags.isByVal())
8264 return false;
8265
8266 // The callee has to preserve all registers the caller needs to preserve.
8267 const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
8268 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
8269 if (CalleeCC != CallerCC) {
8270 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
8271 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
8272 return false;
8273 }
8274 return true;
8275}
8276
8278 return DAG.getDataLayout().getPrefTypeAlign(
8279 VT.getTypeForEVT(*DAG.getContext()));
8280}
8281
8282// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
8283// and output parameter nodes.
8284SDValue
8286 SmallVectorImpl<SDValue> &InVals) const {
8287 SelectionDAG &DAG = CLI.DAG;
8288 SDLoc &DL = CLI.DL;
8290 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
8292 SDValue Chain = CLI.Chain;
8293 SDValue Callee = CLI.Callee;
8294 CallingConv::ID CallConv = CLI.CallConv;
8295 bool IsVarArg = CLI.IsVarArg;
8296 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8297 MVT GRLenVT = Subtarget.getGRLenVT();
8298 bool &IsTailCall = CLI.IsTailCall;
8299
8301
8302 // Analyze the operands of the call, assigning locations to each operand.
8304 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
8305
8306 if (CallConv == CallingConv::GHC)
8307 ArgCCInfo.AnalyzeCallOperands(Outs, CC_LoongArch_GHC);
8308 else
8309 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI, CC_LoongArch);
8310
8311 // Check if it's really possible to do a tail call.
8312 if (IsTailCall)
8313 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
8314
8315 if (IsTailCall)
8316 ++NumTailCalls;
8317 else if (CLI.CB && CLI.CB->isMustTailCall())
8318 report_fatal_error("failed to perform tail call elimination on a call "
8319 "site marked musttail");
8320
8321 // Get a count of how many bytes are to be pushed on the stack.
8322 unsigned NumBytes = ArgCCInfo.getStackSize();
8323
8324 // Create local copies for byval args.
8325 SmallVector<SDValue> ByValArgs;
8326 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
8327 ISD::ArgFlagsTy Flags = Outs[i].Flags;
8328 if (!Flags.isByVal())
8329 continue;
8330
8331 SDValue Arg = OutVals[i];
8332 unsigned Size = Flags.getByValSize();
8333 Align Alignment = Flags.getNonZeroByValAlign();
8334
8335 int FI =
8336 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
8337 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
8338 SDValue SizeNode = DAG.getConstant(Size, DL, GRLenVT);
8339
8340 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
8341 /*IsVolatile=*/false,
8342 /*AlwaysInline=*/false, /*CI=*/nullptr, std::nullopt,
8344 ByValArgs.push_back(FIPtr);
8345 }
8346
8347 if (!IsTailCall)
8348 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
8349
8350 // Copy argument values to their designated locations.
8352 SmallVector<SDValue> MemOpChains;
8353 SDValue StackPtr;
8354 for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e;
8355 ++i, ++OutIdx) {
8356 CCValAssign &VA = ArgLocs[i];
8357 SDValue ArgValue = OutVals[OutIdx];
8358 ISD::ArgFlagsTy Flags = Outs[OutIdx].Flags;
8359
8360 // Handle passing f64 on LA32D with a soft float ABI as a special case.
8361 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
8362 assert(VA.isRegLoc() && "Expected register VA assignment");
8363 assert(VA.needsCustom());
8364 SDValue SplitF64 =
8366 DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
8367 SDValue Lo = SplitF64.getValue(0);
8368 SDValue Hi = SplitF64.getValue(1);
8369
8370 Register RegLo = VA.getLocReg();
8371 RegsToPass.push_back(std::make_pair(RegLo, Lo));
8372
8373 // Get the CCValAssign for the Hi part.
8374 CCValAssign &HiVA = ArgLocs[++i];
8375
8376 if (HiVA.isMemLoc()) {
8377 // Second half of f64 is passed on the stack.
8378 if (!StackPtr.getNode())
8379 StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT);
8381 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
8382 DAG.getIntPtrConstant(HiVA.getLocMemOffset(), DL));
8383 // Emit the store.
8384 MemOpChains.push_back(DAG.getStore(
8385 Chain, DL, Hi, Address,
8387 } else {
8388 // Second half of f64 is passed in another GPR.
8389 Register RegHigh = HiVA.getLocReg();
8390 RegsToPass.push_back(std::make_pair(RegHigh, Hi));
8391 }
8392 continue;
8393 }
8394
8395 // Promote the value if needed.
8396 // For now, only handle fully promoted and indirect arguments.
8397 if (VA.getLocInfo() == CCValAssign::Indirect) {
8398 // Store the argument in a stack slot and pass its address.
8399 Align StackAlign =
8400 std::max(getPrefTypeAlign(Outs[OutIdx].ArgVT, DAG),
8401 getPrefTypeAlign(ArgValue.getValueType(), DAG));
8402 TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
8403 // If the original argument was split and passed by reference, we need to
8404 // store the required parts of it here (and pass just one address).
8405 unsigned ArgIndex = Outs[OutIdx].OrigArgIndex;
8406 unsigned ArgPartOffset = Outs[OutIdx].PartOffset;
8407 assert(ArgPartOffset == 0);
8408 // Calculate the total size to store. We don't have access to what we're
8409 // actually storing other than performing the loop and collecting the
8410 // info.
8412 while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) {
8413 SDValue PartValue = OutVals[OutIdx + 1];
8414 unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset;
8415 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
8416 EVT PartVT = PartValue.getValueType();
8417
8418 StoredSize += PartVT.getStoreSize();
8419 StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
8420 Parts.push_back(std::make_pair(PartValue, Offset));
8421 ++i;
8422 ++OutIdx;
8423 }
8424 SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
8425 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
8426 MemOpChains.push_back(
8427 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
8429 for (const auto &Part : Parts) {
8430 SDValue PartValue = Part.first;
8431 SDValue PartOffset = Part.second;
8433 DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
8434 MemOpChains.push_back(
8435 DAG.getStore(Chain, DL, PartValue, Address,
8437 }
8438 ArgValue = SpillSlot;
8439 } else {
8440 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL);
8441 }
8442
8443 // Use local copy if it is a byval arg.
8444 if (Flags.isByVal())
8445 ArgValue = ByValArgs[j++];
8446
8447 if (VA.isRegLoc()) {
8448 // Queue up the argument copies and emit them at the end.
8449 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
8450 } else {
8451 assert(VA.isMemLoc() && "Argument not register or memory");
8452 assert(!IsTailCall && "Tail call not allowed if stack is used "
8453 "for passing parameters");
8454
8455 // Work out the address of the stack slot.
8456 if (!StackPtr.getNode())
8457 StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT);
8459 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
8461
8462 // Emit the store.
8463 MemOpChains.push_back(
8464 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
8465 }
8466 }
8467
8468 // Join the stores, which are independent of one another.
8469 if (!MemOpChains.empty())
8470 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
8471
8472 SDValue Glue;
8473
8474 // Build a sequence of copy-to-reg nodes, chained and glued together.
8475 for (auto &Reg : RegsToPass) {
8476 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
8477 Glue = Chain.getValue(1);
8478 }
8479
8480 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
8481 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
8482 // split it and then direct call can be matched by PseudoCALL.
8484 const GlobalValue *GV = S->getGlobal();
8485 unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(GV)
8488 Callee = DAG.getTargetGlobalAddress(S->getGlobal(), DL, PtrVT, 0, OpFlags);
8489 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
8490 unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(nullptr)
8493 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);
8494 }
8495
8496 // The first call operand is the chain and the second is the target address.
8498 Ops.push_back(Chain);
8499 Ops.push_back(Callee);
8500
8501 // Add argument registers to the end of the list so that they are
8502 // known live into the call.
8503 for (auto &Reg : RegsToPass)
8504 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
8505
8506 if (!IsTailCall) {
8507 // Add a register mask operand representing the call-preserved registers.
8508 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
8509 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
8510 assert(Mask && "Missing call preserved mask for calling convention");
8511 Ops.push_back(DAG.getRegisterMask(Mask));
8512 }
8513
8514 // Glue the call to the argument copies, if any.
8515 if (Glue.getNode())
8516 Ops.push_back(Glue);
8517
8518 // Emit the call.
8519 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
8520 unsigned Op;
8521 switch (DAG.getTarget().getCodeModel()) {
8522 default:
8523 report_fatal_error("Unsupported code model");
8524 case CodeModel::Small:
8525 Op = IsTailCall ? LoongArchISD::TAIL : LoongArchISD::CALL;
8526 break;
8527 case CodeModel::Medium:
8528 assert(Subtarget.is64Bit() && "Medium code model requires LA64");
8530 break;
8531 case CodeModel::Large:
8532 assert(Subtarget.is64Bit() && "Large code model requires LA64");
8534 break;
8535 }
8536
8537 if (IsTailCall) {
8539 SDValue Ret = DAG.getNode(Op, DL, NodeTys, Ops);
8540 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
8541 return Ret;
8542 }
8543
8544 Chain = DAG.getNode(Op, DL, NodeTys, Ops);
8545 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
8546 Glue = Chain.getValue(1);
8547
8548 // Mark the end of the call, which is glued to the call itself.
8549 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
8550 Glue = Chain.getValue(1);
8551
8552 // Assign locations to each value returned by this call.
8554 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
8555 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_LoongArch);
8556
8557 // Copy all of the result registers out of their specified physreg.
8558 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
8559 auto &VA = RVLocs[i];
8560 // Copy the value out.
8561 SDValue RetValue =
8562 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
8563 // Glue the RetValue to the end of the call sequence.
8564 Chain = RetValue.getValue(1);
8565 Glue = RetValue.getValue(2);
8566
8567 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
8568 assert(VA.needsCustom());
8569 SDValue RetValue2 = DAG.getCopyFromReg(Chain, DL, RVLocs[++i].getLocReg(),
8570 MVT::i32, Glue);
8571 Chain = RetValue2.getValue(1);
8572 Glue = RetValue2.getValue(2);
8573 RetValue = DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64,
8574 RetValue, RetValue2);
8575 } else
8576 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL);
8577
8578 InVals.push_back(RetValue);
8579 }
8580
8581 return Chain;
8582}
8583
8585 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
8586 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context,
8587 const Type *RetTy) const {
8589 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
8590
8591 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
8592 LoongArchABI::ABI ABI =
8593 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
8594 if (CC_LoongArch(MF.getDataLayout(), ABI, i, Outs[i].VT, CCValAssign::Full,
8595 Outs[i].Flags, CCInfo, /*IsRet=*/true, nullptr))
8596 return false;
8597 }
8598 return true;
8599}
8600
8602 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
8604 const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
8605 SelectionDAG &DAG) const {
8606 // Stores the assignment of the return value to a location.
8608
8609 // Info about the registers and stack slot.
8610 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
8611 *DAG.getContext());
8612
8613 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
8614 nullptr, CC_LoongArch);
8615 if (CallConv == CallingConv::GHC && !RVLocs.empty())
8616 report_fatal_error("GHC functions return void only");
8617 SDValue Glue;
8618 SmallVector<SDValue, 4> RetOps(1, Chain);
8619
8620 // Copy the result values into the output registers.
8621 for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) {
8622 SDValue Val = OutVals[OutIdx];
8623 CCValAssign &VA = RVLocs[i];
8624 assert(VA.isRegLoc() && "Can only return in registers!");
8625
8626 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
8627 // Handle returning f64 on LA32D with a soft float ABI.
8628 assert(VA.isRegLoc() && "Expected return via registers");
8629 assert(VA.needsCustom());
8631 DAG.getVTList(MVT::i32, MVT::i32), Val);
8632 SDValue Lo = SplitF64.getValue(0);
8633 SDValue Hi = SplitF64.getValue(1);
8634 Register RegLo = VA.getLocReg();
8635 Register RegHi = RVLocs[++i].getLocReg();
8636
8637 Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
8638 Glue = Chain.getValue(1);
8639 RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
8640 Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
8641 Glue = Chain.getValue(1);
8642 RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
8643 } else {
8644 // Handle a 'normal' return.
8645 Val = convertValVTToLocVT(DAG, Val, VA, DL);
8646 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
8647
8648 // Guarantee that all emitted copies are stuck together.
8649 Glue = Chain.getValue(1);
8650 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
8651 }
8652 }
8653
8654 RetOps[0] = Chain; // Update chain.
8655
8656 // Add the glue node if we have it.
8657 if (Glue.getNode())
8658 RetOps.push_back(Glue);
8659
8660 return DAG.getNode(LoongArchISD::RET, DL, MVT::Other, RetOps);
8661}
8662
8663// Check if a constant splat can be generated using [x]vldi, where imm[12] == 1.
8664// Note: The following prefixes are excluded:
8665// imm[11:8] == 4'b0000, 4'b0100, 4'b1000
8666// as they can be represented using [x]vrepli.[whb]
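// For example, splatting the 32-bit value 0x0000AB00 matches the 4'b0001 case
// below: RequiredImm = (0b10001 << 8) | 0xAB = 0x11AB, i.e. the 13-bit vldi
// immediate 1_0001_10101011 with imm[12] == 1.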
8668 const APInt &SplatValue, const unsigned SplatBitSize) const {
8669 uint64_t RequiredImm = 0;
8670 uint64_t V = SplatValue.getZExtValue();
8671 if (SplatBitSize == 16 && !(V & 0x00FF)) {
8672 // 4'b0101
8673 RequiredImm = (0b10101 << 8) | (V >> 8);
8674 return {true, RequiredImm};
8675 } else if (SplatBitSize == 32) {
8676 // 4'b0001
8677 if (!(V & 0xFFFF00FF)) {
8678 RequiredImm = (0b10001 << 8) | (V >> 8);
8679 return {true, RequiredImm};
8680 }
8681 // 4'b0010
8682 if (!(V & 0xFF00FFFF)) {
8683 RequiredImm = (0b10010 << 8) | (V >> 16);
8684 return {true, RequiredImm};
8685 }
8686 // 4'b0011
8687 if (!(V & 0x00FFFFFF)) {
8688 RequiredImm = (0b10011 << 8) | (V >> 24);
8689 return {true, RequiredImm};
8690 }
8691 // 4'b0110
8692 if ((V & 0xFFFF00FF) == 0xFF) {
8693 RequiredImm = (0b10110 << 8) | (V >> 8);
8694 return {true, RequiredImm};
8695 }
8696 // 4'b0111
8697 if ((V & 0xFF00FFFF) == 0xFFFF) {
8698 RequiredImm = (0b10111 << 8) | (V >> 16);
8699 return {true, RequiredImm};
8700 }
8701 // 4'b1010
8702 if ((V & 0x7E07FFFF) == 0x3E000000 || (V & 0x7E07FFFF) == 0x40000000) {
8703 RequiredImm =
8704 (0b11010 << 8) | (((V >> 24) & 0xC0) ^ 0x40) | ((V >> 19) & 0x3F);
8705 return {true, RequiredImm};
8706 }
8707 } else if (SplatBitSize == 64) {
8708 // 4'b1011
8709 if ((V & 0xFFFFFFFF7E07FFFFULL) == 0x3E000000ULL ||
8710 (V & 0xFFFFFFFF7E07FFFFULL) == 0x40000000ULL) {
8711 RequiredImm =
8712 (0b11011 << 8) | (((V >> 24) & 0xC0) ^ 0x40) | ((V >> 19) & 0x3F);
8713 return {true, RequiredImm};
8714 }
8715 // 4'b1100
8716 if ((V & 0x7FC0FFFFFFFFFFFFULL) == 0x4000000000000000ULL ||
8717 (V & 0x7FC0FFFFFFFFFFFFULL) == 0x3FC0000000000000ULL) {
8718 RequiredImm =
8719 (0b11100 << 8) | (((V >> 56) & 0xC0) ^ 0x40) | ((V >> 48) & 0x3F);
8720 return {true, RequiredImm};
8721 }
8722 // 4'b1001
8723 auto sameBitsPreByte = [](uint64_t x) -> std::pair<bool, uint8_t> {
8724 uint8_t res = 0;
8725 for (int i = 0; i < 8; ++i) {
8726 uint8_t byte = x & 0xFF;
8727 if (byte == 0 || byte == 0xFF)
8728 res |= ((byte & 1) << i);
8729 else
8730 return {false, 0};
8731 x >>= 8;
8732 }
8733 return {true, res};
8734 };
8735 auto [IsSame, Suffix] = sameBitsPreByte(V);
8736 if (IsSame) {
8737 RequiredImm = (0b11001 << 8) | Suffix;
8738 return {true, RequiredImm};
8739 }
8740 }
8741 return {false, RequiredImm};
8742}
8743
8745 EVT VT) const {
8746 if (!Subtarget.hasExtLSX())
8747 return false;
8748
8749 if (VT == MVT::f32) {
8750 uint64_t masked = Imm.bitcastToAPInt().getZExtValue() & 0x7e07ffff;
8751 return (masked == 0x3e000000 || masked == 0x40000000);
8752 }
8753
8754 if (VT == MVT::f64) {
8755 uint64_t masked = Imm.bitcastToAPInt().getZExtValue() & 0x7fc0ffffffffffff;
8756 return (masked == 0x3fc0000000000000 || masked == 0x4000000000000000);
8757 }
8758
8759 return false;
8760}
8761
8762bool LoongArchTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
8763 bool ForCodeSize) const {
8764 // TODO: Maybe need more checks here after vector extension is supported.
8765 if (VT == MVT::f32 && !Subtarget.hasBasicF())
8766 return false;
8767 if (VT == MVT::f64 && !Subtarget.hasBasicD())
8768 return false;
8769 return (Imm.isZero() || Imm.isExactlyValue(1.0) || isFPImmVLDILegal(Imm, VT));
8770}
8771
8773 return true;
8774}
8775
8777 return true;
8778}
8779
8780bool LoongArchTargetLowering::shouldInsertFencesForAtomic(
8781 const Instruction *I) const {
8782 if (!Subtarget.is64Bit())
8783 return isa<LoadInst>(I) || isa<StoreInst>(I);
8784
8785 if (isa<LoadInst>(I))
8786 return true;
8787
8788 // On LA64, atomic store operations with IntegerBitWidth of 32 and 64 do not
8789 // require fences because we can use amswap_db.[w/d].
8790 Type *Ty = I->getOperand(0)->getType();
8791 if (isa<StoreInst>(I) && Ty->isIntegerTy()) {
8792 unsigned Size = Ty->getIntegerBitWidth();
8793 return (Size == 8 || Size == 16);
8794 }
8795
8796 return false;
8797}
8798
8800 LLVMContext &Context,
8801 EVT VT) const {
8802 if (!VT.isVector())
8803 return getPointerTy(DL);
8805}
8806
8808 EVT VT = Y.getValueType();
8809
8810 if (VT.isVector())
8811 return Subtarget.hasExtLSX() && VT.isInteger();
8812
8813 return VT.isScalarInteger() && !isa<ConstantSDNode>(Y);
8814}
8815
8817 const CallInst &I,
8818 MachineFunction &MF,
8819 unsigned Intrinsic) const {
8820 switch (Intrinsic) {
8821 default:
8822 return false;
8823 case Intrinsic::loongarch_masked_atomicrmw_xchg_i32:
8824 case Intrinsic::loongarch_masked_atomicrmw_add_i32:
8825 case Intrinsic::loongarch_masked_atomicrmw_sub_i32:
8826 case Intrinsic::loongarch_masked_atomicrmw_nand_i32:
8827 Info.opc = ISD::INTRINSIC_W_CHAIN;
8828 Info.memVT = MVT::i32;
8829 Info.ptrVal = I.getArgOperand(0);
8830 Info.offset = 0;
8831 Info.align = Align(4);
8834 return true;
8835 // TODO: Add more Intrinsics later.
8836 }
8837}
8838
8839 // When -mlamcas is enabled, MinCmpXchgSizeInBits will be set to 8, so
8840 // atomicrmw and/or/xor operations with operands narrower than 32 bits cannot
8841 // be expanded to am{and/or/xor}[_db].w through AtomicExpandPass. To prevent a
8842 // regression, we implement that expansion manually here.
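// For example (illustrative IR), an 8-bit RMW such as
//   %old = atomicrmw or ptr %p, i8 1 monotonic
// is rewritten below into a 32-bit atomicrmw on the containing aligned word,
// with the i8 operand zero-extended and shifted into its byte position, and
// the original i8 result recovered by shifting and truncating the wide result.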
8845
8847 Op == AtomicRMWInst::And) &&
8848 "Unable to expand");
8849 unsigned MinWordSize = 4;
8850
8851 IRBuilder<> Builder(AI);
8852 LLVMContext &Ctx = Builder.getContext();
8853 const DataLayout &DL = AI->getDataLayout();
8854 Type *ValueType = AI->getType();
8855 Type *WordType = Type::getIntNTy(Ctx, MinWordSize * 8);
8856
8857 Value *Addr = AI->getPointerOperand();
8858 PointerType *PtrTy = cast<PointerType>(Addr->getType());
8859 IntegerType *IntTy = DL.getIndexType(Ctx, PtrTy->getAddressSpace());
8860
8861 Value *AlignedAddr = Builder.CreateIntrinsic(
8862 Intrinsic::ptrmask, {PtrTy, IntTy},
8863 {Addr, ConstantInt::get(IntTy, ~(uint64_t)(MinWordSize - 1))}, nullptr,
8864 "AlignedAddr");
8865
8866 Value *AddrInt = Builder.CreatePtrToInt(Addr, IntTy);
8867 Value *PtrLSB = Builder.CreateAnd(AddrInt, MinWordSize - 1, "PtrLSB");
8868 Value *ShiftAmt = Builder.CreateShl(PtrLSB, 3);
8869 ShiftAmt = Builder.CreateTrunc(ShiftAmt, WordType, "ShiftAmt");
8870 Value *Mask = Builder.CreateShl(
8871 ConstantInt::get(WordType,
8872 (1 << (DL.getTypeStoreSize(ValueType) * 8)) - 1),
8873 ShiftAmt, "Mask");
8874 Value *Inv_Mask = Builder.CreateNot(Mask, "Inv_Mask");
8875 Value *ValOperand_Shifted =
8876 Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), WordType),
8877 ShiftAmt, "ValOperand_Shifted");
8878 Value *NewOperand;
8879 if (Op == AtomicRMWInst::And)
8880 NewOperand = Builder.CreateOr(ValOperand_Shifted, Inv_Mask, "AndOperand");
8881 else
8882 NewOperand = ValOperand_Shifted;
8883
8884 AtomicRMWInst *NewAI =
8885 Builder.CreateAtomicRMW(Op, AlignedAddr, NewOperand, Align(MinWordSize),
8886 AI->getOrdering(), AI->getSyncScopeID());
8887
8888 Value *Shift = Builder.CreateLShr(NewAI, ShiftAmt, "shifted");
8889 Value *Trunc = Builder.CreateTrunc(Shift, ValueType, "extracted");
8890 Value *FinalOldResult = Builder.CreateBitCast(Trunc, ValueType);
8891 AI->replaceAllUsesWith(FinalOldResult);
8892 AI->eraseFromParent();
8893}
8894
8897 // TODO: Add more AtomicRMWInst that needs to be extended.
8898
8899 // Since floating-point operations require a non-trivial set of data
8900 // operations, use CmpXChg to expand.
8901 if (AI->isFloatingPointOperation() ||
8907
8908 if (Subtarget.hasLAM_BH() && Subtarget.is64Bit() &&
8911 AI->getOperation() == AtomicRMWInst::Sub)) {
8913 }
8914
8915 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
8916 if (Subtarget.hasLAMCAS()) {
8917 if (Size < 32 && (AI->getOperation() == AtomicRMWInst::And ||
8921 if (AI->getOperation() == AtomicRMWInst::Nand || Size < 32)
8923 }
8924
8925 if (Size == 8 || Size == 16)
8928}
8929
8930static Intrinsic::ID
8932 AtomicRMWInst::BinOp BinOp) {
8933 if (GRLen == 64) {
8934 switch (BinOp) {
8935 default:
8936 llvm_unreachable("Unexpected AtomicRMW BinOp");
8938 return Intrinsic::loongarch_masked_atomicrmw_xchg_i64;
8939 case AtomicRMWInst::Add:
8940 return Intrinsic::loongarch_masked_atomicrmw_add_i64;
8941 case AtomicRMWInst::Sub:
8942 return Intrinsic::loongarch_masked_atomicrmw_sub_i64;
8944 return Intrinsic::loongarch_masked_atomicrmw_nand_i64;
8946 return Intrinsic::loongarch_masked_atomicrmw_umax_i64;
8948 return Intrinsic::loongarch_masked_atomicrmw_umin_i64;
8949 case AtomicRMWInst::Max:
8950 return Intrinsic::loongarch_masked_atomicrmw_max_i64;
8951 case AtomicRMWInst::Min:
8952 return Intrinsic::loongarch_masked_atomicrmw_min_i64;
8953 // TODO: support other AtomicRMWInst.
8954 }
8955 }
8956
8957 if (GRLen == 32) {
8958 switch (BinOp) {
8959 default:
8960 llvm_unreachable("Unexpected AtomicRMW BinOp");
8962 return Intrinsic::loongarch_masked_atomicrmw_xchg_i32;
8963 case AtomicRMWInst::Add:
8964 return Intrinsic::loongarch_masked_atomicrmw_add_i32;
8965 case AtomicRMWInst::Sub:
8966 return Intrinsic::loongarch_masked_atomicrmw_sub_i32;
8968 return Intrinsic::loongarch_masked_atomicrmw_nand_i32;
8970 return Intrinsic::loongarch_masked_atomicrmw_umax_i32;
8972 return Intrinsic::loongarch_masked_atomicrmw_umin_i32;
8973 case AtomicRMWInst::Max:
8974 return Intrinsic::loongarch_masked_atomicrmw_max_i32;
8975 case AtomicRMWInst::Min:
8976 return Intrinsic::loongarch_masked_atomicrmw_min_i32;
8977 // TODO: support other AtomicRMWInst.
8978 }
8979 }
8980
8981 llvm_unreachable("Unexpected GRLen\n");
8982}
8983
8986 AtomicCmpXchgInst *CI) const {
8987
8988 if (Subtarget.hasLAMCAS())
8990
8992 if (Size == 8 || Size == 16)
8995}
8996
8998 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
8999 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
9000 unsigned GRLen = Subtarget.getGRLen();
9001 AtomicOrdering FailOrd = CI->getFailureOrdering();
9002 Value *FailureOrdering =
9003 Builder.getIntN(Subtarget.getGRLen(), static_cast<uint64_t>(FailOrd));
9004 Intrinsic::ID CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i32;
9005 if (GRLen == 64) {
9006 CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i64;
9007 CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
9008 NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
9009 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
9010 }
9011 Type *Tys[] = {AlignedAddr->getType()};
9012 Value *Result = Builder.CreateIntrinsic(
9013 CmpXchgIntrID, Tys, {AlignedAddr, CmpVal, NewVal, Mask, FailureOrdering});
9014 if (GRLen == 64)
9015 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
9016 return Result;
9017}
9018
9020 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
9021 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
9022 // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
9023 // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
9024 // mask, as this produces better code than the LL/SC loop emitted by
9025 // int_loongarch_masked_atomicrmw_xchg.
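// For example, `atomicrmw xchg ptr %p, i8 0` becomes an And of the aligned
// word with the inverted byte mask, and an xchg with i8 -1 becomes an Or with
// the byte mask, avoiding the masked-xchg LL/SC loop.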
9026 if (AI->getOperation() == AtomicRMWInst::Xchg &&
9029 if (CVal->isZero())
9030 return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,
9031 Builder.CreateNot(Mask, "Inv_Mask"),
9032 AI->getAlign(), Ord);
9033 if (CVal->isMinusOne())
9034 return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,
9035 AI->getAlign(), Ord);
9036 }
9037
9038 unsigned GRLen = Subtarget.getGRLen();
9039 Value *Ordering =
9040 Builder.getIntN(GRLen, static_cast<uint64_t>(AI->getOrdering()));
9041 Type *Tys[] = {AlignedAddr->getType()};
9043 AI->getModule(),
9045
9046 if (GRLen == 64) {
9047 Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
9048 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
9049 ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
9050 }
9051
9052 Value *Result;
9053
9054 // Must pass the shift amount needed to sign extend the loaded value prior
9055 // to performing a signed comparison for min/max. ShiftAmt is the number of
9056 // bits to shift the value into position. Pass GRLen-ShiftAmt-ValWidth, which
9057 // is the number of bits to left+right shift the value in order to
9058 // sign-extend.
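// For example, an i8 min/max at byte offset 1 on LA64 has ShiftAmt = 8 and
// ValWidth = 8, so SextShamt = (64 - 8) - 8 = 48: shifting the masked word
// left and then right by 48 sign-extends the byte before the signed compare.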
9059 if (AI->getOperation() == AtomicRMWInst::Min ||
9061 const DataLayout &DL = AI->getDataLayout();
9062 unsigned ValWidth =
9063 DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
9064 Value *SextShamt =
9065 Builder.CreateSub(Builder.getIntN(GRLen, GRLen - ValWidth), ShiftAmt);
9066 Result = Builder.CreateCall(LlwOpScwLoop,
9067 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
9068 } else {
9069 Result =
9070 Builder.CreateCall(LlwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
9071 }
9072
9073 if (GRLen == 64)
9074 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
9075 return Result;
9076}
9077
9079 const MachineFunction &MF, EVT VT) const {
9080 VT = VT.getScalarType();
9081
9082 if (!VT.isSimple())
9083 return false;
9084
9085 switch (VT.getSimpleVT().SimpleTy) {
9086 case MVT::f32:
9087 case MVT::f64:
9088 return true;
9089 default:
9090 break;
9091 }
9092
9093 return false;
9094}
9095
9097 const Constant *PersonalityFn) const {
9098 return LoongArch::R4;
9099}
9100
9102 const Constant *PersonalityFn) const {
9103 return LoongArch::R5;
9104}
9105
9106//===----------------------------------------------------------------------===//
9107// Target Optimization Hooks
9108//===----------------------------------------------------------------------===//
9109
9111 const LoongArchSubtarget &Subtarget) {
9112 // The FRECIPE feature's estimate instructions have a relative accuracy of
9113 // 2^-14. IEEE single precision has a 23-bit fraction and double has 52 bits.
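// Each Newton-Raphson refinement step roughly squares the relative error, so
// one step reaches about 2^-28 (enough for f32's 24-bit significand) and two
// steps reach about 2^-56 (enough for f64's 53-bit significand).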
9114 int RefinementSteps = VT.getScalarType() == MVT::f64 ? 2 : 1;
9115 return RefinementSteps;
9116}
9117
9119 SelectionDAG &DAG, int Enabled,
9120 int &RefinementSteps,
9121 bool &UseOneConstNR,
9122 bool Reciprocal) const {
9123 if (Subtarget.hasFrecipe()) {
9124 SDLoc DL(Operand);
9125 EVT VT = Operand.getValueType();
9126
9127 if (VT == MVT::f32 || (VT == MVT::f64 && Subtarget.hasBasicD()) ||
9128 (VT == MVT::v4f32 && Subtarget.hasExtLSX()) ||
9129 (VT == MVT::v2f64 && Subtarget.hasExtLSX()) ||
9130 (VT == MVT::v8f32 && Subtarget.hasExtLASX()) ||
9131 (VT == MVT::v4f64 && Subtarget.hasExtLASX())) {
9132
9133 if (RefinementSteps == ReciprocalEstimate::Unspecified)
9134 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
9135
9136 SDValue Estimate = DAG.getNode(LoongArchISD::FRSQRTE, DL, VT, Operand);
9137 if (Reciprocal)
9138 Estimate = DAG.getNode(ISD::FMUL, DL, VT, Operand, Estimate);
9139
9140 return Estimate;
9141 }
9142 }
9143
9144 return SDValue();
9145}
9146
9148 SelectionDAG &DAG,
9149 int Enabled,
9150 int &RefinementSteps) const {
9151 if (Subtarget.hasFrecipe()) {
9152 SDLoc DL(Operand);
9153 EVT VT = Operand.getValueType();
9154
9155 if (VT == MVT::f32 || (VT == MVT::f64 && Subtarget.hasBasicD()) ||
9156 (VT == MVT::v4f32 && Subtarget.hasExtLSX()) ||
9157 (VT == MVT::v2f64 && Subtarget.hasExtLSX()) ||
9158 (VT == MVT::v8f32 && Subtarget.hasExtLASX()) ||
9159 (VT == MVT::v4f64 && Subtarget.hasExtLASX())) {
9160
9161 if (RefinementSteps == ReciprocalEstimate::Unspecified)
9162 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
9163
9164 return DAG.getNode(LoongArchISD::FRECIPE, DL, VT, Operand);
9165 }
9166 }
9167
9168 return SDValue();
9169}
9170
9171//===----------------------------------------------------------------------===//
9172// LoongArch Inline Assembly Support
9173//===----------------------------------------------------------------------===//
9174
9176LoongArchTargetLowering::getConstraintType(StringRef Constraint) const {
9177 // LoongArch specific constraints in GCC: config/loongarch/constraints.md
9178 //
9179 // 'f': A floating-point register (if available).
9180 // 'k': A memory operand whose address is formed by a base register and
9181 // (optionally scaled) index register.
9182 // 'l': A signed 16-bit constant.
9183 // 'm': A memory operand whose address is formed by a base register and
9184 // offset that is suitable for use in instructions with the same
9185 // addressing mode as st.w and ld.w.
9186 // 'q': A general-purpose register except for $r0 and $r1 (for the csrxchg
9187 // instruction)
9188 // 'I': A signed 12-bit constant (for arithmetic instructions).
9189 // 'J': Integer zero.
9190 // 'K': An unsigned 12-bit constant (for logic instructions).
9191 // "ZB": An address that is held in a general-purpose register. The offset is
9192 // zero.
9193 // "ZC": A memory operand whose address is formed by a base register and
9194 // offset that is suitable for use in instructions with the same
9195 // addressing mode as ll.w and sc.w.
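// For example (illustrative only), the 'I' and 'K' constraints could be used
// from C as:
//   asm("addi.w %0, %1, %2" : "=r"(res) : "r"(a), "I"(-12));
//   asm("andi   %0, %1, %2" : "=r"(res) : "r"(a), "K"(0xfff));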
9196 if (Constraint.size() == 1) {
9197 switch (Constraint[0]) {
9198 default:
9199 break;
9200 case 'f':
9201 case 'q':
9202 return C_RegisterClass;
9203 case 'l':
9204 case 'I':
9205 case 'J':
9206 case 'K':
9207 return C_Immediate;
9208 case 'k':
9209 return C_Memory;
9210 }
9211 }
9212
9213 if (Constraint == "ZC" || Constraint == "ZB")
9214 return C_Memory;
9215
9216 // 'm' is handled here.
9217 return TargetLowering::getConstraintType(Constraint);
9218}
9219
9220InlineAsm::ConstraintCode LoongArchTargetLowering::getInlineAsmMemConstraint(
9221 StringRef ConstraintCode) const {
9222 return StringSwitch<InlineAsm::ConstraintCode>(ConstraintCode)
9226 .Default(TargetLowering::getInlineAsmMemConstraint(ConstraintCode));
9227}
9228
9229std::pair<unsigned, const TargetRegisterClass *>
9230LoongArchTargetLowering::getRegForInlineAsmConstraint(
9231 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
9232 // First, see if this is a constraint that directly corresponds to a LoongArch
9233 // register class.
9234 if (Constraint.size() == 1) {
9235 switch (Constraint[0]) {
9236 case 'r':
9237 // TODO: Support fixed vectors up to GRLen?
9238 if (VT.isVector())
9239 break;
9240 return std::make_pair(0U, &LoongArch::GPRRegClass);
9241 case 'q':
9242 return std::make_pair(0U, &LoongArch::GPRNoR0R1RegClass);
9243 case 'f':
9244 if (Subtarget.hasBasicF() && VT == MVT::f32)
9245 return std::make_pair(0U, &LoongArch::FPR32RegClass);
9246 if (Subtarget.hasBasicD() && VT == MVT::f64)
9247 return std::make_pair(0U, &LoongArch::FPR64RegClass);
9248 if (Subtarget.hasExtLSX() &&
9249 TRI->isTypeLegalForClass(LoongArch::LSX128RegClass, VT))
9250 return std::make_pair(0U, &LoongArch::LSX128RegClass);
9251 if (Subtarget.hasExtLASX() &&
9252 TRI->isTypeLegalForClass(LoongArch::LASX256RegClass, VT))
9253 return std::make_pair(0U, &LoongArch::LASX256RegClass);
9254 break;
9255 default:
9256 break;
9257 }
9258 }
9259
9260 // TargetLowering::getRegForInlineAsmConstraint uses the name of the TableGen
9261 // record (e.g. the "R0" in `def R0`) to choose registers for InlineAsm
9262 // constraints while the official register name is prefixed with a '$'. So we
9263 // clip the '$' from the original constraint string (e.g. {$r0} to {r0})
9264 // before it is parsed. And TargetLowering::getRegForInlineAsmConstraint is
9265 // case insensitive, so no need to convert the constraint to upper case here.
9266 //
9267 // For now, no need to support ABI names (e.g. `$a0`) as clang will correctly
9268 // decode the usage of register name aliases into their official names. And
9269 // AFAIK, the not yet upstreamed `rustc` for LoongArch will always use
9270 // official register names.
9271 if (Constraint.starts_with("{$r") || Constraint.starts_with("{$f") ||
9272 Constraint.starts_with("{$vr") || Constraint.starts_with("{$xr")) {
9273 bool IsFP = Constraint[2] == 'f';
9274 std::pair<StringRef, StringRef> Temp = Constraint.split('$');
9275 std::pair<unsigned, const TargetRegisterClass *> R;
9277 TRI, join_items("", Temp.first, Temp.second), VT);
9278 // Match those names to the widest floating point register type available.
9279 if (IsFP) {
9280 unsigned RegNo = R.first;
9281 if (LoongArch::F0 <= RegNo && RegNo <= LoongArch::F31) {
9282 if (Subtarget.hasBasicD() && (VT == MVT::f64 || VT == MVT::Other)) {
9283 unsigned DReg = RegNo - LoongArch::F0 + LoongArch::F0_64;
9284 return std::make_pair(DReg, &LoongArch::FPR64RegClass);
9285 }
9286 }
9287 }
9288 return R;
9289 }
9290
9291 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
9292}
9293
9294void LoongArchTargetLowering::LowerAsmOperandForConstraint(
9295 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
9296 SelectionDAG &DAG) const {
9297 // Currently only support length 1 constraints.
9298 if (Constraint.size() == 1) {
9299 switch (Constraint[0]) {
9300 case 'l':
9301 // Validate & create a 16-bit signed immediate operand.
9302 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
9303 uint64_t CVal = C->getSExtValue();
9304 if (isInt<16>(CVal))
9305 Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op),
9306 Subtarget.getGRLenVT()));
9307 }
9308 return;
9309 case 'I':
9310 // Validate & create a 12-bit signed immediate operand.
9311 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
9312 uint64_t CVal = C->getSExtValue();
9313 if (isInt<12>(CVal))
9314 Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op),
9315 Subtarget.getGRLenVT()));
9316 }
9317 return;
9318 case 'J':
9319 // Validate & create an integer zero operand.
9320 if (auto *C = dyn_cast<ConstantSDNode>(Op))
9321 if (C->getZExtValue() == 0)
9322 Ops.push_back(
9323 DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getGRLenVT()));
9324 return;
9325 case 'K':
9326 // Validate & create a 12-bit unsigned immediate operand.
9327 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
9328 uint64_t CVal = C->getZExtValue();
9329 if (isUInt<12>(CVal))
9330 Ops.push_back(
9331 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT()));
9332 }
9333 return;
9334 default:
9335 break;
9336 }
9337 }
9339}
9340
9341#define GET_REGISTER_MATCHER
9342#include "LoongArchGenAsmMatcher.inc"
9343
9346 const MachineFunction &MF) const {
9347 std::pair<StringRef, StringRef> Name = StringRef(RegName).split('$');
9348 std::string NewRegName = Name.second.str();
9349 Register Reg = MatchRegisterAltName(NewRegName);
9350 if (!Reg)
9351 Reg = MatchRegisterName(NewRegName);
9352 if (!Reg)
9353 return Reg;
9354 BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
9355 if (!ReservedRegs.test(Reg))
9356 report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
9357 StringRef(RegName) + "\"."));
9358 return Reg;
9359}
9360
9362 EVT VT, SDValue C) const {
9363 // TODO: Support vectors.
9364 if (!VT.isScalarInteger())
9365 return false;
9366
9367 // Omit the optimization if the data size exceeds GRLen.
9368 if (VT.getSizeInBits() > Subtarget.getGRLen())
9369 return false;
9370
9371 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
9372 const APInt &Imm = ConstNode->getAPIntValue();
9373 // Break MUL into (SLLI + ADD/SUB) or ALSL.
9374 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
9375 (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
9376 return true;
9377 // Break MUL into (ALSL x, (SLLI x, imm0), imm1).
9378 if (ConstNode->hasOneUse() &&
9379 ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
9380 (Imm - 8).isPowerOf2() || (Imm - 16).isPowerOf2()))
9381 return true;
9382 // Break (MUL x, imm) into (ADD (SLLI x, s0), (SLLI x, s1)),
9383 // in which the immediate has two set bits. Or break (MUL x, imm)
9384 // into (SUB (SLLI x, s0), (SLLI x, s1)), in which the immediate
9385 // equals (1 << s0) - (1 << s1).
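// For example, x * 0x1010 (two set bits) can become (x << 12) + (x << 4), and
// x * 0x7F00 = x * ((1 << 15) - (1 << 8)) can become (x << 15) - (x << 8).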
9386 if (ConstNode->hasOneUse() && !(Imm.sge(-2048) && Imm.sle(4095))) {
9387 unsigned Shifts = Imm.countr_zero();
9388 // Reject immediates which can be composed via a single LUI.
9389 if (Shifts >= 12)
9390 return false;
9391 // Reject multiplications that can be optimized to
9392 // (SLLI (ALSL x, x, 1/2/3/4), s).
9393 APInt ImmPop = Imm.ashr(Shifts);
9394 if (ImmPop == 3 || ImmPop == 5 || ImmPop == 9 || ImmPop == 17)
9395 return false;
9396 // We do not consider the case `(-Imm - ImmSmall).isPowerOf2()`,
9397 // since it needs one more instruction than the other 3 cases.
9398 APInt ImmSmall = APInt(Imm.getBitWidth(), 1ULL << Shifts, true);
9399 if ((Imm - ImmSmall).isPowerOf2() || (Imm + ImmSmall).isPowerOf2() ||
9400 (ImmSmall - Imm).isPowerOf2())
9401 return true;
9402 }
9403 }
9404
9405 return false;
9406}
9407
9409 const AddrMode &AM,
9410 Type *Ty, unsigned AS,
9411 Instruction *I) const {
9412 // LoongArch has four basic addressing modes:
9413 // 1. reg
9414 // 2. reg + 12-bit signed offset
9415 // 3. reg + 14-bit signed offset left-shifted by 2
9416 // 4. reg1 + reg2
9417 // TODO: Add more checks after supporting the vector extension.
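// For example, `ld.w $a0, $a1, 2044` uses mode 2, `ldptr.w $a0, $a1, 4` uses
// mode 3, and `ldx.w $a0, $a1, $a2` uses mode 4.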
9418
9419 // No global is ever allowed as a base.
9420 if (AM.BaseGV)
9421 return false;
9422
9423 // Require a 12-bit signed offset or 14-bit signed offset left-shifted by 2
9424 // with `UAL` feature.
9425 if (!isInt<12>(AM.BaseOffs) &&
9426 !(isShiftedInt<14, 2>(AM.BaseOffs) && Subtarget.hasUAL()))
9427 return false;
9428
9429 switch (AM.Scale) {
9430 case 0:
9431 // "r+i" or just "i", depending on HasBaseReg.
9432 break;
9433 case 1:
9434 // "r+r+i" is not allowed.
9435 if (AM.HasBaseReg && AM.BaseOffs)
9436 return false;
9437 // Otherwise we have "r+r" or "r+i".
9438 break;
9439 case 2:
9440 // "2*r+r" or "2*r+i" is not allowed.
9441 if (AM.HasBaseReg || AM.BaseOffs)
9442 return false;
9443 // Allow "2*r" as "r+r".
9444 break;
9445 default:
9446 return false;
9447 }
9448
9449 return true;
9450}
9451
9453 return isInt<12>(Imm);
9454}
9455
9457 return isInt<12>(Imm);
9458}
9459
9461 // Zexts are free if they can be combined with a load.
9462 // Don't advertise i32->i64 zextload as being free for LA64. It interacts
9463 // poorly with type legalization of compares preferring sext.
9464 if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
9465 EVT MemVT = LD->getMemoryVT();
9466 if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
9467 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
9468 LD->getExtensionType() == ISD::ZEXTLOAD))
9469 return true;
9470 }
9471
9472 return TargetLowering::isZExtFree(Val, VT2);
9473}
9474
9476 EVT DstVT) const {
9477 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
9478}
9479
9481 return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
9482}
9483
9485 // TODO: Support vectors.
9486 if (Y.getValueType().isVector())
9487 return false;
9488
9489 return !isa<ConstantSDNode>(Y);
9490}
9491
9493 // LAMCAS will use amcas[_DB].{b/h/w/d} which does not require extension.
9494 return Subtarget.hasLAMCAS() ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND;
9495}
9496
9498 Type *Ty, bool IsSigned) const {
9499 if (Subtarget.is64Bit() && Ty->isIntegerTy(32))
9500 return true;
9501
9502 return IsSigned;
9503}
9504
9506 // Return false to suppress the unnecessary extensions if the LibCall
9507 // arguments or return value is a float narrower than GRLEN on a soft FP ABI.
9508 if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
9509 Type.getSizeInBits() < Subtarget.getGRLen()))
9510 return false;
9511 return true;
9512}
9513
9514 // memcpy, and other memory intrinsics, typically try to use wider loads/stores
9515// if the source/dest is aligned and the copy size is large enough. We therefore
9516// want to align such objects passed to memory intrinsics.
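// For example, aligning a copied object to 8 bytes on LA64 lets a memcpy be
// expanded with 8-byte ld.d/st.d pairs rather than narrower accesses.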
9518 unsigned &MinSize,
9519 Align &PrefAlign) const {
9520 if (!isa<MemIntrinsic>(CI))
9521 return false;
9522
9523 if (Subtarget.is64Bit()) {
9524 MinSize = 8;
9525 PrefAlign = Align(8);
9526 } else {
9527 MinSize = 4;
9528 PrefAlign = Align(4);
9529 }
9530
9531 return true;
9532}
9533
9542
9543bool LoongArchTargetLowering::splitValueIntoRegisterParts(
9544 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
9545 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
9546 bool IsABIRegCopy = CC.has_value();
9547 EVT ValueVT = Val.getValueType();
9548
9549 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
9550 PartVT == MVT::f32) {
9551 // Cast the [b]f16 to i16, extend to i32, pad with ones to make a float
9552 // NaN, and cast to f32.
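// For example, the f16 value 1.0 (bit pattern 0x3C00) becomes the i32
// 0xFFFF3C00, which reinterprets as a quiet f32 NaN carrying the original half
// in its low 16 bits.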
9553 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);
9554 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);
9555 Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val,
9556 DAG.getConstant(0xFFFF0000, DL, MVT::i32));
9557 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
9558 Parts[0] = Val;
9559 return true;
9560 }
9561
9562 return false;
9563}
9564
9565SDValue LoongArchTargetLowering::joinRegisterPartsIntoValue(
9566 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
9567 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
9568 bool IsABIRegCopy = CC.has_value();
9569
9570 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
9571 PartVT == MVT::f32) {
9572 SDValue Val = Parts[0];
9573
9574 // Cast the f32 to i32, truncate to i16, and cast back to [b]f16.
9575 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
9576 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);
9577 Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
9578 return Val;
9579 }
9580
9581 return SDValue();
9582}
9583
9584MVT LoongArchTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
9585 CallingConv::ID CC,
9586 EVT VT) const {
9587 // Use f32 to pass f16.
9588 if (VT == MVT::f16 && Subtarget.hasBasicF())
9589 return MVT::f32;
9590
9592}
9593
9594unsigned LoongArchTargetLowering::getNumRegistersForCallingConv(
9595 LLVMContext &Context, CallingConv::ID CC, EVT VT) const {
9596 // Use f32 to pass f16.
9597 if (VT == MVT::f16 && Subtarget.hasBasicF())
9598 return 1;
9599
9601}
9602
9604 SDValue Op, const APInt &OriginalDemandedBits,
9605 const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
9606 unsigned Depth) const {
9607 EVT VT = Op.getValueType();
9608 unsigned BitWidth = OriginalDemandedBits.getBitWidth();
9609 unsigned Opc = Op.getOpcode();
9610 switch (Opc) {
9611 default:
9612 break;
9615 SDValue Src = Op.getOperand(0);
9616 MVT SrcVT = Src.getSimpleValueType();
9617 unsigned SrcBits = SrcVT.getScalarSizeInBits();
9618 unsigned NumElts = SrcVT.getVectorNumElements();
9619
9620 // If we don't need the sign bits at all, just return zero.
9621 if (OriginalDemandedBits.countr_zero() >= NumElts)
9622 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
9623
9624 // Only demand the vector elements of the sign bits we need.
9625 APInt KnownUndef, KnownZero;
9626 APInt DemandedElts = OriginalDemandedBits.zextOrTrunc(NumElts);
9627 if (SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef, KnownZero,
9628 TLO, Depth + 1))
9629 return true;
9630
9631 Known.Zero = KnownZero.zext(BitWidth);
9632 Known.Zero.setHighBits(BitWidth - NumElts);
9633
9634 // [X]VMSKLTZ only uses the MSB from each vector element.
9635 KnownBits KnownSrc;
9636 APInt DemandedSrcBits = APInt::getSignMask(SrcBits);
9637 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, KnownSrc, TLO,
9638 Depth + 1))
9639 return true;
9640
9641 if (KnownSrc.One[SrcBits - 1])
9642 Known.One.setLowBits(NumElts);
9643 else if (KnownSrc.Zero[SrcBits - 1])
9644 Known.Zero.setLowBits(NumElts);
9645
9646 // Attempt to avoid multi-use ops if we don't need anything from it.
9648 Src, DemandedSrcBits, DemandedElts, TLO.DAG, Depth + 1))
9649 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewSrc));
9650 return false;
9651 }
9652 }
9653
9655 Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth);
9656}
9657
9659 unsigned Opc = VecOp.getOpcode();
9660
9661 // Assume target opcodes can't be scalarized.
9662 // TODO - do we have any exceptions?
9663 if (Opc >= ISD::BUILTIN_OP_END || !isBinOp(Opc))
9664 return false;
9665
9666 // If the vector op is not supported, try to convert to scalar.
9667 EVT VecVT = VecOp.getValueType();
9669 return true;
9670
9671 // If the vector op is supported, but the scalar op is not, the transform may
9672 // not be worthwhile.
9673 EVT ScalarVT = VecVT.getScalarType();
9674 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT);
9675}
9676
9678 unsigned Index) const {
9680 return false;
9681
9682 // Extracting a 128-bit subvector from index 0 of a 256-bit vector is free.
9683 return Index == 0;
9684}
9685
9687 unsigned Index) const {
9688 EVT EltVT = VT.getScalarType();
9689
9690 // Extracting a scalar FP value from index 0 of a vector is free.
9691 return (EltVT == MVT::f32 || EltVT == MVT::f64) && Index == 0;
9692}
static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
static MachineBasicBlock * insertDivByZeroTrap(MachineInstr &MI, MachineBasicBlock *MBB)
static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG)
static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG)
static SDValue lowerVECTOR_SHUFFLE_VPACKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPACKEV (if possible).
static MachineBasicBlock * emitPseudoVMSKCOND(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue performSELECT_CCCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static void replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, unsigned ResOp)
static SDValue lowerVECTOR_SHUFFLEAsZeroOrAnyExtend(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const APInt &Zeroable)
Lower VECTOR_SHUFFLE as ZERO_EXTEND or ANY_EXTEND (if possible).
static SDValue legalizeIntrinsicImmArg(SDNode *Node, unsigned ImmOp, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, bool IsSigned=false)
static cl::opt< MaterializeFPImm > MaterializeFPImmInsNum("loongarch-materialize-float-imm", cl::Hidden, cl::desc("Maximum number of instructions used (including code sequence " "to generate the value and moving the value to FPR) when " "materializing floating-point immediates (default = 3)"), cl::init(MaterializeFPImm3Ins), cl::values(clEnumValN(NoMaterializeFPImm, "0", "Use constant pool"), clEnumValN(MaterializeFPImm2Ins, "2", "Materialize FP immediate within 2 instructions"), clEnumValN(MaterializeFPImm3Ins, "3", "Materialize FP immediate within 3 instructions"), clEnumValN(MaterializeFPImm4Ins, "4", "Materialize FP immediate within 4 instructions"), clEnumValN(MaterializeFPImm5Ins, "5", "Materialize FP immediate within 5 instructions"), clEnumValN(MaterializeFPImm6Ins, "6", "Materialize FP immediate within 6 instructions " "(behaves same as 5 on loongarch64)")))
static SDValue lowerVECTOR_SHUFFLE_XVPERMI(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVPERMI (if possible).
static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op, StringRef ErrorMsg, SelectionDAG &DAG)
const MCPhysReg ArgXRs[]
static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State, CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2, MVT ValVT2, MVT LocVT2, ISD::ArgFlagsTy ArgFlags2)
const MCPhysReg ArgFPR64s[]
static MachineBasicBlock * emitPseudoCTPOP(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue performMOVGR2FR_WCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
#define IOCSRWR_CASE(NAME, NODE)
#define CRC_CASE_EXT_UNARYOP(NAME, NODE)
static SDValue lowerVECTOR_SHUFFLE_VPACKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPACKOD (if possible).
static SDValue signExtendBitcastSrcVector(SelectionDAG &DAG, EVT SExtVT, SDValue Src, const SDLoc &DL)
static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Dispatching routine to lower various 256-bit LoongArch vector shuffles.
static SDValue lowerVECTOR_SHUFFLE_VREPLVEI(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into VREPLVEI (if possible).
static MachineBasicBlock * emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static void fillVector(ArrayRef< SDValue > Ops, SelectionDAG &DAG, SDLoc DL, const LoongArchSubtarget &Subtarget, SDValue &Vector, EVT ResTy)
static SDValue performEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue fillSubVectorFromBuildVector(BuildVectorSDNode *Node, SelectionDAG &DAG, SDLoc DL, const LoongArchSubtarget &Subtarget, EVT ResTy, unsigned first)
static bool isSelectPseudo(MachineInstr &MI)
static SDValue foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp, SelectionDAG &DAG, bool IsSigned=false)
const MCPhysReg ArgGPRs[]
static SDValue lowerVECTOR_SHUFFLE_XVPERM(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVPERM (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVILVL(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVILVL (if possible).
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp, unsigned ExtOpc=ISD::ANY_EXTEND)
static void replaceVecCondBranchResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, unsigned ResOp)
#define ASRT_LE_GT_CASE(NAME)
static SDValue lowerVECTOR_SHUFFLE_XVPACKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPACKEV (if possible).
static SDValue performBR_CCCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static void computeZeroableShuffleElements(ArrayRef< int > Mask, SDValue V1, SDValue V2, APInt &KnownUndef, APInt &KnownZero)
Compute whether each element of a shuffle is zeroable.
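A hedged sketch of what "zeroable" means here (standalone and simplified; the names computeZeroableSketch, V1Zero and V2Zero are illustrative, and the real helper derives the known-zero lanes from the shuffle operands, e.g. constant BUILD_VECTOR elements, rather than taking them as parameters):
#include <vector>

// Sketch: output lane I is "zeroable" if it is undef (-1) or reads a source
// lane already known to be zero. Mask values 0..N-1 read the first input,
// N..2N-1 read the second.
static std::vector<bool>
computeZeroableSketch(const std::vector<int> &Mask,
                      const std::vector<bool> &V1Zero,
                      const std::vector<bool> &V2Zero) {
  const int N = static_cast<int>(Mask.size());
  std::vector<bool> Zeroable(N, false);
  for (int I = 0; I < N; ++I) {
    int M = Mask[I];
    if (M < 0)
      Zeroable[I] = true;            // undef can be materialized as zero
    else if (M < N)
      Zeroable[I] = V1Zero[M];       // lane M of the first input
    else
      Zeroable[I] = V2Zero[M - N];   // lane M - N of the second input
  }
  return Zeroable;
}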
static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue widenShuffleMask(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
static MachineBasicBlock * emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static bool canonicalizeShuffleVectorByLane(const SDLoc &DL, MutableArrayRef< int > Mask, MVT VT, SDValue &V1, SDValue &V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Shuffle vectors by lane to generate more optimized instructions.
static SDValue lowerVECTOR_SHUFFLE_XVILVH(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVILVH (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVSHUF (if possible).
static void replaceCMP_XCHG_128Results(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG)
static SDValue lowerVectorPickVE2GR(SDNode *N, SelectionDAG &DAG, unsigned ResOp)
static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
#define IOCSRRD_CASE(NAME, NODE)
static int matchShuffleAsByteRotate(MVT VT, SDValue &V1, SDValue &V2, ArrayRef< int > Mask)
Attempts to match vector shuffle as byte rotation.
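As a simplified, self-contained illustration of the matching idea (single-input only and expressed in elements rather than bytes; the name matchRotationSketch is an assumption, and -1 marks an undef mask element as is conventional for shuffle masks — this is not the actual two-operand byte-rotate matcher):
#include <vector>

// Sketch: find a rotation amount R such that every defined mask element
// satisfies Mask[I] == (I + R) % N; undef (-1) elements match anything.
// Returns -1 if the mask is not a consistent rotation.
static int matchRotationSketch(const std::vector<int> &Mask) {
  const int N = static_cast<int>(Mask.size());
  int Rotation = -1;
  for (int I = 0; I < N; ++I) {
    if (Mask[I] < 0)
      continue;
    int R = (Mask[I] - I + N) % N;
    if (Rotation < 0)
      Rotation = R;
    else if (R != Rotation)
      return -1;                      // elements disagree on the rotation
  }
  return Rotation;                    // -1 if every element was undef
}
For a v16i8 mask <1, 2, ..., 15, 0> this reports a rotation of one element, which a byte-rotate lowering could then scale into a byte amount.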
static SDValue lowerVECTOR_SHUFFLE_XVPICKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPICKEV (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVREPLVEI (if possible).
static int matchShuffleAsShift(MVT &ShiftVT, unsigned &Opcode, unsigned ScalarSizeInBits, ArrayRef< int > Mask, int MaskOffset, const APInt &Zeroable)
Attempts to match a shuffle mask against the VBSLL, VBSRL, VSLLI and VSRLI instructions.
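One way to picture the element-shift case (a standalone sketch under the assumption that a "shift left by S elements" leaves the low S destination lanes zero and moves source lane I - S into lane I; the actual helper works on bytes and bits, handles both directions, and the lane convention here is only illustrative):
#include <vector>

// Sketch: return the smallest element shift S (1..N-1) such that lanes below S
// are zeroable and every other defined lane I reads source element I - S;
// return -1 if no such shift exists.
static int matchShiftLeftSketch(const std::vector<int> &Mask,
                                const std::vector<bool> &Zeroable) {
  const int N = static_cast<int>(Mask.size());
  for (int Shift = 1; Shift < N; ++Shift) {
    bool Matches = true;
    for (int I = 0; I < N && Matches; ++I) {
      if (I < Shift)
        Matches = Zeroable[I];                        // low lanes must be zero
      else
        Matches = Mask[I] < 0 || Mask[I] == I - Shift;
    }
    if (Matches)
      return Shift;
  }
  return -1;
}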
static SDValue lowerVECTOR_SHUFFLE_VILVL(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VILVL (if possible).
static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG)
static MachineBasicBlock * emitBuildPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE as lane permute and then shuffle (if possible).
static SDValue performVMSKLTZCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static void replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
#define CSR_CASE(ID)
static SDValue lowerVECTOR_SHUFFLE_VPICKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPICKOD (if possible).
static Intrinsic::ID getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen, AtomicRMWInst::BinOp BinOp)
static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS, ISD::CondCode &CC, SelectionDAG &DAG)
static bool isRepeatedShuffleMask(unsigned LaneSizeInBits, MVT VT, ArrayRef< int > Mask, SmallVectorImpl< int > &RepeatedMask)
Test whether a shuffle mask is equivalent within each sub-lane.
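A minimal sketch of the per-lane check (restricted to a single source vector and expressed over plain vectors; the name isRepeatedMaskSketch and the simplified undef/second-operand handling are assumptions, not the real helper):
#include <vector>

// Sketch: succeed if every lane of LaneSize elements applies the same
// within-lane pattern, collected into Repeated; -1 entries are wildcards.
static bool isRepeatedMaskSketch(const std::vector<int> &Mask, int LaneSize,
                                 std::vector<int> &Repeated) {
  Repeated.assign(LaneSize, -1);
  for (int I = 0, N = static_cast<int>(Mask.size()); I < N; ++I) {
    int M = Mask[I];
    if (M < 0)
      continue;                         // undef matches any repeated pattern
    if (M / LaneSize != I / LaneSize)
      return false;                     // crosses a lane boundary
    int Local = M % LaneSize;           // index relative to the lane base
    int Slot = I % LaneSize;
    if (Repeated[Slot] < 0)
      Repeated[Slot] = Local;
    else if (Repeated[Slot] != Local)
      return false;                     // lanes disagree on this slot
  }
  return true;
}
A 256-bit LASX shuffle whose two 128-bit halves permute their elements identically would pass such a check, which is what allows it to be lowered with a single per-lane instruction.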
static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode)
This file defines the SmallSet class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
This file contains some functions that are useful when dealing with strings.
#define LLVM_DEBUG(...)
Definition Debug.h:114
static bool isSequentialOrUndefInRange(ArrayRef< int > Mask, unsigned Pos, unsigned Size, int Low, int Step=1)
Return true if every element in Mask, beginning from position Pos and ending in Pos + Size,...
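The truncated brief above describes a sequential-or-undef test; a minimal self-contained sketch of that predicate (illustrative name and std::vector interface, not the ArrayRef-based LLVM helper itself):
#include <vector>

// Sketch: true if Mask[Pos .. Pos+Size-1] consists only of undef (-1) entries
// or the values Low, Low + Step, Low + 2*Step, ...
static bool isSequentialOrUndefSketch(const std::vector<int> &Mask,
                                      unsigned Pos, unsigned Size, int Low,
                                      int Step = 1) {
  for (unsigned I = Pos, E = Pos + Size; I < E; ++I, Low += Step)
    if (Mask[I] != -1 && Mask[I] != Low)
      return false;
  return true;
}
For instance, <4, -1, 6, -1> passes with Pos = 0, Size = 4, Low = 4, Step = 1, while <4, 5, 7, -1> does not.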
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
Definition APFloat.h:1414
bool isZero() const
Definition APFloat.h:1427
APInt bitcastToAPInt() const
Definition APFloat.h:1335
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1012
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:230
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1541
void setHighBits(unsigned hiBits)
Set the top hiBits bits.
Definition APInt.h:1392
LLVM_ABI APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition APInt.cpp:1033
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:936
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition APInt.h:1331
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:372
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1489
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1640
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition APInt.h:436
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1258
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:201
void setLowBits(unsigned loBits)
Set the bottom loBits bits.
Definition APInt.h:1389
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition APInt.h:287
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1563
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:852
This class represents an incoming formal argument to a Function.
Definition Argument.h:32
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition ArrayRef.h:143
An instruction that atomically checks whether a specified value is in a memory location,...
AtomicOrdering getFailureOrdering() const
Returns the failure ordering constraint of this cmpxchg instruction.
an instruction that atomically reads a memory location, combines it with another value,...
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ Add
*p = old + v
@ USubCond
Subtract only if no unsigned overflow.
@ Min
*p = old <signed v ? old : v
@ Sub
*p = old - v
@ And
*p = old & v
@ Xor
*p = old ^ v
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ UIncWrap
Increment one up to a maximum value.
@ Max
*p = old >signed v ? old : v
@ UMin
*p = old <unsigned v ? old : v
@ UMax
*p = old >unsigned v ? old : v
@ UDecWrap
Decrement one until a minimum value or zero.
@ Nand
*p = ~(old & v)
Value * getPointerOperand()
bool isFloatingPointOperation() const
BinOp getOperation() const
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this rmw instruction.
AtomicOrdering getOrdering() const
Returns the ordering constraint of this rmw instruction.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
bool test(unsigned Idx) const
Definition BitVector.h:480
size_type count() const
count - Returns the number of bits which are set.
Definition BitVector.h:181
A "pseudo-class" with methods for operating on BUILD_VECTORs.
CCState - This class holds information needed while lowering arguments and return values.
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
LLVM_ABI void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeCallOperands - Analyze the outgoing arguments to a call, incorporating info about the passed v...
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
LLVM_ABI void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
CCValAssign - Represent assignment of one arg/retval to a location.
static CCValAssign getPending(unsigned ValNo, MVT ValVT, MVT LocVT, LocInfo HTP, unsigned ExtraInfo=0)
Register getLocReg() const
LocInfo getLocInfo() const
static CCValAssign getReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP, bool IsCustom=false)
static CCValAssign getCustomReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP)
static CCValAssign getMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP, bool IsCustom=false)
bool needsCustom() const
int64_t getLocMemOffset() const
unsigned getValNo() const
static CCValAssign getCustomMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP)
LLVM_ABI bool isMustTailCall() const
Tests if this call site must be tail call optimized.
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
const APFloat & getValueAPF() const
This is the shared class of boolean and integer constants.
Definition Constants.h:87
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition Constants.h:226
bool isZero() const
This is just a convenience method to make client code smaller for a common case.
Definition Constants.h:214
uint64_t getZExtValue() const
int64_t getSExtValue() const
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string and methods for querying it.
Definition DataLayout.h:63
unsigned getPointerSizeInBits(unsigned AS=0) const
The size in bits of the pointer representation in a given address space.
Definition DataLayout.h:479
LLVM_ABI Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
A debug info location.
Definition DebugLoc.h:124
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition Function.h:209
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition Function.h:270
Argument * getArg(unsigned i) const
Definition Function.h:884
bool isDSOLocal() const
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2788
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Class to represent integer types.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI void emitError(const Instruction *I, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location inf...
This class is used to represent ISD::LOAD nodes.
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
LoongArchMachineFunctionInfo - This class is derived from MachineFunctionInfo and contains private Lo...
const LoongArchRegisterInfo * getRegisterInfo() const override
const LoongArchInstrInfo * getInstrInfo() const override
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override
Return true if result of the specified node is used by a return node only.
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps, bool &UseOneConstNR, bool Reciprocal) const override
Hooks for building estimates in place of slower divisions and square roots.
bool isLegalICmpImmediate(int64_t Imm) const override
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *CI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
Return the ValueType of the result of SETCC operations.
std::pair< bool, uint64_t > isImmVLDILegalForMode1(const APInt &SplatValue, const unsigned SplatBitSize) const
Check if a constant splat can be generated using [x]vldi, where imm[12] is 1.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
bool isExtractVecEltCheap(EVT VT, unsigned Index) const override
Return true if extraction of a scalar element from the given vector type at the given index is cheap.
LegalizeTypeAction getPreferredVectorAction(MVT VT) const override
Return the preferred vector type legalization action.
bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Determine if the target supports unaligned memory accesses.
bool isCheapToSpeculateCtlz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic ctlz.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
bool shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize, Align &PrefAlign) const override
Return true if the pointer arguments to CI should be aligned by aligning the object whose address is ...
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const override
Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type from this source type with ...
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
bool signExtendConstant(const ConstantInt *CI) const override
Return true if this constant should be sign extended when promoting to a larger type.
const char * getTargetNodeName(unsigned Opcode) const override
This method returns the name of a target specific DAG node.
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
bool isLegalAddImmediate(int64_t Imm) const override
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
bool isCheapToSpeculateCttz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic cttz.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const override
Returns true if arguments should be sign-extended in lib calls.
bool shouldScalarizeBinop(SDValue VecOp) const override
Try to convert an extract element of a vector binary operation into an extract element followed by a ...
bool isFPImmVLDILegal(const APFloat &Imm, EVT VT) const
bool shouldExtendTypeInLibCall(EVT Type) const override
Returns true if arguments should be extended in lib calls.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
bool hasAndNot(SDValue Y) const override
Return true if the target has a bitwise and-not operation: X = ~A & B This can be used to simplify se...
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const override
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
void emitExpandAtomicRMW(AtomicRMWInst *AI) const override
Perform an atomicrmw expansion in a target-specific way.
ISD::NodeType getExtendForAtomicCmpSwapArg() const override
Returns how the platform's atomic compare and swap expects its comparison value to be extended (ZERO_...
LoongArchTargetLowering(const TargetMachine &TM, const LoongArchSubtarget &STI)
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
bool hasAndNotCompare(SDValue Y) const override
Return true if the target should transform: (X & Y) == Y ---> (~X & Y) == 0 (X & Y) !...
SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps) const override
Return a reciprocal estimate value for the input operand.
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context, const Type *RetTy) const override
This hook should be implemented to check whether the return values described by the Outs array can fi...
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array,...
bool mayBeEmittedAsTailCall(const CallInst *CI) const override
Return true if the target may be able emit the call instruction as a tail call.
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:33
bool hasFeature(unsigned Feature) const
Machine Value Type.
static MVT getFloatingPointVT(unsigned BitWidth)
bool is128BitVector() const
Return true if this is a 128-bit vector type.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isScalableVector() const
Return true if this is a vector value type where the runtime length is machine dependent.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
bool is256BitVector() const
Return true if this is a 256-bit vector type.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
MVT getHalfNumVectorElementsVT() const
Return a VT for a vector type with the same element type but half the number of elements.
MVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
void push_back(MachineInstr *MI)
void setCallFrameSize(unsigned N)
Set the call frame size on entry to this basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
Representation of each machine instruction.
LLVM_ABI void collectDebugValues(SmallVectorImpl< MachineInstr * > &DbgValues)
Scan instructions immediately following MI and collect any matching DBG_VALUEs.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
A description of a memory reference used in the backend.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
void setIsKill(bool Val=true)
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
EVT getMemoryVT() const
Return the type of the in-memory value.
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition ArrayRef.h:299
Class to represent pointers.
unsigned getAddressSpace() const
Return the address space of the Pointer type.
Wrapper class representing virtual and physical registers.
Definition Register.h:19
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:74
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
LLVM_ABI bool isOnlyUserOf(const SDNode *N) const
Return true if this node is the only use of N.
size_t use_size() const
Return the number of uses of this node.
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumOperands() const
Return the number of values used by this operation.
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
bool isUndef() const
Returns true if the node type is UNDEF or POISON.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
uint64_t getScalarValueSizeInBits() const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
LLVM_ABI SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
bool isSafeToSpeculativelyExecute(unsigned Opcode) const
Some opcodes may create immediate undefined behavior when used with some values (integer division-by-...
SDValue getExtractSubvector(const SDLoc &DL, EVT VT, SDValue Vec, unsigned Idx)
Return the VT typed sub-vector of Vec at Idx.
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getInsertSubvector(const SDLoc &DL, SDValue Vec, SDValue SubVec, unsigned Idx)
Insert SubVec at the Idx element of Vec.
LLVM_ABI SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), BatchAAResults *BatchAA=nullptr)
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
static constexpr unsigned MaxRecursionDepth
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
LLVM_ABI SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
LLVM_ABI void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
const DataLayout & getDataLayout() const
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
LLVM_ABI SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
LLVM_ABI std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
LLVM_ABI SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
LLVM_ABI SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
LLVM_ABI SDValue WidenVector(const SDValue &N, const SDLoc &DL)
Widen the vector up to the next power of two using INSERT_SUBVECTOR.
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
LLVM_ABI SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
LLVM_ABI SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getRegisterMask(const uint32_t *RegMask)
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVM_ABI SDValue getCondCode(ISD::CondCode Cond)
LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
LLVMContext * getContext() const
LLVM_ABI SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
LLVM_ABI SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
LLVM_ABI std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
static LLVM_ABI bool isReverseMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask swaps the order of elements from exactly one source vector.
ArrayRef< int > getMask() const
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:133
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition SmallSet.h:175
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:183
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void assign(size_type NumElts, ValueParamT Elt)
typename SuperClass::const_iterator const_iterator
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StackOffset holds a fixed and a scalable offset in bytes.
Definition TypeSize.h:31
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition StringRef.h:702
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:261
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:146
TargetInstrInfo - Interface to description of machine instruction set.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
const TargetMachine & getTargetMachine() const
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
LegalizeTypeAction
This enum indicates whether types are legal for a target, and if not, what action should be used to...
void setMaxBytesForAlignment(unsigned MaxBytes)
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
virtual TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT) const
Return the preferred vector type legalization action.
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
void setMinCmpXchgSizeInBits(unsigned SizeInBits)
Sets the minimum cmpxchg or ll/sc size supported by the backend.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
std::vector< ArgListEntry > ArgListTy
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Vector Op.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
virtual InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const
SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
More limited version of SimplifyDemandedBits that can be used to "lookthrough" ops that don't contrib...
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
TargetLowering(const TargetLowering &)=delete
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
Primary interface to the complete machine description for the target machine.
bool useTLSDESC() const
Returns true if this target uses TLS Descriptors.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
bool shouldAssumeDSOLocal(const GlobalValue *GV) const
CodeModel::Model getCodeModel() const
Returns the code model.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetInstrInfo * getInstrInfo() const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
LLVM_ABI unsigned getIntegerBitWidth() const
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:198
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:301
This class is used to represent EVT's, which are used to parameterize some operations.
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:538
self_iterator getIterator()
Definition ilist_node.h:123
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ PreserveMost
Used for runtime calls that preserves most registers.
Definition CallingConv.h:63
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition CallingConv.h:50
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition ISDOpcodes.h:41
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:807
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition ISDOpcodes.h:504
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition ISDOpcodes.h:45
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:270
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:593
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:771
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:259
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:841
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:511
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:215
@ GlobalAddress
Definition ISDOpcodes.h:88
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:868
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:577
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:410
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:744
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:249
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
@ GlobalTLSAddress
Definition ISDOpcodes.h:89
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:832
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:662
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition ISDOpcodes.h:534
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition ISDOpcodes.h:541
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:369
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:784
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:228
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:701
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:762
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:642
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:607
@ READ_REGISTER
READ_REGISTER, WRITE_REGISTER - This node represents llvm.register on the DAG, which implements the n...
Definition ISDOpcodes.h:134
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:569
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition ISDOpcodes.h:219
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:838
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:799
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:876
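Illustrative only: sign-extending the low 32 bits of a 64-bit value in place is written as an in-register extension, with the small type carried as a VTSDNode operand (DAG, DL and X are assumed locals):
  SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, X,
                            DAG.getValueType(MVT::i32));
  // Equivalent to (sra (shl X, 32), 32) when expanded.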
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:724
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:793
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition ISDOpcodes.h:145
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition ISDOpcodes.h:110
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:914
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:736
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:200
@ FREEZE
FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or is evaluated to UNDEF),...
Definition ISDOpcodes.h:236
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:558
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:844
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition ISDOpcodes.h:821
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition ISDOpcodes.h:62
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:360
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition ISDOpcodes.h:719
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:208
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:549
LLVM_ABI CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
LLVM_ABI bool isFreezeUndef(const SDNode *N)
Return true if the specified node is FREEZE(UNDEF).
LLVM_ABI CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
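A small worked example of the two condition-code helpers above (values chosen purely for illustration):
  ISD::CondCode CC = ISD::SETLT;
  ISD::CondCode Inv = ISD::getSetCCInverse(CC, MVT::i32); // !(x < y) -> SETGE
  ISD::CondCode Swp = ISD::getSetCCSwappedOperands(CC);   // (y op x) -> SETGT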
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LLVM_ABI bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
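Typical guard in a vselect combine (Cond and TrueV are assumed SDValues, not from this file):
  // If every lane of the condition is known all-ones, the select always
  // yields its true operand.
  if (ISD::isBuildVectorAllOnes(Cond.getNode()))
    return TrueV;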
LLVM_ABI NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode)
Get underlying scalar opcode for VECREDUCE opcode.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
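Hedged sketch of the usual pattern around getOrInsertDeclaration; the Module *M, IRBuilder<> Builder and Value *Arg are assumed, and llvm.fabs is just a convenient overloaded intrinsic for the example:
  Function *Fabs = Intrinsic::getOrInsertDeclaration(
      M, Intrinsic::fabs, {Builder.getFloatTy()});
  Value *Abs = Builder.CreateCall(Fabs, {Arg});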
ABI getTargetABI(StringRef ABIName)
InstSeq generateInstSeq(int64_t Val)
LLVM_ABI Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
@ Kill
The last use of a register.
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition LLVMContext.h:55
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
Sequence
A sequence of states that a pointer may go through in which an objc_retain and objc_release are actua...
Definition PtrState.h:41
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
This is an optimization pass for GlobalISel generic memory operations.
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
Definition Threading.h:280
@ Offset
Definition DWP.cpp:477
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1725
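For instance (Mask and NumElts assumed), a shuffle-lowering check that every index selects from the first source vector:
  // Undef (-1) lanes are accepted as well by this predicate.
  bool SingleSource = all_of(Mask, [&](int M) { return M < (int)NumElts; });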
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
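A hedged sketch of BuildMI in a custom-inserter or pseudo-expansion context; MBB, MI, DL, TII and the registers are assumed, and the move via OR with $zero follows the usual LoongArch idiom (opcode/register names are assumptions here, not taken from this listing):
  BuildMI(MBB, MI, DL, TII->get(LoongArch::OR), DstReg)
      .addReg(SrcReg, getKillRegState(true)) // SrcReg dies here
      .addReg(LoongArch::R0);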
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
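For example, LoongArch addi-style immediates are signed 12-bit fields, so a fold would typically be guarded like this (Imm is an illustrative constant; isUInt is the unsigned counterpart listed further below):
  int64_t Imm = -2048;
  bool FitsSImm12 = isInt<12>(Imm);  // true: the signed 12-bit range is [-2048, 2047]
  bool FitsUImm12 = isUInt<12>(Imm); // false: negative values never fit an unsigned field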
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
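Typical use in a combine (N is assumed to be the node under inspection):
  if (isNullConstant(N->getOperand(1)))
    return N->getOperand(0); // e.g. fold (or x, 0) -> x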
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
bool isIntOrFPConstant(SDValue V)
Return true if V is either an integer or FP constant.
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
Definition bit.h:303
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64-bit edition).
Definition MathExtras.h:284
LLVM_ABI bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:337
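These two usually appear together when strength-reducing a multiply by a constant (illustrative value only):
  uint64_t C = 64;
  if (isPowerOf2_64(C)) {
    unsigned ShAmt = Log2_64(C); // 6, so `x * 64` can be emitted as `x << 6`
    (void)ShAmt;
  }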
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
Definition MathExtras.h:273
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1732
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:167
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition MathExtras.h:261
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
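A compact illustration of the bit-pattern predicates above (hypothetical constants, not taken from this file):
  bool A = isMask_64(0x00000000000000FFULL);        // true: contiguous ones from bit 0
  bool B = isShiftedMask_64(0x000000000000FF00ULL); // true: contiguous ones starting at bit 8
  bool C = isUInt<12>(0xFFF);                       // true: largest value of an unsigned 12-bit field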
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Other
Any other memory.
Definition ModRef.h:68
unsigned getKillRegState(bool B)
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr bool isShiftedInt(int64_t x)
Checks if a signed integer is an N bit number shifted left by S.
Definition MathExtras.h:182
constexpr unsigned BitWidth
std::string join_items(Sep Separator, Args &&... Items)
Joins the strings in the parameter pack Items, adding Separator between the elements....
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
PointerUnion< const Value *, const PseudoSourceValue * > ValueType
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:180
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:869
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Extended Value Type.
Definition ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition ValueTypes.h:94
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:395
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:137
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:147
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:373
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:385
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:316
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition ValueTypes.h:207
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:65
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:381
static EVT getFloatingPointVT(unsigned BitWidth)
Returns the EVT that represents a floating-point type with the given number of bits.
Definition ValueTypes.h:59
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:168
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:323
bool is256BitVector() const
Return true if this is a 256-bit vector type.
Definition ValueTypes.h:212
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:328
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:157
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:336
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:453
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:152
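A short illustration of the EVT queries above on an LSX-sized vector type (Ctx is an assumed LLVMContext):
  EVT VT = MVT::v4i32;
  bool IsVec = VT.isVector();                   // true
  EVT EltVT = VT.getVectorElementType();        // i32
  unsigned NumElts = VT.getVectorNumElements(); // 4
  bool Is128 = VT.is128BitVector();             // true
  EVT WideInt = EVT::getIntegerVT(Ctx, 128);    // a 128-bit integer type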
Align getNonZeroOrigAlign() const
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
static LLVM_ABI MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
SmallVector< ISD::OutputArg, 32 > Outs
LLVM_ABI SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
This structure is used to pass arguments to makeLibCall function.
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...