1//=- LoongArchISelLowering.cpp - LoongArch DAG Lowering Implementation ---===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that LoongArch uses to lower LLVM code into
10// a selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
15#include "LoongArch.h"
18#include "LoongArchSubtarget.h"
22#include "llvm/ADT/Statistic.h"
27#include "llvm/IR/IRBuilder.h"
28#include "llvm/IR/IntrinsicsLoongArch.h"
30#include "llvm/Support/Debug.h"
34
35using namespace llvm;
36
37#define DEBUG_TYPE "loongarch-isel-lowering"
38
39STATISTIC(NumTailCalls, "Number of tail calls");
40
41static cl::opt<bool> ZeroDivCheck("loongarch-check-zero-division", cl::Hidden,
42 cl::desc("Trap on integer division by zero."),
43 cl::init(false));
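// When this (hidden) flag is enabled, for example via
// "llc -loongarch-check-zero-division", integer divisions are expanded with an
// explicit check of the divisor so that the generated code traps on division
// by zero instead of producing an undefined result.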
44
45LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
46 const LoongArchSubtarget &STI)
47 : TargetLowering(TM), Subtarget(STI) {
48
49 MVT GRLenVT = Subtarget.getGRLenVT();
50
51 // Set up the register classes.
52
53 addRegisterClass(GRLenVT, &LoongArch::GPRRegClass);
54 if (Subtarget.hasBasicF())
55 addRegisterClass(MVT::f32, &LoongArch::FPR32RegClass);
56 if (Subtarget.hasBasicD())
57 addRegisterClass(MVT::f64, &LoongArch::FPR64RegClass);
58
59 static const MVT::SimpleValueType LSXVTs[] = {
60 MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32, MVT::v2f64};
61 static const MVT::SimpleValueType LASXVTs[] = {
62 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, MVT::v8f32, MVT::v4f64};
63
64 if (Subtarget.hasExtLSX())
65 for (MVT VT : LSXVTs)
66 addRegisterClass(VT, &LoongArch::LSX128RegClass);
67
68 if (Subtarget.hasExtLASX())
69 for (MVT VT : LASXVTs)
70 addRegisterClass(VT, &LoongArch::LASX256RegClass);
71
72 // Set operations for LA32 and LA64.
73
75 MVT::i1, Promote);
76
83
86 GRLenVT, Custom);
87
89
94
97
101
102 // Expand bitreverse.i16 with native-width bitrev and shift for now, before
103 // we get to know which of sll and revb.2h is faster.
106
107 // LA32 does not have REVB.2W and REVB.D due to the 64-bit operands, and
108 // the narrower REVB.W does not exist. But LA32 does have REVB.2H, so i16
109 // and i32 could still be byte-swapped relatively cheaply.
111
117
120
121 // Set operations for LA64 only.
122
123 if (Subtarget.is64Bit()) {
139
142 }
143
144 // Set operations for LA32 only.
145
146 if (!Subtarget.is64Bit()) {
152
153 // Set libcalls.
154 setLibcallName(RTLIB::MUL_I128, nullptr);
155 }
156
157 static const ISD::CondCode FPCCToExpand[] = {
160
161 // Set operations for 'F' feature.
162
163 if (Subtarget.hasBasicF()) {
164 setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
165
179
180 if (Subtarget.is64Bit())
182
183 if (!Subtarget.hasBasicD()) {
185 if (Subtarget.is64Bit()) {
188 }
189 }
190 }
191
192 // Set operations for 'D' feature.
193
194 if (Subtarget.hasBasicD()) {
195 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
196 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
197 setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
198
212
213 if (Subtarget.is64Bit())
215 }
216
217 // Set operations for 'LSX' feature.
218
219 if (Subtarget.hasExtLSX())
221 {MVT::v2i64, MVT::v4i32, MVT::v8i16, MVT::v16i8}, Legal);
222
223 // Set operations for 'LASX' feature.
224
225 if (Subtarget.hasExtLASX())
227 {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8},
228 Legal);
229
230 // Set DAG combine for LA32 and LA64.
231
235
236 // Set DAG combine for 'LSX' feature.
237
238 if (Subtarget.hasExtLSX())
240
241 // Compute derived properties from the register classes.
243
245
247
249
251
252 // Function alignments.
254 // Set preferred alignments.
258}
259
260bool LoongArchTargetLowering::isOffsetFoldingLegal(
261 const GlobalAddressSDNode *GA) const {
262 // In order to maximise the opportunity for common subexpression elimination,
263 // keep a separate ADD node for the global address offset instead of folding
264 // it in the global address node. Later peephole optimisations may choose to
265 // fold it back in when profitable.
266 return false;
267}
268
269SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
270 SelectionDAG &DAG) const {
271 switch (Op.getOpcode()) {
272 case ISD::EH_DWARF_CFA:
273 return lowerEH_DWARF_CFA(Op, DAG);
274 case ISD::GlobalAddress:
275 return lowerGlobalAddress(Op, DAG);
276 case ISD::GlobalTLSAddress:
277 return lowerGlobalTLSAddress(Op, DAG);
278 case ISD::INTRINSIC_WO_CHAIN:
279 return lowerINTRINSIC_WO_CHAIN(Op, DAG);
280 case ISD::INTRINSIC_W_CHAIN:
281 return lowerINTRINSIC_W_CHAIN(Op, DAG);
282 case ISD::INTRINSIC_VOID:
283 return lowerINTRINSIC_VOID(Op, DAG);
284 case ISD::BlockAddress:
285 return lowerBlockAddress(Op, DAG);
286 case ISD::JumpTable:
287 return lowerJumpTable(Op, DAG);
288 case ISD::SHL_PARTS:
289 return lowerShiftLeftParts(Op, DAG);
290 case ISD::SRA_PARTS:
291 return lowerShiftRightParts(Op, DAG, true);
292 case ISD::SRL_PARTS:
293 return lowerShiftRightParts(Op, DAG, false);
294 case ISD::ConstantPool:
295 return lowerConstantPool(Op, DAG);
296 case ISD::FP_TO_SINT:
297 return lowerFP_TO_SINT(Op, DAG);
298 case ISD::BITCAST:
299 return lowerBITCAST(Op, DAG);
300 case ISD::UINT_TO_FP:
301 return lowerUINT_TO_FP(Op, DAG);
302 case ISD::SINT_TO_FP:
303 return lowerSINT_TO_FP(Op, DAG);
304 case ISD::VASTART:
305 return lowerVASTART(Op, DAG);
306 case ISD::FRAMEADDR:
307 return lowerFRAMEADDR(Op, DAG);
308 case ISD::RETURNADDR:
309 return lowerRETURNADDR(Op, DAG);
310 case ISD::WRITE_REGISTER:
311 return lowerWRITE_REGISTER(Op, DAG);
312 }
313 return SDValue();
314}
315
316SDValue LoongArchTargetLowering::lowerWRITE_REGISTER(SDValue Op,
317 SelectionDAG &DAG) const {
318
319 if (Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i32) {
320 DAG.getContext()->emitError(
321 "On LA64, only 64-bit registers can be written.");
322 return Op.getOperand(0);
323 }
324
325 if (!Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i64) {
326 DAG.getContext()->emitError(
327 "On LA32, only 32-bit registers can be written.");
328 return Op.getOperand(0);
329 }
330
331 return Op;
332}
333
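// Lower FRAMEADDR by chasing saved frame pointers: for every requested depth
// the previous frame pointer is reloaded from (FrameAddr - 2 * GRLenInBytes),
// which is where the prologue is expected to have spilled it, just below the
// saved return address.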
334SDValue LoongArchTargetLowering::lowerFRAMEADDR(SDValue Op,
335 SelectionDAG &DAG) const {
336 if (!isa<ConstantSDNode>(Op.getOperand(0))) {
337 DAG.getContext()->emitError("argument to '__builtin_frame_address' must "
338 "be a constant integer");
339 return SDValue();
340 }
341
342 MachineFunction &MF = DAG.getMachineFunction();
343 MF.getFrameInfo().setFrameAddressIsTaken(true);
344 Register FrameReg = Subtarget.getRegisterInfo()->getFrameRegister(MF);
345 EVT VT = Op.getValueType();
346 SDLoc DL(Op);
347 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
348 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
349 int GRLenInBytes = Subtarget.getGRLen() / 8;
350
351 while (Depth--) {
352 int Offset = -(GRLenInBytes * 2);
353 SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
354 DAG.getIntPtrConstant(Offset, DL));
355 FrameAddr =
356 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
357 }
358 return FrameAddr;
359}
360
361SDValue LoongArchTargetLowering::lowerRETURNADDR(SDValue Op,
362 SelectionDAG &DAG) const {
363 if (verifyReturnAddressArgumentIsConstant(Op, DAG))
364 return SDValue();
365
366 // Currently, only lowering the return address for the current frame is
367 // supported.
367 if (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() != 0) {
368 DAG.getContext()->emitError(
369 "return address can only be determined for the current frame");
370 return SDValue();
371 }
372
373 MachineFunction &MF = DAG.getMachineFunction();
374 MF.getFrameInfo().setReturnAddressIsTaken(true);
375 MVT GRLenVT = Subtarget.getGRLenVT();
376
377 // Return the value of the return address register, marking it an implicit
378 // live-in.
379 Register Reg = MF.addLiveIn(Subtarget.getRegisterInfo()->getRARegister(),
380 getRegClassFor(GRLenVT));
381 return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), Reg, GRLenVT);
382}
383
384SDValue LoongArchTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
385 SelectionDAG &DAG) const {
386 MachineFunction &MF = DAG.getMachineFunction();
387 auto Size = Subtarget.getGRLen() / 8;
388 auto FI = MF.getFrameInfo().CreateFixedObject(Size, 0, false);
389 return DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
390}
391
392SDValue LoongArchTargetLowering::lowerVASTART(SDValue Op,
393 SelectionDAG &DAG) const {
394 MachineFunction &MF = DAG.getMachineFunction();
395 auto *FuncInfo = MF.getInfo<LoongArchMachineFunctionInfo>();
396
397 SDLoc DL(Op);
398 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
399 getPointerTy(MF.getDataLayout()));
400
401 // vastart just stores the address of the VarArgsFrameIndex slot into the
402 // memory location argument.
403 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
404 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
405 MachinePointerInfo(SV));
406}
407
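// Custom lowering of UINT_TO_FP for LA64 with only the 'F' feature: if the
// operand is already known to be suitably zero-extended (via an AND mask, a
// BSTRPICK, or an AssertZext narrower than i32), the node is kept as-is and
// handled by the normal patterns; otherwise a soft-float libcall selected by
// RTLIB::getUINTTOFP is emitted.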
408SDValue LoongArchTargetLowering::lowerUINT_TO_FP(SDValue Op,
409 SelectionDAG &DAG) const {
410 assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
411 !Subtarget.hasBasicD() && "unexpected target features");
412
413 SDLoc DL(Op);
414 SDValue Op0 = Op.getOperand(0);
415 if (Op0->getOpcode() == ISD::AND) {
416 auto *C = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
417 if (C && C->getZExtValue() < UINT64_C(0xFFFFFFFF))
418 return Op;
419 }
420
421 if (Op0->getOpcode() == LoongArchISD::BSTRPICK &&
422 Op0.getConstantOperandVal(1) < UINT64_C(0X1F) &&
423 Op0.getConstantOperandVal(2) == UINT64_C(0))
424 return Op;
425
426 if (Op0.getOpcode() == ISD::AssertZext &&
427 dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLT(MVT::i32))
428 return Op;
429
430 EVT OpVT = Op0.getValueType();
431 EVT RetVT = Op.getValueType();
432 RTLIB::Libcall LC = RTLIB::getUINTTOFP(OpVT, RetVT);
433 MakeLibCallOptions CallOptions;
434 CallOptions.setTypeListBeforeSoften(OpVT, RetVT, true);
435 SDValue Chain = SDValue();
436 SDValue Result;
437 std::tie(Result, Chain) =
438 makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
439 return Result;
440}
441
442SDValue LoongArchTargetLowering::lowerSINT_TO_FP(SDValue Op,
443 SelectionDAG &DAG) const {
444 assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
445 !Subtarget.hasBasicD() && "unexpected target features");
446
447 SDLoc DL(Op);
448 SDValue Op0 = Op.getOperand(0);
449
450 if ((Op0.getOpcode() == ISD::AssertSext ||
451 Op0.getOpcode() == ISD::SIGN_EXTEND_INREG) &&
452 dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLE(MVT::i32))
453 return Op;
454
455 EVT OpVT = Op0.getValueType();
456 EVT RetVT = Op.getValueType();
457 RTLIB::Libcall LC = RTLIB::getSINTTOFP(OpVT, RetVT);
458 MakeLibCallOptions CallOptions;
459 CallOptions.setTypeListBeforeSoften(OpVT, RetVT, true);
460 SDValue Chain = SDValue();
461 SDValue Result;
462 std::tie(Result, Chain) =
463 makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
464 return Result;
465}
466
467SDValue LoongArchTargetLowering::lowerBITCAST(SDValue Op,
468 SelectionDAG &DAG) const {
469
470 SDLoc DL(Op);
471 SDValue Op0 = Op.getOperand(0);
472
473 if (Op.getValueType() == MVT::f32 && Op0.getValueType() == MVT::i32 &&
474 Subtarget.is64Bit() && Subtarget.hasBasicF()) {
475 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
476 return DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, NewOp0);
477 }
478 return Op;
479}
480
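// Lower FP_TO_SINT through LoongArchISD::FTINT. With only 'F' (no 'D') on
// LA64, a result wider than 32 bits is moved out of the FPR with
// MOVFR2GR_S_LA64; otherwise the floating-point result of FTINT is simply
// bitcast to the requested integer type.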
481SDValue LoongArchTargetLowering::lowerFP_TO_SINT(SDValue Op,
482 SelectionDAG &DAG) const {
483
484 SDLoc DL(Op);
485
486 if (Op.getValueSizeInBits() > 32 && Subtarget.hasBasicF() &&
487 !Subtarget.hasBasicD()) {
488 SDValue Dst =
489 DAG.getNode(LoongArchISD::FTINT, DL, MVT::f32, Op.getOperand(0));
490 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Dst);
491 }
492
493 EVT FPTy = EVT::getFloatingPointVT(Op.getValueSizeInBits());
494 SDValue Trunc = DAG.getNode(LoongArchISD::FTINT, DL, FPTy, Op.getOperand(0));
495 return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Trunc);
496}
497
498static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty,
499 SelectionDAG &DAG, unsigned Flags) {
500 return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
501}
502
503static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty,
504 SelectionDAG &DAG, unsigned Flags) {
505 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
506 Flags);
507}
508
509static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty,
510 SelectionDAG &DAG, unsigned Flags) {
511 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
512 N->getOffset(), Flags);
513}
514
515static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty,
516 SelectionDAG &DAG, unsigned Flags) {
517 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
518}
519
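// Common address-materialisation helper shared by globals, block addresses,
// jump tables and constant pools. For the small/medium code models it emits
// PseudoLA_PCREL for DSO-local symbols and PseudoLA_GOT otherwise; the large
// code model uses the *_LARGE pseudos. As a rough sketch (assuming LA64), the
// PCREL pseudo later expands to something like:
//   pcalau12i $dst, %pc_hi20(sym)
//   addi.d    $dst, $dst, %pc_lo12(sym)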
520template <class NodeTy>
521SDValue LoongArchTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
522 bool IsLocal) const {
523 SDLoc DL(N);
524 EVT Ty = getPointerTy(DAG.getDataLayout());
525 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
526
527 switch (DAG.getTarget().getCodeModel()) {
528 default:
529 report_fatal_error("Unsupported code model");
530
531 case CodeModel::Large: {
532 assert(Subtarget.is64Bit() && "Large code model requires LA64");
533
534 // This is not actually used, but is necessary for successfully matching
535 // the PseudoLA_*_LARGE nodes.
536 SDValue Tmp = DAG.getConstant(0, DL, Ty);
537 if (IsLocal)
538 // This generates the pattern (PseudoLA_PCREL_LARGE tmp sym), that
539 // eventually becomes the desired 5-insn code sequence.
540 return SDValue(DAG.getMachineNode(LoongArch::PseudoLA_PCREL_LARGE, DL, Ty,
541 Tmp, Addr),
542 0);
543
544 // This generates the pattern (PseudoLA_GOT_LARGE tmp sym), that eventually
545 // becomes the desired 5-insn code sequence.
546 return SDValue(
547 DAG.getMachineNode(LoongArch::PseudoLA_GOT_LARGE, DL, Ty, Tmp, Addr),
548 0);
549 }
550
551 case CodeModel::Small:
552 case CodeModel::Medium:
553 if (IsLocal)
554 // This generates the pattern (PseudoLA_PCREL sym), which expands to
555 // (addi.w/d (pcalau12i %pc_hi20(sym)) %pc_lo12(sym)).
556 return SDValue(
557 DAG.getMachineNode(LoongArch::PseudoLA_PCREL, DL, Ty, Addr), 0);
558
559 // This generates the pattern (PseudoLA_GOT sym), which expands to (ld.w/d
560 // (pcalau12i %got_pc_hi20(sym)) %got_pc_lo12(sym)).
561 return SDValue(DAG.getMachineNode(LoongArch::PseudoLA_GOT, DL, Ty, Addr),
562 0);
563 }
564}
565
566SDValue LoongArchTargetLowering::lowerBlockAddress(SDValue Op,
567 SelectionDAG &DAG) const {
568 return getAddr(cast<BlockAddressSDNode>(Op), DAG);
569}
570
571SDValue LoongArchTargetLowering::lowerJumpTable(SDValue Op,
572 SelectionDAG &DAG) const {
573 return getAddr(cast<JumpTableSDNode>(Op), DAG);
574}
575
576SDValue LoongArchTargetLowering::lowerConstantPool(SDValue Op,
577 SelectionDAG &DAG) const {
578 return getAddr(cast<ConstantPoolSDNode>(Op), DAG);
579}
580
581SDValue LoongArchTargetLowering::lowerGlobalAddress(SDValue Op,
582 SelectionDAG &DAG) const {
583 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
584 assert(N->getOffset() == 0 && "unexpected offset in global node");
585 return getAddr(N, DAG, N->getGlobal()->isDSOLocal());
586}
587
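// Static TLS models (initial-exec / local-exec): materialise the symbol's TLS
// offset with the given PseudoLA_TLS_* opcode (the large code model variants
// take an extra dummy operand) and add the thread pointer register $tp (R2).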
588SDValue LoongArchTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
589 SelectionDAG &DAG,
590 unsigned Opc,
591 bool Large) const {
592 SDLoc DL(N);
593 EVT Ty = getPointerTy(DAG.getDataLayout());
594 MVT GRLenVT = Subtarget.getGRLenVT();
595
596 // This is not actually used, but is necessary for successfully matching the
597 // PseudoLA_*_LARGE nodes.
598 SDValue Tmp = DAG.getConstant(0, DL, Ty);
599 SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
600 SDValue Offset = Large
601 ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
602 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
603
604 // Add the thread pointer.
605 return DAG.getNode(ISD::ADD, DL, Ty, Offset,
606 DAG.getRegister(LoongArch::R2, GRLenVT));
607}
608
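// Dynamic TLS models (general-dynamic / local-dynamic): compute the address of
// the symbol's GOT entry with the given pseudo and pass it to __tls_get_addr,
// which returns the run-time address of the TLS object.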
609SDValue LoongArchTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
610 SelectionDAG &DAG,
611 unsigned Opc,
612 bool Large) const {
613 SDLoc DL(N);
614 EVT Ty = getPointerTy(DAG.getDataLayout());
615 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
616
617 // This is not actually used, but is necessary for successfully matching the
618 // PseudoLA_*_LARGE nodes.
619 SDValue Tmp = DAG.getConstant(0, DL, Ty);
620
621 // Use a PC-relative addressing mode to access the dynamic GOT address.
622 SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
623 SDValue Load = Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
624 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
625
626 // Prepare argument list to generate call.
627 ArgListTy Args;
628 ArgListEntry Entry;
629 Entry.Node = Load;
630 Entry.Ty = CallTy;
631 Args.push_back(Entry);
632
633 // Setup call to __tls_get_addr.
634 TargetLowering::CallLoweringInfo CLI(DAG);
635 CLI.setDebugLoc(DL)
636 .setChain(DAG.getEntryNode())
637 .setLibCallee(CallingConv::C, CallTy,
638 DAG.getExternalSymbol("__tls_get_addr", Ty),
639 std::move(Args));
640
641 return LowerCallTo(CLI).first;
642}
643
644SDValue
645LoongArchTargetLowering::lowerGlobalTLSAddress(SDValue Op,
646 SelectionDAG &DAG) const {
647 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
648 CallingConv::GHC)
649 report_fatal_error("In GHC calling convention TLS is not supported");
650
651 bool Large = DAG.getTarget().getCodeModel() == CodeModel::Large;
652 assert((!Large || Subtarget.is64Bit()) && "Large code model requires LA64");
653
654 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
655 assert(N->getOffset() == 0 && "unexpected offset in global node");
656
657 SDValue Addr;
658 switch (getTargetMachine().getTLSModel(N->getGlobal())) {
659 case TLSModel::GeneralDynamic:
660 // In this model, application code calls the dynamic linker function
661 // __tls_get_addr to locate TLS offsets into the dynamic thread vector at
662 // runtime.
663 Addr = getDynamicTLSAddr(N, DAG,
664 Large ? LoongArch::PseudoLA_TLS_GD_LARGE
665 : LoongArch::PseudoLA_TLS_GD,
666 Large);
667 break;
668 case TLSModel::LocalDynamic:
669 // Same as GeneralDynamic, except for assembly modifiers and relocation
670 // records.
671 Addr = getDynamicTLSAddr(N, DAG,
672 Large ? LoongArch::PseudoLA_TLS_LD_LARGE
673 : LoongArch::PseudoLA_TLS_LD,
674 Large);
675 break;
676 case TLSModel::InitialExec:
677 // This model uses the GOT to resolve TLS offsets.
678 Addr = getStaticTLSAddr(N, DAG,
679 Large ? LoongArch::PseudoLA_TLS_IE_LARGE
680 : LoongArch::PseudoLA_TLS_IE,
681 Large);
682 break;
683 case TLSModel::LocalExec:
684 // This model is used when static linking as the TLS offsets are resolved
685 // during program linking.
686 //
687 // This node doesn't need an extra argument for the large code model.
688 Addr = getStaticTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_LE);
689 break;
690 }
691
692 return Addr;
693}
694
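// Verify that the immediate operand ImmOp of an intrinsic fits in N bits,
// treating it as signed when IsSigned is set. On failure an error is emitted
// and UNDEF is returned so lowering can continue; on success an empty SDValue
// is returned. For example, lsx_vreplvei_d takes a 1-bit element index (its
// vector has only two i64 elements), so it is checked below with
// checkIntrinsicImmArg<1>(Op, 2, DAG).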
695template <unsigned N>
696static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp,
697 SelectionDAG &DAG, bool IsSigned = false) {
698 auto *CImm = cast<ConstantSDNode>(Op->getOperand(ImmOp));
699 // Check the ImmArg.
700 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
701 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
702 DAG.getContext()->emitError(Op->getOperationName(0) +
703 ": argument out of range.");
704 return DAG.getNode(ISD::UNDEF, SDLoc(Op), Op.getValueType());
705 }
706 return SDValue();
707}
708
709SDValue
710LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
711 SelectionDAG &DAG) const {
712 SDLoc DL(Op);
713 switch (Op.getConstantOperandVal(0)) {
714 default:
715 return SDValue(); // Don't custom lower most intrinsics.
716 case Intrinsic::thread_pointer: {
717 EVT PtrVT = getPointerTy(DAG.getDataLayout());
718 return DAG.getRegister(LoongArch::R2, PtrVT);
719 }
720 case Intrinsic::loongarch_lsx_vpickve2gr_d:
721 case Intrinsic::loongarch_lsx_vpickve2gr_du:
722 case Intrinsic::loongarch_lsx_vreplvei_d:
723 case Intrinsic::loongarch_lasx_xvrepl128vei_d:
724 return checkIntrinsicImmArg<1>(Op, 2, DAG);
725 case Intrinsic::loongarch_lsx_vreplvei_w:
726 case Intrinsic::loongarch_lasx_xvrepl128vei_w:
727 case Intrinsic::loongarch_lasx_xvpickve2gr_d:
728 case Intrinsic::loongarch_lasx_xvpickve2gr_du:
729 case Intrinsic::loongarch_lasx_xvpickve_d:
730 case Intrinsic::loongarch_lasx_xvpickve_d_f:
731 return checkIntrinsicImmArg<2>(Op, 2, DAG);
732 case Intrinsic::loongarch_lasx_xvinsve0_d:
733 return checkIntrinsicImmArg<2>(Op, 3, DAG);
734 case Intrinsic::loongarch_lsx_vsat_b:
735 case Intrinsic::loongarch_lsx_vsat_bu:
736 case Intrinsic::loongarch_lsx_vrotri_b:
737 case Intrinsic::loongarch_lsx_vsllwil_h_b:
738 case Intrinsic::loongarch_lsx_vsllwil_hu_bu:
739 case Intrinsic::loongarch_lsx_vsrlri_b:
740 case Intrinsic::loongarch_lsx_vsrari_b:
741 case Intrinsic::loongarch_lsx_vreplvei_h:
742 case Intrinsic::loongarch_lasx_xvsat_b:
743 case Intrinsic::loongarch_lasx_xvsat_bu:
744 case Intrinsic::loongarch_lasx_xvrotri_b:
745 case Intrinsic::loongarch_lasx_xvsllwil_h_b:
746 case Intrinsic::loongarch_lasx_xvsllwil_hu_bu:
747 case Intrinsic::loongarch_lasx_xvsrlri_b:
748 case Intrinsic::loongarch_lasx_xvsrari_b:
749 case Intrinsic::loongarch_lasx_xvrepl128vei_h:
750 case Intrinsic::loongarch_lasx_xvpickve_w:
751 case Intrinsic::loongarch_lasx_xvpickve_w_f:
752 return checkIntrinsicImmArg<3>(Op, 2, DAG);
753 case Intrinsic::loongarch_lasx_xvinsve0_w:
754 return checkIntrinsicImmArg<3>(Op, 3, DAG);
755 case Intrinsic::loongarch_lsx_vsat_h:
756 case Intrinsic::loongarch_lsx_vsat_hu:
757 case Intrinsic::loongarch_lsx_vrotri_h:
758 case Intrinsic::loongarch_lsx_vsllwil_w_h:
759 case Intrinsic::loongarch_lsx_vsllwil_wu_hu:
760 case Intrinsic::loongarch_lsx_vsrlri_h:
761 case Intrinsic::loongarch_lsx_vsrari_h:
762 case Intrinsic::loongarch_lsx_vreplvei_b:
763 case Intrinsic::loongarch_lasx_xvsat_h:
764 case Intrinsic::loongarch_lasx_xvsat_hu:
765 case Intrinsic::loongarch_lasx_xvrotri_h:
766 case Intrinsic::loongarch_lasx_xvsllwil_w_h:
767 case Intrinsic::loongarch_lasx_xvsllwil_wu_hu:
768 case Intrinsic::loongarch_lasx_xvsrlri_h:
769 case Intrinsic::loongarch_lasx_xvsrari_h:
770 case Intrinsic::loongarch_lasx_xvrepl128vei_b:
771 return checkIntrinsicImmArg<4>(Op, 2, DAG);
772 case Intrinsic::loongarch_lsx_vsrlni_b_h:
773 case Intrinsic::loongarch_lsx_vsrani_b_h:
774 case Intrinsic::loongarch_lsx_vsrlrni_b_h:
775 case Intrinsic::loongarch_lsx_vsrarni_b_h:
776 case Intrinsic::loongarch_lsx_vssrlni_b_h:
777 case Intrinsic::loongarch_lsx_vssrani_b_h:
778 case Intrinsic::loongarch_lsx_vssrlni_bu_h:
779 case Intrinsic::loongarch_lsx_vssrani_bu_h:
780 case Intrinsic::loongarch_lsx_vssrlrni_b_h:
781 case Intrinsic::loongarch_lsx_vssrarni_b_h:
782 case Intrinsic::loongarch_lsx_vssrlrni_bu_h:
783 case Intrinsic::loongarch_lsx_vssrarni_bu_h:
784 case Intrinsic::loongarch_lasx_xvsrlni_b_h:
785 case Intrinsic::loongarch_lasx_xvsrani_b_h:
786 case Intrinsic::loongarch_lasx_xvsrlrni_b_h:
787 case Intrinsic::loongarch_lasx_xvsrarni_b_h:
788 case Intrinsic::loongarch_lasx_xvssrlni_b_h:
789 case Intrinsic::loongarch_lasx_xvssrani_b_h:
790 case Intrinsic::loongarch_lasx_xvssrlni_bu_h:
791 case Intrinsic::loongarch_lasx_xvssrani_bu_h:
792 case Intrinsic::loongarch_lasx_xvssrlrni_b_h:
793 case Intrinsic::loongarch_lasx_xvssrarni_b_h:
794 case Intrinsic::loongarch_lasx_xvssrlrni_bu_h:
795 case Intrinsic::loongarch_lasx_xvssrarni_bu_h:
796 return checkIntrinsicImmArg<4>(Op, 3, DAG);
797 case Intrinsic::loongarch_lsx_vsat_w:
798 case Intrinsic::loongarch_lsx_vsat_wu:
799 case Intrinsic::loongarch_lsx_vrotri_w:
800 case Intrinsic::loongarch_lsx_vsllwil_d_w:
801 case Intrinsic::loongarch_lsx_vsllwil_du_wu:
802 case Intrinsic::loongarch_lsx_vsrlri_w:
803 case Intrinsic::loongarch_lsx_vsrari_w:
804 case Intrinsic::loongarch_lsx_vslei_bu:
805 case Intrinsic::loongarch_lsx_vslei_hu:
806 case Intrinsic::loongarch_lsx_vslei_wu:
807 case Intrinsic::loongarch_lsx_vslei_du:
808 case Intrinsic::loongarch_lsx_vslti_bu:
809 case Intrinsic::loongarch_lsx_vslti_hu:
810 case Intrinsic::loongarch_lsx_vslti_wu:
811 case Intrinsic::loongarch_lsx_vslti_du:
812 case Intrinsic::loongarch_lsx_vbsll_v:
813 case Intrinsic::loongarch_lsx_vbsrl_v:
814 case Intrinsic::loongarch_lasx_xvsat_w:
815 case Intrinsic::loongarch_lasx_xvsat_wu:
816 case Intrinsic::loongarch_lasx_xvrotri_w:
817 case Intrinsic::loongarch_lasx_xvsllwil_d_w:
818 case Intrinsic::loongarch_lasx_xvsllwil_du_wu:
819 case Intrinsic::loongarch_lasx_xvsrlri_w:
820 case Intrinsic::loongarch_lasx_xvsrari_w:
821 case Intrinsic::loongarch_lasx_xvslei_bu:
822 case Intrinsic::loongarch_lasx_xvslei_hu:
823 case Intrinsic::loongarch_lasx_xvslei_wu:
824 case Intrinsic::loongarch_lasx_xvslei_du:
825 case Intrinsic::loongarch_lasx_xvslti_bu:
826 case Intrinsic::loongarch_lasx_xvslti_hu:
827 case Intrinsic::loongarch_lasx_xvslti_wu:
828 case Intrinsic::loongarch_lasx_xvslti_du:
829 case Intrinsic::loongarch_lasx_xvbsll_v:
830 case Intrinsic::loongarch_lasx_xvbsrl_v:
831 return checkIntrinsicImmArg<5>(Op, 2, DAG);
832 case Intrinsic::loongarch_lsx_vseqi_b:
833 case Intrinsic::loongarch_lsx_vseqi_h:
834 case Intrinsic::loongarch_lsx_vseqi_w:
835 case Intrinsic::loongarch_lsx_vseqi_d:
836 case Intrinsic::loongarch_lsx_vslei_b:
837 case Intrinsic::loongarch_lsx_vslei_h:
838 case Intrinsic::loongarch_lsx_vslei_w:
839 case Intrinsic::loongarch_lsx_vslei_d:
840 case Intrinsic::loongarch_lsx_vslti_b:
841 case Intrinsic::loongarch_lsx_vslti_h:
842 case Intrinsic::loongarch_lsx_vslti_w:
843 case Intrinsic::loongarch_lsx_vslti_d:
844 case Intrinsic::loongarch_lasx_xvseqi_b:
845 case Intrinsic::loongarch_lasx_xvseqi_h:
846 case Intrinsic::loongarch_lasx_xvseqi_w:
847 case Intrinsic::loongarch_lasx_xvseqi_d:
848 case Intrinsic::loongarch_lasx_xvslei_b:
849 case Intrinsic::loongarch_lasx_xvslei_h:
850 case Intrinsic::loongarch_lasx_xvslei_w:
851 case Intrinsic::loongarch_lasx_xvslei_d:
852 case Intrinsic::loongarch_lasx_xvslti_b:
853 case Intrinsic::loongarch_lasx_xvslti_h:
854 case Intrinsic::loongarch_lasx_xvslti_w:
855 case Intrinsic::loongarch_lasx_xvslti_d:
856 return checkIntrinsicImmArg<5>(Op, 2, DAG, /*IsSigned=*/true);
857 case Intrinsic::loongarch_lsx_vsrlni_h_w:
858 case Intrinsic::loongarch_lsx_vsrani_h_w:
859 case Intrinsic::loongarch_lsx_vsrlrni_h_w:
860 case Intrinsic::loongarch_lsx_vsrarni_h_w:
861 case Intrinsic::loongarch_lsx_vssrlni_h_w:
862 case Intrinsic::loongarch_lsx_vssrani_h_w:
863 case Intrinsic::loongarch_lsx_vssrlni_hu_w:
864 case Intrinsic::loongarch_lsx_vssrani_hu_w:
865 case Intrinsic::loongarch_lsx_vssrlrni_h_w:
866 case Intrinsic::loongarch_lsx_vssrarni_h_w:
867 case Intrinsic::loongarch_lsx_vssrlrni_hu_w:
868 case Intrinsic::loongarch_lsx_vssrarni_hu_w:
869 case Intrinsic::loongarch_lsx_vfrstpi_b:
870 case Intrinsic::loongarch_lsx_vfrstpi_h:
871 case Intrinsic::loongarch_lasx_xvsrlni_h_w:
872 case Intrinsic::loongarch_lasx_xvsrani_h_w:
873 case Intrinsic::loongarch_lasx_xvsrlrni_h_w:
874 case Intrinsic::loongarch_lasx_xvsrarni_h_w:
875 case Intrinsic::loongarch_lasx_xvssrlni_h_w:
876 case Intrinsic::loongarch_lasx_xvssrani_h_w:
877 case Intrinsic::loongarch_lasx_xvssrlni_hu_w:
878 case Intrinsic::loongarch_lasx_xvssrani_hu_w:
879 case Intrinsic::loongarch_lasx_xvssrlrni_h_w:
880 case Intrinsic::loongarch_lasx_xvssrarni_h_w:
881 case Intrinsic::loongarch_lasx_xvssrlrni_hu_w:
882 case Intrinsic::loongarch_lasx_xvssrarni_hu_w:
883 case Intrinsic::loongarch_lasx_xvfrstpi_b:
884 case Intrinsic::loongarch_lasx_xvfrstpi_h:
885 return checkIntrinsicImmArg<5>(Op, 3, DAG);
886 case Intrinsic::loongarch_lsx_vsat_d:
887 case Intrinsic::loongarch_lsx_vsat_du:
888 case Intrinsic::loongarch_lsx_vrotri_d:
889 case Intrinsic::loongarch_lsx_vsrlri_d:
890 case Intrinsic::loongarch_lsx_vsrari_d:
891 case Intrinsic::loongarch_lasx_xvsat_d:
892 case Intrinsic::loongarch_lasx_xvsat_du:
893 case Intrinsic::loongarch_lasx_xvrotri_d:
894 case Intrinsic::loongarch_lasx_xvsrlri_d:
895 case Intrinsic::loongarch_lasx_xvsrari_d:
896 return checkIntrinsicImmArg<6>(Op, 2, DAG);
897 case Intrinsic::loongarch_lsx_vsrlni_w_d:
898 case Intrinsic::loongarch_lsx_vsrani_w_d:
899 case Intrinsic::loongarch_lsx_vsrlrni_w_d:
900 case Intrinsic::loongarch_lsx_vsrarni_w_d:
901 case Intrinsic::loongarch_lsx_vssrlni_w_d:
902 case Intrinsic::loongarch_lsx_vssrani_w_d:
903 case Intrinsic::loongarch_lsx_vssrlni_wu_d:
904 case Intrinsic::loongarch_lsx_vssrani_wu_d:
905 case Intrinsic::loongarch_lsx_vssrlrni_w_d:
906 case Intrinsic::loongarch_lsx_vssrarni_w_d:
907 case Intrinsic::loongarch_lsx_vssrlrni_wu_d:
908 case Intrinsic::loongarch_lsx_vssrarni_wu_d:
909 case Intrinsic::loongarch_lasx_xvsrlni_w_d:
910 case Intrinsic::loongarch_lasx_xvsrani_w_d:
911 case Intrinsic::loongarch_lasx_xvsrlrni_w_d:
912 case Intrinsic::loongarch_lasx_xvsrarni_w_d:
913 case Intrinsic::loongarch_lasx_xvssrlni_w_d:
914 case Intrinsic::loongarch_lasx_xvssrani_w_d:
915 case Intrinsic::loongarch_lasx_xvssrlni_wu_d:
916 case Intrinsic::loongarch_lasx_xvssrani_wu_d:
917 case Intrinsic::loongarch_lasx_xvssrlrni_w_d:
918 case Intrinsic::loongarch_lasx_xvssrarni_w_d:
919 case Intrinsic::loongarch_lasx_xvssrlrni_wu_d:
920 case Intrinsic::loongarch_lasx_xvssrarni_wu_d:
921 return checkIntrinsicImmArg<6>(Op, 3, DAG);
922 case Intrinsic::loongarch_lsx_vsrlni_d_q:
923 case Intrinsic::loongarch_lsx_vsrani_d_q:
924 case Intrinsic::loongarch_lsx_vsrlrni_d_q:
925 case Intrinsic::loongarch_lsx_vsrarni_d_q:
926 case Intrinsic::loongarch_lsx_vssrlni_d_q:
927 case Intrinsic::loongarch_lsx_vssrani_d_q:
928 case Intrinsic::loongarch_lsx_vssrlni_du_q:
929 case Intrinsic::loongarch_lsx_vssrani_du_q:
930 case Intrinsic::loongarch_lsx_vssrlrni_d_q:
931 case Intrinsic::loongarch_lsx_vssrarni_d_q:
932 case Intrinsic::loongarch_lsx_vssrlrni_du_q:
933 case Intrinsic::loongarch_lsx_vssrarni_du_q:
934 case Intrinsic::loongarch_lasx_xvsrlni_d_q:
935 case Intrinsic::loongarch_lasx_xvsrani_d_q:
936 case Intrinsic::loongarch_lasx_xvsrlrni_d_q:
937 case Intrinsic::loongarch_lasx_xvsrarni_d_q:
938 case Intrinsic::loongarch_lasx_xvssrlni_d_q:
939 case Intrinsic::loongarch_lasx_xvssrani_d_q:
940 case Intrinsic::loongarch_lasx_xvssrlni_du_q:
941 case Intrinsic::loongarch_lasx_xvssrani_du_q:
942 case Intrinsic::loongarch_lasx_xvssrlrni_d_q:
943 case Intrinsic::loongarch_lasx_xvssrarni_d_q:
944 case Intrinsic::loongarch_lasx_xvssrlrni_du_q:
945 case Intrinsic::loongarch_lasx_xvssrarni_du_q:
946 return checkIntrinsicImmArg<7>(Op, 3, DAG);
947 case Intrinsic::loongarch_lsx_vnori_b:
948 case Intrinsic::loongarch_lsx_vshuf4i_b:
949 case Intrinsic::loongarch_lsx_vshuf4i_h:
950 case Intrinsic::loongarch_lsx_vshuf4i_w:
951 case Intrinsic::loongarch_lasx_xvnori_b:
952 case Intrinsic::loongarch_lasx_xvshuf4i_b:
953 case Intrinsic::loongarch_lasx_xvshuf4i_h:
954 case Intrinsic::loongarch_lasx_xvshuf4i_w:
955 case Intrinsic::loongarch_lasx_xvpermi_d:
956 return checkIntrinsicImmArg<8>(Op, 2, DAG);
957 case Intrinsic::loongarch_lsx_vshuf4i_d:
958 case Intrinsic::loongarch_lsx_vpermi_w:
959 case Intrinsic::loongarch_lsx_vbitseli_b:
960 case Intrinsic::loongarch_lsx_vextrins_b:
961 case Intrinsic::loongarch_lsx_vextrins_h:
962 case Intrinsic::loongarch_lsx_vextrins_w:
963 case Intrinsic::loongarch_lsx_vextrins_d:
964 case Intrinsic::loongarch_lasx_xvshuf4i_d:
965 case Intrinsic::loongarch_lasx_xvpermi_w:
966 case Intrinsic::loongarch_lasx_xvpermi_q:
967 case Intrinsic::loongarch_lasx_xvbitseli_b:
968 case Intrinsic::loongarch_lasx_xvextrins_b:
969 case Intrinsic::loongarch_lasx_xvextrins_h:
970 case Intrinsic::loongarch_lasx_xvextrins_w:
971 case Intrinsic::loongarch_lasx_xvextrins_d:
972 return checkIntrinsicImmArg<8>(Op, 3, DAG);
973 case Intrinsic::loongarch_lsx_vrepli_b:
974 case Intrinsic::loongarch_lsx_vrepli_h:
975 case Intrinsic::loongarch_lsx_vrepli_w:
976 case Intrinsic::loongarch_lsx_vrepli_d:
977 case Intrinsic::loongarch_lasx_xvrepli_b:
978 case Intrinsic::loongarch_lasx_xvrepli_h:
979 case Intrinsic::loongarch_lasx_xvrepli_w:
980 case Intrinsic::loongarch_lasx_xvrepli_d:
981 return checkIntrinsicImmArg<10>(Op, 1, DAG, /*IsSigned=*/true);
982 case Intrinsic::loongarch_lsx_vldi:
983 case Intrinsic::loongarch_lasx_xvldi:
984 return checkIntrinsicImmArg<13>(Op, 1, DAG, /*IsSigned=*/true);
985 }
986}
987
988// Helper function that emits an error message for intrinsics with a chain and
989// returns the merge values of an UNDEF and the chain.
990static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op,
991 StringRef ErrorMsg,
992 SelectionDAG &DAG) {
993 DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
994 return DAG.getMergeValues({DAG.getUNDEF(Op.getValueType()), Op.getOperand(0)},
995 SDLoc(Op));
996}
997
998SDValue
999LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
1000 SelectionDAG &DAG) const {
1001 SDLoc DL(Op);
1002 MVT GRLenVT = Subtarget.getGRLenVT();
1003 EVT VT = Op.getValueType();
1004 SDValue Chain = Op.getOperand(0);
1005 const StringRef ErrorMsgOOR = "argument out of range";
1006 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
1007 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
1008
1009 switch (Op.getConstantOperandVal(1)) {
1010 default:
1011 return Op;
1012 case Intrinsic::loongarch_crc_w_b_w:
1013 case Intrinsic::loongarch_crc_w_h_w:
1014 case Intrinsic::loongarch_crc_w_w_w:
1015 case Intrinsic::loongarch_crc_w_d_w:
1016 case Intrinsic::loongarch_crcc_w_b_w:
1017 case Intrinsic::loongarch_crcc_w_h_w:
1018 case Intrinsic::loongarch_crcc_w_w_w:
1019 case Intrinsic::loongarch_crcc_w_d_w:
1020 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqLA64, DAG);
1021 case Intrinsic::loongarch_csrrd_w:
1022 case Intrinsic::loongarch_csrrd_d: {
1023 unsigned Imm = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
1024 return !isUInt<14>(Imm)
1025 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
1026 : DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
1027 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
1028 }
1029 case Intrinsic::loongarch_csrwr_w:
1030 case Intrinsic::loongarch_csrwr_d: {
1031 unsigned Imm = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
1032 return !isUInt<14>(Imm)
1033 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
1034 : DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
1035 {Chain, Op.getOperand(2),
1036 DAG.getConstant(Imm, DL, GRLenVT)});
1037 }
1038 case Intrinsic::loongarch_csrxchg_w:
1039 case Intrinsic::loongarch_csrxchg_d: {
1040 unsigned Imm = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
1041 return !isUInt<14>(Imm)
1042 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
1043 : DAG.getNode(LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
1044 {Chain, Op.getOperand(2), Op.getOperand(3),
1045 DAG.getConstant(Imm, DL, GRLenVT)});
1046 }
1047 case Intrinsic::loongarch_iocsrrd_d: {
1048 return DAG.getNode(
1049 LoongArchISD::IOCSRRD_D, DL, {GRLenVT, MVT::Other},
1050 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2))});
1051 }
1052#define IOCSRRD_CASE(NAME, NODE) \
1053 case Intrinsic::loongarch_##NAME: { \
1054 return DAG.getNode(LoongArchISD::NODE, DL, {GRLenVT, MVT::Other}, \
1055 {Chain, Op.getOperand(2)}); \
1056 }
1057 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
1058 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
1059 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
1060#undef IOCSRRD_CASE
1061 case Intrinsic::loongarch_cpucfg: {
1062 return DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
1063 {Chain, Op.getOperand(2)});
1064 }
1065 case Intrinsic::loongarch_lddir_d: {
1066 unsigned Imm = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
1067 return !isUInt<8>(Imm)
1068 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
1069 : Op;
1070 }
1071 case Intrinsic::loongarch_movfcsr2gr: {
1072 if (!Subtarget.hasBasicF())
1073 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqF, DAG);
1074 unsigned Imm = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
1075 return !isUInt<2>(Imm)
1076 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
1077 : DAG.getNode(LoongArchISD::MOVFCSR2GR, DL, {VT, MVT::Other},
1078 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
1079 }
1080 case Intrinsic::loongarch_lsx_vld:
1081 case Intrinsic::loongarch_lsx_vldrepl_b:
1082 case Intrinsic::loongarch_lasx_xvld:
1083 case Intrinsic::loongarch_lasx_xvldrepl_b:
1084 return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
1085 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
1086 : SDValue();
1087 case Intrinsic::loongarch_lsx_vldrepl_h:
1088 case Intrinsic::loongarch_lasx_xvldrepl_h:
1089 return !isShiftedInt<11, 1>(
1090 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
1091 ? emitIntrinsicWithChainErrorMessage(
1092 Op, "argument out of range or not a multiple of 2", DAG)
1093 : SDValue();
1094 case Intrinsic::loongarch_lsx_vldrepl_w:
1095 case Intrinsic::loongarch_lasx_xvldrepl_w:
1096 return !isShiftedInt<10, 2>(
1097 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
1098 ? emitIntrinsicWithChainErrorMessage(
1099 Op, "argument out of range or not a multiple of 4", DAG)
1100 : SDValue();
1101 case Intrinsic::loongarch_lsx_vldrepl_d:
1102 case Intrinsic::loongarch_lasx_xvldrepl_d:
1103 return !isShiftedInt<9, 3>(
1104 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
1105 ? emitIntrinsicWithChainErrorMessage(
1106 Op, "argument out of range or not a multiple of 8", DAG)
1107 : SDValue();
1108 }
1109}
1110
1111// Helper function that emits an error message for intrinsics with a void
1112// return value and returns the chain.
1113static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg,
1114 SelectionDAG &DAG) {
1115
1116 DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
1117 return Op.getOperand(0);
1118}
1119
1120SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op,
1121 SelectionDAG &DAG) const {
1122 SDLoc DL(Op);
1123 MVT GRLenVT = Subtarget.getGRLenVT();
1124 SDValue Chain = Op.getOperand(0);
1125 uint64_t IntrinsicEnum = Op.getConstantOperandVal(1);
1126 SDValue Op2 = Op.getOperand(2);
1127 const StringRef ErrorMsgOOR = "argument out of range";
1128 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
1129 const StringRef ErrorMsgReqLA32 = "requires loongarch32";
1130 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
1131
1132 switch (IntrinsicEnum) {
1133 default:
1134 // TODO: Add more Intrinsics.
1135 return SDValue();
1136 case Intrinsic::loongarch_cacop_d:
1137 case Intrinsic::loongarch_cacop_w: {
1138 if (IntrinsicEnum == Intrinsic::loongarch_cacop_d && !Subtarget.is64Bit())
1139 return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG);
1140 if (IntrinsicEnum == Intrinsic::loongarch_cacop_w && Subtarget.is64Bit())
1141 return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA32, DAG);
1142 // call void @llvm.loongarch.cacop.[d/w](uimm5, rj, simm12)
1143 unsigned Imm1 = cast<ConstantSDNode>(Op2)->getZExtValue();
1144 int Imm2 = cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue();
1145 if (!isUInt<5>(Imm1) || !isInt<12>(Imm2))
1146 return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
1147 return Op;
1148 }
1149 case Intrinsic::loongarch_dbar: {
1150 unsigned Imm = cast<ConstantSDNode>(Op2)->getZExtValue();
1151 return !isUInt<15>(Imm)
1152 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
1153 : DAG.getNode(LoongArchISD::DBAR, DL, MVT::Other, Chain,
1154 DAG.getConstant(Imm, DL, GRLenVT));
1155 }
1156 case Intrinsic::loongarch_ibar: {
1157 unsigned Imm = cast<ConstantSDNode>(Op2)->getZExtValue();
1158 return !isUInt<15>(Imm)
1159 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
1160 : DAG.getNode(LoongArchISD::IBAR, DL, MVT::Other, Chain,
1161 DAG.getConstant(Imm, DL, GRLenVT));
1162 }
1163 case Intrinsic::loongarch_break: {
1164 unsigned Imm = cast<ConstantSDNode>(Op2)->getZExtValue();
1165 return !isUInt<15>(Imm)
1166 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
1167 : DAG.getNode(LoongArchISD::BREAK, DL, MVT::Other, Chain,
1168 DAG.getConstant(Imm, DL, GRLenVT));
1169 }
1170 case Intrinsic::loongarch_movgr2fcsr: {
1171 if (!Subtarget.hasBasicF())
1172 return emitIntrinsicErrorMessage(Op, ErrorMsgReqF, DAG);
1173 unsigned Imm = cast<ConstantSDNode>(Op2)->getZExtValue();
1174 return !isUInt<2>(Imm)
1175 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
1176 : DAG.getNode(LoongArchISD::MOVGR2FCSR, DL, MVT::Other, Chain,
1177 DAG.getConstant(Imm, DL, GRLenVT),
1178 DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT,
1179 Op.getOperand(3)));
1180 }
1181 case Intrinsic::loongarch_syscall: {
1182 unsigned Imm = cast<ConstantSDNode>(Op2)->getZExtValue();
1183 return !isUInt<15>(Imm)
1184 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
1185 : DAG.getNode(LoongArchISD::SYSCALL, DL, MVT::Other, Chain,
1186 DAG.getConstant(Imm, DL, GRLenVT));
1187 }
1188#define IOCSRWR_CASE(NAME, NODE) \
1189 case Intrinsic::loongarch_##NAME: { \
1190 SDValue Op3 = Op.getOperand(3); \
1191 return Subtarget.is64Bit() \
1192 ? DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, \
1193 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
1194 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op3)) \
1195 : DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, Op2, \
1196 Op3); \
1197 }
1198 IOCSRWR_CASE(iocsrwr_b, IOCSRWR_B);
1199 IOCSRWR_CASE(iocsrwr_h, IOCSRWR_H);
1200 IOCSRWR_CASE(iocsrwr_w, IOCSRWR_W);
1201#undef IOCSRWR_CASE
1202 case Intrinsic::loongarch_iocsrwr_d: {
1203 return !Subtarget.is64Bit()
1204 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
1205 : DAG.getNode(LoongArchISD::IOCSRWR_D, DL, MVT::Other, Chain,
1206 Op2,
1207 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
1208 Op.getOperand(3)));
1209 }
1210#define ASRT_LE_GT_CASE(NAME) \
1211 case Intrinsic::loongarch_##NAME: { \
1212 return !Subtarget.is64Bit() \
1213 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG) \
1214 : Op; \
1215 }
1216 ASRT_LE_GT_CASE(asrtle_d)
1217 ASRT_LE_GT_CASE(asrtgt_d)
1218#undef ASRT_LE_GT_CASE
1219 case Intrinsic::loongarch_ldpte_d: {
1220 unsigned Imm = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
1221 return !Subtarget.is64Bit()
1222 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
1223 : !isUInt<8>(Imm) ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
1224 : Op;
1225 }
1226 case Intrinsic::loongarch_lsx_vst:
1227 case Intrinsic::loongarch_lasx_xvst:
1228 return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue())
1229 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
1230 : SDValue();
1231 case Intrinsic::loongarch_lasx_xvstelm_b:
1232 return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
1233 !isUInt<5>(cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue()))
1234 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
1235 : SDValue();
1236 case Intrinsic::loongarch_lsx_vstelm_b:
1237 return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
1238 !isUInt<4>(cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue()))
1239 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
1240 : SDValue();
1241 case Intrinsic::loongarch_lasx_xvstelm_h:
1242 return (!isShiftedInt<8, 1>(
1243 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
1244 !isUInt<4>(cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue()))
1245 ? emitIntrinsicErrorMessage(
1246 Op, "argument out of range or not a multiple of 2", DAG)
1247 : SDValue();
1248 case Intrinsic::loongarch_lsx_vstelm_h:
1249 return (!isShiftedInt<8, 1>(
1250 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
1251 !isUInt<3>(cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue()))
1252 ? emitIntrinsicErrorMessage(
1253 Op, "argument out of range or not a multiple of 2", DAG)
1254 : SDValue();
1255 case Intrinsic::loongarch_lasx_xvstelm_w:
1256 return (!isShiftedInt<8, 2>(
1257 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
1258 !isUInt<3>(cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue()))
1259 ? emitIntrinsicErrorMessage(
1260 Op, "argument out of range or not a multiple of 4", DAG)
1261 : SDValue();
1262 case Intrinsic::loongarch_lsx_vstelm_w:
1263 return (!isShiftedInt<8, 2>(
1264 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
1265 !isUInt<2>(cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue()))
1266 ? emitIntrinsicErrorMessage(
1267 Op, "argument out of range or not a multiple of 4", DAG)
1268 : SDValue();
1269 case Intrinsic::loongarch_lasx_xvstelm_d:
1270 return (!isShiftedInt<8, 3>(
1271 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
1272 !isUInt<2>(cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue()))
1273 ? emitIntrinsicErrorMessage(
1274 Op, "argument out of range or not a multiple of 8", DAG)
1275 : SDValue();
1276 case Intrinsic::loongarch_lsx_vstelm_d:
1277 return (!isShiftedInt<8, 3>(
1278 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
1279 !isUInt<1>(cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue()))
1280 ? emitIntrinsicErrorMessage(
1281 Op, "argument out of range or not a multiple of 8", DAG)
1282 : SDValue();
1283 }
1284}
1285
1286SDValue LoongArchTargetLowering::lowerShiftLeftParts(SDValue Op,
1287 SelectionDAG &DAG) const {
1288 SDLoc DL(Op);
1289 SDValue Lo = Op.getOperand(0);
1290 SDValue Hi = Op.getOperand(1);
1291 SDValue Shamt = Op.getOperand(2);
1292 EVT VT = Lo.getValueType();
1293
1294 // if Shamt-GRLen < 0: // Shamt < GRLen
1295 // Lo = Lo << Shamt
1296 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (GRLen-1 ^ Shamt))
1297 // else:
1298 // Lo = 0
1299 // Hi = Lo << (Shamt-GRLen)
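  // For example, splitting a 64-bit shift on LA32 (GRLen == 32) with
  // Shamt == 40 takes the 'else' branch: Lo becomes 0 and Hi becomes
  // Lo << (40 - 32), i.e. Lo << 8.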
1300
1301 SDValue Zero = DAG.getConstant(0, DL, VT);
1302 SDValue One = DAG.getConstant(1, DL, VT);
1303 SDValue MinusGRLen = DAG.getConstant(-(int)Subtarget.getGRLen(), DL, VT);
1304 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
1305 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
1306 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
1307
1308 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
1309 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
1310 SDValue ShiftRightLo =
1311 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, GRLenMinus1Shamt);
1312 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
1313 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
1314 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusGRLen);
1315
1316 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
1317
1318 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
1319 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
1320
1321 SDValue Parts[2] = {Lo, Hi};
1322 return DAG.getMergeValues(Parts, DL);
1323}
1324
1325SDValue LoongArchTargetLowering::lowerShiftRightParts(SDValue Op,
1326 SelectionDAG &DAG,
1327 bool IsSRA) const {
1328 SDLoc DL(Op);
1329 SDValue Lo = Op.getOperand(0);
1330 SDValue Hi = Op.getOperand(1);
1331 SDValue Shamt = Op.getOperand(2);
1332 EVT VT = Lo.getValueType();
1333
1334 // SRA expansion:
1335 // if Shamt-GRLen < 0: // Shamt < GRLen
1336 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
1337 // Hi = Hi >>s Shamt
1338 // else:
1339 // Lo = Hi >>s (Shamt-GRLen);
1340 // Hi = Hi >>s (GRLen-1)
1341 //
1342 // SRL expansion:
1343 // if Shamt-GRLen < 0: // Shamt < GRLen
1344 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
1345 // Hi = Hi >>u Shamt
1346 // else:
1347 // Lo = Hi >>u (Shamt-GRLen);
1348 // Hi = 0;
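  // For example, an i64 logical shift right by 36 on LA32 takes the 'else'
  // branch: Lo becomes Hi >>u 4 and Hi becomes 0.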
1349
1350 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
1351
1352 SDValue Zero = DAG.getConstant(0, DL, VT);
1353 SDValue One = DAG.getConstant(1, DL, VT);
1354 SDValue MinusGRLen = DAG.getConstant(-(int)Subtarget.getGRLen(), DL, VT);
1355 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
1356 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
1357 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
1358
1359 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
1360 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
1361 SDValue ShiftLeftHi =
1362 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, GRLenMinus1Shamt);
1363 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
1364 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
1365 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusGRLen);
1366 SDValue HiFalse =
1367 IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, GRLenMinus1) : Zero;
1368
1369 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
1370
1371 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
1372 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
1373
1374 SDValue Parts[2] = {Lo, Hi};
1375 return DAG.getMergeValues(Parts, DL);
1376}
1377
1378// Returns the opcode of the target-specific SDNode that implements the 32-bit
1379// form of the given Opcode.
1381 switch (Opcode) {
1382 default:
1383 llvm_unreachable("Unexpected opcode");
1384 case ISD::SHL:
1385 return LoongArchISD::SLL_W;
1386 case ISD::SRA:
1387 return LoongArchISD::SRA_W;
1388 case ISD::SRL:
1389 return LoongArchISD::SRL_W;
1390 case ISD::ROTR:
1391 return LoongArchISD::ROTR_W;
1392 case ISD::ROTL:
1393 return LoongArchISD::ROTL_W;
1394 case ISD::CTTZ:
1395 return LoongArchISD::CTZ_W;
1396 case ISD::CTLZ:
1397 return LoongArchISD::CLZ_W;
1398 }
1399}
1400
1401// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
1402// node. Because i8/i16/i32 isn't a legal type for LA64, these operations would
1403// otherwise be promoted to i64, making it difficult to select the
1404// SLL_W/.../*W instructions later, because the fact that the operation was
1405// originally of type i8/i16/i32 is lost.
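// For example, (i32 (shl x, y)) on LA64 becomes
// (trunc i64 (SLL_W (any_extend x), (any_extend y))).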
1406static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp,
1407 unsigned ExtOpc = ISD::ANY_EXTEND) {
1408 SDLoc DL(N);
1409 LoongArchISD::NodeType WOpcode = getLoongArchWOpcode(N->getOpcode());
1410 SDValue NewOp0, NewRes;
1411
1412 switch (NumOp) {
1413 default:
1414 llvm_unreachable("Unexpected NumOp");
1415 case 1: {
1416 NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
1417 NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0);
1418 break;
1419 }
1420 case 2: {
1421 NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
1422 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
1423 NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
1424 break;
1425 }
1426 // TODO:Handle more NumOp.
1427 }
1428
1429 // ReplaceNodeResults requires we maintain the same type for the return
1430 // value.
1431 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
1432}
1433
1434// Helper function that emits an error message for intrinsics with or without a
1435// chain and returns an UNDEF and/or the chain as the results.
1436static void emitErrorAndReplaceIntrinsicResults(
1437 SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG,
1438 StringRef ErrorMsg, bool WithChain = true) {
1439 DAG.getContext()->emitError(N->getOperationName(0) + ": " + ErrorMsg + ".");
1440 Results.push_back(DAG.getUNDEF(N->getValueType(0)));
1441 if (!WithChain)
1442 return;
1443 Results.push_back(N->getOperand(0));
1444}
1445
1446template <unsigned N>
1447static void
1448replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl<SDValue> &Results,
1449 SelectionDAG &DAG, const LoongArchSubtarget &Subtarget,
1450 unsigned ResOp) {
1451 const StringRef ErrorMsgOOR = "argument out of range";
1452 unsigned Imm = cast<ConstantSDNode>(Node->getOperand(2))->getZExtValue();
1453 if (!isUInt<N>(Imm)) {
1454 emitErrorAndReplaceIntrinsicResults(Node, Results, DAG, ErrorMsgOOR,
1455 /*WithChain=*/false);
1456 return;
1457 }
1458 SDLoc DL(Node);
1459 SDValue Vec = Node->getOperand(1);
1460
1461 SDValue PickElt =
1462 DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec,
1463 DAG.getConstant(Imm, DL, Subtarget.getGRLenVT()),
1464 DAG.getValueType(Vec.getValueType().getVectorElementType()));
1465 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, Node->getValueType(0),
1466 PickElt.getValue(0)));
1467}
1468
1469static SDValue replaceVecCondBranchResults(SDNode *N,
1470 SmallVectorImpl<SDValue> &Results,
1471 SelectionDAG &DAG,
1472 const LoongArchSubtarget &Subtarget,
1473 unsigned ResOp) {
1474 SDLoc DL(N);
1475 SDValue Vec = N->getOperand(1);
1476
1477 SDValue CB = DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec);
1478 Results.push_back(
1479 DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), CB.getValue(0)));
1480}
1481
1482static void
1483replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
1484 SelectionDAG &DAG,
1485 const LoongArchSubtarget &Subtarget) {
1486 switch (N->getConstantOperandVal(0)) {
1487 default:
1488 llvm_unreachable("Unexpected Intrinsic.");
1489 case Intrinsic::loongarch_lsx_vpickve2gr_b:
1490 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
1492 break;
1493 case Intrinsic::loongarch_lsx_vpickve2gr_h:
1494 case Intrinsic::loongarch_lasx_xvpickve2gr_w:
1495 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
1497 break;
1498 case Intrinsic::loongarch_lsx_vpickve2gr_w:
1499 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
1501 break;
1502 case Intrinsic::loongarch_lsx_vpickve2gr_bu:
1503 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
1505 break;
1506 case Intrinsic::loongarch_lsx_vpickve2gr_hu:
1507 case Intrinsic::loongarch_lasx_xvpickve2gr_wu:
1508 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
1510 break;
1511 case Intrinsic::loongarch_lsx_vpickve2gr_wu:
1512 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
1514 break;
1515 case Intrinsic::loongarch_lsx_bz_b:
1516 case Intrinsic::loongarch_lsx_bz_h:
1517 case Intrinsic::loongarch_lsx_bz_w:
1518 case Intrinsic::loongarch_lsx_bz_d:
1519 case Intrinsic::loongarch_lasx_xbz_b:
1520 case Intrinsic::loongarch_lasx_xbz_h:
1521 case Intrinsic::loongarch_lasx_xbz_w:
1522 case Intrinsic::loongarch_lasx_xbz_d:
1523 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
1525 break;
1526 case Intrinsic::loongarch_lsx_bz_v:
1527 case Intrinsic::loongarch_lasx_xbz_v:
1528 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
1530 break;
1531 case Intrinsic::loongarch_lsx_bnz_b:
1532 case Intrinsic::loongarch_lsx_bnz_h:
1533 case Intrinsic::loongarch_lsx_bnz_w:
1534 case Intrinsic::loongarch_lsx_bnz_d:
1535 case Intrinsic::loongarch_lasx_xbnz_b:
1536 case Intrinsic::loongarch_lasx_xbnz_h:
1537 case Intrinsic::loongarch_lasx_xbnz_w:
1538 case Intrinsic::loongarch_lasx_xbnz_d:
1539 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
1541 break;
1542 case Intrinsic::loongarch_lsx_bnz_v:
1543 case Intrinsic::loongarch_lasx_xbnz_v:
1544 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
1546 break;
1547 }
1548}
1549
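// Custom results-type legalisation hook, mainly for i8/i16/i32 results on
// LA64: nodes are rewritten to their 64-bit '*_W' forms or to the appropriate
// LoongArchISD nodes, and the result is truncated back to the original type
// before being pushed into Results.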
1550void LoongArchTargetLowering::ReplaceNodeResults(
1551 SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
1552 SDLoc DL(N);
1553 EVT VT = N->getValueType(0);
1554 switch (N->getOpcode()) {
1555 default:
1556 llvm_unreachable("Don't know how to legalize this operation");
1557 case ISD::SHL:
1558 case ISD::SRA:
1559 case ISD::SRL:
1560 case ISD::ROTR:
1561 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
1562 "Unexpected custom legalisation");
1563 if (N->getOperand(1).getOpcode() != ISD::Constant) {
1564 Results.push_back(customLegalizeToWOp(N, DAG, 2));
1565 break;
1566 }
1567 break;
1568 case ISD::ROTL:
1569 ConstantSDNode *CN;
1570 if ((CN = dyn_cast<ConstantSDNode>(N->getOperand(1)))) {
1571 Results.push_back(customLegalizeToWOp(N, DAG, 2));
1572 break;
1573 }
1574 break;
1575 case ISD::FP_TO_SINT: {
1576 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
1577 "Unexpected custom legalisation");
1578 SDValue Src = N->getOperand(0);
1579 EVT FVT = EVT::getFloatingPointVT(N->getValueSizeInBits(0));
1580 if (getTypeAction(*DAG.getContext(), Src.getValueType()) !=
1581 TargetLowering::TypeSoftenFloat) {
1582 SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, FVT, Src);
1583 Results.push_back(DAG.getNode(ISD::BITCAST, DL, VT, Dst));
1584 return;
1585 }
1586 // If the FP type needs to be softened, emit a library call using the 'si'
1587 // version. If we left it to default legalization we'd end up with 'di'.
1588 RTLIB::Libcall LC;
1589 LC = RTLIB::getFPTOSINT(Src.getValueType(), VT);
1590 MakeLibCallOptions CallOptions;
1591 EVT OpVT = Src.getValueType();
1592 CallOptions.setTypeListBeforeSoften(OpVT, VT, true);
1593 SDValue Chain = SDValue();
1594 SDValue Result;
1595 std::tie(Result, Chain) =
1596 makeLibCall(DAG, LC, VT, Src, CallOptions, DL, Chain);
1597 Results.push_back(Result);
1598 break;
1599 }
1600 case ISD::BITCAST: {
1601 SDValue Src = N->getOperand(0);
1602 EVT SrcVT = Src.getValueType();
1603 if (VT == MVT::i32 && SrcVT == MVT::f32 && Subtarget.is64Bit() &&
1604 Subtarget.hasBasicF()) {
1605 SDValue Dst =
1606 DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Src);
1607 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Dst));
1608 }
1609 break;
1610 }
1611 case ISD::FP_TO_UINT: {
1612 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
1613 "Unexpected custom legalisation");
1614 auto &TLI = DAG.getTargetLoweringInfo();
1615 SDValue Tmp1, Tmp2;
1616 TLI.expandFP_TO_UINT(N, Tmp1, Tmp2, DAG);
1617 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Tmp1));
1618 break;
1619 }
1620 case ISD::BSWAP: {
1621 SDValue Src = N->getOperand(0);
1622 assert((VT == MVT::i16 || VT == MVT::i32) &&
1623 "Unexpected custom legalization");
1624 MVT GRLenVT = Subtarget.getGRLenVT();
1625 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
1626 SDValue Tmp;
1627 switch (VT.getSizeInBits()) {
1628 default:
1629 llvm_unreachable("Unexpected operand width");
1630 case 16:
1631 Tmp = DAG.getNode(LoongArchISD::REVB_2H, DL, GRLenVT, NewSrc);
1632 break;
1633 case 32:
1634 // Only LA64 will get here due to the size mismatch between VT and
1635 // GRLenVT; LA32 lowering is directly defined in LoongArchInstrInfo.
1636 Tmp = DAG.getNode(LoongArchISD::REVB_2W, DL, GRLenVT, NewSrc);
1637 break;
1638 }
1639 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
1640 break;
1641 }
1642 case ISD::BITREVERSE: {
1643 SDValue Src = N->getOperand(0);
1644 assert((VT == MVT::i8 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
1645 "Unexpected custom legalization");
1646 MVT GRLenVT = Subtarget.getGRLenVT();
1647 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
1648 SDValue Tmp;
1649 switch (VT.getSizeInBits()) {
1650 default:
1651 llvm_unreachable("Unexpected operand width");
1652 case 8:
1653 Tmp = DAG.getNode(LoongArchISD::BITREV_4B, DL, GRLenVT, NewSrc);
1654 break;
1655 case 32:
1656 Tmp = DAG.getNode(LoongArchISD::BITREV_W, DL, GRLenVT, NewSrc);
1657 break;
1658 }
1659 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
1660 break;
1661 }
1662 case ISD::CTLZ:
1663 case ISD::CTTZ: {
1664 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
1665 "Unexpected custom legalisation");
1666 Results.push_back(customLegalizeToWOp(N, DAG, 1));
1667 break;
1668 }
1669 case ISD::INTRINSIC_W_CHAIN: {
1670 SDValue Chain = N->getOperand(0);
1671 SDValue Op2 = N->getOperand(2);
1672 MVT GRLenVT = Subtarget.getGRLenVT();
1673 const StringRef ErrorMsgOOR = "argument out of range";
1674 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
1675 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
1676
1677 switch (N->getConstantOperandVal(1)) {
1678 default:
1679 llvm_unreachable("Unexpected Intrinsic.");
1680 case Intrinsic::loongarch_movfcsr2gr: {
1681 if (!Subtarget.hasBasicF()) {
1682 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqF);
1683 return;
1684 }
1685 unsigned Imm = cast<ConstantSDNode>(Op2)->getZExtValue();
1686 if (!isUInt<2>(Imm)) {
1687 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
1688 return;
1689 }
1690 SDValue MOVFCSR2GRResults = DAG.getNode(
1691 LoongArchISD::MOVFCSR2GR, SDLoc(N), {MVT::i64, MVT::Other},
1692 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
1693 Results.push_back(
1694 DAG.getNode(ISD::TRUNCATE, DL, VT, MOVFCSR2GRResults.getValue(0)));
1695 Results.push_back(MOVFCSR2GRResults.getValue(1));
1696 break;
1697 }
1698#define CRC_CASE_EXT_BINARYOP(NAME, NODE) \
1699 case Intrinsic::loongarch_##NAME: { \
1700 SDValue NODE = DAG.getNode( \
1701 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
1702 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
1703 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
1704 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
1705 Results.push_back(NODE.getValue(1)); \
1706 break; \
1707 }
1708 CRC_CASE_EXT_BINARYOP(crc_w_b_w, CRC_W_B_W)
1709 CRC_CASE_EXT_BINARYOP(crc_w_h_w, CRC_W_H_W)
1710 CRC_CASE_EXT_BINARYOP(crc_w_w_w, CRC_W_W_W)
1711 CRC_CASE_EXT_BINARYOP(crcc_w_b_w, CRCC_W_B_W)
1712 CRC_CASE_EXT_BINARYOP(crcc_w_h_w, CRCC_W_H_W)
1713 CRC_CASE_EXT_BINARYOP(crcc_w_w_w, CRCC_W_W_W)
1714#undef CRC_CASE_EXT_BINARYOP
1715
1716#define CRC_CASE_EXT_UNARYOP(NAME, NODE) \
1717 case Intrinsic::loongarch_##NAME: { \
1718 SDValue NODE = DAG.getNode( \
1719 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
1720 {Chain, Op2, \
1721 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
1722 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
1723 Results.push_back(NODE.getValue(1)); \
1724 break; \
1725 }
1726 CRC_CASE_EXT_UNARYOP(crc_w_d_w, CRC_W_D_W)
1727 CRC_CASE_EXT_UNARYOP(crcc_w_d_w, CRCC_W_D_W)
1728#undef CRC_CASE_EXT_UNARYOP
1729#define CSR_CASE(ID) \
1730 case Intrinsic::loongarch_##ID: { \
1731 if (!Subtarget.is64Bit()) \
1732 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64); \
1733 break; \
1734 }
1735 CSR_CASE(csrrd_d);
1736 CSR_CASE(csrwr_d);
1737 CSR_CASE(csrxchg_d);
1738 CSR_CASE(iocsrrd_d);
1739#undef CSR_CASE
1740 case Intrinsic::loongarch_csrrd_w: {
1741 unsigned Imm = cast<ConstantSDNode>(Op2)->getZExtValue();
1742 if (!isUInt<14>(Imm)) {
1743 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
1744 return;
1745 }
1746 SDValue CSRRDResults =
1747 DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
1748 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
1749 Results.push_back(
1750 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRRDResults.getValue(0)));
1751 Results.push_back(CSRRDResults.getValue(1));
1752 break;
1753 }
1754 case Intrinsic::loongarch_csrwr_w: {
1755 unsigned Imm = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
1756 if (!isUInt<14>(Imm)) {
1757 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
1758 return;
1759 }
1760 SDValue CSRWRResults =
1761 DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
1762 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
1763 DAG.getConstant(Imm, DL, GRLenVT)});
1764 Results.push_back(
1765 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRWRResults.getValue(0)));
1766 Results.push_back(CSRWRResults.getValue(1));
1767 break;
1768 }
1769 case Intrinsic::loongarch_csrxchg_w: {
1770 unsigned Imm = cast<ConstantSDNode>(N->getOperand(4))->getZExtValue();
1771 if (!isUInt<14>(Imm)) {
1772 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
1773 return;
1774 }
1775 SDValue CSRXCHGResults = DAG.getNode(
1776 LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
1777 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
1778 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3)),
1779 DAG.getConstant(Imm, DL, GRLenVT)});
1780 Results.push_back(
1781 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRXCHGResults.getValue(0)));
1782 Results.push_back(CSRXCHGResults.getValue(1));
1783 break;
1784 }
1785#define IOCSRRD_CASE(NAME, NODE) \
1786 case Intrinsic::loongarch_##NAME: { \
1787 SDValue IOCSRRDResults = \
1788 DAG.getNode(LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
1789 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)}); \
1790 Results.push_back( \
1791 DAG.getNode(ISD::TRUNCATE, DL, VT, IOCSRRDResults.getValue(0))); \
1792 Results.push_back(IOCSRRDResults.getValue(1)); \
1793 break; \
1794 }
1795 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
1796 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
1797 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
1798#undef IOCSRRD_CASE
1799 case Intrinsic::loongarch_cpucfg: {
1800 SDValue CPUCFGResults =
1801 DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
1802 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)});
1803 Results.push_back(
1804 DAG.getNode(ISD::TRUNCATE, DL, VT, CPUCFGResults.getValue(0)));
1805 Results.push_back(CPUCFGResults.getValue(1));
1806 break;
1807 }
1808 case Intrinsic::loongarch_lddir_d: {
1809 if (!Subtarget.is64Bit()) {
1810 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64);
1811 return;
1812 }
1813 break;
1814 }
1815 }
1816 break;
1817 }
1818 case ISD::READ_REGISTER: {
1819 if (Subtarget.is64Bit())
1820 DAG.getContext()->emitError(
1821 "On LA64, only 64-bit registers can be read.");
1822 else
1823 DAG.getContext()->emitError(
1824 "On LA32, only 32-bit registers can be read.");
1825 Results.push_back(DAG.getUNDEF(VT));
1826 Results.push_back(N->getOperand(0));
1827 break;
1828 }
1829 case ISD::INTRINSIC_WO_CHAIN: {
1830 replaceINTRINSIC_WO_CHAINResults(N, Results, DAG, Subtarget);
1831 break;
1832 }
1833 }
1834}
1835
1836 static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
1837 TargetLowering::DAGCombinerInfo &DCI,
1838 const LoongArchSubtarget &Subtarget) {
1839 if (DCI.isBeforeLegalizeOps())
1840 return SDValue();
1841
1842 SDValue FirstOperand = N->getOperand(0);
1843 SDValue SecondOperand = N->getOperand(1);
1844 unsigned FirstOperandOpc = FirstOperand.getOpcode();
1845 EVT ValTy = N->getValueType(0);
1846 SDLoc DL(N);
1847 uint64_t lsb, msb;
1848 unsigned SMIdx, SMLen;
1849 ConstantSDNode *CN;
1850 SDValue NewOperand;
1851 MVT GRLenVT = Subtarget.getGRLenVT();
1852
1853 // Op's second operand must be a shifted mask.
1854 if (!(CN = dyn_cast<ConstantSDNode>(SecondOperand)) ||
1855 !isShiftedMask_64(CN->getZExtValue(), SMIdx, SMLen))
1856 return SDValue();
1857
1858 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL) {
1859 // Pattern match BSTRPICK.
1860 // $dst = and ((sra or srl) $src , lsb), (2**len - 1)
1861 // => BSTRPICK $dst, $src, msb, lsb
1862 // where msb = lsb + len - 1
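// Illustrative example: (and (srl $src, 4), 0xfff) has lsb = 4 and len = 12,
// so it becomes (BSTRPICK $src, 15, 4).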
1863
1864 // The second operand of the shift must be an immediate.
1865 if (!(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))))
1866 return SDValue();
1867
1868 lsb = CN->getZExtValue();
1869
1870 // Return if the shifted mask does not start at bit 0 or the sum of its
1871 // length and lsb exceeds the word's size.
1872 if (SMIdx != 0 || lsb + SMLen > ValTy.getSizeInBits())
1873 return SDValue();
1874
1875 NewOperand = FirstOperand.getOperand(0);
1876 } else {
1877 // Pattern match BSTRPICK.
1878 // $dst = and $src, (2**len - 1), if len > 12
1879 // => BSTRPICK $dst, $src, msb, lsb
1880 // where lsb = 0 and msb = len - 1
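// Illustrative example: (and $src, 0xfffff) needs a 20-bit mask (> 0xfff), so
// it becomes (BSTRPICK $src, 19, 0).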
1881
1882 // If the mask is <= 0xfff, andi can be used instead.
1883 if (CN->getZExtValue() <= 0xfff)
1884 return SDValue();
1885
1886 // Return if the MSB exceeds the width of the value.
1887 if (SMIdx + SMLen > ValTy.getSizeInBits())
1888 return SDValue();
1889
1890 if (SMIdx > 0) {
1891 // Omit if the constant has more than 2 uses. This is a conservative
1892 // decision. Whether it is a win depends on the HW microarchitecture.
1893 // However it should always be better for 1 and 2 uses.
1894 if (CN->use_size() > 2)
1895 return SDValue();
1896 // Return if the constant can be composed by a single LU12I.W.
1897 if ((CN->getZExtValue() & 0xfff) == 0)
1898 return SDValue();
1899 // Return if the constant can be composed by a single ADDI with
1900 // the zero register.
1901 if (CN->getSExtValue() >= -2048 && CN->getSExtValue() < 0)
1902 return SDValue();
1903 }
1904
1905 lsb = SMIdx;
1906 NewOperand = FirstOperand;
1907 }
1908
1909 msb = lsb + SMLen - 1;
1910 SDValue NR0 = DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy, NewOperand,
1911 DAG.getConstant(msb, DL, GRLenVT),
1912 DAG.getConstant(lsb, DL, GRLenVT));
1913 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL || lsb == 0)
1914 return NR0;
1915 // Try to optimize to
1916 // bstrpick $Rd, $Rs, msb, lsb
1917 // slli $Rd, $Rd, lsb
1918 return DAG.getNode(ISD::SHL, DL, ValTy, NR0,
1919 DAG.getConstant(lsb, DL, GRLenVT));
1920}
1921
1922 static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG,
1923 TargetLowering::DAGCombinerInfo &DCI,
1924 const LoongArchSubtarget &Subtarget) {
1925 if (DCI.isBeforeLegalizeOps())
1926 return SDValue();
1927
1928 // $dst = srl (and $src, Mask), Shamt
1929 // =>
1930 // BSTRPICK $dst, $src, MaskIdx+MaskLen-1, Shamt
1931 // when Mask is a shifted mask, and MaskIdx <= Shamt <= MaskIdx+MaskLen-1
1932 //
1933
1934 SDValue FirstOperand = N->getOperand(0);
1935 ConstantSDNode *CN;
1936 EVT ValTy = N->getValueType(0);
1937 SDLoc DL(N);
1938 MVT GRLenVT = Subtarget.getGRLenVT();
1939 unsigned MaskIdx, MaskLen;
1940 uint64_t Shamt;
1941
1942 // The first operand must be an AND and the second operand of the AND must be
1943 // a shifted mask.
1944 if (FirstOperand.getOpcode() != ISD::AND ||
1945 !(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))) ||
1946 !isShiftedMask_64(CN->getZExtValue(), MaskIdx, MaskLen))
1947 return SDValue();
1948
1949 // The second operand (shift amount) must be an immediate.
1950 if (!(CN = dyn_cast<ConstantSDNode>(N->getOperand(1))))
1951 return SDValue();
1952
1953 Shamt = CN->getZExtValue();
1954 if (MaskIdx <= Shamt && Shamt <= MaskIdx + MaskLen - 1)
1955 return DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy,
1956 FirstOperand->getOperand(0),
1957 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
1958 DAG.getConstant(Shamt, DL, GRLenVT));
1959
1960 return SDValue();
1961}
1962
1963 static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
1964 TargetLowering::DAGCombinerInfo &DCI,
1965 const LoongArchSubtarget &Subtarget) {
1966 MVT GRLenVT = Subtarget.getGRLenVT();
1967 EVT ValTy = N->getValueType(0);
1968 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
1969 ConstantSDNode *CN0, *CN1;
1970 SDLoc DL(N);
1971 unsigned ValBits = ValTy.getSizeInBits();
1972 unsigned MaskIdx0, MaskLen0, MaskIdx1, MaskLen1;
1973 unsigned Shamt;
1974 bool SwapAndRetried = false;
1975
1976 if (DCI.isBeforeLegalizeOps())
1977 return SDValue();
1978
1979 if (ValBits != 32 && ValBits != 64)
1980 return SDValue();
1981
1982Retry:
1983 // 1st pattern to match BSTRINS:
1984 // R = or (and X, mask0), (and (shl Y, lsb), mask1)
1985 // where mask1 = (2**size - 1) << lsb, mask0 = ~mask1
1986 // =>
1987 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
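// Illustrative example with size = 8 and lsb = 4:
// (or (and X, ~0xff0), (and (shl Y, 4), 0xff0)) becomes (BSTRINS X, Y, 11, 4).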
1988 if (N0.getOpcode() == ISD::AND &&
1989 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
1990 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
1991 N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL &&
1992 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
1993 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
1994 MaskIdx0 == MaskIdx1 && MaskLen0 == MaskLen1 &&
1995 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
1996 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
1997 (MaskIdx0 + MaskLen0 <= ValBits)) {
1998 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 1\n");
1999 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
2000 N1.getOperand(0).getOperand(0),
2001 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
2002 DAG.getConstant(MaskIdx0, DL, GRLenVT));
2003 }
2004
2005 // 2nd pattern to match BSTRINS:
2006 // R = or (and X, mask0), (shl (and Y, mask1), lsb)
2007 // where mask1 = (2**size - 1), mask0 = ~(mask1 << lsb)
2008 // =>
2009 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
2010 if (N0.getOpcode() == ISD::AND &&
2011 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
2012 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
2013 N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
2014 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
2015 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
2016 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
2017 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
2018 MaskLen0 == MaskLen1 && MaskIdx1 == 0 &&
2019 (MaskIdx0 + MaskLen0 <= ValBits)) {
2020 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 2\n");
2021 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
2022 N1.getOperand(0).getOperand(0),
2023 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
2024 DAG.getConstant(MaskIdx0, DL, GRLenVT));
2025 }
2026
2027 // 3rd pattern to match BSTRINS:
2028 // R = or (and X, mask0), (and Y, mask1)
2029 // where ~mask0 = (2**size - 1) << lsb, mask0 & mask1 = 0
2030 // =>
2031 // R = BSTRINS X, (shr (and Y, mask1), lsb), msb, lsb
2032 // where msb = lsb + size - 1
2033 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
2034 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
2035 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
2036 (MaskIdx0 + MaskLen0 <= 64) &&
2037 (CN1 = dyn_cast<ConstantSDNode>(N1->getOperand(1))) &&
2038 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
2039 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 3\n");
2040 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
2041 DAG.getNode(ISD::SRL, DL, N1->getValueType(0), N1,
2042 DAG.getConstant(MaskIdx0, DL, GRLenVT)),
2043 DAG.getConstant(ValBits == 32
2044 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
2045 : (MaskIdx0 + MaskLen0 - 1),
2046 DL, GRLenVT),
2047 DAG.getConstant(MaskIdx0, DL, GRLenVT));
2048 }
2049
2050 // 4th pattern to match BSTRINS:
2051 // R = or (and X, mask), (shl Y, shamt)
2052 // where mask = (2**shamt - 1)
2053 // =>
2054 // R = BSTRINS X, Y, ValBits - 1, shamt
2055 // where ValBits = 32 or 64
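// Illustrative example on LA64: (or (and X, 0xff), (shl Y, 8)) becomes
// (BSTRINS X, Y, 63, 8).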
2056 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::SHL &&
2057 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
2058 isShiftedMask_64(CN0->getZExtValue(), MaskIdx0, MaskLen0) &&
2059 MaskIdx0 == 0 && (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
2060 (Shamt = CN1->getZExtValue()) == MaskLen0 &&
2061 (MaskIdx0 + MaskLen0 <= ValBits)) {
2062 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 4\n");
2063 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
2064 N1.getOperand(0),
2065 DAG.getConstant((ValBits - 1), DL, GRLenVT),
2066 DAG.getConstant(Shamt, DL, GRLenVT));
2067 }
2068
2069 // 5th pattern to match BSTRINS:
2070 // R = or (and X, mask), const
2071 // where ~mask = (2**size - 1) << lsb, mask & const = 0
2072 // =>
2073 // R = BSTRINS X, (const >> lsb), msb, lsb
2074 // where msb = lsb + size - 1
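// Illustrative example: (or (and X, ~0xff00), 0x2300) becomes
// (BSTRINS X, 0x23, 15, 8).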
2075 if (N0.getOpcode() == ISD::AND &&
2076 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
2077 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
2078 (CN1 = dyn_cast<ConstantSDNode>(N1)) &&
2079 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
2080 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 5\n");
2081 return DAG.getNode(
2082 LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
2083 DAG.getConstant(CN1->getSExtValue() >> MaskIdx0, DL, ValTy),
2084 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
2085 DAG.getConstant(MaskIdx0, DL, GRLenVT));
2086 }
2087
2088 // 6th pattern.
2089 // a = b | ((c & mask) << shamt), where all positions in b to be overwritten
2090 // by the incoming bits are known to be zero.
2091 // =>
2092 // a = BSTRINS b, c, shamt + MaskLen - 1, shamt
2093 //
2094 // Note that the 1st pattern is a special case of the 6th, i.e. the 6th
2095 // pattern is more common than the 1st. So we put the 1st before the 6th in
2096 // order to match as many nodes as possible.
2097 ConstantSDNode *CNMask, *CNShamt;
2098 unsigned MaskIdx, MaskLen;
2099 if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
2100 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
2101 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
2102 MaskIdx == 0 && (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
2103 CNShamt->getZExtValue() + MaskLen <= ValBits) {
2104 Shamt = CNShamt->getZExtValue();
2105 APInt ShMask(ValBits, CNMask->getZExtValue() << Shamt);
2106 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
2107 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 6\n");
2108 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
2109 N1.getOperand(0).getOperand(0),
2110 DAG.getConstant(Shamt + MaskLen - 1, DL, GRLenVT),
2111 DAG.getConstant(Shamt, DL, GRLenVT));
2112 }
2113 }
2114
2115 // 7th pattern.
2116 // a = b | ((c << shamt) & shifted_mask), where all positions in b to be
2117 // overwritten by the incoming bits are known to be zero.
2118 // =>
2119 // a = BSTRINS b, c, MaskIdx + MaskLen - 1, MaskIdx
2120 //
2121 // Similarly, the 7th pattern is more common than the 2nd. So we put the 2nd
2122 // before the 7th in order to match as many nodes as possible.
2123 if (N1.getOpcode() == ISD::AND &&
2124 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
2125 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
2126 N1.getOperand(0).getOpcode() == ISD::SHL &&
2127 (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
2128 CNShamt->getZExtValue() == MaskIdx) {
2129 APInt ShMask(ValBits, CNMask->getZExtValue());
2130 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
2131 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 7\n");
2132 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
2133 N1.getOperand(0).getOperand(0),
2134 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
2135 DAG.getConstant(MaskIdx, DL, GRLenVT));
2136 }
2137 }
2138
2139 // (or a, b) and (or b, a) are equivalent, so swap the operands and retry.
2140 if (!SwapAndRetried) {
2141 std::swap(N0, N1);
2142 SwapAndRetried = true;
2143 goto Retry;
2144 }
2145
2146 SwapAndRetried = false;
2147Retry2:
2148 // 8th pattern.
2149 // a = b | (c & shifted_mask), where all positions in b to be overwritten by
2150 // the incoming bits are known to be zero.
2151 // =>
2152 // a = BSTRINS b, c >> MaskIdx, MaskIdx + MaskLen - 1, MaskIdx
2153 //
2154 // Similarly, the 8th pattern is more common than the 4th and 5th patterns. So
2155 // we put it here in order to match as many nodes as possible or generate
2156 // fewer instructions.
2157 if (N1.getOpcode() == ISD::AND &&
2158 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
2159 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen)) {
2160 APInt ShMask(ValBits, CNMask->getZExtValue());
2161 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
2162 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 8\n");
2163 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
2164 DAG.getNode(ISD::SRL, DL, N1->getValueType(0),
2165 N1->getOperand(0),
2166 DAG.getConstant(MaskIdx, DL, GRLenVT)),
2167 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
2168 DAG.getConstant(MaskIdx, DL, GRLenVT));
2169 }
2170 }
2171 // Swap N0/N1 and retry.
2172 if (!SwapAndRetried) {
2173 std::swap(N0, N1);
2174 SwapAndRetried = true;
2175 goto Retry2;
2176 }
2177
2178 return SDValue();
2179}
2180
2181 // Combine (loongarch_bitrev_w (loongarch_revb_2w X)) to loongarch_bitrev_4b.
2182 static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG,
2183 TargetLowering::DAGCombinerInfo &DCI,
2184 const LoongArchSubtarget &Subtarget) {
2185 if (DCI.isBeforeLegalizeOps())
2186 return SDValue();
2187
2188 SDValue Src = N->getOperand(0);
2189 if (Src.getOpcode() != LoongArchISD::REVB_2W)
2190 return SDValue();
2191
2192 return DAG.getNode(LoongArchISD::BITREV_4B, SDLoc(N), N->getValueType(0),
2193 Src.getOperand(0));
2194}
2195
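// Range-check the immediate operand at index ImmOp against an N-bit signed or
// unsigned range; emit an "argument out of range" diagnostic and return UNDEF
// on failure, otherwise rebuild the immediate as a GRLen-typed constant.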
2196template <unsigned N>
2197 static SDValue legalizeIntrinsicImmArg(SDNode *Node, unsigned ImmOp,
2198 SelectionDAG &DAG,
2199 const LoongArchSubtarget &Subtarget,
2200 bool IsSigned = false) {
2201 SDLoc DL(Node);
2202 auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
2203 // Check the ImmArg.
2204 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
2205 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
2206 DAG.getContext()->emitError(Node->getOperationName(0) +
2207 ": argument out of range.");
2208 return DAG.getNode(ISD::UNDEF, DL, Subtarget.getGRLenVT());
2209 }
2210 return DAG.getConstant(CImm->getZExtValue(), DL, Subtarget.getGRLenVT());
2211}
2212
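// Range-check the immediate operand and splat it as a constant of the result
// vector type; emit a diagnostic and return UNDEF when it is out of range.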
2213template <unsigned N>
2214static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp,
2215 SelectionDAG &DAG, bool IsSigned = false) {
2216 SDLoc DL(Node);
2217 EVT ResTy = Node->getValueType(0);
2218 auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
2219
2220 // Check the ImmArg.
2221 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
2222 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
2223 DAG.getContext()->emitError(Node->getOperationName(0) +
2224 ": argument out of range.");
2225 return DAG.getNode(ISD::UNDEF, DL, ResTy);
2226 }
2227 return DAG.getConstant(
2228 APInt(ResTy.getScalarType().getSizeInBits(),
2229 IsSigned ? CImm->getSExtValue() : CImm->getZExtValue(), IsSigned),
2230 DL, ResTy);
2231}
2232
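// Mask the per-element shift amounts (operand 2) with EltBits - 1 so that only
// the bits relevant to the element size are used.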
2233 static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG) {
2234 SDLoc DL(Node);
2235 EVT ResTy = Node->getValueType(0);
2236 SDValue Vec = Node->getOperand(2);
2237 SDValue Mask = DAG.getConstant(Vec.getScalarValueSizeInBits() - 1, DL, ResTy);
2238 return DAG.getNode(ISD::AND, DL, ResTy, Vec, Mask);
2239}
2240
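// Lower vbitclr/xvbitclr: clear, in each element of operand 1, the bit selected
// by the corresponding element of operand 2, i.e. AND with
// ~(1 << (elt & (EltBits - 1))).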
2241 static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG) {
2242 SDLoc DL(Node);
2243 EVT ResTy = Node->getValueType(0);
2244 SDValue One = DAG.getConstant(1, DL, ResTy);
2245 SDValue Bit =
2246 DAG.getNode(ISD::SHL, DL, ResTy, One, truncateVecElts(Node, DAG));
2247
2248 return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1),
2249 DAG.getNOT(DL, Bit, ResTy));
2250}
2251
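// Lower vbitclri/xvbitclri: after range-checking the immediate, clear bit 'imm'
// in every element by ANDing with ~(1 << imm). The vbitseti and vbitrevi
// helpers below follow the same pattern with OR and XOR respectively.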
2252template <unsigned N>
2253 static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG) {
2254 SDLoc DL(Node);
2255 EVT ResTy = Node->getValueType(0);
2256 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
2257 // Check the unsigned ImmArg.
2258 if (!isUInt<N>(CImm->getZExtValue())) {
2259 DAG.getContext()->emitError(Node->getOperationName(0) +
2260 ": argument out of range.");
2261 return DAG.getNode(ISD::UNDEF, DL, ResTy);
2262 }
2263
2264 APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
2265 SDValue Mask = DAG.getConstant(~BitImm, DL, ResTy);
2266
2267 return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1), Mask);
2268}
2269
2270template <unsigned N>
2271 static SDValue lowerVectorBitSetImm(SDNode *Node, SelectionDAG &DAG) {
2272 SDLoc DL(Node);
2273 EVT ResTy = Node->getValueType(0);
2274 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
2275 // Check the unsigned ImmArg.
2276 if (!isUInt<N>(CImm->getZExtValue())) {
2277 DAG.getContext()->emitError(Node->getOperationName(0) +
2278 ": argument out of range.");
2279 return DAG.getNode(ISD::UNDEF, DL, ResTy);
2280 }
2281
2282 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
2283 SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
2284 return DAG.getNode(ISD::OR, DL, ResTy, Node->getOperand(1), BitImm);
2285}
2286
2287template <unsigned N>
2288 static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG) {
2289 SDLoc DL(Node);
2290 EVT ResTy = Node->getValueType(0);
2291 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
2292 // Check the unsigned ImmArg.
2293 if (!isUInt<N>(CImm->getZExtValue())) {
2294 DAG.getContext()->emitError(Node->getOperationName(0) +
2295 ": argument out of range.");
2296 return DAG.getNode(ISD::UNDEF, DL, ResTy);
2297 }
2298
2299 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
2300 SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
2301 return DAG.getNode(ISD::XOR, DL, ResTy, Node->getOperand(1), BitImm);
2302}
2303
2304static SDValue
2305 performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG,
2306 TargetLowering::DAGCombinerInfo &DCI,
2307 const LoongArchSubtarget &Subtarget) {
2308 SDLoc DL(N);
2309 switch (N->getConstantOperandVal(0)) {
2310 default:
2311 break;
2312 case Intrinsic::loongarch_lsx_vadd_b:
2313 case Intrinsic::loongarch_lsx_vadd_h:
2314 case Intrinsic::loongarch_lsx_vadd_w:
2315 case Intrinsic::loongarch_lsx_vadd_d:
2316 case Intrinsic::loongarch_lasx_xvadd_b:
2317 case Intrinsic::loongarch_lasx_xvadd_h:
2318 case Intrinsic::loongarch_lasx_xvadd_w:
2319 case Intrinsic::loongarch_lasx_xvadd_d:
2320 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
2321 N->getOperand(2));
2322 case Intrinsic::loongarch_lsx_vaddi_bu:
2323 case Intrinsic::loongarch_lsx_vaddi_hu:
2324 case Intrinsic::loongarch_lsx_vaddi_wu:
2325 case Intrinsic::loongarch_lsx_vaddi_du:
2326 case Intrinsic::loongarch_lasx_xvaddi_bu:
2327 case Intrinsic::loongarch_lasx_xvaddi_hu:
2328 case Intrinsic::loongarch_lasx_xvaddi_wu:
2329 case Intrinsic::loongarch_lasx_xvaddi_du:
2330 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
2331 lowerVectorSplatImm<5>(N, 2, DAG));
2332 case Intrinsic::loongarch_lsx_vsub_b:
2333 case Intrinsic::loongarch_lsx_vsub_h:
2334 case Intrinsic::loongarch_lsx_vsub_w:
2335 case Intrinsic::loongarch_lsx_vsub_d:
2336 case Intrinsic::loongarch_lasx_xvsub_b:
2337 case Intrinsic::loongarch_lasx_xvsub_h:
2338 case Intrinsic::loongarch_lasx_xvsub_w:
2339 case Intrinsic::loongarch_lasx_xvsub_d:
2340 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
2341 N->getOperand(2));
2342 case Intrinsic::loongarch_lsx_vsubi_bu:
2343 case Intrinsic::loongarch_lsx_vsubi_hu:
2344 case Intrinsic::loongarch_lsx_vsubi_wu:
2345 case Intrinsic::loongarch_lsx_vsubi_du:
2346 case Intrinsic::loongarch_lasx_xvsubi_bu:
2347 case Intrinsic::loongarch_lasx_xvsubi_hu:
2348 case Intrinsic::loongarch_lasx_xvsubi_wu:
2349 case Intrinsic::loongarch_lasx_xvsubi_du:
2350 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
2351 lowerVectorSplatImm<5>(N, 2, DAG));
2352 case Intrinsic::loongarch_lsx_vneg_b:
2353 case Intrinsic::loongarch_lsx_vneg_h:
2354 case Intrinsic::loongarch_lsx_vneg_w:
2355 case Intrinsic::loongarch_lsx_vneg_d:
2356 case Intrinsic::loongarch_lasx_xvneg_b:
2357 case Intrinsic::loongarch_lasx_xvneg_h:
2358 case Intrinsic::loongarch_lasx_xvneg_w:
2359 case Intrinsic::loongarch_lasx_xvneg_d:
2360 return DAG.getNode(
2361 ISD::SUB, DL, N->getValueType(0),
2362 DAG.getConstant(
2363 APInt(N->getValueType(0).getScalarType().getSizeInBits(), 0,
2364 /*isSigned=*/true),
2365 SDLoc(N), N->getValueType(0)),
2366 N->getOperand(1));
2367 case Intrinsic::loongarch_lsx_vmax_b:
2368 case Intrinsic::loongarch_lsx_vmax_h:
2369 case Intrinsic::loongarch_lsx_vmax_w:
2370 case Intrinsic::loongarch_lsx_vmax_d:
2371 case Intrinsic::loongarch_lasx_xvmax_b:
2372 case Intrinsic::loongarch_lasx_xvmax_h:
2373 case Intrinsic::loongarch_lasx_xvmax_w:
2374 case Intrinsic::loongarch_lasx_xvmax_d:
2375 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
2376 N->getOperand(2));
2377 case Intrinsic::loongarch_lsx_vmax_bu:
2378 case Intrinsic::loongarch_lsx_vmax_hu:
2379 case Intrinsic::loongarch_lsx_vmax_wu:
2380 case Intrinsic::loongarch_lsx_vmax_du:
2381 case Intrinsic::loongarch_lasx_xvmax_bu:
2382 case Intrinsic::loongarch_lasx_xvmax_hu:
2383 case Intrinsic::loongarch_lasx_xvmax_wu:
2384 case Intrinsic::loongarch_lasx_xvmax_du:
2385 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
2386 N->getOperand(2));
2387 case Intrinsic::loongarch_lsx_vmaxi_b:
2388 case Intrinsic::loongarch_lsx_vmaxi_h:
2389 case Intrinsic::loongarch_lsx_vmaxi_w:
2390 case Intrinsic::loongarch_lsx_vmaxi_d:
2391 case Intrinsic::loongarch_lasx_xvmaxi_b:
2392 case Intrinsic::loongarch_lasx_xvmaxi_h:
2393 case Intrinsic::loongarch_lasx_xvmaxi_w:
2394 case Intrinsic::loongarch_lasx_xvmaxi_d:
2395 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
2396 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
2397 case Intrinsic::loongarch_lsx_vmaxi_bu:
2398 case Intrinsic::loongarch_lsx_vmaxi_hu:
2399 case Intrinsic::loongarch_lsx_vmaxi_wu:
2400 case Intrinsic::loongarch_lsx_vmaxi_du:
2401 case Intrinsic::loongarch_lasx_xvmaxi_bu:
2402 case Intrinsic::loongarch_lasx_xvmaxi_hu:
2403 case Intrinsic::loongarch_lasx_xvmaxi_wu:
2404 case Intrinsic::loongarch_lasx_xvmaxi_du:
2405 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
2406 lowerVectorSplatImm<5>(N, 2, DAG));
2407 case Intrinsic::loongarch_lsx_vmin_b:
2408 case Intrinsic::loongarch_lsx_vmin_h:
2409 case Intrinsic::loongarch_lsx_vmin_w:
2410 case Intrinsic::loongarch_lsx_vmin_d:
2411 case Intrinsic::loongarch_lasx_xvmin_b:
2412 case Intrinsic::loongarch_lasx_xvmin_h:
2413 case Intrinsic::loongarch_lasx_xvmin_w:
2414 case Intrinsic::loongarch_lasx_xvmin_d:
2415 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
2416 N->getOperand(2));
2417 case Intrinsic::loongarch_lsx_vmin_bu:
2418 case Intrinsic::loongarch_lsx_vmin_hu:
2419 case Intrinsic::loongarch_lsx_vmin_wu:
2420 case Intrinsic::loongarch_lsx_vmin_du:
2421 case Intrinsic::loongarch_lasx_xvmin_bu:
2422 case Intrinsic::loongarch_lasx_xvmin_hu:
2423 case Intrinsic::loongarch_lasx_xvmin_wu:
2424 case Intrinsic::loongarch_lasx_xvmin_du:
2425 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
2426 N->getOperand(2));
2427 case Intrinsic::loongarch_lsx_vmini_b:
2428 case Intrinsic::loongarch_lsx_vmini_h:
2429 case Intrinsic::loongarch_lsx_vmini_w:
2430 case Intrinsic::loongarch_lsx_vmini_d:
2431 case Intrinsic::loongarch_lasx_xvmini_b:
2432 case Intrinsic::loongarch_lasx_xvmini_h:
2433 case Intrinsic::loongarch_lasx_xvmini_w:
2434 case Intrinsic::loongarch_lasx_xvmini_d:
2435 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
2436 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
2437 case Intrinsic::loongarch_lsx_vmini_bu:
2438 case Intrinsic::loongarch_lsx_vmini_hu:
2439 case Intrinsic::loongarch_lsx_vmini_wu:
2440 case Intrinsic::loongarch_lsx_vmini_du:
2441 case Intrinsic::loongarch_lasx_xvmini_bu:
2442 case Intrinsic::loongarch_lasx_xvmini_hu:
2443 case Intrinsic::loongarch_lasx_xvmini_wu:
2444 case Intrinsic::loongarch_lasx_xvmini_du:
2445 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
2446 lowerVectorSplatImm<5>(N, 2, DAG));
2447 case Intrinsic::loongarch_lsx_vmul_b:
2448 case Intrinsic::loongarch_lsx_vmul_h:
2449 case Intrinsic::loongarch_lsx_vmul_w:
2450 case Intrinsic::loongarch_lsx_vmul_d:
2451 case Intrinsic::loongarch_lasx_xvmul_b:
2452 case Intrinsic::loongarch_lasx_xvmul_h:
2453 case Intrinsic::loongarch_lasx_xvmul_w:
2454 case Intrinsic::loongarch_lasx_xvmul_d:
2455 return DAG.getNode(ISD::MUL, DL, N->getValueType(0), N->getOperand(1),
2456 N->getOperand(2));
2457 case Intrinsic::loongarch_lsx_vmadd_b:
2458 case Intrinsic::loongarch_lsx_vmadd_h:
2459 case Intrinsic::loongarch_lsx_vmadd_w:
2460 case Intrinsic::loongarch_lsx_vmadd_d:
2461 case Intrinsic::loongarch_lasx_xvmadd_b:
2462 case Intrinsic::loongarch_lasx_xvmadd_h:
2463 case Intrinsic::loongarch_lasx_xvmadd_w:
2464 case Intrinsic::loongarch_lasx_xvmadd_d: {
2465 EVT ResTy = N->getValueType(0);
2466 return DAG.getNode(ISD::ADD, SDLoc(N), ResTy, N->getOperand(1),
2467 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
2468 N->getOperand(3)));
2469 }
2470 case Intrinsic::loongarch_lsx_vmsub_b:
2471 case Intrinsic::loongarch_lsx_vmsub_h:
2472 case Intrinsic::loongarch_lsx_vmsub_w:
2473 case Intrinsic::loongarch_lsx_vmsub_d:
2474 case Intrinsic::loongarch_lasx_xvmsub_b:
2475 case Intrinsic::loongarch_lasx_xvmsub_h:
2476 case Intrinsic::loongarch_lasx_xvmsub_w:
2477 case Intrinsic::loongarch_lasx_xvmsub_d: {
2478 EVT ResTy = N->getValueType(0);
2479 return DAG.getNode(ISD::SUB, SDLoc(N), ResTy, N->getOperand(1),
2480 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
2481 N->getOperand(3)));
2482 }
2483 case Intrinsic::loongarch_lsx_vdiv_b:
2484 case Intrinsic::loongarch_lsx_vdiv_h:
2485 case Intrinsic::loongarch_lsx_vdiv_w:
2486 case Intrinsic::loongarch_lsx_vdiv_d:
2487 case Intrinsic::loongarch_lasx_xvdiv_b:
2488 case Intrinsic::loongarch_lasx_xvdiv_h:
2489 case Intrinsic::loongarch_lasx_xvdiv_w:
2490 case Intrinsic::loongarch_lasx_xvdiv_d:
2491 return DAG.getNode(ISD::SDIV, DL, N->getValueType(0), N->getOperand(1),
2492 N->getOperand(2));
2493 case Intrinsic::loongarch_lsx_vdiv_bu:
2494 case Intrinsic::loongarch_lsx_vdiv_hu:
2495 case Intrinsic::loongarch_lsx_vdiv_wu:
2496 case Intrinsic::loongarch_lsx_vdiv_du:
2497 case Intrinsic::loongarch_lasx_xvdiv_bu:
2498 case Intrinsic::loongarch_lasx_xvdiv_hu:
2499 case Intrinsic::loongarch_lasx_xvdiv_wu:
2500 case Intrinsic::loongarch_lasx_xvdiv_du:
2501 return DAG.getNode(ISD::UDIV, DL, N->getValueType(0), N->getOperand(1),
2502 N->getOperand(2));
2503 case Intrinsic::loongarch_lsx_vmod_b:
2504 case Intrinsic::loongarch_lsx_vmod_h:
2505 case Intrinsic::loongarch_lsx_vmod_w:
2506 case Intrinsic::loongarch_lsx_vmod_d:
2507 case Intrinsic::loongarch_lasx_xvmod_b:
2508 case Intrinsic::loongarch_lasx_xvmod_h:
2509 case Intrinsic::loongarch_lasx_xvmod_w:
2510 case Intrinsic::loongarch_lasx_xvmod_d:
2511 return DAG.getNode(ISD::SREM, DL, N->getValueType(0), N->getOperand(1),
2512 N->getOperand(2));
2513 case Intrinsic::loongarch_lsx_vmod_bu:
2514 case Intrinsic::loongarch_lsx_vmod_hu:
2515 case Intrinsic::loongarch_lsx_vmod_wu:
2516 case Intrinsic::loongarch_lsx_vmod_du:
2517 case Intrinsic::loongarch_lasx_xvmod_bu:
2518 case Intrinsic::loongarch_lasx_xvmod_hu:
2519 case Intrinsic::loongarch_lasx_xvmod_wu:
2520 case Intrinsic::loongarch_lasx_xvmod_du:
2521 return DAG.getNode(ISD::UREM, DL, N->getValueType(0), N->getOperand(1),
2522 N->getOperand(2));
2523 case Intrinsic::loongarch_lsx_vand_v:
2524 case Intrinsic::loongarch_lasx_xvand_v:
2525 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
2526 N->getOperand(2));
2527 case Intrinsic::loongarch_lsx_vor_v:
2528 case Intrinsic::loongarch_lasx_xvor_v:
2529 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
2530 N->getOperand(2));
2531 case Intrinsic::loongarch_lsx_vxor_v:
2532 case Intrinsic::loongarch_lasx_xvxor_v:
2533 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
2534 N->getOperand(2));
2535 case Intrinsic::loongarch_lsx_vnor_v:
2536 case Intrinsic::loongarch_lasx_xvnor_v: {
2537 SDValue Res = DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
2538 N->getOperand(2));
2539 return DAG.getNOT(DL, Res, Res->getValueType(0));
2540 }
2541 case Intrinsic::loongarch_lsx_vandi_b:
2542 case Intrinsic::loongarch_lasx_xvandi_b:
2543 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
2544 lowerVectorSplatImm<8>(N, 2, DAG));
2545 case Intrinsic::loongarch_lsx_vori_b:
2546 case Intrinsic::loongarch_lasx_xvori_b:
2547 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
2548 lowerVectorSplatImm<8>(N, 2, DAG));
2549 case Intrinsic::loongarch_lsx_vxori_b:
2550 case Intrinsic::loongarch_lasx_xvxori_b:
2551 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
2552 lowerVectorSplatImm<8>(N, 2, DAG));
2553 case Intrinsic::loongarch_lsx_vsll_b:
2554 case Intrinsic::loongarch_lsx_vsll_h:
2555 case Intrinsic::loongarch_lsx_vsll_w:
2556 case Intrinsic::loongarch_lsx_vsll_d:
2557 case Intrinsic::loongarch_lasx_xvsll_b:
2558 case Intrinsic::loongarch_lasx_xvsll_h:
2559 case Intrinsic::loongarch_lasx_xvsll_w:
2560 case Intrinsic::loongarch_lasx_xvsll_d:
2561 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
2562 truncateVecElts(N, DAG));
2563 case Intrinsic::loongarch_lsx_vslli_b:
2564 case Intrinsic::loongarch_lasx_xvslli_b:
2565 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
2566 lowerVectorSplatImm<3>(N, 2, DAG));
2567 case Intrinsic::loongarch_lsx_vslli_h:
2568 case Intrinsic::loongarch_lasx_xvslli_h:
2569 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
2570 lowerVectorSplatImm<4>(N, 2, DAG));
2571 case Intrinsic::loongarch_lsx_vslli_w:
2572 case Intrinsic::loongarch_lasx_xvslli_w:
2573 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
2574 lowerVectorSplatImm<5>(N, 2, DAG));
2575 case Intrinsic::loongarch_lsx_vslli_d:
2576 case Intrinsic::loongarch_lasx_xvslli_d:
2577 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
2578 lowerVectorSplatImm<6>(N, 2, DAG));
2579 case Intrinsic::loongarch_lsx_vsrl_b:
2580 case Intrinsic::loongarch_lsx_vsrl_h:
2581 case Intrinsic::loongarch_lsx_vsrl_w:
2582 case Intrinsic::loongarch_lsx_vsrl_d:
2583 case Intrinsic::loongarch_lasx_xvsrl_b:
2584 case Intrinsic::loongarch_lasx_xvsrl_h:
2585 case Intrinsic::loongarch_lasx_xvsrl_w:
2586 case Intrinsic::loongarch_lasx_xvsrl_d:
2587 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
2588 truncateVecElts(N, DAG));
2589 case Intrinsic::loongarch_lsx_vsrli_b:
2590 case Intrinsic::loongarch_lasx_xvsrli_b:
2591 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
2592 lowerVectorSplatImm<3>(N, 2, DAG));
2593 case Intrinsic::loongarch_lsx_vsrli_h:
2594 case Intrinsic::loongarch_lasx_xvsrli_h:
2595 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
2596 lowerVectorSplatImm<4>(N, 2, DAG));
2597 case Intrinsic::loongarch_lsx_vsrli_w:
2598 case Intrinsic::loongarch_lasx_xvsrli_w:
2599 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
2600 lowerVectorSplatImm<5>(N, 2, DAG));
2601 case Intrinsic::loongarch_lsx_vsrli_d:
2602 case Intrinsic::loongarch_lasx_xvsrli_d:
2603 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
2604 lowerVectorSplatImm<6>(N, 2, DAG));
2605 case Intrinsic::loongarch_lsx_vsra_b:
2606 case Intrinsic::loongarch_lsx_vsra_h:
2607 case Intrinsic::loongarch_lsx_vsra_w:
2608 case Intrinsic::loongarch_lsx_vsra_d:
2609 case Intrinsic::loongarch_lasx_xvsra_b:
2610 case Intrinsic::loongarch_lasx_xvsra_h:
2611 case Intrinsic::loongarch_lasx_xvsra_w:
2612 case Intrinsic::loongarch_lasx_xvsra_d:
2613 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
2614 truncateVecElts(N, DAG));
2615 case Intrinsic::loongarch_lsx_vsrai_b:
2616 case Intrinsic::loongarch_lasx_xvsrai_b:
2617 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
2618 lowerVectorSplatImm<3>(N, 2, DAG));
2619 case Intrinsic::loongarch_lsx_vsrai_h:
2620 case Intrinsic::loongarch_lasx_xvsrai_h:
2621 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
2622 lowerVectorSplatImm<4>(N, 2, DAG));
2623 case Intrinsic::loongarch_lsx_vsrai_w:
2624 case Intrinsic::loongarch_lasx_xvsrai_w:
2625 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
2626 lowerVectorSplatImm<5>(N, 2, DAG));
2627 case Intrinsic::loongarch_lsx_vsrai_d:
2628 case Intrinsic::loongarch_lasx_xvsrai_d:
2629 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
2630 lowerVectorSplatImm<6>(N, 2, DAG));
2631 case Intrinsic::loongarch_lsx_vpcnt_b:
2632 case Intrinsic::loongarch_lsx_vpcnt_h:
2633 case Intrinsic::loongarch_lsx_vpcnt_w:
2634 case Intrinsic::loongarch_lsx_vpcnt_d:
2635 case Intrinsic::loongarch_lasx_xvpcnt_b:
2636 case Intrinsic::loongarch_lasx_xvpcnt_h:
2637 case Intrinsic::loongarch_lasx_xvpcnt_w:
2638 case Intrinsic::loongarch_lasx_xvpcnt_d:
2639 return DAG.getNode(ISD::CTPOP, DL, N->getValueType(0), N->getOperand(1));
2640 case Intrinsic::loongarch_lsx_vbitclr_b:
2641 case Intrinsic::loongarch_lsx_vbitclr_h:
2642 case Intrinsic::loongarch_lsx_vbitclr_w:
2643 case Intrinsic::loongarch_lsx_vbitclr_d:
2644 case Intrinsic::loongarch_lasx_xvbitclr_b:
2645 case Intrinsic::loongarch_lasx_xvbitclr_h:
2646 case Intrinsic::loongarch_lasx_xvbitclr_w:
2647 case Intrinsic::loongarch_lasx_xvbitclr_d:
2648 return lowerVectorBitClear(N, DAG);
2649 case Intrinsic::loongarch_lsx_vbitclri_b:
2650 case Intrinsic::loongarch_lasx_xvbitclri_b:
2651 return lowerVectorBitClearImm<3>(N, DAG);
2652 case Intrinsic::loongarch_lsx_vbitclri_h:
2653 case Intrinsic::loongarch_lasx_xvbitclri_h:
2654 return lowerVectorBitClearImm<4>(N, DAG);
2655 case Intrinsic::loongarch_lsx_vbitclri_w:
2656 case Intrinsic::loongarch_lasx_xvbitclri_w:
2657 return lowerVectorBitClearImm<5>(N, DAG);
2658 case Intrinsic::loongarch_lsx_vbitclri_d:
2659 case Intrinsic::loongarch_lasx_xvbitclri_d:
2660 return lowerVectorBitClearImm<6>(N, DAG);
2661 case Intrinsic::loongarch_lsx_vbitset_b:
2662 case Intrinsic::loongarch_lsx_vbitset_h:
2663 case Intrinsic::loongarch_lsx_vbitset_w:
2664 case Intrinsic::loongarch_lsx_vbitset_d:
2665 case Intrinsic::loongarch_lasx_xvbitset_b:
2666 case Intrinsic::loongarch_lasx_xvbitset_h:
2667 case Intrinsic::loongarch_lasx_xvbitset_w:
2668 case Intrinsic::loongarch_lasx_xvbitset_d: {
2669 EVT VecTy = N->getValueType(0);
2670 SDValue One = DAG.getConstant(1, DL, VecTy);
2671 return DAG.getNode(
2672 ISD::OR, DL, VecTy, N->getOperand(1),
2673 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
2674 }
2675 case Intrinsic::loongarch_lsx_vbitseti_b:
2676 case Intrinsic::loongarch_lasx_xvbitseti_b:
2677 return lowerVectorBitSetImm<3>(N, DAG);
2678 case Intrinsic::loongarch_lsx_vbitseti_h:
2679 case Intrinsic::loongarch_lasx_xvbitseti_h:
2680 return lowerVectorBitSetImm<4>(N, DAG);
2681 case Intrinsic::loongarch_lsx_vbitseti_w:
2682 case Intrinsic::loongarch_lasx_xvbitseti_w:
2683 return lowerVectorBitSetImm<5>(N, DAG);
2684 case Intrinsic::loongarch_lsx_vbitseti_d:
2685 case Intrinsic::loongarch_lasx_xvbitseti_d:
2686 return lowerVectorBitSetImm<6>(N, DAG);
2687 case Intrinsic::loongarch_lsx_vbitrev_b:
2688 case Intrinsic::loongarch_lsx_vbitrev_h:
2689 case Intrinsic::loongarch_lsx_vbitrev_w:
2690 case Intrinsic::loongarch_lsx_vbitrev_d:
2691 case Intrinsic::loongarch_lasx_xvbitrev_b:
2692 case Intrinsic::loongarch_lasx_xvbitrev_h:
2693 case Intrinsic::loongarch_lasx_xvbitrev_w:
2694 case Intrinsic::loongarch_lasx_xvbitrev_d: {
2695 EVT VecTy = N->getValueType(0);
2696 SDValue One = DAG.getConstant(1, DL, VecTy);
2697 return DAG.getNode(
2698 ISD::XOR, DL, VecTy, N->getOperand(1),
2699 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
2700 }
2701 case Intrinsic::loongarch_lsx_vbitrevi_b:
2702 case Intrinsic::loongarch_lasx_xvbitrevi_b:
2703 return lowerVectorBitRevImm<3>(N, DAG);
2704 case Intrinsic::loongarch_lsx_vbitrevi_h:
2705 case Intrinsic::loongarch_lasx_xvbitrevi_h:
2706 return lowerVectorBitRevImm<4>(N, DAG);
2707 case Intrinsic::loongarch_lsx_vbitrevi_w:
2708 case Intrinsic::loongarch_lasx_xvbitrevi_w:
2709 return lowerVectorBitRevImm<5>(N, DAG);
2710 case Intrinsic::loongarch_lsx_vbitrevi_d:
2711 case Intrinsic::loongarch_lasx_xvbitrevi_d:
2712 return lowerVectorBitRevImm<6>(N, DAG);
2713 case Intrinsic::loongarch_lsx_vfadd_s:
2714 case Intrinsic::loongarch_lsx_vfadd_d:
2715 case Intrinsic::loongarch_lasx_xvfadd_s:
2716 case Intrinsic::loongarch_lasx_xvfadd_d:
2717 return DAG.getNode(ISD::FADD, DL, N->getValueType(0), N->getOperand(1),
2718 N->getOperand(2));
2719 case Intrinsic::loongarch_lsx_vfsub_s:
2720 case Intrinsic::loongarch_lsx_vfsub_d:
2721 case Intrinsic::loongarch_lasx_xvfsub_s:
2722 case Intrinsic::loongarch_lasx_xvfsub_d:
2723 return DAG.getNode(ISD::FSUB, DL, N->getValueType(0), N->getOperand(1),
2724 N->getOperand(2));
2725 case Intrinsic::loongarch_lsx_vfmul_s:
2726 case Intrinsic::loongarch_lsx_vfmul_d:
2727 case Intrinsic::loongarch_lasx_xvfmul_s:
2728 case Intrinsic::loongarch_lasx_xvfmul_d:
2729 return DAG.getNode(ISD::FMUL, DL, N->getValueType(0), N->getOperand(1),
2730 N->getOperand(2));
2731 case Intrinsic::loongarch_lsx_vfdiv_s:
2732 case Intrinsic::loongarch_lsx_vfdiv_d:
2733 case Intrinsic::loongarch_lasx_xvfdiv_s:
2734 case Intrinsic::loongarch_lasx_xvfdiv_d:
2735 return DAG.getNode(ISD::FDIV, DL, N->getValueType(0), N->getOperand(1),
2736 N->getOperand(2));
2737 case Intrinsic::loongarch_lsx_vfmadd_s:
2738 case Intrinsic::loongarch_lsx_vfmadd_d:
2739 case Intrinsic::loongarch_lasx_xvfmadd_s:
2740 case Intrinsic::loongarch_lasx_xvfmadd_d:
2741 return DAG.getNode(ISD::FMA, DL, N->getValueType(0), N->getOperand(1),
2742 N->getOperand(2), N->getOperand(3));
2743 case Intrinsic::loongarch_lsx_vinsgr2vr_b:
2744 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
2745 N->getOperand(1), N->getOperand(2),
2746 legalizeIntrinsicImmArg<4>(N, 3, DAG, Subtarget));
2747 case Intrinsic::loongarch_lsx_vinsgr2vr_h:
2748 case Intrinsic::loongarch_lasx_xvinsgr2vr_w:
2749 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
2750 N->getOperand(1), N->getOperand(2),
2751 legalizeIntrinsicImmArg<3>(N, 3, DAG, Subtarget));
2752 case Intrinsic::loongarch_lsx_vinsgr2vr_w:
2753 case Intrinsic::loongarch_lasx_xvinsgr2vr_d:
2754 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
2755 N->getOperand(1), N->getOperand(2),
2756 legalizeIntrinsicImmArg<2>(N, 3, DAG, Subtarget));
2757 case Intrinsic::loongarch_lsx_vinsgr2vr_d:
2758 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
2759 N->getOperand(1), N->getOperand(2),
2760 legalizeIntrinsicImmArg<1>(N, 3, DAG, Subtarget));
2761 case Intrinsic::loongarch_lsx_vreplgr2vr_b:
2762 case Intrinsic::loongarch_lsx_vreplgr2vr_h:
2763 case Intrinsic::loongarch_lsx_vreplgr2vr_w:
2764 case Intrinsic::loongarch_lsx_vreplgr2vr_d:
2765 case Intrinsic::loongarch_lasx_xvreplgr2vr_b:
2766 case Intrinsic::loongarch_lasx_xvreplgr2vr_h:
2767 case Intrinsic::loongarch_lasx_xvreplgr2vr_w:
2768 case Intrinsic::loongarch_lasx_xvreplgr2vr_d: {
2769 EVT ResTy = N->getValueType(0);
2770 SmallVector<SDValue> Ops(ResTy.getVectorNumElements(), N->getOperand(1));
2771 return DAG.getBuildVector(ResTy, DL, Ops);
2772 }
2773 case Intrinsic::loongarch_lsx_vreplve_b:
2774 case Intrinsic::loongarch_lsx_vreplve_h:
2775 case Intrinsic::loongarch_lsx_vreplve_w:
2776 case Intrinsic::loongarch_lsx_vreplve_d:
2777 case Intrinsic::loongarch_lasx_xvreplve_b:
2778 case Intrinsic::loongarch_lasx_xvreplve_h:
2779 case Intrinsic::loongarch_lasx_xvreplve_w:
2780 case Intrinsic::loongarch_lasx_xvreplve_d:
2781 return DAG.getNode(LoongArchISD::VREPLVE, DL, N->getValueType(0),
2782 N->getOperand(1),
2783 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
2784 N->getOperand(2)));
2785 }
2786 return SDValue();
2787}
2788
2789 SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N,
2790 DAGCombinerInfo &DCI) const {
2791 SelectionDAG &DAG = DCI.DAG;
2792 switch (N->getOpcode()) {
2793 default:
2794 break;
2795 case ISD::AND:
2796 return performANDCombine(N, DAG, DCI, Subtarget);
2797 case ISD::OR:
2798 return performORCombine(N, DAG, DCI, Subtarget);
2799 case ISD::SRL:
2800 return performSRLCombine(N, DAG, DCI, Subtarget);
2801 case LoongArchISD::BITREV_W:
2802 return performBITREV_WCombine(N, DAG, DCI, Subtarget);
2803 case ISD::INTRINSIC_WO_CHAIN:
2804 return performINTRINSIC_WO_CHAINCombine(N, DAG, DCI, Subtarget);
2805 }
2806 return SDValue();
2807}
2808
2809 static MachineBasicBlock *insertDivByZeroTrap(MachineInstr &MI,
2810 MachineBasicBlock *MBB) {
2811 if (!ZeroDivCheck)
2812 return MBB;
2813
2814 // Build instructions:
2815 // MBB:
2816 // div(or mod) $dst, $dividend, $divisor
2817 // bnez $divisor, SinkMBB
2818 // BreakMBB:
2819 // break 7 // BRK_DIVZERO
2820 // SinkMBB:
2821 // fallthrough
2822 const BasicBlock *LLVM_BB = MBB->getBasicBlock();
2823 MachineFunction::iterator It = ++MBB->getIterator();
2824 MachineFunction *MF = MBB->getParent();
2825 auto BreakMBB = MF->CreateMachineBasicBlock(LLVM_BB);
2826 auto SinkMBB = MF->CreateMachineBasicBlock(LLVM_BB);
2827 MF->insert(It, BreakMBB);
2828 MF->insert(It, SinkMBB);
2829
2830 // Transfer the remainder of MBB and its successor edges to SinkMBB.
2831 SinkMBB->splice(SinkMBB->end(), MBB, std::next(MI.getIterator()), MBB->end());
2832 SinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
2833
2834 const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
2835 DebugLoc DL = MI.getDebugLoc();
2836 MachineOperand &Divisor = MI.getOperand(2);
2837 Register DivisorReg = Divisor.getReg();
2838
2839 // MBB:
2840 BuildMI(MBB, DL, TII.get(LoongArch::BNEZ))
2841 .addReg(DivisorReg, getKillRegState(Divisor.isKill()))
2842 .addMBB(SinkMBB);
2843 MBB->addSuccessor(BreakMBB);
2844 MBB->addSuccessor(SinkMBB);
2845
2846 // BreakMBB:
2847 // See linux header file arch/loongarch/include/uapi/asm/break.h for the
2848 // definition of BRK_DIVZERO.
2849 BuildMI(BreakMBB, DL, TII.get(LoongArch::BREAK)).addImm(7 /*BRK_DIVZERO*/);
2850 BreakMBB->addSuccessor(SinkMBB);
2851
2852 // Clear Divisor's kill flag.
2853 Divisor.setIsKill(false);
2854
2855 return SinkMBB;
2856}
2857
2858static MachineBasicBlock *
2859 emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB,
2860 const LoongArchSubtarget &Subtarget) {
2861 unsigned CondOpc;
2862 switch (MI.getOpcode()) {
2863 default:
2864 llvm_unreachable("Unexpected opcode");
2865 case LoongArch::PseudoVBZ:
2866 CondOpc = LoongArch::VSETEQZ_V;
2867 break;
2868 case LoongArch::PseudoVBZ_B:
2869 CondOpc = LoongArch::VSETANYEQZ_B;
2870 break;
2871 case LoongArch::PseudoVBZ_H:
2872 CondOpc = LoongArch::VSETANYEQZ_H;
2873 break;
2874 case LoongArch::PseudoVBZ_W:
2875 CondOpc = LoongArch::VSETANYEQZ_W;
2876 break;
2877 case LoongArch::PseudoVBZ_D:
2878 CondOpc = LoongArch::VSETANYEQZ_D;
2879 break;
2880 case LoongArch::PseudoVBNZ:
2881 CondOpc = LoongArch::VSETNEZ_V;
2882 break;
2883 case LoongArch::PseudoVBNZ_B:
2884 CondOpc = LoongArch::VSETALLNEZ_B;
2885 break;
2886 case LoongArch::PseudoVBNZ_H:
2887 CondOpc = LoongArch::VSETALLNEZ_H;
2888 break;
2889 case LoongArch::PseudoVBNZ_W:
2890 CondOpc = LoongArch::VSETALLNEZ_W;
2891 break;
2892 case LoongArch::PseudoVBNZ_D:
2893 CondOpc = LoongArch::VSETALLNEZ_D;
2894 break;
2895 case LoongArch::PseudoXVBZ:
2896 CondOpc = LoongArch::XVSETEQZ_V;
2897 break;
2898 case LoongArch::PseudoXVBZ_B:
2899 CondOpc = LoongArch::XVSETANYEQZ_B;
2900 break;
2901 case LoongArch::PseudoXVBZ_H:
2902 CondOpc = LoongArch::XVSETANYEQZ_H;
2903 break;
2904 case LoongArch::PseudoXVBZ_W:
2905 CondOpc = LoongArch::XVSETANYEQZ_W;
2906 break;
2907 case LoongArch::PseudoXVBZ_D:
2908 CondOpc = LoongArch::XVSETANYEQZ_D;
2909 break;
2910 case LoongArch::PseudoXVBNZ:
2911 CondOpc = LoongArch::XVSETNEZ_V;
2912 break;
2913 case LoongArch::PseudoXVBNZ_B:
2914 CondOpc = LoongArch::XVSETALLNEZ_B;
2915 break;
2916 case LoongArch::PseudoXVBNZ_H:
2917 CondOpc = LoongArch::XVSETALLNEZ_H;
2918 break;
2919 case LoongArch::PseudoXVBNZ_W:
2920 CondOpc = LoongArch::XVSETALLNEZ_W;
2921 break;
2922 case LoongArch::PseudoXVBNZ_D:
2923 CondOpc = LoongArch::XVSETALLNEZ_D;
2924 break;
2925 }
2926
2927 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
2928 const BasicBlock *LLVM_BB = BB->getBasicBlock();
2929 DebugLoc DL = MI.getDebugLoc();
2930 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
2931 MachineFunction::iterator It = ++BB->getIterator();
2932
2933 MachineFunction *F = BB->getParent();
2934 MachineBasicBlock *FalseBB = F->CreateMachineBasicBlock(LLVM_BB);
2935 MachineBasicBlock *TrueBB = F->CreateMachineBasicBlock(LLVM_BB);
2936 MachineBasicBlock *SinkBB = F->CreateMachineBasicBlock(LLVM_BB);
2937
2938 F->insert(It, FalseBB);
2939 F->insert(It, TrueBB);
2940 F->insert(It, SinkBB);
2941
2942 // Transfer the remainder of MBB and its successor edges to Sink.
2943 SinkBB->splice(SinkBB->end(), BB, std::next(MI.getIterator()), BB->end());
2944 SinkBB->transferSuccessorsAndUpdatePHIs(BB);
2945
2946 // Insert the real instruction to BB.
2947 Register FCC = MRI.createVirtualRegister(&LoongArch::CFRRegClass);
2948 BuildMI(BB, DL, TII->get(CondOpc), FCC).addReg(MI.getOperand(1).getReg());
2949
2950 // Insert branch.
2951 BuildMI(BB, DL, TII->get(LoongArch::BCNEZ)).addReg(FCC).addMBB(TrueBB);
2952 BB->addSuccessor(FalseBB);
2953 BB->addSuccessor(TrueBB);
2954
2955 // FalseBB.
2956 Register RD1 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
2957 BuildMI(FalseBB, DL, TII->get(LoongArch::ADDI_W), RD1)
2958 .addReg(LoongArch::R0)
2959 .addImm(0);
2960 BuildMI(FalseBB, DL, TII->get(LoongArch::PseudoBR)).addMBB(SinkBB);
2961 FalseBB->addSuccessor(SinkBB);
2962
2963 // TrueBB.
2964 Register RD2 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
2965 BuildMI(TrueBB, DL, TII->get(LoongArch::ADDI_W), RD2)
2966 .addReg(LoongArch::R0)
2967 .addImm(1);
2968 TrueBB->addSuccessor(SinkBB);
2969
2970 // SinkBB: merge the results.
2971 BuildMI(*SinkBB, SinkBB->begin(), DL, TII->get(LoongArch::PHI),
2972 MI.getOperand(0).getReg())
2973 .addReg(RD1)
2974 .addMBB(FalseBB)
2975 .addReg(RD2)
2976 .addMBB(TrueBB);
2977
2978 // The pseudo instruction is gone now.
2979 MI.eraseFromParent();
2980 return SinkBB;
2981}
2982
2983MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter(
2984 MachineInstr &MI, MachineBasicBlock *BB) const {
2985 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
2986 DebugLoc DL = MI.getDebugLoc();
2987
2988 switch (MI.getOpcode()) {
2989 default:
2990 llvm_unreachable("Unexpected instr type to insert");
2991 case LoongArch::DIV_W:
2992 case LoongArch::DIV_WU:
2993 case LoongArch::MOD_W:
2994 case LoongArch::MOD_WU:
2995 case LoongArch::DIV_D:
2996 case LoongArch::DIV_DU:
2997 case LoongArch::MOD_D:
2998 case LoongArch::MOD_DU:
2999 return insertDivByZeroTrap(MI, BB);
3000 break;
3001 case LoongArch::WRFCSR: {
3002 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVGR2FCSR),
3003 LoongArch::FCSR0 + MI.getOperand(0).getImm())
3004 .addReg(MI.getOperand(1).getReg());
3005 MI.eraseFromParent();
3006 return BB;
3007 }
3008 case LoongArch::RDFCSR: {
3009 MachineInstr *ReadFCSR =
3010 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVFCSR2GR),
3011 MI.getOperand(0).getReg())
3012 .addReg(LoongArch::FCSR0 + MI.getOperand(1).getImm());
3013 ReadFCSR->getOperand(1).setIsUndef();
3014 MI.eraseFromParent();
3015 return BB;
3016 }
3017 case LoongArch::PseudoVBZ:
3018 case LoongArch::PseudoVBZ_B:
3019 case LoongArch::PseudoVBZ_H:
3020 case LoongArch::PseudoVBZ_W:
3021 case LoongArch::PseudoVBZ_D:
3022 case LoongArch::PseudoVBNZ:
3023 case LoongArch::PseudoVBNZ_B:
3024 case LoongArch::PseudoVBNZ_H:
3025 case LoongArch::PseudoVBNZ_W:
3026 case LoongArch::PseudoVBNZ_D:
3027 case LoongArch::PseudoXVBZ:
3028 case LoongArch::PseudoXVBZ_B:
3029 case LoongArch::PseudoXVBZ_H:
3030 case LoongArch::PseudoXVBZ_W:
3031 case LoongArch::PseudoXVBZ_D:
3032 case LoongArch::PseudoXVBNZ:
3033 case LoongArch::PseudoXVBNZ_B:
3034 case LoongArch::PseudoXVBNZ_H:
3035 case LoongArch::PseudoXVBNZ_W:
3036 case LoongArch::PseudoXVBNZ_D:
3037 return emitVecCondBranchPseudo(MI, BB, Subtarget);
3038 }
3039}
3040
3041 bool LoongArchTargetLowering::allowsMisalignedMemoryAccesses(
3042 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
3043 unsigned *Fast) const {
3044 if (!Subtarget.hasUAL())
3045 return false;
3046
3047 // TODO: set reasonable speed number.
3048 if (Fast)
3049 *Fast = 1;
3050 return true;
3051}
3052
3053const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
3054 switch ((LoongArchISD::NodeType)Opcode) {
3055 case LoongArchISD::FIRST_NUMBER:
3056 break;
3057
3058#define NODE_NAME_CASE(node) \
3059 case LoongArchISD::node: \
3060 return "LoongArchISD::" #node;
3061
3062 // TODO: Add more target-dependent nodes later.
3063 NODE_NAME_CASE(CALL)
3064 NODE_NAME_CASE(RET)
3065 NODE_NAME_CASE(TAIL)
3066 NODE_NAME_CASE(SLL_W)
3067 NODE_NAME_CASE(SRA_W)
3068 NODE_NAME_CASE(SRL_W)
3069 NODE_NAME_CASE(BSTRINS)
3070 NODE_NAME_CASE(BSTRPICK)
3071 NODE_NAME_CASE(MOVGR2FR_W_LA64)
3072 NODE_NAME_CASE(MOVFR2GR_S_LA64)
3073 NODE_NAME_CASE(FTINT)
3074 NODE_NAME_CASE(REVB_2H)
3075 NODE_NAME_CASE(REVB_2W)
3076 NODE_NAME_CASE(BITREV_4B)
3077 NODE_NAME_CASE(BITREV_W)
3078 NODE_NAME_CASE(ROTR_W)
3079 NODE_NAME_CASE(ROTL_W)
3080 NODE_NAME_CASE(CLZ_W)
3081 NODE_NAME_CASE(CTZ_W)
3082 NODE_NAME_CASE(DBAR)
3083 NODE_NAME_CASE(IBAR)
3084 NODE_NAME_CASE(BREAK)
3085 NODE_NAME_CASE(SYSCALL)
3086 NODE_NAME_CASE(CRC_W_B_W)
3087 NODE_NAME_CASE(CRC_W_H_W)
3088 NODE_NAME_CASE(CRC_W_W_W)
3089 NODE_NAME_CASE(CRC_W_D_W)
3090 NODE_NAME_CASE(CRCC_W_B_W)
3091 NODE_NAME_CASE(CRCC_W_H_W)
3092 NODE_NAME_CASE(CRCC_W_W_W)
3093 NODE_NAME_CASE(CRCC_W_D_W)
3094 NODE_NAME_CASE(CSRRD)
3095 NODE_NAME_CASE(CSRWR)
3096 NODE_NAME_CASE(CSRXCHG)
3097 NODE_NAME_CASE(IOCSRRD_B)
3098 NODE_NAME_CASE(IOCSRRD_H)
3099 NODE_NAME_CASE(IOCSRRD_W)
3100 NODE_NAME_CASE(IOCSRRD_D)
3101 NODE_NAME_CASE(IOCSRWR_B)
3102 NODE_NAME_CASE(IOCSRWR_H)
3103 NODE_NAME_CASE(IOCSRWR_W)
3104 NODE_NAME_CASE(IOCSRWR_D)
3105 NODE_NAME_CASE(CPUCFG)
3106 NODE_NAME_CASE(MOVGR2FCSR)
3107 NODE_NAME_CASE(MOVFCSR2GR)
3108 NODE_NAME_CASE(CACOP_D)
3109 NODE_NAME_CASE(CACOP_W)
3110 NODE_NAME_CASE(VPICK_SEXT_ELT)
3111 NODE_NAME_CASE(VPICK_ZEXT_ELT)
3112 NODE_NAME_CASE(VREPLVE)
3113 NODE_NAME_CASE(VALL_ZERO)
3114 NODE_NAME_CASE(VANY_ZERO)
3115 NODE_NAME_CASE(VALL_NONZERO)
3116 NODE_NAME_CASE(VANY_NONZERO)
3117 }
3118#undef NODE_NAME_CASE
3119 return nullptr;
3120}
3121
3122//===----------------------------------------------------------------------===//
3123// Calling Convention Implementation
3124//===----------------------------------------------------------------------===//
3125
3126 // Eight general-purpose registers a0-a7 are used for passing integer arguments,
3127 // with a0-a1 reused to return values. Generally, the GPRs are used to pass
3128 // fixed-point arguments, and floating-point arguments when no FPR is available
3129 // or when using the soft-float ABI.
3130const MCPhysReg ArgGPRs[] = {LoongArch::R4, LoongArch::R5, LoongArch::R6,
3131 LoongArch::R7, LoongArch::R8, LoongArch::R9,
3132 LoongArch::R10, LoongArch::R11};
3133 // Eight floating-point registers fa0-fa7 are used for passing floating-point
3134// arguments, and fa0-fa1 are also used to return values.
3135const MCPhysReg ArgFPR32s[] = {LoongArch::F0, LoongArch::F1, LoongArch::F2,
3136 LoongArch::F3, LoongArch::F4, LoongArch::F5,
3137 LoongArch::F6, LoongArch::F7};
3138// FPR32 and FPR64 alias each other.
3139const MCPhysReg ArgFPR64s[] = {
3140 LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, LoongArch::F3_64,
3141 LoongArch::F4_64, LoongArch::F5_64, LoongArch::F6_64, LoongArch::F7_64};
3142
3143const MCPhysReg ArgVRs[] = {LoongArch::VR0, LoongArch::VR1, LoongArch::VR2,
3144 LoongArch::VR3, LoongArch::VR4, LoongArch::VR5,
3145 LoongArch::VR6, LoongArch::VR7};
3146
3147const MCPhysReg ArgXRs[] = {LoongArch::XR0, LoongArch::XR1, LoongArch::XR2,
3148 LoongArch::XR3, LoongArch::XR4, LoongArch::XR5,
3149 LoongArch::XR6, LoongArch::XR7};
3150
3151// Pass a 2*GRLen argument that has been split into two GRLen values through
3152// registers or the stack as necessary.
3153static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State,
3154 CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1,
3155 unsigned ValNo2, MVT ValVT2, MVT LocVT2,
3156 ISD::ArgFlagsTy ArgFlags2) {
3157 unsigned GRLenInBytes = GRLen / 8;
3158 if (Register Reg = State.AllocateReg(ArgGPRs)) {
3159 // At least one half can be passed via register.
3160 State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
3161 VA1.getLocVT(), CCValAssign::Full));
3162 } else {
3163 // Both halves must be passed on the stack, with proper alignment.
3164 Align StackAlign =
3165 std::max(Align(GRLenInBytes), ArgFlags1.getNonZeroOrigAlign());
3166 State.addLoc(
3167 CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),
3168 State.AllocateStack(GRLenInBytes, StackAlign),
3169 VA1.getLocVT(), CCValAssign::Full));
3170 State.addLoc(CCValAssign::getMem(
3171 ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
3172 LocVT2, CCValAssign::Full));
3173 return false;
3174 }
3175 if (Register Reg = State.AllocateReg(ArgGPRs)) {
3176 // The second half can also be passed via register.
3177 State.addLoc(
3178 CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
3179 } else {
3180 // The second half is passed via the stack, without additional alignment.
3181 State.addLoc(CCValAssign::getMem(
3182 ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
3183 LocVT2, CCValAssign::Full));
3184 }
3185 return false;
3186}
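// A minimal worked example of the assignment above (illustrative only, not
// taken from any test, assuming GRLen == 32): for a declaration such as
//   void f(int a, int b, int c, int d, int e, int g, int h, long long x);
// the seven ints occupy a0-a6, so the split i64 'x' gets its low half in a7
// via the first AllocateReg call and its high half in a 4-byte stack slot via
// the second branch; with eight leading ints both halves would instead be
// placed on the stack, with the first slot given the i64's original alignment.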
3187
3188// Implements the LoongArch calling convention. Returns true upon failure.
3189static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI,
3190 unsigned ValNo, MVT ValVT,
3191 CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
3192 CCState &State, bool IsFixed, bool IsRet,
3193 Type *OrigTy) {
3194 unsigned GRLen = DL.getLargestLegalIntTypeSizeInBits();
3195 assert((GRLen == 32 || GRLen == 64) && "Unsupported GRLen");
3196 MVT GRLenVT = GRLen == 32 ? MVT::i32 : MVT::i64;
3197 MVT LocVT = ValVT;
3198
3199 // Any return value split into more than two values can't be returned
3200 // directly.
3201 if (IsRet && ValNo > 1)
3202 return true;
3203
3204 // Use a GPR for floating point when targeting a soft-float ABI, when passing a variadic argument, or when no FPR is available.
3205 bool UseGPRForFloat = true;
3206
3207 switch (ABI) {
3208 default:
3209 llvm_unreachable("Unexpected ABI");
3210 case LoongArchABI::ABI_ILP32S:
3211 case LoongArchABI::ABI_ILP32F:
3212 case LoongArchABI::ABI_LP64F:
3213 report_fatal_error("Unimplemented ABI");
3214 break;
3215 case LoongArchABI::ABI_ILP32D:
3216 case LoongArchABI::ABI_LP64D:
3217 UseGPRForFloat = !IsFixed;
3218 break;
3219 case LoongArchABI::ABI_LP64S:
3220 break;
3221 }
3222
3223 // FPR32 and FPR64 alias each other.
3224 if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s))
3225 UseGPRForFloat = true;
3226
3227 if (UseGPRForFloat && ValVT == MVT::f32) {
3228 LocVT = GRLenVT;
3229 LocInfo = CCValAssign::BCvt;
3230 } else if (UseGPRForFloat && GRLen == 64 && ValVT == MVT::f64) {
3231 LocVT = MVT::i64;
3232 LocInfo = CCValAssign::BCvt;
3233 } else if (UseGPRForFloat && GRLen == 32 && ValVT == MVT::f64) {
3234 // TODO: Handle passing f64 on LA32 with D feature.
3235 report_fatal_error("Passing f64 with GPR on LA32 is undefined");
3236 }
3237
3238 // If this is a variadic argument, the LoongArch calling convention requires
3239 // that it is assigned an 'even' or 'aligned' register if it has (2*GRLen)/8
3240 // byte alignment. An aligned register should be used regardless of whether
3241 // the original argument was split during legalisation or not. The argument
3242 // will not be passed by registers if the original type is larger than
3243 // 2*GRLen, so the register alignment rule does not apply.
3244 unsigned TwoGRLenInBytes = (2 * GRLen) / 8;
3245 if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoGRLenInBytes &&
3246 DL.getTypeAllocSize(OrigTy) == TwoGRLenInBytes) {
3247 unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
3248 // Skip 'odd' register if necessary.
3249 if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1)
3250 State.AllocateReg(ArgGPRs);
3251 }
3252
3253 SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
3254 SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
3255 State.getPendingArgFlags();
3256
3257 assert(PendingLocs.size() == PendingArgFlags.size() &&
3258 "PendingLocs and PendingArgFlags out of sync");
3259
3260 // Split arguments might be passed indirectly, so keep track of the pending
3261 // values.
3262 if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
3263 LocVT = GRLenVT;
3264 LocInfo = CCValAssign::Indirect;
3265 PendingLocs.push_back(
3266 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
3267 PendingArgFlags.push_back(ArgFlags);
3268 if (!ArgFlags.isSplitEnd()) {
3269 return false;
3270 }
3271 }
3272
3273 // If the split argument only had two elements, it should be passed directly
3274 // in registers or on the stack.
3275 if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&
3276 PendingLocs.size() <= 2) {
3277 assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
3278 // Apply the normal calling convention rules to the first half of the
3279 // split argument.
3280 CCValAssign VA = PendingLocs[0];
3281 ISD::ArgFlagsTy AF = PendingArgFlags[0];
3282 PendingLocs.clear();
3283 PendingArgFlags.clear();
3284 return CC_LoongArchAssign2GRLen(GRLen, State, VA, AF, ValNo, ValVT, LocVT,
3285 ArgFlags);
3286 }
3287
3288 // Allocate to a register if possible, or else a stack slot.
3289 Register Reg;
3290 unsigned StoreSizeBytes = GRLen / 8;
3291 Align StackAlign = Align(GRLen / 8);
3292
3293 if (ValVT == MVT::f32 && !UseGPRForFloat)
3294 Reg = State.AllocateReg(ArgFPR32s);
3295 else if (ValVT == MVT::f64 && !UseGPRForFloat)
3296 Reg = State.AllocateReg(ArgFPR64s);
3297 else if (ValVT.is128BitVector())
3298 Reg = State.AllocateReg(ArgVRs);
3299 else if (ValVT.is256BitVector())
3300 Reg = State.AllocateReg(ArgXRs);
3301 else
3302 Reg = State.AllocateReg(ArgGPRs);
3303
3304 unsigned StackOffset =
3305 Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign);
3306
3307 // If we reach this point and PendingLocs is non-empty, we must be at the
3308 // end of a split argument that must be passed indirectly.
3309 if (!PendingLocs.empty()) {
3310 assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
3311 assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
3312 for (auto &It : PendingLocs) {
3313 if (Reg)
3314 It.convertToReg(Reg);
3315 else
3316 It.convertToMem(StackOffset);
3317 State.addLoc(It);
3318 }
3319 PendingLocs.clear();
3320 PendingArgFlags.clear();
3321 return false;
3322 }
3323 assert((!UseGPRForFloat || LocVT == GRLenVT) &&
3324 "Expected an GRLenVT at this stage");
3325
3326 if (Reg) {
3327 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
3328 return false;
3329 }
3330
3331 // When a floating-point value is passed on the stack, no bit-cast is needed.
3332 if (ValVT.isFloatingPoint()) {
3333 LocVT = ValVT;
3334 LocInfo = CCValAssign::Full;
3335 }
3336
3337 State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
3338 return false;
3339}
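// End-to-end sketch of the convention above (illustrative only, assuming the
// LP64D ABI): for a call passing nine 'double' arguments, the first eight are
// assigned fa0-fa7; for the ninth, ArgFPR32s/ArgFPR64s report no free
// register, UseGPRForFloat flips to true, the value is marked
// CCValAssign::BCvt with LocVT i64 and lands in the next free GPR, or in an
// 8-byte stack slot (with LocVT reverted to f64) once a0-a7 are exhausted too.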
3340
3341void LoongArchTargetLowering::analyzeInputArgs(
3342 MachineFunction &MF, CCState &CCInfo,
3343 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
3344 LoongArchCCAssignFn Fn) const {
3345 FunctionType *FType = MF.getFunction().getFunctionType();
3346 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
3347 MVT ArgVT = Ins[i].VT;
3348 Type *ArgTy = nullptr;
3349 if (IsRet)
3350 ArgTy = FType->getReturnType();
3351 else if (Ins[i].isOrigArg())
3352 ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
3353 LoongArchABI::ABI ABI =
3354 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
3355 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Ins[i].Flags,
3356 CCInfo, /*IsFixed=*/true, IsRet, ArgTy)) {
3357 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " << ArgVT
3358 << '\n');
3359 llvm_unreachable("");
3360 }
3361 }
3362}
3363
3364void LoongArchTargetLowering::analyzeOutputArgs(
3365 MachineFunction &MF, CCState &CCInfo,
3366 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
3367 CallLoweringInfo *CLI, LoongArchCCAssignFn Fn) const {
3368 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
3369 MVT ArgVT = Outs[i].VT;
3370 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
3371 LoongArchABI::ABI ABI =
3372 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
3373 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Outs[i].Flags,
3374 CCInfo, Outs[i].IsFixed, IsRet, OrigTy)) {
3375 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " << ArgVT
3376 << "\n");
3377 llvm_unreachable("");
3378 }
3379 }
3380}
3381
3382// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
3383// values.
3384static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
3385 const CCValAssign &VA, const SDLoc &DL) {
3386 switch (VA.getLocInfo()) {
3387 default:
3388 llvm_unreachable("Unexpected CCValAssign::LocInfo");
3389 case CCValAssign::Full:
3391 break;
3392 case CCValAssign::BCvt:
3393 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
3394 Val = DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Val);
3395 else
3396 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
3397 break;
3398 }
3399 return Val;
3400}
3401
3401
3402static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
3403 const CCValAssign &VA, const SDLoc &DL,
3404 const LoongArchTargetLowering &TLI) {
3405 MachineFunction &MF = DAG.getMachineFunction();
3406 MachineRegisterInfo &RegInfo = MF.getRegInfo();
3407 EVT LocVT = VA.getLocVT();
3408 SDValue Val;
3409 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
3410 Register VReg = RegInfo.createVirtualRegister(RC);
3411 RegInfo.addLiveIn(VA.getLocReg(), VReg);
3412 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
3413
3414 return convertLocVTToValVT(DAG, Val, VA, DL);
3415}
3416
3417// The caller is responsible for loading the full value if the argument is
3418// passed with CCValAssign::Indirect.
3419static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
3420 const CCValAssign &VA, const SDLoc &DL) {
3421 MachineFunction &MF = DAG.getMachineFunction();
3422 MachineFrameInfo &MFI = MF.getFrameInfo();
3423 EVT ValVT = VA.getValVT();
3424 int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
3425 /*IsImmutable=*/true);
3426 SDValue FIN = DAG.getFrameIndex(
3428
3429 ISD::LoadExtType ExtType;
3430 switch (VA.getLocInfo()) {
3431 default:
3432 llvm_unreachable("Unexpected CCValAssign::LocInfo");
3433 case CCValAssign::Full:
3435 case CCValAssign::BCvt:
3436 ExtType = ISD::NON_EXTLOAD;
3437 break;
3438 }
3439 return DAG.getExtLoad(
3440 ExtType, DL, VA.getLocVT(), Chain, FIN,
3442}
3443
3444static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
3445 const CCValAssign &VA, const SDLoc &DL) {
3446 EVT LocVT = VA.getLocVT();
3447
3448 switch (VA.getLocInfo()) {
3449 default:
3450 llvm_unreachable("Unexpected CCValAssign::LocInfo");
3451 case CCValAssign::Full:
3452 break;
3453 case CCValAssign::BCvt:
3454 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
3455 Val = DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Val);
3456 else
3457 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
3458 break;
3459 }
3460 return Val;
3461}
3462
3463static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
3464 CCValAssign::LocInfo LocInfo,
3465 ISD::ArgFlagsTy ArgFlags, CCState &State) {
3466 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
3467 // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, SpLim
3468 // s0 s1 s2 s3 s4 s5 s6 s7 s8
3469 static const MCPhysReg GPRList[] = {
3470 LoongArch::R23, LoongArch::R24, LoongArch::R25,
3471 LoongArch::R26, LoongArch::R27, LoongArch::R28,
3472 LoongArch::R29, LoongArch::R30, LoongArch::R31};
3473 if (unsigned Reg = State.AllocateReg(GPRList)) {
3474 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
3475 return false;
3476 }
3477 }
3478
3479 if (LocVT == MVT::f32) {
3480 // Pass in STG registers: F1, F2, F3, F4
3481 // fs0,fs1,fs2,fs3
3482 static const MCPhysReg FPR32List[] = {LoongArch::F24, LoongArch::F25,
3483 LoongArch::F26, LoongArch::F27};
3484 if (unsigned Reg = State.AllocateReg(FPR32List)) {
3485 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
3486 return false;
3487 }
3488 }
3489
3490 if (LocVT == MVT::f64) {
3491 // Pass in STG registers: D1, D2, D3, D4
3492 // fs4,fs5,fs6,fs7
3493 static const MCPhysReg FPR64List[] = {LoongArch::F28_64, LoongArch::F29_64,
3494 LoongArch::F30_64, LoongArch::F31_64};
3495 if (unsigned Reg = State.AllocateReg(FPR64List)) {
3496 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
3497 return false;
3498 }
3499 }
3500
3501 report_fatal_error("No registers left in GHC calling convention");
3502 return true;
3503}
3504
3505// Transform physical registers into virtual registers.
3506SDValue LoongArchTargetLowering::LowerFormalArguments(
3507 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
3508 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
3509 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3510
3511 MachineFunction &MF = DAG.getMachineFunction();
3512
3513 switch (CallConv) {
3514 default:
3515 llvm_unreachable("Unsupported calling convention");
3516 case CallingConv::C:
3517 case CallingConv::Fast:
3518 break;
3519 case CallingConv::GHC:
3520 if (!MF.getSubtarget().hasFeature(LoongArch::FeatureBasicF) ||
3521 !MF.getSubtarget().hasFeature(LoongArch::FeatureBasicD))
3523 "GHC calling convention requires the F and D extensions");
3524 }
3525
3526 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3527 MVT GRLenVT = Subtarget.getGRLenVT();
3528 unsigned GRLenInBytes = Subtarget.getGRLen() / 8;
3529 // Used with varargs to accumulate store chains.
3530 std::vector<SDValue> OutChains;
3531
3532 // Assign locations to all of the incoming arguments.
3533 SmallVector<CCValAssign> ArgLocs;
3534 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
3535
3536 if (CallConv == CallingConv::GHC)
3537 CCInfo.AnalyzeFormalArguments(Ins, CC_LoongArch_GHC);
3538 else
3539 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false, CC_LoongArch);
3540
3541 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3542 CCValAssign &VA = ArgLocs[i];
3543 SDValue ArgValue;
3544 if (VA.isRegLoc())
3545 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, *this);
3546 else
3547 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
3548 if (VA.getLocInfo() == CCValAssign::Indirect) {
3549 // If the original argument was split and passed by reference, we need to
3550 // load all parts of it here (using the same address).
3551 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
3552 MachinePointerInfo()));
3553 unsigned ArgIndex = Ins[i].OrigArgIndex;
3554 unsigned ArgPartOffset = Ins[i].PartOffset;
3555 assert(ArgPartOffset == 0);
3556 while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) {
3557 CCValAssign &PartVA = ArgLocs[i + 1];
3558 unsigned PartOffset = Ins[i + 1].PartOffset - ArgPartOffset;
3559 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
3560 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
3561 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
3562 MachinePointerInfo()));
3563 ++i;
3564 }
3565 continue;
3566 }
3567 InVals.push_back(ArgValue);
3568 }
3569
3570 if (IsVarArg) {
3571 ArrayRef<MCPhysReg> ArgRegs = ArrayRef(ArgGPRs);
3572 unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
3573 const TargetRegisterClass *RC = &LoongArch::GPRRegClass;
3574 MachineFrameInfo &MFI = MF.getFrameInfo();
3575 MachineRegisterInfo &RegInfo = MF.getRegInfo();
3576 auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();
3577
3578 // Offset of the first variable argument from stack pointer, and size of
3579 // the vararg save area. For now, the varargs save area is either zero or
3580 // large enough to hold a0-a7.
3581 int VaArgOffset, VarArgsSaveSize;
3582
3583 // If all registers are allocated, then all varargs must be passed on the
3584 // stack and we don't need to save any argregs.
3585 if (ArgRegs.size() == Idx) {
3586 VaArgOffset = CCInfo.getStackSize();
3587 VarArgsSaveSize = 0;
3588 } else {
3589 VarArgsSaveSize = GRLenInBytes * (ArgRegs.size() - Idx);
3590 VaArgOffset = -VarArgsSaveSize;
3591 }
3592
3593 // Record the frame index of the first variable argument,
3594 // which is needed when lowering VASTART.
3595 int FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
3596 LoongArchFI->setVarArgsFrameIndex(FI);
3597
3598 // If saving an odd number of registers then create an extra stack slot to
3599 // ensure that the frame pointer is 2*GRLen-aligned, which in turn ensures
3600 // offsets to even-numbered registers remain 2*GRLen-aligned.
3601 if (Idx % 2) {
3602 MFI.CreateFixedObject(GRLenInBytes, VaArgOffset - (int)GRLenInBytes,
3603 true);
3604 VarArgsSaveSize += GRLenInBytes;
3605 }
3606
3607 // Copy the integer registers that may have been used for passing varargs
3608 // to the vararg save area.
3609 for (unsigned I = Idx; I < ArgRegs.size();
3610 ++I, VaArgOffset += GRLenInBytes) {
3611 const Register Reg = RegInfo.createVirtualRegister(RC);
3612 RegInfo.addLiveIn(ArgRegs[I], Reg);
3613 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, GRLenVT);
3614 FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
3615 SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
3616 SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
3618 cast<StoreSDNode>(Store.getNode())
3619 ->getMemOperand()
3620 ->setValue((Value *)nullptr);
3621 OutChains.push_back(Store);
3622 }
3623 LoongArchFI->setVarArgsSaveSize(VarArgsSaveSize);
3624 }
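  // Worked example of the save-area layout above (illustrative, assuming
  // GRLen == 64): for 'int sum(int n, ...)' only a0 holds a fixed argument, so
  // Idx == 1 and a1-a7 are spilled, giving VarArgsSaveSize == 56 with the save
  // area starting at offset -56 from the incoming stack pointer; because Idx
  // is odd, one extra 8-byte slot keeps the area 2*GRLen (16-byte) aligned,
  // for a final VarArgsSaveSize of 64.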
3625
3626 // All stores are grouped in one node to allow the matching between
3627 // the size of Ins and InVals. This only happens for vararg functions.
3628 if (!OutChains.empty()) {
3629 OutChains.push_back(Chain);
3630 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
3631 }
3632
3633 return Chain;
3634}
3635
3636bool LoongArchTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
3637 return CI->isTailCall();
3638}
3639
3640// Check whether the return value is used only as a return value, as otherwise
3641// we can't perform a tail call.
3642bool LoongArchTargetLowering::isUsedByReturnOnly(SDNode *N,
3643 SDValue &Chain) const {
3644 if (N->getNumValues() != 1)
3645 return false;
3646 if (!N->hasNUsesOfValue(1, 0))
3647 return false;
3648
3649 SDNode *Copy = *N->use_begin();
3650 if (Copy->getOpcode() != ISD::CopyToReg)
3651 return false;
3652
3653 // If the ISD::CopyToReg has a glue operand, we conservatively assume it
3654 // isn't safe to perform a tail call.
3655 if (Copy->getGluedNode())
3656 return false;
3657
3658 // The copy must be used by a LoongArchISD::RET, and nothing else.
3659 bool HasRet = false;
3660 for (SDNode *Node : Copy->uses()) {
3661 if (Node->getOpcode() != LoongArchISD::RET)
3662 return false;
3663 HasRet = true;
3664 }
3665
3666 if (!HasRet)
3667 return false;
3668
3669 Chain = Copy->getOperand(0);
3670 return true;
3671}
3672
3673// Check whether the call is eligible for tail call optimization.
3674bool LoongArchTargetLowering::isEligibleForTailCallOptimization(
3675 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
3676 const SmallVectorImpl<CCValAssign> &ArgLocs) const {
3677
3678 auto CalleeCC = CLI.CallConv;
3679 auto &Outs = CLI.Outs;
3680 auto &Caller = MF.getFunction();
3681 auto CallerCC = Caller.getCallingConv();
3682
3683 // Do not tail call opt if the stack is used to pass parameters.
3684 if (CCInfo.getStackSize() != 0)
3685 return false;
3686
3687 // Do not tail call opt if any parameters need to be passed indirectly.
3688 for (auto &VA : ArgLocs)
3689 if (VA.getLocInfo() == CCValAssign::Indirect)
3690 return false;
3691
3692 // Do not tail call opt if either caller or callee uses struct return
3693 // semantics.
3694 auto IsCallerStructRet = Caller.hasStructRetAttr();
3695 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
3696 if (IsCallerStructRet || IsCalleeStructRet)
3697 return false;
3698
3699 // Do not tail call opt if either the callee or caller has a byval argument.
3700 for (auto &Arg : Outs)
3701 if (Arg.Flags.isByVal())
3702 return false;
3703
3704 // The callee has to preserve all registers the caller needs to preserve.
3705 const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
3706 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
3707 if (CalleeCC != CallerCC) {
3708 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
3709 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
3710 return false;
3711 }
3712 return true;
3713}
3714
3715static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) {
3716 return DAG.getDataLayout().getPrefTypeAlign(
3717 VT.getTypeForEVT(*DAG.getContext()));
3718}
3719
3720// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
3721// and output parameter nodes.
3722SDValue
3723LoongArchTargetLowering::LowerCall(CallLoweringInfo &CLI,
3724 SmallVectorImpl<SDValue> &InVals) const {
3725 SelectionDAG &DAG = CLI.DAG;
3726 SDLoc &DL = CLI.DL;
3727 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
3728 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
3729 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
3730 SDValue Chain = CLI.Chain;
3731 SDValue Callee = CLI.Callee;
3732 CallingConv::ID CallConv = CLI.CallConv;
3733 bool IsVarArg = CLI.IsVarArg;
3734 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3735 MVT GRLenVT = Subtarget.getGRLenVT();
3736 bool &IsTailCall = CLI.IsTailCall;
3737
3738 MachineFunction &MF = DAG.getMachineFunction();
3739
3740 // Analyze the operands of the call, assigning locations to each operand.
3741 SmallVector<CCValAssign> ArgLocs;
3742 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
3743
3744 if (CallConv == CallingConv::GHC)
3745 ArgCCInfo.AnalyzeCallOperands(Outs, CC_LoongArch_GHC);
3746 else
3747 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI, CC_LoongArch);
3748
3749 // Check if it's really possible to do a tail call.
3750 if (IsTailCall)
3751 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
3752
3753 if (IsTailCall)
3754 ++NumTailCalls;
3755 else if (CLI.CB && CLI.CB->isMustTailCall())
3756 report_fatal_error("failed to perform tail call elimination on a call "
3757 "site marked musttail");
3758
3759 // Get a count of how many bytes are to be pushed on the stack.
3760 unsigned NumBytes = ArgCCInfo.getStackSize();
3761
3762 // Create local copies for byval args.
3763 SmallVector<SDValue> ByValArgs;
3764 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
3765 ISD::ArgFlagsTy Flags = Outs[i].Flags;
3766 if (!Flags.isByVal())
3767 continue;
3768
3769 SDValue Arg = OutVals[i];
3770 unsigned Size = Flags.getByValSize();
3771 Align Alignment = Flags.getNonZeroByValAlign();
3772
3773 int FI =
3774 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
3775 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
3776 SDValue SizeNode = DAG.getConstant(Size, DL, GRLenVT);
3777
3778 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
3779 /*IsVolatile=*/false,
3780 /*AlwaysInline=*/false, /*isTailCall=*/IsTailCall,
3782 ByValArgs.push_back(FIPtr);
3783 }
3784
3785 if (!IsTailCall)
3786 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
3787
3788 // Copy argument values to their designated locations.
3789 SmallVector<std::pair<Register, SDValue>> RegsToPass;
3790 SmallVector<SDValue> MemOpChains;
3791 SDValue StackPtr;
3792 for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) {
3793 CCValAssign &VA = ArgLocs[i];
3794 SDValue ArgValue = OutVals[i];
3795 ISD::ArgFlagsTy Flags = Outs[i].Flags;
3796
3797 // Promote the value if needed.
3798 // For now, only handle fully promoted and indirect arguments.
3799 if (VA.getLocInfo() == CCValAssign::Indirect) {
3800 // Store the argument in a stack slot and pass its address.
3801 Align StackAlign =
3802 std::max(getPrefTypeAlign(Outs[i].ArgVT, DAG),
3803 getPrefTypeAlign(ArgValue.getValueType(), DAG));
3804 TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
3805 // If the original argument was split and passed by reference, we need to
3806 // store the required parts of it here (and pass just one address).
3807 unsigned ArgIndex = Outs[i].OrigArgIndex;
3808 unsigned ArgPartOffset = Outs[i].PartOffset;
3809 assert(ArgPartOffset == 0);
3810 // Calculate the total size to store. We don't have access to what we're
3811 // actually storing other than performing the loop and collecting the
3812 // info.
3813 SmallVector<std::pair<SDValue, SDValue>> Parts;
3814 while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) {
3815 SDValue PartValue = OutVals[i + 1];
3816 unsigned PartOffset = Outs[i + 1].PartOffset - ArgPartOffset;
3817 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
3818 EVT PartVT = PartValue.getValueType();
3819
3820 StoredSize += PartVT.getStoreSize();
3821 StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
3822 Parts.push_back(std::make_pair(PartValue, Offset));
3823 ++i;
3824 }
3825 SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
3826 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
3827 MemOpChains.push_back(
3828 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
3830 for (const auto &Part : Parts) {
3831 SDValue PartValue = Part.first;
3832 SDValue PartOffset = Part.second;
3833 SDValue Address =
3834 DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
3835 MemOpChains.push_back(
3836 DAG.getStore(Chain, DL, PartValue, Address,
3838 }
3839 ArgValue = SpillSlot;
3840 } else {
3841 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL);
3842 }
3843
3844 // Use local copy if it is a byval arg.
3845 if (Flags.isByVal())
3846 ArgValue = ByValArgs[j++];
3847
3848 if (VA.isRegLoc()) {
3849 // Queue up the argument copies and emit them at the end.
3850 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
3851 } else {
3852 assert(VA.isMemLoc() && "Argument not register or memory");
3853 assert(!IsTailCall && "Tail call not allowed if stack is used "
3854 "for passing parameters");
3855
3856 // Work out the address of the stack slot.
3857 if (!StackPtr.getNode())
3858 StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT);
3859 SDValue Address =
3860 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
3861 DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));
3862
3863 // Emit the store.
3864 MemOpChains.push_back(
3865 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
3866 }
3867 }
3868
3869 // Join the stores, which are independent of one another.
3870 if (!MemOpChains.empty())
3871 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
3872
3873 SDValue Glue;
3874
3875 // Build a sequence of copy-to-reg nodes, chained and glued together.
3876 for (auto &Reg : RegsToPass) {
3877 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
3878 Glue = Chain.getValue(1);
3879 }
3880
3881 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
3882 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
3883 // split it and then direct call can be matched by PseudoCALL.
3884 if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
3885 const GlobalValue *GV = S->getGlobal();
3886 unsigned OpFlags =
3887 getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV)
3888 ? LoongArchII::MO_CALL
3889 : LoongArchII::MO_CALL_PLT;
3890 Callee = DAG.getTargetGlobalAddress(S->getGlobal(), DL, PtrVT, 0, OpFlags);
3891 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
3892 unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(
3893 *MF.getFunction().getParent(), nullptr)
3894 ? LoongArchII::MO_CALL
3895 : LoongArchII::MO_CALL_PLT;
3896 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);
3897 }
3898
3899 // The first call operand is the chain and the second is the target address.
3900 SmallVector<SDValue> Ops;
3901 Ops.push_back(Chain);
3902 Ops.push_back(Callee);
3903
3904 // Add argument registers to the end of the list so that they are
3905 // known live into the call.
3906 for (auto &Reg : RegsToPass)
3907 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
3908
3909 if (!IsTailCall) {
3910 // Add a register mask operand representing the call-preserved registers.
3911 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
3912 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
3913 assert(Mask && "Missing call preserved mask for calling convention");
3914 Ops.push_back(DAG.getRegisterMask(Mask));
3915 }
3916
3917 // Glue the call to the argument copies, if any.
3918 if (Glue.getNode())
3919 Ops.push_back(Glue);
3920
3921 // Emit the call.
3922 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
3923
3924 if (IsTailCall) {
3925 MF.getFrameInfo().setHasTailCall();
3926 SDValue Ret = DAG.getNode(LoongArchISD::TAIL, DL, NodeTys, Ops);
3927 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
3928 return Ret;
3929 }
3930
3931 Chain = DAG.getNode(LoongArchISD::CALL, DL, NodeTys, Ops);
3932 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
3933 Glue = Chain.getValue(1);
3934
3935 // Mark the end of the call, which is glued to the call itself.
3936 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
3937 Glue = Chain.getValue(1);
3938
3939 // Assign locations to each value returned by this call.
3940 SmallVector<CCValAssign> RVLocs;
3941 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
3942 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_LoongArch);
3943
3944 // Copy all of the result registers out of their specified physreg.
3945 for (auto &VA : RVLocs) {
3946 // Copy the value out.
3947 SDValue RetValue =
3948 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
3949 // Glue the RetValue to the end of the call sequence.
3950 Chain = RetValue.getValue(1);
3951 Glue = RetValue.getValue(2);
3952
3953 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL);
3954
3955 InVals.push_back(RetValue);
3956 }
3957
3958 return Chain;
3959}
3960
3961bool LoongArchTargetLowering::CanLowerReturn(
3962 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
3963 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
3964 SmallVector<CCValAssign> RVLocs;
3965 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
3966
3967 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
3968 LoongArchABI::ABI ABI =
3969 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
3970 if (CC_LoongArch(MF.getDataLayout(), ABI, i, Outs[i].VT, CCValAssign::Full,
3971 Outs[i].Flags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true,
3972 nullptr))
3973 return false;
3974 }
3975 return true;
3976}
3977
3978SDValue LoongArchTargetLowering::LowerReturn(
3979 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
3980 const SmallVectorImpl<ISD::OutputArg> &Outs,
3981 const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
3982 SelectionDAG &DAG) const {
3983 // Stores the assignment of the return value to a location.
3984 SmallVector<CCValAssign> RVLocs;
3985
3986 // Info about the registers and stack slot.
3987 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
3988 *DAG.getContext());
3989
3990 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
3991 nullptr, CC_LoongArch);
3992 if (CallConv == CallingConv::GHC && !RVLocs.empty())
3993 report_fatal_error("GHC functions return void only");
3994 SDValue Glue;
3995 SmallVector<SDValue, 4> RetOps(1, Chain);
3996
3997 // Copy the result values into the output registers.
3998 for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) {
3999 CCValAssign &VA = RVLocs[i];
4000 assert(VA.isRegLoc() && "Can only return in registers!");
4001
4002 // Handle a 'normal' return.
4003 SDValue Val = convertValVTToLocVT(DAG, OutVals[i], VA, DL);
4004 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
4005
4006 // Guarantee that all emitted copies are stuck together.
4007 Glue = Chain.getValue(1);
4008 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
4009 }
4010
4011 RetOps[0] = Chain; // Update chain.
4012
4013 // Add the glue node if we have it.
4014 if (Glue.getNode())
4015 RetOps.push_back(Glue);
4016
4017 return DAG.getNode(LoongArchISD::RET, DL, MVT::Other, RetOps);
4018}
4019
4020bool LoongArchTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
4021 bool ForCodeSize) const {
4022 // TODO: Maybe need more checks here after vector extension is supported.
4023 if (VT == MVT::f32 && !Subtarget.hasBasicF())
4024 return false;
4025 if (VT == MVT::f64 && !Subtarget.hasBasicD())
4026 return false;
4027 return (Imm.isZero() || Imm.isExactlyValue(+1.0));
4028}
4029
4031 return true;
4032}
4033
4035 return true;
4036}
4037
4038bool LoongArchTargetLowering::shouldInsertFencesForAtomic(
4039 const Instruction *I) const {
4040 if (!Subtarget.is64Bit())
4041 return isa<LoadInst>(I) || isa<StoreInst>(I);
4042
4043 if (isa<LoadInst>(I))
4044 return true;
4045
4046 // On LA64, atomic store operations with an integer bit width of 32 or 64 do
4047 // not require fences because we can use amswap_db.[w/d].
4048 if (isa<StoreInst>(I)) {
4049 unsigned Size = I->getOperand(0)->getType()->getIntegerBitWidth();
4050 return (Size == 8 || Size == 16);
4051 }
4052
4053 return false;
4054}
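// Illustrative consequence of the hook above (assuming LA64): an atomic
//   store atomic i32 %v, ptr %p seq_cst, align 4
// gets no leading/trailing fences because it can be lowered with amswap_db.w,
// whereas i8 and i16 atomic stores still have fences inserted around them by
// the AtomicExpand pass.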
4055
4056EVT LoongArchTargetLowering::getSetCCResultType(const DataLayout &DL,
4057 LLVMContext &Context,
4058 EVT VT) const {
4059 if (!VT.isVector())
4060 return getPointerTy(DL);
4061 return VT.changeVectorElementTypeToInteger();
4062}
4063
4064bool LoongArchTargetLowering::hasAndNot(SDValue Y) const {
4065 // TODO: Support vectors.
4066 return Y.getValueType().isScalarInteger() && !isa<ConstantSDNode>(Y);
4067}
4068
4069bool LoongArchTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
4070 const CallInst &I,
4071 MachineFunction &MF,
4072 unsigned Intrinsic) const {
4073 switch (Intrinsic) {
4074 default:
4075 return false;
4076 case Intrinsic::loongarch_masked_atomicrmw_xchg_i32:
4077 case Intrinsic::loongarch_masked_atomicrmw_add_i32:
4078 case Intrinsic::loongarch_masked_atomicrmw_sub_i32:
4079 case Intrinsic::loongarch_masked_atomicrmw_nand_i32:
4080 Info.opc = ISD::INTRINSIC_W_CHAIN;
4081 Info.memVT = MVT::i32;
4082 Info.ptrVal = I.getArgOperand(0);
4083 Info.offset = 0;
4084 Info.align = Align(4);
4085 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
4086 MachineMemOperand::MOVolatile;
4087 return true;
4088 // TODO: Add more Intrinsics later.
4089 }
4090}
4091
4092TargetLowering::AtomicExpansionKind
4093LoongArchTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
4094 // TODO: Add more AtomicRMWInst that needs to be extended.
4095
4096 // Since floating-point operation requires a non-trivial set of data
4097 // operations, use CmpXChg to expand.
4098 if (AI->isFloatingPointOperation() ||
4099 AI->getOperation() == AtomicRMWInst::UIncWrap ||
4100 AI->getOperation() == AtomicRMWInst::UDecWrap)
4101 return AtomicExpansionKind::CmpXChg;
4102
4103 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
4104 if (Size == 8 || Size == 16)
4105 return AtomicExpansionKind::MaskedIntrinsic;
4106 return AtomicExpansionKind::None;
4107}
4108
4109static Intrinsic::ID
4110getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen,
4111 AtomicRMWInst::BinOp BinOp) {
4112 if (GRLen == 64) {
4113 switch (BinOp) {
4114 default:
4115 llvm_unreachable("Unexpected AtomicRMW BinOp");
4116 case AtomicRMWInst::Xchg:
4117 return Intrinsic::loongarch_masked_atomicrmw_xchg_i64;
4118 case AtomicRMWInst::Add:
4119 return Intrinsic::loongarch_masked_atomicrmw_add_i64;
4120 case AtomicRMWInst::Sub:
4121 return Intrinsic::loongarch_masked_atomicrmw_sub_i64;
4122 case AtomicRMWInst::Nand:
4123 return Intrinsic::loongarch_masked_atomicrmw_nand_i64;
4124 case AtomicRMWInst::UMax:
4125 return Intrinsic::loongarch_masked_atomicrmw_umax_i64;
4126 case AtomicRMWInst::UMin:
4127 return Intrinsic::loongarch_masked_atomicrmw_umin_i64;
4128 case AtomicRMWInst::Max:
4129 return Intrinsic::loongarch_masked_atomicrmw_max_i64;
4130 case AtomicRMWInst::Min:
4131 return Intrinsic::loongarch_masked_atomicrmw_min_i64;
4132 // TODO: support other AtomicRMWInst.
4133 }
4134 }
4135
4136 if (GRLen == 32) {
4137 switch (BinOp) {
4138 default:
4139 llvm_unreachable("Unexpected AtomicRMW BinOp");
4140 case AtomicRMWInst::Xchg:
4141 return Intrinsic::loongarch_masked_atomicrmw_xchg_i32;
4142 case AtomicRMWInst::Add:
4143 return Intrinsic::loongarch_masked_atomicrmw_add_i32;
4144 case AtomicRMWInst::Sub:
4145 return Intrinsic::loongarch_masked_atomicrmw_sub_i32;
4146 case AtomicRMWInst::Nand:
4147 return Intrinsic::loongarch_masked_atomicrmw_nand_i32;
4148 // TODO: support other AtomicRMWInst.
4149 }
4150 }
4151
4152 llvm_unreachable("Unexpected GRLen\n");
4153}
4154
4155TargetLowering::AtomicExpansionKind
4156LoongArchTargetLowering::shouldExpandAtomicCmpXchgInIR(
4157 AtomicCmpXchgInst *CI) const {
4158 unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
4159 if (Size == 8 || Size == 16)
4160 return AtomicExpansionKind::MaskedIntrinsic;
4161 return AtomicExpansionKind::None;
4162}
4163
4164Value *LoongArchTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
4165 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
4166 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
4167 Value *Ordering =
4168 Builder.getIntN(Subtarget.getGRLen(), static_cast<uint64_t>(Ord));
4169
4170 // TODO: Support cmpxchg on LA32.
4171 Intrinsic::ID CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i64;
4172 CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
4173 NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
4174 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
4175 Type *Tys[] = {AlignedAddr->getType()};
4176 Function *MaskedCmpXchg =
4177 Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys);
4178 Value *Result = Builder.CreateCall(
4179 MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
4180 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
4181 return Result;
4182}
4183
4184Value *LoongArchTargetLowering::emitMaskedAtomicRMWIntrinsic(
4185 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
4186 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
4187 // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
4188 // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
4189 // mask, as this produces better code than the LL/SC loop emitted by
4190 // int_loongarch_masked_atomicrmw_xchg.
4191 if (AI->getOperation() == AtomicRMWInst::Xchg &&
4192 isa<ConstantInt>(AI->getValOperand())) {
4193 ConstantInt *CVal = cast<ConstantInt>(AI->getValOperand());
4194 if (CVal->isZero())
4195 return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,
4196 Builder.CreateNot(Mask, "Inv_Mask"),
4197 AI->getAlign(), Ord);
4198 if (CVal->isMinusOne())
4199 return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,
4200 AI->getAlign(), Ord);
4201 }
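  // Illustrative rewrite performed by the branch above (hypothetical IR):
  //   %old = atomicrmw xchg ptr %p, i8 0 monotonic
  // becomes an AtomicRMWInst::And on the aligned word with the inverted
  // element mask (clearing just that byte), and the same operation with -1
  // becomes an AtomicRMWInst::Or with the mask, avoiding the LL/SC loop that
  // int_loongarch_masked_atomicrmw_xchg would otherwise emit.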
4202
4203 unsigned GRLen = Subtarget.getGRLen();
4204 Value *Ordering =
4205 Builder.getIntN(GRLen, static_cast<uint64_t>(AI->getOrdering()));
4206 Type *Tys[] = {AlignedAddr->getType()};
4207 Function *LlwOpScwLoop = Intrinsic::getDeclaration(
4208 AI->getModule(),
4209 getIntrinsicForMaskedAtomicRMWBinOp(GRLen, AI->getOperation()), Tys);
4210
4211 if (GRLen == 64) {
4212 Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
4213 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
4214 ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
4215 }
4216
4217 Value *Result;
4218
4219 // Must pass the shift amount needed to sign extend the loaded value prior
4220 // to performing a signed comparison for min/max. ShiftAmt is the number of
4221 // bits to shift the value into position. Pass GRLen-ShiftAmt-ValWidth, which
4222 // is the number of bits to left+right shift the value in order to
4223 // sign-extend.
4224 if (AI->getOperation() == AtomicRMWInst::Min ||
4225 AI->getOperation() == AtomicRMWInst::Max) {
4226 const DataLayout &DL = AI->getModule()->getDataLayout();
4227 unsigned ValWidth =
4228 DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
4229 Value *SextShamt =
4230 Builder.CreateSub(Builder.getIntN(GRLen, GRLen - ValWidth), ShiftAmt);
4231 Result = Builder.CreateCall(LlwOpScwLoop,
4232 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
4233 } else {
4234 Result =
4235 Builder.CreateCall(LlwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
4236 }
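  // Worked example for the shift computation above (illustrative, assuming
  // GRLen == 64): an atomicrmw min on an i8 stored at byte offset 1 of its
  // aligned word has ValWidth == 8 and ShiftAmt == 8, so SextShamt == 48;
  // the generated LL/SC loop shifts the loaded field left by 48 and
  // arithmetically right by 48 to sign-extend it before the signed comparison.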
4237
4238 if (GRLen == 64)
4239 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
4240 return Result;
4241}
4242
4243bool LoongArchTargetLowering::isFMAFasterThanFMulAndFAdd(
4244 const MachineFunction &MF, EVT VT) const {
4245 VT = VT.getScalarType();
4246
4247 if (!VT.isSimple())
4248 return false;
4249
4250 switch (VT.getSimpleVT().SimpleTy) {
4251 case MVT::f32:
4252 case MVT::f64:
4253 return true;
4254 default:
4255 break;
4256 }
4257
4258 return false;
4259}
4260
4261Register LoongArchTargetLowering::getExceptionPointerRegister(
4262 const Constant *PersonalityFn) const {
4263 return LoongArch::R4;
4264}
4265
4266Register LoongArchTargetLowering::getExceptionSelectorRegister(
4267 const Constant *PersonalityFn) const {
4268 return LoongArch::R5;
4269}
4270
4271//===----------------------------------------------------------------------===//
4272// LoongArch Inline Assembly Support
4273//===----------------------------------------------------------------------===//
4274
4276LoongArchTargetLowering::getConstraintType(StringRef Constraint) const {
4277 // LoongArch specific constraints in GCC: config/loongarch/constraints.md
4278 //
4279 // 'f': A floating-point register (if available).
4280 // 'k': A memory operand whose address is formed by a base register and
4281 // (optionally scaled) index register.
4282 // 'l': A signed 16-bit constant.
4283 // 'm': A memory operand whose address is formed by a base register and
4284 // offset that is suitable for use in instructions with the same
4285 // addressing mode as st.w and ld.w.
4286 // 'I': A signed 12-bit constant (for arithmetic instructions).
4287 // 'J': Integer zero.
4288 // 'K': An unsigned 12-bit constant (for logic instructions).
4289 // "ZB": An address that is held in a general-purpose register. The offset is
4290 // zero.
4291 // "ZC": A memory operand whose address is formed by a base register and
4292 // offset that is suitable for use in instructions with the same
4293 // addressing mode as ll.w and sc.w.
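  //
  // A minimal usage sketch for two of these constraints (hypothetical user
  // code, not from any test in the tree):
  //   float x, y;
  //   asm("fadd.s %0, %1, %2" : "=f"(x) : "f"(x), "f"(y));
  //   int v;
  //   asm("addi.w %0, %0, %1" : "+r"(v) : "I"(12));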
4294 if (Constraint.size() == 1) {
4295 switch (Constraint[0]) {
4296 default:
4297 break;
4298 case 'f':
4299 return C_RegisterClass;
4300 case 'l':
4301 case 'I':
4302 case 'J':
4303 case 'K':
4304 return C_Immediate;
4305 case 'k':
4306 return C_Memory;
4307 }
4308 }
4309
4310 if (Constraint == "ZC" || Constraint == "ZB")
4311 return C_Memory;
4312
4313 // 'm' is handled here.
4314 return TargetLowering::getConstraintType(Constraint);
4315}
4316
4317InlineAsm::ConstraintCode LoongArchTargetLowering::getInlineAsmMemConstraint(
4318 StringRef ConstraintCode) const {
4319 return StringSwitch<InlineAsm::ConstraintCode>(ConstraintCode)
4320 .Case("k", InlineAsm::ConstraintCode::k)
4321 .Case("ZB", InlineAsm::ConstraintCode::ZB)
4322 .Case("ZC", InlineAsm::ConstraintCode::ZC)
4323 .Default(TargetLowering::getInlineAsmMemConstraint(ConstraintCode));
4324}
4325
4326std::pair<unsigned, const TargetRegisterClass *>
4327LoongArchTargetLowering::getRegForInlineAsmConstraint(
4328 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
4329 // First, see if this is a constraint that directly corresponds to a LoongArch
4330 // register class.
4331 if (Constraint.size() == 1) {
4332 switch (Constraint[0]) {
4333 case 'r':
4334 // TODO: Support fixed vectors up to GRLen?
4335 if (VT.isVector())
4336 break;
4337 return std::make_pair(0U, &LoongArch::GPRRegClass);
4338 case 'f':
4339 if (Subtarget.hasBasicF() && VT == MVT::f32)
4340 return std::make_pair(0U, &LoongArch::FPR