//===- MipsSEISelLowering.cpp - MipsSE DAG Lowering Interface -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Subclass of MipsTargetLowering specialized for mips32/64.
//
//===----------------------------------------------------------------------===//

#include "MipsSEISelLowering.h"
#include "MipsMachineFunction.h"
#include "MipsRegisterInfo.h"
#include "MipsSubtarget.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/CodeGenTypes/MachineValueType.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsMips.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "mips-isel"

static cl::opt<bool>
    UseMipsTailCalls("mips-tail-calls", cl::Hidden,
                     cl::desc("MIPS: permit tail calls."), cl::init(false));

static cl::opt<bool> NoDPLoadStore(
    "mno-ldc1-sdc1", cl::init(false),
    cl::desc("Expand double precision loads and "
             "stores to their single precision "
             "counterparts"));
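
// Illustrative usage (not part of this file): both options are off by default
// and can be toggled on the llc command line, e.g.
//   llc -march=mips -mips-tail-calls ...   ; permit tail-call optimization
//   llc -march=mips -mno-ldc1-sdc1 ...     ; split f64 loads/stores in two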

MipsSETargetLowering::MipsSETargetLowering(const MipsTargetMachine &TM,
                                           const MipsSubtarget &STI)
    : MipsTargetLowering(TM, STI) {
  // Set up the register classes
  addRegisterClass(MVT::i32, &Mips::GPR32RegClass);

  if (Subtarget.isGP64bit())
    addRegisterClass(MVT::i64, &Mips::GPR64RegClass);

  if (Subtarget.hasDSP() || Subtarget.hasMSA()) {
    // Expand all truncating stores and extending loads.
    for (MVT VT0 : MVT::fixedlen_vector_valuetypes()) {
      for (MVT VT1 : MVT::fixedlen_vector_valuetypes()) {
        setTruncStoreAction(VT0, VT1, Expand);
        setLoadExtAction(ISD::SEXTLOAD, VT0, VT1, Expand);
        setLoadExtAction(ISD::ZEXTLOAD, VT0, VT1, Expand);
        setLoadExtAction(ISD::EXTLOAD, VT0, VT1, Expand);
      }
    }
  }

  if (Subtarget.hasDSP()) {
    MVT::SimpleValueType VecTys[2] = {MVT::v2i16, MVT::v4i8};

    for (const auto &VecTy : VecTys) {
      addRegisterClass(VecTy, &Mips::DSPRRegClass);

      // Expand all builtin opcodes.
      for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
        setOperationAction(Opc, VecTy, Expand);
    }

    setTargetDAGCombine(
        {ISD::SHL, ISD::SRA, ISD::SRL, ISD::SETCC, ISD::VSELECT});

    if (Subtarget.hasMips32r2()) {
    }
  }

  if (Subtarget.hasDSPR2())
    setOperationAction(ISD::MUL, MVT::v2i16, Legal);

  if (Subtarget.hasMSA()) {
    addMSAIntType(MVT::v16i8, &Mips::MSA128BRegClass);
    addMSAIntType(MVT::v8i16, &Mips::MSA128HRegClass);
    addMSAIntType(MVT::v4i32, &Mips::MSA128WRegClass);
    addMSAIntType(MVT::v2i64, &Mips::MSA128DRegClass);
    addMSAFloatType(MVT::v8f16, &Mips::MSA128HRegClass);
    addMSAFloatType(MVT::v4f32, &Mips::MSA128WRegClass);
    addMSAFloatType(MVT::v2f64, &Mips::MSA128DRegClass);

    // f16 is a storage-only type, always promote it to f32.
    addRegisterClass(MVT::f16, &Mips::MSA128HRegClass);

    setTargetDAGCombine({ISD::AND, ISD::OR, ISD::SRA, ISD::VSELECT, ISD::XOR});
  }

  if (!Subtarget.useSoftFloat()) {
    addRegisterClass(MVT::f32, &Mips::FGR32RegClass);

    // When dealing with single precision only, use libcalls
    if (!Subtarget.isSingleFloat()) {
      if (Subtarget.isFP64bit())
        addRegisterClass(MVT::f64, &Mips::FGR64RegClass);
      else
        addRegisterClass(MVT::f64, &Mips::AFGR64RegClass);
    }
  }

  setOperationAction(ISD::SMUL_LOHI, MVT::i32, Custom);
  setOperationAction(ISD::UMUL_LOHI, MVT::i32, Custom);
  setOperationAction(ISD::MULHS, MVT::i32, Custom);
  setOperationAction(ISD::MULHU, MVT::i32, Custom);

  if (Subtarget.hasCnMips())
    setOperationAction(ISD::MUL, MVT::i64, Legal);
  else if (Subtarget.isGP64bit())
    setOperationAction(ISD::MUL, MVT::i64, Custom);

  if (Subtarget.isGP64bit()) {
    setOperationAction(ISD::SMUL_LOHI, MVT::i64, Custom);
    setOperationAction(ISD::UMUL_LOHI, MVT::i64, Custom);
    setOperationAction(ISD::MULHS, MVT::i64, Custom);
    setOperationAction(ISD::MULHU, MVT::i64, Custom);
    setOperationAction(ISD::SDIVREM, MVT::i64, Custom);
    setOperationAction(ISD::UDIVREM, MVT::i64, Custom);
  }

  setOperationAction(ISD::SDIVREM, MVT::i32, Custom);
  setOperationAction(ISD::UDIVREM, MVT::i32, Custom);

  if (Subtarget.hasMips32r6()) {
  } else {
  }

  setTargetDAGCombine(ISD::MUL);

  if (Subtarget.hasMips32r2() && !Subtarget.useSoftFloat() &&
      !Subtarget.hasMips64()) {
    setOperationAction(ISD::BITCAST, MVT::i64, Custom);
  }

  if (NoDPLoadStore) {
    setOperationAction(ISD::LOAD, MVT::f64, Custom);
    setOperationAction(ISD::STORE, MVT::f64, Custom);
  }

  if (Subtarget.hasMips32r6()) {
    // MIPS32r6 replaces the accumulator-based multiplies with a three register
    // instruction.

    // MIPS32r6 replaces the accumulator-based division/remainder with separate
    // three register division and remainder instructions.

    // MIPS32r6 replaces conditional moves with an equivalent that removes the
    // need for three GPR read ports.

    assert(Subtarget.isFP64bit() && "FR=1 is required for MIPS32r6");

    // Floating point > and >= are supported via < and <=
  }

  if (Subtarget.hasMips64r6()) {
    // MIPS64r6 replaces the accumulator-based multiplies with a three register
    // instruction.

    // MIPS64r6 replaces the accumulator-based division/remainder with separate
    // three register division and remainder instructions.

    // MIPS64r6 replaces conditional moves with an equivalent that removes the
    // need for three GPR read ports.
  }

  computeRegisterProperties(Subtarget.getRegisterInfo());
}

const MipsTargetLowering *
llvm::createMipsSETargetLowering(const MipsTargetMachine &TM,
                                 const MipsSubtarget &STI) {
  return new MipsSETargetLowering(TM, STI);
}

const TargetRegisterClass *
MipsSETargetLowering::getRepRegClassFor(MVT VT) const {
  if (VT == MVT::Untyped)
    return Subtarget.hasDSP() ? &Mips::ACC64DSPRegClass : &Mips::ACC64RegClass;

  return TargetLowering::getRepRegClassFor(VT);
}

// Enable MSA support for the given integer type and Register class.
void MipsSETargetLowering::addMSAIntType(MVT::SimpleValueType Ty,
                                         const TargetRegisterClass *RC) {
  addRegisterClass(Ty, RC);

  // Expand all builtin opcodes.
  for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
    setOperationAction(Opc, Ty, Expand);

  if (Ty == MVT::v4i32 || Ty == MVT::v2i64) {
  }
}

// Enable MSA support for the given floating-point type and Register class.
void MipsSETargetLowering::addMSAFloatType(MVT::SimpleValueType Ty,
                                           const TargetRegisterClass *RC) {
  addRegisterClass(Ty, RC);

  // Expand all builtin opcodes.
  for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
    setOperationAction(Opc, Ty, Expand);

  if (Ty != MVT::v8f16) {
  }
}

SDValue MipsSETargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
  if (!Subtarget.hasMips32r6())
    return MipsTargetLowering::lowerSELECT(Op, DAG);

  EVT ResTy = Op->getValueType(0);
  SDLoc DL(Op);

  // Although MTC1_D64 takes an i32 and writes an f64, the upper 32 bits of the
  // floating point register are undefined. Not really an issue as sel.d, which
  // is produced from an FSELECT node, only looks at bit 0.
  SDValue Tmp = DAG.getNode(MipsISD::MTC1_D64, DL, MVT::f64, Op->getOperand(0));
  return DAG.getNode(MipsISD::FSELECT, DL, ResTy, Tmp, Op->getOperand(1),
                     Op->getOperand(2));
}

bool MipsSETargetLowering::allowsMisalignedMemoryAccesses(
    EVT VT, unsigned, Align, MachineMemOperand::Flags, unsigned *Fast) const {
  MVT::SimpleValueType SVT = VT.getSimpleVT().SimpleTy;

  if (Subtarget.systemSupportsUnalignedAccess()) {
    // MIPS32r6/MIPS64r6 is required to support unaligned access. It's
    // implementation-defined whether this is handled by hardware, software, or
    // a hybrid of the two, but it's expected that most implementations will
    // handle the majority of cases in hardware.
    if (Fast)
      *Fast = 1;
    return true;
  } else if (Subtarget.hasMips32r6()) {
    return false;
  }

  switch (SVT) {
  case MVT::i64:
  case MVT::i32:
    if (Fast)
      *Fast = 1;
    return true;
  default:
    return false;
  }
}

SDValue MipsSETargetLowering::LowerOperation(SDValue Op,
                                             SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  case ISD::LOAD:  return lowerLOAD(Op, DAG);
  case ISD::STORE: return lowerSTORE(Op, DAG);
  case ISD::SMUL_LOHI: return lowerMulDiv(Op, MipsISD::Mult, true, true, DAG);
  case ISD::UMUL_LOHI: return lowerMulDiv(Op, MipsISD::Multu, true, true, DAG);
  case ISD::MULHS:     return lowerMulDiv(Op, MipsISD::Mult, false, true, DAG);
  case ISD::MULHU:     return lowerMulDiv(Op, MipsISD::Multu, false, true, DAG);
  case ISD::MUL:       return lowerMulDiv(Op, MipsISD::Mult, true, false, DAG);
  case ISD::SDIVREM:   return lowerMulDiv(Op, MipsISD::DivRem, true, true, DAG);
  case ISD::UDIVREM:   return lowerMulDiv(Op, MipsISD::DivRemU, true, true,
                                          DAG);
  case ISD::INTRINSIC_WO_CHAIN: return lowerINTRINSIC_WO_CHAIN(Op, DAG);
  case ISD::INTRINSIC_W_CHAIN:  return lowerINTRINSIC_W_CHAIN(Op, DAG);
  case ISD::INTRINSIC_VOID:     return lowerINTRINSIC_VOID(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT: return lowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::BUILD_VECTOR:       return lowerBUILD_VECTOR(Op, DAG);
  case ISD::VECTOR_SHUFFLE:     return lowerVECTOR_SHUFFLE(Op, DAG);
  case ISD::SELECT:             return lowerSELECT(Op, DAG);
  case ISD::BITCAST:            return lowerBITCAST(Op, DAG);
  }

  return MipsTargetLowering::LowerOperation(Op, DAG);
}

// Fold zero extensions into MipsISD::VEXTRACT_[SZ]EXT_ELT
//
// Performs the following transformations:
// - Changes MipsISD::VEXTRACT_[SZ]EXT_ELT to zero extension if its
//   sign/zero-extension is completely overwritten by the new one performed by
//   the ISD::AND.
// - Removes redundant zero extensions performed by an ISD::AND.
static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const MipsSubtarget &Subtarget) {
  if (!Subtarget.hasMSA())
    return SDValue();

  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  unsigned Op0Opcode = Op0->getOpcode();

  // (and (MipsVExtract[SZ]Ext $a, $b, $c), imm:$d)
  // where $d + 1 == 2^n and n == 32
  // or    $d + 1 == 2^n and n <= 32 and ZExt
  // -> (MipsVExtractZExt $a, $b, $c)
  if (Op0Opcode == MipsISD::VEXTRACT_SEXT_ELT ||
      Op0Opcode == MipsISD::VEXTRACT_ZEXT_ELT) {
    ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(Op1);

    if (!Mask)
      return SDValue();

    int32_t Log2IfPositive = (Mask->getAPIntValue() + 1).exactLogBase2();

    if (Log2IfPositive <= 0)
      return SDValue(); // Mask+1 is not a power of 2

    SDValue Op0Op2 = Op0->getOperand(2);
    EVT ExtendTy = cast<VTSDNode>(Op0Op2)->getVT();
    unsigned ExtendTySize = ExtendTy.getSizeInBits();
    unsigned Log2 = Log2IfPositive;

    if ((Op0Opcode == MipsISD::VEXTRACT_ZEXT_ELT && Log2 >= ExtendTySize) ||
        Log2 == ExtendTySize) {
      SDValue Ops[] = { Op0->getOperand(0), Op0->getOperand(1), Op0Op2 };
      return DAG.getNode(MipsISD::VEXTRACT_ZEXT_ELT, SDLoc(Op0),
                         Op0->getVTList(),
                         ArrayRef(Ops, Op0->getNumOperands()));
    }
  }

  return SDValue();
}

// Determine if the specified node is a constant vector splat.
//
// Returns true and sets Imm if:
// * N is an ISD::BUILD_VECTOR representing a constant splat
//
// This function is quite similar to MipsSEDAGToDAGISel::selectVSplat. The
// differences are that it assumes the MSA has already been checked and the
// arbitrary requirement for a maximum of 32-bit integers isn't applied (and
// must not be in order for binsri.d to be selectable).
static bool isVSplat(SDValue N, APInt &Imm, bool IsLittleEndian) {
  BuildVectorSDNode *Node = dyn_cast<BuildVectorSDNode>(N.getNode());

  if (!Node)
    return false;

  APInt SplatValue, SplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;

  if (!Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
                             8, !IsLittleEndian))
    return false;

  Imm = SplatValue;

  return true;
}
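
// For example (illustrative, not from the original source): for a v4i32 node
// (build_vector 7, 7, 7, 7), isVSplat() returns true and sets Imm to 7, while
// (build_vector 7, 7, 7, 0) is not a constant splat and returns false.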

// Test whether the given node is an all-ones build_vector.
static bool isVectorAllOnes(SDValue N) {
  // Look through bitcasts. Endianness doesn't matter because we are looking
  // for an all-ones value.
  if (N->getOpcode() == ISD::BITCAST)
    N = N->getOperand(0);

  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N);

  if (!BVN)
    return false;

  APInt SplatValue, SplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;

  // Endianness doesn't matter in this context because we are looking for
  // an all-ones value.
  if (BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs))
    return SplatValue.isAllOnes();

  return false;
}

// Test whether N is the bitwise inverse of OfNode.
static bool isBitwiseInverse(SDValue N, SDValue OfNode) {
  if (N->getOpcode() != ISD::XOR)
    return false;

  if (isVectorAllOnes(N->getOperand(0)))
    return N->getOperand(1) == OfNode;

  if (isVectorAllOnes(N->getOperand(1)))
    return N->getOperand(0) == OfNode;

  return false;
}
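
// For example (illustrative): isBitwiseInverse((xor $v, (build_vector -1, ...)),
// $v) returns true; the all-ones operand may sit on either side of the XOR.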

// Perform combines where ISD::OR is the root node.
//
// Performs the following transformations:
// - (or (and $a, $mask), (and $b, $inv_mask)) => (vselect $mask, $a, $b)
//   where $inv_mask is the bitwise inverse of $mask and the 'or' has a 128-bit
//   vector type.
static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
                                TargetLowering::DAGCombinerInfo &DCI,
                                const MipsSubtarget &Subtarget) {
  if (!Subtarget.hasMSA())
    return SDValue();

  EVT Ty = N->getValueType(0);

  if (!Ty.is128BitVector())
    return SDValue();

  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);

  if (Op0->getOpcode() == ISD::AND && Op1->getOpcode() == ISD::AND) {
    SDValue Op0Op0 = Op0->getOperand(0);
    SDValue Op0Op1 = Op0->getOperand(1);
    SDValue Op1Op0 = Op1->getOperand(0);
    SDValue Op1Op1 = Op1->getOperand(1);
    bool IsLittleEndian = !Subtarget.isLittle();

    SDValue IfSet, IfClr, Cond;
    bool IsConstantMask = false;
    APInt Mask, InvMask;

    // If Op0Op0 is an appropriate mask, try to find its inverse in either
    // Op1Op0 or Op1Op1. Keep track of the Cond, IfSet, and IfClr nodes while
    // looking.
    // IfClr will be set if we find a valid match.
    if (isVSplat(Op0Op0, Mask, IsLittleEndian)) {
      Cond = Op0Op0;
      IfSet = Op0Op1;

      if (isVSplat(Op1Op0, InvMask, IsLittleEndian) &&
          Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask)
        IfClr = Op1Op1;
      else if (isVSplat(Op1Op1, InvMask, IsLittleEndian) &&
               Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask)
        IfClr = Op1Op0;

      IsConstantMask = true;
    }

    // If IfClr is not yet set, and Op0Op1 is an appropriate mask, try the same
    // thing again using this mask.
    // IfClr will be set if we find a valid match.
    if (!IfClr.getNode() && isVSplat(Op0Op1, Mask, IsLittleEndian)) {
      Cond = Op0Op1;
      IfSet = Op0Op0;

      if (isVSplat(Op1Op0, InvMask, IsLittleEndian) &&
          Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask)
        IfClr = Op1Op1;
      else if (isVSplat(Op1Op1, InvMask, IsLittleEndian) &&
               Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask)
        IfClr = Op1Op0;

      IsConstantMask = true;
    }

    // If IfClr is not yet set, try looking for a non-constant match.
    // IfClr will be set if we find a valid match amongst the eight
    // possibilities.
    if (!IfClr.getNode()) {
      if (isBitwiseInverse(Op0Op0, Op1Op0)) {
        Cond = Op1Op0;
        IfSet = Op1Op1;
        IfClr = Op0Op1;
      } else if (isBitwiseInverse(Op0Op1, Op1Op0)) {
        Cond = Op1Op0;
        IfSet = Op1Op1;
        IfClr = Op0Op0;
      } else if (isBitwiseInverse(Op0Op0, Op1Op1)) {
        Cond = Op1Op1;
        IfSet = Op1Op0;
        IfClr = Op0Op1;
      } else if (isBitwiseInverse(Op0Op1, Op1Op1)) {
        Cond = Op1Op1;
        IfSet = Op1Op0;
        IfClr = Op0Op0;
      } else if (isBitwiseInverse(Op1Op0, Op0Op0)) {
        Cond = Op0Op0;
        IfSet = Op0Op1;
        IfClr = Op1Op1;
      } else if (isBitwiseInverse(Op1Op1, Op0Op0)) {
        Cond = Op0Op0;
        IfSet = Op0Op1;
        IfClr = Op1Op0;
      } else if (isBitwiseInverse(Op1Op0, Op0Op1)) {
        Cond = Op0Op1;
        IfSet = Op0Op0;
        IfClr = Op1Op1;
      } else if (isBitwiseInverse(Op1Op1, Op0Op1)) {
        Cond = Op0Op1;
        IfSet = Op0Op0;
        IfClr = Op1Op0;
      }
    }

    // At this point, IfClr will be set if we have a valid match.
    if (!IfClr.getNode())
      return SDValue();

    assert(Cond.getNode() && IfSet.getNode());

    // Fold degenerate cases.
    if (IsConstantMask) {
      if (Mask.isAllOnes())
        return IfSet;
      else if (Mask == 0)
        return IfClr;
    }

    // Transform the DAG into an equivalent VSELECT.
    return DAG.getNode(ISD::VSELECT, SDLoc(N), Ty, Cond, IfSet, IfClr);
  }

  return SDValue();
}
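
// A concrete instance of the combine above (illustrative, with hypothetical
// v4i32 splat masks): given Mask = splat(0x00FF00FF) and its inverse
// InvMask = splat(0xFF00FF00),
//   (or (and $a, splat(0x00FF00FF)), (and $b, splat(0xFF00FF00)))
// is rewritten to (vselect splat(0x00FF00FF), $a, $b), taking bits from $a
// where the mask is set and from $b where it is clear.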

static bool shouldTransformMulToShiftsAddsSubs(APInt C, EVT VT,
                                               SelectionDAG &DAG,
                                               const MipsSubtarget &Subtarget) {
  // Estimate the number of operations the below transform will turn a
  // constant multiply into. The number is approximately equal to the minimal
  // number of powers of two that constant can be broken down to by adding
  // or subtracting them.
  //
  // If we have taken more than 12[1] / 8[2] steps to attempt the
  // optimization for a native sized value, it is more than likely that this
  // optimization will make things worse.
  //
  // [1] MIPS64 requires 6 instructions at most to materialize any constant,
  //     multiplication requires at least 4 cycles, plus another cycle (or two)
  //     to retrieve the result from the HI/LO registers.
  //
  // [2] For MIPS32, more than 8 steps is expensive as the constant could be
  //     materialized in 2 instructions, multiplication requires at least 4
  //     cycles, plus another cycle (or two) to retrieve the result from the
  //     HI/LO registers.
  //
  // TODO:
  // - MaxSteps needs to consider the `VT` of the constant for the current
  //   target.
  // - Consider performing this optimization after type legalization, which
  //   would allow removing the workaround for types that are not supported
  //   natively.
  // - Take into account the `-Os`/`-Oz` flags, because this optimization
  //   increases code size.
  unsigned MaxSteps = Subtarget.isABI_O32() ? 8 : 12;

  SmallVector<APInt, 16> WorkStack(1, C);
  unsigned Steps = 0;
  unsigned BitWidth = C.getBitWidth();

  while (!WorkStack.empty()) {
    APInt Val = WorkStack.pop_back_val();

    if (Val == 0 || Val == 1)
      continue;

    if (Steps >= MaxSteps)
      return false;

    if (Val.isPowerOf2()) {
      ++Steps;
      continue;
    }

    APInt Floor = APInt(BitWidth, 1) << Val.logBase2();
    APInt Ceil = Val.isNegative() ? APInt(BitWidth, 0)
                                  : APInt(BitWidth, 1) << C.ceilLogBase2();
    if ((Val - Floor).ule(Ceil - Val)) {
      WorkStack.push_back(Floor);
      WorkStack.push_back(Val - Floor);
    } else {
      WorkStack.push_back(Ceil);
      WorkStack.push_back(Ceil - Val);
    }

    ++Steps;
  }

  // If the value being multiplied is not supported natively, we have to pay
  // an additional legalization cost, conservatively assume an increase in the
  // cost of 3 instructions per step. The values for this heuristic were
  // determined experimentally.
  unsigned RegisterSize = DAG.getTargetLoweringInfo()
                              .getRegisterType(*DAG.getContext(), VT)
                              .getSizeInBits();
  Steps *= (VT.getSizeInBits() != RegisterSize) * 3;
  if (Steps > 27)
    return false;

  return true;
}
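
// Worked example (illustrative): decomposing C = 10 on O32 (MaxSteps = 8):
//   pop 10 -> floor 8, remainder 2 pushed   (Steps = 1)
//   pop 2  -> power of two                  (Steps = 2)
//   pop 8  -> power of two                  (Steps = 3)
// Three steps is within budget, so a multiply by 10 is considered worth
// expanding into shifts and adds (see genConstMult below).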

static SDValue genConstMult(SDValue X, APInt C, const SDLoc &DL, EVT VT,
                            EVT ShiftTy, SelectionDAG &DAG) {
  // Return 0.
  if (C == 0)
    return DAG.getConstant(0, DL, VT);

  // Return x.
  if (C == 1)
    return X;

  // If c is power of 2, return (shl x, log2(c)).
  if (C.isPowerOf2())
    return DAG.getNode(ISD::SHL, DL, VT, X,
                       DAG.getConstant(C.logBase2(), DL, ShiftTy));

  unsigned BitWidth = C.getBitWidth();
  APInt Floor = APInt(BitWidth, 1) << C.logBase2();
  APInt Ceil = C.isNegative() ? APInt(BitWidth, 0) :
                                APInt(BitWidth, 1) << C.ceilLogBase2();

  // If |c - floor_c| <= |c - ceil_c|,
  // where floor_c = pow(2, floor(log2(c))) and ceil_c = pow(2, ceil(log2(c))),
  // return (add constMult(x, floor_c), constMult(x, c - floor_c)).
  if ((C - Floor).ule(Ceil - C)) {
    SDValue Op0 = genConstMult(X, Floor, DL, VT, ShiftTy, DAG);
    SDValue Op1 = genConstMult(X, C - Floor, DL, VT, ShiftTy, DAG);
    return DAG.getNode(ISD::ADD, DL, VT, Op0, Op1);
  }

  // If |c - floor_c| > |c - ceil_c|,
  // return (sub constMult(x, ceil_c), constMult(x, ceil_c - c)).
  SDValue Op0 = genConstMult(X, Ceil, DL, VT, ShiftTy, DAG);
  SDValue Op1 = genConstMult(X, Ceil - C, DL, VT, ShiftTy, DAG);
  return DAG.getNode(ISD::SUB, DL, VT, Op0, Op1);
}
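
// For example (illustrative): genConstMult(x, 10, ...) produces
//   (add (shl x, 3), (shl x, 1))   ; 8*x + 2*x
// while genConstMult(x, 30, ...) takes the subtraction path:
//   (sub (shl x, 5), (shl x, 1))   ; 32*x - 2*x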

static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG,
                                 const TargetLowering::DAGCombinerInfo &DCI,
                                 const MipsSETargetLowering *TL,
                                 const MipsSubtarget &Subtarget) {
  EVT VT = N->getValueType(0);

  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1)))
    if (!VT.isVector() && shouldTransformMulToShiftsAddsSubs(
            C->getAPIntValue(), VT, DAG, Subtarget))
      return genConstMult(N->getOperand(0), C->getAPIntValue(), SDLoc(N), VT,
                          TL->getScalarShiftAmountTy(DAG.getDataLayout(), VT),
                          DAG);

  return SDValue(N, 0);
}

static SDValue performDSPShiftCombine(unsigned Opc, SDNode *N, EVT Ty,
                                      SelectionDAG &DAG,
                                      const MipsSubtarget &Subtarget) {
  // See if this is a vector splat immediate node.
  APInt SplatValue, SplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;
  unsigned EltSize = Ty.getScalarSizeInBits();
  BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N->getOperand(1));

  if (!Subtarget.hasDSP())
    return SDValue();

  if (!BV ||
      !BV->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
                           EltSize, !Subtarget.isLittle()) ||
      (SplatBitSize != EltSize) ||
      (SplatValue.getZExtValue() >= EltSize))
    return SDValue();

  SDLoc DL(N);
  return DAG.getNode(Opc, DL, Ty, N->getOperand(0),
                     DAG.getConstant(SplatValue.getZExtValue(), DL, MVT::i32));
}
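
// For example (illustrative): with DSP enabled, a v2i16
//   (shl $a, (build_vector 3, 3))
// becomes (SHLL_DSP $a, 3); non-splat or out-of-range shift amounts are left
// to the default expansion.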

static SDValue performSHLCombine(SDNode *N, SelectionDAG &DAG,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const MipsSubtarget &Subtarget) {
  EVT Ty = N->getValueType(0);

  if ((Ty != MVT::v2i16) && (Ty != MVT::v4i8))
    return SDValue();

  return performDSPShiftCombine(MipsISD::SHLL_DSP, N, Ty, DAG, Subtarget);
}

// Fold sign-extensions into MipsISD::VEXTRACT_[SZ]EXT_ELT for MSA and fold
// constant splats into MipsISD::SHRA_DSP for DSPr2.
//
// Performs the following transformations:
// - Changes MipsISD::VEXTRACT_[SZ]EXT_ELT to sign extension if its
//   sign/zero-extension is completely overwritten by the new one performed by
//   the ISD::SRA and ISD::SHL nodes.
// - Removes redundant sign extensions performed by an ISD::SRA and ISD::SHL
//   sequence.
//
// See performDSPShiftCombine for more information about the transformation
// used for DSPr2.
static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const MipsSubtarget &Subtarget) {
  EVT Ty = N->getValueType(0);

  if (Subtarget.hasMSA()) {
    SDValue Op0 = N->getOperand(0);
    SDValue Op1 = N->getOperand(1);

    // (sra (shl (MipsVExtract[SZ]Ext $a, $b, $c), imm:$d), imm:$d)
    // where $d + sizeof($c) == 32
    // or    $d + sizeof($c) <= 32 and SExt
    // -> (MipsVExtractSExt $a, $b, $c)
    if (Op0->getOpcode() == ISD::SHL && Op1 == Op0->getOperand(1)) {
      SDValue Op0Op0 = Op0->getOperand(0);
      ConstantSDNode *ShAmount = dyn_cast<ConstantSDNode>(Op1);

      if (!ShAmount)
        return SDValue();

      if (Op0Op0->getOpcode() != MipsISD::VEXTRACT_SEXT_ELT &&
          Op0Op0->getOpcode() != MipsISD::VEXTRACT_ZEXT_ELT)
        return SDValue();

      EVT ExtendTy = cast<VTSDNode>(Op0Op0->getOperand(2))->getVT();
      unsigned TotalBits = ShAmount->getZExtValue() + ExtendTy.getSizeInBits();

      if (TotalBits == 32 ||
          (Op0Op0->getOpcode() == MipsISD::VEXTRACT_SEXT_ELT &&
           TotalBits <= 32)) {
        SDValue Ops[] = { Op0Op0->getOperand(0), Op0Op0->getOperand(1),
                          Op0Op0->getOperand(2) };
        return DAG.getNode(MipsISD::VEXTRACT_SEXT_ELT, SDLoc(Op0Op0),
                           Op0Op0->getVTList(),
                           ArrayRef(Ops, Op0Op0->getNumOperands()));
      }
    }
  }

  if ((Ty != MVT::v2i16) && ((Ty != MVT::v4i8) || !Subtarget.hasDSPR2()))
    return SDValue();

  return performDSPShiftCombine(MipsISD::SHRA_DSP, N, Ty, DAG, Subtarget);
}

static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const MipsSubtarget &Subtarget) {
  EVT Ty = N->getValueType(0);

  if (((Ty != MVT::v2i16) || !Subtarget.hasDSPR2()) && (Ty != MVT::v4i8))
    return SDValue();

  return performDSPShiftCombine(MipsISD::SHRL_DSP, N, Ty, DAG, Subtarget);
}

static bool isLegalDSPCondCode(EVT Ty, ISD::CondCode CC) {
  bool IsV216 = (Ty == MVT::v2i16);

  switch (CC) {
  case ISD::SETEQ:
  case ISD::SETNE:  return true;
  case ISD::SETLT:
  case ISD::SETLE:
  case ISD::SETGT:
  case ISD::SETGE:  return IsV216;
  case ISD::SETULT:
  case ISD::SETULE:
  case ISD::SETUGT:
  case ISD::SETUGE: return !IsV216;
  default:          return false;
  }
}

static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG) {
  EVT Ty = N->getValueType(0);

  if ((Ty != MVT::v2i16) && (Ty != MVT::v4i8))
    return SDValue();

  if (!isLegalDSPCondCode(Ty, cast<CondCodeSDNode>(N->getOperand(2))->get()))
    return SDValue();

  return DAG.getNode(MipsISD::SETCC_DSP, SDLoc(N), Ty, N->getOperand(0),
                     N->getOperand(1), N->getOperand(2));
}

static SDValue performVSELECTCombine(SDNode *N, SelectionDAG &DAG) {
  EVT Ty = N->getValueType(0);

  if (Ty == MVT::v2i16 || Ty == MVT::v4i8) {
    SDValue SetCC = N->getOperand(0);

    if (SetCC.getOpcode() != MipsISD::SETCC_DSP)
      return SDValue();

    return DAG.getNode(MipsISD::SELECT_CC_DSP, SDLoc(N), Ty,
                       SetCC.getOperand(0), SetCC.getOperand(1),
                       N->getOperand(1), N->getOperand(2), SetCC.getOperand(2));
  }

  return SDValue();
}

static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG,
                                 const MipsSubtarget &Subtarget) {
  EVT Ty = N->getValueType(0);

  if (Subtarget.hasMSA() && Ty.is128BitVector() && Ty.isInteger()) {
    // Try the following combines:
    //   (xor (or $a, $b), (build_vector allones))
    //   (xor (or $a, $b), (bitcast (build_vector allones)))
    SDValue Op0 = N->getOperand(0);
    SDValue Op1 = N->getOperand(1);
    SDValue NotOp;

    if (ISD::isBuildVectorAllOnes(Op0.getNode()))
      NotOp = Op1;
    else if (ISD::isBuildVectorAllOnes(Op1.getNode()))
      NotOp = Op0;
    else
      return SDValue();

    if (NotOp->getOpcode() == ISD::OR)
      return DAG.getNode(MipsISD::VNOR, SDLoc(N), Ty, NotOp->getOperand(0),
                         NotOp->getOperand(1));
  }

  return SDValue();
}

SDValue
MipsSETargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDValue Val;

  switch (N->getOpcode()) {
  case ISD::AND:
    Val = performANDCombine(N, DAG, DCI, Subtarget);
    break;
  case ISD::OR:
    Val = performORCombine(N, DAG, DCI, Subtarget);
    break;
  case ISD::MUL:
    return performMULCombine(N, DAG, DCI, this, Subtarget);
  case ISD::SHL:
    Val = performSHLCombine(N, DAG, DCI, Subtarget);
    break;
  case ISD::SRA:
    return performSRACombine(N, DAG, DCI, Subtarget);
  case ISD::SRL:
    return performSRLCombine(N, DAG, DCI, Subtarget);
  case ISD::VSELECT:
    return performVSELECTCombine(N, DAG);
  case ISD::XOR:
    Val = performXORCombine(N, DAG, Subtarget);
    break;
  case ISD::SETCC:
    Val = performSETCCCombine(N, DAG);
    break;
  }

  if (Val.getNode()) {
    LLVM_DEBUG(dbgs() << "\nMipsSE DAG Combine:\n";
               N->printrWithDepth(dbgs(), &DAG); dbgs() << "\n=> \n";
               Val.getNode()->printrWithDepth(dbgs(), &DAG); dbgs() << "\n");
    return Val;
  }

  return MipsTargetLowering::PerformDAGCombine(N, DCI);
}

MachineBasicBlock *
MipsSETargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
                                                  MachineBasicBlock *BB) const {
  switch (MI.getOpcode()) {
  default:
    return MipsTargetLowering::EmitInstrWithCustomInserter(MI, BB);
  case Mips::BPOSGE32_PSEUDO:
    return emitBPOSGE32(MI, BB);
  case Mips::SNZ_B_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BNZ_B);
  case Mips::SNZ_H_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BNZ_H);
  case Mips::SNZ_W_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BNZ_W);
  case Mips::SNZ_D_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BNZ_D);
  case Mips::SNZ_V_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BNZ_V);
  case Mips::SZ_B_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BZ_B);
  case Mips::SZ_H_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BZ_H);
  case Mips::SZ_W_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BZ_W);
  case Mips::SZ_D_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BZ_D);
  case Mips::SZ_V_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BZ_V);
  case Mips::COPY_FW_PSEUDO:
    return emitCOPY_FW(MI, BB);
  case Mips::COPY_FD_PSEUDO:
    return emitCOPY_FD(MI, BB);
  case Mips::INSERT_FW_PSEUDO:
    return emitINSERT_FW(MI, BB);
  case Mips::INSERT_FD_PSEUDO:
    return emitINSERT_FD(MI, BB);
  case Mips::INSERT_B_VIDX_PSEUDO:
  case Mips::INSERT_B_VIDX64_PSEUDO:
    return emitINSERT_DF_VIDX(MI, BB, 1, false);
  case Mips::INSERT_H_VIDX_PSEUDO:
  case Mips::INSERT_H_VIDX64_PSEUDO:
    return emitINSERT_DF_VIDX(MI, BB, 2, false);
  case Mips::INSERT_W_VIDX_PSEUDO:
  case Mips::INSERT_W_VIDX64_PSEUDO:
    return emitINSERT_DF_VIDX(MI, BB, 4, false);
  case Mips::INSERT_D_VIDX_PSEUDO:
  case Mips::INSERT_D_VIDX64_PSEUDO:
    return emitINSERT_DF_VIDX(MI, BB, 8, false);
  case Mips::INSERT_FW_VIDX_PSEUDO:
  case Mips::INSERT_FW_VIDX64_PSEUDO:
    return emitINSERT_DF_VIDX(MI, BB, 4, true);
  case Mips::INSERT_FD_VIDX_PSEUDO:
  case Mips::INSERT_FD_VIDX64_PSEUDO:
    return emitINSERT_DF_VIDX(MI, BB, 8, true);
  case Mips::FILL_FW_PSEUDO:
    return emitFILL_FW(MI, BB);
  case Mips::FILL_FD_PSEUDO:
    return emitFILL_FD(MI, BB);
  case Mips::FEXP2_W_1_PSEUDO:
    return emitFEXP2_W_1(MI, BB);
  case Mips::FEXP2_D_1_PSEUDO:
    return emitFEXP2_D_1(MI, BB);
  case Mips::ST_F16:
    return emitST_F16_PSEUDO(MI, BB);
  case Mips::LD_F16:
    return emitLD_F16_PSEUDO(MI, BB);
  case Mips::MSA_FP_EXTEND_W_PSEUDO:
    return emitFPEXTEND_PSEUDO(MI, BB, false);
  case Mips::MSA_FP_ROUND_W_PSEUDO:
    return emitFPROUND_PSEUDO(MI, BB, false);
  case Mips::MSA_FP_EXTEND_D_PSEUDO:
    return emitFPEXTEND_PSEUDO(MI, BB, true);
  case Mips::MSA_FP_ROUND_D_PSEUDO:
    return emitFPROUND_PSEUDO(MI, BB, true);
  }
}

bool MipsSETargetLowering::isEligibleForTailCallOptimization(
    const CCState &CCInfo, unsigned NextStackOffset,
    const MipsFunctionInfo &FI) const {
  if (!UseMipsTailCalls)
    return false;

  // Exception has to be cleared with eret.
  if (FI.isISR())
    return false;

  // Return false if either the callee or caller has a byval argument.
  if (CCInfo.getInRegsParamsCount() > 0 || FI.hasByvalArg())
    return false;

  // Return true if the callee's argument area is no larger than the
  // caller's.
  return NextStackOffset <= FI.getIncomingArgSize();
}

void MipsSETargetLowering::
getOpndList(SmallVectorImpl<SDValue> &Ops,
            std::deque<std::pair<unsigned, SDValue>> &RegsToPass,
            bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage,
            bool IsCallReloc, CallLoweringInfo &CLI, SDValue Callee,
            SDValue Chain) const {
  Ops.push_back(Callee);
  MipsTargetLowering::getOpndList(Ops, RegsToPass, IsPICCall, GlobalOrExternal,
                                  InternalLinkage, IsCallReloc, CLI, Callee,
                                  Chain);
}

SDValue MipsSETargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const {
  LoadSDNode &Nd = *cast<LoadSDNode>(Op);

  if (Nd.getMemoryVT() != MVT::f64 || !NoDPLoadStore)
    return MipsTargetLowering::lowerLOAD(Op, DAG);

  // Replace a double precision load with two i32 loads and a buildpair64.
  SDLoc DL(Op);
  SDValue Ptr = Nd.getBasePtr(), Chain = Nd.getChain();
  EVT PtrVT = Ptr.getValueType();

  // i32 load from lower address.
  SDValue Lo = DAG.getLoad(MVT::i32, DL, Chain, Ptr, MachinePointerInfo(),
                           Nd.getAlign(), Nd.getMemOperand()->getFlags());

  // i32 load from higher address.
  Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Ptr, DAG.getConstant(4, DL, PtrVT));
  SDValue Hi = DAG.getLoad(
      MVT::i32, DL, Lo.getValue(1), Ptr, MachinePointerInfo(),
      commonAlignment(Nd.getAlign(), 4), Nd.getMemOperand()->getFlags());

  if (!Subtarget.isLittle())
    std::swap(Lo, Hi);

  SDValue BP = DAG.getNode(MipsISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
  SDValue Ops[2] = {BP, Hi.getValue(1)};
  return DAG.getMergeValues(Ops, DL);
}
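
// In effect (illustrative): under -mno-ldc1-sdc1, a little-endian f64 load
//   %d = load double, ptr %p
// becomes two i32 loads (from %p and %p+4) glued back together with
// MipsISD::BuildPairF64, i.e. two lw instructions instead of one ldc1.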

SDValue MipsSETargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const {
  StoreSDNode &Nd = *cast<StoreSDNode>(Op);

  if (Nd.getMemoryVT() != MVT::f64 || !NoDPLoadStore)
    return MipsTargetLowering::lowerSTORE(Op, DAG);

  // Replace a double precision store with two extractelement64s and i32 stores.
  SDLoc DL(Op);
  SDValue Val = Nd.getValue(), Ptr = Nd.getBasePtr(), Chain = Nd.getChain();
  EVT PtrVT = Ptr.getValueType();
  SDValue Lo = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32,
                           Val, DAG.getConstant(0, DL, MVT::i32));
  SDValue Hi = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32,
                           Val, DAG.getConstant(1, DL, MVT::i32));

  if (!Subtarget.isLittle())
    std::swap(Lo, Hi);

  // i32 store to lower address.
  Chain = DAG.getStore(Chain, DL, Lo, Ptr, MachinePointerInfo(), Nd.getAlign(),
                       Nd.getMemOperand()->getFlags(), Nd.getAAInfo());

  // i32 store to higher address.
  Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Ptr, DAG.getConstant(4, DL, PtrVT));
  return DAG.getStore(Chain, DL, Hi, Ptr, MachinePointerInfo(),
                      commonAlignment(Nd.getAlign(), 4),
                      Nd.getMemOperand()->getFlags(), Nd.getAAInfo());
}

SDValue MipsSETargetLowering::lowerBITCAST(SDValue Op,
                                           SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT Src = Op.getOperand(0).getValueType().getSimpleVT();
  MVT Dest = Op.getValueType().getSimpleVT();

  // Bitcast i64 to double.
  if (Src == MVT::i64 && Dest == MVT::f64) {
    SDValue Lo, Hi;
    std::tie(Lo, Hi) =
        DAG.SplitScalar(Op.getOperand(0), DL, MVT::i32, MVT::i32);
    return DAG.getNode(MipsISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
  }

  // Bitcast double to i64.
  if (Src == MVT::f64 && Dest == MVT::i64) {
    SDValue Lo =
        DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32, Op.getOperand(0),
                    DAG.getConstant(0, DL, MVT::i32));
    SDValue Hi =
        DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32, Op.getOperand(0),
                    DAG.getConstant(1, DL, MVT::i32));
    return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Lo, Hi);
  }

  // Skip other cases of bitcast and use default lowering.
  return SDValue();
}

SDValue MipsSETargetLowering::lowerMulDiv(SDValue Op, unsigned NewOpc,
                                          bool HasLo, bool HasHi,
                                          SelectionDAG &DAG) const {
  // MIPS32r6/MIPS64r6 removed accumulator based multiplies.
  assert(!Subtarget.hasMips32r6());

  EVT Ty = Op.getOperand(0).getValueType();
  SDLoc DL(Op);
  SDValue Mult = DAG.getNode(NewOpc, DL, MVT::Untyped,
                             Op.getOperand(0), Op.getOperand(1));
  SDValue Lo, Hi;

  if (HasLo)
    Lo = DAG.getNode(MipsISD::MFLO, DL, Ty, Mult);
  if (HasHi)
    Hi = DAG.getNode(MipsISD::MFHI, DL, Ty, Mult);

  if (!HasLo || !HasHi)
    return HasLo ? Lo : Hi;

  SDValue Vals[] = { Lo, Hi };
  return DAG.getMergeValues(Vals, DL);
}
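
// For example (illustrative): an i32 ISD::SMUL_LOHI is lowered here as
//   mult = (MipsISD::Mult $a, $b)   ; untyped HI/LO accumulator
//   lo   = (MipsISD::MFLO mult)
//   hi   = (MipsISD::MFHI mult)
// with the (lo, hi) pair returned as merged values.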

static SDValue initAccumulator(SDValue In, const SDLoc &DL, SelectionDAG &DAG) {
  SDValue InLo, InHi;
  std::tie(InLo, InHi) = DAG.SplitScalar(In, DL, MVT::i32, MVT::i32);
  return DAG.getNode(MipsISD::MTLOHI, DL, MVT::Untyped, InLo, InHi);
}

static SDValue extractLOHI(SDValue Op, const SDLoc &DL, SelectionDAG &DAG) {
  SDValue Lo = DAG.getNode(MipsISD::MFLO, DL, MVT::i32, Op);
  SDValue Hi = DAG.getNode(MipsISD::MFHI, DL, MVT::i32, Op);
  return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Lo, Hi);
}

// This function expands MIPS intrinsic nodes which have 64-bit input operands
// or output values.
//
// out64 = intrinsic-node in64
// =>
// lo = copy (extract-element (in64, 0))
// hi = copy (extract-element (in64, 1))
// mips-specific-node
// v0 = copy lo
// v1 = copy hi
// out64 = merge-values (v0, v1)
//
static SDValue lowerDSPIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) {
  SDLoc DL(Op);
  bool HasChainIn = Op->getOperand(0).getValueType() == MVT::Other;
  SmallVector<SDValue, 3> Ops;
  unsigned OpNo = 0;

  // See if Op has a chain input.
  if (HasChainIn)
    Ops.push_back(Op->getOperand(OpNo++));

  // The next operand is the intrinsic opcode.
  assert(Op->getOperand(OpNo).getOpcode() == ISD::TargetConstant);

  // See if the next operand has type i64.
  SDValue Opnd = Op->getOperand(++OpNo), In64;

  if (Opnd.getValueType() == MVT::i64)
    In64 = initAccumulator(Opnd, DL, DAG);
  else
    Ops.push_back(Opnd);

  // Push the remaining operands.
  for (++OpNo; OpNo < Op->getNumOperands(); ++OpNo)
    Ops.push_back(Op->getOperand(OpNo));

  // Add In64 to the end of the list.
  if (In64.getNode())
    Ops.push_back(In64);

  // Scan output.
  SmallVector<EVT, 2> ResTys;

  for (EVT Ty : Op->values())
    ResTys.push_back((Ty == MVT::i64) ? MVT::Untyped : Ty);

  // Create node.
  SDValue Val = DAG.getNode(Opc, DL, ResTys, Ops);
  SDValue Out = (ResTys[0] == MVT::Untyped) ? extractLOHI(Val, DL, DAG) : Val;

  if (!HasChainIn)
    return Out;

  assert(Val->getValueType(1) == MVT::Other);
  SDValue Vals[] = { Out, SDValue(Val.getNode(), 1) };
  return DAG.getMergeValues(Vals, DL);
}
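
// For example (illustrative): @llvm.mips.maddu(i64 %acc, i32 %a, i32 %b)
// passes through this helper as
//   in64 = MTLOHI(lo32(%acc), hi32(%acc))   ; seed the accumulator
//   res  = MAddu(%a, %b, in64)              ; untyped HI/LO result
//   out  = BUILD_PAIR(MFLO(res), MFHI(res)) ; reassemble the i64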

// Lower an MSA copy intrinsic into the specified SelectionDAG node
static SDValue lowerMSACopyIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) {
  SDLoc DL(Op);
  SDValue Vec = Op->getOperand(1);
  SDValue Idx = Op->getOperand(2);
  EVT ResTy = Op->getValueType(0);
  EVT EltTy = Vec->getValueType(0).getVectorElementType();

  SDValue Result = DAG.getNode(Opc, DL, ResTy, Vec, Idx,
                               DAG.getValueType(EltTy));

  return Result;
}

static SDValue lowerMSASplatZExt(SDValue Op, unsigned OpNr, SelectionDAG &DAG) {
  EVT ResVecTy = Op->getValueType(0);
  EVT ViaVecTy = ResVecTy;
  bool BigEndian = !DAG.getSubtarget().getTargetTriple().isLittleEndian();
  SDLoc DL(Op);

  // When ResVecTy == MVT::v2i64, LaneA is the upper 32 bits of the lane and
  // LaneB is the lower 32 bits. Otherwise LaneA and LaneB are alternating
  // lanes.
  SDValue LaneA = Op->getOperand(OpNr);
  SDValue LaneB;

  if (ResVecTy == MVT::v2i64) {
    // If the index is passed as an immediate value, set the upper lane to 0 so
    // that the splati.d instruction can be matched.
    if (isa<ConstantSDNode>(LaneA))
      LaneB = DAG.getConstant(0, DL, MVT::i32);
    // If the index is passed in a register, set the upper lane to the same
    // value as the lower one - this prevents the BUILD_VECTOR node from being
    // expanded through the stack. This way we are able to pattern match the
    // set of nodes created here to splat.d.
    else
      LaneB = LaneA;
    ViaVecTy = MVT::v4i32;
    if (BigEndian)
      std::swap(LaneA, LaneB);
  } else
    LaneB = LaneA;

  SDValue Ops[16] = { LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, LaneA, LaneB,
                      LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, LaneA, LaneB };

  SDValue Result = DAG.getBuildVector(
      ViaVecTy, DL, ArrayRef(Ops, ViaVecTy.getVectorNumElements()));

  if (ViaVecTy != ResVecTy) {
    SDValue One = DAG.getConstant(1, DL, ViaVecTy);
    Result = DAG.getNode(ISD::BITCAST, DL, ResVecTy,
                         DAG.getNode(ISD::AND, DL, ViaVecTy, Result, One));
  }

  return Result;
}

static SDValue lowerMSASplatImm(SDValue Op, unsigned ImmOp, SelectionDAG &DAG,
                                bool IsSigned = false) {
  auto *CImm = cast<ConstantSDNode>(Op->getOperand(ImmOp));
  return DAG.getConstant(
      APInt(Op->getValueType(0).getScalarType().getSizeInBits(),
            IsSigned ? CImm->getSExtValue() : CImm->getZExtValue(), IsSigned),
      SDLoc(Op), Op->getValueType(0));
}

static SDValue getBuildVectorSplat(EVT VecTy, SDValue SplatValue,
                                   bool BigEndian, SelectionDAG &DAG) {
  EVT ViaVecTy = VecTy;
  SDValue SplatValueA = SplatValue;
  SDValue SplatValueB = SplatValue;
  SDLoc DL(SplatValue);

  if (VecTy == MVT::v2i64) {
    // v2i64 BUILD_VECTOR must be performed via v4i32 so split into i32's.
    ViaVecTy = MVT::v4i32;

    SplatValueA = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, SplatValue);
    SplatValueB = DAG.getNode(ISD::SRL, DL, MVT::i64, SplatValue,
                              DAG.getConstant(32, DL, MVT::i32));
    SplatValueB = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, SplatValueB);
  }

  // We currently hold the parts in little endian order. Swap them if
  // necessary.
  if (BigEndian)
    std::swap(SplatValueA, SplatValueB);

  SDValue Ops[16] = { SplatValueA, SplatValueB, SplatValueA, SplatValueB,
                      SplatValueA, SplatValueB, SplatValueA, SplatValueB,
                      SplatValueA, SplatValueB, SplatValueA, SplatValueB,
                      SplatValueA, SplatValueB, SplatValueA, SplatValueB };

  SDValue Result = DAG.getBuildVector(
      ViaVecTy, DL, ArrayRef(Ops, ViaVecTy.getVectorNumElements()));

  if (VecTy != ViaVecTy)
    Result = DAG.getNode(ISD::BITCAST, DL, VecTy, Result);

  return Result;
}

static SDValue lowerMSABinaryBitImmIntr(SDValue Op, SelectionDAG &DAG,
                                        unsigned Opc, SDValue Imm,
                                        bool BigEndian) {
  EVT VecTy = Op->getValueType(0);
  SDValue Exp2Imm;
  SDLoc DL(Op);

  // The DAG Combiner can't constant fold bitcasted vectors yet, so we must do
  // it here for now.
  if (VecTy == MVT::v2i64) {
    if (ConstantSDNode *CImm = dyn_cast<ConstantSDNode>(Imm)) {
      APInt BitImm = APInt(64, 1) << CImm->getAPIntValue();

      SDValue BitImmHiOp = DAG.getConstant(BitImm.lshr(32).trunc(32), DL,
                                           MVT::i32);
      SDValue BitImmLoOp = DAG.getConstant(BitImm.trunc(32), DL, MVT::i32);

      if (BigEndian)
        std::swap(BitImmLoOp, BitImmHiOp);

      Exp2Imm = DAG.getNode(
          ISD::BITCAST, DL, MVT::v2i64,
          DAG.getBuildVector(MVT::v4i32, DL,
                             {BitImmLoOp, BitImmHiOp, BitImmLoOp, BitImmHiOp}));
    }
  }

  if (!Exp2Imm.getNode()) {
    // We couldn't constant fold; do a vector shift instead.

    // Extend i32 to i64 if necessary. Sign or zero extend doesn't matter since
    // only values 0-63 are valid.
    if (VecTy == MVT::v2i64)
      Imm = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Imm);

    Exp2Imm = getBuildVectorSplat(VecTy, Imm, BigEndian, DAG);

    Exp2Imm = DAG.getNode(ISD::SHL, DL, VecTy, DAG.getConstant(1, DL, VecTy),
                          Exp2Imm);
  }

  return DAG.getNode(Opc, DL, VecTy, Op->getOperand(1), Exp2Imm);
}

static SDValue truncateVecElts(SDValue Op, SelectionDAG &DAG) {
  SDLoc DL(Op);
  EVT ResTy = Op->getValueType(0);
  SDValue Vec = Op->getOperand(2);
  bool BigEndian = !DAG.getSubtarget().getTargetTriple().isLittleEndian();
  MVT ResEltTy = ResTy == MVT::v2i64 ? MVT::i64 : MVT::i32;
  SDValue ConstValue = DAG.getConstant(Vec.getScalarValueSizeInBits() - 1,
                                       DL, ResEltTy);
  SDValue SplatVec = getBuildVectorSplat(ResTy, ConstValue, BigEndian, DAG);

  return DAG.getNode(ISD::AND, DL, ResTy, Vec, SplatVec);
}

static SDValue lowerMSABitClear(SDValue Op, SelectionDAG &DAG) {
  EVT ResTy = Op->getValueType(0);
  SDLoc DL(Op);
  SDValue One = DAG.getConstant(1, DL, ResTy);
  SDValue Bit = DAG.getNode(ISD::SHL, DL, ResTy, One, truncateVecElts(Op, DAG));

  return DAG.getNode(ISD::AND, DL, ResTy, Op->getOperand(1),
                     DAG.getNOT(DL, Bit, ResTy));
}
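
// In effect (illustrative): per element, bclr.w computes
//   res = a & ~(1 << (b % 32))
// with the modulo supplied by truncateVecElts() masking the shift amounts.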

static SDValue lowerMSABitClearImm(SDValue Op, SelectionDAG &DAG) {
  SDLoc DL(Op);
  EVT ResTy = Op->getValueType(0);
  APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1)
                 << Op->getConstantOperandAPInt(2);
  SDValue BitMask = DAG.getConstant(~BitImm, DL, ResTy);

  return DAG.getNode(ISD::AND, DL, ResTy, Op->getOperand(1), BitMask);
}

SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
                                                      SelectionDAG &DAG) const {
  SDLoc DL(Op);
  unsigned Intrinsic = Op->getConstantOperandVal(0);
  switch (Intrinsic) {
  default:
    return SDValue();
  case Intrinsic::mips_shilo:
    return lowerDSPIntr(Op, DAG, MipsISD::SHILO);
  case Intrinsic::mips_dpau_h_qbl:
    return lowerDSPIntr(Op, DAG, MipsISD::DPAU_H_QBL);
  case Intrinsic::mips_dpau_h_qbr:
    return lowerDSPIntr(Op, DAG, MipsISD::DPAU_H_QBR);
  case Intrinsic::mips_dpsu_h_qbl:
    return lowerDSPIntr(Op, DAG, MipsISD::DPSU_H_QBL);
  case Intrinsic::mips_dpsu_h_qbr:
    return lowerDSPIntr(Op, DAG, MipsISD::DPSU_H_QBR);
  case Intrinsic::mips_dpa_w_ph:
    return lowerDSPIntr(Op, DAG, MipsISD::DPA_W_PH);
  case Intrinsic::mips_dps_w_ph:
    return lowerDSPIntr(Op, DAG, MipsISD::DPS_W_PH);
  case Intrinsic::mips_dpax_w_ph:
    return lowerDSPIntr(Op, DAG, MipsISD::DPAX_W_PH);
  case Intrinsic::mips_dpsx_w_ph:
    return lowerDSPIntr(Op, DAG, MipsISD::DPSX_W_PH);
  case Intrinsic::mips_mulsa_w_ph:
    return lowerDSPIntr(Op, DAG, MipsISD::MULSA_W_PH);
  case Intrinsic::mips_mult:
    return lowerDSPIntr(Op, DAG, MipsISD::Mult);
  case Intrinsic::mips_multu:
    return lowerDSPIntr(Op, DAG, MipsISD::Multu);
  case Intrinsic::mips_madd:
    return lowerDSPIntr(Op, DAG, MipsISD::MAdd);
  case Intrinsic::mips_maddu:
    return lowerDSPIntr(Op, DAG, MipsISD::MAddu);
  case Intrinsic::mips_msub:
    return lowerDSPIntr(Op, DAG, MipsISD::MSub);
  case Intrinsic::mips_msubu:
    return lowerDSPIntr(Op, DAG, MipsISD::MSubu);
  case Intrinsic::mips_addv_b:
  case Intrinsic::mips_addv_h:
  case Intrinsic::mips_addv_w:
  case Intrinsic::mips_addv_d:
    return DAG.getNode(ISD::ADD, DL, Op->getValueType(0), Op->getOperand(1),
                       Op->getOperand(2));
  case Intrinsic::mips_addvi_b:
  case Intrinsic::mips_addvi_h:
  case Intrinsic::mips_addvi_w:
  case Intrinsic::mips_addvi_d:
    return DAG.getNode(ISD::ADD, DL, Op->getValueType(0), Op->getOperand(1),
                       lowerMSASplatImm(Op, 2, DAG));
  case Intrinsic::mips_and_v:
    return DAG.getNode(ISD::AND, DL, Op->getValueType(0), Op->getOperand(1),
                       Op->getOperand(2));
  case Intrinsic::mips_andi_b:
    return DAG.getNode(ISD::AND, DL, Op->getValueType(0), Op->getOperand(1),
                       lowerMSASplatImm(Op, 2, DAG));
  case Intrinsic::mips_bclr_b:
  case Intrinsic::mips_bclr_h:
  case Intrinsic::mips_bclr_w:
  case Intrinsic::mips_bclr_d:
    return lowerMSABitClear(Op, DAG);
  case Intrinsic::mips_bclri_b:
  case Intrinsic::mips_bclri_h:
  case Intrinsic::mips_bclri_w:
  case Intrinsic::mips_bclri_d:
    return lowerMSABitClearImm(Op, DAG);
  case Intrinsic::mips_binsli_b:
  case Intrinsic::mips_binsli_h:
  case Intrinsic::mips_binsli_w:
  case Intrinsic::mips_binsli_d: {
    // binsli_x(IfClear, IfSet, nbits) -> (vselect LBitsMask, IfSet, IfClear)
    EVT VecTy = Op->getValueType(0);
    EVT EltTy = VecTy.getVectorElementType();
    if (Op->getConstantOperandVal(3) >= EltTy.getSizeInBits())
      report_fatal_error("Immediate out of range");
    APInt Mask = APInt::getHighBitsSet(EltTy.getSizeInBits(),
                                       Op->getConstantOperandVal(3) + 1);
    return DAG.getNode(ISD::VSELECT, DL, VecTy,
                       DAG.getConstant(Mask, DL, VecTy, true),
                       Op->getOperand(2), Op->getOperand(1));
  }
  case Intrinsic::mips_binsri_b:
  case Intrinsic::mips_binsri_h:
  case Intrinsic::mips_binsri_w:
  case Intrinsic::mips_binsri_d: {
    // binsri_x(IfClear, IfSet, nbits) -> (vselect RBitsMask, IfSet, IfClear)
    EVT VecTy = Op->getValueType(0);
    EVT EltTy = VecTy.getVectorElementType();
    if (Op->getConstantOperandVal(3) >= EltTy.getSizeInBits())
      report_fatal_error("Immediate out of range");
    APInt Mask = APInt::getLowBitsSet(EltTy.getSizeInBits(),
                                      Op->getConstantOperandVal(3) + 1);
    return DAG.getNode(ISD::VSELECT, DL, VecTy,
                       DAG.getConstant(Mask, DL, VecTy, true),
                       Op->getOperand(2), Op->getOperand(1));
  }
  case Intrinsic::mips_bmnz_v:
    return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), Op->getOperand(3),
                       Op->getOperand(2), Op->getOperand(1));
  case Intrinsic::mips_bmnzi_b:
    return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0),
                       lowerMSASplatImm(Op, 3, DAG), Op->getOperand(2),
                       Op->getOperand(1));
  case Intrinsic::mips_bmz_v:
    return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), Op->getOperand(3),
                       Op->getOperand(1), Op->getOperand(2));
  case Intrinsic::mips_bmzi_b:
    return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0),
                       lowerMSASplatImm(Op, 3, DAG), Op->getOperand(1),
                       Op->getOperand(2));
  case Intrinsic::mips_bneg_b:
  case Intrinsic::mips_bneg_h:
  case Intrinsic::mips_bneg_w:
  case Intrinsic::mips_bneg_d: {
    EVT VecTy = Op->getValueType(0);
    SDValue One = DAG.getConstant(1, DL, VecTy);

    return DAG.getNode(ISD::XOR, DL, VecTy, Op->getOperand(1),
                       DAG.getNode(ISD::SHL, DL, VecTy, One,
                                   truncateVecElts(Op, DAG)));
  }
  case Intrinsic::mips_bnegi_b:
  case Intrinsic::mips_bnegi_h:
  case Intrinsic::mips_bnegi_w:
  case Intrinsic::mips_bnegi_d:
    return lowerMSABinaryBitImmIntr(Op, DAG, ISD::XOR, Op->getOperand(2),
                                    !Subtarget.isLittle());
  case Intrinsic::mips_bnz_b:
  case Intrinsic::mips_bnz_h:
  case Intrinsic::mips_bnz_w:
  case Intrinsic::mips_bnz_d:
    return DAG.getNode(MipsISD::VALL_NONZERO, DL, Op->getValueType(0),
                       Op->getOperand(1));
  case Intrinsic::mips_bnz_v:
    return DAG.getNode(MipsISD::VANY_NONZERO, DL, Op->getValueType(0),
                       Op->getOperand(1));
  case Intrinsic::mips_bsel_v:
    // bsel_v(Mask, IfClear, IfSet) -> (vselect Mask, IfSet, IfClear)
    return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0),
                       Op->getOperand(1), Op->getOperand(3),
                       Op->getOperand(2));
  case Intrinsic::mips_bseli_b:
    // bseli_b(Mask, IfClear, IfSet) -> (vselect Mask, IfSet, IfClear)
    return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0),
                       Op->getOperand(1), lowerMSASplatImm(Op, 3, DAG),
                       Op->getOperand(2));
  case Intrinsic::mips_bset_b:
  case Intrinsic::mips_bset_h:
  case Intrinsic::mips_bset_w:
  case Intrinsic::mips_bset_d: {
    EVT VecTy = Op->getValueType(0);
    SDValue One = DAG.getConstant(1, DL, VecTy);

    return DAG.getNode(ISD::OR, DL, VecTy, Op->getOperand(1),
                       DAG.getNode(ISD::SHL, DL, VecTy, One,
                                   truncateVecElts(Op, DAG)));
  }
  case Intrinsic::mips_bseti_b:
  case Intrinsic::mips_bseti_h:
  case Intrinsic::mips_bseti_w:
  case Intrinsic::mips_bseti_d:
    return lowerMSABinaryBitImmIntr(Op, DAG, ISD::OR, Op->getOperand(2),
                                    !Subtarget.isLittle());
  case Intrinsic::mips_bz_b:
  case Intrinsic::mips_bz_h:
  case Intrinsic::mips_bz_w:
  case Intrinsic::mips_bz_d:
    return DAG.getNode(MipsISD::VALL_ZERO, DL, Op->getValueType(0),
                       Op->getOperand(1));
  case Intrinsic::mips_bz_v:
    return DAG.getNode(MipsISD::VANY_ZERO, DL, Op->getValueType(0),
                       Op->getOperand(1));
  case Intrinsic::mips_ceq_b:
  case Intrinsic::mips_ceq_h:
  case Intrinsic::mips_ceq_w:
  case Intrinsic::mips_ceq_d:
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
                        Op->getOperand(2), ISD::SETEQ);
  case Intrinsic::mips_ceqi_b:
  case Intrinsic::mips_ceqi_h:
  case Intrinsic::mips_ceqi_w:
  case Intrinsic::mips_ceqi_d:
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
                        lowerMSASplatImm(Op, 2, DAG, true), ISD::SETEQ);
  case Intrinsic::mips_cle_s_b:
  case Intrinsic::mips_cle_s_h:
  case Intrinsic::mips_cle_s_w:
  case Intrinsic::mips_cle_s_d:
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
                        Op->getOperand(2), ISD::SETLE);
  case Intrinsic::mips_clei_s_b:
  case Intrinsic::mips_clei_s_h:
  case Intrinsic::mips_clei_s_w:
  case Intrinsic::mips_clei_s_d:
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
                        lowerMSASplatImm(Op, 2, DAG, true), ISD::SETLE);
  case Intrinsic::mips_cle_u_b:
  case Intrinsic::mips_cle_u_h:
  case Intrinsic::mips_cle_u_w:
  case Intrinsic::mips_cle_u_d:
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
                        Op->getOperand(2), ISD::SETULE);
  case Intrinsic::mips_clei_u_b:
  case Intrinsic::mips_clei_u_h:
  case Intrinsic::mips_clei_u_w:
  case Intrinsic::mips_clei_u_d:
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
                        lowerMSASplatImm(Op, 2, DAG), ISD::SETULE);
  case Intrinsic::mips_clt_s_b:
  case Intrinsic::mips_clt_s_h:
  case Intrinsic::mips_clt_s_w:
  case Intrinsic::mips_clt_s_d:
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
                        Op->getOperand(2), ISD::SETLT);
  case Intrinsic::mips_clti_s_b:
  case Intrinsic::mips_clti_s_h:
  case Intrinsic::mips_clti_s_w:
  case Intrinsic::mips_clti_s_d:
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
                        lowerMSASplatImm(Op, 2, DAG, true), ISD::SETLT);
  case Intrinsic::mips_clt_u_b:
  case Intrinsic::mips_clt_u_h:
  case Intrinsic::mips_clt_u_w:
  case Intrinsic::mips_clt_u_d:
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
                        Op->getOperand(2), ISD::SETULT);
  case Intrinsic::mips_clti_u_b:
  case Intrinsic::mips_clti_u_h:
  case Intrinsic::mips_clti_u_w:
  case Intrinsic::mips_clti_u_d:
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
                        lowerMSASplatImm(Op, 2, DAG), ISD::SETULT);
  case Intrinsic::mips_copy_s_b:
  case Intrinsic::mips_copy_s_h:
  case Intrinsic::mips_copy_s_w:
    return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_SEXT_ELT);
  case Intrinsic::mips_copy_s_d:
    if (Subtarget.hasMips64())
      // Lower directly into VEXTRACT_SEXT_ELT since i64 is legal on Mips64.
      return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_SEXT_ELT);
    else {
      // Lower into the generic EXTRACT_VECTOR_ELT node and let the type
      // legalizer and EXTRACT_VECTOR_ELT lowering sort it out.
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op),
                         Op->getValueType(0), Op->getOperand(1),
                         Op->getOperand(2));
    }
  case Intrinsic::mips_copy_u_b:
  case Intrinsic::mips_copy_u_h:
  case Intrinsic::mips_copy_u_w:
    return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_ZEXT_ELT);
  case Intrinsic::mips_copy_u_d:
    if (Subtarget.hasMips64())
      // Lower directly into VEXTRACT_ZEXT_ELT since i64 is legal on Mips64.
      return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_ZEXT_ELT);
    else {
      // Lower into the generic EXTRACT_VECTOR_ELT node and let the type
      // legalizer and EXTRACT_VECTOR_ELT lowering sort it out.
      // Note: When i64 is illegal, this results in copy_s.w instructions
      // instead of copy_u.w instructions. This makes no difference to the
      // behaviour since i64 is only illegal when the register file is 32-bit.
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op),
                         Op->getValueType(0), Op->getOperand(1),
                         Op->getOperand(2));
    }
  case Intrinsic::mips_div_s_b:
  case Intrinsic::mips_div_s_h:
  case Intrinsic::mips_div_s_w:
  case Intrinsic::mips_div_s_d:
    return DAG.getNode(ISD::SDIV, DL, Op->getValueType(0), Op->getOperand(1),
                       Op->getOperand(2));
  case Intrinsic::mips_div_u_b:
  case Intrinsic::mips_div_u_h:
  case Intrinsic::mips_div_u_w:
  case Intrinsic::mips_div_u_d:
    return DAG.getNode(ISD::UDIV, DL, Op->getValueType(0), Op->getOperand(1),
                       Op->getOperand(2));
  case Intrinsic::mips_fadd_w:
  case Intrinsic::mips_fadd_d:
    // TODO: If intrinsics have fast-math-flags, propagate them.
    return DAG.getNode(ISD::FADD, DL, Op->getValueType(0), Op->getOperand(1),
                       Op->getOperand(2));
  // Don't lower mips_fcaf_[wd] since LLVM folds SETFALSE condcodes away
  case Intrinsic::mips_fceq_w:
  case Intrinsic::mips_fceq_d:
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
                        Op->getOperand(2), ISD::SETOEQ);
  case Intrinsic::mips_fcle_w:
  case Intrinsic::mips_fcle_d:
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
                        Op->getOperand(2), ISD::SETOLE);
  case Intrinsic::mips_fclt_w:
  case Intrinsic::mips_fclt_d:
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
                        Op->getOperand(2), ISD::SETOLT);
  case Intrinsic::mips_fcne_w:
  case Intrinsic::mips_fcne_d:
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
                        Op->getOperand(2), ISD::SETONE);
  case Intrinsic::mips_fcor_w:
  case Intrinsic::mips_fcor_d:
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
                        Op->getOperand(2), ISD::SETO);
  case Intrinsic::mips_fcueq_w:
  case Intrinsic::mips_fcueq_d:
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
                        Op->getOperand(2), ISD::SETUEQ);
  case Intrinsic::mips_fcule_w:
  case Intrinsic::mips_fcule_d:
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
                        Op->getOperand(2), ISD::SETULE);
  case Intrinsic::mips_fcult_w:
  case Intrinsic::mips_fcult_d:
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
                        Op->getOperand(2), ISD::SETULT);
  case Intrinsic::mips_fcun_w:
  case Intrinsic::mips_fcun_d:
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
                        Op->getOperand(2), ISD::SETUO);
  case Intrinsic::mips_fcune_w:
  case Intrinsic::mips_fcune_d:
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
                        Op->getOperand(2), ISD::SETUNE);
  case Intrinsic::mips_fdiv_w:
  case Intrinsic::mips_fdiv_d:
    // TODO: If intrinsics have fast-math-flags, propagate them.
    return DAG.getNode(ISD::FDIV, DL, Op->getValueType(0), Op->getOperand(1),
                       Op->getOperand(2));
  case Intrinsic::mips_ffint_u_w:
  case Intrinsic::mips_ffint_u_d:
    return DAG.getNode(ISD::UINT_TO_FP, DL, Op->getValueType(0),
                       Op->getOperand(1));
  case Intrinsic::mips_ffint_s_w:
  case Intrinsic::mips_ffint_s_d:
    return DAG.getNode(ISD::SINT_TO_FP, DL, Op->getValueType(0),
                       Op->getOperand(1));
  case Intrinsic::mips_fill_b:
  case Intrinsic::mips_fill_h:
  case Intrinsic::mips_fill_w:
  case Intrinsic::mips_fill_d: {
    EVT ResTy = Op->getValueType(0);
    SmallVector<SDValue, 16> Ops(ResTy.getVectorNumElements(),
                                 Op->getOperand(1));

    // If ResTy is v2i64 then the type legalizer will break this node down into
    // an equivalent v4i32.
    return DAG.getBuildVector(ResTy, DL, Ops);
  }
1884 case Intrinsic::mips_fexp2_w:
1885 case Intrinsic::mips_fexp2_d: {
1886 // TODO: If intrinsics have fast-math-flags, propagate them.
1887 EVT ResTy = Op->getValueType(0);
1888 return DAG.getNode(
1889 ISD::FMUL, SDLoc(Op), ResTy, Op->getOperand(1),
1890 DAG.getNode(ISD::FEXP2, SDLoc(Op), ResTy, Op->getOperand(2)));
1891 }
1892 case Intrinsic::mips_flog2_w:
1893 case Intrinsic::mips_flog2_d:
1894 return DAG.getNode(ISD::FLOG2, DL, Op->getValueType(0), Op->getOperand(1));
1895 case Intrinsic::mips_fmadd_w:
1896 case Intrinsic::mips_fmadd_d:
1897 return DAG.getNode(ISD::FMA, SDLoc(Op), Op->getValueType(0),
1898 Op->getOperand(1), Op->getOperand(2), Op->getOperand(3));
1899 case Intrinsic::mips_fmul_w:
1900 case Intrinsic::mips_fmul_d:
1901 // TODO: If intrinsics have fast-math-flags, propagate them.
1902 return DAG.getNode(ISD::FMUL, DL, Op->getValueType(0), Op->getOperand(1),
1903 Op->getOperand(2));
1904 case Intrinsic::mips_fmsub_w:
1905 case Intrinsic::mips_fmsub_d: {
1906 // TODO: If intrinsics have fast-math-flags, propagate them.
1907 return DAG.getNode(MipsISD::FMS, SDLoc(Op), Op->getValueType(0),
1908 Op->getOperand(1), Op->getOperand(2), Op->getOperand(3));
1909 }
1910 case Intrinsic::mips_frint_w:
1911 case Intrinsic::mips_frint_d:
1912 return DAG.getNode(ISD::FRINT, DL, Op->getValueType(0), Op->getOperand(1));
1913 case Intrinsic::mips_fsqrt_w:
1914 case Intrinsic::mips_fsqrt_d:
1915 return DAG.getNode(ISD::FSQRT, DL, Op->getValueType(0), Op->getOperand(1));
1916 case Intrinsic::mips_fsub_w:
1917 case Intrinsic::mips_fsub_d:
1918 // TODO: If intrinsics have fast-math-flags, propagate them.
1919 return DAG.getNode(ISD::FSUB, DL, Op->getValueType(0), Op->getOperand(1),
1920 Op->getOperand(2));
1921 case Intrinsic::mips_ftrunc_u_w:
1922 case Intrinsic::mips_ftrunc_u_d:
1923 return DAG.getNode(ISD::FP_TO_UINT, DL, Op->getValueType(0),
1924 Op->getOperand(1));
1925 case Intrinsic::mips_ftrunc_s_w:
1926 case Intrinsic::mips_ftrunc_s_d:
1927 return DAG.getNode(ISD::FP_TO_SINT, DL, Op->getValueType(0),
1928 Op->getOperand(1));
1929 case Intrinsic::mips_ilvev_b:
1930 case Intrinsic::mips_ilvev_h:
1931 case Intrinsic::mips_ilvev_w:
1932 case Intrinsic::mips_ilvev_d:
1933 return DAG.getNode(MipsISD::ILVEV, DL, Op->getValueType(0),
1934 Op->getOperand(1), Op->getOperand(2));
1935 case Intrinsic::mips_ilvl_b:
1936 case Intrinsic::mips_ilvl_h:
1937 case Intrinsic::mips_ilvl_w:
1938 case Intrinsic::mips_ilvl_d:
1939 return DAG.getNode(MipsISD::ILVL, DL, Op->getValueType(0),
1940 Op->getOperand(1), Op->getOperand(2));
1941 case Intrinsic::mips_ilvod_b:
1942 case Intrinsic::mips_ilvod_h:
1943 case Intrinsic::mips_ilvod_w:
1944 case Intrinsic::mips_ilvod_d:
1945 return DAG.getNode(MipsISD::ILVOD, DL, Op->getValueType(0),
1946 Op->getOperand(1), Op->getOperand(2));
1947 case Intrinsic::mips_ilvr_b:
1948 case Intrinsic::mips_ilvr_h:
1949 case Intrinsic::mips_ilvr_w:
1950 case Intrinsic::mips_ilvr_d:
1951 return DAG.getNode(MipsISD::ILVR, DL, Op->getValueType(0),
1952 Op->getOperand(1), Op->getOperand(2));
1953 case Intrinsic::mips_insert_b:
1954 case Intrinsic::mips_insert_h:
1955 case Intrinsic::mips_insert_w:
1956 case Intrinsic::mips_insert_d:
1957 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Op), Op->getValueType(0),
1958 Op->getOperand(1), Op->getOperand(3), Op->getOperand(2));
1959 case Intrinsic::mips_insve_b:
1960 case Intrinsic::mips_insve_h:
1961 case Intrinsic::mips_insve_w:
1962 case Intrinsic::mips_insve_d: {
1963 // Report an error for out of range values.
1964 int64_t Max;
1965 switch (Intrinsic) {
1966 case Intrinsic::mips_insve_b: Max = 15; break;
1967 case Intrinsic::mips_insve_h: Max = 7; break;
1968 case Intrinsic::mips_insve_w: Max = 3; break;
1969 case Intrinsic::mips_insve_d: Max = 1; break;
1970 default: llvm_unreachable("Unmatched intrinsic");
1971 }
1972 int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue();
1973 if (Value < 0 || Value > Max)
1974 report_fatal_error("Immediate out of range");
1975 return DAG.getNode(MipsISD::INSVE, DL, Op->getValueType(0),
1976 Op->getOperand(1), Op->getOperand(2), Op->getOperand(3),
1977 DAG.getConstant(0, DL, MVT::i32));
1978 }
1979 case Intrinsic::mips_ldi_b:
1980 case Intrinsic::mips_ldi_h:
1981 case Intrinsic::mips_ldi_w:
1982 case Intrinsic::mips_ldi_d:
1983 return lowerMSASplatImm(Op, 1, DAG, true);
1984 case Intrinsic::mips_lsa:
1985 case Intrinsic::mips_dlsa: {
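// In other words the result is Op1 + (Op2 << Op3); e.g. mips_lsa(x, y, 2)
// becomes (add x, (shl y, 2)), which instruction selection can fold back
// into a single lsa.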
1986 EVT ResTy = Op->getValueType(0);
1987 return DAG.getNode(ISD::ADD, SDLoc(Op), ResTy, Op->getOperand(1),
1988 DAG.getNode(ISD::SHL, SDLoc(Op), ResTy,
1989 Op->getOperand(2), Op->getOperand(3)));
1990 }
1991 case Intrinsic::mips_maddv_b:
1992 case Intrinsic::mips_maddv_h:
1993 case Intrinsic::mips_maddv_w:
1994 case Intrinsic::mips_maddv_d: {
1995 EVT ResTy = Op->getValueType(0);
1996 return DAG.getNode(ISD::ADD, SDLoc(Op), ResTy, Op->getOperand(1),
1997 DAG.getNode(ISD::MUL, SDLoc(Op), ResTy,
1998 Op->getOperand(2), Op->getOperand(3)));
1999 }
2000 case Intrinsic::mips_max_s_b:
2001 case Intrinsic::mips_max_s_h:
2002 case Intrinsic::mips_max_s_w:
2003 case Intrinsic::mips_max_s_d:
2004 return DAG.getNode(ISD::SMAX, DL, Op->getValueType(0),
2005 Op->getOperand(1), Op->getOperand(2));
2006 case Intrinsic::mips_max_u_b:
2007 case Intrinsic::mips_max_u_h:
2008 case Intrinsic::mips_max_u_w:
2009 case Intrinsic::mips_max_u_d:
2010 return DAG.getNode(ISD::UMAX, DL, Op->getValueType(0),
2011 Op->getOperand(1), Op->getOperand(2));
2012 case Intrinsic::mips_maxi_s_b:
2013 case Intrinsic::mips_maxi_s_h:
2014 case Intrinsic::mips_maxi_s_w:
2015 case Intrinsic::mips_maxi_s_d:
2016 return DAG.getNode(ISD::SMAX, DL, Op->getValueType(0),
2017 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG, true));
2018 case Intrinsic::mips_maxi_u_b:
2019 case Intrinsic::mips_maxi_u_h:
2020 case Intrinsic::mips_maxi_u_w:
2021 case Intrinsic::mips_maxi_u_d:
2022 return DAG.getNode(ISD::UMAX, DL, Op->getValueType(0),
2023 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
2024 case Intrinsic::mips_min_s_b:
2025 case Intrinsic::mips_min_s_h:
2026 case Intrinsic::mips_min_s_w:
2027 case Intrinsic::mips_min_s_d:
2028 return DAG.getNode(ISD::SMIN, DL, Op->getValueType(0),
2029 Op->getOperand(1), Op->getOperand(2));
2030 case Intrinsic::mips_min_u_b:
2031 case Intrinsic::mips_min_u_h:
2032 case Intrinsic::mips_min_u_w:
2033 case Intrinsic::mips_min_u_d:
2034 return DAG.getNode(ISD::UMIN, DL, Op->getValueType(0),
2035 Op->getOperand(1), Op->getOperand(2));
2036 case Intrinsic::mips_mini_s_b:
2037 case Intrinsic::mips_mini_s_h:
2038 case Intrinsic::mips_mini_s_w:
2039 case Intrinsic::mips_mini_s_d:
2040 return DAG.getNode(ISD::SMIN, DL, Op->getValueType(0),
2041 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG, true));
2042 case Intrinsic::mips_mini_u_b:
2043 case Intrinsic::mips_mini_u_h:
2044 case Intrinsic::mips_mini_u_w:
2045 case Intrinsic::mips_mini_u_d:
2046 return DAG.getNode(ISD::UMIN, DL, Op->getValueType(0),
2047 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
2048 case Intrinsic::mips_mod_s_b:
2049 case Intrinsic::mips_mod_s_h:
2050 case Intrinsic::mips_mod_s_w:
2051 case Intrinsic::mips_mod_s_d:
2052 return DAG.getNode(ISD::SREM, DL, Op->getValueType(0), Op->getOperand(1),
2053 Op->getOperand(2));
2054 case Intrinsic::mips_mod_u_b:
2055 case Intrinsic::mips_mod_u_h:
2056 case Intrinsic::mips_mod_u_w:
2057 case Intrinsic::mips_mod_u_d:
2058 return DAG.getNode(ISD::UREM, DL, Op->getValueType(0), Op->getOperand(1),
2059 Op->getOperand(2));
2060 case Intrinsic::mips_mulv_b:
2061 case Intrinsic::mips_mulv_h:
2062 case Intrinsic::mips_mulv_w:
2063 case Intrinsic::mips_mulv_d:
2064 return DAG.getNode(ISD::MUL, DL, Op->getValueType(0), Op->getOperand(1),
2065 Op->getOperand(2));
2066 case Intrinsic::mips_msubv_b:
2067 case Intrinsic::mips_msubv_h:
2068 case Intrinsic::mips_msubv_w:
2069 case Intrinsic::mips_msubv_d: {
2070 EVT ResTy = Op->getValueType(0);
2071 return DAG.getNode(ISD::SUB, SDLoc(Op), ResTy, Op->getOperand(1),
2072 DAG.getNode(ISD::MUL, SDLoc(Op), ResTy,
2073 Op->getOperand(2), Op->getOperand(3)));
2074 }
2075 case Intrinsic::mips_nlzc_b:
2076 case Intrinsic::mips_nlzc_h:
2077 case Intrinsic::mips_nlzc_w:
2078 case Intrinsic::mips_nlzc_d:
2079 return DAG.getNode(ISD::CTLZ, DL, Op->getValueType(0), Op->getOperand(1));
2080 case Intrinsic::mips_nor_v: {
2081 SDValue Res = DAG.getNode(ISD::OR, DL, Op->getValueType(0),
2082 Op->getOperand(1), Op->getOperand(2));
2083 return DAG.getNOT(DL, Res, Res->getValueType(0));
2084 }
2085 case Intrinsic::mips_nori_b: {
2086 SDValue Res = DAG.getNode(ISD::OR, DL, Op->getValueType(0),
2087 Op->getOperand(1),
2088 lowerMSASplatImm(Op, 2, DAG));
2089 return DAG.getNOT(DL, Res, Res->getValueType(0));
2090 }
2091 case Intrinsic::mips_or_v:
2092 return DAG.getNode(ISD::OR, DL, Op->getValueType(0), Op->getOperand(1),
2093 Op->getOperand(2));
2094 case Intrinsic::mips_ori_b:
2095 return DAG.getNode(ISD::OR, DL, Op->getValueType(0),
2096 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
2097 case Intrinsic::mips_pckev_b:
2098 case Intrinsic::mips_pckev_h:
2099 case Intrinsic::mips_pckev_w:
2100 case Intrinsic::mips_pckev_d:
2101 return DAG.getNode(MipsISD::PCKEV, DL, Op->getValueType(0),
2102 Op->getOperand(1), Op->getOperand(2));
2103 case Intrinsic::mips_pckod_b:
2104 case Intrinsic::mips_pckod_h:
2105 case Intrinsic::mips_pckod_w:
2106 case Intrinsic::mips_pckod_d:
2107 return DAG.getNode(MipsISD::PCKOD, DL, Op->getValueType(0),
2108 Op->getOperand(1), Op->getOperand(2));
2109 case Intrinsic::mips_pcnt_b:
2110 case Intrinsic::mips_pcnt_h:
2111 case Intrinsic::mips_pcnt_w:
2112 case Intrinsic::mips_pcnt_d:
2113 return DAG.getNode(ISD::CTPOP, DL, Op->getValueType(0), Op->getOperand(1));
2114 case Intrinsic::mips_sat_s_b:
2115 case Intrinsic::mips_sat_s_h:
2116 case Intrinsic::mips_sat_s_w:
2117 case Intrinsic::mips_sat_s_d:
2118 case Intrinsic::mips_sat_u_b:
2119 case Intrinsic::mips_sat_u_h:
2120 case Intrinsic::mips_sat_u_w:
2121 case Intrinsic::mips_sat_u_d: {
2122 // Report an error for out of range values.
2123 int64_t Max;
2124 switch (Intrinsic) {
2125 case Intrinsic::mips_sat_s_b:
2126 case Intrinsic::mips_sat_u_b: Max = 7; break;
2127 case Intrinsic::mips_sat_s_h:
2128 case Intrinsic::mips_sat_u_h: Max = 15; break;
2129 case Intrinsic::mips_sat_s_w:
2130 case Intrinsic::mips_sat_u_w: Max = 31; break;
2131 case Intrinsic::mips_sat_s_d:
2132 case Intrinsic::mips_sat_u_d: Max = 63; break;
2133 default: llvm_unreachable("Unmatched intrinsic");
2134 }
2135 int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue();
2136 if (Value < 0 || Value > Max)
2137 report_fatal_error("Immediate out of range");
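// Note that returning an empty SDValue here (as in the sldi/srari/srlri
// cases below) means "no custom lowering": the validated intrinsic node is
// left as-is and selected by the ordinary patterns.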
2138 return SDValue();
2139 }
2140 case Intrinsic::mips_shf_b:
2141 case Intrinsic::mips_shf_h:
2142 case Intrinsic::mips_shf_w: {
2143 int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue();
2144 if (Value < 0 || Value > 255)
2145 report_fatal_error("Immediate out of range");
2146 return DAG.getNode(MipsISD::SHF, DL, Op->getValueType(0),
2147 Op->getOperand(2), Op->getOperand(1));
2148 }
2149 case Intrinsic::mips_sldi_b:
2150 case Intrinsic::mips_sldi_h:
2151 case Intrinsic::mips_sldi_w:
2152 case Intrinsic::mips_sldi_d: {
2153 // Report an error for out of range values.
2154 int64_t Max;
2155 switch (Intrinsic) {
2156 case Intrinsic::mips_sldi_b: Max = 15; break;
2157 case Intrinsic::mips_sldi_h: Max = 7; break;
2158 case Intrinsic::mips_sldi_w: Max = 3; break;
2159 case Intrinsic::mips_sldi_d: Max = 1; break;
2160 default: llvm_unreachable("Unmatched intrinsic");
2161 }
2162 int64_t Value = cast<ConstantSDNode>(Op->getOperand(3))->getSExtValue();
2163 if (Value < 0 || Value > Max)
2164 report_fatal_error("Immediate out of range");
2165 return SDValue();
2166 }
2167 case Intrinsic::mips_sll_b:
2168 case Intrinsic::mips_sll_h:
2169 case Intrinsic::mips_sll_w:
2170 case Intrinsic::mips_sll_d:
2171 return DAG.getNode(ISD::SHL, DL, Op->getValueType(0), Op->getOperand(1),
2172 truncateVecElts(Op, DAG));
2173 case Intrinsic::mips_slli_b:
2174 case Intrinsic::mips_slli_h:
2175 case Intrinsic::mips_slli_w:
2176 case Intrinsic::mips_slli_d:
2177 return DAG.getNode(ISD::SHL, DL, Op->getValueType(0),
2178 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
2179 case Intrinsic::mips_splat_b:
2180 case Intrinsic::mips_splat_h:
2181 case Intrinsic::mips_splat_w:
2182 case Intrinsic::mips_splat_d:
2183 // We can't lower via VECTOR_SHUFFLE because it requires constant shuffle
2184 // masks, nor can we lower via BUILD_VECTOR & EXTRACT_VECTOR_ELT because
2185 // EXTRACT_VECTOR_ELT can't extract i64s on MIPS32.
2186 // Instead we lower to MipsISD::VSHF and match from there.
2187 return DAG.getNode(MipsISD::VSHF, DL, Op->getValueType(0),
2188 lowerMSASplatZExt(Op, 2, DAG), Op->getOperand(1),
2189 Op->getOperand(1));
2190 case Intrinsic::mips_splati_b:
2191 case Intrinsic::mips_splati_h:
2192 case Intrinsic::mips_splati_w:
2193 case Intrinsic::mips_splati_d:
2194 return DAG.getNode(MipsISD::VSHF, DL, Op->getValueType(0),
2195 lowerMSASplatImm(Op, 2, DAG), Op->getOperand(1),
2196 Op->getOperand(1));
2197 case Intrinsic::mips_sra_b:
2198 case Intrinsic::mips_sra_h:
2199 case Intrinsic::mips_sra_w:
2200 case Intrinsic::mips_sra_d:
2201 return DAG.getNode(ISD::SRA, DL, Op->getValueType(0), Op->getOperand(1),
2202 truncateVecElts(Op, DAG));
2203 case Intrinsic::mips_srai_b:
2204 case Intrinsic::mips_srai_h:
2205 case Intrinsic::mips_srai_w:
2206 case Intrinsic::mips_srai_d:
2207 return DAG.getNode(ISD::SRA, DL, Op->getValueType(0),
2208 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
2209 case Intrinsic::mips_srari_b:
2210 case Intrinsic::mips_srari_h:
2211 case Intrinsic::mips_srari_w:
2212 case Intrinsic::mips_srari_d: {
2213 // Report an error for out of range values.
2214 int64_t Max;
2215 switch (Intrinsic) {
2216 case Intrinsic::mips_srari_b: Max = 7; break;
2217 case Intrinsic::mips_srari_h: Max = 15; break;
2218 case Intrinsic::mips_srari_w: Max = 31; break;
2219 case Intrinsic::mips_srari_d: Max = 63; break;
2220 default: llvm_unreachable("Unmatched intrinsic");
2221 }
2222 int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue();
2223 if (Value < 0 || Value > Max)
2224 report_fatal_error("Immediate out of range");
2225 return SDValue();
2226 }
2227 case Intrinsic::mips_srl_b:
2228 case Intrinsic::mips_srl_h:
2229 case Intrinsic::mips_srl_w:
2230 case Intrinsic::mips_srl_d:
2231 return DAG.getNode(ISD::SRL, DL, Op->getValueType(0), Op->getOperand(1),
2232 truncateVecElts(Op, DAG));
2233 case Intrinsic::mips_srli_b:
2234 case Intrinsic::mips_srli_h:
2235 case Intrinsic::mips_srli_w:
2236 case Intrinsic::mips_srli_d:
2237 return DAG.getNode(ISD::SRL, DL, Op->getValueType(0),
2238 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
2239 case Intrinsic::mips_srlri_b:
2240 case Intrinsic::mips_srlri_h:
2241 case Intrinsic::mips_srlri_w:
2242 case Intrinsic::mips_srlri_d: {
2243 // Report an error for out of range values.
2244 int64_t Max;
2245 switch (Intrinsic) {
2246 case Intrinsic::mips_srlri_b: Max = 7; break;
2247 case Intrinsic::mips_srlri_h: Max = 15; break;
2248 case Intrinsic::mips_srlri_w: Max = 31; break;
2249 case Intrinsic::mips_srlri_d: Max = 63; break;
2250 default: llvm_unreachable("Unmatched intrinsic");
2251 }
2252 int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue();
2253 if (Value < 0 || Value > Max)
2254 report_fatal_error("Immediate out of range");
2255 return SDValue();
2256 }
2257 case Intrinsic::mips_subv_b:
2258 case Intrinsic::mips_subv_h:
2259 case Intrinsic::mips_subv_w:
2260 case Intrinsic::mips_subv_d:
2261 return DAG.getNode(ISD::SUB, DL, Op->getValueType(0), Op->getOperand(1),
2262 Op->getOperand(2));
2263 case Intrinsic::mips_subvi_b:
2264 case Intrinsic::mips_subvi_h:
2265 case Intrinsic::mips_subvi_w:
2266 case Intrinsic::mips_subvi_d:
2267 return DAG.getNode(ISD::SUB, DL, Op->getValueType(0),
2268 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
2269 case Intrinsic::mips_vshf_b:
2270 case Intrinsic::mips_vshf_h:
2271 case Intrinsic::mips_vshf_w:
2272 case Intrinsic::mips_vshf_d:
2273 return DAG.getNode(MipsISD::VSHF, DL, Op->getValueType(0),
2274 Op->getOperand(1), Op->getOperand(2), Op->getOperand(3));
2275 case Intrinsic::mips_xor_v:
2276 return DAG.getNode(ISD::XOR, DL, Op->getValueType(0), Op->getOperand(1),
2277 Op->getOperand(2));
2278 case Intrinsic::mips_xori_b:
2279 return DAG.getNode(ISD::XOR, DL, Op->getValueType(0),
2280 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
2281 case Intrinsic::thread_pointer: {
2282 EVT PtrVT = getPointerTy(DAG.getDataLayout());
2283 return DAG.getNode(MipsISD::ThreadPointer, DL, PtrVT);
2284 }
2285 }
2286}
2287
2288 static SDValue lowerMSALoadIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr,
2289 const MipsSubtarget &Subtarget) {
2290 SDLoc DL(Op);
2291 SDValue ChainIn = Op->getOperand(0);
2292 SDValue Address = Op->getOperand(2);
2293 SDValue Offset = Op->getOperand(3);
2294 EVT ResTy = Op->getValueType(0);
2295 EVT PtrTy = Address->getValueType(0);
2296
2297 // For N64, addresses have the underlying type MVT::i64. This intrinsic,
2298 // however, takes an i32 signed constant offset. The actual type of the
2299 // intrinsic is a scaled signed i10.
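// For example, for ld.w the hardware immediate is a signed 10-bit value
// scaled by the 4-byte element size, so (assuming the standard MSA LD.df
// encoding) only byte offsets that are multiples of 4 in [-2048, 2044] are
// directly encodable.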
2300 if (Subtarget.isABI_N64())
2301 Offset = DAG.getNode(ISD::SIGN_EXTEND, DL, PtrTy, Offset);
2302
2303 Address = DAG.getNode(ISD::ADD, DL, PtrTy, Address, Offset);
2304 return DAG.getLoad(ResTy, DL, ChainIn, Address, MachinePointerInfo(),
2305 Align(16));
2306}
2307
2308SDValue MipsSETargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
2309 SelectionDAG &DAG) const {
2310 unsigned Intr = Op->getConstantOperandVal(1);
2311 switch (Intr) {
2312 default:
2313 return SDValue();
2314 case Intrinsic::mips_extp:
2315 return lowerDSPIntr(Op, DAG, MipsISD::EXTP);
2316 case Intrinsic::mips_extpdp:
2317 return lowerDSPIntr(Op, DAG, MipsISD::EXTPDP);
2318 case Intrinsic::mips_extr_w:
2319 return lowerDSPIntr(Op, DAG, MipsISD::EXTR_W);
2320 case Intrinsic::mips_extr_r_w:
2321 return lowerDSPIntr(Op, DAG, MipsISD::EXTR_R_W);
2322 case Intrinsic::mips_extr_rs_w:
2323 return lowerDSPIntr(Op, DAG, MipsISD::EXTR_RS_W);
2324 case Intrinsic::mips_extr_s_h:
2325 return lowerDSPIntr(Op, DAG, MipsISD::EXTR_S_H);
2326 case Intrinsic::mips_mthlip:
2327 return lowerDSPIntr(Op, DAG, MipsISD::MTHLIP);
2328 case Intrinsic::mips_mulsaq_s_w_ph:
2329 return lowerDSPIntr(Op, DAG, MipsISD::MULSAQ_S_W_PH);
2330 case Intrinsic::mips_maq_s_w_phl:
2331 return lowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHL);
2332 case Intrinsic::mips_maq_s_w_phr:
2333 return lowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHR);
2334 case Intrinsic::mips_maq_sa_w_phl:
2335 return lowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHL);
2336 case Intrinsic::mips_maq_sa_w_phr:
2337 return lowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHR);
2338 case Intrinsic::mips_dpaq_s_w_ph:
2339 return lowerDSPIntr(Op, DAG, MipsISD::DPAQ_S_W_PH);
2340 case Intrinsic::mips_dpsq_s_w_ph:
2341 return lowerDSPIntr(Op, DAG, MipsISD::DPSQ_S_W_PH);
2342 case Intrinsic::mips_dpaq_sa_l_w:
2343 return lowerDSPIntr(Op, DAG, MipsISD::DPAQ_SA_L_W);
2344 case Intrinsic::mips_dpsq_sa_l_w:
2345 return lowerDSPIntr(Op, DAG, MipsISD::DPSQ_SA_L_W);
2346 case Intrinsic::mips_dpaqx_s_w_ph:
2347 return lowerDSPIntr(Op, DAG, MipsISD::DPAQX_S_W_PH);
2348 case Intrinsic::mips_dpaqx_sa_w_ph:
2349 return lowerDSPIntr(Op, DAG, MipsISD::DPAQX_SA_W_PH);
2350 case Intrinsic::mips_dpsqx_s_w_ph:
2351 return lowerDSPIntr(Op, DAG, MipsISD::DPSQX_S_W_PH);
2352 case Intrinsic::mips_dpsqx_sa_w_ph:
2353 return lowerDSPIntr(Op, DAG, MipsISD::DPSQX_SA_W_PH);
2354 case Intrinsic::mips_ld_b:
2355 case Intrinsic::mips_ld_h:
2356 case Intrinsic::mips_ld_w:
2357 case Intrinsic::mips_ld_d:
2358 return lowerMSALoadIntr(Op, DAG, Intr, Subtarget);
2359 }
2360}
2361
2362 static SDValue lowerMSAStoreIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr,
2363 const MipsSubtarget &Subtarget) {
2364 SDLoc DL(Op);
2365 SDValue ChainIn = Op->getOperand(0);
2366 SDValue Value = Op->getOperand(2);
2367 SDValue Address = Op->getOperand(3);
2368 SDValue Offset = Op->getOperand(4);
2369 EVT PtrTy = Address->getValueType(0);
2370
2371 // For N64, addresses have the underlying type MVT::i64. This intrinsic,
2372 // however, takes an i32 signed constant offset. The actual type of the
2373 // intrinsic is a scaled signed i10.
2374 if (Subtarget.isABI_N64())
2375 Offset = DAG.getNode(ISD::SIGN_EXTEND, DL, PtrTy, Offset);
2376
2377 Address = DAG.getNode(ISD::ADD, DL, PtrTy, Address, Offset);
2378
2379 return DAG.getStore(ChainIn, DL, Value, Address, MachinePointerInfo(),
2380 Align(16));
2381}
2382
2383SDValue MipsSETargetLowering::lowerINTRINSIC_VOID(SDValue Op,
2384 SelectionDAG &DAG) const {
2385 unsigned Intr = Op->getConstantOperandVal(1);
2386 switch (Intr) {
2387 default:
2388 return SDValue();
2389 case Intrinsic::mips_st_b:
2390 case Intrinsic::mips_st_h:
2391 case Intrinsic::mips_st_w:
2392 case Intrinsic::mips_st_d:
2393 return lowerMSAStoreIntr(Op, DAG, Intr, Subtarget);
2394 }
2395}
2396
2397// Lower ISD::EXTRACT_VECTOR_ELT into MipsISD::VEXTRACT_SEXT_ELT.
2398//
2399// The non-value bits resulting from ISD::EXTRACT_VECTOR_ELT are undefined. We
2400// choose to sign-extend but we could have equally chosen zero-extend. The
2401// DAGCombiner will fold any sign/zero extension of the ISD::EXTRACT_VECTOR_ELT
2402// result into this node later (possibly changing it to a zero-extend in the
2403// process).
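// For example, extracting an i16 lane into an i32 result yields
// (VEXTRACT_SEXT_ELT v8i16:$ws, $idx, i16), which can be selected to the
// sign-extending copy_s.h, and a surrounding sign_extend folds away.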
2404SDValue MipsSETargetLowering::
2405lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const {
2406 SDLoc DL(Op);
2407 EVT ResTy = Op->getValueType(0);
2408 SDValue Op0 = Op->getOperand(0);
2409 EVT VecTy = Op0->getValueType(0);
2410
2411 if (!VecTy.is128BitVector())
2412 return SDValue();
2413
2414 if (ResTy.isInteger()) {
2415 SDValue Op1 = Op->getOperand(1);
2416 EVT EltTy = VecTy.getVectorElementType();
2417 return DAG.getNode(MipsISD::VEXTRACT_SEXT_ELT, DL, ResTy, Op0, Op1,
2418 DAG.getValueType(EltTy));
2419 }
2420
2421 return Op;
2422}
2423
2424static bool isConstantOrUndef(const SDValue Op) {
2425 if (Op->isUndef())
2426 return true;
2427 if (isa<ConstantSDNode>(Op))
2428 return true;
2429 if (isa<ConstantFPSDNode>(Op))
2430 return true;
2431 return false;
2432}
2433
2434 static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode *Op) {
2435 for (unsigned i = 0; i < Op->getNumOperands(); ++i)
2436 if (isConstantOrUndef(Op->getOperand(i)))
2437 return true;
2438 return false;
2439}
2440
2441// Lowers ISD::BUILD_VECTOR into appropriate SelectionDAG nodes for the
2442// backend.
2443//
2444// Lowers according to the following rules:
2445// - Constant splats are legal as-is as long as the SplatBitSize is a power of
2446// 2 less than or equal to 64 and the value fits into a signed 10-bit
2447// immediate
2448// - Constant splats are lowered to bitconverted BUILD_VECTORs if SplatBitSize
2449// is a power of 2 less than or equal to 64 and the value does not fit into a
2450// signed 10-bit immediate
2451// - Non-constant splats are legal as-is.
2452// - Non-constant non-splats are lowered to sequences of INSERT_VECTOR_ELT.
2453// - All others are illegal and must be expanded.
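// As an illustration of the bitcast rule: a v4f32 constant splat has a
// non-integer ResTy, so it is rebuilt below as the equivalent v4i32 integer
// splat and bitcast back to v4f32, while an in-range integer splat such as
// a v8i16 of 5 is returned unchanged and can later be selected to ldi.h.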
2454SDValue MipsSETargetLowering::lowerBUILD_VECTOR(SDValue Op,
2455 SelectionDAG &DAG) const {
2456 BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op);
2457 EVT ResTy = Op->getValueType(0);
2458 SDLoc DL(Op);
2459 APInt SplatValue, SplatUndef;
2460 unsigned SplatBitSize;
2461 bool HasAnyUndefs;
2462
2463 if (!Subtarget.hasMSA() || !ResTy.is128BitVector())
2464 return SDValue();
2465
2466 if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
2467 HasAnyUndefs, 8,
2468 !Subtarget.isLittle()) && SplatBitSize <= 64) {
2469 // We can only cope with 8, 16, 32, or 64-bit elements
2470 if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 &&
2471 SplatBitSize != 64)
2472 return SDValue();
2473
2474 // If the value isn't an integer type we will have to bitcast
2475 // from an integer type first. Also, if there are any undefs, we must
2476 // lower them to defined values first.
2477 if (ResTy.isInteger() && !HasAnyUndefs)
2478 return Op;
2479
2480 EVT ViaVecTy;
2481
2482 switch (SplatBitSize) {
2483 default:
2484 return SDValue();
2485 case 8:
2486 ViaVecTy = MVT::v16i8;
2487 break;
2488 case 16:
2489 ViaVecTy = MVT::v8i16;
2490 break;
2491 case 32:
2492 ViaVecTy = MVT::v4i32;
2493 break;
2494 case 64:
2495 // There's no fill.d to fall back on for 64-bit values
2496 return SDValue();
2497 }
2498
2499 // SelectionDAG::getConstant will promote SplatValue appropriately.
2500 SDValue Result = DAG.getConstant(SplatValue, DL, ViaVecTy);
2501
2502 // Bitcast to the type we originally wanted
2503 if (ViaVecTy != ResTy)
2504 Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result);
2505
2506 return Result;
2507 } else if (DAG.isSplatValue(Op, /* AllowUndefs */ false))
2508 return Op;
2509 else if (!isConstantOrUndefBUILD_VECTOR(Node)) {
2510 // Use INSERT_VECTOR_ELT operations rather than expand to stores.
2511 // The resulting code is the same length as the expansion, but it doesn't
2512 // use memory operations.
2513 EVT ResTy = Node->getValueType(0);
2514
2515 assert(ResTy.isVector());
2516
2517 unsigned NumElts = ResTy.getVectorNumElements();
2518 SDValue Vector = DAG.getUNDEF(ResTy);
2519 for (unsigned i = 0; i < NumElts; ++i) {
2520 Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector,
2521 Node->getOperand(i),
2522 DAG.getConstant(i, DL, MVT::i32));
2523 }
2524 return Vector;
2525 }
2526
2527 return SDValue();
2528}
2529
2530// Lower VECTOR_SHUFFLE into SHF (if possible).
2531//
2532// SHF splits the vector into blocks of four elements, then shuffles these
2533// elements according to a <4 x i2> constant (encoded as an integer immediate).
2534//
2535// It is therefore possible to lower into SHF when the mask takes the form:
2536// <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...>
2537 // When undefs appear, they are treated as if they were whatever value is
2538// necessary in order to fit the above forms.
2539//
2540// For example:
2541// %2 = shufflevector <8 x i16> %0, <8 x i16> undef,
2542// <8 x i32> <i32 3, i32 2, i32 1, i32 0,
2543// i32 7, i32 6, i32 5, i32 4>
2544// is lowered to:
2545// (SHF_H $w0, $w1, 27)
2546// where the 27 comes from:
2547// 3 + (2 << 2) + (1 << 4) + (0 << 6)
2548 static SDValue lowerVECTOR_SHUFFLE_SHF(SDValue Op, EVT ResTy,
2549 SmallVector<int, 16> Indices,
2550 SelectionDAG &DAG) {
2551 int SHFIndices[4] = { -1, -1, -1, -1 };
2552
2553 if (Indices.size() < 4)
2554 return SDValue();
2555
2556 for (unsigned i = 0; i < 4; ++i) {
2557 for (unsigned j = i; j < Indices.size(); j += 4) {
2558 int Idx = Indices[j];
2559
2560 // Convert from vector index to 4-element subvector index
2561 // If an index refers to an element outside of the subvector then give up
2562 if (Idx != -1) {
2563 Idx -= 4 * (j / 4);
2564 if (Idx < 0 || Idx >= 4)
2565 return SDValue();
2566 }
2567
2568 // If the mask has an undef, replace it with the current index.
2569 // Note that it might still be undef if the current index is also undef
2570 if (SHFIndices[i] == -1)
2571 SHFIndices[i] = Idx;
2572
2573 // Check that non-undef values are the same as in the mask. If they
2574 // aren't then give up
2575 if (!(Idx == -1 || Idx == SHFIndices[i]))
2576 return SDValue();
2577 }
2578 }
2579
2580 // Calculate the immediate. Replace any remaining undefs with zero
2581 APInt Imm(32, 0);
2582 for (int i = 3; i >= 0; --i) {
2583 int Idx = SHFIndices[i];
2584
2585 if (Idx == -1)
2586 Idx = 0;
2587
2588 Imm <<= 2;
2589 Imm |= Idx & 0x3;
2590 }
2591
2592 SDLoc DL(Op);
2593 return DAG.getNode(MipsISD::SHF, DL, ResTy,
2594 DAG.getTargetConstant(Imm, DL, MVT::i32),
2595 Op->getOperand(0));
2596}
2597
2598/// Determine whether a range fits a regular pattern of values.
2599/// This function accounts for the possibility of jumping over the End iterator.
2600template <typename ValType>
2601static bool
2602 fitsRegularPattern(typename SmallVectorImpl<ValType>::const_iterator Begin,
2603 unsigned CheckStride,
2604 typename SmallVectorImpl<ValType>::const_iterator End,
2605 ValType ExpectedIndex, unsigned ExpectedIndexStride) {
2606 auto &I = Begin;
2607
2608 while (I != End) {
2609 if (*I != -1 && *I != ExpectedIndex)
2610 return false;
2611 ExpectedIndex += ExpectedIndexStride;
2612
2613 // Incrementing past End is undefined behaviour so we must increment one
2614 // step at a time and check for End at each step.
2615 for (unsigned n = 0; n < CheckStride && I != End; ++n, ++I)
2616 ; // Empty loop body.
2617 }
2618 return true;
2619}
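// A sketch of typical usage, with hypothetical mask values:
//   SmallVector<int, 16> Mask = {0, 0, 2, 2, 4, 4, 6, 6};
//   fitsRegularPattern<int>(Mask.begin(), 2, Mask.end(), 0, 2);     // true
//   fitsRegularPattern<int>(Mask.begin() + 1, 2, Mask.end(), 0, 2); // true
// Both the even and the odd positions step through <0, 2, 4, 6>, so this
// mask interleaves the even elements of the first operand with themselves.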
2620
2621// Determine whether VECTOR_SHUFFLE is a SPLATI.
2622//
2623// It is a SPLATI when the mask is:
2624// <x, x, x, ...>
2625// where x is any valid index.
2626//
2627 // When undefs appear in the mask, they are treated as if they were whatever
2628// value is necessary in order to fit the above form.
2629 static bool isVECTOR_SHUFFLE_SPLATI(SDValue Op, EVT ResTy,
2630 SmallVector<int, 16> Indices,
2631 SelectionDAG &DAG) {
2632 assert((Indices.size() % 2) == 0);
2633
2634 int SplatIndex = -1;
2635 for (const auto &V : Indices) {
2636 if (V != -1) {
2637 SplatIndex = V;
2638 break;
2639 }
2640 }
2641
2642 return fitsRegularPattern<int>(Indices.begin(), 1, Indices.end(), SplatIndex,
2643 0);
2644}
2645
2646// Lower VECTOR_SHUFFLE into ILVEV (if possible).
2647//
2648// ILVEV interleaves the even elements from each vector.
2649//
2650// It is possible to lower into ILVEV when the mask consists of two of the
2651// following forms interleaved:
2652// <0, 2, 4, ...>
2653// <n, n+2, n+4, ...>
2654// where n is the number of elements in the vector.
2655// For example:
2656// <0, 0, 2, 2, 4, 4, ...>
2657// <0, n, 2, n+2, 4, n+4, ...>
2658//
2659 // When undefs appear in the mask, they are treated as if they were whatever
2660// value is necessary in order to fit the above forms.
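// For example, for v8i16 a mask such as <0, 8, 2, 10, 4, 12, 6, 14> (the
// even elements of both operands, interleaved) can be lowered to a single
// ilvev.h.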
2661 static SDValue lowerVECTOR_SHUFFLE_ILVEV(SDValue Op, EVT ResTy,
2662 SmallVector<int, 16> Indices,
2663 SelectionDAG &DAG) {
2664 assert((Indices.size() % 2) == 0);
2665
2666 SDValue Wt;
2667 SDValue Ws;
2668 const auto &Begin = Indices.begin();
2669 const auto &End = Indices.end();
2670
2671 // Check even elements are taken from the even elements of one half or the
2672 // other and pick an operand accordingly.
2673 if (fitsRegularPattern<int>(Begin, 2, End, 0, 2))
2674 Wt = Op->getOperand(0);
2675 else if (fitsRegularPattern<int>(Begin, 2, End, Indices.size(), 2))
2676 Wt = Op->getOperand(1);
2677 else
2678 return SDValue();
2679
2680 // Check odd elements are taken from the even elements of one half or the
2681 // other and pick an operand accordingly.
2682 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 2))
2683 Ws = Op->getOperand(0);
2684 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Indices.size(), 2))
2685 Ws = Op->getOperand(1);
2686 else
2687 return SDValue();
2688
2689 return DAG.getNode(MipsISD::ILVEV, SDLoc(Op), ResTy, Ws, Wt);
2690}
2691
2692// Lower VECTOR_SHUFFLE into ILVOD (if possible).
2693//
2694// ILVOD interleaves the odd elements from each vector.
2695//
2696// It is possible to lower into ILVOD when the mask consists of two of the
2697// following forms interleaved:
2698// <1, 3, 5, ...>
2699// <n+1, n+3, n+5, ...>
2700// where n is the number of elements in the vector.
2701// For example:
2702// <1, 1, 3, 3, 5, 5, ...>
2703// <1, n+1, 3, n+3, 5, n+5, ...>
2704//
2705 // When undefs appear in the mask, they are treated as if they were whatever
2706// value is necessary in order to fit the above forms.
2707 static SDValue lowerVECTOR_SHUFFLE_ILVOD(SDValue Op, EVT ResTy,
2708 SmallVector<int, 16> Indices,
2709 SelectionDAG &DAG) {
2710 assert((Indices.size() % 2) == 0);
2711
2712 SDValue Wt;
2713 SDValue Ws;
2714 const auto &Begin = Indices.begin();
2715 const auto &End = Indices.end();
2716
2717 // Check even elements are taken from the odd elements of one half or the
2718 // other and pick an operand accordingly.
2719 if (fitsRegularPattern<int>(Begin, 2, End, 1, 2))
2720 Wt = Op->getOperand(0);
2721 else if (fitsRegularPattern<int>(Begin, 2, End, Indices.size() + 1, 2))
2722 Wt = Op->getOperand(1);
2723 else
2724 return SDValue();
2725
2726 // Check odd elements are taken from the odd elements of one half or the
2727 // other and pick an operand accordingly.
2728 if (fitsRegularPattern<int>(Begin + 1, 2, End, 1, 2))
2729 Ws = Op->getOperand(0);
2730 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Indices.size() + 1, 2))
2731 Ws = Op->getOperand(1);
2732 else
2733 return SDValue();
2734
2735 return DAG.getNode(MipsISD::ILVOD, SDLoc(Op), ResTy, Wt, Ws);
2736}
2737
2738// Lower VECTOR_SHUFFLE into ILVR (if possible).
2739//
2740// ILVR interleaves consecutive elements from the right (lowest-indexed) half of
2741// each vector.
2742//
2743// It is possible to lower into ILVR when the mask consists of two of the
2744// following forms interleaved:
2745// <0, 1, 2, ...>
2746// <n, n+1, n+2, ...>
2747// where n is the number of elements in the vector.
2748// For example:
2749// <0, 0, 1, 1, 2, 2, ...>
2750// <0, n, 1, n+1, 2, n+2, ...>
2751//
2752 // When undefs appear in the mask, they are treated as if they were whatever
2753// value is necessary in order to fit the above forms.
2754 static SDValue lowerVECTOR_SHUFFLE_ILVR(SDValue Op, EVT ResTy,
2755 SmallVector<int, 16> Indices,
2756 SelectionDAG &DAG) {
2757 assert((Indices.size() % 2) == 0);
2758
2759 SDValue Wt;
2760 SDValue Ws;
2761 const auto &Begin = Indices.begin();
2762 const auto &End = Indices.end();
2763
2764 // Check even elements are taken from the right (lowest-indexed) elements of
2765 // one half or the other and pick an operand accordingly.
2766 if (fitsRegularPattern<int>(Begin, 2, End, 0, 1))
2767 Wt = Op->getOperand(0);
2768 else if (fitsRegularPattern<int>(Begin, 2, End, Indices.size(), 1))
2769 Wt = Op->getOperand(1);
2770 else
2771 return SDValue();
2772
2773 // Check odd elements are taken from the right (lowest-indexed) elements of
2774 // one half or the other and pick an operand accordingly.
2775 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 1))
2776 Ws = Op->getOperand(0);
2777 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Indices.size(), 1))
2778 Ws = Op->getOperand(1);
2779 else
2780 return SDValue();
2781
2782 return DAG.getNode(MipsISD::ILVR, SDLoc(Op), ResTy, Ws, Wt);
2783}
2784
2785// Lower VECTOR_SHUFFLE into ILVL (if possible).
2786//
2787// ILVL interleaves consecutive elements from the left (highest-indexed) half
2788// of each vector.
2789//
2790// It is possible to lower into ILVL when the mask consists of two of the
2791// following forms interleaved:
2792// <x, x+1, x+2, ...>
2793// <n+x, n+x+1, n+x+2, ...>
2794// where n is the number of elements in the vector and x is half n.
2795// For example:
2796// <x, x, x+1, x+1, x+2, x+2, ...>
2797// <x, n+x, x+1, n+x+1, x+2, n+x+2, ...>
2798//
2799 // When undefs appear in the mask, they are treated as if they were whatever
2800// value is necessary in order to fit the above forms.
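// For example, for v8i16 (n = 8, x = 4) a mask such as
// <4, 12, 5, 13, 6, 14, 7, 15> interleaves the two high halves and can be
// lowered to a single ilvl.h.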
2801 static SDValue lowerVECTOR_SHUFFLE_ILVL(SDValue Op, EVT ResTy,
2802 SmallVector<int, 16> Indices,
2803 SelectionDAG &DAG) {
2804 assert((Indices.size() % 2) == 0);
2805
2806 unsigned HalfSize = Indices.size() / 2;
2807 SDValue Wt;
2808 SDValue Ws;
2809 const auto &Begin = Indices.begin();
2810 const auto &End = Indices.end();
2811
2812 // Check even elements are taken from the left (highest-indexed) elements of
2813 // one half or the other and pick an operand accordingly.
2814 if (fitsRegularPattern<int>(Begin, 2, End, HalfSize, 1))
2815 Wt = Op->getOperand(0);
2816 else if (fitsRegularPattern<int>(Begin, 2, End, Indices.size() + HalfSize, 1))
2817 Wt = Op->getOperand(1);
2818 else
2819 return SDValue();
2820
2821 // Check odd elements are taken from the left (highest-indexed) elements of
2822 // one half or the other and pick an operand accordingly.
2823 if (fitsRegularPattern<int>(Begin + 1, 2, End, HalfSize, 1))
2824 Ws = Op->getOperand(0);
2825 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Indices.size() + HalfSize,
2826 1))
2827 Ws = Op->getOperand(1);
2828 else
2829 return SDValue();
2830
2831 return DAG.getNode(MipsISD::ILVL, SDLoc(Op), ResTy, Ws, Wt);
2832}
2833
2834// Lower VECTOR_SHUFFLE into PCKEV (if possible).
2835//
2836// PCKEV copies the even elements of each vector into the result vector.
2837//
2838// It is possible to lower into PCKEV when the mask consists of two of the
2839// following forms concatenated:
2840// <0, 2, 4, ...>
2841// <n, n+2, n+4, ...>
2842// where n is the number of elements in the vector.
2843// For example:
2844// <0, 2, 4, ..., 0, 2, 4, ...>
2845// <0, 2, 4, ..., n, n+2, n+4, ...>
2846//
2847 // When undefs appear in the mask, they are treated as if they were whatever
2848// value is necessary in order to fit the above forms.
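// For example, for v8i16 a mask such as <0, 2, 4, 6, 8, 10, 12, 14> (the
// even elements of the first operand followed by the even elements of the
// second) can be lowered to a single pckev.h.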
2849 static SDValue lowerVECTOR_SHUFFLE_PCKEV(SDValue Op, EVT ResTy,
2850 SmallVector<int, 16> Indices,
2851 SelectionDAG &DAG) {
2852 assert((Indices.size() % 2) == 0);
2853
2854 SDValue Wt;
2855 SDValue Ws;
2856 const auto &Begin = Indices.begin();
2857 const auto &Mid = Indices.begin() + Indices.size() / 2;
2858 const auto &End = Indices.end();
2859
2860 if (fitsRegularPattern<int>(Begin, 1, Mid, 0, 2))
2861 Wt = Op->getOperand(0);
2862 else if (fitsRegularPattern<int>(Begin, 1, Mid, Indices.size(), 2))
2863 Wt = Op->getOperand(1);
2864 else
2865 return SDValue();
2866
2867 if (fitsRegularPattern<int>(Mid, 1, End, 0, 2))
2868 Ws = Op->getOperand(0);
2869 else if (fitsRegularPattern<int>(Mid, 1, End, Indices.size(), 2))
2870 Ws = Op->getOperand(1);
2871 else
2872 return SDValue();
2873
2874 return DAG.getNode(MipsISD::PCKEV, SDLoc(Op), ResTy, Ws, Wt);
2875}
2876
2877// Lower VECTOR_SHUFFLE into PCKOD (if possible).
2878//
2879// PCKOD copies the odd elements of each vector into the result vector.
2880//
2881// It is possible to lower into PCKOD when the mask consists of two of the
2882// following forms concatenated:
2883// <1, 3, 5, ...>
2884// <n+1, n+3, n+5, ...>
2885// where n is the number of elements in the vector.
2886// For example:
2887// <1, 3, 5, ..., 1, 3, 5, ...>
2888// <1, 3, 5, ..., n+1, n+3, n+5, ...>
2889//
2890 // When undefs appear in the mask, they are treated as if they were whatever
2891// value is necessary in order to fit the above forms.
2892 static SDValue lowerVECTOR_SHUFFLE_PCKOD(SDValue Op, EVT ResTy,
2893 SmallVector<int, 16> Indices,
2894 SelectionDAG &DAG) {
2895 assert((Indices.size() % 2) == 0);
2896
2897 SDValue Wt;
2898 SDValue Ws;
2899 const auto &Begin = Indices.begin();
2900 const auto &Mid = Indices.begin() + Indices.size() / 2;
2901 const auto &End = Indices.end();
2902
2903 if (fitsRegularPattern<int>(Begin, 1, Mid, 1, 2))
2904 Wt = Op->getOperand(0);
2905 else if (fitsRegularPattern<int>(Begin, 1, Mid, Indices.size() + 1, 2))
2906 Wt = Op->getOperand(1);
2907 else
2908 return SDValue();
2909
2910 if (fitsRegularPattern<int>(Mid, 1, End, 1, 2))
2911 Ws = Op->getOperand(0);
2912 else if (fitsRegularPattern<int>(Mid, 1, End, Indices.size() + 1, 2))
2913 Ws = Op->getOperand(1);
2914 else
2915 return SDValue();
2916
2917 return DAG.getNode(MipsISD::PCKOD, SDLoc(Op), ResTy, Ws, Wt);
2918}
2919
2920// Lower VECTOR_SHUFFLE into VSHF.
2921//
2922// This mostly consists of converting the shuffle indices in Indices into a
2923// BUILD_VECTOR and adding it as an operand to the resulting VSHF. There is
2924// also code to eliminate unused operands of the VECTOR_SHUFFLE. For example,
2925// if the type is v8i16 and all the indices are less than 8 then the second
2926// operand is unused and can be replaced with anything. We choose to replace it
2927// with the used operand since this reduces the number of instructions overall.
2928 static SDValue lowerVECTOR_SHUFFLE_VSHF(SDValue Op, EVT ResTy,
2929 const SmallVector<int, 16> &Indices,
2930 SelectionDAG &DAG) {
2931 SmallVector<SDValue, 16> Ops;
2932 SDValue Op0;
2933 SDValue Op1;
2934 EVT MaskVecTy = ResTy.changeVectorElementTypeToInteger();
2935 EVT MaskEltTy = MaskVecTy.getVectorElementType();
2936 bool Using1stVec = false;
2937 bool Using2ndVec = false;
2938 SDLoc DL(Op);
2939 int ResTyNumElts = ResTy.getVectorNumElements();
2940
2941 for (int i = 0; i < ResTyNumElts; ++i) {
2942 // Idx == -1 means UNDEF
2943 int Idx = Indices[i];
2944
2945 if (0 <= Idx && Idx < ResTyNumElts)
2946 Using1stVec = true;
2947 if (ResTyNumElts <= Idx && Idx < ResTyNumElts * 2)
2948 Using2ndVec = true;
2949 }
2950
2951 for (int Idx : Indices)
2952 Ops.push_back(DAG.getTargetConstant(Idx, DL, MaskEltTy));
2953
2954 SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, Ops);
2955
2956 if (Using1stVec && Using2ndVec) {
2957 Op0 = Op->getOperand(0);
2958 Op1 = Op->getOperand(1);
2959 } else if (Using1stVec)
2960 Op0 = Op1 = Op->getOperand(0);
2961 else if (Using2ndVec)
2962 Op0 = Op1 = Op->getOperand(1);
2963 else
2964 llvm_unreachable("shuffle vector mask references neither vector operand?");
2965
2966 // VECTOR_SHUFFLE concatenates the vectors in a vectorwise fashion.
2967 // <0b00, 0b01> + <0b10, 0b11> -> <0b00, 0b01, 0b10, 0b11>
2968 // VSHF concatenates the vectors in a bitwise fashion:
2969 // <0b00, 0b01> + <0b10, 0b11> ->
2970 // 0b0100 + 0b1110 -> 0b01001110
2971 // <0b10, 0b11, 0b00, 0b01>
2972 // We must therefore swap the operands to get the correct result.
2973 return DAG.getNode(MipsISD::VSHF, DL, ResTy, MaskVec, Op1, Op0);
2974}
2975
2976// Lower VECTOR_SHUFFLE into one of a number of instructions depending on the
2977// indices in the shuffle.
2978SDValue MipsSETargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
2979 SelectionDAG &DAG) const {
2980 ShuffleVectorSDNode *Node = cast<ShuffleVectorSDNode>(Op);
2981 EVT ResTy = Op->getValueType(0);
2982
2983 if (!ResTy.is128BitVector())
2984 return SDValue();
2985
2986 int ResTyNumElts = ResTy.getVectorNumElements();
2987 SmallVector<int, 16> Indices;
2988
2989 for (int i = 0; i < ResTyNumElts; ++i)
2990 Indices.push_back(Node->getMaskElt(i));
2991
2992 // splati.[bhwd] is preferable to the others but is matched from
2993 // MipsISD::VSHF.
2994 if (isVECTOR_SHUFFLE_SPLATI(Op, ResTy, Indices, DAG))
2995 return lowerVECTOR_SHUFFLE_VSHF(Op, ResTy, Indices, DAG);
2996 SDValue Result;
2997 if ((Result = lowerVECTOR_SHUFFLE_ILVEV(Op, ResTy, Indices, DAG)))
2998 return Result;
2999 if ((Result = lowerVECTOR_SHUFFLE_ILVOD(Op, ResTy, Indices, DAG)))
3000 return Result;
3001 if ((Result = lowerVECTOR_SHUFFLE_ILVL(Op, ResTy, Indices, DAG)))
3002 return Result;
3003 if ((Result = lowerVECTOR_SHUFFLE_ILVR(Op, ResTy, Indices, DAG)))
3004 return Result;
3005 if ((Result = lowerVECTOR_SHUFFLE_PCKEV(Op, ResTy, Indices, DAG)))
3006 return Result;
3007 if ((Result = lowerVECTOR_SHUFFLE_PCKOD(Op, ResTy, Indices, DAG)))
3008 return Result;
3009 if ((Result = lowerVECTOR_SHUFFLE_SHF(Op, ResTy, Indices, DAG)))
3010 return Result;
3011 return lowerVECTOR_SHUFFLE_VSHF(Op, ResTy, Indices, DAG);
3012}
3013
3014 MachineBasicBlock *
3015 MipsSETargetLowering::emitBPOSGE32(MachineInstr &MI,
3016 MachineBasicBlock *BB) const {
3017 // $bb:
3018 // bposge32_pseudo $vr0
3019 // =>
3020 // $bb:
3021 // bposge32 $tbb
3022 // $fbb:
3023 // li $vr2, 0
3024 // b $sink
3025 // $tbb:
3026 // li $vr1, 1
3027 // $sink:
3028 // $vr0 = phi($vr2, $fbb, $vr1, $tbb)
3029
3030 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3031 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3032 const TargetRegisterClass *RC = &Mips::GPR32RegClass;
3033 DebugLoc DL = MI.getDebugLoc();
3034 const BasicBlock *LLVM_BB = BB->getBasicBlock();
3035 MachineFunction::iterator It = std::next(MachineFunction::iterator(BB));
3036 MachineFunction *F = BB->getParent();
3037 MachineBasicBlock *FBB = F->CreateMachineBasicBlock(LLVM_BB);
3038 MachineBasicBlock *TBB = F->CreateMachineBasicBlock(LLVM_BB);
3039 MachineBasicBlock *Sink = F->CreateMachineBasicBlock(LLVM_BB);
3040 F->insert(It, FBB);
3041 F->insert(It, TBB);
3042 F->insert(It, Sink);
3043
3044 // Transfer the remainder of BB and its successor edges to Sink.
3045 Sink->splice(Sink->begin(), BB, std::next(MachineBasicBlock::iterator(MI)),
3046 BB->end());
3047 Sink->transferSuccessorsAndUpdatePHIs(BB);
3048
3049 // Add successors.
3050 BB->addSuccessor(FBB);
3051 BB->addSuccessor(TBB);
3052 FBB->addSuccessor(Sink);
3053 TBB->addSuccessor(Sink);
3054
3055 // Insert the real bposge32 instruction to $BB.
3056 BuildMI(BB, DL, TII->get(Mips::BPOSGE32)).addMBB(TBB);
3057 // Insert the real bposge32c instruction to $BB.
3058 BuildMI(BB, DL, TII->get(Mips::BPOSGE32C_MMR3)).addMBB(TBB);
3059
3060 // Fill $FBB.
3061 Register VR2 = RegInfo.createVirtualRegister(RC);
3062 BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::ADDiu), VR2)
3063 .addReg(Mips::ZERO).addImm(0);
3064 BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::B)).addMBB(Sink);
3065
3066 // Fill $TBB.
3067 Register VR1 = RegInfo.createVirtualRegister(RC);
3068 BuildMI(*TBB, TBB->end(), DL, TII->get(Mips::ADDiu), VR1)
3069 .addReg(Mips::ZERO).addImm(1);
3070
3071 // Insert phi function to $Sink.
3072 BuildMI(*Sink, Sink->begin(), DL, TII->get(Mips::PHI),
3073 MI.getOperand(0).getReg())
3074 .addReg(VR2)
3075 .addMBB(FBB)
3076 .addReg(VR1)
3077 .addMBB(TBB);
3078
3079 MI.eraseFromParent(); // The pseudo instruction is gone now.
3080 return Sink;
3081}
3082
3083MachineBasicBlock *MipsSETargetLowering::emitMSACBranchPseudo(
3084 MachineInstr &MI, MachineBasicBlock *BB, unsigned BranchOp) const {
3085 // $bb:
3086 // vany_nonzero $rd, $ws
3087 // =>
3088 // $bb:
3089 // bnz.b $ws, $tbb
3090 // b $fbb
3091 // $fbb:
3092 // li $rd1, 0
3093 // b $sink
3094 // $tbb:
3095 // li $rd2, 1
3096 // $sink:
3097 // $rd = phi($rd1, $fbb, $rd2, $tbb)
3098
3099 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3100 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3101 const TargetRegisterClass *RC = &Mips::GPR32RegClass;
3102 DebugLoc DL = MI.getDebugLoc();
3103 const BasicBlock *LLVM_BB = BB->getBasicBlock();
3104 MachineFunction::iterator It = std::next(MachineFunction::iterator(BB));
3105 MachineFunction *F = BB->getParent();
3106 MachineBasicBlock *FBB = F->CreateMachineBasicBlock(LLVM_BB);
3107 MachineBasicBlock *TBB = F->CreateMachineBasicBlock(LLVM_BB);
3108 MachineBasicBlock *Sink = F->CreateMachineBasicBlock(LLVM_BB);
3109 F->insert(It, FBB);
3110 F->insert(It, TBB);
3111 F->insert(It, Sink);
3112
3113 // Transfer the remainder of BB and its successor edges to Sink.
3114 Sink->splice(Sink->begin(), BB, std::next(MachineBasicBlock::iterator(MI)),
3115 BB->end());
3116 Sink->transferSuccessorsAndUpdatePHIs(BB);
3117
3118 // Add successors.
3119 BB->addSuccessor(FBB);
3120 BB->addSuccessor(TBB);
3121 FBB->addSuccessor(Sink);
3122 TBB->addSuccessor(Sink);
3123
3124 // Insert the real bnz.b instruction to $BB.
3125 BuildMI(BB, DL, TII->get(BranchOp))
3126 .addReg(MI.getOperand(1).getReg())
3127 .addMBB(TBB);
3128
3129 // Fill $FBB.
3130 Register RD1 = RegInfo.createVirtualRegister(RC);
3131 BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::ADDiu), RD1)
3132 .addReg(Mips::ZERO).addImm(0);
3133 BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::B)).addMBB(Sink);
3134
3135 // Fill $TBB.
3136 Register RD2 = RegInfo.createVirtualRegister(RC);
3137 BuildMI(*TBB, TBB->end(), DL, TII->get(Mips::ADDiu), RD2)
3138 .addReg(Mips::ZERO).addImm(1);
3139
3140 // Insert phi function to $Sink.
3141 BuildMI(*Sink, Sink->begin(), DL, TII->get(Mips::PHI),
3142 MI.getOperand(0).getReg())
3143 .addReg(RD1)
3144 .addMBB(FBB)
3145 .addReg(RD2)
3146 .addMBB(TBB);
3147
3148 MI.eraseFromParent(); // The pseudo instruction is gone now.
3149 return Sink;
3150}
3151
3152// Emit the COPY_FW pseudo instruction.
3153//
3154// copy_fw_pseudo $fd, $ws, n
3155// =>
3156// copy_u_w $rt, $ws, $n
3157// mtc1 $rt, $fd
3158//
3159// When n is zero, the equivalent operation can be performed with (potentially)
3160// zero instructions due to register overlaps. This optimization is never valid
3161 // for lane 1 because it would require FR=0 mode, which isn't supported by MSA.
3162 MachineBasicBlock *
3163 MipsSETargetLowering::emitCOPY_FW(MachineInstr &MI,
3164 MachineBasicBlock *BB) const {
3165 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3166 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3167 DebugLoc DL = MI.getDebugLoc();
3168 Register Fd = MI.getOperand(0).getReg();
3169 Register Ws = MI.getOperand(1).getReg();
3170 unsigned Lane = MI.getOperand(2).getImm();
3171
3172 if (Lane == 0) {
3173 unsigned Wt = Ws;
3174 if (!Subtarget.useOddSPReg()) {
3175 // We must copy to an even-numbered MSA register so that the
3176 // single-precision sub-register is also guaranteed to be even-numbered.
3177 Wt = RegInfo.createVirtualRegister(&Mips::MSA128WEvensRegClass);
3178
3179 BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Wt).addReg(Ws);
3180 }
3181
3182 BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Wt, 0, Mips::sub_lo);
3183 } else {
3184 Register Wt = RegInfo.createVirtualRegister(
3185 Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass
3186 : &Mips::MSA128WEvensRegClass);
3187
3188 BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_W), Wt).addReg(Ws).addImm(Lane);
3189 BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Wt, 0, Mips::sub_lo);
3190 }
3191
3192 MI.eraseFromParent(); // The pseudo instruction is gone now.
3193 return BB;
3194}
3195
3196// Emit the COPY_FD pseudo instruction.
3197//
3198// copy_fd_pseudo $fd, $ws, n
3199// =>
3200// splati.d $wt, $ws, $n
3201// copy $fd, $wt:sub_64
3202//
3203// When n is zero, the equivalent operation can be performed with (potentially)
3204 // zero instructions due to register overlaps. This optimization is always
3205 // valid because FR=1 is the only mode supported by MSA.
3206 MachineBasicBlock *
3207 MipsSETargetLowering::emitCOPY_FD(MachineInstr &MI,
3208 MachineBasicBlock *BB) const {
3209 assert(Subtarget.isFP64bit());
3210
3211 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3212 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3213 Register Fd = MI.getOperand(0).getReg();
3214 Register Ws = MI.getOperand(1).getReg();
3215 unsigned Lane = MI.getOperand(2).getImm() * 2;
3216 DebugLoc DL = MI.getDebugLoc();
3217
3218 if (Lane == 0)
3219 BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Ws, 0, Mips::sub_64);
3220 else {
3221 Register Wt = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass);
3222
3223 BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_D), Wt).addReg(Ws).addImm(1);
3224 BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Wt, 0, Mips::sub_64);
3225 }
3226
3227 MI.eraseFromParent(); // The pseudo instruction is gone now.
3228 return BB;
3229}
3230
3231// Emit the INSERT_FW pseudo instruction.
3232//
3233// insert_fw_pseudo $wd, $wd_in, $n, $fs
3234// =>
3235// subreg_to_reg $wt:sub_lo, $fs
3236// insve_w $wd[$n], $wd_in, $wt[0]
3237 MachineBasicBlock *
3238 MipsSETargetLowering::emitINSERT_FW(MachineInstr &MI,
3239 MachineBasicBlock *BB) const {
3240 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3241 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3242 DebugLoc DL = MI.getDebugLoc();
3243 Register Wd = MI.getOperand(0).getReg();
3244 Register Wd_in = MI.getOperand(1).getReg();
3245 unsigned Lane = MI.getOperand(2).getImm();
3246 Register Fs = MI.getOperand(3).getReg();
3247 Register Wt = RegInfo.createVirtualRegister(
3248 Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass
3249 : &Mips::MSA128WEvensRegClass);
3250
3251 BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Wt)
3252 .addImm(0)
3253 .addReg(Fs)
3254 .addImm(Mips::sub_lo);
3255 BuildMI(*BB, MI, DL, TII->get(Mips::INSVE_W), Wd)
3256 .addReg(Wd_in)
3257 .addImm(Lane)
3258 .addReg(Wt)
3259 .addImm(0);
3260
3261 MI.eraseFromParent(); // The pseudo instruction is gone now.
3262 return BB;
3263}
3264
3265// Emit the INSERT_FD pseudo instruction.
3266//
3267// insert_fd_pseudo $wd, $fs, n
3268// =>
3269// subreg_to_reg $wt:sub_64, $fs
3270// insve_d $wd[$n], $wd_in, $wt[0]
3271 MachineBasicBlock *
3272 MipsSETargetLowering::emitINSERT_FD(MachineInstr &MI,
3273 MachineBasicBlock *BB) const {
3274 assert(Subtarget.isFP64bit());
3275
3276 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3277 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3278 DebugLoc DL = MI.getDebugLoc();
3279 Register Wd = MI.getOperand(0).getReg();
3280 Register Wd_in = MI.getOperand(1).getReg();
3281 unsigned Lane = MI.getOperand(2).getImm();
3282 Register Fs = MI.getOperand(3).getReg();
3283 Register Wt = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass);
3284
3285 BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Wt)
3286 .addImm(0)
3287 .addReg(Fs)
3288 .addImm(Mips::sub_64);
3289 BuildMI(*BB, MI, DL, TII->get(Mips::INSVE_D), Wd)
3290 .addReg(Wd_in)
3291 .addImm(Lane)
3292 .addReg(Wt)
3293 .addImm(0);
3294
3295 MI.eraseFromParent(); // The pseudo instruction is gone now.
3296 return BB;
3297}
3298
3299// Emit the INSERT_([BHWD]|F[WD])_VIDX pseudo instruction.
3300//
3301// For integer:
3302// (INSERT_([BHWD]|F[WD])_PSEUDO $wd, $wd_in, $n, $rs)
3303// =>
3304 // (SLL $lanetmp1, $lane, <log2size>)
3305// (SLD_B $wdtmp1, $wd_in, $wd_in, $lanetmp1)
3306// (INSERT_[BHWD], $wdtmp2, $wdtmp1, 0, $rs)
3307// (NEG $lanetmp2, $lanetmp1)
3308// (SLD_B $wd, $wdtmp2, $wdtmp2, $lanetmp2)
3309//
3310// For floating point:
3311// (INSERT_([BHWD]|F[WD])_PSEUDO $wd, $wd_in, $n, $fs)
3312// =>
3313// (SUBREG_TO_REG $wt, $fs, <subreg>)
3314 // (SLL $lanetmp1, $lane, <log2size>)
3315// (SLD_B $wdtmp1, $wd_in, $wd_in, $lanetmp1)
3316// (INSVE_[WD], $wdtmp2, 0, $wdtmp1, 0)
3317// (NEG $lanetmp2, $lanetmp1)
3318// (SLD_B $wd, $wdtmp2, $wdtmp2, $lanetmp2)
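// For example, inserting into lane 5 of a v8i16: lanetmp1 = 5 << 1 = 10,
// the first sld.b rotates the vector by 10 bytes so that lane 5 becomes
// lane 0, the value is inserted at index 0, and the final sld.b rotates by
// the negated amount (interpreted modulo 16) to restore the element order.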
3319MachineBasicBlock *MipsSETargetLowering::emitINSERT_DF_VIDX(
3320 MachineInstr &MI, MachineBasicBlock *BB, unsigned EltSizeInBytes,
3321 bool IsFP) const {
3322 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3323 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3324 DebugLoc DL = MI.getDebugLoc();
3325 Register Wd = MI.getOperand(0).getReg();
3326 Register SrcVecReg = MI.getOperand(1).getReg();
3327 Register LaneReg = MI.getOperand(2).getReg();
3328 Register SrcValReg = MI.getOperand(3).getReg();
3329
3330 const TargetRegisterClass *VecRC = nullptr;
3331 // FIXME: This should be true for N32 too.
3332 const TargetRegisterClass *GPRRC =
3333 Subtarget.isABI_N64() ? &Mips::GPR64RegClass : &Mips::GPR32RegClass;
3334 unsigned SubRegIdx = Subtarget.isABI_N64() ? Mips::sub_32 : 0;
3335 unsigned ShiftOp = Subtarget.isABI_N64() ? Mips::DSLL : Mips::SLL;
3336 unsigned EltLog2Size;
3337 unsigned InsertOp = 0;
3338 unsigned InsveOp = 0;
3339 switch (EltSizeInBytes) {
3340 default:
3341 llvm_unreachable("Unexpected size");
3342 case 1:
3343 EltLog2Size = 0;
3344 InsertOp = Mips::INSERT_B;
3345 InsveOp = Mips::INSVE_B;
3346 VecRC = &Mips::MSA128BRegClass;
3347 break;
3348 case 2:
3349 EltLog2Size = 1;
3350 InsertOp = Mips::INSERT_H;
3351 InsveOp = Mips::INSVE_H;
3352 VecRC = &Mips::MSA128HRegClass;
3353 break;
3354 case 4:
3355 EltLog2Size = 2;
3356 InsertOp = Mips::INSERT_W;
3357 InsveOp = Mips::INSVE_W;
3358 VecRC = &Mips::MSA128WRegClass;
3359 break;
3360 case 8:
3361 EltLog2Size = 3;
3362 InsertOp = Mips::INSERT_D;
3363 InsveOp = Mips::INSVE_D;
3364 VecRC = &Mips::MSA128DRegClass;
3365 break;
3366 }
3367
3368 if (IsFP) {
3369 Register Wt = RegInfo.createVirtualRegister(VecRC);
3370 BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Wt)
3371 .addImm(0)
3372 .addReg(SrcValReg)
3373 .addImm(EltSizeInBytes == 8 ? Mips::sub_64 : Mips::sub_lo);
3374 SrcValReg = Wt;
3375 }
3376
3377 // Convert the lane index into a byte index
3378 if (EltSizeInBytes != 1) {
3379 Register LaneTmp1 = RegInfo.createVirtualRegister(GPRRC);
3380 BuildMI(*BB, MI, DL, TII->get(ShiftOp), LaneTmp1)
3381 .addReg(LaneReg)
3382 .addImm(EltLog2Size);
3383 LaneReg = LaneTmp1;
3384 }
3385
3386 // Rotate bytes around so that the desired lane is element zero
3387 Register WdTmp1 = RegInfo.createVirtualRegister(VecRC);
3388 BuildMI(*BB, MI, DL, TII->get(Mips::SLD_B), WdTmp1)
3389 .addReg(SrcVecReg)
3390 .addReg(SrcVecReg)
3391 .addReg(LaneReg, 0, SubRegIdx);
3392
3393 Register WdTmp2 = RegInfo.createVirtualRegister(VecRC);
3394 if (IsFP) {
3395 // Use insve.df to insert to element zero
3396 BuildMI(*BB, MI, DL, TII->get(InsveOp), WdTmp2)
3397 .addReg(WdTmp1)
3398 .addImm(0)
3399 .addReg(SrcValReg)
3400 .addImm(0);
3401 } else {
3402 // Use insert.df to insert to element zero
3403 BuildMI(*BB, MI, DL, TII->get(InsertOp), WdTmp2)
3404 .addReg(WdTmp1)
3405 .addReg(SrcValReg)
3406 .addImm(0);
3407 }
3408
3409 // Rotate elements the rest of the way for a full rotation.
3410 // sld.df interprets $rt modulo the number of columns so we only need to negate
3411 // the lane index to do this.
3412 Register LaneTmp2 = RegInfo.createVirtualRegister(GPRRC);
3413 BuildMI(*BB, MI, DL, TII->get(Subtarget.isABI_N64() ? Mips::DSUB : Mips::SUB),
3414 LaneTmp2)
3415 .addReg(Subtarget.isABI_N64() ? Mips::ZERO_64 : Mips::ZERO)
3416 .addReg(LaneReg);
3417 BuildMI(*BB, MI, DL, TII->get(Mips::SLD_B), Wd)
3418 .addReg(WdTmp2)
3419 .addReg(WdTmp2)
3420 .addReg(LaneTmp2, 0, SubRegIdx);
3421
3422 MI.eraseFromParent(); // The pseudo instruction is gone now.
3423 return BB;
3424}
3425
3426// Emit the FILL_FW pseudo instruction.
3427//
3428// fill_fw_pseudo $wd, $fs
3429// =>
3430// implicit_def $wt1
3431// insert_subreg $wt2:subreg_lo, $wt1, $fs
3432// splati.w $wd, $wt2[0]
3433 MachineBasicBlock *
3434 MipsSETargetLowering::emitFILL_FW(MachineInstr &MI,
3435 MachineBasicBlock *BB) const {
3436 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3437 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3438 DebugLoc DL = MI.getDebugLoc();
3439 Register Wd = MI.getOperand(0).getReg();
3440 Register Fs = MI.getOperand(1).getReg();
3441 Register Wt1 = RegInfo.createVirtualRegister(
3442 Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass
3443 : &Mips::MSA128WEvensRegClass);
3444 Register Wt2 = RegInfo.createVirtualRegister(
3445 Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass
3446 : &Mips::MSA128WEvensRegClass);
3447
3448 BuildMI(*BB, MI, DL, TII->get(Mips::IMPLICIT_DEF), Wt1);
3449 BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_SUBREG), Wt2)
3450 .addReg(Wt1)
3451 .addReg(Fs)
3452 .addImm(Mips::sub_lo);
3453 BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_W), Wd).addReg(Wt2).addImm(0);
3454
3455 MI.eraseFromParent(); // The pseudo instruction is gone now.
3456 return BB;
3457}
3458
3459// Emit the FILL_FD pseudo instruction.
3460//
3461// fill_fd_pseudo $wd, $fs
3462// =>
3463// implicit_def $wt1
3464// insert_subreg $wt2:subreg_64, $wt1, $fs
3465// splati.d $wd, $wt2[0]
3466 MachineBasicBlock *
3467 MipsSETargetLowering::emitFILL_FD(MachineInstr &MI,
3468 MachineBasicBlock *BB) const {
3469 assert(Subtarget.isFP64bit());
3470
3471 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3472 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3473 DebugLoc DL = MI.getDebugLoc();
3474 Register Wd = MI.getOperand(0).getReg();
3475 Register Fs = MI.getOperand(1).getReg();
3476 Register Wt1 = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass);
3477 Register Wt2 = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass);
3478
3479 BuildMI(*BB, MI, DL, TII->get(Mips::IMPLICIT_DEF), Wt1);
3480 BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_SUBREG), Wt2)
3481 .addReg(Wt1)
3482 .addReg(Fs)
3483 .addImm(Mips::sub_64);
3484 BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_D), Wd).addReg(Wt2).addImm(0);
3485
3486 MI.eraseFromParent(); // The pseudo instruction is gone now.
3487 return BB;
3488}
3489
3490 // Emit the ST_F16_PSEUDO instruction to store an f16 value from an MSA
3491// register.
3492//
3493// STF16 MSA128F16:$wd, mem_simm10:$addr
3494// =>
3495// copy_u.h $rtemp,$wd[0]
3496// sh $rtemp, $addr
3497//
3498 // Safety: We can't use st.h & co as they would overwrite the memory after
3499 // the destination. It would require half floats to be allocated 16 bytes(!)
3500 // of space.
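// (Here "st.h & co" refers to the MSA vector stores, which always write the
// full 128-bit register; the scalar sh emitted below writes exactly the two
// bytes of the f16 value.)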
3501 MachineBasicBlock *
3502 MipsSETargetLowering::emitST_F16_PSEUDO(MachineInstr &MI,
3503 MachineBasicBlock *BB) const {
3504
3505 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3506 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3507 DebugLoc DL = MI.getDebugLoc();
3508 Register Ws = MI.getOperand(0).getReg();
3509 Register Rt = MI.getOperand(1).getReg();
3510 const MachineMemOperand &MMO = **MI.memoperands_begin();
3511 unsigned Imm = MMO.getOffset();
3512
3513 // Caution: A load via the GOT can expand to a GPR32 operand; a load via
3514 // spill and reload can expand to a GPR64 operand. Examine the
3515 // operand in detail and default to ABI.
3516 const TargetRegisterClass *RC =
3517 MI.getOperand(1).isReg() ? RegInfo.getRegClass(MI.getOperand(1).getReg())
3518 : (Subtarget.isABI_O32() ? &Mips::GPR32RegClass
3519 : &Mips::GPR64RegClass);
3520 const bool UsingMips32 = RC == &Mips::GPR32RegClass;
3521 Register Rs = RegInfo.createVirtualRegister(&Mips::GPR32RegClass);
3522
3523 BuildMI(*BB, MI, DL, TII->get(Mips::COPY_U_H), Rs).addReg(Ws).addImm(0);
3524 if (!UsingMips32) {
3525 Register Tmp = RegInfo.createVirtualRegister(&Mips::GPR64RegClass);
3526 BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Tmp)
3527 .addImm(0)
3528 .addReg(Rs)
3529 .addImm(Mips::sub_32);
3530 Rs = Tmp;
3531 }
3532 BuildMI(*BB, MI, DL, TII->get(UsingMips32 ? Mips::SH : Mips::SH64))
3533 .addReg(Rs)
3534 .addReg(Rt)
3535 .addImm(Imm)
3536 .addMemOperand(BB->getParent()->getMachineMemOperand(
3537 &MMO, MMO.getOffset(), MMO.getSize()));
3538
3539 MI.eraseFromParent();
3540 return BB;
3541}
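// A hedged C-level model of the operation being implemented: a plain
// two-byte store, which is why the 16-byte st.h vector store cannot be
// substituted here (the function name is an assumption):
//
//   void store_f16(_Float16 *p, _Float16 v) {
//     *p = v; // must write exactly 2 bytes, nothing past the destination
//   }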
3542
3543 // Emit the LD_F16_PSEUDO instruction to load an f16 value into an MSA register.
3544//
3545// LD_F16 MSA128F16:$wd, mem_simm10:$addr
3546// =>
3547// lh $rtemp, $addr
3548// fill.h $wd, $rtemp
3549//
3550 // Safety: We can't use ld.h & co. as they over-read from the source.
3551 // Additionally, if the address is not 16-byte aligned, 2 cases can occur:
3552 // a) Segmentation fault as the load instruction reads from a memory page
3553 // it's not supposed to.
3554// b) The load crosses an implementation specific boundary, requiring OS
3555// intervention.
3556MachineBasicBlock *
3557MipsSETargetLowering::emitLD_F16_PSEUDO(MachineInstr &MI,
3558 MachineBasicBlock *BB) const {
3559
3560 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3561 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3562 DebugLoc DL = MI.getDebugLoc();
3563 Register Wd = MI.getOperand(0).getReg();
3564
3565 // Caution: A load via the GOT can expand to a GPR32 operand; a load via
3566 // spill and reload can expand to a GPR64 operand. Examine the
3567 // operand in detail and default to ABI.
3568 const TargetRegisterClass *RC =
3569 MI.getOperand(1).isReg() ? RegInfo.getRegClass(MI.getOperand(1).getReg())
3570 : (Subtarget.isABI_O32() ? &Mips::GPR32RegClass
3571 : &Mips::GPR64RegClass);
3572
3573 const bool UsingMips32 = RC == &Mips::GPR32RegClass;
3574 Register Rt = RegInfo.createVirtualRegister(RC);
3575
3576 MachineInstrBuilder MIB =
3577 BuildMI(*BB, MI, DL, TII->get(UsingMips32 ? Mips::LH : Mips::LH64), Rt);
3578 for (const MachineOperand &MO : llvm::drop_begin(MI.operands()))
3579 MIB.add(MO);
3580
3581 if (!UsingMips32) {
3582 Register Tmp = RegInfo.createVirtualRegister(&Mips::GPR32RegClass);
3583 BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Tmp).addReg(Rt, 0, Mips::sub_32);
3584 Rt = Tmp;
3585 }
3586
3587 BuildMI(*BB, MI, DL, TII->get(Mips::FILL_H), Wd).addReg(Rt);
3588
3589 MI.eraseFromParent();
3590 return BB;
3591}
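// The matching hedged C-level model for the load side; only the addressed
// two bytes may be touched (the function name is an assumption):
//
//   _Float16 load_f16(const _Float16 *p) {
//     return *p; // must read exactly 2 bytes, no over-read
//   }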
3592
3593// Emit the FPROUND_PSEUDO instruction.
3594//
3595// Round an FGR64Opnd, FGR32Opnd to an f16.
3596//
3597 // Safety: Cycle the operand through the GPRs so the result always ends up
3598 // in the correct MSA register.
3599//
3600// FIXME: This copying is strictly unnecessary. If we could tie FGR32Opnd:$Fs
3601// / FGR64Opnd:$Fs and MSA128F16:$Wd to the same physical register
3602// (which they can be, as the MSA registers are defined to alias the
3603// FPU's 64 bit and 32 bit registers) the result can be accessed using
3604 // the correct register class. That requires operands to be tie-able across
3605// register classes which have a sub/super register class relationship.
3606//
3607 // For FGR32Opnd:
3608//
3609// FPROUND MSA128F16:$wd, FGR32Opnd:$fs
3610// =>
3611// mfc1 $rtemp, $fs
3612// fill.w $rtemp, $wtemp
3613// fexdo.w $wd, $wtemp, $wtemp
3614//
3615 // For FGR64Opnd on mips32r2+:
3616//
3617// FPROUND MSA128F16:$wd, FGR64Opnd:$fs
3618// =>
3619// mfc1 $rtemp, $fs
3620// fill.w $rtemp, $wtemp
3621// mfhc1 $rtemp2, $fs
3622// insert.w $wtemp[1], $rtemp2
3623// insert.w $wtemp[3], $rtemp2
3624// fexdo.w $wtemp2, $wtemp, $wtemp
3625 // fexdo.h $wd, $wtemp2, $wtemp2
3626//
3627// For FGR64Opnd on mips64r2+:
3628//
3629// FPROUND MSA128F16:$wd, FGR64Opnd:$fs
3630// =>
3631// dmfc1 $rtemp, $fs
3632// fill.d $rtemp, $wtemp
3633// fexdo.w $wtemp2, $wtemp, $wtemp
3634// fexdo.h $wd, $wtemp2, $wtemp2
3635//
3636// Safety note: As $wtemp is UNDEF, we may provoke a spurious exception if the
3637// undef bits are "just right" and the exception enable bits are
3638// set. By using fill.w to replicate $fs into all elements over
3639 // insert.w for one element, we avoid that potential case. If
3640 // fexdo.[hw] causes an exception, the exception is valid and it
3641// occurs for all elements.
3642MachineBasicBlock *
3643MipsSETargetLowering::emitFPROUND_PSEUDO(MachineInstr &MI,
3644 MachineBasicBlock *BB,
3645 bool IsFGR64) const {
3646
3647 // Strictly speaking, we need MIPS32R5 to support MSA. We'll be generous
3648 // here. It's technically doable to support MIPS32 here, but the ISA forbids
3649 // it.
3650 assert(Subtarget.hasMSA() && Subtarget.hasMips32r2());
3651
3652 bool IsFGR64onMips64 = Subtarget.hasMips64() && IsFGR64;
3653 bool IsFGR64onMips32 = !Subtarget.hasMips64() && IsFGR64;
3654
3655 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3656 DebugLoc DL = MI.getDebugLoc();
3657 Register Wd = MI.getOperand(0).getReg();
3658 Register Fs = MI.getOperand(1).getReg();
3659
3660 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3661 Register Wtemp = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
3662 const TargetRegisterClass *GPRRC =
3663 IsFGR64onMips64 ? &Mips::GPR64RegClass : &Mips::GPR32RegClass;
3664 unsigned MFC1Opc = IsFGR64onMips64
3665 ? Mips::DMFC1
3666 : (IsFGR64onMips32 ? Mips::MFC1_D64 : Mips::MFC1);
3667 unsigned FILLOpc = IsFGR64onMips64 ? Mips::FILL_D : Mips::FILL_W;
3668
3669 // Perform the register class copy as mentioned above.
3670 Register Rtemp = RegInfo.createVirtualRegister(GPRRC);
3671 BuildMI(*BB, MI, DL, TII->get(MFC1Opc), Rtemp).addReg(Fs);
3672 BuildMI(*BB, MI, DL, TII->get(FILLOpc), Wtemp).addReg(Rtemp);
3673 unsigned WPHI = Wtemp;
3674
3675 if (IsFGR64onMips32) {
3676 Register Rtemp2 = RegInfo.createVirtualRegister(GPRRC);
3677 BuildMI(*BB, MI, DL, TII->get(Mips::MFHC1_D64), Rtemp2).addReg(Fs);
3678 Register Wtemp2 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
3679 Register Wtemp3 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
3680 BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_W), Wtemp2)
3681 .addReg(Wtemp)
3682 .addReg(Rtemp2)
3683 .addImm(1);
3684 BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_W), Wtemp3)
3685 .addReg(Wtemp2)
3686 .addReg(Rtemp2)
3687 .addImm(3);
3688 WPHI = Wtemp3;
3689 }
3690
3691 if (IsFGR64) {
3692 Register Wtemp2 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
3693 BuildMI(*BB, MI, DL, TII->get(Mips::FEXDO_W), Wtemp2)
3694 .addReg(WPHI)
3695 .addReg(WPHI);
3696 WPHI = Wtemp2;
3697 }
3698
3699 BuildMI(*BB, MI, DL, TII->get(Mips::FEXDO_H), Wd).addReg(WPHI).addReg(WPHI);
3700
3701 MI.eraseFromParent();
3702 return BB;
3703}
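// A hedged C-level sketch of the conversions this pseudo implements; the
// function names are assumptions, and _Float16 support in the C frontend
// is assumed:
//
//   _Float16 round_f32(float f)  { return (_Float16)f; } // !IsFGR64
//   _Float16 round_f64(double d) { return (_Float16)d; } // IsFGR64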
3704
3705// Emit the FPEXTEND_PSEUDO instruction.
3706//
3707// Expand an f16 to either a FGR32Opnd or FGR64Opnd.
3708//
3709 // Safety: Cycle the result through the GPRs so the result always ends up
3710 // in the correct floating point register.
3711//
3712// FIXME: This copying is strictly unnecessary. If we could tie FGR32Opnd:$Fd
3713// / FGR64Opnd:$Fd and MSA128F16:$Ws to the same physical register
3714// (which they can be, as the MSA registers are defined to alias the
3715// FPU's 64 bit and 32 bit registers) the result can be accessed using
3716 // the correct register class. That requires operands to be tie-able across
3717// register classes which have a sub/super register class relationship. I
3718// haven't checked.
3719//
3720// For FGR32Opnd:
3721//
3722// FPEXTEND FGR32Opnd:$fd, MSA128F16:$ws
3723// =>
3724// fexupr.w $wtemp, $ws
3725// copy_s.w $rtemp, $ws[0]
3726// mtc1 $rtemp, $fd
3727//
3728// For FGR64Opnd on Mips64:
3729//
3730// FPEXTEND FGR64Opnd:$fd, MSA128F16:$ws
3731// =>
3732// fexupr.w $wtemp, $ws
3733// fexupr.d $wtemp2, $wtemp
3734 // copy_s.d $rtemp, $wtemp2[0]
3735// dmtc1 $rtemp, $fd
3736//
3737// For FGR64Opnd on Mips32:
3738//
3739// FPEXTEND FGR64Opnd:$fd, MSA128F16:$ws
3740// =>
3741// fexupr.w $wtemp, $ws
3742// fexupr.d $wtemp2, $wtemp
3743// copy_s.w $rtemp, $wtemp2[0]
3744// mtc1 $rtemp, $ftemp
3745// copy_s.w $rtemp2, $wtemp2[1]
3746// $fd = mthc1 $rtemp2, $ftemp
3747MachineBasicBlock *
3748MipsSETargetLowering::emitFPEXTEND_PSEUDO(MachineInstr &MI,
3749 MachineBasicBlock *BB,
3750 bool IsFGR64) const {
3751
3752 // Strictly speaking, we need MIPS32R5 to support MSA. We'll be generous
3753 // here. It's technically doable to support MIPS32 here, but the ISA forbids
3754 // it.
3755 assert(Subtarget.hasMSA() && Subtarget.hasMips32r2());
3756
3757 bool IsFGR64onMips64 = Subtarget.hasMips64() && IsFGR64;
3758 bool IsFGR64onMips32 = !Subtarget.hasMips64() && IsFGR64;
3759
3760 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3761 DebugLoc DL = MI.getDebugLoc();
3762 Register Fd = MI.getOperand(0).getReg();
3763 Register Ws = MI.getOperand(1).getReg();
3764
3765 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3766 const TargetRegisterClass *GPRRC =
3767 IsFGR64onMips64 ? &Mips::GPR64RegClass : &Mips::GPR32RegClass;
3768 unsigned MTC1Opc = IsFGR64onMips64
3769 ? Mips::DMTC1
3770 : (IsFGR64onMips32 ? Mips::MTC1_D64 : Mips::MTC1);
3771 unsigned COPYOpc = IsFGR64onMips64 ? Mips::COPY_S_D : Mips::COPY_S_W;
3772
3773 Register Wtemp = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
3774 Register WPHI = Wtemp;
3775
3776 BuildMI(*BB, MI, DL, TII->get(Mips::FEXUPR_W), Wtemp).addReg(Ws);
3777 if (IsFGR64) {
3778 WPHI = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass);
3779 BuildMI(*BB, MI, DL, TII->get(Mips::FEXUPR_D), WPHI).addReg(Wtemp);
3780 }
3781
3782 // Perform the safety regclass copy mentioned above.
3783 Register Rtemp = RegInfo.createVirtualRegister(GPRRC);
3784 Register FPRPHI = IsFGR64onMips32
3785 ? RegInfo.createVirtualRegister(&Mips::FGR64RegClass)
3786 : Fd;
3787 BuildMI(*BB, MI, DL, TII->get(COPYOpc), Rtemp).addReg(WPHI).addImm(0);
3788 BuildMI(*BB, MI, DL, TII->get(MTC1Opc), FPRPHI).addReg(Rtemp);
3789
3790 if (IsFGR64onMips32) {
3791 Register Rtemp2 = RegInfo.createVirtualRegister(GPRRC);
3792 BuildMI(*BB, MI, DL, TII->get(Mips::COPY_S_W), Rtemp2)
3793 .addReg(WPHI)
3794 .addImm(1);
3795 BuildMI(*BB, MI, DL, TII->get(Mips::MTHC1_D64), Fd)
3796 .addReg(FPRPHI)
3797 .addReg(Rtemp2);
3798 }
3799
3800 MI.eraseFromParent();
3801 return BB;
3802}
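// The inverse direction, as a hedged C-level sketch (same assumptions as
// for FPROUND above):
//
//   float  extend_f32(_Float16 h) { return (float)h; }  // !IsFGR64
//   double extend_f64(_Float16 h) { return (double)h; } // IsFGR64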
3803
3804// Emit the FEXP2_W_1 pseudo instructions.
3805//
3806// fexp2_w_1_pseudo $wd, $wt
3807// =>
3808// ldi.w $ws, 1
3809// fexp2.w $wd, $ws, $wt
3810MachineBasicBlock *
3811MipsSETargetLowering::emitFEXP2_W_1(MachineInstr &MI,
3812 MachineBasicBlock *BB) const {
3813 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3814 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3815 const TargetRegisterClass *RC = &Mips::MSA128WRegClass;
3816 Register Ws1 = RegInfo.createVirtualRegister(RC);
3817 Register Ws2 = RegInfo.createVirtualRegister(RC);
3818 DebugLoc DL = MI.getDebugLoc();
3819
3820 // Splat 1.0 into a vector
3821 BuildMI(*BB, MI, DL, TII->get(Mips::LDI_W), Ws1).addImm(1);
3822 BuildMI(*BB, MI, DL, TII->get(Mips::FFINT_U_W), Ws2).addReg(Ws1);
3823
3824 // Emit 1.0 * fexp2(Wt)
3825 BuildMI(*BB, MI, DL, TII->get(Mips::FEXP2_W), MI.getOperand(0).getReg())
3826 .addReg(Ws2)
3827 .addReg(MI.getOperand(1).getReg());
3828
3829 MI.eraseFromParent(); // The pseudo instruction is gone now.
3830 return BB;
3831}
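// The expansion relies on fexp2.w computing ws * 2^wt per lane, so with a
// splat of 1.0 in $ws the result is exactly 2^wt. A hedged scalar C model
// of what each lane computes (function name is an assumption):
//
//   #include <math.h>
//   float lane_fexp2_w(float wt) { return 1.0f * exp2f(wt); }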
3832
3833// Emit the FEXP2_D_1 pseudo instructions.
3834//
3835// fexp2_d_1_pseudo $wd, $wt
3836// =>
3837// ldi.d $ws, 1
3838// fexp2.d $wd, $ws, $wt
3839MachineBasicBlock *
3840MipsSETargetLowering::emitFEXP2_D_1(MachineInstr &MI,
3841 MachineBasicBlock *BB) const {
3842 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3843 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3844 const TargetRegisterClass *RC = &Mips::MSA128DRegClass;
3845 Register Ws1 = RegInfo.createVirtualRegister(RC);
3846 Register Ws2 = RegInfo.createVirtualRegister(RC);
3847 DebugLoc DL = MI.getDebugLoc();
3848
3849 // Splat 1.0 into a vector
3850 BuildMI(*BB, MI, DL, TII->get(Mips::LDI_D), Ws1).addImm(1);
3851 BuildMI(*BB, MI, DL, TII->get(Mips::FFINT_U_D), Ws2).addReg(Ws1);
3852
3853 // Emit 1.0 * fexp2(Wt)
3854 BuildMI(*BB, MI, DL, TII->get(Mips::FEXP2_D), MI.getOperand(0).getReg())
3855 .addReg(Ws2)
3856 .addReg(MI.getOperand(1).getReg());
3857
3858 MI.eraseFromParent(); // The pseudo instruction is gone now.
3859 return BB;
3860}
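// The double-precision variant follows the same identity; a hedged scalar
// C model per lane (function name is an assumption):
//
//   #include <math.h>
//   double lane_fexp2_d(double wt) { return 1.0 * exp2(wt); }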