//===- MipsSEISelLowering.cpp - MipsSE DAG Lowering Interface -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Subclass of MipsTargetLowering specialized for mips32/64.
//
//===----------------------------------------------------------------------===//

#include "MipsSEISelLowering.h"
#include "MipsMachineFunction.h"
#include "MipsRegisterInfo.h"
#include "MipsSubtarget.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsMips.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "mips-isel"

static cl::opt<bool>
    UseMipsTailCalls("mips-tail-calls", cl::Hidden,
                     cl::desc("MIPS: permit tail calls."), cl::init(false));

static cl::opt<bool> NoDPLoadStore("mno-ldc1-sdc1", cl::init(false),
                                   cl::desc("Expand double precision loads and "
                                            "stores to their single precision "
                                            "counterparts"));
MipsSETargetLowering::MipsSETargetLowering(const MipsTargetMachine &TM,
                                           const MipsSubtarget &STI)
    : MipsTargetLowering(TM, STI) {
  // Set up the register classes
  addRegisterClass(MVT::i32, &Mips::GPR32RegClass);

  if (Subtarget.isGP64bit())
    addRegisterClass(MVT::i64, &Mips::GPR64RegClass);

  if (Subtarget.hasDSP() || Subtarget.hasMSA()) {
    // Expand all truncating stores and extending loads.
    for (MVT VT0 : MVT::fixedlen_vector_valuetypes()) {
      for (MVT VT1 : MVT::fixedlen_vector_valuetypes()) {
        setTruncStoreAction(VT0, VT1, Expand);
        setLoadExtAction(ISD::SEXTLOAD, VT0, VT1, Expand);
        setLoadExtAction(ISD::ZEXTLOAD, VT0, VT1, Expand);
        setLoadExtAction(ISD::EXTLOAD, VT0, VT1, Expand);
      }
    }
  }

  if (Subtarget.hasDSP()) {
    MVT::SimpleValueType VecTys[2] = {MVT::v2i16, MVT::v4i8};

    for (const auto &VecTy : VecTys) {
      addRegisterClass(VecTy, &Mips::DSPRRegClass);

      // Expand all builtin opcodes.
      for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
        setOperationAction(Opc, VecTy, Expand);

      setOperationAction(ISD::ADD, VecTy, Legal);
      setOperationAction(ISD::SUB, VecTy, Legal);
      setOperationAction(ISD::LOAD, VecTy, Legal);
      setOperationAction(ISD::STORE, VecTy, Legal);
      setOperationAction(ISD::BITCAST, VecTy, Legal);
    }

    setTargetDAGCombine(
        {ISD::SHL, ISD::SRA, ISD::SRL, ISD::SETCC, ISD::VSELECT});

    if (Subtarget.hasMips32r2()) {
      setOperationAction(ISD::ADDC, MVT::i32, Legal);
      setOperationAction(ISD::ADDE, MVT::i32, Legal);
    }
  }

  if (Subtarget.hasDSPR2())
    setOperationAction(ISD::MUL, MVT::v2i16, Legal);

  if (Subtarget.hasMSA()) {
    addMSAIntType(MVT::v16i8, &Mips::MSA128BRegClass);
    addMSAIntType(MVT::v8i16, &Mips::MSA128HRegClass);
    addMSAIntType(MVT::v4i32, &Mips::MSA128WRegClass);
    addMSAIntType(MVT::v2i64, &Mips::MSA128DRegClass);
    addMSAFloatType(MVT::v8f16, &Mips::MSA128HRegClass);
    addMSAFloatType(MVT::v4f32, &Mips::MSA128WRegClass);
    addMSAFloatType(MVT::v2f64, &Mips::MSA128DRegClass);

    // f16 is a storage-only type, always promote it to f32.
    addRegisterClass(MVT::f16, &Mips::MSA128HRegClass);

  }

  if (!Subtarget.useSoftFloat()) {
    addRegisterClass(MVT::f32, &Mips::FGR32RegClass);

    // When dealing with single precision only, use libcalls
    if (!Subtarget.isSingleFloat()) {
      if (Subtarget.isFP64bit())
        addRegisterClass(MVT::f64, &Mips::FGR64RegClass);
      else
        addRegisterClass(MVT::f64, &Mips::AFGR64RegClass);
    }
  }

  setOperationAction(ISD::SMUL_LOHI, MVT::i32, Custom);
  setOperationAction(ISD::UMUL_LOHI, MVT::i32, Custom);
  setOperationAction(ISD::MULHS, MVT::i32, Custom);
  setOperationAction(ISD::MULHU, MVT::i32, Custom);

  if (Subtarget.hasCnMips())
    setOperationAction(ISD::MUL, MVT::i64, Legal);
  else if (Subtarget.isGP64bit())
    setOperationAction(ISD::MUL, MVT::i64, Custom);

  if (Subtarget.isGP64bit()) {
    setOperationAction(ISD::SMUL_LOHI, MVT::i64, Custom);
    setOperationAction(ISD::UMUL_LOHI, MVT::i64, Custom);
    setOperationAction(ISD::MULHS, MVT::i64, Custom);
    setOperationAction(ISD::MULHU, MVT::i64, Custom);
    setOperationAction(ISD::SDIVREM, MVT::i64, Custom);
    setOperationAction(ISD::UDIVREM, MVT::i64, Custom);
  }

  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);

  setOperationAction(ISD::SDIVREM, MVT::i32, Custom);
  setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
  setOperationAction(ISD::LOAD, MVT::i32, Custom);
  setOperationAction(ISD::STORE, MVT::i32, Custom);

  setTargetDAGCombine(ISD::MUL);

  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);

  if (Subtarget.hasMips32r2() && !Subtarget.useSoftFloat() &&
      !Subtarget.hasMips64()) {
    setOperationAction(ISD::BITCAST, MVT::i64, Custom);
  }

  if (NoDPLoadStore) {
    setOperationAction(ISD::LOAD, MVT::f64, Custom);
    setOperationAction(ISD::STORE, MVT::f64, Custom);
  }

  if (Subtarget.hasMips32r6()) {
    // MIPS32r6 replaces the accumulator-based multiplies with a three register
    // instruction.
    setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
    setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
    setOperationAction(ISD::MUL, MVT::i32, Legal);
    setOperationAction(ISD::MULHS, MVT::i32, Legal);
    setOperationAction(ISD::MULHU, MVT::i32, Legal);

    // MIPS32r6 replaces the accumulator-based division/remainder with separate
    // three register division and remainder instructions.
    setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
    setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
    setOperationAction(ISD::SDIV, MVT::i32, Legal);
    setOperationAction(ISD::UDIV, MVT::i32, Legal);
    setOperationAction(ISD::SREM, MVT::i32, Legal);
    setOperationAction(ISD::UREM, MVT::i32, Legal);

    // MIPS32r6 replaces conditional moves with an equivalent that removes the
    // need for three GPR read ports.
    setOperationAction(ISD::SETCC, MVT::i32, Legal);
    setOperationAction(ISD::SELECT, MVT::i32, Legal);
    setOperationAction(ISD::SELECT_CC, MVT::i32, Expand);

    setOperationAction(ISD::SETCC, MVT::f32, Legal);
    setOperationAction(ISD::SELECT, MVT::f32, Legal);
    setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);

    assert(Subtarget.isFP64bit() && "FR=1 is required for MIPS32r6");
    setOperationAction(ISD::SETCC, MVT::f64, Legal);
    setOperationAction(ISD::SELECT, MVT::f64, Custom);
    setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);

    setOperationAction(ISD::BRCOND, MVT::Other, Legal);

    // Floating point > and >= are supported via < and <=
    setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
    setCondCodeAction(ISD::SETOGT, MVT::f32, Expand);
    setCondCodeAction(ISD::SETUGE, MVT::f32, Expand);
    setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);

    setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
    setCondCodeAction(ISD::SETOGT, MVT::f64, Expand);
    setCondCodeAction(ISD::SETUGE, MVT::f64, Expand);
    setCondCodeAction(ISD::SETUGT, MVT::f64, Expand);
  }

  if (Subtarget.hasMips64r6()) {
    // MIPS64r6 replaces the accumulator-based multiplies with a three register
    // instruction.
    setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
    setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
    setOperationAction(ISD::MUL, MVT::i64, Legal);
    setOperationAction(ISD::MULHS, MVT::i64, Legal);
    setOperationAction(ISD::MULHU, MVT::i64, Legal);

    // MIPS64r6 replaces the accumulator-based division/remainder with separate
    // three register division and remainder instructions.
    setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
    setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
    setOperationAction(ISD::SDIV, MVT::i64, Legal);
    setOperationAction(ISD::UDIV, MVT::i64, Legal);
    setOperationAction(ISD::SREM, MVT::i64, Legal);
    setOperationAction(ISD::UREM, MVT::i64, Legal);

    // MIPS64r6 replaces conditional moves with an equivalent that removes the
    // need for three GPR read ports.
    setOperationAction(ISD::SETCC, MVT::i64, Legal);
    setOperationAction(ISD::SELECT, MVT::i64, Legal);
    setOperationAction(ISD::SELECT_CC, MVT::i64, Expand);
  }

  computeRegisterProperties(Subtarget.getRegisterInfo());
}

const MipsTargetLowering *
llvm::createMipsSETargetLowering(const MipsTargetMachine &TM,
                                 const MipsSubtarget &STI) {
  return new MipsSETargetLowering(TM, STI);
}

const TargetRegisterClass *
MipsSETargetLowering::getRepRegClassFor(MVT VT) const {
  if (VT == MVT::Untyped)
    return Subtarget.hasDSP() ? &Mips::ACC64DSPRegClass : &Mips::ACC64RegClass;

  return TargetLowering::getRepRegClassFor(VT);
}

// Enable MSA support for the given integer type and Register class.
void MipsSETargetLowering::addMSAIntType(MVT::SimpleValueType Ty,
                                         const TargetRegisterClass *RC) {
  addRegisterClass(Ty, RC);

  // Expand all builtin opcodes.
  for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
    setOperationAction(Opc, Ty, Expand);

  setOperationAction(ISD::BITCAST, Ty, Legal);
  setOperationAction(ISD::LOAD, Ty, Legal);
  setOperationAction(ISD::STORE, Ty, Legal);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, Ty, Custom);
  setOperationAction(ISD::INSERT_VECTOR_ELT, Ty, Legal);
  setOperationAction(ISD::BUILD_VECTOR, Ty, Custom);

  if (Ty == MVT::v4i32 || Ty == MVT::v2i64) {
    setOperationAction(ISD::FP_TO_SINT, Ty, Legal);
    setOperationAction(ISD::FP_TO_UINT, Ty, Legal);
    setOperationAction(ISD::SINT_TO_FP, Ty, Legal);
    setOperationAction(ISD::UINT_TO_FP, Ty, Legal);
  }

}

// Enable MSA support for the given floating-point type and Register class.
void MipsSETargetLowering::addMSAFloatType(MVT::SimpleValueType Ty,
                                           const TargetRegisterClass *RC) {
  addRegisterClass(Ty, RC);

  // Expand all builtin opcodes.
  for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
    setOperationAction(Opc, Ty, Expand);

  setOperationAction(ISD::LOAD, Ty, Legal);
  setOperationAction(ISD::STORE, Ty, Legal);
  setOperationAction(ISD::BITCAST, Ty, Legal);

  if (Ty != MVT::v8f16) {
    setOperationAction(ISD::FADD, Ty, Legal);
    setOperationAction(ISD::FDIV, Ty, Legal);
    setOperationAction(ISD::FMA, Ty, Legal);
    setOperationAction(ISD::FMUL, Ty, Legal);
    setOperationAction(ISD::FSQRT, Ty, Legal);
    setOperationAction(ISD::FSUB, Ty, Legal);
    setOperationAction(ISD::VSELECT, Ty, Legal);

    setOperationAction(ISD::SETCC, Ty, Legal);
  }
}

SDValue MipsSETargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
  if (!(Subtarget.hasMips32r6() && Subtarget.isFP64bit()))
    return MipsTargetLowering::lowerSELECT(Op, DAG);

  EVT ResTy = Op->getValueType(0);
  SDLoc DL(Op);

  // Although MTC1_D64 takes an i32 and writes an f64, the upper 32 bits of the
  // floating point register are undefined. Not really an issue as sel.d, which
  // is produced from an FSELECT node, only looks at bit 0.
  SDValue Tmp = DAG.getNode(MipsISD::MTC1_D64, DL, MVT::f64, Op->getOperand(0));
  return DAG.getNode(MipsISD::FSELECT, DL, ResTy, Tmp, Op->getOperand(1),
                     Op->getOperand(2));
}

bool MipsSETargetLowering::allowsMisalignedMemoryAccesses(
    EVT VT, unsigned, Align, MachineMemOperand::Flags, unsigned *Fast) const {
  MVT::SimpleValueType SVT = VT.getSimpleVT().SimpleTy;

  if (Subtarget.systemSupportsUnalignedAccess()) {
    // MIPS32r6/MIPS64r6 is required to support unaligned access. It's
    // implementation defined whether this is handled by hardware, software, or
    // a hybrid of the two, but it's expected that most implementations will
    // handle the majority of cases in hardware.
    if (Fast)
      *Fast = 1;
    return true;
  }

  switch (SVT) {
  case MVT::i64:
  case MVT::i32:
    if (Fast)
      *Fast = 1;
    return true;
  default:
    return false;
  }
}

SDValue MipsSETargetLowering::LowerOperation(SDValue Op,
                                             SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  case ISD::LOAD: return lowerLOAD(Op, DAG);
  case ISD::STORE: return lowerSTORE(Op, DAG);
  case ISD::SMUL_LOHI: return lowerMulDiv(Op, MipsISD::Mult, true, true, DAG);
  case ISD::UMUL_LOHI: return lowerMulDiv(Op, MipsISD::Multu, true, true, DAG);
  case ISD::MULHS: return lowerMulDiv(Op, MipsISD::Mult, false, true, DAG);
  case ISD::MULHU: return lowerMulDiv(Op, MipsISD::Multu, false, true, DAG);
  case ISD::MUL: return lowerMulDiv(Op, MipsISD::Mult, true, false, DAG);
  case ISD::SDIVREM: return lowerMulDiv(Op, MipsISD::DivRem, true, true, DAG);
  case ISD::UDIVREM: return lowerMulDiv(Op, MipsISD::DivRemU, true, true,
                                        DAG);
  case ISD::INTRINSIC_WO_CHAIN: return lowerINTRINSIC_WO_CHAIN(Op, DAG);
  case ISD::INTRINSIC_W_CHAIN: return lowerINTRINSIC_W_CHAIN(Op, DAG);
  case ISD::INTRINSIC_VOID: return lowerINTRINSIC_VOID(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT: return lowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::BUILD_VECTOR: return lowerBUILD_VECTOR(Op, DAG);
  case ISD::VECTOR_SHUFFLE: return lowerVECTOR_SHUFFLE(Op, DAG);
  case ISD::SELECT: return lowerSELECT(Op, DAG);
  case ISD::BITCAST: return lowerBITCAST(Op, DAG);
  }

  return MipsTargetLowering::LowerOperation(Op, DAG);
}

// Fold zero extensions into MipsISD::VEXTRACT_[SZ]EXT_ELT
//
// Performs the following transformations:
// - Changes MipsISD::VEXTRACT_[SZ]EXT_ELT to zero extension if its
//   sign/zero-extension is completely overwritten by the new one performed by
//   the ISD::AND.
// - Removes redundant zero extensions performed by an ISD::AND.
static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const MipsSubtarget &Subtarget) {
  if (!Subtarget.hasMSA())
    return SDValue();

  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  unsigned Op0Opcode = Op0->getOpcode();

  // (and (MipsVExtract[SZ]Ext $a, $b, $c), imm:$d)
  // where $d + 1 == 2^n and n == 32
  // or    $d + 1 == 2^n and n <= 32 and ZExt
  // -> (MipsVExtractZExt $a, $b, $c)
  if (Op0Opcode == MipsISD::VEXTRACT_SEXT_ELT ||
      Op0Opcode == MipsISD::VEXTRACT_ZEXT_ELT) {
    ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(Op1);

    if (!Mask)
      return SDValue();

    int32_t Log2IfPositive = (Mask->getAPIntValue() + 1).exactLogBase2();

    if (Log2IfPositive <= 0)
      return SDValue(); // Mask+1 is not a power of 2

    SDValue Op0Op2 = Op0->getOperand(2);
    EVT ExtendTy = cast<VTSDNode>(Op0Op2)->getVT();
    unsigned ExtendTySize = ExtendTy.getSizeInBits();
    unsigned Log2 = Log2IfPositive;

    if ((Op0Opcode == MipsISD::VEXTRACT_ZEXT_ELT && Log2 >= ExtendTySize) ||
        Log2 == ExtendTySize) {
      SDValue Ops[] = { Op0->getOperand(0), Op0->getOperand(1), Op0Op2 };
      return DAG.getNode(MipsISD::VEXTRACT_ZEXT_ELT, SDLoc(Op0),
                         Op0->getVTList(),
                         ArrayRef(Ops, Op0->getNumOperands()));
    }
  }

  return SDValue();
}
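
// Illustrative example (not part of the original comments): with a v8i16
// source vector, the extract produces an i32 sign/zero-extended from 16 bits,
// so
//   (and (MipsVExtractSExt $a, $b, i16), 65535)
// satisfies $d + 1 == 2^16 with ZExt semantics and the whole AND node is
// replaced by
//   (MipsVExtractZExt $a, $b, i16)
// making the masking operation redundant.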

// Determine if the specified node is a constant vector splat.
//
// Returns true and sets Imm if:
// * N is an ISD::BUILD_VECTOR representing a constant splat
//
// This function is quite similar to MipsSEDAGToDAGISel::selectVSplat. The
// differences are that it assumes the MSA has already been checked and the
// arbitrary requirement for a maximum of 32-bit integers isn't applied (and
// must not be in order for binsri.d to be selectable).
static bool isVSplat(SDValue N, APInt &Imm, bool IsLittleEndian) {
  BuildVectorSDNode *Node = dyn_cast<BuildVectorSDNode>(N.getNode());

  if (!Node)
    return false;

  APInt SplatValue, SplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;

  if (!Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
                             8, !IsLittleEndian))
    return false;

  Imm = SplatValue;

  return true;
}

// Test whether the given node is an all-ones build_vector.
static bool isVectorAllOnes(SDValue N) {
  // Look through bitcasts. Endianness doesn't matter because we are looking
  // for an all-ones value.
  if (N->getOpcode() == ISD::BITCAST)
    N = N->getOperand(0);

  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N);

  if (!BVN)
    return false;

  APInt SplatValue, SplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;

  // Endianness doesn't matter in this context because we are looking for
  // an all-ones value.
  if (BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs))
    return SplatValue.isAllOnes();

  return false;
}

// Test whether N is the bitwise inverse of OfNode.
static bool isBitwiseInverse(SDValue N, SDValue OfNode) {
  if (N->getOpcode() != ISD::XOR)
    return false;

  if (isVectorAllOnes(N->getOperand(0)))
    return N->getOperand(1) == OfNode;

  if (isVectorAllOnes(N->getOperand(1)))
    return N->getOperand(0) == OfNode;

  return false;
}

// Perform combines where ISD::OR is the root node.
//
// Performs the following transformations:
// - (or (and $a, $mask), (and $b, $inv_mask)) => (vselect $mask, $a, $b)
//   where $inv_mask is the bitwise inverse of $mask and the 'or' has a 128-bit
//   vector type.
static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
                                TargetLowering::DAGCombinerInfo &DCI,
                                const MipsSubtarget &Subtarget) {
  if (!Subtarget.hasMSA())
    return SDValue();

  EVT Ty = N->getValueType(0);

  if (!Ty.is128BitVector())
    return SDValue();

  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);

  if (Op0->getOpcode() == ISD::AND && Op1->getOpcode() == ISD::AND) {
    SDValue Op0Op0 = Op0->getOperand(0);
    SDValue Op0Op1 = Op0->getOperand(1);
    SDValue Op1Op0 = Op1->getOperand(0);
    SDValue Op1Op1 = Op1->getOperand(1);
    bool IsLittleEndian = !Subtarget.isLittle();

    SDValue IfSet, IfClr, Cond;
    bool IsConstantMask = false;
    APInt Mask, InvMask;

    // If Op0Op0 is an appropriate mask, try to find its inverse in either
    // Op1Op0, or Op1Op1. Keep track of the Cond, IfSet, and IfClr nodes,
    // while looking.
    // IfClr will be set if we find a valid match.
    if (isVSplat(Op0Op0, Mask, IsLittleEndian)) {
      Cond = Op0Op0;
      IfSet = Op0Op1;

      if (isVSplat(Op1Op0, InvMask, IsLittleEndian) &&
          Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask)
        IfClr = Op1Op1;
      else if (isVSplat(Op1Op1, InvMask, IsLittleEndian) &&
               Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask)
        IfClr = Op1Op0;

      IsConstantMask = true;
    }

    // If IfClr is not yet set, and Op0Op1 is an appropriate mask, try the same
    // thing again using this mask.
    // IfClr will be set if we find a valid match.
    if (!IfClr.getNode() && isVSplat(Op0Op1, Mask, IsLittleEndian)) {
      Cond = Op0Op1;
      IfSet = Op0Op0;

      if (isVSplat(Op1Op0, InvMask, IsLittleEndian) &&
          Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask)
        IfClr = Op1Op1;
      else if (isVSplat(Op1Op1, InvMask, IsLittleEndian) &&
               Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask)
        IfClr = Op1Op0;

      IsConstantMask = true;
    }

    // If IfClr is not yet set, try looking for a non-constant match.
    // IfClr will be set if we find a valid match amongst the eight
    // possibilities.
    if (!IfClr.getNode()) {
      if (isBitwiseInverse(Op0Op0, Op1Op0)) {
        Cond = Op1Op0;
        IfSet = Op1Op1;
        IfClr = Op0Op1;
      } else if (isBitwiseInverse(Op0Op1, Op1Op0)) {
        Cond = Op1Op0;
        IfSet = Op1Op1;
        IfClr = Op0Op0;
      } else if (isBitwiseInverse(Op0Op0, Op1Op1)) {
        Cond = Op1Op1;
        IfSet = Op1Op0;
        IfClr = Op0Op1;
      } else if (isBitwiseInverse(Op0Op1, Op1Op1)) {
        Cond = Op1Op1;
        IfSet = Op1Op0;
        IfClr = Op0Op0;
      } else if (isBitwiseInverse(Op1Op0, Op0Op0)) {
        Cond = Op0Op0;
        IfSet = Op0Op1;
        IfClr = Op1Op1;
      } else if (isBitwiseInverse(Op1Op1, Op0Op0)) {
        Cond = Op0Op0;
        IfSet = Op0Op1;
        IfClr = Op1Op0;
      } else if (isBitwiseInverse(Op1Op0, Op0Op1)) {
        Cond = Op0Op1;
        IfSet = Op0Op0;
        IfClr = Op1Op1;
      } else if (isBitwiseInverse(Op1Op1, Op0Op1)) {
        Cond = Op0Op1;
        IfSet = Op0Op0;
        IfClr = Op1Op0;
      }
    }

    // At this point, IfClr will be set if we have a valid match.
    if (!IfClr.getNode())
      return SDValue();

    assert(Cond.getNode() && IfSet.getNode());

    // Fold degenerate cases.
    if (IsConstantMask) {
      if (Mask.isAllOnes())
        return IfSet;
      else if (Mask == 0)
        return IfClr;
    }

    // Transform the DAG into an equivalent VSELECT.
    return DAG.getNode(ISD::VSELECT, SDLoc(N), Ty, Cond, IfSet, IfClr);
  }

  return SDValue();
}
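
// Worked example (illustrative, not from the original comments): for v16i8
// operands with a constant splat mask,
//   (or (and $a, splat(0x0F)), (and $b, splat(0xF0)))
// matches with Cond = splat(0x0F), IfSet = $a, IfClr = $b, and is rewritten
// to (vselect splat(0x0F), $a, $b): bits are taken from $a where the mask is
// set and from $b where it is clear, which maps onto MSA's bitwise selects
// (bsel.v/bmnz.v).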

static bool shouldTransformMulToShiftsAddsSubs(APInt C, EVT VT,
                                               SelectionDAG &DAG,
                                               const MipsSubtarget &Subtarget) {
  // Estimate the number of operations the below transform will turn a
  // constant multiply into. The number is approximately equal to the minimal
  // number of powers of two that constant can be broken down to by adding
  // or subtracting them.
  //
  // If we have taken more than 12[1] / 8[2] steps to attempt the
  // optimization for a native sized value, it is more than likely that this
  // optimization will make things worse.
  //
  // [1] MIPS64 requires 6 instructions at most to materialize any constant,
  //     multiplication requires at least 4 cycles, but another cycle (or two)
  //     to retrieve the result from the HI/LO registers.
  //
  // [2] For MIPS32, more than 8 steps is expensive as the constant could be
  //     materialized in 2 instructions, multiplication requires at least 4
  //     cycles, but another cycle (or two) to retrieve the result from the
  //     HI/LO registers.
  //
  // TODO:
  // - MaxSteps needs to consider the `VT` of the constant for the current
  //   target.
  // - Consider performing this optimization after type legalization.
  //   That would allow removing a workaround for types not supported
  //   natively.
  // - Take into account the `-Os, -Oz` flags because this optimization
  //   increases code size.
  unsigned MaxSteps = Subtarget.isABI_O32() ? 8 : 12;

  SmallVector<APInt, 16> WorkStack(1, C);
  unsigned Steps = 0;
  unsigned BitWidth = C.getBitWidth();

  while (!WorkStack.empty()) {
    APInt Val = WorkStack.pop_back_val();

    if (Val == 0 || Val == 1)
      continue;

    if (Steps >= MaxSteps)
      return false;

    if (Val.isPowerOf2()) {
      ++Steps;
      continue;
    }

    APInt Floor = APInt(BitWidth, 1) << Val.logBase2();
    APInt Ceil = Val.isNegative() ? APInt(BitWidth, 0)
                                  : APInt(BitWidth, 1) << Val.ceilLogBase2();
    if ((Val - Floor).ule(Ceil - Val)) {
      WorkStack.push_back(Floor);
      WorkStack.push_back(Val - Floor);
    } else {
      WorkStack.push_back(Ceil);
      WorkStack.push_back(Ceil - Val);
    }

    ++Steps;
  }

  // If the value being multiplied is not supported natively, we have to pay
  // an additional legalization cost, conservatively assume an increase in the
  // cost of 3 instructions per step. The values for this heuristic were
  // determined experimentally.
  unsigned RegisterSize = DAG.getTargetLoweringInfo()
                              .getRegisterType(*DAG.getContext(), VT)
                              .getSizeInBits();
  Steps *= (VT.getSizeInBits() != RegisterSize) * 3;
  if (Steps > 27)
    return false;

  return true;
}
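
// Worked example (illustrative): for C = 11 (0b1011) the loop visits
//   11 -> floor 8, remainder 3   (step 1)
//   3  -> floor 2, remainder 1   (step 2)
//   2  -> power of two           (step 3)
//   8  -> power of two           (step 4)
// so multiplying by 11 is estimated at 4 steps, comfortably under the O32
// limit of 8 steps, and the shift/add expansion is considered profitable.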

static SDValue genConstMult(SDValue X, APInt C, const SDLoc &DL, EVT VT,
                            EVT ShiftTy, SelectionDAG &DAG) {
  // Return 0.
  if (C == 0)
    return DAG.getConstant(0, DL, VT);

  // Return x.
  if (C == 1)
    return X;

  // If c is power of 2, return (shl x, log2(c)).
  if (C.isPowerOf2())
    return DAG.getNode(ISD::SHL, DL, VT, X,
                       DAG.getConstant(C.logBase2(), DL, ShiftTy));

  unsigned BitWidth = C.getBitWidth();
  APInt Floor = APInt(BitWidth, 1) << C.logBase2();
  APInt Ceil = C.isNegative() ? APInt(BitWidth, 0) :
                                APInt(BitWidth, 1) << C.ceilLogBase2();

  // If |c - floor_c| <= |c - ceil_c|,
  // where floor_c = pow(2, floor(log2(c))) and ceil_c = pow(2, ceil(log2(c))),
  // return (add constMult(x, floor_c), constMult(x, c - floor_c)).
  if ((C - Floor).ule(Ceil - C)) {
    SDValue Op0 = genConstMult(X, Floor, DL, VT, ShiftTy, DAG);
    SDValue Op1 = genConstMult(X, C - Floor, DL, VT, ShiftTy, DAG);
    return DAG.getNode(ISD::ADD, DL, VT, Op0, Op1);
  }

  // If |c - floor_c| > |c - ceil_c|,
  // return (sub constMult(x, ceil_c), constMult(x, ceil_c - c)).
  SDValue Op0 = genConstMult(X, Ceil, DL, VT, ShiftTy, DAG);
  SDValue Op1 = genConstMult(X, Ceil - C, DL, VT, ShiftTy, DAG);
  return DAG.getNode(ISD::SUB, DL, VT, Op0, Op1);
}
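
// Illustrative expansion (not from the original comments): genConstMult(x, 11)
// recurses as 11 = 8 + 3 and 3 = 2 + 1, producing
//   (add (shl x, 3), (add (shl x, 1), x))
// i.e. x*11 computed with two shifts and two adds instead of a multiply.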

static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG,
                                 const TargetLowering::DAGCombinerInfo &DCI,
                                 const MipsSETargetLowering *TL,
                                 const MipsSubtarget &Subtarget) {
  EVT VT = N->getValueType(0);

  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1)))
    if (!VT.isVector() && shouldTransformMulToShiftsAddsSubs(
                              C->getAPIntValue(), VT, DAG, Subtarget))
      return genConstMult(N->getOperand(0), C->getAPIntValue(), SDLoc(N), VT,
                          TL->getScalarShiftAmountTy(DAG.getDataLayout(), VT),
                          DAG);

  return SDValue(N, 0);
}

static SDValue performDSPShiftCombine(unsigned Opc, SDNode *N, EVT Ty,
                                      SelectionDAG &DAG,
                                      const MipsSubtarget &Subtarget) {
  // See if this is a vector splat immediate node.
  APInt SplatValue, SplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;
  unsigned EltSize = Ty.getScalarSizeInBits();
  BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N->getOperand(1));

  if (!Subtarget.hasDSP())
    return SDValue();

  if (!BV ||
      !BV->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
                           EltSize, !Subtarget.isLittle()) ||
      (SplatBitSize != EltSize) ||
      (SplatValue.getZExtValue() >= EltSize))
    return SDValue();

  SDLoc DL(N);
  return DAG.getNode(Opc, DL, Ty, N->getOperand(0),
                     DAG.getConstant(SplatValue.getZExtValue(), DL, MVT::i32));
}
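
// Illustrative example: for a v2i16 shift whose amount is the constant splat
// <3, 3>, the node
//   (shl v2i16:$a, build_vector <3, 3>)
// is rewritten to (SHLL_DSP $a, 3), which matches the DSP ASE shll.ph
// instruction with an immediate shift amount.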

static SDValue performSHLCombine(SDNode *N, SelectionDAG &DAG,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const MipsSubtarget &Subtarget) {
  EVT Ty = N->getValueType(0);

  if ((Ty != MVT::v2i16) && (Ty != MVT::v4i8))
    return SDValue();

  return performDSPShiftCombine(MipsISD::SHLL_DSP, N, Ty, DAG, Subtarget);
}

// Fold sign-extensions into MipsISD::VEXTRACT_[SZ]EXT_ELT for MSA and fold
// constant splats into MipsISD::SHRA_DSP for DSPr2.
//
// Performs the following transformations:
// - Changes MipsISD::VEXTRACT_[SZ]EXT_ELT to sign extension if its
//   sign/zero-extension is completely overwritten by the new one performed by
//   the ISD::SRA and ISD::SHL nodes.
// - Removes redundant sign extensions performed by an ISD::SRA and ISD::SHL
//   sequence.
//
// See performDSPShiftCombine for more information about the transformation
// used for DSPr2.
static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const MipsSubtarget &Subtarget) {
  EVT Ty = N->getValueType(0);

  if (Subtarget.hasMSA()) {
    SDValue Op0 = N->getOperand(0);
    SDValue Op1 = N->getOperand(1);

    // (sra (shl (MipsVExtract[SZ]Ext $a, $b, $c), imm:$d), imm:$d)
    // where $d + sizeof($c) == 32
    // or    $d + sizeof($c) <= 32 and SExt
    // -> (MipsVExtractSExt $a, $b, $c)
    if (Op0->getOpcode() == ISD::SHL && Op1 == Op0->getOperand(1)) {
      SDValue Op0Op0 = Op0->getOperand(0);
      ConstantSDNode *ShAmount = dyn_cast<ConstantSDNode>(Op1);

      if (!ShAmount)
        return SDValue();

      if (Op0Op0->getOpcode() != MipsISD::VEXTRACT_SEXT_ELT &&
          Op0Op0->getOpcode() != MipsISD::VEXTRACT_ZEXT_ELT)
        return SDValue();

      EVT ExtendTy = cast<VTSDNode>(Op0Op0->getOperand(2))->getVT();
      unsigned TotalBits = ShAmount->getZExtValue() + ExtendTy.getSizeInBits();

      if (TotalBits == 32 ||
          (Op0Op0->getOpcode() == MipsISD::VEXTRACT_SEXT_ELT &&
           TotalBits <= 32)) {
        SDValue Ops[] = { Op0Op0->getOperand(0), Op0Op0->getOperand(1),
                          Op0Op0->getOperand(2) };
        return DAG.getNode(MipsISD::VEXTRACT_SEXT_ELT, SDLoc(Op0Op0),
                           Op0Op0->getVTList(),
                           ArrayRef(Ops, Op0Op0->getNumOperands()));
      }
    }
  }

  if ((Ty != MVT::v2i16) && ((Ty != MVT::v4i8) || !Subtarget.hasDSPR2()))
    return SDValue();

  return performDSPShiftCombine(MipsISD::SHRA_DSP, N, Ty, DAG, Subtarget);
}
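
// Illustrative example: a 16-bit element extracted with
//   (MipsVExtractSExt $a, $b, i16)
// that is then re-sign-extended with the usual shift pair
//   (sra (shl (MipsVExtractSExt $a, $b, i16), 16), 16)
// has $d + sizeof($c) == 32, so the shl/sra pair is redundant and the whole
// expression collapses back to (MipsVExtractSExt $a, $b, i16).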


static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const MipsSubtarget &Subtarget) {
  EVT Ty = N->getValueType(0);

  if (((Ty != MVT::v2i16) || !Subtarget.hasDSPR2()) && (Ty != MVT::v4i8))
    return SDValue();

  return performDSPShiftCombine(MipsISD::SHRL_DSP, N, Ty, DAG, Subtarget);
}

static bool isLegalDSPCondCode(EVT Ty, ISD::CondCode CC) {
  bool IsV216 = (Ty == MVT::v2i16);

  switch (CC) {
  case ISD::SETEQ:
  case ISD::SETNE:  return true;
  case ISD::SETLT:
  case ISD::SETLE:
  case ISD::SETGT:
  case ISD::SETGE:  return IsV216;
  case ISD::SETULT:
  case ISD::SETULE:
  case ISD::SETUGT:
  case ISD::SETUGE: return !IsV216;
  default:          return false;
  }
}

static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG) {
  EVT Ty = N->getValueType(0);

  if ((Ty != MVT::v2i16) && (Ty != MVT::v4i8))
    return SDValue();

  if (!isLegalDSPCondCode(Ty, cast<CondCodeSDNode>(N->getOperand(2))->get()))
    return SDValue();

  return DAG.getNode(MipsISD::SETCC_DSP, SDLoc(N), Ty, N->getOperand(0),
                     N->getOperand(1), N->getOperand(2));
}

static SDValue performVSELECTCombine(SDNode *N, SelectionDAG &DAG) {
  EVT Ty = N->getValueType(0);

  if (Ty == MVT::v2i16 || Ty == MVT::v4i8) {
    SDValue SetCC = N->getOperand(0);

    if (SetCC.getOpcode() != MipsISD::SETCC_DSP)
      return SDValue();

    return DAG.getNode(MipsISD::SELECT_CC_DSP, SDLoc(N), Ty,
                       SetCC.getOperand(0), SetCC.getOperand(1),
                       N->getOperand(1), N->getOperand(2), SetCC.getOperand(2));
  }

  return SDValue();
}

static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG,
                                 const MipsSubtarget &Subtarget) {
  EVT Ty = N->getValueType(0);

  if (Subtarget.hasMSA() && Ty.is128BitVector() && Ty.isInteger()) {
    // Try the following combines:
    //   (xor (or $a, $b), (build_vector allones))
    //   (xor (or $a, $b), (bitcast (build_vector allones)))
    SDValue Op0 = N->getOperand(0);
    SDValue Op1 = N->getOperand(1);
    SDValue NotOp;

    if (ISD::isBuildVectorAllOnes(Op0.getNode()))
      NotOp = Op1;
    else if (ISD::isBuildVectorAllOnes(Op1.getNode()))
      NotOp = Op0;
    else
      return SDValue();

    if (NotOp->getOpcode() == ISD::OR)
      return DAG.getNode(MipsISD::VNOR, SDLoc(N), Ty, NotOp->getOperand(0),
                         NotOp->getOperand(1));
  }

  return SDValue();
}
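
// Illustrative example: with $a and $b of type v16i8,
//   (xor (or $a, $b), (build_vector <0xFF x 16>))
// is the bitwise NOT of the OR, so it is rewritten to (VNOR $a, $b), which
// maps onto the single MSA nor.v instruction.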

SDValue
MipsSETargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDValue Val;

  switch (N->getOpcode()) {
  case ISD::AND:
    Val = performANDCombine(N, DAG, DCI, Subtarget);
    break;
  case ISD::OR:
    Val = performORCombine(N, DAG, DCI, Subtarget);
    break;
  case ISD::MUL:
    return performMULCombine(N, DAG, DCI, this, Subtarget);
  case ISD::SHL:
    Val = performSHLCombine(N, DAG, DCI, Subtarget);
    break;
  case ISD::SRA:
    return performSRACombine(N, DAG, DCI, Subtarget);
  case ISD::SRL:
    return performSRLCombine(N, DAG, DCI, Subtarget);
  case ISD::VSELECT:
    return performVSELECTCombine(N, DAG);
  case ISD::XOR:
    Val = performXORCombine(N, DAG, Subtarget);
    break;
  case ISD::SETCC:
    Val = performSETCCCombine(N, DAG);
    break;
  }

  if (Val.getNode()) {
    LLVM_DEBUG(dbgs() << "\nMipsSE DAG Combine:\n";
               N->printrWithDepth(dbgs(), &DAG); dbgs() << "\n=> \n";
               Val.getNode()->printrWithDepth(dbgs(), &DAG); dbgs() << "\n");
    return Val;
  }

  return MipsTargetLowering::PerformDAGCombine(N, DCI);
}

MachineBasicBlock *
MipsSETargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
                                                  MachineBasicBlock *BB) const {
  switch (MI.getOpcode()) {
  default:
    return MipsTargetLowering::EmitInstrWithCustomInserter(MI, BB);
  case Mips::BPOSGE32_PSEUDO:
    return emitBPOSGE32(MI, BB);
  case Mips::SNZ_B_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BNZ_B);
  case Mips::SNZ_H_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BNZ_H);
  case Mips::SNZ_W_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BNZ_W);
  case Mips::SNZ_D_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BNZ_D);
  case Mips::SNZ_V_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BNZ_V);
  case Mips::SZ_B_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BZ_B);
  case Mips::SZ_H_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BZ_H);
  case Mips::SZ_W_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BZ_W);
  case Mips::SZ_D_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BZ_D);
  case Mips::SZ_V_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BZ_V);
  case Mips::COPY_FW_PSEUDO:
    return emitCOPY_FW(MI, BB);
  case Mips::COPY_FD_PSEUDO:
    return emitCOPY_FD(MI, BB);
  case Mips::INSERT_FW_PSEUDO:
    return emitINSERT_FW(MI, BB);
  case Mips::INSERT_FD_PSEUDO:
    return emitINSERT_FD(MI, BB);
  case Mips::INSERT_B_VIDX_PSEUDO:
  case Mips::INSERT_B_VIDX64_PSEUDO:
    return emitINSERT_DF_VIDX(MI, BB, 1, false);
  case Mips::INSERT_H_VIDX_PSEUDO:
  case Mips::INSERT_H_VIDX64_PSEUDO:
    return emitINSERT_DF_VIDX(MI, BB, 2, false);
  case Mips::INSERT_W_VIDX_PSEUDO:
  case Mips::INSERT_W_VIDX64_PSEUDO:
    return emitINSERT_DF_VIDX(MI, BB, 4, false);
  case Mips::INSERT_D_VIDX_PSEUDO:
  case Mips::INSERT_D_VIDX64_PSEUDO:
    return emitINSERT_DF_VIDX(MI, BB, 8, false);
  case Mips::INSERT_FW_VIDX_PSEUDO:
  case Mips::INSERT_FW_VIDX64_PSEUDO:
    return emitINSERT_DF_VIDX(MI, BB, 4, true);
  case Mips::INSERT_FD_VIDX_PSEUDO:
  case Mips::INSERT_FD_VIDX64_PSEUDO:
    return emitINSERT_DF_VIDX(MI, BB, 8, true);
  case Mips::FILL_FW_PSEUDO:
    return emitFILL_FW(MI, BB);
  case Mips::FILL_FD_PSEUDO:
    return emitFILL_FD(MI, BB);
  case Mips::FEXP2_W_1_PSEUDO:
    return emitFEXP2_W_1(MI, BB);
  case Mips::FEXP2_D_1_PSEUDO:
    return emitFEXP2_D_1(MI, BB);
  case Mips::ST_F16:
    return emitST_F16_PSEUDO(MI, BB);
  case Mips::LD_F16:
    return emitLD_F16_PSEUDO(MI, BB);
  case Mips::MSA_FP_EXTEND_W_PSEUDO:
    return emitFPEXTEND_PSEUDO(MI, BB, false);
  case Mips::MSA_FP_ROUND_W_PSEUDO:
    return emitFPROUND_PSEUDO(MI, BB, false);
  case Mips::MSA_FP_EXTEND_D_PSEUDO:
    return emitFPEXTEND_PSEUDO(MI, BB, true);
  case Mips::MSA_FP_ROUND_D_PSEUDO:
    return emitFPROUND_PSEUDO(MI, BB, true);
  }
}

bool MipsSETargetLowering::isEligibleForTailCallOptimization(
    const CCState &CCInfo, unsigned NextStackOffset,
    const MipsFunctionInfo &FI) const {
  if (!UseMipsTailCalls)
    return false;

  // Exception has to be cleared with eret.
  if (FI.isISR())
    return false;

  // Return false if either the callee or caller has a byval argument.
  if (CCInfo.getInRegsParamsCount() > 0 || FI.hasByvalArg())
    return false;

  // Return true if the callee's argument area is no larger than the
  // caller's.
  return NextStackOffset <= FI.getIncomingArgSize();
}

void MipsSETargetLowering::
getOpndList(SmallVectorImpl<SDValue> &Ops,
            std::deque<std::pair<unsigned, SDValue>> &RegsToPass,
            bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage,
            bool IsCallReloc, CallLoweringInfo &CLI, SDValue Callee,
            SDValue Chain) const {
  Ops.push_back(Callee);
  MipsTargetLowering::getOpndList(Ops, RegsToPass, IsPICCall, GlobalOrExternal,
                                  InternalLinkage, IsCallReloc, CLI, Callee,
                                  Chain);
}

SDValue MipsSETargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const {
  LoadSDNode &Nd = *cast<LoadSDNode>(Op);

  if (Nd.getMemoryVT() != MVT::f64 || !NoDPLoadStore)
    return MipsTargetLowering::lowerLOAD(Op, DAG);

  // Replace a double precision load with two i32 loads and a buildpair64.
  SDLoc DL(Op);
  SDValue Ptr = Nd.getBasePtr(), Chain = Nd.getChain();
  EVT PtrVT = Ptr.getValueType();

  // i32 load from lower address.
  SDValue Lo = DAG.getLoad(MVT::i32, DL, Chain, Ptr, MachinePointerInfo(),
                           Nd.getAlign(), Nd.getMemOperand()->getFlags());

  // i32 load from higher address.
  Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Ptr, DAG.getConstant(4, DL, PtrVT));
  SDValue Hi = DAG.getLoad(
      MVT::i32, DL, Lo.getValue(1), Ptr, MachinePointerInfo(),
      commonAlignment(Nd.getAlign(), 4), Nd.getMemOperand()->getFlags());

  if (!Subtarget.isLittle())
    std::swap(Lo, Hi);

  SDValue BP = DAG.getNode(MipsISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
  SDValue Ops[2] = {BP, Hi.getValue(1)};
  return DAG.getMergeValues(Ops, DL);
}
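
// Illustrative DAG for a little-endian f64 load under -mno-ldc1-sdc1 (not
// from the original comments):
//   (f64 (load %p))
// becomes
//   %lo = (i32 (load %p))
//   %hi = (i32 (load %p + 4))
//   (f64 (BuildPairF64 %lo, %hi))
// where the second load is chained after the first and the merged result
// carries the second load's output chain.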

SDValue MipsSETargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const {
  StoreSDNode &Nd = *cast<StoreSDNode>(Op);

  if (Nd.getMemoryVT() != MVT::f64 || !NoDPLoadStore)
    return MipsTargetLowering::lowerSTORE(Op, DAG);

  // Replace a double precision store with two ExtractElementF64 nodes and
  // two i32 stores.
  SDLoc DL(Op);
  SDValue Val = Nd.getValue(), Ptr = Nd.getBasePtr(), Chain = Nd.getChain();
  EVT PtrVT = Ptr.getValueType();
  SDValue Lo = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32,
                           Val, DAG.getConstant(0, DL, MVT::i32));
  SDValue Hi = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32,
                           Val, DAG.getConstant(1, DL, MVT::i32));

  if (!Subtarget.isLittle())
    std::swap(Lo, Hi);

  // i32 store to lower address.
  Chain = DAG.getStore(Chain, DL, Lo, Ptr, MachinePointerInfo(), Nd.getAlign(),
                       Nd.getMemOperand()->getFlags(), Nd.getAAInfo());

  // i32 store to higher address.
  Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Ptr, DAG.getConstant(4, DL, PtrVT));
  return DAG.getStore(Chain, DL, Hi, Ptr, MachinePointerInfo(),
                      commonAlignment(Nd.getAlign(), 4),
                      Nd.getMemOperand()->getFlags(), Nd.getAAInfo());
}

SDValue MipsSETargetLowering::lowerBITCAST(SDValue Op,
                                           SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT Src = Op.getOperand(0).getValueType().getSimpleVT();
  MVT Dest = Op.getValueType().getSimpleVT();

  // Bitcast i64 to double.
  if (Src == MVT::i64 && Dest == MVT::f64) {
    SDValue Lo, Hi;
    std::tie(Lo, Hi) =
        DAG.SplitScalar(Op.getOperand(0), DL, MVT::i32, MVT::i32);
    return DAG.getNode(MipsISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
  }

  // Bitcast double to i64.
  if (Src == MVT::f64 && Dest == MVT::i64) {
    SDValue Lo =
        DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32, Op.getOperand(0),
                    DAG.getConstant(0, DL, MVT::i32));
    SDValue Hi =
        DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32, Op.getOperand(0),
                    DAG.getConstant(1, DL, MVT::i32));
    return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Lo, Hi);
  }

  // Skip other cases of bitcast and use default lowering.
  return SDValue();
}

SDValue MipsSETargetLowering::lowerMulDiv(SDValue Op, unsigned NewOpc,
                                          bool HasLo, bool HasHi,
                                          SelectionDAG &DAG) const {
  // MIPS32r6/MIPS64r6 removed accumulator-based multiplies.
  assert(!Subtarget.hasMips32r6());

  EVT Ty = Op.getOperand(0).getValueType();
  SDLoc DL(Op);
  SDValue Mult = DAG.getNode(NewOpc, DL, MVT::Untyped,
                             Op.getOperand(0), Op.getOperand(1));
  SDValue Lo, Hi;

  if (HasLo)
    Lo = DAG.getNode(MipsISD::MFLO, DL, Ty, Mult);
  if (HasHi)
    Hi = DAG.getNode(MipsISD::MFHI, DL, Ty, Mult);

  if (!HasLo || !HasHi)
    return HasLo ? Lo : Hi;

  SDValue Vals[] = { Lo, Hi };
  return DAG.getMergeValues(Vals, DL);
}
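
// Illustrative lowerings (not from the original comments): an i32 ISD::MULHS
// becomes
//   (MFHI (Mult $a, $b))
// while ISD::SMUL_LOHI produces both halves:
//   (merge_values (MFLO (Mult $a, $b)), (MFHI (Mult $a, $b)))
// where Mult produces the untyped HI/LO accumulator pair.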

static SDValue initAccumulator(SDValue In, const SDLoc &DL, SelectionDAG &DAG) {
  SDValue InLo, InHi;
  std::tie(InLo, InHi) = DAG.SplitScalar(In, DL, MVT::i32, MVT::i32);
  return DAG.getNode(MipsISD::MTLOHI, DL, MVT::Untyped, InLo, InHi);
}

static SDValue extractLOHI(SDValue Op, const SDLoc &DL, SelectionDAG &DAG) {
  SDValue Lo = DAG.getNode(MipsISD::MFLO, DL, MVT::i32, Op);
  SDValue Hi = DAG.getNode(MipsISD::MFHI, DL, MVT::i32, Op);
  return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Lo, Hi);
}

// This function expands mips intrinsic nodes which have 64-bit input operands
// or output values.
//
// out64 = intrinsic-node in64
// =>
// lo = copy (extract-element (in64, 0))
// hi = copy (extract-element (in64, 1))
// mips-specific-node
// v0 = copy lo
// v1 = copy hi
// out64 = merge-values (v0, v1)
//
static SDValue lowerDSPIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) {
  SDLoc DL(Op);
  bool HasChainIn = Op->getOperand(0).getValueType() == MVT::Other;
  SmallVector<SDValue, 3> Ops;
  unsigned OpNo = 0;

  // See if Op has a chain input.
  if (HasChainIn)
    Ops.push_back(Op->getOperand(OpNo++));

  // The next operand is the intrinsic opcode.
  assert(Op->getOperand(OpNo).getOpcode() == ISD::TargetConstant);

  // See if the next operand has type i64.
  SDValue Opnd = Op->getOperand(++OpNo), In64;

  if (Opnd.getValueType() == MVT::i64)
    In64 = initAccumulator(Opnd, DL, DAG);
  else
    Ops.push_back(Opnd);

  // Push the remaining operands.
  for (++OpNo; OpNo < Op->getNumOperands(); ++OpNo)
    Ops.push_back(Op->getOperand(OpNo));

  // Add In64 to the end of the list.
  if (In64.getNode())
    Ops.push_back(In64);

  // Scan output.
  SmallVector<EVT, 2> ResTys;

  for (EVT Ty : Op->values())
    ResTys.push_back((Ty == MVT::i64) ? MVT::Untyped : Ty);

  // Create node.
  SDValue Val = DAG.getNode(Opc, DL, ResTys, Ops);
  SDValue Out = (ResTys[0] == MVT::Untyped) ? extractLOHI(Val, DL, DAG) : Val;

  if (!HasChainIn)
    return Out;

  assert(Val->getValueType(1) == MVT::Other);
  SDValue Vals[] = { Out, SDValue(Val.getNode(), 1) };
  return DAG.getMergeValues(Vals, DL);
}
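
// Illustrative example: @llvm.mips.madd(i64 %acc, i32 %a, i32 %b) has an i64
// accumulator input and an i64 result, so it expands to roughly
//   %ac  = (MTLOHI %acc.lo, %acc.hi)
//   %res = (MAdd %a, %b, %ac)
//   %out = (build_pair (MFLO %res), (MFHI %res))
// with the i64 operand split into the untyped accumulator and the untyped
// result reassembled into an i64.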

// Lower an MSA copy intrinsic into the specified SelectionDAG node
static SDValue lowerMSACopyIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) {
  SDLoc DL(Op);
  SDValue Vec = Op->getOperand(1);
  SDValue Idx = Op->getOperand(2);
  EVT ResTy = Op->getValueType(0);
  EVT EltTy = Vec->getValueType(0).getVectorElementType();

  SDValue Result = DAG.getNode(Opc, DL, ResTy, Vec, Idx,
                               DAG.getValueType(EltTy));

  return Result;
}

static SDValue lowerMSASplatZExt(SDValue Op, unsigned OpNr, SelectionDAG &DAG) {
  EVT ResVecTy = Op->getValueType(0);
  EVT ViaVecTy = ResVecTy;
  bool BigEndian = !DAG.getSubtarget().getTargetTriple().isLittleEndian();
  SDLoc DL(Op);

  // When ResVecTy == MVT::v2i64, LaneA is the upper 32 bits of the lane and
  // LaneB is the lower 32-bits. Otherwise LaneA and LaneB are alternating
  // lanes.
  SDValue LaneA = Op->getOperand(OpNr);
  SDValue LaneB;

  if (ResVecTy == MVT::v2i64) {
    // In case of the index being passed as an immediate value, set the upper
    // lane to 0 so that the splati.d instruction can be matched.
    if (isa<ConstantSDNode>(LaneA))
      LaneB = DAG.getConstant(0, DL, MVT::i32);
    // Having the index passed in a register, set the upper lane to the same
    // value as the lower - this results in the BUILD_VECTOR node not being
    // expanded through stack. This way we are able to pattern match the set of
    // nodes created here to splat.d.
    else
      LaneB = LaneA;
    ViaVecTy = MVT::v4i32;
    if (BigEndian)
      std::swap(LaneA, LaneB);
  } else
    LaneB = LaneA;

  SDValue Ops[16] = { LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, LaneA, LaneB,
                      LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, LaneA, LaneB };

  SDValue Result = DAG.getBuildVector(
      ViaVecTy, DL, ArrayRef(Ops, ViaVecTy.getVectorNumElements()));

  if (ViaVecTy != ResVecTy) {
    SDValue One = DAG.getConstant(1, DL, ViaVecTy);
    Result = DAG.getNode(ISD::BITCAST, DL, ResVecTy,
                         DAG.getNode(ISD::AND, DL, ViaVecTy, Result, One));
  }

  return Result;
}

static SDValue lowerMSASplatImm(SDValue Op, unsigned ImmOp, SelectionDAG &DAG,
                                bool IsSigned = false) {
  auto *CImm = cast<ConstantSDNode>(Op->getOperand(ImmOp));
  return DAG.getConstant(
      APInt(Op->getValueType(0).getScalarType().getSizeInBits(),
            IsSigned ? CImm->getSExtValue() : CImm->getZExtValue(), IsSigned),
      SDLoc(Op), Op->getValueType(0));
}

static SDValue getBuildVectorSplat(EVT VecTy, SDValue SplatValue,
                                   bool BigEndian, SelectionDAG &DAG) {
  EVT ViaVecTy = VecTy;
  SDValue SplatValueA = SplatValue;
  SDValue SplatValueB = SplatValue;
  SDLoc DL(SplatValue);

  if (VecTy == MVT::v2i64) {
    // v2i64 BUILD_VECTOR must be performed via v4i32 so split into i32's.
    ViaVecTy = MVT::v4i32;

    SplatValueA = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, SplatValue);
    SplatValueB = DAG.getNode(ISD::SRL, DL, MVT::i64, SplatValue,
                              DAG.getConstant(32, DL, MVT::i32));
    SplatValueB = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, SplatValueB);
  }

  // We currently hold the parts in little endian order. Swap them if
  // necessary.
  if (BigEndian)
    std::swap(SplatValueA, SplatValueB);

  SDValue Ops[16] = { SplatValueA, SplatValueB, SplatValueA, SplatValueB,
                      SplatValueA, SplatValueB, SplatValueA, SplatValueB,
                      SplatValueA, SplatValueB, SplatValueA, SplatValueB,
                      SplatValueA, SplatValueB, SplatValueA, SplatValueB };

  SDValue Result = DAG.getBuildVector(
      ViaVecTy, DL, ArrayRef(Ops, ViaVecTy.getVectorNumElements()));

  if (VecTy != ViaVecTy)
    Result = DAG.getNode(ISD::BITCAST, DL, VecTy, Result);

  return Result;
}
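
// Illustrative example: splatting an i64 value %v into v2i64 on a 32-bit
// target builds
//   (v4i32 build_vector %v.lo, %v.hi, %v.lo, %v.hi)
// (halves swapped when big-endian) and bitcasts the result to v2i64, since
// a v2i64 BUILD_VECTOR with i64 elements cannot be selected directly.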

static SDValue lowerMSABinaryBitImmIntr(SDValue Op, SelectionDAG &DAG,
                                        unsigned Opc, SDValue Imm,
                                        bool BigEndian) {
  EVT VecTy = Op->getValueType(0);
  SDValue Exp2Imm;
  SDLoc DL(Op);

  // The DAG Combiner can't constant fold bitcasted vectors yet so we must do
  // it here for now.
  if (VecTy == MVT::v2i64) {
    if (ConstantSDNode *CImm = dyn_cast<ConstantSDNode>(Imm)) {
      APInt BitImm = APInt(64, 1) << CImm->getAPIntValue();

      SDValue BitImmHiOp = DAG.getConstant(BitImm.lshr(32).trunc(32), DL,
                                           MVT::i32);
      SDValue BitImmLoOp = DAG.getConstant(BitImm.trunc(32), DL, MVT::i32);

      if (BigEndian)
        std::swap(BitImmLoOp, BitImmHiOp);

      Exp2Imm = DAG.getNode(
          ISD::BITCAST, DL, MVT::v2i64,
          DAG.getBuildVector(MVT::v4i32, DL,
                             {BitImmLoOp, BitImmHiOp, BitImmLoOp, BitImmHiOp}));
    }
  }

  if (!Exp2Imm.getNode()) {
    // We couldn't constant fold, so do a vector shift instead.

    // Extend i32 to i64 if necessary. Sign or zero extend doesn't matter since
    // only values 0-63 are valid.
    if (VecTy == MVT::v2i64)
      Imm = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Imm);

    Exp2Imm = getBuildVectorSplat(VecTy, Imm, BigEndian, DAG);

    Exp2Imm = DAG.getNode(ISD::SHL, DL, VecTy, DAG.getConstant(1, DL, VecTy),
                          Exp2Imm);
  }

  return DAG.getNode(Opc, DL, VecTy, Op->getOperand(1), Exp2Imm);
}
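
// Illustrative example: @llvm.mips.bseti.w(%a, 3) reaches this helper with
// Opc == ISD::OR and Imm == 3, producing
//   (or %a, (shl splat(1), splat(3)))
// which folds to an OR with splat(8), i.e. bit 3 set in every element.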

static SDValue truncateVecElts(SDValue Op, SelectionDAG &DAG) {
  SDLoc DL(Op);
  EVT ResTy = Op->getValueType(0);
  SDValue Vec = Op->getOperand(2);
  bool BigEndian = !DAG.getSubtarget().getTargetTriple().isLittleEndian();
  MVT ResEltTy = ResTy == MVT::v2i64 ? MVT::i64 : MVT::i32;
  SDValue ConstValue = DAG.getConstant(Vec.getScalarValueSizeInBits() - 1,
                                       DL, ResEltTy);
  SDValue SplatVec = getBuildVectorSplat(ResTy, ConstValue, BigEndian, DAG);

  return DAG.getNode(ISD::AND, DL, ResTy, Vec, SplatVec);
}

static SDValue lowerMSABitClear(SDValue Op, SelectionDAG &DAG) {
  EVT ResTy = Op->getValueType(0);
  SDLoc DL(Op);
  SDValue One = DAG.getConstant(1, DL, ResTy);
  SDValue Bit = DAG.getNode(ISD::SHL, DL, ResTy, One, truncateVecElts(Op, DAG));

  return DAG.getNode(ISD::AND, DL, ResTy, Op->getOperand(1),
                     DAG.getNOT(DL, Bit, ResTy));
}

static SDValue lowerMSABitClearImm(SDValue Op, SelectionDAG &DAG) {
  SDLoc DL(Op);
  EVT ResTy = Op->getValueType(0);
  APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1)
                 << cast<ConstantSDNode>(Op->getOperand(2))->getAPIntValue();
  SDValue BitMask = DAG.getConstant(~BitImm, DL, ResTy);

  return DAG.getNode(ISD::AND, DL, ResTy, Op->getOperand(1), BitMask);
}
1528
1529SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
1530 SelectionDAG &DAG) const {
1531 SDLoc DL(Op);
1532 unsigned Intrinsic = cast<ConstantSDNode>(Op->getOperand(0))->getZExtValue();
1533 switch (Intrinsic) {
1534 default:
1535 return SDValue();
1536 case Intrinsic::mips_shilo:
1537 return lowerDSPIntr(Op, DAG, MipsISD::SHILO);
1538 case Intrinsic::mips_dpau_h_qbl:
1539 return lowerDSPIntr(Op, DAG, MipsISD::DPAU_H_QBL);
1540 case Intrinsic::mips_dpau_h_qbr:
1541 return lowerDSPIntr(Op, DAG, MipsISD::DPAU_H_QBR);
1542 case Intrinsic::mips_dpsu_h_qbl:
1543 return lowerDSPIntr(Op, DAG, MipsISD::DPSU_H_QBL);
1544 case Intrinsic::mips_dpsu_h_qbr:
1545 return lowerDSPIntr(Op, DAG, MipsISD::DPSU_H_QBR);
1546 case Intrinsic::mips_dpa_w_ph:
1547 return lowerDSPIntr(Op, DAG, MipsISD::DPA_W_PH);
1548 case Intrinsic::mips_dps_w_ph:
1549 return lowerDSPIntr(Op, DAG, MipsISD::DPS_W_PH);
1550 case Intrinsic::mips_dpax_w_ph:
1551 return lowerDSPIntr(Op, DAG, MipsISD::DPAX_W_PH);
1552 case Intrinsic::mips_dpsx_w_ph:
1553 return lowerDSPIntr(Op, DAG, MipsISD::DPSX_W_PH);
1554 case Intrinsic::mips_mulsa_w_ph:
1555 return lowerDSPIntr(Op, DAG, MipsISD::MULSA_W_PH);
1556 case Intrinsic::mips_mult:
1557 return lowerDSPIntr(Op, DAG, MipsISD::Mult);
1558 case Intrinsic::mips_multu:
1559 return lowerDSPIntr(Op, DAG, MipsISD::Multu);
1560 case Intrinsic::mips_madd:
1561 return lowerDSPIntr(Op, DAG, MipsISD::MAdd);
1562 case Intrinsic::mips_maddu:
1563 return lowerDSPIntr(Op, DAG, MipsISD::MAddu);
1564 case Intrinsic::mips_msub:
1565 return lowerDSPIntr(Op, DAG, MipsISD::MSub);
1566 case Intrinsic::mips_msubu:
1567 return lowerDSPIntr(Op, DAG, MipsISD::MSubu);
1568 case Intrinsic::mips_addv_b:
1569 case Intrinsic::mips_addv_h:
1570 case Intrinsic::mips_addv_w:
1571 case Intrinsic::mips_addv_d:
1572 return DAG.getNode(ISD::ADD, DL, Op->getValueType(0), Op->getOperand(1),
1573 Op->getOperand(2));
1574 case Intrinsic::mips_addvi_b:
1575 case Intrinsic::mips_addvi_h:
1576 case Intrinsic::mips_addvi_w:
1577 case Intrinsic::mips_addvi_d:
1578 return DAG.getNode(ISD::ADD, DL, Op->getValueType(0), Op->getOperand(1),
1579 lowerMSASplatImm(Op, 2, DAG));
1580 case Intrinsic::mips_and_v:
1581 return DAG.getNode(ISD::AND, DL, Op->getValueType(0), Op->getOperand(1),
1582 Op->getOperand(2));
1583 case Intrinsic::mips_andi_b:
1584 return DAG.getNode(ISD::AND, DL, Op->getValueType(0), Op->getOperand(1),
1585 lowerMSASplatImm(Op, 2, DAG));
1586 case Intrinsic::mips_bclr_b:
1587 case Intrinsic::mips_bclr_h:
1588 case Intrinsic::mips_bclr_w:
1589 case Intrinsic::mips_bclr_d:
1590 return lowerMSABitClear(Op, DAG);
1591 case Intrinsic::mips_bclri_b:
1592 case Intrinsic::mips_bclri_h:
1593 case Intrinsic::mips_bclri_w:
1594 case Intrinsic::mips_bclri_d:
1595 return lowerMSABitClearImm(Op, DAG);
1596 case Intrinsic::mips_binsli_b:
1597 case Intrinsic::mips_binsli_h:
1598 case Intrinsic::mips_binsli_w:
1599 case Intrinsic::mips_binsli_d: {
1600 // binsli_x(IfClear, IfSet, nbits) -> (vselect LBitsMask, IfSet, IfClear)
1601 EVT VecTy = Op->getValueType(0);
1602 EVT EltTy = VecTy.getVectorElementType();
1603 if (Op->getConstantOperandVal(3) >= EltTy.getSizeInBits())
1604 report_fatal_error("Immediate out of range");
1606 Op->getConstantOperandVal(3) + 1);
1607 return DAG.getNode(ISD::VSELECT, DL, VecTy,
1608 DAG.getConstant(Mask, DL, VecTy, true),
1609 Op->getOperand(2), Op->getOperand(1));
1610 }
1611 case Intrinsic::mips_binsri_b:
1612 case Intrinsic::mips_binsri_h:
1613 case Intrinsic::mips_binsri_w:
1614 case Intrinsic::mips_binsri_d: {
1615 // binsri_x(IfClear, IfSet, nbits) -> (vselect RBitsMask, IfSet, IfClear)
1616 EVT VecTy = Op->getValueType(0);
1617 EVT EltTy = VecTy.getVectorElementType();
1618 if (Op->getConstantOperandVal(3) >= EltTy.getSizeInBits())
1619 report_fatal_error("Immediate out of range");
1621 Op->getConstantOperandVal(3) + 1);
1622 return DAG.getNode(ISD::VSELECT, DL, VecTy,
1623 DAG.getConstant(Mask, DL, VecTy, true),
1624 Op->getOperand(2), Op->getOperand(1));
1625 }
1626 case Intrinsic::mips_bmnz_v:
1627 return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), Op->getOperand(3),
1628 Op->getOperand(2), Op->getOperand(1));
1629 case Intrinsic::mips_bmnzi_b:
1630 return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0),
1631 lowerMSASplatImm(Op, 3, DAG), Op->getOperand(2),
1632 Op->getOperand(1));
1633 case Intrinsic::mips_bmz_v:
1634 return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), Op->getOperand(3),
1635 Op->getOperand(1), Op->getOperand(2));
1636 case Intrinsic::mips_bmzi_b:
1637 return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0),
1638 lowerMSASplatImm(Op, 3, DAG), Op->getOperand(1),
1639 Op->getOperand(2));
1640 case Intrinsic::mips_bneg_b:
1641 case Intrinsic::mips_bneg_h:
1642 case Intrinsic::mips_bneg_w:
1643 case Intrinsic::mips_bneg_d: {
1644 EVT VecTy = Op->getValueType(0);
1645 SDValue One = DAG.getConstant(1, DL, VecTy);
1646
1647 return DAG.getNode(ISD::XOR, DL, VecTy, Op->getOperand(1),
1648 DAG.getNode(ISD::SHL, DL, VecTy, One,
1649 truncateVecElts(Op, DAG)));
1650 }
1651 case Intrinsic::mips_bnegi_b:
1652 case Intrinsic::mips_bnegi_h:
1653 case Intrinsic::mips_bnegi_w:
1654 case Intrinsic::mips_bnegi_d:
1655 return lowerMSABinaryBitImmIntr(Op, DAG, ISD::XOR, Op->getOperand(2),
1656 !Subtarget.isLittle());
1657 case Intrinsic::mips_bnz_b:
1658 case Intrinsic::mips_bnz_h:
1659 case Intrinsic::mips_bnz_w:
1660 case Intrinsic::mips_bnz_d:
1661 return DAG.getNode(MipsISD::VALL_NONZERO, DL, Op->getValueType(0),
1662 Op->getOperand(1));
1663 case Intrinsic::mips_bnz_v:
1664 return DAG.getNode(MipsISD::VANY_NONZERO, DL, Op->getValueType(0),
1665 Op->getOperand(1));
1666 case Intrinsic::mips_bsel_v:
1667 // bsel_v(Mask, IfClear, IfSet) -> (vselect Mask, IfSet, IfClear)
1668 return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0),
1669 Op->getOperand(1), Op->getOperand(3),
1670 Op->getOperand(2));
1671 case Intrinsic::mips_bseli_b:
1672 // bseli_v(Mask, IfClear, IfSet) -> (vselect Mask, IfSet, IfClear)
1673 return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0),
1674 Op->getOperand(1), lowerMSASplatImm(Op, 3, DAG),
1675 Op->getOperand(2));
1676 case Intrinsic::mips_bset_b:
1677 case Intrinsic::mips_bset_h:
1678 case Intrinsic::mips_bset_w:
1679 case Intrinsic::mips_bset_d: {
1680 EVT VecTy = Op->getValueType(0);
1681 SDValue One = DAG.getConstant(1, DL, VecTy);
1682
1683 return DAG.getNode(ISD::OR, DL, VecTy, Op->getOperand(1),
1684 DAG.getNode(ISD::SHL, DL, VecTy, One,
1685 truncateVecElts(Op, DAG)));
1686 }
1687 case Intrinsic::mips_bseti_b:
1688 case Intrinsic::mips_bseti_h:
1689 case Intrinsic::mips_bseti_w:
1690 case Intrinsic::mips_bseti_d:
1691 return lowerMSABinaryBitImmIntr(Op, DAG, ISD::OR, Op->getOperand(2),
1692 !Subtarget.isLittle());
1693 case Intrinsic::mips_bz_b:
1694 case Intrinsic::mips_bz_h:
1695 case Intrinsic::mips_bz_w:
1696 case Intrinsic::mips_bz_d:
1697 return DAG.getNode(MipsISD::VALL_ZERO, DL, Op->getValueType(0),
1698 Op->getOperand(1));
1699 case Intrinsic::mips_bz_v:
1700 return DAG.getNode(MipsISD::VANY_ZERO, DL, Op->getValueType(0),
1701 Op->getOperand(1));
1702 case Intrinsic::mips_ceq_b:
1703 case Intrinsic::mips_ceq_h:
1704 case Intrinsic::mips_ceq_w:
1705 case Intrinsic::mips_ceq_d:
1706 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1707 Op->getOperand(2), ISD::SETEQ);
1708 case Intrinsic::mips_ceqi_b:
1709 case Intrinsic::mips_ceqi_h:
1710 case Intrinsic::mips_ceqi_w:
1711 case Intrinsic::mips_ceqi_d:
1712 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1713 lowerMSASplatImm(Op, 2, DAG, true), ISD::SETEQ);
1714 case Intrinsic::mips_cle_s_b:
1715 case Intrinsic::mips_cle_s_h:
1716 case Intrinsic::mips_cle_s_w:
1717 case Intrinsic::mips_cle_s_d:
1718 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1719 Op->getOperand(2), ISD::SETLE);
1720 case Intrinsic::mips_clei_s_b:
1721 case Intrinsic::mips_clei_s_h:
1722 case Intrinsic::mips_clei_s_w:
1723 case Intrinsic::mips_clei_s_d:
1724 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1725 lowerMSASplatImm(Op, 2, DAG, true), ISD::SETLE);
1726 case Intrinsic::mips_cle_u_b:
1727 case Intrinsic::mips_cle_u_h:
1728 case Intrinsic::mips_cle_u_w:
1729 case Intrinsic::mips_cle_u_d:
1730 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1731 Op->getOperand(2), ISD::SETULE);
1732 case Intrinsic::mips_clei_u_b:
1733 case Intrinsic::mips_clei_u_h:
1734 case Intrinsic::mips_clei_u_w:
1735 case Intrinsic::mips_clei_u_d:
1736 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1737 lowerMSASplatImm(Op, 2, DAG), ISD::SETULE);
1738 case Intrinsic::mips_clt_s_b:
1739 case Intrinsic::mips_clt_s_h:
1740 case Intrinsic::mips_clt_s_w:
1741 case Intrinsic::mips_clt_s_d:
1742 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1743 Op->getOperand(2), ISD::SETLT);
1744 case Intrinsic::mips_clti_s_b:
1745 case Intrinsic::mips_clti_s_h:
1746 case Intrinsic::mips_clti_s_w:
1747 case Intrinsic::mips_clti_s_d:
1748 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1749 lowerMSASplatImm(Op, 2, DAG, true), ISD::SETLT);
1750 case Intrinsic::mips_clt_u_b:
1751 case Intrinsic::mips_clt_u_h:
1752 case Intrinsic::mips_clt_u_w:
1753 case Intrinsic::mips_clt_u_d:
1754 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1755 Op->getOperand(2), ISD::SETULT);
1756 case Intrinsic::mips_clti_u_b:
1757 case Intrinsic::mips_clti_u_h:
1758 case Intrinsic::mips_clti_u_w:
1759 case Intrinsic::mips_clti_u_d:
1760 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1761 lowerMSASplatImm(Op, 2, DAG), ISD::SETULT);
1762 case Intrinsic::mips_copy_s_b:
1763 case Intrinsic::mips_copy_s_h:
1764 case Intrinsic::mips_copy_s_w:
1766 case Intrinsic::mips_copy_s_d:
1767 if (Subtarget.hasMips64())
1768 // Lower directly into VEXTRACT_SEXT_ELT since i64 is legal on Mips64.
1770 else {
1771 // Lower into the generic EXTRACT_VECTOR_ELT node and let the type
1772 // legalizer and EXTRACT_VECTOR_ELT lowering sort it out.
1774 Op->getValueType(0), Op->getOperand(1),
1775 Op->getOperand(2));
1776 }
1777 case Intrinsic::mips_copy_u_b:
1778 case Intrinsic::mips_copy_u_h:
1779 case Intrinsic::mips_copy_u_w:
1781 case Intrinsic::mips_copy_u_d:
1782 if (Subtarget.hasMips64())
1783 // Lower directly into VEXTRACT_ZEXT_ELT since i64 is legal on Mips64.
1785 else {
1786 // Lower into the generic EXTRACT_VECTOR_ELT node and let the type
1787 // legalizer and EXTRACT_VECTOR_ELT lowering sort it out.
1788 // Note: When i64 is illegal, this results in copy_s.w instructions
1789 // instead of copy_u.w instructions. This makes no difference to the
1790 // behaviour since i64 is only illegal when the register file is 32-bit.
1792 Op->getValueType(0), Op->getOperand(1),
1793 Op->getOperand(2));
1794 }
1795 case Intrinsic::mips_div_s_b:
1796 case Intrinsic::mips_div_s_h:
1797 case Intrinsic::mips_div_s_w:
1798 case Intrinsic::mips_div_s_d:
1799 return DAG.getNode(ISD::SDIV, DL, Op->getValueType(0), Op->getOperand(1),
1800 Op->getOperand(2));
1801 case Intrinsic::mips_div_u_b:
1802 case Intrinsic::mips_div_u_h:
1803 case Intrinsic::mips_div_u_w:
1804 case Intrinsic::mips_div_u_d:
1805 return DAG.getNode(ISD::UDIV, DL, Op->getValueType(0), Op->getOperand(1),
1806 Op->getOperand(2));
1807 case Intrinsic::mips_fadd_w:
1808 case Intrinsic::mips_fadd_d:
1809 // TODO: If intrinsics have fast-math-flags, propagate them.
1810 return DAG.getNode(ISD::FADD, DL, Op->getValueType(0), Op->getOperand(1),
1811 Op->getOperand(2));
1812 // Don't lower mips_fcaf_[wd] since LLVM folds SETFALSE condcodes away
1813 case Intrinsic::mips_fceq_w:
1814 case Intrinsic::mips_fceq_d:
1815 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1816 Op->getOperand(2), ISD::SETOEQ);
1817 case Intrinsic::mips_fcle_w:
1818 case Intrinsic::mips_fcle_d:
1819 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1820 Op->getOperand(2), ISD::SETOLE);
1821 case Intrinsic::mips_fclt_w:
1822 case Intrinsic::mips_fclt_d:
1823 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1824 Op->getOperand(2), ISD::SETOLT);
1825 case Intrinsic::mips_fcne_w:
1826 case Intrinsic::mips_fcne_d:
1827 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1828 Op->getOperand(2), ISD::SETONE);
1829 case Intrinsic::mips_fcor_w:
1830 case Intrinsic::mips_fcor_d:
1831 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1832 Op->getOperand(2), ISD::SETO);
1833 case Intrinsic::mips_fcueq_w:
1834 case Intrinsic::mips_fcueq_d:
1835 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1836 Op->getOperand(2), ISD::SETUEQ);
1837 case Intrinsic::mips_fcule_w:
1838 case Intrinsic::mips_fcule_d:
1839 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1840 Op->getOperand(2), ISD::SETULE);
1841 case Intrinsic::mips_fcult_w:
1842 case Intrinsic::mips_fcult_d:
1843 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1844 Op->getOperand(2), ISD::SETULT);
1845 case Intrinsic::mips_fcun_w:
1846 case Intrinsic::mips_fcun_d:
1847 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1848 Op->getOperand(2), ISD::SETUO);
1849 case Intrinsic::mips_fcune_w:
1850 case Intrinsic::mips_fcune_d:
1851 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1852 Op->getOperand(2), ISD::SETUNE);
1853 case Intrinsic::mips_fdiv_w:
1854 case Intrinsic::mips_fdiv_d:
1855 // TODO: If intrinsics have fast-math-flags, propagate them.
1856 return DAG.getNode(ISD::FDIV, DL, Op->getValueType(0), Op->getOperand(1),
1857 Op->getOperand(2));
1858 case Intrinsic::mips_ffint_u_w:
1859 case Intrinsic::mips_ffint_u_d:
1860 return DAG.getNode(ISD::UINT_TO_FP, DL, Op->getValueType(0),
1861 Op->getOperand(1));
1862 case Intrinsic::mips_ffint_s_w:
1863 case Intrinsic::mips_ffint_s_d:
1864 return DAG.getNode(ISD::SINT_TO_FP, DL, Op->getValueType(0),
1865 Op->getOperand(1));
1866 case Intrinsic::mips_fill_b:
1867 case Intrinsic::mips_fill_h:
1868 case Intrinsic::mips_fill_w:
1869 case Intrinsic::mips_fill_d: {
1870 EVT ResTy = Op->getValueType(0);
1872 Op->getOperand(1));
1873
1874 // If ResTy is v2i64 then the type legalizer will break this node down into
1875 // an equivalent v4i32.
1876 return DAG.getBuildVector(ResTy, DL, Ops);
1877 }
1878 case Intrinsic::mips_fexp2_w:
1879 case Intrinsic::mips_fexp2_d: {
1880 // TODO: If intrinsics have fast-math-flags, propagate them.
1881 EVT ResTy = Op->getValueType(0);
1882 return DAG.getNode(
1883 ISD::FMUL, SDLoc(Op), ResTy, Op->getOperand(1),
1884 DAG.getNode(ISD::FEXP2, SDLoc(Op), ResTy, Op->getOperand(2)));
1885 }
1886 case Intrinsic::mips_flog2_w:
1887 case Intrinsic::mips_flog2_d:
1888 return DAG.getNode(ISD::FLOG2, DL, Op->getValueType(0), Op->getOperand(1));
1889 case Intrinsic::mips_fmadd_w:
1890 case Intrinsic::mips_fmadd_d:
1891 return DAG.getNode(ISD::FMA, SDLoc(Op), Op->getValueType(0),
1892 Op->getOperand(1), Op->getOperand(2), Op->getOperand(3));
1893 case Intrinsic::mips_fmul_w:
1894 case Intrinsic::mips_fmul_d:
1895 // TODO: If intrinsics have fast-math-flags, propagate them.
1896 return DAG.getNode(ISD::FMUL, DL, Op->getValueType(0), Op->getOperand(1),
1897 Op->getOperand(2));
1898 case Intrinsic::mips_fmsub_w:
1899 case Intrinsic::mips_fmsub_d: {
1900 // TODO: If intrinsics have fast-math-flags, propagate them.
1901 return DAG.getNode(MipsISD::FMS, SDLoc(Op), Op->getValueType(0),
1902 Op->getOperand(1), Op->getOperand(2), Op->getOperand(3));
1903 }
1904 case Intrinsic::mips_frint_w:
1905 case Intrinsic::mips_frint_d:
1906 return DAG.getNode(ISD::FRINT, DL, Op->getValueType(0), Op->getOperand(1));
1907 case Intrinsic::mips_fsqrt_w:
1908 case Intrinsic::mips_fsqrt_d:
1909 return DAG.getNode(ISD::FSQRT, DL, Op->getValueType(0), Op->getOperand(1));
1910 case Intrinsic::mips_fsub_w:
1911 case Intrinsic::mips_fsub_d:
1912 // TODO: If intrinsics have fast-math-flags, propagate them.
1913 return DAG.getNode(ISD::FSUB, DL, Op->getValueType(0), Op->getOperand(1),
1914 Op->getOperand(2));
1915 case Intrinsic::mips_ftrunc_u_w:
1916 case Intrinsic::mips_ftrunc_u_d:
1917 return DAG.getNode(ISD::FP_TO_UINT, DL, Op->getValueType(0),
1918 Op->getOperand(1));
1919 case Intrinsic::mips_ftrunc_s_w:
1920 case Intrinsic::mips_ftrunc_s_d:
1921 return DAG.getNode(ISD::FP_TO_SINT, DL, Op->getValueType(0),
1922 Op->getOperand(1));
1923 case Intrinsic::mips_ilvev_b:
1924 case Intrinsic::mips_ilvev_h:
1925 case Intrinsic::mips_ilvev_w:
1926 case Intrinsic::mips_ilvev_d:
1927 return DAG.getNode(MipsISD::ILVEV, DL, Op->getValueType(0),
1928 Op->getOperand(1), Op->getOperand(2));
1929 case Intrinsic::mips_ilvl_b:
1930 case Intrinsic::mips_ilvl_h:
1931 case Intrinsic::mips_ilvl_w:
1932 case Intrinsic::mips_ilvl_d:
1933 return DAG.getNode(MipsISD::ILVL, DL, Op->getValueType(0),
1934 Op->getOperand(1), Op->getOperand(2));
1935 case Intrinsic::mips_ilvod_b:
1936 case Intrinsic::mips_ilvod_h:
1937 case Intrinsic::mips_ilvod_w:
1938 case Intrinsic::mips_ilvod_d:
1939 return DAG.getNode(MipsISD::ILVOD, DL, Op->getValueType(0),
1940 Op->getOperand(1), Op->getOperand(2));
1941 case Intrinsic::mips_ilvr_b:
1942 case Intrinsic::mips_ilvr_h:
1943 case Intrinsic::mips_ilvr_w:
1944 case Intrinsic::mips_ilvr_d:
1945 return DAG.getNode(MipsISD::ILVR, DL, Op->getValueType(0),
1946 Op->getOperand(1), Op->getOperand(2));
1947 case Intrinsic::mips_insert_b:
1948 case Intrinsic::mips_insert_h:
1949 case Intrinsic::mips_insert_w:
1950 case Intrinsic::mips_insert_d:
1951 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Op), Op->getValueType(0),
1952 Op->getOperand(1), Op->getOperand(3), Op->getOperand(2));
1953 case Intrinsic::mips_insve_b:
1954 case Intrinsic::mips_insve_h:
1955 case Intrinsic::mips_insve_w:
1956 case Intrinsic::mips_insve_d: {
1957 // Report an error for out of range values.
1958 int64_t Max;
1959 switch (Intrinsic) {
1960 case Intrinsic::mips_insve_b: Max = 15; break;
1961 case Intrinsic::mips_insve_h: Max = 7; break;
1962 case Intrinsic::mips_insve_w: Max = 3; break;
1963 case Intrinsic::mips_insve_d: Max = 1; break;
1964 default: llvm_unreachable("Unmatched intrinsic");
1965 }
1966 int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue();
1967 if (Value < 0 || Value > Max)
1968 report_fatal_error("Immediate out of range");
1969 return DAG.getNode(MipsISD::INSVE, DL, Op->getValueType(0),
1970 Op->getOperand(1), Op->getOperand(2), Op->getOperand(3),
1971 DAG.getConstant(0, DL, MVT::i32));
1972 }
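// The Max values above are (number of lanes - 1) for each element type:
// insve.b operates on v16i8 (lanes 0..15), insve.h on v8i16 (0..7), insve.w
// on v4i32 (0..3) and insve.d on v2i64 (0..1).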
1973 case Intrinsic::mips_ldi_b:
1974 case Intrinsic::mips_ldi_h:
1975 case Intrinsic::mips_ldi_w:
1976 case Intrinsic::mips_ldi_d:
1977 return lowerMSASplatImm(Op, 1, DAG, true);
1978 case Intrinsic::mips_lsa:
1979 case Intrinsic::mips_dlsa: {
1980 EVT ResTy = Op->getValueType(0);
1981 return DAG.getNode(ISD::ADD, SDLoc(Op), ResTy, Op->getOperand(1),
1982 DAG.getNode(ISD::SHL, SDLoc(Op), ResTy,
1983 Op->getOperand(2), Op->getOperand(3)));
1984 }
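// The intrinsic computes Op1 + (Op2 << Op3), a shift-and-add commonly used
// for address arithmetic. Illustrative example (not from this file): the
// address of base[idx] for 4-byte elements is base + (idx << 2).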
1985 case Intrinsic::mips_maddv_b:
1986 case Intrinsic::mips_maddv_h:
1987 case Intrinsic::mips_maddv_w:
1988 case Intrinsic::mips_maddv_d: {
1989 EVT ResTy = Op->getValueType(0);
1990 return DAG.getNode(ISD::ADD, SDLoc(Op), ResTy, Op->getOperand(1),
1991 DAG.getNode(ISD::MUL, SDLoc(Op), ResTy,
1992 Op->getOperand(2), Op->getOperand(3)));
1993 }
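// maddv.df computes, per lane, $wd[i] + $ws[i] * $wt[i]; the generic
// ADD(Op1, MUL(Op2, Op3)) form built above expresses exactly that and lets
// instruction selection match it back to maddv.df.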
1994 case Intrinsic::mips_max_s_b:
1995 case Intrinsic::mips_max_s_h:
1996 case Intrinsic::mips_max_s_w:
1997 case Intrinsic::mips_max_s_d:
1998 return DAG.getNode(ISD::SMAX, DL, Op->getValueType(0),
1999 Op->getOperand(1), Op->getOperand(2));
2000 case Intrinsic::mips_max_u_b:
2001 case Intrinsic::mips_max_u_h:
2002 case Intrinsic::mips_max_u_w:
2003 case Intrinsic::mips_max_u_d:
2004 return DAG.getNode(ISD::UMAX, DL, Op->getValueType(0),
2005 Op->getOperand(1), Op->getOperand(2));
2006 case Intrinsic::mips_maxi_s_b:
2007 case Intrinsic::mips_maxi_s_h:
2008 case Intrinsic::mips_maxi_s_w:
2009 case Intrinsic::mips_maxi_s_d:
2010 return DAG.getNode(ISD::SMAX, DL, Op->getValueType(0),
2011 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG, true));
2012 case Intrinsic::mips_maxi_u_b:
2013 case Intrinsic::mips_maxi_u_h:
2014 case Intrinsic::mips_maxi_u_w:
2015 case Intrinsic::mips_maxi_u_d:
2016 return DAG.getNode(ISD::UMAX, DL, Op->getValueType(0),
2017 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
2018 case Intrinsic::mips_min_s_b:
2019 case Intrinsic::mips_min_s_h:
2020 case Intrinsic::mips_min_s_w:
2021 case Intrinsic::mips_min_s_d:
2022 return DAG.getNode(ISD::SMIN, DL, Op->getValueType(0),
2023 Op->getOperand(1), Op->getOperand(2));
2024 case Intrinsic::mips_min_u_b:
2025 case Intrinsic::mips_min_u_h:
2026 case Intrinsic::mips_min_u_w:
2027 case Intrinsic::mips_min_u_d:
2028 return DAG.getNode(ISD::UMIN, DL, Op->getValueType(0),
2029 Op->getOperand(1), Op->getOperand(2));
2030 case Intrinsic::mips_mini_s_b:
2031 case Intrinsic::mips_mini_s_h:
2032 case Intrinsic::mips_mini_s_w:
2033 case Intrinsic::mips_mini_s_d:
2034 return DAG.getNode(ISD::SMIN, DL, Op->getValueType(0),
2035 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG, true));
2036 case Intrinsic::mips_mini_u_b:
2037 case Intrinsic::mips_mini_u_h:
2038 case Intrinsic::mips_mini_u_w:
2039 case Intrinsic::mips_mini_u_d:
2040 return DAG.getNode(ISD::UMIN, DL, Op->getValueType(0),
2041 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
2042 case Intrinsic::mips_mod_s_b:
2043 case Intrinsic::mips_mod_s_h:
2044 case Intrinsic::mips_mod_s_w:
2045 case Intrinsic::mips_mod_s_d:
2046 return DAG.getNode(ISD::SREM, DL, Op->getValueType(0), Op->getOperand(1),
2047 Op->getOperand(2));
2048 case Intrinsic::mips_mod_u_b:
2049 case Intrinsic::mips_mod_u_h:
2050 case Intrinsic::mips_mod_u_w:
2051 case Intrinsic::mips_mod_u_d:
2052 return DAG.getNode(ISD::UREM, DL, Op->getValueType(0), Op->getOperand(1),
2053 Op->getOperand(2));
2054 case Intrinsic::mips_mulv_b:
2055 case Intrinsic::mips_mulv_h:
2056 case Intrinsic::mips_mulv_w:
2057 case Intrinsic::mips_mulv_d:
2058 return DAG.getNode(ISD::MUL, DL, Op->getValueType(0), Op->getOperand(1),
2059 Op->getOperand(2));
2060 case Intrinsic::mips_msubv_b:
2061 case Intrinsic::mips_msubv_h:
2062 case Intrinsic::mips_msubv_w:
2063 case Intrinsic::mips_msubv_d: {
2064 EVT ResTy = Op->getValueType(0);
2065 return DAG.getNode(ISD::SUB, SDLoc(Op), ResTy, Op->getOperand(1),
2066 DAG.getNode(ISD::MUL, SDLoc(Op), ResTy,
2067 Op->getOperand(2), Op->getOperand(3)));
2068 }
2069 case Intrinsic::mips_nlzc_b:
2070 case Intrinsic::mips_nlzc_h:
2071 case Intrinsic::mips_nlzc_w:
2072 case Intrinsic::mips_nlzc_d:
2073 return DAG.getNode(ISD::CTLZ, DL, Op->getValueType(0), Op->getOperand(1));
2074 case Intrinsic::mips_nor_v: {
2075 SDValue Res = DAG.getNode(ISD::OR, DL, Op->getValueType(0),
2076 Op->getOperand(1), Op->getOperand(2));
2077 return DAG.getNOT(DL, Res, Res->getValueType(0));
2078 }
2079 case Intrinsic::mips_nori_b: {
2080 SDValue Res = DAG.getNode(ISD::OR, DL, Op->getValueType(0),
2081 Op->getOperand(1),
2082 lowerMSASplatImm(Op, 2, DAG));
2083 return DAG.getNOT(DL, Res, Res->getValueType(0));
2084 }
2085 case Intrinsic::mips_or_v:
2086 return DAG.getNode(ISD::OR, DL, Op->getValueType(0), Op->getOperand(1),
2087 Op->getOperand(2));
2088 case Intrinsic::mips_ori_b:
2089 return DAG.getNode(ISD::OR, DL, Op->getValueType(0),
2090 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
2091 case Intrinsic::mips_pckev_b:
2092 case Intrinsic::mips_pckev_h:
2093 case Intrinsic::mips_pckev_w:
2094 case Intrinsic::mips_pckev_d:
2095 return DAG.getNode(MipsISD::PCKEV, DL, Op->getValueType(0),
2096 Op->getOperand(1), Op->getOperand(2));
2097 case Intrinsic::mips_pckod_b:
2098 case Intrinsic::mips_pckod_h:
2099 case Intrinsic::mips_pckod_w:
2100 case Intrinsic::mips_pckod_d:
2101 return DAG.getNode(MipsISD::PCKOD, DL, Op->getValueType(0),
2102 Op->getOperand(1), Op->getOperand(2));
2103 case Intrinsic::mips_pcnt_b:
2104 case Intrinsic::mips_pcnt_h:
2105 case Intrinsic::mips_pcnt_w:
2106 case Intrinsic::mips_pcnt_d:
2107 return DAG.getNode(ISD::CTPOP, DL, Op->getValueType(0), Op->getOperand(1));
2108 case Intrinsic::mips_sat_s_b:
2109 case Intrinsic::mips_sat_s_h:
2110 case Intrinsic::mips_sat_s_w:
2111 case Intrinsic::mips_sat_s_d:
2112 case Intrinsic::mips_sat_u_b:
2113 case Intrinsic::mips_sat_u_h:
2114 case Intrinsic::mips_sat_u_w:
2115 case Intrinsic::mips_sat_u_d: {
2116 // Report an error for out of range values.
2117 int64_t Max;
2118 switch (Intrinsic) {
2119 case Intrinsic::mips_sat_s_b:
2120 case Intrinsic::mips_sat_u_b: Max = 7; break;
2121 case Intrinsic::mips_sat_s_h:
2122 case Intrinsic::mips_sat_u_h: Max = 15; break;
2123 case Intrinsic::mips_sat_s_w:
2124 case Intrinsic::mips_sat_u_w: Max = 31; break;
2125 case Intrinsic::mips_sat_s_d:
2126 case Intrinsic::mips_sat_u_d: Max = 63; break;
2127 default: llvm_unreachable("Unmatched intrinsic");
2128 }
2129 int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue();
2130 if (Value < 0 || Value > Max)
2131 report_fatal_error("Immediate out of range");
2132 return SDValue();
2133 }
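// The Max values above are (element width in bits - 1): the saturation
// position for sat.b must be in 0..7, for sat.h in 0..15, and so on.
// Returning SDValue() for in-range immediates leaves the intrinsic node to
// be matched directly by the normal instruction selection patterns.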
2134 case Intrinsic::mips_shf_b:
2135 case Intrinsic::mips_shf_h:
2136 case Intrinsic::mips_shf_w: {
2137 int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue();
2138 if (Value < 0 || Value > 255)
2139 report_fatal_error("Immediate out of range");
2140 return DAG.getNode(MipsISD::SHF, DL, Op->getValueType(0),
2141 Op->getOperand(2), Op->getOperand(1));
2142 }
2143 case Intrinsic::mips_sldi_b:
2144 case Intrinsic::mips_sldi_h:
2145 case Intrinsic::mips_sldi_w:
2146 case Intrinsic::mips_sldi_d: {
2147 // Report an error for out of range values.
2148 int64_t Max;
2149 switch (Intrinsic) {
2150 case Intrinsic::mips_sldi_b: Max = 15; break;
2151 case Intrinsic::mips_sldi_h: Max = 7; break;
2152 case Intrinsic::mips_sldi_w: Max = 3; break;
2153 case Intrinsic::mips_sldi_d: Max = 1; break;
2154 default: llvm_unreachable("Unmatched intrinsic");
2155 }
2156 int64_t Value = cast<ConstantSDNode>(Op->getOperand(3))->getSExtValue();
2157 if (Value < 0 || Value > Max)
2158 report_fatal_error("Immediate out of range");
2159 return SDValue();
2160 }
2161 case Intrinsic::mips_sll_b:
2162 case Intrinsic::mips_sll_h:
2163 case Intrinsic::mips_sll_w:
2164 case Intrinsic::mips_sll_d:
2165 return DAG.getNode(ISD::SHL, DL, Op->getValueType(0), Op->getOperand(1),
2166 truncateVecElts(Op, DAG));
2167 case Intrinsic::mips_slli_b:
2168 case Intrinsic::mips_slli_h:
2169 case Intrinsic::mips_slli_w:
2170 case Intrinsic::mips_slli_d:
2171 return DAG.getNode(ISD::SHL, DL, Op->getValueType(0),
2172 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
2173 case Intrinsic::mips_splat_b:
2174 case Intrinsic::mips_splat_h:
2175 case Intrinsic::mips_splat_w:
2176 case Intrinsic::mips_splat_d:
2177 // We can't lower via VECTOR_SHUFFLE because it requires constant shuffle
2178 // masks, nor can we lower via BUILD_VECTOR & EXTRACT_VECTOR_ELT because
2179 // EXTRACT_VECTOR_ELT can't extract i64's on MIPS32.
2180 // Instead we lower to MipsISD::VSHF and match from there.
2181 return DAG.getNode(MipsISD::VSHF, DL, Op->getValueType(0),
2182 lowerMSASplatZExt(Op, 2, DAG), Op->getOperand(1),
2183 Op->getOperand(1));
2184 case Intrinsic::mips_splati_b:
2185 case Intrinsic::mips_splati_h:
2186 case Intrinsic::mips_splati_w:
2187 case Intrinsic::mips_splati_d:
2188 return DAG.getNode(MipsISD::VSHF, DL, Op->getValueType(0),
2189 lowerMSASplatImm(Op, 2, DAG), Op->getOperand(1),
2190 Op->getOperand(1));
2191 case Intrinsic::mips_sra_b:
2192 case Intrinsic::mips_sra_h:
2193 case Intrinsic::mips_sra_w:
2194 case Intrinsic::mips_sra_d:
2195 return DAG.getNode(ISD::SRA, DL, Op->getValueType(0), Op->getOperand(1),
2196 truncateVecElts(Op, DAG));
2197 case Intrinsic::mips_srai_b:
2198 case Intrinsic::mips_srai_h:
2199 case Intrinsic::mips_srai_w:
2200 case Intrinsic::mips_srai_d:
2201 return DAG.getNode(ISD::SRA, DL, Op->getValueType(0),
2202 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
2203 case Intrinsic::mips_srari_b:
2204 case Intrinsic::mips_srari_h:
2205 case Intrinsic::mips_srari_w:
2206 case Intrinsic::mips_srari_d: {
2207 // Report an error for out of range values.
2208 int64_t Max;
2209 switch (Intrinsic) {
2210 case Intrinsic::mips_srari_b: Max = 7; break;
2211 case Intrinsic::mips_srari_h: Max = 15; break;
2212 case Intrinsic::mips_srari_w: Max = 31; break;
2213 case Intrinsic::mips_srari_d: Max = 63; break;
2214 default: llvm_unreachable("Unmatched intrinsic");
2215 }
2216 int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue();
2217 if (Value < 0 || Value > Max)
2218 report_fatal_error("Immediate out of range");
2219 return SDValue();
2220 }
2221 case Intrinsic::mips_srl_b:
2222 case Intrinsic::mips_srl_h:
2223 case Intrinsic::mips_srl_w:
2224 case Intrinsic::mips_srl_d:
2225 return DAG.getNode(ISD::SRL, DL, Op->getValueType(0), Op->getOperand(1),
2226 truncateVecElts(Op, DAG));
2227 case Intrinsic::mips_srli_b:
2228 case Intrinsic::mips_srli_h:
2229 case Intrinsic::mips_srli_w:
2230 case Intrinsic::mips_srli_d:
2231 return DAG.getNode(ISD::SRL, DL, Op->getValueType(0),
2232 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
2233 case Intrinsic::mips_srlri_b:
2234 case Intrinsic::mips_srlri_h:
2235 case Intrinsic::mips_srlri_w:
2236 case Intrinsic::mips_srlri_d: {
2237 // Report an error for out of range values.
2238 int64_t Max;
2239 switch (Intrinsic) {
2240 case Intrinsic::mips_srlri_b: Max = 7; break;
2241 case Intrinsic::mips_srlri_h: Max = 15; break;
2242 case Intrinsic::mips_srlri_w: Max = 31; break;
2243 case Intrinsic::mips_srlri_d: Max = 63; break;
2244 default: llvm_unreachable("Unmatched intrinsic");
2245 }
2246 int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue();
2247 if (Value < 0 || Value > Max)
2248 report_fatal_error("Immediate out of range");
2249 return SDValue();
2250 }
2251 case Intrinsic::mips_subv_b:
2252 case Intrinsic::mips_subv_h:
2253 case Intrinsic::mips_subv_w:
2254 case Intrinsic::mips_subv_d:
2255 return DAG.getNode(ISD::SUB, DL, Op->getValueType(0), Op->getOperand(1),
2256 Op->getOperand(2));
2257 case Intrinsic::mips_subvi_b:
2258 case Intrinsic::mips_subvi_h:
2259 case Intrinsic::mips_subvi_w:
2260 case Intrinsic::mips_subvi_d:
2261 return DAG.getNode(ISD::SUB, DL, Op->getValueType(0),
2262 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
2263 case Intrinsic::mips_vshf_b:
2264 case Intrinsic::mips_vshf_h:
2265 case Intrinsic::mips_vshf_w:
2266 case Intrinsic::mips_vshf_d:
2267 return DAG.getNode(MipsISD::VSHF, DL, Op->getValueType(0),
2268 Op->getOperand(1), Op->getOperand(2), Op->getOperand(3));
2269 case Intrinsic::mips_xor_v:
2270 return DAG.getNode(ISD::XOR, DL, Op->getValueType(0), Op->getOperand(1),
2271 Op->getOperand(2));
2272 case Intrinsic::mips_xori_b:
2273 return DAG.getNode(ISD::XOR, DL, Op->getValueType(0),
2274 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
2275 case Intrinsic::thread_pointer: {
2276 EVT PtrVT = getPointerTy(DAG.getDataLayout());
2277 return DAG.getNode(MipsISD::ThreadPointer, DL, PtrVT);
2278 }
2279 }
2280}
2281
2282 static SDValue lowerMSALoadIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr,
2283 const MipsSubtarget &Subtarget) {
2284 SDLoc DL(Op);
2285 SDValue ChainIn = Op->getOperand(0);
2286 SDValue Address = Op->getOperand(2);
2287 SDValue Offset = Op->getOperand(3);
2288 EVT ResTy = Op->getValueType(0);
2289 EVT PtrTy = Address->getValueType(0);
2290
2291 // For N64, addresses have the underlying type MVT::i64. This intrinsic,
2292 // however, takes an i32 signed constant offset; the offset operand of the
2293 // underlying instruction is a scaled signed 10-bit immediate.
2294 if (Subtarget.isABI_N64())
2295 Offset = DAG.getNode(ISD::SIGN_EXTEND, DL, PtrTy, Offset);
2296
2297 Address = DAG.getNode(ISD::ADD, DL, PtrTy, Address, Offset);
2298 return DAG.getLoad(ResTy, DL, ChainIn, Address, MachinePointerInfo(),
2299 Align(16));
2300}
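// Illustrative example (not from this file), for the N64 case above:
//   %v = call <4 x i32> @llvm.mips.ld.w(ptr %p, i32 16)
// sign-extends the i32 offset to i64, adds it to %p, and emits a 16-byte
// aligned vector load from %p + 16.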
2301
2302SDValue MipsSETargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
2303 SelectionDAG &DAG) const {
2304 unsigned Intr = cast<ConstantSDNode>(Op->getOperand(1))->getZExtValue();
2305 switch (Intr) {
2306 default:
2307 return SDValue();
2308 case Intrinsic::mips_extp:
2309 return lowerDSPIntr(Op, DAG, MipsISD::EXTP);
2310 case Intrinsic::mips_extpdp:
2311 return lowerDSPIntr(Op, DAG, MipsISD::EXTPDP);
2312 case Intrinsic::mips_extr_w:
2313 return lowerDSPIntr(Op, DAG, MipsISD::EXTR_W);
2314 case Intrinsic::mips_extr_r_w:
2315 return lowerDSPIntr(Op, DAG, MipsISD::EXTR_R_W);
2316 case Intrinsic::mips_extr_rs_w:
2317 return lowerDSPIntr(Op, DAG, MipsISD::EXTR_RS_W);
2318 case Intrinsic::mips_extr_s_h:
2319 return lowerDSPIntr(Op, DAG, MipsISD::EXTR_S_H);
2320 case Intrinsic::mips_mthlip:
2321 return lowerDSPIntr(Op, DAG, MipsISD::MTHLIP);
2322 case Intrinsic::mips_mulsaq_s_w_ph:
2323 return lowerDSPIntr(Op, DAG, MipsISD::MULSAQ_S_W_PH);
2324 case Intrinsic::mips_maq_s_w_phl:
2325 return lowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHL);
2326 case Intrinsic::mips_maq_s_w_phr:
2327 return lowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHR);
2328 case Intrinsic::mips_maq_sa_w_phl:
2329 return lowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHL);
2330 case Intrinsic::mips_maq_sa_w_phr:
2331 return lowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHR);
2332 case Intrinsic::mips_dpaq_s_w_ph:
2333 return lowerDSPIntr(Op, DAG, MipsISD::DPAQ_S_W_PH);
2334 case Intrinsic::mips_dpsq_s_w_ph:
2335 return lowerDSPIntr(Op, DAG, MipsISD::DPSQ_S_W_PH);
2336 case Intrinsic::mips_dpaq_sa_l_w:
2337 return lowerDSPIntr(Op, DAG, MipsISD::DPAQ_SA_L_W);
2338 case Intrinsic::mips_dpsq_sa_l_w:
2339 return lowerDSPIntr(Op, DAG, MipsISD::DPSQ_SA_L_W);
2340 case Intrinsic::mips_dpaqx_s_w_ph:
2341 return lowerDSPIntr(Op, DAG, MipsISD::DPAQX_S_W_PH);
2342 case Intrinsic::mips_dpaqx_sa_w_ph:
2343 return lowerDSPIntr(Op, DAG, MipsISD::DPAQX_SA_W_PH);
2344 case Intrinsic::mips_dpsqx_s_w_ph:
2345 return lowerDSPIntr(Op, DAG, MipsISD::DPSQX_S_W_PH);
2346 case Intrinsic::mips_dpsqx_sa_w_ph:
2347 return lowerDSPIntr(Op, DAG, MipsISD::DPSQX_SA_W_PH);
2348 case Intrinsic::mips_ld_b:
2349 case Intrinsic::mips_ld_h:
2350 case Intrinsic::mips_ld_w:
2351 case Intrinsic::mips_ld_d:
2352 return lowerMSALoadIntr(Op, DAG, Intr, Subtarget);
2353 }
2354}
2355
2356 static SDValue lowerMSAStoreIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr,
2357 const MipsSubtarget &Subtarget) {
2358 SDLoc DL(Op);
2359 SDValue ChainIn = Op->getOperand(0);
2360 SDValue Value = Op->getOperand(2);
2361 SDValue Address = Op->getOperand(3);
2362 SDValue Offset = Op->getOperand(4);
2363 EVT PtrTy = Address->getValueType(0);
2364
2365 // For N64, addresses have the underlying type MVT::i64. This intrinsic,
2366 // however, takes an i32 signed constant offset; the offset operand of the
2367 // underlying instruction is a scaled signed 10-bit immediate.
2368 if (Subtarget.isABI_N64())
2369 Offset = DAG.getNode(ISD::SIGN_EXTEND, DL, PtrTy, Offset);
2370
2371 Address = DAG.getNode(ISD::ADD, DL, PtrTy, Address, Offset);
2372
2373 return DAG.getStore(ChainIn, DL, Value, Address, MachinePointerInfo(),
2374 Align(16));
2375}
2376
2377SDValue MipsSETargetLowering::lowerINTRINSIC_VOID(SDValue Op,
2378 SelectionDAG &DAG) const {
2379 unsigned Intr = cast<ConstantSDNode>(Op->getOperand(1))->getZExtValue();
2380 switch (Intr) {
2381 default:
2382 return SDValue();
2383 case Intrinsic::mips_st_b:
2384 case Intrinsic::mips_st_h:
2385 case Intrinsic::mips_st_w:
2386 case Intrinsic::mips_st_d:
2387 return lowerMSAStoreIntr(Op, DAG, Intr, Subtarget);
2388 }
2389}
2390
2391// Lower ISD::EXTRACT_VECTOR_ELT into MipsISD::VEXTRACT_SEXT_ELT.
2392//
2393// The non-value bits resulting from ISD::EXTRACT_VECTOR_ELT are undefined. We
2394// choose to sign-extend but we could have equally chosen zero-extend. The
2395// DAGCombiner will fold any sign/zero extension of the ISD::EXTRACT_VECTOR_ELT
2396// result into this node later (possibly changing it to a zero-extend in the
2397// process).
2398SDValue MipsSETargetLowering::
2399lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const {
2400 SDLoc DL(Op);
2401 EVT ResTy = Op->getValueType(0);
2402 SDValue Op0 = Op->getOperand(0);
2403 EVT VecTy = Op0->getValueType(0);
2404
2405 if (!VecTy.is128BitVector())
2406 return SDValue();
2407
2408 if (ResTy.isInteger()) {
2409 SDValue Op1 = Op->getOperand(1);
2410 EVT EltTy = VecTy.getVectorElementType();
2411 return DAG.getNode(MipsISD::VEXTRACT_SEXT_ELT, DL, ResTy, Op0, Op1,
2412 DAG.getValueType(EltTy));
2413 }
2414
2415 return Op;
2416}
2417
2418static bool isConstantOrUndef(const SDValue Op) {
2419 if (Op->isUndef())
2420 return true;
2421 if (isa<ConstantSDNode>(Op))
2422 return true;
2423 if (isa<ConstantFPSDNode>(Op))
2424 return true;
2425 return false;
2426}
2427
2428 static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode *Op) {
2429 for (unsigned i = 0; i < Op->getNumOperands(); ++i)
2430 if (isConstantOrUndef(Op->getOperand(i)))
2431 return true;
2432 return false;
2433}
2434
2435// Lowers ISD::BUILD_VECTOR into appropriate SelectionDAG nodes for the
2436// backend.
2437//
2438// Lowers according to the following rules:
2439// - Constant splats are legal as-is as long as the SplatBitSize is a power of
2440// 2 less than or equal to 64 and the value fits into a signed 10-bit
2441// immediate
2442// - Constant splats are lowered to bitconverted BUILD_VECTORs if SplatBitSize
2443// is a power of 2 less than or equal to 64 and the value does not fit into a
2444// signed 10-bit immediate
2445// - Non-constant splats are legal as-is.
2446// - Non-constant non-splats are lowered to sequences of INSERT_VECTOR_ELT.
2447// - All others are illegal and must be expanded.
2448SDValue MipsSETargetLowering::lowerBUILD_VECTOR(SDValue Op,
2449 SelectionDAG &DAG) const {
2450 BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op);
2451 EVT ResTy = Op->getValueType(0);
2452 SDLoc DL(Op);
2453 APInt SplatValue, SplatUndef;
2454 unsigned SplatBitSize;
2455 bool HasAnyUndefs;
2456
2457 if (!Subtarget.hasMSA() || !ResTy.is128BitVector())
2458 return SDValue();
2459
2460 if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
2461 HasAnyUndefs, 8,
2462 !Subtarget.isLittle()) && SplatBitSize <= 64) {
2463 // We can only cope with 8, 16, 32, or 64-bit elements
2464 if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 &&
2465 SplatBitSize != 64)
2466 return SDValue();
2467
2468 // If the value isn't an integer type we will have to bitcast
2469 // from an integer type first. Also, if there are any undefs, we must
2470 // lower them to defined values first.
2471 if (ResTy.isInteger() && !HasAnyUndefs)
2472 return Op;
2473
2474 EVT ViaVecTy;
2475
2476 switch (SplatBitSize) {
2477 default:
2478 return SDValue();
2479 case 8:
2480 ViaVecTy = MVT::v16i8;
2481 break;
2482 case 16:
2483 ViaVecTy = MVT::v8i16;
2484 break;
2485 case 32:
2486 ViaVecTy = MVT::v4i32;
2487 break;
2488 case 64:
2489 // There's no fill.d to fall back on for 64-bit values
2490 return SDValue();
2491 }
2492
2493 // SelectionDAG::getConstant will promote SplatValue appropriately.
2494 SDValue Result = DAG.getConstant(SplatValue, DL, ViaVecTy);
2495
2496 // Bitcast to the type we originally wanted
2497 if (ViaVecTy != ResTy)
2498 Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result);
2499
2500 return Result;
2501 } else if (DAG.isSplatValue(Op, /* AllowUndefs */ false))
2502 return Op;
2503 else if (!isConstantOrUndefBUILD_VECTOR(Node)) {
2504 // Use INSERT_VECTOR_ELT operations rather than expand to stores.
2505 // The resulting code is the same length as the expansion, but it doesn't
2506 // use memory operations.
2507 EVT ResTy = Node->getValueType(0);
2508
2509 assert(ResTy.isVector());
2510
2511 unsigned NumElts = ResTy.getVectorNumElements();
2512 SDValue Vector = DAG.getUNDEF(ResTy);
2513 for (unsigned i = 0; i < NumElts; ++i) {
2514 Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector,
2515 Node->getOperand(i),
2516 DAG.getConstant(i, DL, MVT::i32));
2517 }
2518 return Vector;
2519 }
2520
2521 return SDValue();
2522}
2523
2524// Lower VECTOR_SHUFFLE into SHF (if possible).
2525//
2526// SHF splits the vector into blocks of four elements, then shuffles these
2527// elements according to a <4 x i2> constant (encoded as an integer immediate).
2528//
2529// It is therefore possible to lower into SHF when the mask takes the form:
2530// <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...>
2531// When undef's appear they are treated as if they were whatever value is
2532// necessary in order to fit the above forms.
2533//
2534// For example:
2535// %2 = shufflevector <8 x i16> %0, <8 x i16> undef,
2536// <8 x i32> <i32 3, i32 2, i32 1, i32 0,
2537// i32 7, i32 6, i32 5, i32 4>
2538// is lowered to:
2539// (SHF_H $w0, $w1, 27)
2540// where the 27 comes from:
2541// 3 + (2 << 2) + (1 << 4) + (0 << 6)
2542 static SDValue lowerVECTOR_SHUFFLE_SHF(SDValue Op, EVT ResTy,
2543 SmallVector<int, 16> Indices,
2544 SelectionDAG &DAG) {
2545 int SHFIndices[4] = { -1, -1, -1, -1 };
2546
2547 if (Indices.size() < 4)
2548 return SDValue();
2549
2550 for (unsigned i = 0; i < 4; ++i) {
2551 for (unsigned j = i; j < Indices.size(); j += 4) {
2552 int Idx = Indices[j];
2553
2554 // Convert from vector index to 4-element subvector index
2555 // If an index refers to an element outside of the subvector then give up
2556 if (Idx != -1) {
2557 Idx -= 4 * (j / 4);
2558 if (Idx < 0 || Idx >= 4)
2559 return SDValue();
2560 }
2561
2562 // If the mask has an undef, replace it with the current index.
2563 // Note that it might still be undef if the current index is also undef
2564 if (SHFIndices[i] == -1)
2565 SHFIndices[i] = Idx;
2566
2567 // Check that non-undef values are the same as in the mask. If they
2568 // aren't then give up
2569 if (!(Idx == -1 || Idx == SHFIndices[i]))
2570 return SDValue();
2571 }
2572 }
2573
2574 // Calculate the immediate. Replace any remaining undefs with zero
2575 APInt Imm(32, 0);
2576 for (int i = 3; i >= 0; --i) {
2577 int Idx = SHFIndices[i];
2578
2579 if (Idx == -1)
2580 Idx = 0;
2581
2582 Imm <<= 2;
2583 Imm |= Idx & 0x3;
2584 }
2585
2586 SDLoc DL(Op);
2587 return DAG.getNode(MipsISD::SHF, DL, ResTy,
2588 DAG.getTargetConstant(Imm, DL, MVT::i32),
2589 Op->getOperand(0));
2590}
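// Another worked example (illustrative only): the v8i16 mask
// <1, 0, 3, 2, 5, 4, 7, 6> repeats <1, 0, 3, 2> in each 4-element block, so
// the immediate is 1 + (0 << 2) + (3 << 4) + (2 << 6) = 0xB1 and the shuffle
// lowers to (SHF_H 0xB1, $w0).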
2591
2592/// Determine whether a range fits a regular pattern of values.
2593/// This function accounts for the possibility of jumping over the End iterator.
2594template <typename ValType>
2595static bool
2596 fitsRegularPattern(typename SmallVectorImpl<ValType>::const_iterator Begin,
2597 unsigned CheckStride,
2598 typename SmallVectorImpl<ValType>::const_iterator End,
2599 ValType ExpectedIndex, unsigned ExpectedIndexStride) {
2600 auto &I = Begin;
2601
2602 while (I != End) {
2603 if (*I != -1 && *I != ExpectedIndex)
2604 return false;
2605 ExpectedIndex += ExpectedIndexStride;
2606
2607 // Incrementing past End is undefined behaviour so we must increment one
2608 // step at a time and check for End at each step.
2609 for (unsigned n = 0; n < CheckStride && I != End; ++n, ++I)
2610 ; // Empty loop body.
2611 }
2612 return true;
2613}
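// Example use: fitsRegularPattern<int>(Begin, 2, End, 0, 2) checks that the
// indices at positions 0, 2, 4, ... are 0, 2, 4, ... (treating -1/undef as a
// wildcard). This is how the even/odd lane checks below are phrased.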
2614
2615// Determine whether VECTOR_SHUFFLE is a SPLATI.
2616//
2617// It is a SPLATI when the mask is:
2618// <x, x, x, ...>
2619// where x is any valid index.
2620//
2621// When undef's appear in the mask they are treated as if they were whatever
2622// value is necessary in order to fit the above form.
2623 static SDValue lowerVECTOR_SHUFFLE_SPLATI(SDValue Op, EVT ResTy,
2624 SmallVector<int, 16> Indices,
2625 SelectionDAG &DAG) {
2626 assert((Indices.size() % 2) == 0);
2627
2628 int SplatIndex = -1;
2629 for (const auto &V : Indices) {
2630 if (V != -1) {
2631 SplatIndex = V;
2632 break;
2633 }
2634 }
2635
2636 return fitsRegularPattern<int>(Indices.begin(), 1, Indices.end(), SplatIndex,
2637 0);
2638}
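// For example, the v4i32 mask <1, -1, 1, 1> is a splat of lane 1: the first
// defined index (1) is taken as SplatIndex and every other defined index
// must match it.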
2639
2640// Lower VECTOR_SHUFFLE into ILVEV (if possible).
2641//
2642// ILVEV interleaves the even elements from each vector.
2643//
2644// It is possible to lower into ILVEV when the mask consists of two of the
2645// following forms interleaved:
2646// <0, 2, 4, ...>
2647// <n, n+2, n+4, ...>
2648// where n is the number of elements in the vector.
2649// For example:
2650// <0, 0, 2, 2, 4, 4, ...>
2651// <0, n, 2, n+2, 4, n+4, ...>
2652//
2653// When undef's appear in the mask they are treated as if they were whatever
2654// value is necessary in order to fit the above forms.
2655 static SDValue lowerVECTOR_SHUFFLE_ILVEV(SDValue Op, EVT ResTy,
2656 SmallVector<int, 16> Indices,
2657 SelectionDAG &DAG) {
2658 assert((Indices.size() % 2) == 0);
2659
2660 SDValue Wt;
2661 SDValue Ws;
2662 const auto &Begin = Indices.begin();
2663 const auto &End = Indices.end();
2664
2665 // Check even elements are taken from the even elements of one half or the
2666 // other and pick an operand accordingly.
2667 if (fitsRegularPattern<int>(Begin, 2, End, 0, 2))
2668 Wt = Op->getOperand(0);
2669 else if (fitsRegularPattern<int>(Begin, 2, End, Indices.size(), 2))
2670 Wt = Op->getOperand(1);
2671 else
2672 return SDValue();
2673
2674 // Check odd elements are taken from the even elements of one half or the
2675 // other and pick an operand accordingly.
2676 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 2))
2677 Ws = Op->getOperand(0);
2678 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Indices.size(), 2))
2679 Ws = Op->getOperand(1);
2680 else
2681 return SDValue();
2682
2683 return DAG.getNode(MipsISD::ILVEV, SDLoc(Op), ResTy, Ws, Wt);
2684}
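// Illustrative example: for v4i32, the mask <0, 4, 2, 6> takes the even
// lanes of the first operand (0, 2) for the even result positions and the
// even lanes of the second operand (4 = n+0, 6 = n+2) for the odd result
// positions, so it passes both checks above and lowers to ILVEV.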
2685
2686// Lower VECTOR_SHUFFLE into ILVOD (if possible).
2687//
2688// ILVOD interleaves the odd elements from each vector.
2689//
2690// It is possible to lower into ILVOD when the mask consists of two of the
2691// following forms interleaved:
2692// <1, 3, 5, ...>
2693// <n+1, n+3, n+5, ...>
2694// where n is the number of elements in the vector.
2695// For example:
2696// <1, 1, 3, 3, 5, 5, ...>
2697// <1, n+1, 3, n+3, 5, n+5, ...>
2698//
2699// When undef's appear in the mask they are treated as if they were whatever
2700// value is necessary in order to fit the above forms.
2701 static SDValue lowerVECTOR_SHUFFLE_ILVOD(SDValue Op, EVT ResTy,
2702 SmallVector<int, 16> Indices,
2703 SelectionDAG &DAG) {
2704 assert((Indices.size() % 2) == 0);
2705
2706 SDValue Wt;
2707 SDValue Ws;
2708 const auto &Begin = Indices.begin();
2709 const auto &End = Indices.end();
2710
2711 // Check even elements are taken from the odd elements of one half or the
2712 // other and pick an operand accordingly.
2713 if (fitsRegularPattern<int>(Begin, 2, End, 1, 2))
2714 Wt = Op->getOperand(0);
2715 else if (fitsRegularPattern<int>(Begin, 2, End, Indices.size() + 1, 2))
2716 Wt = Op->getOperand(1);
2717 else
2718 return SDValue();
2719
2720 // Check odd elements are taken from the odd elements of one half or the
2721 // other and pick an operand accordingly.
2722 if (fitsRegularPattern<int>(Begin + 1, 2, End, 1, 2))
2723 Ws = Op->getOperand(0);
2724 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Indices.size() + 1, 2))
2725 Ws = Op->getOperand(1);
2726 else
2727 return SDValue();
2728
2729 return DAG.getNode(MipsISD::ILVOD, SDLoc(Op), ResTy, Wt, Ws);
2730}
2731
2732// Lower VECTOR_SHUFFLE into ILVR (if possible).
2733//
2734// ILVR interleaves consecutive elements from the right (lowest-indexed) half of
2735// each vector.
2736//
2737// It is possible to lower into ILVR when the mask consists of two of the
2738// following forms interleaved:
2739// <0, 1, 2, ...>
2740// <n, n+1, n+2, ...>
2741// where n is the number of elements in the vector.
2742// For example:
2743// <0, 0, 1, 1, 2, 2, ...>
2744// <0, n, 1, n+1, 2, n+2, ...>
2745//
2746// When undef's appear in the mask they are treated as if they were whatever
2747// value is necessary in order to fit the above forms.
2748 static SDValue lowerVECTOR_SHUFFLE_ILVR(SDValue Op, EVT ResTy,
2749 SmallVector<int, 16> Indices,
2750 SelectionDAG &DAG) {
2751 assert((Indices.size() % 2) == 0);
2752
2753 SDValue Wt;
2754 SDValue Ws;
2755 const auto &Begin = Indices.begin();
2756 const auto &End = Indices.end();
2757
2758 // Check even elements are taken from the right (lowest-indexed) elements of
2759 // one half or the other and pick an operand accordingly.
2760 if (fitsRegularPattern<int>(Begin, 2, End, 0, 1))
2761 Wt = Op->getOperand(0);
2762 else if (fitsRegularPattern<int>(Begin, 2, End, Indices.size(), 1))
2763 Wt = Op->getOperand(1);
2764 else
2765 return SDValue();
2766
2767 // Check odd elements are taken from the right (lowest-indexed) elements of
2768 // one half or the other and pick an operand accordingly.
2769 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 1))
2770 Ws = Op->getOperand(0);
2771 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Indices.size(), 1))
2772 Ws = Op->getOperand(1);
2773 else
2774 return SDValue();
2775
2776 return DAG.getNode(MipsISD::ILVR, SDLoc(Op), ResTy, Ws, Wt);
2777}
2778
2779// Lower VECTOR_SHUFFLE into ILVL (if possible).
2780//
2781// ILVL interleaves consecutive elements from the left (highest-indexed) half
2782// of each vector.
2783//
2784// It is possible to lower into ILVL when the mask consists of two of the
2785// following forms interleaved:
2786// <x, x+1, x+2, ...>
2787// <n+x, n+x+1, n+x+2, ...>
2788// where n is the number of elements in the vector and x is half n.
2789// For example:
2790// <x, x, x+1, x+1, x+2, x+2, ...>
2791// <x, n+x, x+1, n+x+1, x+2, n+x+2, ...>
2792//
2793// When undef's appear in the mask they are treated as if they were whatever
2794// value is necessary in order to fit the above forms.
2795 static SDValue lowerVECTOR_SHUFFLE_ILVL(SDValue Op, EVT ResTy,
2796 SmallVector<int, 16> Indices,
2797 SelectionDAG &DAG) {
2798 assert((Indices.size() % 2) == 0);
2799
2800 unsigned HalfSize = Indices.size() / 2;
2801 SDValue Wt;
2802 SDValue Ws;
2803 const auto &Begin = Indices.begin();
2804 const auto &End = Indices.end();
2805
2806 // Check even elements are taken from the left (highest-indexed) elements of
2807 // one half or the other and pick an operand accordingly.
2808 if (fitsRegularPattern<int>(Begin, 2, End, HalfSize, 1))
2809 Wt = Op->getOperand(0);
2810 else if (fitsRegularPattern<int>(Begin, 2, End, Indices.size() + HalfSize, 1))
2811 Wt = Op->getOperand(1);
2812 else
2813 return SDValue();
2814
2815 // Check odd elements are taken from the left (highest-indexed) elements of
2816 // one half or the other and pick an operand accordingly.
2817 if (fitsRegularPattern<int>(Begin + 1, 2, End, HalfSize, 1))
2818 Ws = Op->getOperand(0);
2819 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Indices.size() + HalfSize,
2820 1))
2821 Ws = Op->getOperand(1);
2822 else
2823 return SDValue();
2824
2825 return DAG.getNode(MipsISD::ILVL, SDLoc(Op), ResTy, Ws, Wt);
2826}
2827
2828// Lower VECTOR_SHUFFLE into PCKEV (if possible).
2829//
2830// PCKEV copies the even elements of each vector into the result vector.
2831//
2832// It is possible to lower into PCKEV when the mask consists of two of the
2833// following forms concatenated:
2834// <0, 2, 4, ...>
2835// <n, n+2, n+4, ...>
2836// where n is the number of elements in the vector.
2837// For example:
2838// <0, 2, 4, ..., 0, 2, 4, ...>
2839// <0, 2, 4, ..., n, n+2, n+4, ...>
2840//
2841// When undef's appear in the mask they are treated as if they were whatever
2842// value is necessary in order to fit the above forms.
2843 static SDValue lowerVECTOR_SHUFFLE_PCKEV(SDValue Op, EVT ResTy,
2844 SmallVector<int, 16> Indices,
2845 SelectionDAG &DAG) {
2846 assert((Indices.size() % 2) == 0);
2847
2848 SDValue Wt;
2849 SDValue Ws;
2850 const auto &Begin = Indices.begin();
2851 const auto &Mid = Indices.begin() + Indices.size() / 2;
2852 const auto &End = Indices.end();
2853
2854 if (fitsRegularPattern<int>(Begin, 1, Mid, 0, 2))
2855 Wt = Op->getOperand(0);
2856 else if (fitsRegularPattern<int>(Begin, 1, Mid, Indices.size(), 2))
2857 Wt = Op->getOperand(1);
2858 else
2859 return SDValue();
2860
2861 if (fitsRegularPattern<int>(Mid, 1, End, 0, 2))
2862 Ws = Op->getOperand(0);
2863 else if (fitsRegularPattern<int>(Mid, 1, End, Indices.size(), 2))
2864 Ws = Op->getOperand(1);
2865 else
2866 return SDValue();
2867
2868 return DAG.getNode(MipsISD::PCKEV, SDLoc(Op), ResTy, Ws, Wt);
2869}
2870
2871// Lower VECTOR_SHUFFLE into PCKOD (if possible).
2872//
2873// PCKOD copies the odd elements of each vector into the result vector.
2874//
2875// It is possible to lower into PCKOD when the mask consists of two of the
2876// following forms concatenated:
2877// <1, 3, 5, ...>
2878// <n+1, n+3, n+5, ...>
2879// where n is the number of elements in the vector.
2880// For example:
2881// <1, 3, 5, ..., 1, 3, 5, ...>
2882// <1, 3, 5, ..., n+1, n+3, n+5, ...>
2883//
2884// When undef's appear in the mask they are treated as if they were whatever
2885// value is necessary in order to fit the above forms.
2886 static SDValue lowerVECTOR_SHUFFLE_PCKOD(SDValue Op, EVT ResTy,
2887 SmallVector<int, 16> Indices,
2888 SelectionDAG &DAG) {
2889 assert((Indices.size() % 2) == 0);
2890
2891 SDValue Wt;
2892 SDValue Ws;
2893 const auto &Begin = Indices.begin();
2894 const auto &Mid = Indices.begin() + Indices.size() / 2;
2895 const auto &End = Indices.end();
2896
2897 if (fitsRegularPattern<int>(Begin, 1, Mid, 1, 2))
2898 Wt = Op->getOperand(0);
2899 else if (fitsRegularPattern<int>(Begin, 1, Mid, Indices.size() + 1, 2))
2900 Wt = Op->getOperand(1);
2901 else
2902 return SDValue();
2903
2904 if (fitsRegularPattern<int>(Mid, 1, End, 1, 2))
2905 Ws = Op->getOperand(0);
2906 else if (fitsRegularPattern<int>(Mid, 1, End, Indices.size() + 1, 2))
2907 Ws = Op->getOperand(1);
2908 else
2909 return SDValue();
2910
2911 return DAG.getNode(MipsISD::PCKOD, SDLoc(Op), ResTy, Ws, Wt);
2912}
2913
2914// Lower VECTOR_SHUFFLE into VSHF.
2915//
2916// This mostly consists of converting the shuffle indices in Indices into a
2917// BUILD_VECTOR and adding it as an operand to the resulting VSHF. There is
2918// also code to eliminate unused operands of the VECTOR_SHUFFLE. For example,
2919// if the type is v8i16 and all the indices are less than 8 then the second
2920// operand is unused and can be replaced with anything. We choose to replace it
2921// with the used operand since this reduces the number of instructions overall.
2922 static SDValue lowerVECTOR_SHUFFLE_VSHF(SDValue Op, EVT ResTy,
2923 const SmallVector<int, 16> &Indices,
2924 SelectionDAG &DAG) {
2925 SmallVector<SDValue, 16> Ops;
2926 SDValue Op0;
2927 SDValue Op1;
2928 EVT MaskVecTy = ResTy.changeVectorElementTypeToInteger();
2929 EVT MaskEltTy = MaskVecTy.getVectorElementType();
2930 bool Using1stVec = false;
2931 bool Using2ndVec = false;
2932 SDLoc DL(Op);
2933 int ResTyNumElts = ResTy.getVectorNumElements();
2934
2935 for (int i = 0; i < ResTyNumElts; ++i) {
2936 // Idx == -1 means UNDEF
2937 int Idx = Indices[i];
2938
2939 if (0 <= Idx && Idx < ResTyNumElts)
2940 Using1stVec = true;
2941 if (ResTyNumElts <= Idx && Idx < ResTyNumElts * 2)
2942 Using2ndVec = true;
2943 }
2944
2945 for (int Idx : Indices)
2946 Ops.push_back(DAG.getTargetConstant(Idx, DL, MaskEltTy));
2947
2948 SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, Ops);
2949
2950 if (Using1stVec && Using2ndVec) {
2951 Op0 = Op->getOperand(0);
2952 Op1 = Op->getOperand(1);
2953 } else if (Using1stVec)
2954 Op0 = Op1 = Op->getOperand(0);
2955 else if (Using2ndVec)
2956 Op0 = Op1 = Op->getOperand(1);
2957 else
2958 llvm_unreachable("shuffle vector mask references neither vector operand?");
2959
2960 // VECTOR_SHUFFLE concatenates the vectors in a vectorwise fashion.
2961 // <0b00, 0b01> + <0b10, 0b11> -> <0b00, 0b01, 0b10, 0b11>
2962 // VSHF concatenates the vectors in a bitwise fashion:
2963 // <0b00, 0b01> + <0b10, 0b11> ->
2964 // 0b0100 + 0b1110 -> 0b01001110
2965 // <0b10, 0b11, 0b00, 0b01>
2966 // We must therefore swap the operands to get the correct result.
2967 return DAG.getNode(MipsISD::VSHF, DL, ResTy, MaskVec, Op1, Op0);
2968}
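// Concretely: mask index 0 must select the first element of the first
// VECTOR_SHUFFLE operand, and with VSHF's bitwise concatenation that element
// comes from the second vector operand of the node, hence the
// (MaskVec, Op1, Op0) operand order above.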
2969
2970// Lower VECTOR_SHUFFLE into one of a number of instructions depending on the
2971// indices in the shuffle.
2972SDValue MipsSETargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
2973 SelectionDAG &DAG) const {
2974 ShuffleVectorSDNode *Node = cast<ShuffleVectorSDNode>(Op);
2975 EVT ResTy = Op->getValueType(0);
2976
2977 if (!ResTy.is128BitVector())
2978 return SDValue();
2979
2980 int ResTyNumElts = ResTy.getVectorNumElements();
2981 SmallVector<int, 16> Indices;
2982
2983 for (int i = 0; i < ResTyNumElts; ++i)
2984 Indices.push_back(Node->getMaskElt(i));
2985
2986 // splati.[bhwd] is preferable to the others but is matched from
2987 // MipsISD::VSHF.
2988 if (isVECTOR_SHUFFLE_SPLATI(Op, ResTy, Indices, DAG))
2989 return lowerVECTOR_SHUFFLE_VSHF(Op, ResTy, Indices, DAG);
2990 SDValue Result;
2991 if ((Result = lowerVECTOR_SHUFFLE_ILVEV(Op, ResTy, Indices, DAG)))
2992 return Result;
2993 if ((Result = lowerVECTOR_SHUFFLE_ILVOD(Op, ResTy, Indices, DAG)))
2994 return Result;
2995 if ((Result = lowerVECTOR_SHUFFLE_ILVL(Op, ResTy, Indices, DAG)))
2996 return Result;
2997 if ((Result = lowerVECTOR_SHUFFLE_ILVR(Op, ResTy, Indices, DAG)))
2998 return Result;
2999 if ((Result = lowerVECTOR_SHUFFLE_PCKEV(Op, ResTy, Indices, DAG)))
3000 return Result;
3001 if ((Result = lowerVECTOR_SHUFFLE_PCKOD(Op, ResTy, Indices, DAG)))
3002 return Result;
3003 if ((Result = lowerVECTOR_SHUFFLE_SHF(Op, ResTy, Indices, DAG)))
3004 return Result;
3005 return lowerVECTOR_SHUFFLE_VSHF(Op, ResTy, Indices, DAG);
3006}
3007
3008 MachineBasicBlock *
3009 MipsSETargetLowering::emitBPOSGE32(MachineInstr &MI,
3010 MachineBasicBlock *BB) const {
3011 // $bb:
3012 // bposge32_pseudo $vr0
3013 // =>
3014 // $bb:
3015 // bposge32 $tbb
3016 // $fbb:
3017 // li $vr2, 0
3018 // b $sink
3019 // $tbb:
3020 // li $vr1, 1
3021 // $sink:
3022 // $vr0 = phi($vr2, $fbb, $vr1, $tbb)
3023
3024 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3025 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3026 const TargetRegisterClass *RC = &Mips::GPR32RegClass;
3027 DebugLoc DL = MI.getDebugLoc();
3028 const BasicBlock *LLVM_BB = BB->getBasicBlock();
3029 MachineFunction::iterator It = std::next(MachineFunction::iterator(BB));
3030 MachineFunction *F = BB->getParent();
3031 MachineBasicBlock *FBB = F->CreateMachineBasicBlock(LLVM_BB);
3032 MachineBasicBlock *TBB = F->CreateMachineBasicBlock(LLVM_BB);
3033 MachineBasicBlock *Sink = F->CreateMachineBasicBlock(LLVM_BB);
3034 F->insert(It, FBB);
3035 F->insert(It, TBB);
3036 F->insert(It, Sink);
3037
3038 // Transfer the remainder of BB and its successor edges to Sink.
3039 Sink->splice(Sink->begin(), BB, std::next(MachineBasicBlock::iterator(MI)),
3040 BB->end());
3041 Sink->transferSuccessorsAndUpdatePHIs(BB);
3042
3043 // Add successors.
3044 BB->addSuccessor(FBB);
3045 BB->addSuccessor(TBB);
3046 FBB->addSuccessor(Sink);
3047 TBB->addSuccessor(Sink);
3048
3049 // Insert the real bposge32 (bposge32c for microMIPS) instruction to $BB.
3050 if (Subtarget.inMicroMipsMode())
3051 BuildMI(BB, DL, TII->get(Mips::BPOSGE32C_MMR3)).addMBB(TBB);
3052 else BuildMI(BB, DL, TII->get(Mips::BPOSGE32)).addMBB(TBB);
3053
3054 // Fill $FBB.
3055 Register VR2 = RegInfo.createVirtualRegister(RC);
3056 BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::ADDiu), VR2)
3057 .addReg(Mips::ZERO).addImm(0);
3058 BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::B)).addMBB(Sink);
3059
3060 // Fill $TBB.
3061 Register VR1 = RegInfo.createVirtualRegister(RC);
3062 BuildMI(*TBB, TBB->end(), DL, TII->get(Mips::ADDiu), VR1)
3063 .addReg(Mips::ZERO).addImm(1);
3064
3065 // Insert phi function to $Sink.
3066 BuildMI(*Sink, Sink->begin(), DL, TII->get(Mips::PHI),
3067 MI.getOperand(0).getReg())
3068 .addReg(VR2)
3069 .addMBB(FBB)
3070 .addReg(VR1)
3071 .addMBB(TBB);
3072
3073 MI.eraseFromParent(); // The pseudo instruction is gone now.
3074 return Sink;
3075}
3076
3077MachineBasicBlock *MipsSETargetLowering::emitMSACBranchPseudo(
3078 MachineInstr &MI, MachineBasicBlock *BB, unsigned BranchOp) const {
3079 // $bb:
3080 // vany_nonzero $rd, $ws
3081 // =>
3082 // $bb:
3083 // bnz.b $ws, $tbb
3084 // b $fbb
3085 // $fbb:
3086 // li $rd1, 0
3087 // b $sink
3088 // $tbb:
3089 // li $rd2, 1
3090 // $sink:
3091 // $rd = phi($rd1, $fbb, $rd2, $tbb)
3092
3093 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3094 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3095 const TargetRegisterClass *RC = &Mips::GPR32RegClass;
3096 DebugLoc DL = MI.getDebugLoc();
3097 const BasicBlock *LLVM_BB = BB->getBasicBlock();
3098 MachineFunction::iterator It = std::next(MachineFunction::iterator(BB));
3099 MachineFunction *F = BB->getParent();
3100 MachineBasicBlock *FBB = F->CreateMachineBasicBlock(LLVM_BB);
3101 MachineBasicBlock *TBB = F->CreateMachineBasicBlock(LLVM_BB);
3102 MachineBasicBlock *Sink = F->CreateMachineBasicBlock(LLVM_BB);
3103 F->insert(It, FBB);
3104 F->insert(It, TBB);
3105 F->insert(It, Sink);
3106
3107 // Transfer the remainder of BB and its successor edges to Sink.
3108 Sink->splice(Sink->begin(), BB, std::next(MachineBasicBlock::iterator(MI)),
3109 BB->end());
3110 Sink->transferSuccessorsAndUpdatePHIs(BB);
3111
3112 // Add successors.
3113 BB->addSuccessor(FBB);
3114 BB->addSuccessor(TBB);
3115 FBB->addSuccessor(Sink);
3116 TBB->addSuccessor(Sink);
3117
3118 // Insert the real bnz.b instruction to $BB.
3119 BuildMI(BB, DL, TII->get(BranchOp))
3120 .addReg(MI.getOperand(1).getReg())
3121 .addMBB(TBB);
3122
3123 // Fill $FBB.
3124 Register RD1 = RegInfo.createVirtualRegister(RC);
3125 BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::ADDiu), RD1)
3126 .addReg(Mips::ZERO).addImm(0);
3127 BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::B)).addMBB(Sink);
3128
3129 // Fill $TBB.
3130 Register RD2 = RegInfo.createVirtualRegister(RC);
3131 BuildMI(*TBB, TBB->end(), DL, TII->get(Mips::ADDiu), RD2)
3132 .addReg(Mips::ZERO).addImm(1);
3133
3134 // Insert phi function to $Sink.
3135 BuildMI(*Sink, Sink->begin(), DL, TII->get(Mips::PHI),
3136 MI.getOperand(0).getReg())
3137 .addReg(RD1)
3138 .addMBB(FBB)
3139 .addReg(RD2)
3140 .addMBB(TBB);
3141
3142 MI.eraseFromParent(); // The pseudo instruction is gone now.
3143 return Sink;
3144}
3145
3146// Emit the COPY_FW pseudo instruction.
3147//
3148// copy_fw_pseudo $fd, $ws, n
3149// =>
3150// copy_u_w $rt, $ws, $n
3151// mtc1 $rt, $fd
3152//
3153// When n is zero, the equivalent operation can be performed with (potentially)
3154// zero instructions due to register overlaps. This optimization is never valid
3155// for lane 1 because it would require FR=0 mode which isn't supported by MSA.
3156 MachineBasicBlock *
3157 MipsSETargetLowering::emitCOPY_FW(MachineInstr &MI,
3158 MachineBasicBlock *BB) const {
3159 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3160 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3161 DebugLoc DL = MI.getDebugLoc();
3162 Register Fd = MI.getOperand(0).getReg();
3163 Register Ws = MI.getOperand(1).getReg();
3164 unsigned Lane = MI.getOperand(2).getImm();
3165
3166 if (Lane == 0) {
3167 unsigned Wt = Ws;
3168 if (!Subtarget.useOddSPReg()) {
3169 // We must copy to an even-numbered MSA register so that the
3170 // single-precision sub-register is also guaranteed to be even-numbered.
3171 Wt = RegInfo.createVirtualRegister(&Mips::MSA128WEvensRegClass);
3172
3173 BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Wt).addReg(Ws);
3174 }
3175
3176 BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Wt, 0, Mips::sub_lo);
3177 } else {
3178 Register Wt = RegInfo.createVirtualRegister(
3179 Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass
3180 : &Mips::MSA128WEvensRegClass);
3181
3182 BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_W), Wt).addReg(Ws).addImm(Lane);
3183 BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Wt, 0, Mips::sub_lo);
3184 }
3185
3186 MI.eraseFromParent(); // The pseudo instruction is gone now.
3187 return BB;
3188}
3189
3190// Emit the COPY_FD pseudo instruction.
3191//
3192// copy_fd_pseudo $fd, $ws, n
3193// =>
3194// splati.d $wt, $ws, $n
3195// copy $fd, $wt:sub_64
3196//
3197// When n is zero, the equivalent operation can be performed with (potentially)
3198// zero instructions due to register overlaps. This optimization is always
3199 // valid because FR=1 is the only FPU mode supported by MSA.
3200 MachineBasicBlock *
3201 MipsSETargetLowering::emitCOPY_FD(MachineInstr &MI,
3202 MachineBasicBlock *BB) const {
3203 assert(Subtarget.isFP64bit());
3204
3205 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3206 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3207 Register Fd = MI.getOperand(0).getReg();
3208 Register Ws = MI.getOperand(1).getReg();
3209 unsigned Lane = MI.getOperand(2).getImm() * 2;
3210 DebugLoc DL = MI.getDebugLoc();
3211
3212 if (Lane == 0)
3213 BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Ws, 0, Mips::sub_64);
3214 else {
3215 Register Wt = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass);
3216
3217 BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_D), Wt).addReg(Ws).addImm(1);
3218 BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Wt, 0, Mips::sub_64);
3219 }
3220
3221 MI.eraseFromParent(); // The pseudo instruction is gone now.
3222 return BB;
3223}
3224
3225// Emit the INSERT_FW pseudo instruction.
3226//
3227// insert_fw_pseudo $wd, $wd_in, $n, $fs
3228// =>
3229// subreg_to_reg $wt:sub_lo, $fs
3230// insve_w $wd[$n], $wd_in, $wt[0]
3231 MachineBasicBlock *
3232 MipsSETargetLowering::emitINSERT_FW(MachineInstr &MI,
3233 MachineBasicBlock *BB) const {
3234 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3235 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3236 DebugLoc DL = MI.getDebugLoc();
3237 Register Wd = MI.getOperand(0).getReg();
3238 Register Wd_in = MI.getOperand(1).getReg();
3239 unsigned Lane = MI.getOperand(2).getImm();
3240 Register Fs = MI.getOperand(3).getReg();
3241 Register Wt = RegInfo.createVirtualRegister(
3242 Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass
3243 : &Mips::MSA128WEvensRegClass);
3244
3245 BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Wt)
3246 .addImm(0)
3247 .addReg(Fs)
3248 .addImm(Mips::sub_lo);
3249 BuildMI(*BB, MI, DL, TII->get(Mips::INSVE_W), Wd)
3250 .addReg(Wd_in)
3251 .addImm(Lane)
3252 .addReg(Wt)
3253 .addImm(0);
3254
3255 MI.eraseFromParent(); // The pseudo instruction is gone now.
3256 return BB;
3257}
3258
3259// Emit the INSERT_FD pseudo instruction.
3260//
3261// insert_fd_pseudo $wd, $fs, n
3262// =>
3263// subreg_to_reg $wt:sub_64, $fs
3264// insve_d $wd[$n], $wd_in, $wt[0]
3265 MachineBasicBlock *
3266 MipsSETargetLowering::emitINSERT_FD(MachineInstr &MI,
3267 MachineBasicBlock *BB) const {
3268 assert(Subtarget.isFP64bit());
3269
3270 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3271 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3272 DebugLoc DL = MI.getDebugLoc();
3273 Register Wd = MI.getOperand(0).getReg();
3274 Register Wd_in = MI.getOperand(1).getReg();
3275 unsigned Lane = MI.getOperand(2).getImm();
3276 Register Fs = MI.getOperand(3).getReg();
3277 Register Wt = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass);
3278
3279 BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Wt)
3280 .addImm(0)
3281 .addReg(Fs)
3282 .addImm(Mips::sub_64);
3283 BuildMI(*BB, MI, DL, TII->get(Mips::INSVE_D), Wd)
3284 .addReg(Wd_in)
3285 .addImm(Lane)
3286 .addReg(Wt)
3287 .addImm(0);
3288
3289 MI.eraseFromParent(); // The pseudo instruction is gone now.
3290 return BB;
3291}
3292
3293// Emit the INSERT_([BHWD]|F[WD])_VIDX pseudo instruction.
3294//
3295// For integer:
3296// (INSERT_([BHWD]|F[WD])_PSEUDO $wd, $wd_in, $n, $rs)
3297// =>
3298 // (SLL $lanetmp1, $lane, <log2size>)
3299// (SLD_B $wdtmp1, $wd_in, $wd_in, $lanetmp1)
3300// (INSERT_[BHWD], $wdtmp2, $wdtmp1, 0, $rs)
3301// (NEG $lanetmp2, $lanetmp1)
3302// (SLD_B $wd, $wdtmp2, $wdtmp2, $lanetmp2)
3303//
3304// For floating point:
3305// (INSERT_([BHWD]|F[WD])_PSEUDO $wd, $wd_in, $n, $fs)
3306// =>
3307// (SUBREG_TO_REG $wt, $fs, <subreg>)
3308 // (SLL $lanetmp1, $lane, <log2size>)
3309// (SLD_B $wdtmp1, $wd_in, $wd_in, $lanetmp1)
3310// (INSVE_[WD], $wdtmp2, 0, $wdtmp1, 0)
3311// (NEG $lanetmp2, $lanetmp1)
3312// (SLD_B $wd, $wdtmp2, $wdtmp2, $lanetmp2)
3313MachineBasicBlock *MipsSETargetLowering::emitINSERT_DF_VIDX(
3314 MachineInstr &MI, MachineBasicBlock *BB, unsigned EltSizeInBytes,
3315 bool IsFP) const {
3316 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3317 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3318 DebugLoc DL = MI.getDebugLoc();
3319 Register Wd = MI.getOperand(0).getReg();
3320 Register SrcVecReg = MI.getOperand(1).getReg();
3321 Register LaneReg = MI.getOperand(2).getReg();
3322 Register SrcValReg = MI.getOperand(3).getReg();
3323
3324 const TargetRegisterClass *VecRC = nullptr;
3325 // FIXME: This should be true for N32 too.
3326 const TargetRegisterClass *GPRRC =
3327 Subtarget.isABI_N64() ? &Mips::GPR64RegClass : &Mips::GPR32RegClass;
3328 unsigned SubRegIdx = Subtarget.isABI_N64() ? Mips::sub_32 : 0;
3329 unsigned ShiftOp = Subtarget.isABI_N64() ? Mips::DSLL : Mips::SLL;
3330 unsigned EltLog2Size;
3331 unsigned InsertOp = 0;
3332 unsigned InsveOp = 0;
3333 switch (EltSizeInBytes) {
3334 default:
3335 llvm_unreachable("Unexpected size");
3336 case 1:
3337 EltLog2Size = 0;
3338 InsertOp = Mips::INSERT_B;
3339 InsveOp = Mips::INSVE_B;
3340 VecRC = &Mips::MSA128BRegClass;
3341 break;
3342 case 2:
3343 EltLog2Size = 1;
3344 InsertOp = Mips::INSERT_H;
3345 InsveOp = Mips::INSVE_H;
3346 VecRC = &Mips::MSA128HRegClass;
3347 break;
3348 case 4:
3349 EltLog2Size = 2;
3350 InsertOp = Mips::INSERT_W;
3351 InsveOp = Mips::INSVE_W;
3352 VecRC = &Mips::MSA128WRegClass;
3353 break;
3354 case 8:
3355 EltLog2Size = 3;
3356 InsertOp = Mips::INSERT_D;
3357 InsveOp = Mips::INSVE_D;
3358 VecRC = &Mips::MSA128DRegClass;
3359 break;
3360 }
3361
3362 if (IsFP) {
3363 Register Wt = RegInfo.createVirtualRegister(VecRC);
3364 BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Wt)
3365 .addImm(0)
3366 .addReg(SrcValReg)
3367 .addImm(EltSizeInBytes == 8 ? Mips::sub_64 : Mips::sub_lo);
3368 SrcValReg = Wt;
3369 }
3370
3371 // Convert the lane index into a byte index
3372 if (EltSizeInBytes != 1) {
3373 Register LaneTmp1 = RegInfo.createVirtualRegister(GPRRC);
3374 BuildMI(*BB, MI, DL, TII->get(ShiftOp), LaneTmp1)
3375 .addReg(LaneReg)
3376 .addImm(EltLog2Size);
3377 LaneReg = LaneTmp1;
3378 }
3379
3380 // Rotate bytes around so that the desired lane is element zero
3381 Register WdTmp1 = RegInfo.createVirtualRegister(VecRC);
3382 BuildMI(*BB, MI, DL, TII->get(Mips::SLD_B), WdTmp1)
3383 .addReg(SrcVecReg)
3384 .addReg(SrcVecReg)
3385 .addReg(LaneReg, 0, SubRegIdx);
3386
3387 Register WdTmp2 = RegInfo.createVirtualRegister(VecRC);
3388 if (IsFP) {
3389 // Use insve.df to insert to element zero
3390 BuildMI(*BB, MI, DL, TII->get(InsveOp), WdTmp2)
3391 .addReg(WdTmp1)
3392 .addImm(0)
3393 .addReg(SrcValReg)
3394 .addImm(0);
3395 } else {
3396 // Use insert.df to insert to element zero
3397 BuildMI(*BB, MI, DL, TII->get(InsertOp), WdTmp2)
3398 .addReg(WdTmp1)
3399 .addReg(SrcValReg)
3400 .addImm(0);
3401 }
3402
3403 // Rotate elements the rest of the way for a full rotation.
3404 // sld.df interprets $rt modulo the number of columns so we only need to negate
3405 // the lane index to do this.
3406 Register LaneTmp2 = RegInfo.createVirtualRegister(GPRRC);
3407 BuildMI(*BB, MI, DL, TII->get(Subtarget.isABI_N64() ? Mips::DSUB : Mips::SUB),
3408 LaneTmp2)
3409 .addReg(Subtarget.isABI_N64() ? Mips::ZERO_64 : Mips::ZERO)
3410 .addReg(LaneReg);
3411 BuildMI(*BB, MI, DL, TII->get(Mips::SLD_B), Wd)
3412 .addReg(WdTmp2)
3413 .addReg(WdTmp2)
3414 .addReg(LaneTmp2, 0, SubRegIdx);
3415
3416 MI.eraseFromParent(); // The pseudo instruction is gone now.
3417 return BB;
3418}
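// Worked example (illustrative only): inserting into variable lane 2 of a
// v4i32. The lane index is shifted left by log2(4) = 2 to give byte index 8;
// the first sld.b rotates the vector so that byte 8 (lane 2) becomes lane 0;
// the value is inserted at lane 0; negating the byte index (sld.b interprets
// $rt modulo 16) then rotates every element back to its original position.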
3419
3420// Emit the FILL_FW pseudo instruction.
3421//
3422// fill_fw_pseudo $wd, $fs
3423// =>
3424// implicit_def $wt1
3425// insert_subreg $wt2:subreg_lo, $wt1, $fs
3426// splati.w $wd, $wt2[0]
3427 MachineBasicBlock *
3428 MipsSETargetLowering::emitFILL_FW(MachineInstr &MI,
3429 MachineBasicBlock *BB) const {
3430 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3431 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3432 DebugLoc DL = MI.getDebugLoc();
3433 Register Wd = MI.getOperand(0).getReg();
3434 Register Fs = MI.getOperand(1).getReg();
3435 Register Wt1 = RegInfo.createVirtualRegister(
3436 Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass
3437 : &Mips::MSA128WEvensRegClass);
3438 Register Wt2 = RegInfo.createVirtualRegister(
3439 Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass
3440 : &Mips::MSA128WEvensRegClass);
3441
3442 BuildMI(*BB, MI, DL, TII->get(Mips::IMPLICIT_DEF), Wt1);
3443 BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_SUBREG), Wt2)
3444 .addReg(Wt1)
3445 .addReg(Fs)
3446 .addImm(Mips::sub_lo);
3447 BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_W), Wd).addReg(Wt2).addImm(0);
3448
3449 MI.eraseFromParent(); // The pseudo instruction is gone now.
3450 return BB;
3451}
3452
3453// Emit the FILL_FD pseudo instruction.
3454//
3455// fill_fd_pseudo $wd, $fs
3456// =>
3457// implicit_def $wt1
3458// insert_subreg $wt2:subreg_64, $wt1, $fs
3459// splati.d $wd, $wt2[0]
3460 MachineBasicBlock *
3461 MipsSETargetLowering::emitFILL_FD(MachineInstr &MI,
3462 MachineBasicBlock *BB) const {
3463 assert(Subtarget.isFP64bit());
3464
3465 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3466 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3467 DebugLoc DL = MI.getDebugLoc();
3468 Register Wd = MI.getOperand(0).getReg();
3469 Register Fs = MI.getOperand(1).getReg();
3470 Register Wt1 = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass);
3471 Register Wt2 = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass);
3472
3473 BuildMI(*BB, MI, DL, TII->get(Mips::IMPLICIT_DEF), Wt1);
3474 BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_SUBREG), Wt2)
3475 .addReg(Wt1)
3476 .addReg(Fs)
3477 .addImm(Mips::sub_64);
3478 BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_D), Wd).addReg(Wt2).addImm(0);
3479
3480 MI.eraseFromParent(); // The pseudo instruction is gone now.
3481 return BB;
3482}
3483
3484 // Emit the ST_F16_PSEUDO instruction to store an f16 value from an MSA
3485 // register.
3486//
3487// STF16 MSA128F16:$wd, mem_simm10:$addr
3488// =>
3489// copy_u.h $rtemp,$wd[0]
3490// sh $rtemp, $addr
3491//
3492 // Safety: We can't use st.h & co as they would overwrite the memory after
3493 // the destination. It would require half floats to be allocated 16 bytes(!)
3494 // of space.
3495 MachineBasicBlock *
3496 MipsSETargetLowering::emitST_F16_PSEUDO(MachineInstr &MI,
3497 MachineBasicBlock *BB) const {
3498
3499 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3500 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3501 DebugLoc DL = MI.getDebugLoc();
3502 Register Ws = MI.getOperand(0).getReg();
3503 Register Rt = MI.getOperand(1).getReg();
3504 const MachineMemOperand &MMO = **MI.memoperands_begin();
3505 unsigned Imm = MMO.getOffset();
3506
3507 // Caution: A load via the GOT can expand to a GPR32 operand, a load via
3508 // spill and reload can expand as a GPR64 operand. Examine the
3509 // operand in detail and default to the ABI register class.
3510 const TargetRegisterClass *RC =
3511 MI.getOperand(1).isReg() ? RegInfo.getRegClass(MI.getOperand(1).getReg())
3512 : (Subtarget.isABI_O32() ? &Mips::GPR32RegClass
3513 : &Mips::GPR64RegClass);
3514 const bool UsingMips32 = RC == &Mips::GPR32RegClass;
3515 Register Rs = RegInfo.createVirtualRegister(&Mips::GPR32RegClass);
3516
3517 BuildMI(*BB, MI, DL, TII->get(Mips::COPY_U_H), Rs).addReg(Ws).addImm(0);
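 // copy_u.h zero-extends lane 0 of $ws into a 32-bit GPR. On 64-bit
 // subtargets, SUBREG_TO_REG widens it below, asserting that the bits
 // above sub_32 are zero, so SH64 receives a well-formed GPR64 operand.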
3518 if (!UsingMips32) {
3519 Register Tmp = RegInfo.createVirtualRegister(&Mips::GPR64RegClass);
3520 BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Tmp)
3521 .addImm(0)
3522 .addReg(Rs)
3523 .addImm(Mips::sub_32);
3524 Rs = Tmp;
3525 }
3526 BuildMI(*BB, MI, DL, TII->get(UsingMips32 ? Mips::SH : Mips::SH64))
3527 .addReg(Rs)
3528 .addReg(Rt)
3529 .addImm(Imm)
3530 .addMemOperand(BB->getParent()->getMachineMemOperand(
3531 &MMO, MMO.getOffset(), MMO.getSize()));
3532
3533 MI.eraseFromParent();
3534 return BB;
3535}
3536
3537 // Emit the LD_F16_PSEUDO instruction to load an f16 value into an MSA register.
3538 //
3539 // LD_F16 MSA128F16:$wd, mem_simm10:$addr
3540 // =>
3541 // lh $rtemp, $addr
3542 // fill.h $wd, $rtemp
3543 //
3544 // Safety: We can't use ld.h & co as they over-read from the source.
3545 // Additionally, if the address is not a multiple of 16, two cases can occur:
3546 // a) A segmentation fault, as the load instruction reads from a memory
3547 // page it's not supposed to.
3548 // b) The load crosses an implementation-specific boundary, requiring OS
3549 // intervention.
3550 MachineBasicBlock *
3551 MipsSETargetLowering::emitLD_F16_PSEUDO(MachineInstr &MI,
3552 MachineBasicBlock *BB) const {
3553
3554 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3555 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3556 DebugLoc DL = MI.getDebugLoc();
3557 Register Wd = MI.getOperand(0).getReg();
3558
3559 // Caution: A load via the GOT can expand to a GPR32 operand, while a load
3560 // via spill and reload can expand to a GPR64 operand. Examine the
3561 // operand in detail and fall back to the ABI default.
3562 const TargetRegisterClass *RC =
3563 MI.getOperand(1).isReg() ? RegInfo.getRegClass(MI.getOperand(1).getReg())
3564 : (Subtarget.isABI_O32() ? &Mips::GPR32RegClass
3565 : &Mips::GPR64RegClass);
3566
3567 const bool UsingMips32 = RC == &Mips::GPR32RegClass;
3568 Register Rt = RegInfo.createVirtualRegister(RC);
3569
3571 BuildMI(*BB, MI, DL, TII->get(UsingMips32 ? Mips::LH : Mips::LH64), Rt);
3572 for (const MachineOperand &MO : llvm::drop_begin(MI.operands()))
3573 MIB.add(MO);
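 // The operands copied above are the address operands of the load (base
 // register plus offset), forwarded unchanged from the pseudo.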
3574
3575 if (!UsingMips32) {
3576 Register Tmp = RegInfo.createVirtualRegister(&Mips::GPR32RegClass);
3577 BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Tmp).addReg(Rt, 0, Mips::sub_32);
3578 Rt = Tmp;
3579 }
3580
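 // fill.h replicates the loaded halfword into every lane of $wd, leaving
 // the whole destination register well-defined.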
3581 BuildMI(*BB, MI, DL, TII->get(Mips::FILL_H), Wd).addReg(Rt);
3582
3583 MI.eraseFromParent();
3584 return BB;
3585}
3586
3587// Emit the FPROUND_PSEUDO instruction.
3588//
3589// Round an FGR64Opnd, FGR32Opnd to an f16.
3590//
3591// Safety: Cycle the operand through the GPRs so the result always ends up
3592// the correct MSA register.
3593//
3594// FIXME: This copying is strictly unnecessary. If we could tie FGR32Opnd:$Fs
3595// / FGR64Opnd:$Fs and MSA128F16:$Wd to the same physical register
3596// (which they can be, as the MSA registers are defined to alias the
3597// FPU's 64 bit and 32 bit registers) the result can be accessed using
3598// the correct register class. That requires operands be tie-able across
3599// register classes which have a sub/super register class relationship.
3600//
3601 // For FGR32Opnd:
3602 //
3603 // FPROUND MSA128F16:$wd, FGR32Opnd:$fs
3604 // =>
3605 // mfc1 $rtemp, $fs
3606 // fill.w $wtemp, $rtemp
3607 // fexdo.h $wd, $wtemp, $wtemp
3608 //
3609 // For FGR64Opnd on mips32r2+:
3610 //
3611 // FPROUND MSA128F16:$wd, FGR64Opnd:$fs
3612 // =>
3613 // mfc1 $rtemp, $fs
3614 // fill.w $wtemp, $rtemp
3615 // mfhc1 $rtemp2, $fs
3616 // insert.w $wtemp[1], $rtemp2
3617 // insert.w $wtemp[3], $rtemp2
3618 // fexdo.w $wtemp2, $wtemp, $wtemp
3619 // fexdo.h $wd, $wtemp2, $wtemp2
3620 //
3621 // For FGR64Opnd on mips64r2+:
3622 //
3623 // FPROUND MSA128F16:$wd, FGR64Opnd:$fs
3624 // =>
3625 // dmfc1 $rtemp, $fs
3626 // fill.d $wtemp, $rtemp
3627 // fexdo.w $wtemp2, $wtemp, $wtemp
3628 // fexdo.h $wd, $wtemp2, $wtemp2
3629 //
3630 // Safety note: As $wtemp is UNDEF, we may provoke a spurious exception if the
3631 // undef bits are "just right" and the exception enable bits are
3632 // set. By using fill.w to replicate $fs into all elements rather
3633 // than insert.w for a single element, we avoid that potential
3634 // case. If fexdo.[hw] causes an exception, the exception is valid
3635 // and it occurs for all elements.
3636 MachineBasicBlock *
3637 MipsSETargetLowering::emitFPROUND_PSEUDO(MachineInstr &MI,
3638 MachineBasicBlock *BB,
3639 bool IsFGR64) const {
3640
3641 // Strictly speaking, we need MIPS32R5 to support MSA. We'll be generous
3642 // and only require MIPS32R2 here. Supporting plain MIPS32 would be
3643 // technically doable, but the ISA forbids it.
3644 assert(Subtarget.hasMSA() && Subtarget.hasMips32r2());
3645
3646 bool IsFGR64onMips64 = Subtarget.hasMips64() && IsFGR64;
3647 bool IsFGR64onMips32 = !Subtarget.hasMips64() && IsFGR64;
3648
3649 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3650 DebugLoc DL = MI.getDebugLoc();
3651 Register Wd = MI.getOperand(0).getReg();
3652 Register Fs = MI.getOperand(1).getReg();
3653
3654 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3655 Register Wtemp = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
3656 const TargetRegisterClass *GPRRC =
3657 IsFGR64onMips64 ? &Mips::GPR64RegClass : &Mips::GPR32RegClass;
3658 unsigned MFC1Opc = IsFGR64onMips64
3659 ? Mips::DMFC1
3660 : (IsFGR64onMips32 ? Mips::MFC1_D64 : Mips::MFC1);
3661 unsigned FILLOpc = IsFGR64onMips64 ? Mips::FILL_D : Mips::FILL_W;
3662
3663 // Perform the register class copy as mentioned above.
3664 Register Rtemp = RegInfo.createVirtualRegister(GPRRC);
3665 BuildMI(*BB, MI, DL, TII->get(MFC1Opc), Rtemp).addReg(Fs);
3666 BuildMI(*BB, MI, DL, TII->get(FILLOpc), Wtemp).addReg(Rtemp);
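 // WPHI tracks the most recently defined vector value across the
 // optional steps below; each step leaves its result here.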
3667 Register WPHI = Wtemp;
3668
3669 if (IsFGR64onMips32) {
3670 Register Rtemp2 = RegInfo.createVirtualRegister(GPRRC);
3671 BuildMI(*BB, MI, DL, TII->get(Mips::MFHC1_D64), Rtemp2).addReg(Fs);
3672 Register Wtemp2 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
3673 Register Wtemp3 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
3674 BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_W), Wtemp2)
3675 .addReg(Wtemp)
3676 .addReg(Rtemp2)
3677 .addImm(1);
3678 BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_W), Wtemp3)
3679 .addReg(Wtemp2)
3680 .addReg(Rtemp2)
3681 .addImm(3);
3682 WPHI = Wtemp3;
3683 }
3684
3685 if (IsFGR64) {
3686 Register Wtemp2 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
3687 BuildMI(*BB, MI, DL, TII->get(Mips::FEXDO_W), Wtemp2)
3688 .addReg(WPHI)
3689 .addReg(WPHI);
3690 WPHI = Wtemp2;
3691 }
3692
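 // fexdo.h performs the final narrowing to f16; for f64 sources this is
 // the second of two narrowing steps (f64 -> f32 -> f16).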
3693 BuildMI(*BB, MI, DL, TII->get(Mips::FEXDO_H), Wd).addReg(WPHI).addReg(WPHI);
3694
3695 MI.eraseFromParent();
3696 return BB;
3697}
3698
3699// Emit the FPEXTEND_PSEUDO instruction.
3700//
3701 // Expand an f16 to either an FGR32Opnd or an FGR64Opnd.
3702//
3703// Safety: Cycle the result through the GPRs so the result always ends up
3704// the correct floating point register.
3705//
3706// FIXME: This copying is strictly unnecessary. If we could tie FGR32Opnd:$Fd
3707// / FGR64Opnd:$Fd and MSA128F16:$Ws to the same physical register
3708// (which they can be, as the MSA registers are defined to alias the
3709// FPU's 64 bit and 32 bit registers) the result can be accessed using
3710// the correct register class. That requires operands be tie-able across
3711// register classes which have a sub/super register class relationship. I
3712// haven't checked.
3713//
3714// For FGR32Opnd:
3715//
3716// FPEXTEND FGR32Opnd:$fd, MSA128F16:$ws
3717// =>
3718// fexupr.w $wtemp, $ws
3719 // copy_s.w $rtemp, $wtemp[0]
3720// mtc1 $rtemp, $fd
3721//
3722// For FGR64Opnd on Mips64:
3723//
3724// FPEXTEND FGR64Opnd:$fd, MSA128F16:$ws
3725// =>
3726// fexupr.w $wtemp, $ws
3727// fexupr.d $wtemp2, $wtemp
3728 // copy_s.d $rtemp, $wtemp2[0]
3729// dmtc1 $rtemp, $fd
3730//
3731// For FGR64Opnd on Mips32:
3732//
3733// FPEXTEND FGR64Opnd:$fd, MSA128F16:$ws
3734// =>
3735// fexupr.w $wtemp, $ws
3736// fexupr.d $wtemp2, $wtemp
3737// copy_s.w $rtemp, $wtemp2[0]
3738// mtc1 $rtemp, $ftemp
3739// copy_s.w $rtemp2, $wtemp2[1]
3740// $fd = mthc1 $rtemp2, $ftemp
3741 MachineBasicBlock *
3742 MipsSETargetLowering::emitFPEXTEND_PSEUDO(MachineInstr &MI,
3743 MachineBasicBlock *BB,
3744 bool IsFGR64) const {
3745
3746 // Strictly speaking, we need MIPS32R5 to support MSA. We'll be generous
3747 // and only require MIPS32R2 here. Supporting plain MIPS32 would be
3748 // technically doable, but the ISA forbids it.
3749 assert(Subtarget.hasMSA() && Subtarget.hasMips32r2());
3750
3751 bool IsFGR64onMips64 = Subtarget.hasMips64() && IsFGR64;
3752 bool IsFGR64onMips32 = !Subtarget.hasMips64() && IsFGR64;
3753
3754 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3755 DebugLoc DL = MI.getDebugLoc();
3756 Register Fd = MI.getOperand(0).getReg();
3757 Register Ws = MI.getOperand(1).getReg();
3758
3759 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3760 const TargetRegisterClass *GPRRC =
3761 IsFGR64onMips64 ? &Mips::GPR64RegClass : &Mips::GPR32RegClass;
3762 unsigned MTC1Opc = IsFGR64onMips64
3763 ? Mips::DMTC1
3764 : (IsFGR64onMips32 ? Mips::MTC1_D64 : Mips::MTC1);
3765 unsigned COPYOpc = IsFGR64onMips64 ? Mips::COPY_S_D : Mips::COPY_S_W;
3766
3767 Register Wtemp = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
3768 Register WPHI = Wtemp;
3769
3770 BuildMI(*BB, MI, DL, TII->get(Mips::FEXUPR_W), Wtemp).addReg(Ws);
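 // fexupr.w widens the f16 elements to f32; for f64 results, fexupr.d
 // below widens once more (f16 -> f32 -> f64).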
3771 if (IsFGR64) {
3772 WPHI = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass);
3773 BuildMI(*BB, MI, DL, TII->get(Mips::FEXUPR_D), WPHI).addReg(Wtemp);
3774 }
3775
3776 // Perform the safety regclass copy mentioned above.
3777 Register Rtemp = RegInfo.createVirtualRegister(GPRRC);
3778 Register FPRPHI = IsFGR64onMips32
3779 ? RegInfo.createVirtualRegister(&Mips::FGR64RegClass)
3780 : Fd;
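 // Note: on 32-bit subtargets an FGR64 temporary is needed because the
 // two 32-bit halves of the result are written separately, by mtc1 below
 // and then mthc1.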
3781 BuildMI(*BB, MI, DL, TII->get(COPYOpc), Rtemp).addReg(WPHI).addImm(0);
3782 BuildMI(*BB, MI, DL, TII->get(MTC1Opc), FPRPHI).addReg(Rtemp);
3783
3784 if (IsFGR64onMips32) {
3785 Register Rtemp2 = RegInfo.createVirtualRegister(GPRRC);
3786 BuildMI(*BB, MI, DL, TII->get(Mips::COPY_S_W), Rtemp2)
3787 .addReg(WPHI)
3788 .addImm(1);
3789 BuildMI(*BB, MI, DL, TII->get(Mips::MTHC1_D64), Fd)
3790 .addReg(FPRPHI)
3791 .addReg(Rtemp2);
3792 }
3793
3794 MI.eraseFromParent();
3795 return BB;
3796}
3797
3798 // Emit the FEXP2_W_1 pseudo instruction.
3799//
3800// fexp2_w_1_pseudo $wd, $wt
3801// =>
3802 // ldi.w $ws, 1 (ffint_u.w then converts the integer splat to 1.0)
3803// fexp2.w $wd, $ws, $wt
3804 MachineBasicBlock *
3805 MipsSETargetLowering::emitFEXP2_W_1(MachineInstr &MI,
3806 MachineBasicBlock *BB) const {
3807 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3808 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3809 const TargetRegisterClass *RC = &Mips::MSA128WRegClass;
3810 Register Ws1 = RegInfo.createVirtualRegister(RC);
3811 Register Ws2 = RegInfo.createVirtualRegister(RC);
3812 DebugLoc DL = MI.getDebugLoc();
3813
3814 // Splat 1.0 into a vector: materialize integer 1, then convert to float
3815 BuildMI(*BB, MI, DL, TII->get(Mips::LDI_W), Ws1).addImm(1);
3816 BuildMI(*BB, MI, DL, TII->get(Mips::FFINT_U_W), Ws2).addReg(Ws1);
3817
3818 // Emit 1.0 * fexp2(Wt)
3819 BuildMI(*BB, MI, DL, TII->get(Mips::FEXP2_W), MI.getOperand(0).getReg())
3820 .addReg(Ws2)
3821 .addReg(MI.getOperand(1).getReg());
3822
3823 MI.eraseFromParent(); // The pseudo instruction is gone now.
3824 return BB;
3825}
3826
3827 // Emit the FEXP2_D_1 pseudo instruction.
3828//
3829// fexp2_d_1_pseudo $wd, $wt
3830// =>
3831 // ldi.d $ws, 1 (ffint_u.d then converts the integer splat to 1.0)
3832// fexp2.d $wd, $ws, $wt
3833 MachineBasicBlock *
3834 MipsSETargetLowering::emitFEXP2_D_1(MachineInstr &MI,
3835 MachineBasicBlock *BB) const {
3836 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3837 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3838 const TargetRegisterClass *RC = &Mips::MSA128DRegClass;
3839 Register Ws1 = RegInfo.createVirtualRegister(RC);
3840 Register Ws2 = RegInfo.createVirtualRegister(RC);
3841 DebugLoc DL = MI.getDebugLoc();
3842
3843 // Splat 1.0 into a vector: materialize integer 1, then convert to double
3844 BuildMI(*BB, MI, DL, TII->get(Mips::LDI_D), Ws1).addImm(1);
3845 BuildMI(*BB, MI, DL, TII->get(Mips::FFINT_U_D), Ws2).addReg(Ws1);
3846
3847 // Emit 1.0 * fexp2(Wt)
3848 BuildMI(*BB, MI, DL, TII->get(Mips::FEXP2_D), MI.getOperand(0).getReg())
3849 .addReg(Ws2)
3850 .addReg(MI.getOperand(1).getReg());
3851
3852 MI.eraseFromParent(); // The pseudo instruction is gone now.
3853 return BB;
3854}