doxygen/ARMISelLowering_8cpp_source.html

//===- ARMISelLowering.cpp - ARM DAG Lowering Implementation --------------===//

//

// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

// See https://llvm.org/LICENSE.txt for license information.

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

//

//===----------------------------------------------------------------------===//

//

// This file defines the interfaces that ARM uses to lower LLVM code into a

// selection DAG.

//

//===----------------------------------------------------------------------===//


#include "ARMISelLowering.h"

#include "ARMBaseInstrInfo.h"

#include "ARMBaseRegisterInfo.h"

#include "ARMCallingConv.h"

#include "ARMConstantPoolValue.h"

#include "ARMMachineFunctionInfo.h"

#include "ARMPerfectShuffle.h"

#include "ARMRegisterInfo.h"

#include "ARMSelectionDAGInfo.h"

#include "ARMSubtarget.h"

#include "ARMTargetTransformInfo.h"

#include "MCTargetDesc/ARMAddressingModes.h"

#include "MCTargetDesc/ARMBaseInfo.h"

#include "Utils/ARMBaseInfo.h"

#include "llvm/ADT/APFloat.h"

#include "llvm/ADT/APInt.h"

#include "llvm/ADT/ArrayRef.h"

#include "llvm/ADT/BitVector.h"

#include "llvm/ADT/DenseMap.h"

#include "llvm/ADT/STLExtras.h"

#include "llvm/ADT/SmallPtrSet.h"

#include "llvm/ADT/SmallVector.h"

#include "llvm/ADT/Statistic.h"

#include "llvm/ADT/StringExtras.h"

#include "llvm/ADT/StringRef.h"

#include "llvm/ADT/StringSwitch.h"

#include "llvm/ADT/Twine.h"

#include "llvm/Analysis/VectorUtils.h"

#include "llvm/CodeGen/CallingConvLower.h"

#include "llvm/CodeGen/ComplexDeinterleavingPass.h"

#include "llvm/CodeGen/ISDOpcodes.h"

#include "llvm/CodeGen/MachineBasicBlock.h"

#include "llvm/CodeGen/MachineConstantPool.h"

#include "llvm/CodeGen/MachineFrameInfo.h"

#include "llvm/CodeGen/MachineFunction.h"

#include "llvm/CodeGen/MachineInstr.h"

#include "llvm/CodeGen/MachineInstrBuilder.h"

#include "llvm/CodeGen/MachineJumpTableInfo.h"

#include "llvm/CodeGen/MachineMemOperand.h"

#include "llvm/CodeGen/MachineOperand.h"

#include "llvm/CodeGen/MachineRegisterInfo.h"

#include "llvm/CodeGen/RuntimeLibcallUtil.h"

#include "llvm/CodeGen/SelectionDAG.h"

#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"

#include "llvm/CodeGen/SelectionDAGNodes.h"

#include "llvm/CodeGen/TargetInstrInfo.h"

#include "llvm/CodeGen/TargetLowering.h"

#include "llvm/CodeGen/TargetOpcodes.h"

#include "llvm/CodeGen/TargetRegisterInfo.h"

#include "llvm/CodeGen/TargetSubtargetInfo.h"

#include "llvm/CodeGen/ValueTypes.h"

#include "llvm/CodeGenTypes/MachineValueType.h"

#include "llvm/IR/Attributes.h"

#include "llvm/IR/CallingConv.h"

#include "llvm/IR/Constant.h"

#include "llvm/IR/Constants.h"

#include "llvm/IR/DataLayout.h"

#include "llvm/IR/DebugLoc.h"

#include "llvm/IR/DerivedTypes.h"

#include "llvm/IR/Function.h"

#include "llvm/IR/GlobalAlias.h"

#include "llvm/IR/GlobalValue.h"

#include "llvm/IR/GlobalVariable.h"

#include "llvm/IR/IRBuilder.h"

#include "llvm/IR/InlineAsm.h"

#include "llvm/IR/Instruction.h"

#include "llvm/IR/Instructions.h"

#include "llvm/IR/IntrinsicInst.h"

#include "llvm/IR/Intrinsics.h"

#include "llvm/IR/IntrinsicsARM.h"

#include "llvm/IR/Module.h"

#include "llvm/IR/Type.h"

#include "llvm/IR/User.h"

#include "llvm/IR/Value.h"

#include "llvm/MC/MCInstrDesc.h"

#include "llvm/MC/MCInstrItineraries.h"

#include "llvm/MC/MCSchedule.h"

#include "llvm/Support/AtomicOrdering.h"

#include "llvm/Support/BranchProbability.h"

#include "llvm/Support/Casting.h"

#include "llvm/Support/CodeGen.h"

#include "llvm/Support/CommandLine.h"

#include "llvm/Support/Compiler.h"

#include "llvm/Support/Debug.h"

#include "llvm/Support/ErrorHandling.h"

#include "llvm/Support/KnownBits.h"

#include "llvm/Support/MathExtras.h"

#include "llvm/Support/raw_ostream.h"

#include "llvm/Target/TargetMachine.h"

#include "llvm/Target/TargetOptions.h"

#include "llvm/TargetParser/Triple.h"

#include <algorithm>

#include <cassert>

#include <cstdint>

#include <cstdlib>

#include <iterator>

#include <limits>

#include <optional>

#include <tuple>

#include <utility>

#include <vector>


using namespace llvm;


#define DEBUG_TYPE "arm-isel"


STATISTIC(NumTailCalls, "Number of tail calls");

STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");

STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments");

STATISTIC(NumConstpoolPromoted,

  "Number of constants with their storage promoted into constant pools");


static cl::opt<bool>

ARMInterworking("arm-interworking", cl::Hidden,

  cl::desc("Enable / disable ARM interworking (for debugging only)"),

  cl::init(true));


static cl::opt<bool> EnableConstpoolPromotion(

    "arm-promote-constant", cl::Hidden,

    cl::desc("Enable / disable promotion of unnamed_addr constants into "

             "constant pools"),

    cl::init(false)); // FIXME: set to true by default once PR32780 is fixed

static cl::opt<unsigned> ConstpoolPromotionMaxSize(

    "arm-promote-constant-max-size", cl::Hidden,

    cl::desc("Maximum size of constant to promote into a constant pool"),

    cl::init(64));

static cl::opt<unsigned> ConstpoolPromotionMaxTotal(

    "arm-promote-constant-max-total", cl::Hidden,

    cl::desc("Maximum size of ALL constants to promote into a constant pool"),

    cl::init(128));


cl::opt<unsigned>

MVEMaxSupportedInterleaveFactor("mve-max-interleave-factor", cl::Hidden,

  cl::desc("Maximum interleave factor for MVE VLDn to generate."),

  cl::init(2));


cl::opt<unsigned> ArmMaxBaseUpdatesToCheck(

    "arm-max-base-updates-to-check", cl::Hidden,

    cl::desc("Maximum number of base-updates to check generating postindex."),

    cl::init(64));


/// Value type used for "flags" operands / results (either CPSR or FPSCR_NZCV).

constexpr MVT FlagsVT = MVT::i32;


// The APCS parameter registers.


static const MCPhysReg GPRArgRegs[] = {

  ARM::R0, ARM::R1, ARM::R2, ARM::R3

};


static SDValue handleCMSEValue(const SDValue &Value, const ISD::InputArg &Arg,

                               SelectionDAG &DAG, const SDLoc &DL) {

  assert(Arg.ArgVT.isScalarInteger());

  assert(Arg.ArgVT.bitsLT(MVT::i32));

  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, Arg.ArgVT, Value);

  SDValue Ext =

      DAG.getNode(Arg.Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, DL,

                  MVT::i32, Trunc);

  return Ext;

}


void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT) {

  if (VT != PromotedLdStVT) {

    setOperationAction(ISD::LOAD, VT, Promote);

    AddPromotedToType (ISD::LOAD, VT, PromotedLdStVT);


    setOperationAction(ISD::STORE, VT, Promote);

    AddPromotedToType (ISD::STORE, VT, PromotedLdStVT);

  }


  MVT ElemTy = VT.getVectorElementType();

  if (ElemTy != MVT::f64)

    setOperationAction(ISD::SETCC, VT, Custom);

  setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);

  setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);

  if (ElemTy == MVT::i32) {

    setOperationAction(ISD::SINT_TO_FP, VT, Custom);

    setOperationAction(ISD::UINT_TO_FP, VT, Custom);

    setOperationAction(ISD::FP_TO_SINT, VT, Custom);

    setOperationAction(ISD::FP_TO_UINT, VT, Custom);

  } else {

    setOperationAction(ISD::SINT_TO_FP, VT, Expand);

    setOperationAction(ISD::UINT_TO_FP, VT, Expand);

    setOperationAction(ISD::FP_TO_SINT, VT, Expand);

    setOperationAction(ISD::FP_TO_UINT, VT, Expand);

  }

  setOperationAction(ISD::BUILD_VECTOR,      VT, Custom);

  setOperationAction(ISD::VECTOR_SHUFFLE,    VT, Custom);

  setOperationAction(ISD::CONCAT_VECTORS,    VT, Legal);

  setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);

  setOperationAction(ISD::SELECT,            VT, Expand);

  setOperationAction(ISD::SELECT_CC,         VT, Expand);

  setOperationAction(ISD::VSELECT,           VT, Expand);

  setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);

  if (VT.isInteger()) {

    setOperationAction(ISD::SHL, VT, Custom);

    setOperationAction(ISD::SRA, VT, Custom);

    setOperationAction(ISD::SRL, VT, Custom);

  }


  // Neon does not support vector divide/remainder operations.

  setOperationAction(ISD::SDIV, VT, Expand);

  setOperationAction(ISD::UDIV, VT, Expand);

  setOperationAction(ISD::FDIV, VT, Expand);

  setOperationAction(ISD::SREM, VT, Expand);

  setOperationAction(ISD::UREM, VT, Expand);

  setOperationAction(ISD::FREM, VT, Expand);

  setOperationAction(ISD::SDIVREM, VT, Expand);

  setOperationAction(ISD::UDIVREM, VT, Expand);


  if (!VT.isFloatingPoint() && VT != MVT::v2i64 && VT != MVT::v1i64)

    for (auto Opcode : {ISD::ABS, ISD::ABDS, ISD::ABDU, ISD::SMIN, ISD::SMAX,

                        ISD::UMIN, ISD::UMAX})

      setOperationAction(Opcode, VT, Legal);

  if (!VT.isFloatingPoint())

    for (auto Opcode : {ISD::SADDSAT, ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT})

      setOperationAction(Opcode, VT, Legal);

}


void ARMTargetLowering::addDRTypeForNEON(MVT VT) {

  addRegisterClass(VT, &ARM::DPRRegClass);

  addTypeForNEON(VT, MVT::f64);

}


void ARMTargetLowering::addQRTypeForNEON(MVT VT) {

  addRegisterClass(VT, &ARM::DPairRegClass);

  addTypeForNEON(VT, MVT::v2f64);

}


void ARMTargetLowering::setAllExpand(MVT VT) {

  for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)

    setOperationAction(Opc, VT, Expand);


  // We support these really simple operations even on types where all

  // the actual arithmetic has to be broken down into simpler

  // operations or turned into library calls.

  setOperationAction(ISD::BITCAST, VT, Legal);

  setOperationAction(ISD::LOAD, VT, Legal);

  setOperationAction(ISD::STORE, VT, Legal);

  setOperationAction(ISD::UNDEF, VT, Legal);

}


void ARMTargetLowering::addAllExtLoads(const MVT From, const MVT To,

                                       LegalizeAction Action) {

  setLoadExtAction(ISD::EXTLOAD,  From, To, Action);

  setLoadExtAction(ISD::ZEXTLOAD, From, To, Action);

  setLoadExtAction(ISD::SEXTLOAD, From, To, Action);

}


void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {

  const MVT IntTypes[] = { MVT::v16i8, MVT::v8i16, MVT::v4i32 };


  for (auto VT : IntTypes) {

    addRegisterClass(VT, &ARM::MQPRRegClass);

    setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);

    setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);

    setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);

    setOperationAction(ISD::BUILD_VECTOR, VT, Custom);

    setOperationAction(ISD::SHL, VT, Custom);

    setOperationAction(ISD::SRA, VT, Custom);

    setOperationAction(ISD::SRL, VT, Custom);

    setOperationAction(ISD::SMIN, VT, Legal);

    setOperationAction(ISD::SMAX, VT, Legal);

    setOperationAction(ISD::UMIN, VT, Legal);

    setOperationAction(ISD::UMAX, VT, Legal);

    setOperationAction(ISD::ABS, VT, Legal);

    setOperationAction(ISD::SETCC, VT, Custom);

    setOperationAction(ISD::MLOAD, VT, Custom);

    setOperationAction(ISD::MSTORE, VT, Legal);

    setOperationAction(ISD::CTLZ, VT, Legal);

    setOperationAction(ISD::CTTZ, VT, Custom);

    setOperationAction(ISD::BITREVERSE, VT, Legal);

    setOperationAction(ISD::BSWAP, VT, Legal);

    setOperationAction(ISD::SADDSAT, VT, Legal);

    setOperationAction(ISD::UADDSAT, VT, Legal);

    setOperationAction(ISD::SSUBSAT, VT, Legal);

    setOperationAction(ISD::USUBSAT, VT, Legal);

    setOperationAction(ISD::ABDS, VT, Legal);

    setOperationAction(ISD::ABDU, VT, Legal);

    setOperationAction(ISD::AVGFLOORS, VT, Legal);

    setOperationAction(ISD::AVGFLOORU, VT, Legal);

    setOperationAction(ISD::AVGCEILS, VT, Legal);

    setOperationAction(ISD::AVGCEILU, VT, Legal);


    // No native support for these.

    setOperationAction(ISD::UDIV, VT, Expand);

    setOperationAction(ISD::SDIV, VT, Expand);

    setOperationAction(ISD::UREM, VT, Expand);

    setOperationAction(ISD::SREM, VT, Expand);

    setOperationAction(ISD::UDIVREM, VT, Expand);

    setOperationAction(ISD::SDIVREM, VT, Expand);

    setOperationAction(ISD::CTPOP, VT, Expand);

    setOperationAction(ISD::SELECT, VT, Expand);

    setOperationAction(ISD::SELECT_CC, VT, Expand);


    // Vector reductions

    setOperationAction(ISD::VECREDUCE_ADD, VT, Legal);

    setOperationAction(ISD::VECREDUCE_SMAX, VT, Legal);

    setOperationAction(ISD::VECREDUCE_UMAX, VT, Legal);

    setOperationAction(ISD::VECREDUCE_SMIN, VT, Legal);

    setOperationAction(ISD::VECREDUCE_UMIN, VT, Legal);

    setOperationAction(ISD::VECREDUCE_MUL, VT, Custom);

    setOperationAction(ISD::VECREDUCE_AND, VT, Custom);

    setOperationAction(ISD::VECREDUCE_OR, VT, Custom);

    setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);


    if (!HasMVEFP) {

      setOperationAction(ISD::SINT_TO_FP, VT, Expand);

      setOperationAction(ISD::UINT_TO_FP, VT, Expand);

      setOperationAction(ISD::FP_TO_SINT, VT, Expand);

      setOperationAction(ISD::FP_TO_UINT, VT, Expand);

    } else {

      setOperationAction(ISD::FP_TO_SINT_SAT, VT, Custom);

      setOperationAction(ISD::FP_TO_UINT_SAT, VT, Custom);

    }


    // Pre and Post inc are supported on loads and stores

    for (unsigned im = (unsigned)ISD::PRE_INC;

         im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {

      setIndexedLoadAction(im, VT, Legal);

      setIndexedStoreAction(im, VT, Legal);

      setIndexedMaskedLoadAction(im, VT, Legal);

      setIndexedMaskedStoreAction(im, VT, Legal);

    }

  }


  const MVT FloatTypes[] = { MVT::v8f16, MVT::v4f32 };

  for (auto VT : FloatTypes) {

    addRegisterClass(VT, &ARM::MQPRRegClass);

    if (!HasMVEFP)

      setAllExpand(VT);


    // These are legal or custom whether we have MVE.fp or not

    setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);

    setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);

    setOperationAction(ISD::INSERT_VECTOR_ELT, VT.getVectorElementType(), Custom);

    setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);

    setOperationAction(ISD::BUILD_VECTOR, VT, Custom);

    setOperationAction(ISD::BUILD_VECTOR, VT.getVectorElementType(), Custom);

    setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Legal);

    setOperationAction(ISD::SETCC, VT, Custom);

    setOperationAction(ISD::MLOAD, VT, Custom);

    setOperationAction(ISD::MSTORE, VT, Legal);

    setOperationAction(ISD::SELECT, VT, Expand);

    setOperationAction(ISD::SELECT_CC, VT, Expand);


    // Pre and Post inc are supported on loads and stores

    for (unsigned im = (unsigned)ISD::PRE_INC;

         im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {

      setIndexedLoadAction(im, VT, Legal);

      setIndexedStoreAction(im, VT, Legal);

      setIndexedMaskedLoadAction(im, VT, Legal);

      setIndexedMaskedStoreAction(im, VT, Legal);

    }


    if (HasMVEFP) {

      setOperationAction(ISD::FMINNUM, VT, Legal);

      setOperationAction(ISD::FMAXNUM, VT, Legal);

      setOperationAction(ISD::FROUND, VT, Legal);

      setOperationAction(ISD::FROUNDEVEN, VT, Legal);

      setOperationAction(ISD::FRINT, VT, Legal);

      setOperationAction(ISD::FTRUNC, VT, Legal);

      setOperationAction(ISD::FFLOOR, VT, Legal);

      setOperationAction(ISD::FCEIL, VT, Legal);

      setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);

      setOperationAction(ISD::VECREDUCE_FMUL, VT, Custom);

      setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);

      setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);


      // No native support for these.

      setOperationAction(ISD::FDIV, VT, Expand);

      setOperationAction(ISD::FREM, VT, Expand);

      setOperationAction(ISD::FSQRT, VT, Expand);

      setOperationAction(ISD::FSIN, VT, Expand);

      setOperationAction(ISD::FCOS, VT, Expand);

      setOperationAction(ISD::FTAN, VT, Expand);

      setOperationAction(ISD::FPOW, VT, Expand);

      setOperationAction(ISD::FLOG, VT, Expand);

      setOperationAction(ISD::FLOG2, VT, Expand);

      setOperationAction(ISD::FLOG10, VT, Expand);

      setOperationAction(ISD::FEXP, VT, Expand);

      setOperationAction(ISD::FEXP2, VT, Expand);

      setOperationAction(ISD::FEXP10, VT, Expand);

      setOperationAction(ISD::FNEARBYINT, VT, Expand);

    }

  }


  // Custom Expand smaller than legal vector reductions to prevent false zero

  // items being added.

  setOperationAction(ISD::VECREDUCE_FADD, MVT::v4f16, Custom);

  setOperationAction(ISD::VECREDUCE_FMUL, MVT::v4f16, Custom);

  setOperationAction(ISD::VECREDUCE_FMIN, MVT::v4f16, Custom);

  setOperationAction(ISD::VECREDUCE_FMAX, MVT::v4f16, Custom);

  setOperationAction(ISD::VECREDUCE_FADD, MVT::v2f16, Custom);

  setOperationAction(ISD::VECREDUCE_FMUL, MVT::v2f16, Custom);

  setOperationAction(ISD::VECREDUCE_FMIN, MVT::v2f16, Custom);

  setOperationAction(ISD::VECREDUCE_FMAX, MVT::v2f16, Custom);


  // We 'support' these types up to bitcast/load/store level, regardless of

  // MVE integer-only / float support. Only doing FP data processing on the FP

  // vector types is inhibited at integer-only level.

  const MVT LongTypes[] = { MVT::v2i64, MVT::v2f64 };

  for (auto VT : LongTypes) {

    addRegisterClass(VT, &ARM::MQPRRegClass);

    setAllExpand(VT);

    setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);

    setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);

    setOperationAction(ISD::BUILD_VECTOR, VT, Custom);

    setOperationAction(ISD::VSELECT, VT, Legal);

    setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);

  }

  setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);


  // We can do bitwise operations on v2i64 vectors

  setOperationAction(ISD::AND, MVT::v2i64, Legal);

  setOperationAction(ISD::OR, MVT::v2i64, Legal);

  setOperationAction(ISD::XOR, MVT::v2i64, Legal);


  // It is legal to extload from v4i8 to v4i16 or v4i32.

  addAllExtLoads(MVT::v8i16, MVT::v8i8, Legal);

  addAllExtLoads(MVT::v4i32, MVT::v4i16, Legal);

  addAllExtLoads(MVT::v4i32, MVT::v4i8, Legal);


  // It is legal to sign extend from v4i8/v4i16 to v4i32 or v8i8 to v8i16.

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8,  Legal);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Legal);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Legal);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v8i8,  Legal);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v8i16, Legal);


  // Some truncating stores are legal too.

  setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);

  setTruncStoreAction(MVT::v4i32, MVT::v4i8,  Legal);

  setTruncStoreAction(MVT::v8i16, MVT::v8i8,  Legal);


  // Pre and Post inc on these are legal, given the correct extends

  for (unsigned im = (unsigned)ISD::PRE_INC;

       im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {

    for (auto VT : {MVT::v8i8, MVT::v4i8, MVT::v4i16}) {

      setIndexedLoadAction(im, VT, Legal);

      setIndexedStoreAction(im, VT, Legal);

      setIndexedMaskedLoadAction(im, VT, Legal);

      setIndexedMaskedStoreAction(im, VT, Legal);

    }

  }


  // Predicate types

  const MVT pTypes[] = {MVT::v16i1, MVT::v8i1, MVT::v4i1, MVT::v2i1};

  for (auto VT : pTypes) {

    addRegisterClass(VT, &ARM::VCCRRegClass);

    setOperationAction(ISD::BUILD_VECTOR, VT, Custom);

    setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);

    setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);

    setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);

    setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);

    setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);

    setOperationAction(ISD::SETCC, VT, Custom);

    setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);

    setOperationAction(ISD::LOAD, VT, Custom);

    setOperationAction(ISD::STORE, VT, Custom);

    setOperationAction(ISD::TRUNCATE, VT, Custom);

    setOperationAction(ISD::VSELECT, VT, Expand);

    setOperationAction(ISD::SELECT, VT, Expand);

    setOperationAction(ISD::SELECT_CC, VT, Expand);


    if (!HasMVEFP) {

      setOperationAction(ISD::SINT_TO_FP, VT, Expand);

      setOperationAction(ISD::UINT_TO_FP, VT, Expand);

      setOperationAction(ISD::FP_TO_SINT, VT, Expand);

      setOperationAction(ISD::FP_TO_UINT, VT, Expand);

    }

  }

  setOperationAction(ISD::SETCC, MVT::v2i1, Expand);

  setOperationAction(ISD::TRUNCATE, MVT::v2i1, Expand);

  setOperationAction(ISD::AND, MVT::v2i1, Expand);

  setOperationAction(ISD::OR, MVT::v2i1, Expand);

  setOperationAction(ISD::XOR, MVT::v2i1, Expand);

  setOperationAction(ISD::SINT_TO_FP, MVT::v2i1, Expand);

  setOperationAction(ISD::UINT_TO_FP, MVT::v2i1, Expand);

  setOperationAction(ISD::FP_TO_SINT, MVT::v2i1, Expand);

  setOperationAction(ISD::FP_TO_UINT, MVT::v2i1, Expand);


  setOperationAction(ISD::SIGN_EXTEND, MVT::v8i32, Custom);

  setOperationAction(ISD::SIGN_EXTEND, MVT::v16i16, Custom);

  setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);

  setOperationAction(ISD::ZERO_EXTEND, MVT::v8i32, Custom);

  setOperationAction(ISD::ZERO_EXTEND, MVT::v16i16, Custom);

  setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);

  setOperationAction(ISD::TRUNCATE, MVT::v8i32, Custom);

  setOperationAction(ISD::TRUNCATE, MVT::v16i16, Custom);

}


const ARMBaseTargetMachine &ARMTargetLowering::getTM() const {

  return static_cast<const ARMBaseTargetMachine &>(getTargetMachine());

}


ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,

                                     const ARMSubtarget &STI)

    : TargetLowering(TM_, STI), Subtarget(&STI),

      RegInfo(Subtarget->getRegisterInfo()),

      Itins(Subtarget->getInstrItineraryData()) {

  const auto &TM = static_cast<const ARMBaseTargetMachine &>(TM_);


  setBooleanContents(ZeroOrOneBooleanContent);

  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);


  const Triple &TT = TM.getTargetTriple();


  if (TT.isOSBinFormatMachO()) {

    // Uses VFP for Thumb libfuncs if available.

    if (Subtarget->isThumb() && Subtarget->hasVFP2Base() &&

        Subtarget->hasARMOps() && !Subtarget->useSoftFloat()) {

      // clang-format off

      static const struct {

        const RTLIB::Libcall Op;

        const RTLIB::LibcallImpl Impl;

      } LibraryCalls[] = {

        // Single-precision floating-point arithmetic.

        { RTLIB::ADD_F32, RTLIB::impl___addsf3vfp },

        { RTLIB::SUB_F32, RTLIB::impl___subsf3vfp },

        { RTLIB::MUL_F32, RTLIB::impl___mulsf3vfp },

        { RTLIB::DIV_F32, RTLIB::impl___divsf3vfp },


        // Double-precision floating-point arithmetic.

        { RTLIB::ADD_F64, RTLIB::impl___adddf3vfp },

        { RTLIB::SUB_F64, RTLIB::impl___subdf3vfp },

        { RTLIB::MUL_F64, RTLIB::impl___muldf3vfp },

        { RTLIB::DIV_F64, RTLIB::impl___divdf3vfp },


        // Single-precision comparisons.

        { RTLIB::OEQ_F32, RTLIB::impl___eqsf2vfp },

        { RTLIB::UNE_F32, RTLIB::impl___nesf2vfp },

        { RTLIB::OLT_F32, RTLIB::impl___ltsf2vfp },

        { RTLIB::OLE_F32, RTLIB::impl___lesf2vfp },

        { RTLIB::OGE_F32, RTLIB::impl___gesf2vfp },

        { RTLIB::OGT_F32, RTLIB::impl___gtsf2vfp },

        { RTLIB::UO_F32,  RTLIB::impl___unordsf2vfp },


        // Double-precision comparisons.

        { RTLIB::OEQ_F64, RTLIB::impl___eqdf2vfp },

        { RTLIB::UNE_F64, RTLIB::impl___nedf2vfp },

        { RTLIB::OLT_F64, RTLIB::impl___ltdf2vfp },

        { RTLIB::OLE_F64, RTLIB::impl___ledf2vfp },

        { RTLIB::OGE_F64, RTLIB::impl___gedf2vfp },

        { RTLIB::OGT_F64, RTLIB::impl___gtdf2vfp },

        { RTLIB::UO_F64,  RTLIB::impl___unorddf2vfp },


        // Floating-point to integer conversions.

        // i64 conversions are done via library routines even when generating VFP

        // instructions, so use the same ones.

        { RTLIB::FPTOSINT_F64_I32, RTLIB::impl___fixdfsivfp },

        { RTLIB::FPTOUINT_F64_I32, RTLIB::impl___fixunsdfsivfp },

        { RTLIB::FPTOSINT_F32_I32, RTLIB::impl___fixsfsivfp },

        { RTLIB::FPTOUINT_F32_I32, RTLIB::impl___fixunssfsivfp },


        // Conversions between floating types.

        { RTLIB::FPROUND_F64_F32, RTLIB::impl___truncdfsf2vfp },

        { RTLIB::FPEXT_F32_F64,   RTLIB::impl___extendsfdf2vfp },


        // Integer to floating-point conversions.

        // i64 conversions are done via library routines even when generating VFP

        // instructions, so use the same ones.

        // FIXME: There appears to be some naming inconsistency in ARM libgcc:

        // e.g., __floatunsidf vs. __floatunssidfvfp.

        { RTLIB::SINTTOFP_I32_F64, RTLIB::impl___floatsidfvfp },

        { RTLIB::UINTTOFP_I32_F64, RTLIB::impl___floatunssidfvfp },

        { RTLIB::SINTTOFP_I32_F32, RTLIB::impl___floatsisfvfp },

        { RTLIB::UINTTOFP_I32_F32, RTLIB::impl___floatunssisfvfp },

      };

      // clang-format on


      for (const auto &LC : LibraryCalls)

        setLibcallImpl(LC.Op, LC.Impl);

    }

  }


  if (Subtarget->isThumb1Only())

    addRegisterClass(MVT::i32, &ARM::tGPRRegClass);

  else

    addRegisterClass(MVT::i32, &ARM::GPRRegClass);


  if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only() &&

      Subtarget->hasFPRegs()) {

    addRegisterClass(MVT::f32, &ARM::SPRRegClass);

    addRegisterClass(MVT::f64, &ARM::DPRRegClass);


    setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i32, Custom);

    setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i32, Custom);

    setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i64, Custom);

    setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i64, Custom);


    if (!Subtarget->hasVFP2Base()) {

      setAllExpand(MVT::f32);

    } else {

      for (auto Op : {ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL,

                      ISD::STRICT_FDIV, ISD::STRICT_FMA, ISD::STRICT_FSQRT})

        setOperationAction(Op, MVT::f32, Legal);

    }

    if (!Subtarget->hasFP64()) {

      setAllExpand(MVT::f64);

    } else {

      for (auto Op : {ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL,

                      ISD::STRICT_FDIV, ISD::STRICT_FMA, ISD::STRICT_FSQRT})

        setOperationAction(Op, MVT::f64, Legal);

    }

  }


  if (Subtarget->hasFullFP16()) {

    addRegisterClass(MVT::f16, &ARM::HPRRegClass);

    setOperationAction(ISD::BITCAST, MVT::i16, Custom);

    setOperationAction(ISD::BITCAST, MVT::f16, Custom);


    setOperationAction(ISD::FMINNUM, MVT::f16, Legal);

    setOperationAction(ISD::FMAXNUM, MVT::f16, Legal);

  }


  if (Subtarget->hasBF16()) {

    addRegisterClass(MVT::bf16, &ARM::HPRRegClass);

    setAllExpand(MVT::bf16);

    if (!Subtarget->hasFullFP16())

      setOperationAction(ISD::BITCAST, MVT::bf16, Custom);

  } else {

    setOperationAction(ISD::BF16_TO_FP, MVT::f32, Expand);

    setOperationAction(ISD::BF16_TO_FP, MVT::f64, Expand);

    setOperationAction(ISD::FP_TO_BF16, MVT::f32, Custom);

    setOperationAction(ISD::FP_TO_BF16, MVT::f64, Custom);

  }


  for (MVT VT : MVT::fixedlen_vector_valuetypes()) {

    for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {

      setTruncStoreAction(VT, InnerVT, Expand);

      addAllExtLoads(VT, InnerVT, Expand);

    }


    setOperationAction(ISD::SMUL_LOHI, VT, Expand);

    setOperationAction(ISD::UMUL_LOHI, VT, Expand);


    setOperationAction(ISD::BSWAP, VT, Expand);

  }


  if (!Subtarget->isThumb1Only() && !Subtarget->hasV8_1MMainlineOps())

    setOperationAction(ISD::SCMP, MVT::i32, Custom);


  if (!Subtarget->hasV8_1MMainlineOps())

    setOperationAction(ISD::UCMP, MVT::i32, Custom);


  if (!Subtarget->isThumb1Only())

    setOperationAction(ISD::ABS, MVT::i32, Custom);


  setOperationAction(ISD::ConstantFP, MVT::f32, Custom);

  setOperationAction(ISD::ConstantFP, MVT::f64, Custom);


  setOperationAction(ISD::READ_REGISTER, MVT::i64, Custom);

  setOperationAction(ISD::WRITE_REGISTER, MVT::i64, Custom);


  if (Subtarget->hasMVEIntegerOps())

    addMVEVectorTypes(Subtarget->hasMVEFloatOps());


  // Combine low-overhead loop intrinsics so that we can lower i1 types.

  if (Subtarget->hasLOB()) {

    setTargetDAGCombine({ISD::BRCOND, ISD::BR_CC});

  }


  if (Subtarget->hasNEON()) {

    addDRTypeForNEON(MVT::v2f32);

    addDRTypeForNEON(MVT::v8i8);

    addDRTypeForNEON(MVT::v4i16);

    addDRTypeForNEON(MVT::v2i32);

    addDRTypeForNEON(MVT::v1i64);


    addQRTypeForNEON(MVT::v4f32);

    addQRTypeForNEON(MVT::v2f64);

    addQRTypeForNEON(MVT::v16i8);

    addQRTypeForNEON(MVT::v8i16);

    addQRTypeForNEON(MVT::v4i32);

    addQRTypeForNEON(MVT::v2i64);


    if (Subtarget->hasFullFP16()) {

      addQRTypeForNEON(MVT::v8f16);

      addDRTypeForNEON(MVT::v4f16);

    }


    if (Subtarget->hasBF16()) {

      addQRTypeForNEON(MVT::v8bf16);

      addDRTypeForNEON(MVT::v4bf16);

    }

  }


  if (Subtarget->hasMVEIntegerOps() || Subtarget->hasNEON()) {

    // v2f64 is legal so that QR subregs can be extracted as f64 elements, but

    // none of Neon, MVE or VFP supports any arithmetic operations on it.

    setOperationAction(ISD::FADD, MVT::v2f64, Expand);

    setOperationAction(ISD::FSUB, MVT::v2f64, Expand);

    setOperationAction(ISD::FMUL, MVT::v2f64, Expand);

    // FIXME: Code duplication: FDIV and FREM are expanded always, see

    // ARMTargetLowering::addTypeForNEON method for details.

    setOperationAction(ISD::FDIV, MVT::v2f64, Expand);

    setOperationAction(ISD::FREM, MVT::v2f64, Expand);

    // FIXME: Create unittest.

    // In another words, find a way when "copysign" appears in DAG with vector

    // operands.

    setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand);

    // FIXME: Code duplication: SETCC has custom operation action, see

    // ARMTargetLowering::addTypeForNEON method for details.

    setOperationAction(ISD::SETCC, MVT::v2f64, Expand);

    // FIXME: Create unittest for FNEG and for FABS.

    setOperationAction(ISD::FNEG, MVT::v2f64, Expand);

    setOperationAction(ISD::FABS, MVT::v2f64, Expand);

    setOperationAction(ISD::FSQRT, MVT::v2f64, Expand);

    setOperationAction(ISD::FSIN, MVT::v2f64, Expand);

    setOperationAction(ISD::FCOS, MVT::v2f64, Expand);

    setOperationAction(ISD::FTAN, MVT::v2f64, Expand);

    setOperationAction(ISD::FPOW, MVT::v2f64, Expand);

    setOperationAction(ISD::FLOG, MVT::v2f64, Expand);

    setOperationAction(ISD::FLOG2, MVT::v2f64, Expand);

    setOperationAction(ISD::FLOG10, MVT::v2f64, Expand);

    setOperationAction(ISD::FEXP, MVT::v2f64, Expand);

    setOperationAction(ISD::FEXP2, MVT::v2f64, Expand);

    setOperationAction(ISD::FEXP10, MVT::v2f64, Expand);

    setOperationAction(ISD::FCEIL, MVT::v2f64, Expand);

    setOperationAction(ISD::FTRUNC, MVT::v2f64, Expand);

    setOperationAction(ISD::FRINT, MVT::v2f64, Expand);

    setOperationAction(ISD::FROUNDEVEN, MVT::v2f64, Expand);

    setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand);

    setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand);

    setOperationAction(ISD::FMA, MVT::v2f64, Expand);

  }


  if (Subtarget->hasNEON()) {

    // The same with v4f32. But keep in mind that vadd, vsub, vmul are natively

    // supported for v4f32.

    setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);

    setOperationAction(ISD::FSIN, MVT::v4f32, Expand);

    setOperationAction(ISD::FCOS, MVT::v4f32, Expand);

    setOperationAction(ISD::FTAN, MVT::v4f32, Expand);

    setOperationAction(ISD::FPOW, MVT::v4f32, Expand);

    setOperationAction(ISD::FLOG, MVT::v4f32, Expand);

    setOperationAction(ISD::FLOG2, MVT::v4f32, Expand);

    setOperationAction(ISD::FLOG10, MVT::v4f32, Expand);

    setOperationAction(ISD::FEXP, MVT::v4f32, Expand);

    setOperationAction(ISD::FEXP2, MVT::v4f32, Expand);

    setOperationAction(ISD::FEXP10, MVT::v4f32, Expand);

    setOperationAction(ISD::FCEIL, MVT::v4f32, Expand);

    setOperationAction(ISD::FTRUNC, MVT::v4f32, Expand);

    setOperationAction(ISD::FRINT, MVT::v4f32, Expand);

    setOperationAction(ISD::FROUNDEVEN, MVT::v4f32, Expand);

    setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand);

    setOperationAction(ISD::FFLOOR, MVT::v4f32, Expand);


    // Mark v2f32 intrinsics.

    setOperationAction(ISD::FSQRT, MVT::v2f32, Expand);

    setOperationAction(ISD::FSIN, MVT::v2f32, Expand);

    setOperationAction(ISD::FCOS, MVT::v2f32, Expand);

    setOperationAction(ISD::FTAN, MVT::v2f32, Expand);

    setOperationAction(ISD::FPOW, MVT::v2f32, Expand);

    setOperationAction(ISD::FLOG, MVT::v2f32, Expand);

    setOperationAction(ISD::FLOG2, MVT::v2f32, Expand);

    setOperationAction(ISD::FLOG10, MVT::v2f32, Expand);

    setOperationAction(ISD::FEXP, MVT::v2f32, Expand);

    setOperationAction(ISD::FEXP2, MVT::v2f32, Expand);

    setOperationAction(ISD::FEXP10, MVT::v2f32, Expand);

    setOperationAction(ISD::FCEIL, MVT::v2f32, Expand);

    setOperationAction(ISD::FTRUNC, MVT::v2f32, Expand);

    setOperationAction(ISD::FRINT, MVT::v2f32, Expand);

    setOperationAction(ISD::FROUNDEVEN, MVT::v2f32, Expand);

    setOperationAction(ISD::FNEARBYINT, MVT::v2f32, Expand);

    setOperationAction(ISD::FFLOOR, MVT::v2f32, Expand);


    for (ISD::NodeType Op : {ISD::FFLOOR, ISD::FNEARBYINT, ISD::FCEIL,

                              ISD::FRINT, ISD::FTRUNC, ISD::FROUNDEVEN}) {

      setOperationAction(Op, MVT::v4f16, Expand);

      setOperationAction(Op, MVT::v8f16, Expand);

    }


    // Neon does not support some operations on v1i64 and v2i64 types.

    setOperationAction(ISD::MUL, MVT::v1i64, Expand);

    // Custom handling for some quad-vector types to detect VMULL.

    setOperationAction(ISD::MUL, MVT::v8i16, Custom);

    setOperationAction(ISD::MUL, MVT::v4i32, Custom);

    setOperationAction(ISD::MUL, MVT::v2i64, Custom);

    // Custom handling for some vector types to avoid expensive expansions

    setOperationAction(ISD::SDIV, MVT::v4i16, Custom);

    setOperationAction(ISD::SDIV, MVT::v8i8, Custom);

    setOperationAction(ISD::UDIV, MVT::v4i16, Custom);

    setOperationAction(ISD::UDIV, MVT::v8i8, Custom);

    // Neon does not have single instruction SINT_TO_FP and UINT_TO_FP with

    // a destination type that is wider than the source, and nor does

    // it have a FP_TO_[SU]INT instruction with a narrower destination than

    // source.

    setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);

    setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Custom);

    setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);

    setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Custom);

    setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom);

    setOperationAction(ISD::FP_TO_UINT, MVT::v8i16, Custom);

    setOperationAction(ISD::FP_TO_SINT, MVT::v4i16, Custom);

    setOperationAction(ISD::FP_TO_SINT, MVT::v8i16, Custom);


    setOperationAction(ISD::FP_ROUND,   MVT::v2f32, Expand);

    setOperationAction(ISD::FP_EXTEND,  MVT::v2f64, Expand);


    // NEON does not have single instruction CTPOP for vectors with element

    // types wider than 8-bits.  However, custom lowering can leverage the

    // v8i8/v16i8 vcnt instruction.

    setOperationAction(ISD::CTPOP,      MVT::v2i32, Custom);

    setOperationAction(ISD::CTPOP,      MVT::v4i32, Custom);

    setOperationAction(ISD::CTPOP,      MVT::v4i16, Custom);

    setOperationAction(ISD::CTPOP,      MVT::v8i16, Custom);

    setOperationAction(ISD::CTPOP,      MVT::v1i64, Custom);

    setOperationAction(ISD::CTPOP,      MVT::v2i64, Custom);


    setOperationAction(ISD::CTLZ,       MVT::v1i64, Expand);

    setOperationAction(ISD::CTLZ,       MVT::v2i64, Expand);


    // NEON does not have single instruction CTTZ for vectors.

    setOperationAction(ISD::CTTZ, MVT::v8i8, Custom);

    setOperationAction(ISD::CTTZ, MVT::v4i16, Custom);

    setOperationAction(ISD::CTTZ, MVT::v2i32, Custom);

    setOperationAction(ISD::CTTZ, MVT::v1i64, Custom);


    setOperationAction(ISD::CTTZ, MVT::v16i8, Custom);

    setOperationAction(ISD::CTTZ, MVT::v8i16, Custom);

    setOperationAction(ISD::CTTZ, MVT::v4i32, Custom);

    setOperationAction(ISD::CTTZ, MVT::v2i64, Custom);


    setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i8, Custom);

    setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i16, Custom);

    setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v2i32, Custom);

    setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v1i64, Custom);


    setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v16i8, Custom);

    setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i16, Custom);

    setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i32, Custom);

    setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v2i64, Custom);


    for (MVT VT : MVT::fixedlen_vector_valuetypes()) {

      setOperationAction(ISD::MULHS, VT, Expand);

      setOperationAction(ISD::MULHU, VT, Expand);

    }


    // NEON only has FMA instructions as of VFP4.

    if (!Subtarget->hasVFP4Base()) {

      setOperationAction(ISD::FMA, MVT::v2f32, Expand);

      setOperationAction(ISD::FMA, MVT::v4f32, Expand);

    }


    setTargetDAGCombine({ISD::SHL, ISD::SRL, ISD::SRA, ISD::FP_TO_SINT,

                         ISD::FP_TO_UINT, ISD::FMUL, ISD::LOAD});


    // It is legal to extload from v4i8 to v4i16 or v4i32.

    for (MVT Ty : {MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v4i16, MVT::v2i16,

                   MVT::v2i32}) {

      for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) {

        setLoadExtAction(ISD::EXTLOAD, VT, Ty, Legal);

        setLoadExtAction(ISD::ZEXTLOAD, VT, Ty, Legal);

        setLoadExtAction(ISD::SEXTLOAD, VT, Ty, Legal);

      }

    }


    for (auto VT : {MVT::v8i8, MVT::v4i16, MVT::v2i32, MVT::v16i8, MVT::v8i16,

                    MVT::v4i32}) {

      setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);

      setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);

      setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);

      setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);

    }

  }


  if (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) {

    setTargetDAGCombine(

        {ISD::BUILD_VECTOR, ISD::VECTOR_SHUFFLE, ISD::INSERT_SUBVECTOR,

         ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT,

         ISD::SIGN_EXTEND_INREG, ISD::STORE, ISD::SIGN_EXTEND, ISD::ZERO_EXTEND,

         ISD::ANY_EXTEND, ISD::INTRINSIC_WO_CHAIN, ISD::INTRINSIC_W_CHAIN,

         ISD::INTRINSIC_VOID, ISD::VECREDUCE_ADD, ISD::ADD, ISD::BITCAST});

  }

  if (Subtarget->hasMVEIntegerOps()) {

    setTargetDAGCombine({ISD::SMIN, ISD::UMIN, ISD::SMAX, ISD::UMAX,

                         ISD::FP_EXTEND, ISD::SELECT, ISD::SELECT_CC,

                         ISD::SETCC});

  }

  if (Subtarget->hasMVEFloatOps()) {

    setTargetDAGCombine(ISD::FADD);

  }


  if (!Subtarget->hasFP64()) {

    // When targeting a floating-point unit with only single-precision

    // operations, f64 is legal for the few double-precision instructions which

    // are present However, no double-precision operations other than moves,

    // loads and stores are provided by the hardware.

    setOperationAction(ISD::FADD,       MVT::f64, Expand);

    setOperationAction(ISD::FSUB,       MVT::f64, Expand);

    setOperationAction(ISD::FMUL,       MVT::f64, Expand);

    setOperationAction(ISD::FMA,        MVT::f64, Expand);

    setOperationAction(ISD::FDIV,       MVT::f64, Expand);

    setOperationAction(ISD::FREM,       MVT::f64, Expand);

    setOperationAction(ISD::FCOPYSIGN,  MVT::f64, Expand);

    setOperationAction(ISD::FGETSIGN,   MVT::f64, Expand);

    setOperationAction(ISD::FNEG,       MVT::f64, Expand);

    setOperationAction(ISD::FABS,       MVT::f64, Expand);

    setOperationAction(ISD::FSQRT,      MVT::f64, Expand);

    setOperationAction(ISD::FSIN,       MVT::f64, Expand);

    setOperationAction(ISD::FCOS,       MVT::f64, Expand);

    setOperationAction(ISD::FPOW,       MVT::f64, Expand);

    setOperationAction(ISD::FLOG,       MVT::f64, Expand);

    setOperationAction(ISD::FLOG2,      MVT::f64, Expand);

    setOperationAction(ISD::FLOG10,     MVT::f64, Expand);

    setOperationAction(ISD::FEXP,       MVT::f64, Expand);

    setOperationAction(ISD::FEXP2,      MVT::f64, Expand);

    setOperationAction(ISD::FEXP10,      MVT::f64, Expand);

    setOperationAction(ISD::FCEIL,      MVT::f64, Expand);

    setOperationAction(ISD::FTRUNC,     MVT::f64, Expand);

    setOperationAction(ISD::FRINT,      MVT::f64, Expand);

    setOperationAction(ISD::FROUNDEVEN, MVT::f64, Expand);

    setOperationAction(ISD::FNEARBYINT, MVT::f64, Expand);

    setOperationAction(ISD::FFLOOR,     MVT::f64, Expand);

    setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);

    setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);

    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);

    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);

    setOperationAction(ISD::FP_TO_SINT, MVT::f64, Custom);

    setOperationAction(ISD::FP_TO_UINT, MVT::f64, Custom);

    setOperationAction(ISD::FP_ROUND,   MVT::f32, Custom);

    setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);

    setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);

    setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::f64, Custom);

    setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::f64, Custom);

    setOperationAction(ISD::STRICT_FP_ROUND,   MVT::f32, Custom);

  }


  if (!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()) {

    setOperationAction(ISD::FP_EXTEND,  MVT::f64, Custom);

    setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Custom);

    if (Subtarget->hasFullFP16()) {

      setOperationAction(ISD::FP_ROUND,  MVT::f16, Custom);

      setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Custom);

    }

  }


  if (!Subtarget->hasFP16()) {

    setOperationAction(ISD::FP_EXTEND,  MVT::f32, Custom);

    setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Custom);

  }


  computeRegisterProperties(Subtarget->getRegisterInfo());


  // ARM does not have floating-point extending loads.

  for (MVT VT : MVT::fp_valuetypes()) {

    setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);

    setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);

    setLoadExtAction(ISD::EXTLOAD, VT, MVT::bf16, Expand);

  }


  // ... or truncating stores

  setTruncStoreAction(MVT::f64, MVT::f32, Expand);

  setTruncStoreAction(MVT::f32, MVT::f16, Expand);

  setTruncStoreAction(MVT::f64, MVT::f16, Expand);

  setTruncStoreAction(MVT::f32, MVT::bf16, Expand);

  setTruncStoreAction(MVT::f64, MVT::bf16, Expand);


  // ARM does not have i1 sign extending load.

  for (MVT VT : MVT::integer_valuetypes())

    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);


  // ARM supports all 4 flavors of integer indexed load / store.

  if (!Subtarget->isThumb1Only()) {

    for (unsigned im = (unsigned)ISD::PRE_INC;

         im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {

      setIndexedLoadAction(im,  MVT::i1,  Legal);

      setIndexedLoadAction(im,  MVT::i8,  Legal);

      setIndexedLoadAction(im,  MVT::i16, Legal);

      setIndexedLoadAction(im,  MVT::i32, Legal);

      setIndexedStoreAction(im, MVT::i1,  Legal);

      setIndexedStoreAction(im, MVT::i8,  Legal);

      setIndexedStoreAction(im, MVT::i16, Legal);

      setIndexedStoreAction(im, MVT::i32, Legal);

    }

  } else {

    // Thumb-1 has limited post-inc load/store support - LDM r0!, {r1}.

    setIndexedLoadAction(ISD::POST_INC, MVT::i32,  Legal);

    setIndexedStoreAction(ISD::POST_INC, MVT::i32,  Legal);

  }


  setOperationAction(ISD::SADDO, MVT::i32, Custom);

  setOperationAction(ISD::UADDO, MVT::i32, Custom);

  setOperationAction(ISD::SSUBO, MVT::i32, Custom);

  setOperationAction(ISD::USUBO, MVT::i32, Custom);


  setOperationAction(ISD::UADDO_CARRY, MVT::i32, Custom);

  setOperationAction(ISD::USUBO_CARRY, MVT::i32, Custom);

  if (Subtarget->hasDSP()) {

    setOperationAction(ISD::SADDSAT, MVT::i8, Custom);

    setOperationAction(ISD::SSUBSAT, MVT::i8, Custom);

    setOperationAction(ISD::SADDSAT, MVT::i16, Custom);

    setOperationAction(ISD::SSUBSAT, MVT::i16, Custom);

    setOperationAction(ISD::UADDSAT, MVT::i8, Custom);

    setOperationAction(ISD::USUBSAT, MVT::i8, Custom);

    setOperationAction(ISD::UADDSAT, MVT::i16, Custom);

    setOperationAction(ISD::USUBSAT, MVT::i16, Custom);

  }

  if (Subtarget->hasBaseDSP()) {

    setOperationAction(ISD::SADDSAT, MVT::i32, Legal);

    setOperationAction(ISD::SSUBSAT, MVT::i32, Legal);

  }


  // i64 operation support.

  setOperationAction(ISD::MUL,     MVT::i64, Expand);

  setOperationAction(ISD::MULHU,   MVT::i32, Expand);

  if (Subtarget->isThumb1Only()) {

    setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);

    setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);

  }

  if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops()

      || (Subtarget->isThumb2() && !Subtarget->hasDSP()))

    setOperationAction(ISD::MULHS, MVT::i32, Expand);


  setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);

  setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);

  setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);

  setOperationAction(ISD::SRL,       MVT::i64, Custom);

  setOperationAction(ISD::SRA,       MVT::i64, Custom);

  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);

  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);

  setOperationAction(ISD::LOAD, MVT::i64, Custom);

  setOperationAction(ISD::STORE, MVT::i64, Custom);


  // MVE lowers 64 bit shifts to lsll and lsrl

  // assuming that ISD::SRL and SRA of i64 are already marked custom

  if (Subtarget->hasMVEIntegerOps())

    setOperationAction(ISD::SHL, MVT::i64, Custom);


  // Expand to __aeabi_l{lsl,lsr,asr} calls for Thumb1.

  if (Subtarget->isThumb1Only()) {

    setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand);

    setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand);

    setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand);

  }


  if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops())

    setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);


  // ARM does not have ROTL.

  setOperationAction(ISD::ROTL, MVT::i32, Expand);

  for (MVT VT : MVT::fixedlen_vector_valuetypes()) {

    setOperationAction(ISD::ROTL, VT, Expand);

    setOperationAction(ISD::ROTR, VT, Expand);

  }

  setOperationAction(ISD::CTTZ,  MVT::i32, Custom);

  // TODO: These two should be set to LibCall, but this currently breaks

  //   the Linux kernel build. See #101786.

  setOperationAction(ISD::CTPOP, MVT::i32, Expand);

  setOperationAction(ISD::CTPOP, MVT::i64, Expand);

  if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only()) {

    setOperationAction(ISD::CTLZ, MVT::i32, Expand);

    setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, LibCall);

  }


  // @llvm.readcyclecounter requires the Performance Monitors extension.

  // Default to the 0 expansion on unsupported platforms.

  // FIXME: Technically there are older ARM CPUs that have

  // implementation-specific ways of obtaining this information.

  if (Subtarget->hasPerfMon())

    setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom);


  // Only ARMv6 has BSWAP.

  if (!Subtarget->hasV6Ops())

    setOperationAction(ISD::BSWAP, MVT::i32, Expand);


  bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode()

                                        : Subtarget->hasDivideInARMMode();

  if (!hasDivide) {

    // These are expanded into libcalls if the cpu doesn't have HW divider.

    setOperationAction(ISD::SDIV,  MVT::i32, LibCall);

    setOperationAction(ISD::UDIV,  MVT::i32, LibCall);

  }


  if (TT.isOSWindows() && !Subtarget->hasDivideInThumbMode()) {

    setOperationAction(ISD::SDIV, MVT::i32, Custom);

    setOperationAction(ISD::UDIV, MVT::i32, Custom);


    setOperationAction(ISD::SDIV, MVT::i64, Custom);

    setOperationAction(ISD::UDIV, MVT::i64, Custom);

  }


  setOperationAction(ISD::SREM,  MVT::i32, Expand);

  setOperationAction(ISD::UREM,  MVT::i32, Expand);


  // Register based DivRem for AEABI (RTABI 4.2)

  if (TT.isTargetAEABI() || TT.isAndroid() || TT.isTargetGNUAEABI() ||

      TT.isTargetMuslAEABI() || TT.isOSFuchsia() || TT.isOSWindows()) {

    setOperationAction(ISD::SREM, MVT::i64, Custom);

    setOperationAction(ISD::UREM, MVT::i64, Custom);

    HasStandaloneRem = false;


    setOperationAction(ISD::SDIVREM, MVT::i32, Custom);

    setOperationAction(ISD::UDIVREM, MVT::i32, Custom);

    setOperationAction(ISD::SDIVREM, MVT::i64, Custom);

    setOperationAction(ISD::UDIVREM, MVT::i64, Custom);

  } else {

    setOperationAction(ISD::SDIVREM, MVT::i32, Expand);

    setOperationAction(ISD::UDIVREM, MVT::i32, Expand);

  }


  setOperationAction(ISD::GlobalAddress, MVT::i32,   Custom);

  setOperationAction(ISD::ConstantPool,  MVT::i32,   Custom);

  setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);

  setOperationAction(ISD::BlockAddress, MVT::i32, Custom);


  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);


  // Use the default implementation.

  setOperationAction(ISD::VASTART,            MVT::Other, Custom);

  setOperationAction(ISD::VAARG,              MVT::Other, Expand);

  setOperationAction(ISD::VACOPY,             MVT::Other, Expand);

  setOperationAction(ISD::VAEND,              MVT::Other, Expand);

  setOperationAction(ISD::STACKSAVE,          MVT::Other, Expand);

  setOperationAction(ISD::STACKRESTORE,       MVT::Other, Expand);


  if (TT.isOSWindows())

    setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);

  else

    setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);


  // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use

  // the default expansion.

  InsertFencesForAtomic = false;

  if (Subtarget->hasAnyDataBarrier() &&

      (!Subtarget->isThumb() || Subtarget->hasV8MBaselineOps())) {

    // ATOMIC_FENCE needs custom lowering; the others should have been expanded

    // to ldrex/strex loops already.

    setOperationAction(ISD::ATOMIC_FENCE,     MVT::Other, Custom);

    if (!Subtarget->isThumb() || !Subtarget->isMClass())

      setOperationAction(ISD::ATOMIC_CMP_SWAP,  MVT::i64, Custom);


    // On v8, we have particularly efficient implementations of atomic fences

    // if they can be combined with nearby atomic loads and stores.

    if (!Subtarget->hasAcquireRelease() ||

        getTargetMachine().getOptLevel() == CodeGenOptLevel::None) {

      // Automatically insert fences (dmb ish) around ATOMIC_SWAP etc.

      InsertFencesForAtomic = true;

    }

  } else {

    // If there's anything we can use as a barrier, go through custom lowering

    // for ATOMIC_FENCE.

    // If target has DMB in thumb, Fences can be inserted.

    if (Subtarget->hasDataBarrier())

      InsertFencesForAtomic = true;


    setOperationAction(ISD::ATOMIC_FENCE,   MVT::Other,

                       Subtarget->hasAnyDataBarrier() ? Custom : Expand);


    // Set them all for libcall, which will force libcalls.

    setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, LibCall);

    setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, LibCall);

    setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, LibCall);

    setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, LibCall);

    setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, LibCall);

    setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, LibCall);

    setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, LibCall);

    setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, LibCall);

    setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, LibCall);

    setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, LibCall);

    setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, LibCall);

    setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, LibCall);

    // Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the

    // Unordered/Monotonic case.

    if (!InsertFencesForAtomic) {

      setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Custom);

      setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Custom);

    }

  }


  // Compute supported atomic widths.

  if (TT.isOSLinux() || (!Subtarget->isMClass() && Subtarget->hasV6Ops())) {

    // For targets where __sync_* routines are reliably available, we use them

    // if necessary.

    //

    // ARM Linux always supports 64-bit atomics through kernel-assisted atomic

    // routines (kernel 3.1 or later). FIXME: Not with compiler-rt?

    //

    // ARMv6 targets have native instructions in ARM mode. For Thumb mode,

    // such targets should provide __sync_* routines, which use the ARM mode

    // instructions. (ARMv6 doesn't have dmb, but it has an equivalent

    // encoding; see ARMISD::MEMBARRIER_MCR.)

    setMaxAtomicSizeInBitsSupported(64);

  } else if ((Subtarget->isMClass() && Subtarget->hasV8MBaselineOps()) ||

             Subtarget->hasForced32BitAtomics()) {

    // Cortex-M (besides Cortex-M0) have 32-bit atomics.

    setMaxAtomicSizeInBitsSupported(32);

  } else {

    // We can't assume anything about other targets; just use libatomic

    // routines.

    setMaxAtomicSizeInBitsSupported(0);

  }


  setMaxDivRemBitWidthSupported(64);


  setOperationAction(ISD::PREFETCH,         MVT::Other, Custom);


  // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.

  if (!Subtarget->hasV6Ops()) {

    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);

    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8,  Expand);

  }

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);


  if (!Subtarget->useSoftFloat() && Subtarget->hasFPRegs() &&

      !Subtarget->isThumb1Only()) {

    // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR

    // iff target supports vfp2.

    setOperationAction(ISD::BITCAST, MVT::i64, Custom);

    setOperationAction(ISD::GET_ROUNDING, MVT::i32, Custom);

    setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);

    setOperationAction(ISD::GET_FPENV, MVT::i32, Legal);

    setOperationAction(ISD::SET_FPENV, MVT::i32, Legal);

    setOperationAction(ISD::RESET_FPENV, MVT::Other, Legal);

    setOperationAction(ISD::GET_FPMODE, MVT::i32, Legal);

    setOperationAction(ISD::SET_FPMODE, MVT::i32, Custom);

    setOperationAction(ISD::RESET_FPMODE, MVT::Other, Custom);

  }


  // We want to custom lower some of our intrinsics.

  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);

  setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);

  setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom);


  setOperationAction(ISD::SETCC,     MVT::i32, Expand);

  setOperationAction(ISD::SETCC,     MVT::f32, Expand);

  setOperationAction(ISD::SETCC,     MVT::f64, Expand);

  setOperationAction(ISD::SELECT,    MVT::i32, Custom);

  setOperationAction(ISD::SELECT,    MVT::f32, Custom);

  setOperationAction(ISD::SELECT,    MVT::f64, Custom);

  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);

  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);

  setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);

  if (Subtarget->hasFullFP16()) {

    setOperationAction(ISD::SETCC,     MVT::f16, Expand);

    setOperationAction(ISD::SELECT,    MVT::f16, Custom);

    setOperationAction(ISD::SELECT_CC, MVT::f16, Custom);

  }


  setOperationAction(ISD::SETCCCARRY, MVT::i32, Custom);


  setOperationAction(ISD::BRCOND,    MVT::Other, Custom);

  setOperationAction(ISD::BR_CC,     MVT::i32,   Custom);

  if (Subtarget->hasFullFP16())

      setOperationAction(ISD::BR_CC, MVT::f16,   Custom);

  setOperationAction(ISD::BR_CC,     MVT::f32,   Custom);

  setOperationAction(ISD::BR_CC,     MVT::f64,   Custom);

  setOperationAction(ISD::BR_JT,     MVT::Other, Custom);


  // We don't support sin/cos/fmod/copysign/pow

  setOperationAction(ISD::FSIN,      MVT::f64, Expand);

  setOperationAction(ISD::FSIN,      MVT::f32, Expand);

  setOperationAction(ISD::FCOS,      MVT::f32, Expand);

  setOperationAction(ISD::FCOS,      MVT::f64, Expand);

  setOperationAction(ISD::FSINCOS,   MVT::f64, Expand);

  setOperationAction(ISD::FSINCOS,   MVT::f32, Expand);

  setOperationAction(ISD::FREM,      MVT::f64, Expand);

  setOperationAction(ISD::FREM,      MVT::f32, Expand);

  if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2Base() &&

      !Subtarget->isThumb1Only()) {

    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);

    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);

  }

  setOperationAction(ISD::FPOW,      MVT::f64, Expand);

  setOperationAction(ISD::FPOW,      MVT::f32, Expand);


  if (!Subtarget->hasVFP4Base()) {

    setOperationAction(ISD::FMA, MVT::f64, Expand);

    setOperationAction(ISD::FMA, MVT::f32, Expand);

  }


  // Various VFP goodness

  if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only()) {

    // FP-ARMv8 adds f64 <-> f16 conversion. Before that it should be expanded.

    if (!Subtarget->hasFPARMv8Base() || !Subtarget->hasFP64()) {

      setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);

      setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);

      setOperationAction(ISD::STRICT_FP16_TO_FP, MVT::f64, LibCall);

      setOperationAction(ISD::STRICT_FP_TO_FP16, MVT::f64, LibCall);

    }


    // fp16 is a special v7 extension that adds f16 <-> f32 conversions.

    if (!Subtarget->hasFP16()) {

      setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);

      setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);

      setOperationAction(ISD::STRICT_FP16_TO_FP, MVT::f32, LibCall);

      setOperationAction(ISD::STRICT_FP_TO_FP16, MVT::f32, LibCall);

    }


    // Strict floating-point comparisons need custom lowering.

    setOperationAction(ISD::STRICT_FSETCC,  MVT::f16, Custom);

    setOperationAction(ISD::STRICT_FSETCCS, MVT::f16, Custom);

    setOperationAction(ISD::STRICT_FSETCC,  MVT::f32, Custom);

    setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Custom);

    setOperationAction(ISD::STRICT_FSETCC,  MVT::f64, Custom);

    setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Custom);

  }


  setOperationAction(ISD::FSINCOS, MVT::f64, Expand);

  setOperationAction(ISD::FSINCOS, MVT::f32, Expand);


  // FP-ARMv8 implements a lot of rounding-like FP operations.

  if (Subtarget->hasFPARMv8Base()) {

    setOperationAction(ISD::FFLOOR, MVT::f32, Legal);

    setOperationAction(ISD::FCEIL, MVT::f32, Legal);

    setOperationAction(ISD::FROUND, MVT::f32, Legal);

    setOperationAction(ISD::FTRUNC, MVT::f32, Legal);

    setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);

    setOperationAction(ISD::FRINT, MVT::f32, Legal);

    setOperationAction(ISD::FROUNDEVEN, MVT::f32, Legal);

    setOperationAction(ISD::FMINNUM, MVT::f32, Legal);

    setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);

    if (Subtarget->hasNEON()) {

      setOperationAction(ISD::FMINNUM, MVT::v2f32, Legal);

      setOperationAction(ISD::FMAXNUM, MVT::v2f32, Legal);

      setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);

      setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);

    }


    if (Subtarget->hasFP64()) {

      setOperationAction(ISD::FFLOOR, MVT::f64, Legal);

      setOperationAction(ISD::FCEIL, MVT::f64, Legal);

      setOperationAction(ISD::FROUND, MVT::f64, Legal);

      setOperationAction(ISD::FTRUNC, MVT::f64, Legal);

      setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);

      setOperationAction(ISD::FRINT, MVT::f64, Legal);

      setOperationAction(ISD::FROUNDEVEN, MVT::f64, Legal);

      setOperationAction(ISD::FMINNUM, MVT::f64, Legal);

      setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);

    }

  }


  // FP16 often need to be promoted to call lib functions

  // clang-format off

  if (Subtarget->hasFullFP16()) {

    setOperationAction(ISD::LRINT, MVT::f16, Expand);

    setOperationAction(ISD::LROUND, MVT::f16, Expand);

    setOperationAction(ISD::FCOPYSIGN, MVT::f16, Expand);


    for (auto Op : {ISD::FREM,          ISD::FPOW,         ISD::FPOWI,

                  ISD::FCOS,          ISD::FSIN,         ISD::FSINCOS,

                  ISD::FSINCOSPI,     ISD::FMODF,        ISD::FACOS,

                  ISD::FASIN,         ISD::FATAN,        ISD::FATAN2,

                  ISD::FCOSH,         ISD::FSINH,        ISD::FTANH,

                  ISD::FTAN,          ISD::FEXP,         ISD::FEXP2,

                  ISD::FEXP10,        ISD::FLOG,         ISD::FLOG2,

                  ISD::FLOG10,        ISD::STRICT_FREM,  ISD::STRICT_FPOW,

                  ISD::STRICT_FPOWI,  ISD::STRICT_FCOS,  ISD::STRICT_FSIN,

                  ISD::STRICT_FACOS,  ISD::STRICT_FASIN, ISD::STRICT_FATAN,

                  ISD::STRICT_FATAN2, ISD::STRICT_FCOSH, ISD::STRICT_FSINH,

                  ISD::STRICT_FTANH,  ISD::STRICT_FEXP,  ISD::STRICT_FEXP2,

                  ISD::STRICT_FLOG,   ISD::STRICT_FLOG2, ISD::STRICT_FLOG10,

                  ISD::STRICT_FTAN}) {

        setOperationAction(Op, MVT::f16, Promote);

    }


    // Round-to-integer need custom lowering for fp16, as Promote doesn't work

    // because the result type is integer.

    for (auto Op : {ISD::STRICT_LROUND, ISD::STRICT_LLROUND, ISD::STRICT_LRINT, ISD::STRICT_LLRINT})

      setOperationAction(Op, MVT::f16, Custom);


    for (auto Op : {ISD::FROUND,         ISD::FROUNDEVEN,        ISD::FTRUNC,

                    ISD::FNEARBYINT,     ISD::FRINT,             ISD::FFLOOR,

                    ISD::FCEIL,          ISD::STRICT_FROUND,     ISD::STRICT_FROUNDEVEN,

                    ISD::STRICT_FTRUNC,  ISD::STRICT_FNEARBYINT, ISD::STRICT_FRINT,

                    ISD::STRICT_FFLOOR,  ISD::STRICT_FCEIL}) {

      setOperationAction(Op, MVT::f16, Legal);

    }

    // clang-format on

  }


  if (Subtarget->hasNEON()) {

    // vmin and vmax aren't available in a scalar form, so we can use

    // a NEON instruction with an undef lane instead.

    setOperationAction(ISD::FMINIMUM, MVT::f32, Legal);

    setOperationAction(ISD::FMAXIMUM, MVT::f32, Legal);

    setOperationAction(ISD::FMINIMUM, MVT::f16, Legal);

    setOperationAction(ISD::FMAXIMUM, MVT::f16, Legal);

    setOperationAction(ISD::FMINIMUM, MVT::v2f32, Legal);

    setOperationAction(ISD::FMAXIMUM, MVT::v2f32, Legal);

    setOperationAction(ISD::FMINIMUM, MVT::v4f32, Legal);

    setOperationAction(ISD::FMAXIMUM, MVT::v4f32, Legal);


    if (Subtarget->hasV8Ops()) {

      setOperationAction(ISD::FFLOOR, MVT::v2f32, Legal);

      setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);

      setOperationAction(ISD::FROUND, MVT::v2f32, Legal);

      setOperationAction(ISD::FROUND, MVT::v4f32, Legal);

      setOperationAction(ISD::FROUNDEVEN, MVT::v2f32, Legal);

      setOperationAction(ISD::FROUNDEVEN, MVT::v4f32, Legal);

      setOperationAction(ISD::FCEIL, MVT::v2f32, Legal);

      setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);

      setOperationAction(ISD::FTRUNC, MVT::v2f32, Legal);

      setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);

      setOperationAction(ISD::FRINT, MVT::v2f32, Legal);

      setOperationAction(ISD::FRINT, MVT::v4f32, Legal);

    }


    if (Subtarget->hasFullFP16()) {

      setOperationAction(ISD::FMINNUM, MVT::v4f16, Legal);

      setOperationAction(ISD::FMAXNUM, MVT::v4f16, Legal);

      setOperationAction(ISD::FMINNUM, MVT::v8f16, Legal);

      setOperationAction(ISD::FMAXNUM, MVT::v8f16, Legal);


      setOperationAction(ISD::FMINIMUM, MVT::v4f16, Legal);

      setOperationAction(ISD::FMAXIMUM, MVT::v4f16, Legal);

      setOperationAction(ISD::FMINIMUM, MVT::v8f16, Legal);

      setOperationAction(ISD::FMAXIMUM, MVT::v8f16, Legal);


      setOperationAction(ISD::FFLOOR, MVT::v4f16, Legal);

      setOperationAction(ISD::FFLOOR, MVT::v8f16, Legal);

      setOperationAction(ISD::FROUND, MVT::v4f16, Legal);

      setOperationAction(ISD::FROUND, MVT::v8f16, Legal);

      setOperationAction(ISD::FROUNDEVEN, MVT::v4f16, Legal);

      setOperationAction(ISD::FROUNDEVEN, MVT::v8f16, Legal);

      setOperationAction(ISD::FCEIL, MVT::v4f16, Legal);

      setOperationAction(ISD::FCEIL, MVT::v8f16, Legal);

      setOperationAction(ISD::FTRUNC, MVT::v4f16, Legal);

      setOperationAction(ISD::FTRUNC, MVT::v8f16, Legal);

      setOperationAction(ISD::FRINT, MVT::v4f16, Legal);

      setOperationAction(ISD::FRINT, MVT::v8f16, Legal);

    }

  }


  // On MSVC, both 32-bit and 64-bit, ldexpf(f32) is not defined.  MinGW has

  // it, but it's just a wrapper around ldexp.

  if (TT.isOSWindows()) {

    for (ISD::NodeType Op : {ISD::FLDEXP, ISD::STRICT_FLDEXP, ISD::FFREXP})

      if (isOperationExpand(Op, MVT::f32))

        setOperationAction(Op, MVT::f32, Promote);

  }


  // LegalizeDAG currently can't expand fp16 LDEXP/FREXP on targets where i16

  // isn't legal.

  for (ISD::NodeType Op : {ISD::FLDEXP, ISD::STRICT_FLDEXP, ISD::FFREXP})

    if (isOperationExpand(Op, MVT::f16))

      setOperationAction(Op, MVT::f16, Promote);


  // We have target-specific dag combine patterns for the following nodes:

  // ARMISD::VMOVRRD  - No need to call setTargetDAGCombine

  setTargetDAGCombine(

      {ISD::ADD, ISD::SUB, ISD::MUL, ISD::AND, ISD::OR, ISD::XOR});


  if (Subtarget->hasMVEIntegerOps())

    setTargetDAGCombine(ISD::VSELECT);


  if (Subtarget->hasV6Ops())

    setTargetDAGCombine(ISD::SRL);

  if (Subtarget->isThumb1Only())

    setTargetDAGCombine(ISD::SHL);

  // Attempt to lower smin/smax to ssat/usat

  if ((!Subtarget->isThumb() && Subtarget->hasV6Ops()) ||

      Subtarget->isThumb2()) {

    setTargetDAGCombine({ISD::SMIN, ISD::SMAX});

  }


  setStackPointerRegisterToSaveRestore(ARM::SP);


  if (Subtarget->useSoftFloat() || Subtarget->isThumb1Only() ||

      !Subtarget->hasVFP2Base() || Subtarget->hasMinSize())

    setSchedulingPreference(Sched::RegPressure);

  else

    setSchedulingPreference(Sched::Hybrid);


  //// temporary - rewrite interface to use type

  MaxStoresPerMemset = 8;

  MaxStoresPerMemsetOptSize = 4;

  MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores

  MaxStoresPerMemcpyOptSize = 2;

  MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores

  MaxStoresPerMemmoveOptSize = 2;


  // On ARM arguments smaller than 4 bytes are extended, so all arguments

  // are at least 4 bytes aligned.

  setMinStackArgumentAlignment(Align(4));


  // Prefer likely predicted branches to selects on out-of-order cores.

  PredictableSelectIsExpensive = Subtarget->getSchedModel().isOutOfOrder();


  setPrefLoopAlignment(Align(1ULL << Subtarget->getPreferBranchLogAlignment()));

  setPrefFunctionAlignment(

      Align(1ULL << Subtarget->getPreferBranchLogAlignment()));


  setMinFunctionAlignment(Subtarget->isThumb() ? Align(2) : Align(4));

}


bool ARMTargetLowering::useSoftFloat() const {

  return Subtarget->useSoftFloat();

}


bool ARMTargetLowering::preferSelectsOverBooleanArithmetic(EVT VT) const {

  return !Subtarget->isThumb1Only() && VT.getSizeInBits() <= 32;

}


// FIXME: It might make sense to define the representative register class as the

// nearest super-register that has a non-null superset. For example, DPR_VFP2 is

// a super-register of SPR, and DPR is a superset if DPR_VFP2. Consequently,

// SPR's representative would be DPR_VFP2. This should work well if register

// pressure tracking were modified such that a register use would increment the

// pressure of the register class's representative and all of it's super

// classes' representatives transitively. We have not implemented this because

// of the difficulty prior to coalescing of modeling operand register classes

// due to the common occurrence of cross class copies and subregister insertions

// and extractions.

std::pair<const TargetRegisterClass *, uint8_t>


ARMTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,

                                           MVT VT) const {

  const TargetRegisterClass *RRC = nullptr;

  uint8_t Cost = 1;

  switch (VT.SimpleTy) {

  default:

    return TargetLowering::findRepresentativeClass(TRI, VT);

  // Use DPR as representative register class for all floating point

  // and vector types. Since there are 32 SPR registers and 32 DPR registers so

  // the cost is 1 for both f32 and f64.

  case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:

  case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:

    RRC = &ARM::DPRRegClass;

    // When NEON is used for SP, only half of the register file is available

    // because operations that define both SP and DP results will be constrained

    // to the VFP2 class (D0-D15). We currently model this constraint prior to

    // coalescing by double-counting the SP regs. See the FIXME above.

    if (Subtarget->useNEONForSinglePrecisionFP())

      Cost = 2;

    break;

  case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:

  case MVT::v4f32: case MVT::v2f64:

    RRC = &ARM::DPRRegClass;

    Cost = 2;

    break;

  case MVT::v4i64:

    RRC = &ARM::DPRRegClass;

    Cost = 4;

    break;

  case MVT::v8i64:

    RRC = &ARM::DPRRegClass;

    Cost = 8;

    break;

  }

  return std::make_pair(RRC, Cost);

}


EVT ARMTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,

                                          EVT VT) const {

  if (!VT.isVector())

    return getPointerTy(DL);


  // MVE has a predicate register.

  if ((Subtarget->hasMVEIntegerOps() &&

       (VT == MVT::v2i64 || VT == MVT::v4i32 || VT == MVT::v8i16 ||

        VT == MVT::v16i8)) ||

      (Subtarget->hasMVEFloatOps() &&

       (VT == MVT::v2f64 || VT == MVT::v4f32 || VT == MVT::v8f16)))

    return MVT::getVectorVT(MVT::i1, VT.getVectorElementCount());

  return VT.changeVectorElementTypeToInteger();

}


/// getRegClassFor - Return the register class that should be used for the

/// specified value type.

const TargetRegisterClass *


ARMTargetLowering::getRegClassFor(MVT VT, bool isDivergent) const {

  (void)isDivergent;

  // Map v4i64 to QQ registers but do not make the type legal. Similarly map

  // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to

  // load / store 4 to 8 consecutive NEON D registers, or 2 to 4 consecutive

  // MVE Q registers.

  if (Subtarget->hasNEON()) {

    if (VT == MVT::v4i64)

      return &ARM::QQPRRegClass;

    if (VT == MVT::v8i64)

      return &ARM::QQQQPRRegClass;

  }

  if (Subtarget->hasMVEIntegerOps()) {

    if (VT == MVT::v4i64)

      return &ARM::MQQPRRegClass;

    if (VT == MVT::v8i64)

      return &ARM::MQQQQPRRegClass;

  }

  return TargetLowering::getRegClassFor(VT);

}


// memcpy, and other memory intrinsics, typically tries to use LDM/STM if the

// source/dest is aligned and the copy size is large enough. We therefore want

// to align such objects passed to memory intrinsics.


bool ARMTargetLowering::shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize,

                                               Align &PrefAlign) const {

  if (!isa<MemIntrinsic>(CI))

    return false;

  MinSize = 8;

  // On ARM11 onwards (excluding M class) 8-byte aligned LDM is typically 1

  // cycle faster than 4-byte aligned LDM.

  PrefAlign =

      (Subtarget->hasV6Ops() && !Subtarget->isMClass() ? Align(8) : Align(4));

  return true;

}


// Create a fast isel object.

FastISel *


ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,

                                  const TargetLibraryInfo *libInfo) const {

  return ARM::createFastISel(funcInfo, libInfo);

}


Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {

  unsigned NumVals = N->getNumValues();

  if (!NumVals)

    return Sched::RegPressure;


  for (unsigned i = 0; i != NumVals; ++i) {

    EVT VT = N->getValueType(i);

    if (VT == MVT::Glue || VT == MVT::Other)

      continue;

    if (VT.isFloatingPoint() || VT.isVector())

      return Sched::ILP;

  }


  if (!N->isMachineOpcode())

    return Sched::RegPressure;


  // Load are scheduled for latency even if there instruction itinerary

  // is not available.

  const TargetInstrInfo *TII = Subtarget->getInstrInfo();

  const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());


  if (MCID.getNumDefs() == 0)

    return Sched::RegPressure;

  if (!Itins->isEmpty() &&

      Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2U)

    return Sched::ILP;


  return Sched::RegPressure;

}


//===----------------------------------------------------------------------===//

// Lowering Code

//===----------------------------------------------------------------------===//


static bool isSRL16(const SDValue &Op) {

  if (Op.getOpcode() != ISD::SRL)

    return false;

  if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))

    return Const->getZExtValue() == 16;

  return false;

}


static bool isSRA16(const SDValue &Op) {

  if (Op.getOpcode() != ISD::SRA)

    return false;

  if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))

    return Const->getZExtValue() == 16;

  return false;

}


static bool isSHL16(const SDValue &Op) {

  if (Op.getOpcode() != ISD::SHL)

    return false;

  if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))

    return Const->getZExtValue() == 16;

  return false;

}


// Check for a signed 16-bit value. We special case SRA because it makes it

// more simple when also looking for SRAs that aren't sign extending a

// smaller value. Without the check, we'd need to take extra care with

// checking order for some operations.


static bool isS16(const SDValue &Op, SelectionDAG &DAG) {

  if (isSRA16(Op))

    return isSHL16(Op.getOperand(0));

  return DAG.ComputeNumSignBits(Op) == 17;

}


/// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC


static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) {

  switch (CC) {

  default: llvm_unreachable("Unknown condition code!");

  case ISD::SETNE:  return ARMCC::NE;

  case ISD::SETEQ:  return ARMCC::EQ;

  case ISD::SETGT:  return ARMCC::GT;

  case ISD::SETGE:  return ARMCC::GE;

  case ISD::SETLT:  return ARMCC::LT;

  case ISD::SETLE:  return ARMCC::LE;

  case ISD::SETUGT: return ARMCC::HI;

  case ISD::SETUGE: return ARMCC::HS;

  case ISD::SETULT: return ARMCC::LO;

  case ISD::SETULE: return ARMCC::LS;

  }

}


/// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.


static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,

                        ARMCC::CondCodes &CondCode2) {

  CondCode2 = ARMCC::AL;

  switch (CC) {

  default: llvm_unreachable("Unknown FP condition!");

  case ISD::SETEQ:

  case ISD::SETOEQ: CondCode = ARMCC::EQ; break;

  case ISD::SETGT:

  case ISD::SETOGT: CondCode = ARMCC::GT; break;

  case ISD::SETGE:

  case ISD::SETOGE: CondCode = ARMCC::GE; break;

  case ISD::SETOLT: CondCode = ARMCC::MI; break;

  case ISD::SETOLE: CondCode = ARMCC::LS; break;

  case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break;

  case ISD::SETO:   CondCode = ARMCC::VC; break;

  case ISD::SETUO:  CondCode = ARMCC::VS; break;

  case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break;

  case ISD::SETUGT: CondCode = ARMCC::HI; break;

  case ISD::SETUGE: CondCode = ARMCC::PL; break;

  case ISD::SETLT:

  case ISD::SETULT: CondCode = ARMCC::LT; break;

  case ISD::SETLE:

  case ISD::SETULE: CondCode = ARMCC::LE; break;

  case ISD::SETNE:

  case ISD::SETUNE: CondCode = ARMCC::NE; break;

  }

}


//===----------------------------------------------------------------------===//

//                      Calling Convention Implementation

//===----------------------------------------------------------------------===//


/// getEffectiveCallingConv - Get the effective calling convention, taking into

/// account presence of floating point hardware and calling convention

/// limitations, such as support for variadic functions.

CallingConv::ID

ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC,

                                           bool isVarArg) const {

  switch (CC) {

  default:

    report_fatal_error("Unsupported calling convention");

  case CallingConv::ARM_AAPCS:

  case CallingConv::ARM_APCS:

  case CallingConv::GHC:

  case CallingConv::CFGuard_Check:

    return CC;

  case CallingConv::PreserveMost:

    return CallingConv::PreserveMost;

  case CallingConv::PreserveAll:

    return CallingConv::PreserveAll;

  case CallingConv::ARM_AAPCS_VFP:

  case CallingConv::Swift:

  case CallingConv::SwiftTail:

    return isVarArg ? CallingConv::ARM_AAPCS : CallingConv::ARM_AAPCS_VFP;

  case CallingConv::C:

  case CallingConv::Tail:

    if (!getTM().isAAPCS_ABI())

      return CallingConv::ARM_APCS;

    else if (Subtarget->hasFPRegs() && !Subtarget->isThumb1Only() &&

             getTargetMachine().Options.FloatABIType == FloatABI::Hard &&

             !isVarArg)

      return CallingConv::ARM_AAPCS_VFP;

    else

      return CallingConv::ARM_AAPCS;

  case CallingConv::Fast:

  case CallingConv::CXX_FAST_TLS:

    if (!getTM().isAAPCS_ABI()) {

      if (Subtarget->hasVFP2Base() && !Subtarget->isThumb1Only() && !isVarArg)

        return CallingConv::Fast;

      return CallingConv::ARM_APCS;

    } else if (Subtarget->hasVFP2Base() && !Subtarget->isThumb1Only() &&

               !isVarArg)

      return CallingConv::ARM_AAPCS_VFP;

    else

      return CallingConv::ARM_AAPCS;

  }

}


CCAssignFn *ARMTargetLowering::CCAssignFnForCall(CallingConv::ID CC,

                                                 bool isVarArg) const {

  return CCAssignFnForNode(CC, false, isVarArg);

}


CCAssignFn *ARMTargetLowering::CCAssignFnForReturn(CallingConv::ID CC,

                                                   bool isVarArg) const {

  return CCAssignFnForNode(CC, true, isVarArg);

}


/// CCAssignFnForNode - Selects the correct CCAssignFn for the given

/// CallingConvention.

CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,

                                                 bool Return,

                                                 bool isVarArg) const {

  switch (getEffectiveCallingConv(CC, isVarArg)) {

  default:

    report_fatal_error("Unsupported calling convention");

  case CallingConv::ARM_APCS:

    return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);

  case CallingConv::ARM_AAPCS:

    return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);

  case CallingConv::ARM_AAPCS_VFP:

    return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);

  case CallingConv::Fast:

    return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);

  case CallingConv::GHC:

    return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC);

  case CallingConv::PreserveMost:

    return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);

  case CallingConv::PreserveAll:

    return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);

  case CallingConv::CFGuard_Check:

    return (Return ? RetCC_ARM_AAPCS : CC_ARM_Win32_CFGuard_Check);

  }

}


SDValue ARMTargetLowering::MoveToHPR(const SDLoc &dl, SelectionDAG &DAG,

                                     MVT LocVT, MVT ValVT, SDValue Val) const {

  Val = DAG.getNode(ISD::BITCAST, dl, MVT::getIntegerVT(LocVT.getSizeInBits()),

                    Val);

  if (Subtarget->hasFullFP16()) {

    Val = DAG.getNode(ARMISD::VMOVhr, dl, ValVT, Val);

  } else {

    Val = DAG.getNode(ISD::TRUNCATE, dl,

                      MVT::getIntegerVT(ValVT.getSizeInBits()), Val);

    Val = DAG.getNode(ISD::BITCAST, dl, ValVT, Val);

  }

  return Val;

}


SDValue ARMTargetLowering::MoveFromHPR(const SDLoc &dl, SelectionDAG &DAG,

                                       MVT LocVT, MVT ValVT,

                                       SDValue Val) const {

  if (Subtarget->hasFullFP16()) {

    Val = DAG.getNode(ARMISD::VMOVrh, dl,

                      MVT::getIntegerVT(LocVT.getSizeInBits()), Val);

  } else {

    Val = DAG.getNode(ISD::BITCAST, dl,

                      MVT::getIntegerVT(ValVT.getSizeInBits()), Val);

    Val = DAG.getNode(ISD::ZERO_EXTEND, dl,

                      MVT::getIntegerVT(LocVT.getSizeInBits()), Val);

  }

  return DAG.getNode(ISD::BITCAST, dl, LocVT, Val);

}


/// LowerCallResult - Lower the result values of a call into the

/// appropriate copies out of appropriate physical registers.

SDValue ARMTargetLowering::LowerCallResult(

    SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg,

    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,

    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn,

    SDValue ThisVal, bool isCmseNSCall) const {

  // Assign locations to each value returned by this call.

  SmallVector<CCValAssign, 16> RVLocs;

  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,

                 *DAG.getContext());

  CCInfo.AnalyzeCallResult(Ins, CCAssignFnForReturn(CallConv, isVarArg));


  // Copy all of the result registers out of their specified physreg.

  for (unsigned i = 0; i != RVLocs.size(); ++i) {

    CCValAssign VA = RVLocs[i];


    // Pass 'this' value directly from the argument to return value, to avoid

    // reg unit interference

    if (i == 0 && isThisReturn) {

      assert(!VA.needsCustom() && VA.getLocVT() == MVT::i32 &&

             "unexpected return calling convention register assignment");

      InVals.push_back(ThisVal);

      continue;

    }


    SDValue Val;

    if (VA.needsCustom() &&

        (VA.getLocVT() == MVT::f64 || VA.getLocVT() == MVT::v2f64)) {

      // Handle f64 or half of a v2f64.

      SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,

                                      InGlue);

      Chain = Lo.getValue(1);

      InGlue = Lo.getValue(2);

      VA = RVLocs[++i]; // skip ahead to next loc

      SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,

                                      InGlue);

      Chain = Hi.getValue(1);

      InGlue = Hi.getValue(2);

      if (!Subtarget->isLittle())

        std::swap (Lo, Hi);

      Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);


      if (VA.getLocVT() == MVT::v2f64) {

        SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);

        Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,

                          DAG.getConstant(0, dl, MVT::i32));


        VA = RVLocs[++i]; // skip ahead to next loc

        Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InGlue);

        Chain = Lo.getValue(1);

        InGlue = Lo.getValue(2);

        VA = RVLocs[++i]; // skip ahead to next loc

        Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InGlue);

        Chain = Hi.getValue(1);

        InGlue = Hi.getValue(2);

        if (!Subtarget->isLittle())

          std::swap (Lo, Hi);

        Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);

        Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,

                          DAG.getConstant(1, dl, MVT::i32));

      }

    } else {

      Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),

                               InGlue);

      Chain = Val.getValue(1);

      InGlue = Val.getValue(2);

    }


    switch (VA.getLocInfo()) {

    default: llvm_unreachable("Unknown loc info!");

    case CCValAssign::Full: break;

    case CCValAssign::BCvt:

      Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val);

      break;

    }


    // f16 arguments have their size extended to 4 bytes and passed as if they

    // had been copied to the LSBs of a 32-bit register.

    // For that, it's passed extended to i32 (soft ABI) or to f32 (hard ABI)

    if (VA.needsCustom() &&

        (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))

      Val = MoveToHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), Val);


    // On CMSE Non-secure Calls, call results (returned values) whose bitwidth

    // is less than 32 bits must be sign- or zero-extended after the call for

    // security reasons. Although the ABI mandates an extension done by the

    // callee, the latter cannot be trusted to follow the rules of the ABI.

    const ISD::InputArg &Arg = Ins[VA.getValNo()];

    if (isCmseNSCall && Arg.ArgVT.isScalarInteger() &&

        VA.getLocVT().isScalarInteger() && Arg.ArgVT.bitsLT(MVT::i32))

      Val = handleCMSEValue(Val, Arg, DAG, dl);


    InVals.push_back(Val);

  }


  return Chain;

}


std::pair<SDValue, MachinePointerInfo> ARMTargetLowering::computeAddrForCallArg(

    const SDLoc &dl, SelectionDAG &DAG, const CCValAssign &VA, SDValue StackPtr,

    bool IsTailCall, int SPDiff) const {

  SDValue DstAddr;

  MachinePointerInfo DstInfo;

  int32_t Offset = VA.getLocMemOffset();

  MachineFunction &MF = DAG.getMachineFunction();


  if (IsTailCall) {

        Offset += SPDiff;

        auto PtrVT = getPointerTy(DAG.getDataLayout());

        int Size = VA.getLocVT().getFixedSizeInBits() / 8;

        int FI = MF.getFrameInfo().CreateFixedObject(Size, Offset, true);

        DstAddr = DAG.getFrameIndex(FI, PtrVT);

        DstInfo =

            MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);

  } else {

        SDValue PtrOff = DAG.getIntPtrConstant(Offset, dl);

        DstAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),

                              StackPtr, PtrOff);

        DstInfo =

            MachinePointerInfo::getStack(DAG.getMachineFunction(), Offset);

  }


  return std::make_pair(DstAddr, DstInfo);

}


// Returns the type of copying which is required to set up a byval argument to

// a tail-called function. This isn't needed for non-tail calls, because they

// always need the equivalent of CopyOnce, but tail-calls sometimes need two to

// avoid clobbering another argument (CopyViaTemp), and sometimes can be

// optimised to zero copies when forwarding an argument from the caller's

// caller (NoCopy).

ARMTargetLowering::ByValCopyKind ARMTargetLowering::ByValNeedsCopyForTailCall(

    SelectionDAG &DAG, SDValue Src, SDValue Dst, ISD::ArgFlagsTy Flags) const {

  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();

  ARMFunctionInfo *AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();


  // Globals are always safe to copy from.

  if (isa<GlobalAddressSDNode>(Src) || isa<ExternalSymbolSDNode>(Src))

    return CopyOnce;


  // Can only analyse frame index nodes, conservatively assume we need a

  // temporary.

  auto *SrcFrameIdxNode = dyn_cast<FrameIndexSDNode>(Src);

  auto *DstFrameIdxNode = dyn_cast<FrameIndexSDNode>(Dst);

  if (!SrcFrameIdxNode || !DstFrameIdxNode)

    return CopyViaTemp;


  int SrcFI = SrcFrameIdxNode->getIndex();

  int DstFI = DstFrameIdxNode->getIndex();

  assert(MFI.isFixedObjectIndex(DstFI) &&

         "byval passed in non-fixed stack slot");


  int64_t SrcOffset = MFI.getObjectOffset(SrcFI);

  int64_t DstOffset = MFI.getObjectOffset(DstFI);


  // If the source is in the local frame, then the copy to the argument memory

  // is always valid.

  bool FixedSrc = MFI.isFixedObjectIndex(SrcFI);

  if (!FixedSrc ||

      (FixedSrc && SrcOffset < -(int64_t)AFI->getArgRegsSaveSize()))

    return CopyOnce;


  // In the case of byval arguments split between registers and the stack,

  // computeAddrForCallArg returns a FrameIndex which corresponds only to the

  // stack portion, but the Src SDValue will refer to the full value, including

  // the local stack memory that the register portion gets stored into. We only

  // need to compare them for equality, so normalise on the full value version.

  uint64_t RegSize = Flags.getByValSize() - MFI.getObjectSize(DstFI);

  DstOffset -= RegSize;


  // If the value is already in the correct location, then no copying is

  // needed. If not, then we need to copy via a temporary.

  if (SrcOffset == DstOffset)

    return NoCopy;

  else

    return CopyViaTemp;

}


void ARMTargetLowering::PassF64ArgInRegs(const SDLoc &dl, SelectionDAG &DAG,

                                         SDValue Chain, SDValue &Arg,

                                         RegsToPassVector &RegsToPass,

                                         CCValAssign &VA, CCValAssign &NextVA,

                                         SDValue &StackPtr,

                                         SmallVectorImpl<SDValue> &MemOpChains,

                                         bool IsTailCall,

                                         int SPDiff) const {

  SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,

                              DAG.getVTList(MVT::i32, MVT::i32), Arg);

  unsigned id = Subtarget->isLittle() ? 0 : 1;

  RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd.getValue(id)));


  if (NextVA.isRegLoc())

    RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1-id)));

  else {

    assert(NextVA.isMemLoc());

    if (!StackPtr.getNode())

      StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP,

                                    getPointerTy(DAG.getDataLayout()));


    SDValue DstAddr;

    MachinePointerInfo DstInfo;

    std::tie(DstAddr, DstInfo) =

        computeAddrForCallArg(dl, DAG, NextVA, StackPtr, IsTailCall, SPDiff);

    MemOpChains.push_back(

        DAG.getStore(Chain, dl, fmrrd.getValue(1 - id), DstAddr, DstInfo));

  }

}


static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls) {

  return (CC == CallingConv::Fast && GuaranteeTailCalls) ||

         CC == CallingConv::Tail || CC == CallingConv::SwiftTail;

}


/// LowerCall - Lowering a call into a callseq_start <-

/// ARMISD:CALL <- callseq_end chain. Also add input and output parameter

/// nodes.

SDValue

ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,

                             SmallVectorImpl<SDValue> &InVals) const {

  SelectionDAG &DAG                     = CLI.DAG;

  SDLoc &dl                             = CLI.DL;

  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;

  SmallVectorImpl<SDValue> &OutVals     = CLI.OutVals;

  SmallVectorImpl<ISD::InputArg> &Ins   = CLI.Ins;

  SDValue Chain                         = CLI.Chain;

  SDValue Callee                        = CLI.Callee;

  bool &isTailCall                      = CLI.IsTailCall;

  CallingConv::ID CallConv              = CLI.CallConv;

  bool doesNotRet                       = CLI.DoesNotReturn;

  bool isVarArg                         = CLI.IsVarArg;

  const CallBase *CB = CLI.CB;


  MachineFunction &MF = DAG.getMachineFunction();

  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();

  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();

  MachineFunction::CallSiteInfo CSInfo;

  bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();

  bool isThisReturn = false;

  bool isCmseNSCall   = false;

  bool isSibCall = false;

  bool PreferIndirect = false;

  bool GuardWithBTI = false;


  // Analyze operands of the call, assigning locations to each operand.

  SmallVector<CCValAssign, 16> ArgLocs;

  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,

                 *DAG.getContext());

  CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CallConv, isVarArg));


  // Lower 'returns_twice' calls to a pseudo-instruction.

  if (CLI.CB && CLI.CB->getAttributes().hasFnAttr(Attribute::ReturnsTwice) &&

      !Subtarget->noBTIAtReturnTwice())

    GuardWithBTI = AFI->branchTargetEnforcement();


  // Set type id for call site info.

  if (MF.getTarget().Options.EmitCallGraphSection && CB && CB->isIndirectCall())

    CSInfo = MachineFunction::CallSiteInfo(*CB);


  // Determine whether this is a non-secure function call.

  if (CLI.CB && CLI.CB->getAttributes().hasFnAttr("cmse_nonsecure_call"))

    isCmseNSCall = true;


  // Disable tail calls if they're not supported.

  if (!Subtarget->supportsTailCall())

    isTailCall = false;


  // For both the non-secure calls and the returns from a CMSE entry function,

  // the function needs to do some extra work after the call, or before the

  // return, respectively, thus it cannot end with a tail call

  if (isCmseNSCall || AFI->isCmseNSEntryFunction())

    isTailCall = false;


  if (isa<GlobalAddressSDNode>(Callee)) {

    // If we're optimizing for minimum size and the function is called three or

    // more times in this block, we can improve codesize by calling indirectly

    // as BLXr has a 16-bit encoding.

    auto *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();

    if (CLI.CB) {

      auto *BB = CLI.CB->getParent();

      PreferIndirect = Subtarget->isThumb() && Subtarget->hasMinSize() &&

                       count_if(GV->users(), [&BB](const User *U) {

                         return isa<Instruction>(U) &&

                                cast<Instruction>(U)->getParent() == BB;

                       }) > 2;

    }

  }

  if (isTailCall) {

    // Check if it's really possible to do a tail call.

    isTailCall =

        IsEligibleForTailCallOptimization(CLI, CCInfo, ArgLocs, PreferIndirect);


    if (isTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt &&

        CallConv != CallingConv::Tail && CallConv != CallingConv::SwiftTail)

      isSibCall = true;


    // We don't support GuaranteedTailCallOpt for ARM, only automatically

    // detected sibcalls.

    if (isTailCall)

      ++NumTailCalls;

  }


  if (!isTailCall && CLI.CB && CLI.CB->isMustTailCall())

    report_fatal_error("failed to perform tail call elimination on a call "

                       "site marked musttail");


  // Get a count of how many bytes are to be pushed on the stack.

  unsigned NumBytes = CCInfo.getStackSize();


  // SPDiff is the byte offset of the call's argument area from the callee's.

  // Stores to callee stack arguments will be placed in FixedStackSlots offset

  // by this amount for a tail call. In a sibling call it must be 0 because the

  // caller will deallocate the entire stack and the callee still expects its

  // arguments to begin at SP+0. Completely unused for non-tail calls.

  int SPDiff = 0;


  if (isTailCall && !isSibCall) {

    auto FuncInfo = MF.getInfo<ARMFunctionInfo>();

    unsigned NumReusableBytes = FuncInfo->getArgumentStackSize();


    // Since callee will pop argument stack as a tail call, we must keep the

    // popped size 16-byte aligned.

    MaybeAlign StackAlign = DAG.getDataLayout().getStackAlignment();

    assert(StackAlign && "data layout string is missing stack alignment");

    NumBytes = alignTo(NumBytes, *StackAlign);


    // SPDiff will be negative if this tail call requires more space than we

    // would automatically have in our incoming argument space. Positive if we

    // can actually shrink the stack.

    SPDiff = NumReusableBytes - NumBytes;


    // If this call requires more stack than we have available from

    // LowerFormalArguments, tell FrameLowering to reserve space for it.

    if (SPDiff < 0 && AFI->getArgRegsSaveSize() < (unsigned)-SPDiff)

      AFI->setArgRegsSaveSize(-SPDiff);

  }


  if (isSibCall) {

    // For sibling tail calls, memory operands are available in our caller's stack.

    NumBytes = 0;

  } else {

    // Adjust the stack pointer for the new arguments...

    // These operations are automatically eliminated by the prolog/epilog pass

    Chain = DAG.getCALLSEQ_START(Chain, isTailCall ? 0 : NumBytes, 0, dl);

  }


  SDValue StackPtr =

      DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy(DAG.getDataLayout()));


  RegsToPassVector RegsToPass;

  SmallVector<SDValue, 8> MemOpChains;


  // If we are doing a tail-call, any byval arguments will be written to stack

  // space which was used for incoming arguments. If any the values being used

  // are incoming byval arguments to this function, then they might be

  // overwritten by the stores of the outgoing arguments. To avoid this, we

  // need to make a temporary copy of them in local stack space, then copy back

  // to the argument area.

  DenseMap<unsigned, SDValue> ByValTemporaries;

  SDValue ByValTempChain;

  if (isTailCall) {

    SmallVector<SDValue, 8> ByValCopyChains;

    for (const CCValAssign &VA : ArgLocs) {

      unsigned ArgIdx = VA.getValNo();

      SDValue Src = OutVals[ArgIdx];

      ISD::ArgFlagsTy Flags = Outs[ArgIdx].Flags;


      if (!Flags.isByVal())

        continue;


      SDValue Dst;

      MachinePointerInfo DstInfo;

      std::tie(Dst, DstInfo) =

          computeAddrForCallArg(dl, DAG, VA, SDValue(), true, SPDiff);

      ByValCopyKind Copy = ByValNeedsCopyForTailCall(DAG, Src, Dst, Flags);


      if (Copy == NoCopy) {

        // If the argument is already at the correct offset on the stack

        // (because we are forwarding a byval argument from our caller), we

        // don't need any copying.

        continue;

      } else if (Copy == CopyOnce) {

        // If the argument is in our local stack frame, no other argument

        // preparation can clobber it, so we can copy it to the final location

        // later.

        ByValTemporaries[ArgIdx] = Src;

      } else {

        assert(Copy == CopyViaTemp && "unexpected enum value");

        // If we might be copying this argument from the outgoing argument

        // stack area, we need to copy via a temporary in the local stack

        // frame.

        int TempFrameIdx = MFI.CreateStackObject(

            Flags.getByValSize(), Flags.getNonZeroByValAlign(), false);

        SDValue Temp =

            DAG.getFrameIndex(TempFrameIdx, getPointerTy(DAG.getDataLayout()));


        SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);

        SDValue AlignNode =

            DAG.getConstant(Flags.getNonZeroByValAlign().value(), dl, MVT::i32);


        SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);

        SDValue Ops[] = {Chain, Temp, Src, SizeNode, AlignNode};

        ByValCopyChains.push_back(

            DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs, Ops));

        ByValTemporaries[ArgIdx] = Temp;

      }

    }

    if (!ByValCopyChains.empty())

      ByValTempChain =

          DAG.getNode(ISD::TokenFactor, dl, MVT::Other, ByValCopyChains);

  }


  // During a tail call, stores to the argument area must happen after all of

  // the function's incoming arguments have been loaded because they may alias.

  // This is done by folding in a TokenFactor from LowerFormalArguments, but

  // there's no point in doing so repeatedly so this tracks whether that's

  // happened yet.

  bool AfterFormalArgLoads = false;


  // Walk the register/memloc assignments, inserting copies/loads.  In the case

  // of tail call optimization, arguments are handled later.

  for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();

       i != e;

       ++i, ++realArgIdx) {

    CCValAssign &VA = ArgLocs[i];

    SDValue Arg = OutVals[realArgIdx];

    ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;

    bool isByVal = Flags.isByVal();


    // Promote the value if needed.

    switch (VA.getLocInfo()) {

    default: llvm_unreachable("Unknown loc info!");

    case CCValAssign::Full: break;

    case CCValAssign::SExt:

      Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);

      break;

    case CCValAssign::ZExt:

      Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);

      break;

    case CCValAssign::AExt:

      Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);

      break;

    case CCValAssign::BCvt:

      Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);

      break;

    }


    if (isTailCall && VA.isMemLoc() && !AfterFormalArgLoads) {

      Chain = DAG.getStackArgumentTokenFactor(Chain);

      if (ByValTempChain) {

        // In case of large byval copies, re-using the stackframe for tail-calls

        // can lead to overwriting incoming arguments on the stack. Force

        // loading these stack arguments before the copy to avoid that.

        SmallVector<SDValue, 8> IncomingLoad;

        for (unsigned I = 0; I < OutVals.size(); ++I) {

          if (Outs[I].Flags.isByVal())

            continue;


          SDValue OutVal = OutVals[I];

          LoadSDNode *OutLN = dyn_cast_or_null<LoadSDNode>(OutVal);

          if (!OutLN)

            continue;


          FrameIndexSDNode *FIN =

              dyn_cast_or_null<FrameIndexSDNode>(OutLN->getBasePtr());

          if (!FIN)

            continue;


          if (!MFI.isFixedObjectIndex(FIN->getIndex()))

            continue;


          for (const CCValAssign &VA : ArgLocs) {

            if (VA.isMemLoc())

              IncomingLoad.push_back(OutVal.getValue(1));

          }

        }


        // Update the chain to force loads for potentially clobbered argument

        // loads to happen before the byval copy.

        if (!IncomingLoad.empty()) {

          IncomingLoad.push_back(Chain);

          Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, IncomingLoad);

        }


        Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chain,

                            ByValTempChain);

      }

      AfterFormalArgLoads = true;

    }


    // f16 arguments have their size extended to 4 bytes and passed as if they

    // had been copied to the LSBs of a 32-bit register.

    // For that, it's passed extended to i32 (soft ABI) or to f32 (hard ABI)

    if (VA.needsCustom() &&

        (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) {

      Arg = MoveFromHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), Arg);

    } else {

      // f16 arguments could have been extended prior to argument lowering.

      // Mask them arguments if this is a CMSE nonsecure call.

      auto ArgVT = Outs[realArgIdx].ArgVT;

      if (isCmseNSCall && (ArgVT == MVT::f16)) {

        auto LocBits = VA.getLocVT().getSizeInBits();

        auto MaskValue = APInt::getLowBitsSet(LocBits, ArgVT.getSizeInBits());

        SDValue Mask =

            DAG.getConstant(MaskValue, dl, MVT::getIntegerVT(LocBits));

        Arg = DAG.getNode(ISD::BITCAST, dl, MVT::getIntegerVT(LocBits), Arg);

        Arg = DAG.getNode(ISD::AND, dl, MVT::getIntegerVT(LocBits), Arg, Mask);

        Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);

      }

    }


    // f64 and v2f64 might be passed in i32 pairs and must be split into pieces

    if (VA.needsCustom() && VA.getLocVT() == MVT::v2f64) {

      SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,

                                DAG.getConstant(0, dl, MVT::i32));

      SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,

                                DAG.getConstant(1, dl, MVT::i32));


      PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass, VA, ArgLocs[++i],

                       StackPtr, MemOpChains, isTailCall, SPDiff);


      VA = ArgLocs[++i]; // skip ahead to next loc

      if (VA.isRegLoc()) {

        PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass, VA, ArgLocs[++i],

                         StackPtr, MemOpChains, isTailCall, SPDiff);

      } else {

        assert(VA.isMemLoc());

        SDValue DstAddr;

        MachinePointerInfo DstInfo;

        std::tie(DstAddr, DstInfo) =

            computeAddrForCallArg(dl, DAG, VA, StackPtr, isTailCall, SPDiff);

        MemOpChains.push_back(DAG.getStore(Chain, dl, Op1, DstAddr, DstInfo));

      }

    } else if (VA.needsCustom() && VA.getLocVT() == MVT::f64) {

      PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],

                       StackPtr, MemOpChains, isTailCall, SPDiff);

    } else if (VA.isRegLoc()) {

      if (realArgIdx == 0 && Flags.isReturned() && !Flags.isSwiftSelf() &&

          Outs[0].VT == MVT::i32) {

        assert(VA.getLocVT() == MVT::i32 &&

               "unexpected calling convention register assignment");

        assert(!Ins.empty() && Ins[0].VT == MVT::i32 &&

               "unexpected use of 'returned'");

        isThisReturn = true;

      }

      const TargetOptions &Options = DAG.getTarget().Options;

      if (Options.EmitCallSiteInfo)

        CSInfo.ArgRegPairs.emplace_back(VA.getLocReg(), i);

      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));

    } else if (isByVal) {

      assert(VA.isMemLoc());

      unsigned offset = 0;


      // True if this byval aggregate will be split between registers

      // and memory.

      unsigned ByValArgsCount = CCInfo.getInRegsParamsCount();

      unsigned CurByValIdx = CCInfo.getInRegsParamsProcessed();


      SDValue ByValSrc;

      bool NeedsStackCopy;

      if (auto It = ByValTemporaries.find(realArgIdx);

          It != ByValTemporaries.end()) {

        ByValSrc = It->second;

        NeedsStackCopy = true;

      } else {

        ByValSrc = Arg;

        NeedsStackCopy = !isTailCall;

      }


      // If part of the argument is in registers, load them.

      if (CurByValIdx < ByValArgsCount) {

        unsigned RegBegin, RegEnd;

        CCInfo.getInRegsParamInfo(CurByValIdx, RegBegin, RegEnd);


        EVT PtrVT = getPointerTy(DAG.getDataLayout());

        unsigned int i, j;

        for (i = 0, j = RegBegin; j < RegEnd; i++, j++) {

          SDValue Const = DAG.getConstant(4*i, dl, MVT::i32);

          SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, ByValSrc, Const);

          SDValue Load =

              DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo(),

                          DAG.InferPtrAlign(AddArg));

          MemOpChains.push_back(Load.getValue(1));

          RegsToPass.push_back(std::make_pair(j, Load));

        }


        // If parameter size outsides register area, "offset" value

        // helps us to calculate stack slot for remained part properly.

        offset = RegEnd - RegBegin;


        CCInfo.nextInRegsParam();

      }


      // If the memory part of the argument isn't already in the correct place

      // (which can happen with tail calls), copy it into the argument area.

      if (NeedsStackCopy && Flags.getByValSize() > 4 * offset) {

        auto PtrVT = getPointerTy(DAG.getDataLayout());

        SDValue Dst;

        MachinePointerInfo DstInfo;

        std::tie(Dst, DstInfo) =

            computeAddrForCallArg(dl, DAG, VA, StackPtr, isTailCall, SPDiff);

        SDValue SrcOffset = DAG.getIntPtrConstant(4*offset, dl);

        SDValue Src = DAG.getNode(ISD::ADD, dl, PtrVT, ByValSrc, SrcOffset);

        SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset, dl,

                                           MVT::i32);

        SDValue AlignNode =

            DAG.getConstant(Flags.getNonZeroByValAlign().value(), dl, MVT::i32);


        SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);

        SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode};

        MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs,

                                          Ops));

      }

    } else {

      assert(VA.isMemLoc());

      SDValue DstAddr;

      MachinePointerInfo DstInfo;

      std::tie(DstAddr, DstInfo) =

          computeAddrForCallArg(dl, DAG, VA, StackPtr, isTailCall, SPDiff);


      SDValue Store = DAG.getStore(Chain, dl, Arg, DstAddr, DstInfo);

      MemOpChains.push_back(Store);

    }

  }


  if (!MemOpChains.empty())

    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);


  // Build a sequence of copy-to-reg nodes chained together with token chain

  // and flag operands which copy the outgoing args into the appropriate regs.

  SDValue InGlue;

  for (const auto &[Reg, N] : RegsToPass) {

    Chain = DAG.getCopyToReg(Chain, dl, Reg, N, InGlue);

    InGlue = Chain.getValue(1);

  }


  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every

  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol

  // node so that legalize doesn't hack it.

  bool isDirect = false;


  const TargetMachine &TM = getTargetMachine();

  const GlobalValue *GVal = nullptr;

  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))

    GVal = G->getGlobal();

  bool isStub = !TM.shouldAssumeDSOLocal(GVal) && Subtarget->isTargetMachO();


  bool isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass());

  bool isLocalARMFunc = false;

  auto PtrVt = getPointerTy(DAG.getDataLayout());


  if (Subtarget->genLongCalls()) {

    assert((!isPositionIndependent() || Subtarget->isTargetWindows()) &&

           "long-calls codegen is not position independent!");

    // Handle a global address or an external symbol. If it's not one of

    // those, the target's already in a register, so we don't need to do

    // anything extra.

    if (isa<GlobalAddressSDNode>(Callee)) {

      if (Subtarget->genExecuteOnly()) {

        if (Subtarget->useMovt())

          ++NumMovwMovt;

        Callee = DAG.getNode(ARMISD::Wrapper, dl, PtrVt,

                             DAG.getTargetGlobalAddress(GVal, dl, PtrVt));

      } else {

        // Create a constant pool entry for the callee address

        unsigned ARMPCLabelIndex = AFI->createPICLabelUId();

        ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(

            GVal, ARMPCLabelIndex, ARMCP::CPValue, 0);


        // Get the address of the callee into a register

        SDValue Addr = DAG.getTargetConstantPool(CPV, PtrVt, Align(4));

        Addr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Addr);

        Callee = DAG.getLoad(

            PtrVt, dl, DAG.getEntryNode(), Addr,

            MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));

      }

    } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) {

      const char *Sym = S->getSymbol();


      if (Subtarget->genExecuteOnly()) {

        if (Subtarget->useMovt())

          ++NumMovwMovt;

        Callee = DAG.getNode(ARMISD::Wrapper, dl, PtrVt,

                             DAG.getTargetGlobalAddress(GVal, dl, PtrVt));

      } else {

        // Create a constant pool entry for the callee address

        unsigned ARMPCLabelIndex = AFI->createPICLabelUId();

        ARMConstantPoolValue *CPV = ARMConstantPoolSymbol::Create(

            *DAG.getContext(), Sym, ARMPCLabelIndex, 0);


        // Get the address of the callee into a register

        SDValue Addr = DAG.getTargetConstantPool(CPV, PtrVt, Align(4));

        Addr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Addr);

        Callee = DAG.getLoad(

            PtrVt, dl, DAG.getEntryNode(), Addr,

            MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));

      }

    }

  } else if (isa<GlobalAddressSDNode>(Callee)) {

    if (!PreferIndirect) {

      isDirect = true;

      bool isDef = GVal->isStrongDefinitionForLinker();


      // ARM call to a local ARM function is predicable.

      isLocalARMFunc = !Subtarget->isThumb() && (isDef || !ARMInterworking);

      // tBX takes a register source operand.

      if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {

        assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?");

        Callee = DAG.getNode(

            ARMISD::WrapperPIC, dl, PtrVt,

            DAG.getTargetGlobalAddress(GVal, dl, PtrVt, 0, ARMII::MO_NONLAZY));

        Callee = DAG.getLoad(

            PtrVt, dl, DAG.getEntryNode(), Callee,

            MachinePointerInfo::getGOT(DAG.getMachineFunction()), MaybeAlign(),

            MachineMemOperand::MODereferenceable |

                MachineMemOperand::MOInvariant);

      } else if (Subtarget->isTargetCOFF()) {

        assert(Subtarget->isTargetWindows() &&

               "Windows is the only supported COFF target");

        unsigned TargetFlags = ARMII::MO_NO_FLAG;

        if (GVal->hasDLLImportStorageClass())

          TargetFlags = ARMII::MO_DLLIMPORT;

        else if (!TM.shouldAssumeDSOLocal(GVal))

          TargetFlags = ARMII::MO_COFFSTUB;

        Callee = DAG.getTargetGlobalAddress(GVal, dl, PtrVt, /*offset=*/0,

                                            TargetFlags);

        if (TargetFlags & (ARMII::MO_DLLIMPORT | ARMII::MO_COFFSTUB))

          Callee =

              DAG.getLoad(PtrVt, dl, DAG.getEntryNode(),

                          DAG.getNode(ARMISD::Wrapper, dl, PtrVt, Callee),

                          MachinePointerInfo::getGOT(DAG.getMachineFunction()));

      } else {

        Callee = DAG.getTargetGlobalAddress(GVal, dl, PtrVt, 0, 0);

      }

    }

  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {

    isDirect = true;

    // tBX takes a register source operand.

    const char *Sym = S->getSymbol();

    if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {

      unsigned ARMPCLabelIndex = AFI->createPICLabelUId();

      ARMConstantPoolValue *CPV =

        ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,

                                      ARMPCLabelIndex, 4);

      SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, Align(4));

      CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);

      Callee = DAG.getLoad(

          PtrVt, dl, DAG.getEntryNode(), CPAddr,

          MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));

      SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);

      Callee = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVt, Callee, PICLabel);

    } else {

      Callee = DAG.getTargetExternalSymbol(Sym, PtrVt, 0);

    }

  }


  if (isCmseNSCall) {

    assert(!isARMFunc && !isDirect &&

           "Cannot handle call to ARM function or direct call");

    if (NumBytes > 0) {

      DAG.getContext()->diagnose(

          DiagnosticInfoUnsupported(DAG.getMachineFunction().getFunction(),

                                    "call to non-secure function would require "

                                    "passing arguments on stack",

                                    dl.getDebugLoc()));

    }

    if (isStructRet) {

      DAG.getContext()->diagnose(DiagnosticInfoUnsupported(

          DAG.getMachineFunction().getFunction(),

          "call to non-secure function would return value through pointer",

          dl.getDebugLoc()));

    }

  }


  // FIXME: handle tail calls differently.

  unsigned CallOpc;

  if (Subtarget->isThumb()) {

    if (GuardWithBTI)

      CallOpc = ARMISD::t2CALL_BTI;

    else if (isCmseNSCall)

      CallOpc = ARMISD::tSECALL;

    else if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())

      CallOpc = ARMISD::CALL_NOLINK;

    else

      CallOpc = ARMISD::CALL;

  } else {

    if (!isDirect && !Subtarget->hasV5TOps())

      CallOpc = ARMISD::CALL_NOLINK;

    else if (doesNotRet && isDirect && Subtarget->hasRetAddrStack() &&

             // Emit regular call when code size is the priority

             !Subtarget->hasMinSize())

      // "mov lr, pc; b _foo" to avoid confusing the RSP

      CallOpc = ARMISD::CALL_NOLINK;

    else

      CallOpc = isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL;

  }


  // We don't usually want to end the call-sequence here because we would tidy

  // the frame up *after* the call, however in the ABI-changing tail-call case

  // we've carefully laid out the parameters so that when sp is reset they'll be

  // in the correct location.

  if (isTailCall && !isSibCall) {

    Chain = DAG.getCALLSEQ_END(Chain, 0, 0, InGlue, dl);

    InGlue = Chain.getValue(1);

  }


  std::vector<SDValue> Ops;

  Ops.push_back(Chain);

  Ops.push_back(Callee);


  if (isTailCall) {

    Ops.push_back(DAG.getSignedTargetConstant(SPDiff, dl, MVT::i32));

  }


  // Add argument registers to the end of the list so that they are known live

  // into the call.

  for (const auto &[Reg, N] : RegsToPass)

    Ops.push_back(DAG.getRegister(Reg, N.getValueType()));


  // Add a register mask operand representing the call-preserved registers.

  const uint32_t *Mask;

  const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo();

  if (isThisReturn) {

    // For 'this' returns, use the R0-preserving mask if applicable

    Mask = ARI->getThisReturnPreservedMask(MF, CallConv);

    if (!Mask) {

      // Set isThisReturn to false if the calling convention is not one that

      // allows 'returned' to be modeled in this way, so LowerCallResult does

      // not try to pass 'this' straight through

      isThisReturn = false;

      Mask = ARI->getCallPreservedMask(MF, CallConv);

    }

  } else

    Mask = ARI->getCallPreservedMask(MF, CallConv);


  assert(Mask && "Missing call preserved mask for calling convention");

  Ops.push_back(DAG.getRegisterMask(Mask));


  if (InGlue.getNode())

    Ops.push_back(InGlue);


  if (isTailCall) {

    MF.getFrameInfo().setHasTailCall();

    SDValue Ret = DAG.getNode(ARMISD::TC_RETURN, dl, MVT::Other, Ops);

    if (CLI.CFIType)

      Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue());

    DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);

    DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));

    return Ret;

  }


  // Returns a chain and a flag for retval copy to use.

  Chain = DAG.getNode(CallOpc, dl, {MVT::Other, MVT::Glue}, Ops);

  if (CLI.CFIType)

    Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue());

  DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);

  InGlue = Chain.getValue(1);

  DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));


  // If we're guaranteeing tail-calls will be honoured, the callee must

  // pop its own argument stack on return. But this call is *not* a tail call so

  // we need to undo that after it returns to restore the status-quo.

  bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;

  uint64_t CalleePopBytes =

      canGuaranteeTCO(CallConv, TailCallOpt) ? alignTo(NumBytes, 16) : -1U;


  Chain = DAG.getCALLSEQ_END(Chain, NumBytes, CalleePopBytes, InGlue, dl);

  if (!Ins.empty())

    InGlue = Chain.getValue(1);


  // Handle result values, copying them out of physregs into vregs that we

  // return.

  return LowerCallResult(Chain, InGlue, CallConv, isVarArg, Ins, dl, DAG,

                         InVals, isThisReturn,

                         isThisReturn ? OutVals[0] : SDValue(), isCmseNSCall);

}


/// HandleByVal - Every parameter *after* a byval parameter is passed

/// on the stack.  Remember the next parameter register to allocate,

/// and then confiscate the rest of the parameter registers to insure

/// this.

void ARMTargetLowering::HandleByVal(CCState *State, unsigned &Size,

                                    Align Alignment) const {

  // Byval (as with any stack) slots are always at least 4 byte aligned.

  Alignment = std::max(Alignment, Align(4));


  MCRegister Reg = State->AllocateReg(GPRArgRegs);

  if (!Reg)

    return;


  unsigned AlignInRegs = Alignment.value() / 4;

  unsigned Waste = (ARM::R4 - Reg) % AlignInRegs;

  for (unsigned i = 0; i < Waste; ++i)

    Reg = State->AllocateReg(GPRArgRegs);


  if (!Reg)

    return;


  unsigned Excess = 4 * (ARM::R4 - Reg);


  // Special case when NSAA != SP and parameter size greater than size of

  // all remained GPR regs. In that case we can't split parameter, we must

  // send it to stack. We also must set NCRN to R4, so waste all

  // remained registers.

  const unsigned NSAAOffset = State->getStackSize();

  if (NSAAOffset != 0 && Size > Excess) {

    while (State->AllocateReg(GPRArgRegs))

      ;

    return;

  }


  // First register for byval parameter is the first register that wasn't

  // allocated before this method call, so it would be "reg".

  // If parameter is small enough to be saved in range [reg, r4), then

  // the end (first after last) register would be reg + param-size-in-regs,

  // else parameter would be splitted between registers and stack,

  // end register would be r4 in this case.

  unsigned ByValRegBegin = Reg;

  unsigned ByValRegEnd = std::min<unsigned>(Reg + Size / 4, ARM::R4);

  State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd);

  // Note, first register is allocated in the beginning of function already,

  // allocate remained amount of registers we need.

  for (unsigned i = Reg + 1; i != ByValRegEnd; ++i)

    State->AllocateReg(GPRArgRegs);

  // A byval parameter that is split between registers and memory needs its

  // size truncated here.

  // In the case where the entire structure fits in registers, we set the

  // size in memory to zero.

  Size = std::max<int>(Size - Excess, 0);

}


/// IsEligibleForTailCallOptimization - Check whether the call is eligible

/// for tail call optimization. Targets which want to do tail call

/// optimization should implement this function. Note that this function also

/// processes musttail calls, so when this function returns false on a valid

/// musttail call, a fatal backend error occurs.

bool ARMTargetLowering::IsEligibleForTailCallOptimization(

    TargetLowering::CallLoweringInfo &CLI, CCState &CCInfo,

    SmallVectorImpl<CCValAssign> &ArgLocs, const bool isIndirect) const {

  CallingConv::ID CalleeCC = CLI.CallConv;

  SDValue Callee = CLI.Callee;

  bool isVarArg = CLI.IsVarArg;

  const SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;

  const SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;

  const SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;

  const SelectionDAG &DAG = CLI.DAG;

  MachineFunction &MF = DAG.getMachineFunction();

  const Function &CallerF = MF.getFunction();

  CallingConv::ID CallerCC = CallerF.getCallingConv();


  assert(Subtarget->supportsTailCall());


  // Indirect tail-calls require a register to hold the target address. That

  // register must be:

  // * Allocatable (i.e. r0-r7 if the target is Thumb1).

  // * Not callee-saved, so must be one of r0-r3 or r12.

  // * Not used to hold an argument to the tail-called function, which might be

  //   in r0-r3.

  // * Not used to hold the return address authentication code, which is in r12

  //   if enabled.

  // Sometimes, no register matches all of these conditions, so we can't do a

  // tail-call.

  if (!isa<GlobalAddressSDNode>(Callee.getNode()) || isIndirect) {

    SmallSet<MCPhysReg, 5> AddressRegisters = {ARM::R0, ARM::R1, ARM::R2,

                                               ARM::R3};

    if (!(Subtarget->isThumb1Only() ||

          MF.getInfo<ARMFunctionInfo>()->shouldSignReturnAddress(true)))

      AddressRegisters.insert(ARM::R12);

    for (const CCValAssign &AL : ArgLocs)

      if (AL.isRegLoc())

        AddressRegisters.erase(AL.getLocReg());

    if (AddressRegisters.empty()) {

      LLVM_DEBUG(dbgs() << "false (no reg to hold function pointer)\n");

      return false;

    }

  }


  // Look for obvious safe cases to perform tail call optimization that do not

  // require ABI changes. This is what gcc calls sibcall.


  // Exception-handling functions need a special set of instructions to indicate

  // a return to the hardware. Tail-calling another function would probably

  // break this.

  if (CallerF.hasFnAttribute("interrupt")) {

    LLVM_DEBUG(dbgs() << "false (interrupt attribute)\n");

    return false;

  }


  if (canGuaranteeTCO(CalleeCC,

                      getTargetMachine().Options.GuaranteedTailCallOpt)) {

    LLVM_DEBUG(dbgs() << (CalleeCC == CallerCC ? "true" : "false")

                      << " (guaranteed tail-call CC)\n");

    return CalleeCC == CallerCC;

  }


  // Also avoid sibcall optimization if either caller or callee uses struct

  // return semantics.

  bool isCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();

  bool isCallerStructRet = MF.getFunction().hasStructRetAttr();

  if (isCalleeStructRet != isCallerStructRet) {

    LLVM_DEBUG(dbgs() << "false (struct-ret)\n");

    return false;

  }


  // Externally-defined functions with weak linkage should not be

  // tail-called on ARM when the OS does not support dynamic

  // pre-emption of symbols, as the AAELF spec requires normal calls

  // to undefined weak functions to be replaced with a NOP or jump to the

  // next instruction. The behaviour of branch instructions in this

  // situation (as used for tail calls) is implementation-defined, so we

  // cannot rely on the linker replacing the tail call with a return.

  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {

    const GlobalValue *GV = G->getGlobal();

    const Triple &TT = getTargetMachine().getTargetTriple();

    if (GV->hasExternalWeakLinkage() &&

        (!TT.isOSWindows() || TT.isOSBinFormatELF() ||

         TT.isOSBinFormatMachO())) {

      LLVM_DEBUG(dbgs() << "false (external weak linkage)\n");

      return false;

    }

  }


  // Check that the call results are passed in the same way.

  LLVMContext &C = *DAG.getContext();

  if (!CCState::resultsCompatible(

          getEffectiveCallingConv(CalleeCC, isVarArg),

          getEffectiveCallingConv(CallerCC, CallerF.isVarArg()), MF, C, Ins,

          CCAssignFnForReturn(CalleeCC, isVarArg),

          CCAssignFnForReturn(CallerCC, CallerF.isVarArg()))) {

    LLVM_DEBUG(dbgs() << "false (incompatible results)\n");

    return false;

  }

  // The callee has to preserve all registers the caller needs to preserve.

  const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();

  const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);

  if (CalleeCC != CallerCC) {

    const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);

    if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved)) {

      LLVM_DEBUG(dbgs() << "false (not all registers preserved)\n");

      return false;

    }

  }


  // If Caller's vararg argument has been split between registers and stack, do

  // not perform tail call, since part of the argument is in caller's local

  // frame.

  const ARMFunctionInfo *AFI_Caller = MF.getInfo<ARMFunctionInfo>();

  if (CLI.IsVarArg && AFI_Caller->getArgRegsSaveSize()) {

    LLVM_DEBUG(dbgs() << "false (arg reg save area)\n");

    return false;

  }


  // If the callee takes no arguments then go on to check the results of the

  // call.

  const MachineRegisterInfo &MRI = MF.getRegInfo();

  if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals)) {

    LLVM_DEBUG(dbgs() << "false (parameters in CSRs do not match)\n");

    return false;

  }


  // If the stack arguments for this call do not fit into our own save area then

  // the call cannot be made tail.

  if (CCInfo.getStackSize() > AFI_Caller->getArgumentStackSize())

    return false;


  LLVM_DEBUG(dbgs() << "true\n");

  return true;

}


bool

ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv,

                                  MachineFunction &MF, bool isVarArg,

                                  const SmallVectorImpl<ISD::OutputArg> &Outs,

                                  LLVMContext &Context, const Type *RetTy) const {

  SmallVector<CCValAssign, 16> RVLocs;

  CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);

  return CCInfo.CheckReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));

}


static SDValue LowerInterruptReturn(SmallVectorImpl<SDValue> &RetOps,

                                    const SDLoc &DL, SelectionDAG &DAG) {

  const MachineFunction &MF = DAG.getMachineFunction();

  const Function &F = MF.getFunction();


  StringRef IntKind = F.getFnAttribute("interrupt").getValueAsString();


  // See ARM ARM v7 B1.8.3. On exception entry LR is set to a possibly offset

  // version of the "preferred return address". These offsets affect the return

  // instruction if this is a return from PL1 without hypervisor extensions.

  //    IRQ/FIQ: +4     "subs pc, lr, #4"

  //    SWI:     0      "subs pc, lr, #0"

  //    ABORT:   +4     "subs pc, lr, #4"

  //    UNDEF:   +4/+2  "subs pc, lr, #0"

  // UNDEF varies depending on where the exception came from ARM or Thumb

  // mode. Alongside GCC, we throw our hands up in disgust and pretend it's 0.


  int64_t LROffset;

  if (IntKind == "" || IntKind == "IRQ" || IntKind == "FIQ" ||

      IntKind == "ABORT")

    LROffset = 4;

  else if (IntKind == "SWI" || IntKind == "UNDEF")

    LROffset = 0;

  else

    report_fatal_error("Unsupported interrupt attribute. If present, value "

                       "must be one of: IRQ, FIQ, SWI, ABORT or UNDEF");


  RetOps.insert(RetOps.begin() + 1,

                DAG.getConstant(LROffset, DL, MVT::i32, false));


  return DAG.getNode(ARMISD::INTRET_GLUE, DL, MVT::Other, RetOps);

}


SDValue

ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,

                               bool isVarArg,

                               const SmallVectorImpl<ISD::OutputArg> &Outs,

                               const SmallVectorImpl<SDValue> &OutVals,

                               const SDLoc &dl, SelectionDAG &DAG) const {

  // CCValAssign - represent the assignment of the return value to a location.

  SmallVector<CCValAssign, 16> RVLocs;


  // CCState - Info about the registers and stack slots.

  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,

                 *DAG.getContext());


  // Analyze outgoing return values.

  CCInfo.AnalyzeReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));


  SDValue Glue;

  SmallVector<SDValue, 4> RetOps;

  RetOps.push_back(Chain); // Operand #0 = Chain (updated below)

  bool isLittleEndian = Subtarget->isLittle();


  MachineFunction &MF = DAG.getMachineFunction();

  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();

  AFI->setReturnRegsCount(RVLocs.size());


 // Report error if cmse entry function returns structure through first ptr arg.

  if (AFI->isCmseNSEntryFunction() && MF.getFunction().hasStructRetAttr()) {

    // Note: using an empty SDLoc(), as the first line of the function is a

    // better place to report than the last line.

    DAG.getContext()->diagnose(DiagnosticInfoUnsupported(

        DAG.getMachineFunction().getFunction(),

        "secure entry function would return value through pointer",

        SDLoc().getDebugLoc()));

  }


  // Copy the result values into the output registers.

  for (unsigned i = 0, realRVLocIdx = 0;

       i != RVLocs.size();

       ++i, ++realRVLocIdx) {

    CCValAssign &VA = RVLocs[i];

    assert(VA.isRegLoc() && "Can only return in registers!");


    SDValue Arg = OutVals[realRVLocIdx];

    bool ReturnF16 = false;


    if (Subtarget->hasFullFP16() && getTM().isTargetHardFloat()) {

      // Half-precision return values can be returned like this:

      //

      // t11 f16 = fadd ...

      // t12: i16 = bitcast t11

      //   t13: i32 = zero_extend t12

      // t14: f32 = bitcast t13  <~~~~~~~ Arg

      //

      // to avoid code generation for bitcasts, we simply set Arg to the node

      // that produces the f16 value, t11 in this case.

      //

      if (Arg.getValueType() == MVT::f32 && Arg.getOpcode() == ISD::BITCAST) {

        SDValue ZE = Arg.getOperand(0);

        if (ZE.getOpcode() == ISD::ZERO_EXTEND && ZE.getValueType() == MVT::i32) {

          SDValue BC = ZE.getOperand(0);

          if (BC.getOpcode() == ISD::BITCAST && BC.getValueType() == MVT::i16) {

            Arg = BC.getOperand(0);

            ReturnF16 = true;

          }

        }

      }

    }


    switch (VA.getLocInfo()) {

    default: llvm_unreachable("Unknown loc info!");

    case CCValAssign::Full: break;

    case CCValAssign::BCvt:

      if (!ReturnF16)

        Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);

      break;

    }


    // Mask f16 arguments if this is a CMSE nonsecure entry.

    auto RetVT = Outs[realRVLocIdx].ArgVT;

    if (AFI->isCmseNSEntryFunction() && (RetVT == MVT::f16)) {

      if (VA.needsCustom() && VA.getValVT() == MVT::f16) {

        Arg = MoveFromHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), Arg);

      } else {

        auto LocBits = VA.getLocVT().getSizeInBits();

        auto MaskValue = APInt::getLowBitsSet(LocBits, RetVT.getSizeInBits());

        SDValue Mask =

            DAG.getConstant(MaskValue, dl, MVT::getIntegerVT(LocBits));

        Arg = DAG.getNode(ISD::BITCAST, dl, MVT::getIntegerVT(LocBits), Arg);

        Arg = DAG.getNode(ISD::AND, dl, MVT::getIntegerVT(LocBits), Arg, Mask);

        Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);

      }

    }


    if (VA.needsCustom() &&

        (VA.getLocVT() == MVT::v2f64 || VA.getLocVT() == MVT::f64)) {

      if (VA.getLocVT() == MVT::v2f64) {

        // Extract the first half and return it in two registers.

        SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,

                                   DAG.getConstant(0, dl, MVT::i32));

        SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl,

                                       DAG.getVTList(MVT::i32, MVT::i32), Half);


        Chain =

            DAG.getCopyToReg(Chain, dl, VA.getLocReg(),

                             HalfGPRs.getValue(isLittleEndian ? 0 : 1), Glue);

        Glue = Chain.getValue(1);

        RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));

        VA = RVLocs[++i]; // skip ahead to next loc

        Chain =

            DAG.getCopyToReg(Chain, dl, VA.getLocReg(),

                             HalfGPRs.getValue(isLittleEndian ? 1 : 0), Glue);

        Glue = Chain.getValue(1);

        RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));

        VA = RVLocs[++i]; // skip ahead to next loc


        // Extract the 2nd half and fall through to handle it as an f64 value.

        Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,

                          DAG.getConstant(1, dl, MVT::i32));

      }

      // Legalize ret f64 -> ret 2 x i32.  We always have fmrrd if f64 is

      // available.

      SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,

                                  DAG.getVTList(MVT::i32, MVT::i32), Arg);

      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),

                               fmrrd.getValue(isLittleEndian ? 0 : 1), Glue);

      Glue = Chain.getValue(1);

      RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));

      VA = RVLocs[++i]; // skip ahead to next loc

      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),

                               fmrrd.getValue(isLittleEndian ? 1 : 0), Glue);

    } else

      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Glue);


    // Guarantee that all emitted copies are

    // stuck together, avoiding something bad.

    Glue = Chain.getValue(1);

    RetOps.push_back(DAG.getRegister(

        VA.getLocReg(), ReturnF16 ? Arg.getValueType() : VA.getLocVT()));

  }

  const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();

  const MCPhysReg *I =

      TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());

  if (I) {

    for (; *I; ++I) {

      if (ARM::GPRRegClass.contains(*I))

        RetOps.push_back(DAG.getRegister(*I, MVT::i32));

      else if (ARM::DPRRegClass.contains(*I))

        RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));

      else

        llvm_unreachable("Unexpected register class in CSRsViaCopy!");

    }

  }


  // Update chain and glue.

  RetOps[0] = Chain;

  if (Glue.getNode())

    RetOps.push_back(Glue);


  // CPUs which aren't M-class use a special sequence to return from

  // exceptions (roughly, any instruction setting pc and cpsr simultaneously,

  // though we use "subs pc, lr, #N").

  //

  // M-class CPUs actually use a normal return sequence with a special

  // (hardware-provided) value in LR, so the normal code path works.

  if (DAG.getMachineFunction().getFunction().hasFnAttribute("interrupt") &&

      !Subtarget->isMClass()) {

    if (Subtarget->isThumb1Only())

      report_fatal_error("interrupt attribute is not supported in Thumb1");

    return LowerInterruptReturn(RetOps, dl, DAG);

  }


  unsigned RetNode =

      AFI->isCmseNSEntryFunction() ? ARMISD::SERET_GLUE : ARMISD::RET_GLUE;

  return DAG.getNode(RetNode, dl, MVT::Other, RetOps);

}


bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {

  if (N->getNumValues() != 1)

    return false;

  if (!N->hasNUsesOfValue(1, 0))

    return false;


  SDValue TCChain = Chain;

  SDNode *Copy = *N->user_begin();

  if (Copy->getOpcode() == ISD::CopyToReg) {

    // If the copy has a glue operand, we conservatively assume it isn't safe to

    // perform a tail call.

    if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)

      return false;

    TCChain = Copy->getOperand(0);

  } else if (Copy->getOpcode() == ARMISD::VMOVRRD) {

    SDNode *VMov = Copy;

    // f64 returned in a pair of GPRs.

    SmallPtrSet<SDNode*, 2> Copies;

    for (SDNode *U : VMov->users()) {

      if (U->getOpcode() != ISD::CopyToReg)

        return false;

      Copies.insert(U);

    }

    if (Copies.size() > 2)

      return false;


    for (SDNode *U : VMov->users()) {

      SDValue UseChain = U->getOperand(0);

      if (Copies.count(UseChain.getNode()))

        // Second CopyToReg

        Copy = U;

      else {

        // We are at the top of this chain.

        // If the copy has a glue operand, we conservatively assume it

        // isn't safe to perform a tail call.

        if (U->getOperand(U->getNumOperands() - 1).getValueType() == MVT::Glue)

          return false;

        // First CopyToReg

        TCChain = UseChain;

      }

    }

  } else if (Copy->getOpcode() == ISD::BITCAST) {

    // f32 returned in a single GPR.

    if (!Copy->hasOneUse())

      return false;

    Copy = *Copy->user_begin();

    if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0))

      return false;

    // If the copy has a glue operand, we conservatively assume it isn't safe to

    // perform a tail call.

    if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)

      return false;

    TCChain = Copy->getOperand(0);

  } else {

    return false;

  }


  bool HasRet = false;

  for (const SDNode *U : Copy->users()) {

    if (U->getOpcode() != ARMISD::RET_GLUE &&

        U->getOpcode() != ARMISD::INTRET_GLUE)

      return false;

    HasRet = true;

  }


  if (!HasRet)

    return false;


  Chain = TCChain;

  return true;

}


bool ARMTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {

  if (!Subtarget->supportsTailCall())

    return false;


  if (!CI->isTailCall())

    return false;


  return true;

}


// Trying to write a 64 bit value so need to split into two 32 bit values first,

// and pass the lower and high parts through.


static SDValue LowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) {

  SDLoc DL(Op);

  SDValue WriteValue = Op->getOperand(2);


  // This function is only supposed to be called for i64 type argument.

  assert(WriteValue.getValueType() == MVT::i64

          && "LowerWRITE_REGISTER called for non-i64 type argument.");


  SDValue Lo, Hi;

  std::tie(Lo, Hi) = DAG.SplitScalar(WriteValue, DL, MVT::i32, MVT::i32);

  SDValue Ops[] = { Op->getOperand(0), Op->getOperand(1), Lo, Hi };

  return DAG.getNode(ISD::WRITE_REGISTER, DL, MVT::Other, Ops);

}


// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as

// their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is

// one of the above mentioned nodes. It has to be wrapped because otherwise

// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only

// be used to form addressing mode. These wrapped nodes will be selected

// into MOVi.

SDValue ARMTargetLowering::LowerConstantPool(SDValue Op,

                                             SelectionDAG &DAG) const {

  EVT PtrVT = Op.getValueType();

  // FIXME there is no actual debug info here

  SDLoc dl(Op);

  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);

  SDValue Res;


  // When generating execute-only code Constant Pools must be promoted to the

  // global data section. It's a bit ugly that we can't share them across basic

  // blocks, but this way we guarantee that execute-only behaves correct with

  // position-independent addressing modes.

  if (Subtarget->genExecuteOnly()) {

    auto AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();

    auto *T = CP->getType();

    auto C = const_cast<Constant*>(CP->getConstVal());

    auto M = DAG.getMachineFunction().getFunction().getParent();

    auto GV = new GlobalVariable(

                    *M, T, /*isConstant=*/true, GlobalVariable::InternalLinkage, C,

                    Twine(DAG.getDataLayout().getPrivateGlobalPrefix()) + "CP" +

                    Twine(DAG.getMachineFunction().getFunctionNumber()) + "_" +

                    Twine(AFI->createPICLabelUId())

                  );

    SDValue GA = DAG.getTargetGlobalAddress(dyn_cast<GlobalValue>(GV),

                                            dl, PtrVT);

    return LowerGlobalAddress(GA, DAG);

  }


  // The 16-bit ADR instruction can only encode offsets that are multiples of 4,

  // so we need to align to at least 4 bytes when we don't have 32-bit ADR.

  Align CPAlign = CP->getAlign();

  if (Subtarget->isThumb1Only())

    CPAlign = std::max(CPAlign, Align(4));

  if (CP->isMachineConstantPoolEntry())

    Res =

        DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, CPAlign);

  else

    Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CPAlign);

  return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res);

}


unsigned ARMTargetLowering::getJumpTableEncoding() const {

  // If we don't have a 32-bit pc-relative branch instruction then the jump

  // table consists of block addresses. Usually this is inline, but for

  // execute-only it must be placed out-of-line.

  if (Subtarget->genExecuteOnly() && !Subtarget->hasV8MBaselineOps())

    return MachineJumpTableInfo::EK_BlockAddress;

  return MachineJumpTableInfo::EK_Inline;

}


SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,

                                             SelectionDAG &DAG) const {

  MachineFunction &MF = DAG.getMachineFunction();

  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();

  unsigned ARMPCLabelIndex = 0;

  SDLoc DL(Op);

  EVT PtrVT = getPointerTy(DAG.getDataLayout());

  const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();

  SDValue CPAddr;

  bool IsPositionIndependent = isPositionIndependent() || Subtarget->isROPI();

  if (!IsPositionIndependent) {

    CPAddr = DAG.getTargetConstantPool(BA, PtrVT, Align(4));

  } else {

    unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;

    ARMPCLabelIndex = AFI->createPICLabelUId();

    ARMConstantPoolValue *CPV =

      ARMConstantPoolConstant::Create(BA, ARMPCLabelIndex,

                                      ARMCP::CPBlockAddress, PCAdj);

    CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));

  }

  CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);

  SDValue Result = DAG.getLoad(

      PtrVT, DL, DAG.getEntryNode(), CPAddr,

      MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));

  if (!IsPositionIndependent)

    return Result;

  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, DL, MVT::i32);

  return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);

}


/// Convert a TLS address reference into the correct sequence of loads

/// and calls to compute the variable's address for Darwin, and return an

/// SDValue containing the final node.


/// Darwin only has one TLS scheme which must be capable of dealing with the

/// fully general situation, in the worst case. This means:

///     + "extern __thread" declaration.

///     + Defined in a possibly unknown dynamic library.

///

/// The general system is that each __thread variable has a [3 x i32] descriptor

/// which contains information used by the runtime to calculate the address. The

/// only part of this the compiler needs to know about is the first word, which

/// contains a function pointer that must be called with the address of the

/// entire descriptor in "r0".

///

/// Since this descriptor may be in a different unit, in general access must

/// proceed along the usual ARM rules. A common sequence to produce is:

///

///     movw rT1, :lower16:_var$non_lazy_ptr

///     movt rT1, :upper16:_var$non_lazy_ptr

///     ldr r0, [rT1]

///     ldr rT2, [r0]

///     blx rT2

///     [...address now in r0...]

SDValue

ARMTargetLowering::LowerGlobalTLSAddressDarwin(SDValue Op,

                                               SelectionDAG &DAG) const {

  assert(Subtarget->isTargetDarwin() &&

         "This function expects a Darwin target");

  SDLoc DL(Op);


  // First step is to get the address of the actua global symbol. This is where

  // the TLS descriptor lives.

  SDValue DescAddr = LowerGlobalAddressDarwin(Op, DAG);


  // The first entry in the descriptor is a function pointer that we must call

  // to obtain the address of the variable.

  SDValue Chain = DAG.getEntryNode();

  SDValue FuncTLVGet = DAG.getLoad(

      MVT::i32, DL, Chain, DescAddr,

      MachinePointerInfo::getGOT(DAG.getMachineFunction()), Align(4),

      MachineMemOperand::MONonTemporal | MachineMemOperand::MODereferenceable |

          MachineMemOperand::MOInvariant);

  Chain = FuncTLVGet.getValue(1);


  MachineFunction &F = DAG.getMachineFunction();

  MachineFrameInfo &MFI = F.getFrameInfo();

  MFI.setAdjustsStack(true);


  // TLS calls preserve all registers except those that absolutely must be

  // trashed: R0 (it takes an argument), LR (it's a call) and CPSR (let's not be

  // silly).

  auto TRI =

      getTargetMachine().getSubtargetImpl(F.getFunction())->getRegisterInfo();

  auto ARI = static_cast<const ARMRegisterInfo *>(TRI);

  const uint32_t *Mask = ARI->getTLSCallPreservedMask(DAG.getMachineFunction());


  // Finally, we can make the call. This is just a degenerate version of a

  // normal AArch64 call node: r0 takes the address of the descriptor, and

  // returns the address of the variable in this thread.

  Chain = DAG.getCopyToReg(Chain, DL, ARM::R0, DescAddr, SDValue());

  Chain =

      DAG.getNode(ARMISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue),

                  Chain, FuncTLVGet, DAG.getRegister(ARM::R0, MVT::i32),

                  DAG.getRegisterMask(Mask), Chain.getValue(1));

  return DAG.getCopyFromReg(Chain, DL, ARM::R0, MVT::i32, Chain.getValue(1));

}


SDValue

ARMTargetLowering::LowerGlobalTLSAddressWindows(SDValue Op,

                                                SelectionDAG &DAG) const {

  assert(Subtarget->isTargetWindows() && "Windows specific TLS lowering");


  SDValue Chain = DAG.getEntryNode();

  EVT PtrVT = getPointerTy(DAG.getDataLayout());

  SDLoc DL(Op);


  // Load the current TEB (thread environment block)

  SDValue Ops[] = {Chain,

                   DAG.getTargetConstant(Intrinsic::arm_mrc, DL, MVT::i32),

                   DAG.getTargetConstant(15, DL, MVT::i32),

                   DAG.getTargetConstant(0, DL, MVT::i32),

                   DAG.getTargetConstant(13, DL, MVT::i32),

                   DAG.getTargetConstant(0, DL, MVT::i32),

                   DAG.getTargetConstant(2, DL, MVT::i32)};

  SDValue CurrentTEB = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,

                                   DAG.getVTList(MVT::i32, MVT::Other), Ops);


  SDValue TEB = CurrentTEB.getValue(0);

  Chain = CurrentTEB.getValue(1);


  // Load the ThreadLocalStoragePointer from the TEB

  // A pointer to the TLS array is located at offset 0x2c from the TEB.

  SDValue TLSArray =

      DAG.getNode(ISD::ADD, DL, PtrVT, TEB, DAG.getIntPtrConstant(0x2c, DL));

  TLSArray = DAG.getLoad(PtrVT, DL, Chain, TLSArray, MachinePointerInfo());


  // The pointer to the thread's TLS data area is at the TLS Index scaled by 4

  // offset into the TLSArray.


  // Load the TLS index from the C runtime

  SDValue TLSIndex =

      DAG.getTargetExternalSymbol("_tls_index", PtrVT, ARMII::MO_NO_FLAG);

  TLSIndex = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, TLSIndex);

  TLSIndex = DAG.getLoad(PtrVT, DL, Chain, TLSIndex, MachinePointerInfo());


  SDValue Slot = DAG.getNode(ISD::SHL, DL, PtrVT, TLSIndex,

                              DAG.getConstant(2, DL, MVT::i32));

  SDValue TLS = DAG.getLoad(PtrVT, DL, Chain,

                            DAG.getNode(ISD::ADD, DL, PtrVT, TLSArray, Slot),

                            MachinePointerInfo());


  // Get the offset of the start of the .tls section (section base)

  const auto *GA = cast<GlobalAddressSDNode>(Op);

  auto *CPV = ARMConstantPoolConstant::Create(GA->getGlobal(), ARMCP::SECREL);

  SDValue Offset = DAG.getLoad(

      PtrVT, DL, Chain,

      DAG.getNode(ARMISD::Wrapper, DL, MVT::i32,

                  DAG.getTargetConstantPool(CPV, PtrVT, Align(4))),

      MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));


  return DAG.getNode(ISD::ADD, DL, PtrVT, TLS, Offset);

}


// Lower ISD::GlobalTLSAddress using the "general dynamic" model

SDValue

ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,

                                                 SelectionDAG &DAG) const {

  SDLoc dl(GA);

  EVT PtrVT = getPointerTy(DAG.getDataLayout());

  unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;

  MachineFunction &MF = DAG.getMachineFunction();

  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();

  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();

  ARMConstantPoolValue *CPV =

    ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,

                                    ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true);

  SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));

  Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);

  Argument = DAG.getLoad(

      PtrVT, dl, DAG.getEntryNode(), Argument,

      MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));

  SDValue Chain = Argument.getValue(1);


  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);

  Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel);


  // call __tls_get_addr.

  ArgListTy Args;

  Args.emplace_back(Argument, Type::getInt32Ty(*DAG.getContext()));


  // FIXME: is there useful debug info available here?

  TargetLowering::CallLoweringInfo CLI(DAG);

  CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(

      CallingConv::C, Type::getInt32Ty(*DAG.getContext()),

      DAG.getExternalSymbol("__tls_get_addr", PtrVT), std::move(Args));


  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);

  return CallResult.first;

}


// Lower ISD::GlobalTLSAddress using the "initial exec" or

// "local exec" model.

SDValue

ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,

                                        SelectionDAG &DAG,

                                        TLSModel::Model model) const {

  const GlobalValue *GV = GA->getGlobal();

  SDLoc dl(GA);

  SDValue Offset;

  SDValue Chain = DAG.getEntryNode();

  EVT PtrVT = getPointerTy(DAG.getDataLayout());

  // Get the Thread Pointer

  SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);


  if (model == TLSModel::InitialExec) {

    MachineFunction &MF = DAG.getMachineFunction();

    ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();

    unsigned ARMPCLabelIndex = AFI->createPICLabelUId();

    // Initial exec model.

    unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;

    ARMConstantPoolValue *CPV =

      ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,

                                      ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF,

                                      true);

    Offset = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));

    Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);

    Offset = DAG.getLoad(

        PtrVT, dl, Chain, Offset,

        MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));

    Chain = Offset.getValue(1);


    SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);

    Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);


    Offset = DAG.getLoad(

        PtrVT, dl, Chain, Offset,

        MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));

  } else {

    // local exec model

    assert(model == TLSModel::LocalExec);

    ARMConstantPoolValue *CPV =

      ARMConstantPoolConstant::Create(GV, ARMCP::TPOFF);

    Offset = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));

    Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);

    Offset = DAG.getLoad(

        PtrVT, dl, Chain, Offset,

        MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));

  }


  // The address of the thread local variable is the add of the thread

  // pointer with the offset of the variable.

  return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);

}


SDValue

ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {

  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);

  if (DAG.getTarget().useEmulatedTLS())

    return LowerToTLSEmulatedModel(GA, DAG);


  if (Subtarget->isTargetDarwin())

    return LowerGlobalTLSAddressDarwin(Op, DAG);


  if (Subtarget->isTargetWindows())

    return LowerGlobalTLSAddressWindows(Op, DAG);


  // TODO: implement the "local dynamic" model

  assert(Subtarget->isTargetELF() && "Only ELF implemented here");

  TLSModel::Model model = getTargetMachine().getTLSModel(GA->getGlobal());


  switch (model) {

    case TLSModel::GeneralDynamic:

    case TLSModel::LocalDynamic:

      return LowerToTLSGeneralDynamicModel(GA, DAG);

    case TLSModel::InitialExec:

    case TLSModel::LocalExec:

      return LowerToTLSExecModels(GA, DAG, model);

  }

  llvm_unreachable("bogus TLS model");

}


/// Return true if all users of V are within function F, looking through

/// ConstantExprs.


static bool allUsersAreInFunction(const Value *V, const Function *F) {

  SmallVector<const User*,4> Worklist(V->users());

  while (!Worklist.empty()) {

    auto *U = Worklist.pop_back_val();

    if (isa<ConstantExpr>(U)) {

      append_range(Worklist, U->users());

      continue;

    }


    auto *I = dyn_cast<Instruction>(U);

    if (!I || I->getParent()->getParent() != F)

      return false;

  }

  return true;

}


static SDValue promoteToConstantPool(const ARMTargetLowering *TLI,

                                     const GlobalValue *GV, SelectionDAG &DAG,

                                     EVT PtrVT, const SDLoc &dl) {

  // If we're creating a pool entry for a constant global with unnamed address,

  // and the global is small enough, we can emit it inline into the constant pool

  // to save ourselves an indirection.

  //

  // This is a win if the constant is only used in one function (so it doesn't

  // need to be duplicated) or duplicating the constant wouldn't increase code

  // size (implying the constant is no larger than 4 bytes).

  const Function &F = DAG.getMachineFunction().getFunction();


  // We rely on this decision to inline being idemopotent and unrelated to the

  // use-site. We know that if we inline a variable at one use site, we'll

  // inline it elsewhere too (and reuse the constant pool entry). Fast-isel

  // doesn't know about this optimization, so bail out if it's enabled else

  // we could decide to inline here (and thus never emit the GV) but require

  // the GV from fast-isel generated code.

  if (!EnableConstpoolPromotion ||

      DAG.getMachineFunction().getTarget().Options.EnableFastISel)

      return SDValue();


  auto *GVar = dyn_cast<GlobalVariable>(GV);

  if (!GVar || !GVar->hasInitializer() ||

      !GVar->isConstant() || !GVar->hasGlobalUnnamedAddr() ||

      !GVar->hasLocalLinkage())

    return SDValue();


  // If we inline a value that contains relocations, we move the relocations

  // from .data to .text. This is not allowed in position-independent code.

  auto *Init = GVar->getInitializer();

  if ((TLI->isPositionIndependent() || TLI->getSubtarget()->isROPI()) &&

      Init->needsDynamicRelocation())

    return SDValue();


  // The constant islands pass can only really deal with alignment requests

  // <= 4 bytes and cannot pad constants itself. Therefore we cannot promote

  // any type wanting greater alignment requirements than 4 bytes. We also

  // can only promote constants that are multiples of 4 bytes in size or

  // are paddable to a multiple of 4. Currently we only try and pad constants

  // that are strings for simplicity.

  auto *CDAInit = dyn_cast<ConstantDataArray>(Init);

  unsigned Size = DAG.getDataLayout().getTypeAllocSize(Init->getType());

  Align PrefAlign = DAG.getDataLayout().getPreferredAlign(GVar);

  unsigned RequiredPadding = 4 - (Size % 4);

  bool PaddingPossible =

    RequiredPadding == 4 || (CDAInit && CDAInit->isString());

  if (!PaddingPossible || PrefAlign > 4 || Size > ConstpoolPromotionMaxSize ||

      Size == 0)

    return SDValue();


  unsigned PaddedSize = Size + ((RequiredPadding == 4) ? 0 : RequiredPadding);

  MachineFunction &MF = DAG.getMachineFunction();

  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();


  // We can't bloat the constant pool too much, else the ConstantIslands pass

  // may fail to converge. If we haven't promoted this global yet (it may have

  // multiple uses), and promoting it would increase the constant pool size (Sz

  // > 4), ensure we have space to do so up to MaxTotal.

  if (!AFI->getGlobalsPromotedToConstantPool().count(GVar) && Size > 4)

    if (AFI->getPromotedConstpoolIncrease() + PaddedSize - 4 >=

        ConstpoolPromotionMaxTotal)

      return SDValue();


  // This is only valid if all users are in a single function; we can't clone

  // the constant in general. The LLVM IR unnamed_addr allows merging

  // constants, but not cloning them.

  //

  // We could potentially allow cloning if we could prove all uses of the

  // constant in the current function don't care about the address, like

  // printf format strings. But that isn't implemented for now.

  if (!allUsersAreInFunction(GVar, &F))

    return SDValue();


  // We're going to inline this global. Pad it out if needed.

  if (RequiredPadding != 4) {

    StringRef S = CDAInit->getAsString();


    SmallVector<uint8_t,16> V(S.size());

    std::copy(S.bytes_begin(), S.bytes_end(), V.begin());

    while (RequiredPadding--)

      V.push_back(0);

    Init = ConstantDataArray::get(*DAG.getContext(), V);

  }


  auto CPVal = ARMConstantPoolConstant::Create(GVar, Init);

  SDValue CPAddr = DAG.getTargetConstantPool(CPVal, PtrVT, Align(4));

  if (!AFI->getGlobalsPromotedToConstantPool().count(GVar)) {

    AFI->markGlobalAsPromotedToConstantPool(GVar);

    AFI->setPromotedConstpoolIncrease(AFI->getPromotedConstpoolIncrease() +

                                      PaddedSize - 4);

  }

  ++NumConstpoolPromoted;

  return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);

}


bool ARMTargetLowering::isReadOnly(const GlobalValue *GV) const {

  if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))

    if (!(GV = GA->getAliaseeObject()))

      return false;

  if (const auto *V = dyn_cast<GlobalVariable>(GV))

    return V->isConstant();

  return isa<Function>(GV);

}


SDValue ARMTargetLowering::LowerGlobalAddress(SDValue Op,

                                              SelectionDAG &DAG) const {

  switch (Subtarget->getTargetTriple().getObjectFormat()) {

  default: llvm_unreachable("unknown object format");

  case Triple::COFF:

    return LowerGlobalAddressWindows(Op, DAG);

  case Triple::ELF:

    return LowerGlobalAddressELF(Op, DAG);

  case Triple::MachO:

    return LowerGlobalAddressDarwin(Op, DAG);

  }

}


SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,

                                                 SelectionDAG &DAG) const {

  EVT PtrVT = getPointerTy(DAG.getDataLayout());

  SDLoc dl(Op);

  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();

  bool IsRO = isReadOnly(GV);


  // promoteToConstantPool only if not generating XO text section

  if (GV->isDSOLocal() && !Subtarget->genExecuteOnly())

    if (SDValue V = promoteToConstantPool(this, GV, DAG, PtrVT, dl))

      return V;


  if (isPositionIndependent()) {

    SDValue G = DAG.getTargetGlobalAddress(

        GV, dl, PtrVT, 0, GV->isDSOLocal() ? 0 : ARMII::MO_GOT);

    SDValue Result = DAG.getNode(ARMISD::WrapperPIC, dl, PtrVT, G);

    if (!GV->isDSOLocal())

      Result =

          DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,

                      MachinePointerInfo::getGOT(DAG.getMachineFunction()));

    return Result;

  } else if (Subtarget->isROPI() && IsRO) {

    // PC-relative.

    SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT);

    SDValue Result = DAG.getNode(ARMISD::WrapperPIC, dl, PtrVT, G);

    return Result;

  } else if (Subtarget->isRWPI() && !IsRO) {

    // SB-relative.

    SDValue RelAddr;

    if (Subtarget->useMovt()) {

      ++NumMovwMovt;

      SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_SBREL);

      RelAddr = DAG.getNode(ARMISD::Wrapper, dl, PtrVT, G);

    } else { // use literal pool for address constant

      ARMConstantPoolValue *CPV =

        ARMConstantPoolConstant::Create(GV, ARMCP::SBREL);

      SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));

      CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);

      RelAddr = DAG.getLoad(

          PtrVT, dl, DAG.getEntryNode(), CPAddr,

          MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));

    }

    SDValue SB = DAG.getCopyFromReg(DAG.getEntryNode(), dl, ARM::R9, PtrVT);

    SDValue Result = DAG.getNode(ISD::ADD, dl, PtrVT, SB, RelAddr);

    return Result;

  }


  // If we have T2 ops, we can materialize the address directly via movt/movw

  // pair. This is always cheaper. If need to generate Execute Only code, and we

  // only have Thumb1 available, we can't use a constant pool and are forced to

  // use immediate relocations.

  if (Subtarget->useMovt() || Subtarget->genExecuteOnly()) {

    if (Subtarget->useMovt())

      ++NumMovwMovt;

    // FIXME: Once remat is capable of dealing with instructions with register

    // operands, expand this into two nodes.

    return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,

                       DAG.getTargetGlobalAddress(GV, dl, PtrVT));

  } else {

    SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, Align(4));

    CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);

    return DAG.getLoad(

        PtrVT, dl, DAG.getEntryNode(), CPAddr,

        MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));

  }

}


SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,

                                                    SelectionDAG &DAG) const {

  assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&

         "ROPI/RWPI not currently supported for Darwin");

  EVT PtrVT = getPointerTy(DAG.getDataLayout());

  SDLoc dl(Op);

  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();


  if (Subtarget->useMovt())

    ++NumMovwMovt;


  // FIXME: Once remat is capable of dealing with instructions with register

  // operands, expand this into multiple nodes

  unsigned Wrapper =

      isPositionIndependent() ? ARMISD::WrapperPIC : ARMISD::Wrapper;


  SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_NONLAZY);

  SDValue Result = DAG.getNode(Wrapper, dl, PtrVT, G);


  if (Subtarget->isGVIndirectSymbol(GV))

    Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,

                         MachinePointerInfo::getGOT(DAG.getMachineFunction()));

  return Result;

}


SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op,

                                                     SelectionDAG &DAG) const {

  assert(Subtarget->isTargetWindows() && "non-Windows COFF is not supported");

  assert(Subtarget->useMovt() &&

         "Windows on ARM expects to use movw/movt");

  assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&

         "ROPI/RWPI not currently supported for Windows");


  const TargetMachine &TM = getTargetMachine();

  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();

  ARMII::TOF TargetFlags = ARMII::MO_NO_FLAG;

  if (GV->hasDLLImportStorageClass())

    TargetFlags = ARMII::MO_DLLIMPORT;

  else if (!TM.shouldAssumeDSOLocal(GV))

    TargetFlags = ARMII::MO_COFFSTUB;

  EVT PtrVT = getPointerTy(DAG.getDataLayout());

  SDValue Result;

  SDLoc DL(Op);


  ++NumMovwMovt;


  // FIXME: Once remat is capable of dealing with instructions with register

  // operands, expand this into two nodes.

  Result = DAG.getNode(ARMISD::Wrapper, DL, PtrVT,

                       DAG.getTargetGlobalAddress(GV, DL, PtrVT, /*offset=*/0,

                                                  TargetFlags));

  if (TargetFlags & (ARMII::MO_DLLIMPORT | ARMII::MO_COFFSTUB))

    Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,

                         MachinePointerInfo::getGOT(DAG.getMachineFunction()));

  return Result;

}


SDValue

ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {

  SDLoc dl(Op);

  SDValue Val = DAG.getConstant(0, dl, MVT::i32);

  return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl,

                     DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0),

                     Op.getOperand(1), Val);

}


SDValue

ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const {

  SDLoc dl(Op);

  return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0),

                     Op.getOperand(1), DAG.getConstant(0, dl, MVT::i32));

}


SDValue ARMTargetLowering::LowerEH_SJLJ_SETUP_DISPATCH(SDValue Op,

                                                      SelectionDAG &DAG) const {

  SDLoc dl(Op);

  return DAG.getNode(ARMISD::EH_SJLJ_SETUP_DISPATCH, dl, MVT::Other,

                     Op.getOperand(0));

}


SDValue ARMTargetLowering::LowerINTRINSIC_VOID(

    SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget) const {

  unsigned IntNo =

      Op.getConstantOperandVal(Op.getOperand(0).getValueType() == MVT::Other);

  switch (IntNo) {

    default:

      return SDValue();  // Don't custom lower most intrinsics.

    case Intrinsic::arm_gnu_eabi_mcount: {

      MachineFunction &MF = DAG.getMachineFunction();

      EVT PtrVT = getPointerTy(DAG.getDataLayout());

      SDLoc dl(Op);

      SDValue Chain = Op.getOperand(0);

      // call "\01__gnu_mcount_nc"

      const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo();

      const uint32_t *Mask =

          ARI->getCallPreservedMask(DAG.getMachineFunction(), CallingConv::C);

      assert(Mask && "Missing call preserved mask for calling convention");

      // Mark LR an implicit live-in.

      Register Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));

      SDValue ReturnAddress =

          DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, PtrVT);

      constexpr EVT ResultTys[] = {MVT::Other, MVT::Glue};

      SDValue Callee =

          DAG.getTargetExternalSymbol("\01__gnu_mcount_nc", PtrVT, 0);

      SDValue RegisterMask = DAG.getRegisterMask(Mask);

      if (Subtarget->isThumb())

        return SDValue(

            DAG.getMachineNode(

                ARM::tBL_PUSHLR, dl, ResultTys,

                {ReturnAddress, DAG.getTargetConstant(ARMCC::AL, dl, PtrVT),

                 DAG.getRegister(0, PtrVT), Callee, RegisterMask, Chain}),

            0);

      return SDValue(

          DAG.getMachineNode(ARM::BL_PUSHLR, dl, ResultTys,

                             {ReturnAddress, Callee, RegisterMask, Chain}),

          0);

    }

  }

}


SDValue

ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,

                                          const ARMSubtarget *Subtarget) const {

  unsigned IntNo = Op.getConstantOperandVal(0);

  SDLoc dl(Op);

  switch (IntNo) {

  default: return SDValue();    // Don't custom lower most intrinsics.

  case Intrinsic::thread_pointer: {

    EVT PtrVT = getPointerTy(DAG.getDataLayout());

    return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);

  }

  case Intrinsic::arm_cls: {

    const SDValue &Operand = Op.getOperand(1);

    const EVT VTy = Op.getValueType();

    SDValue SRA =

        DAG.getNode(ISD::SRA, dl, VTy, Operand, DAG.getConstant(31, dl, VTy));

    SDValue XOR = DAG.getNode(ISD::XOR, dl, VTy, SRA, Operand);

    SDValue SHL =

        DAG.getNode(ISD::SHL, dl, VTy, XOR, DAG.getConstant(1, dl, VTy));

    SDValue OR =

        DAG.getNode(ISD::OR, dl, VTy, SHL, DAG.getConstant(1, dl, VTy));

    SDValue Result = DAG.getNode(ISD::CTLZ, dl, VTy, OR);

    return Result;

  }

  case Intrinsic::arm_cls64: {

    // cls(x) = if cls(hi(x)) != 31 then cls(hi(x))

    //          else 31 + clz(if hi(x) == 0 then lo(x) else not(lo(x)))

    const SDValue &Operand = Op.getOperand(1);

    const EVT VTy = Op.getValueType();

    SDValue Lo, Hi;

    std::tie(Lo, Hi) = DAG.SplitScalar(Operand, dl, VTy, VTy);

    SDValue Constant0 = DAG.getConstant(0, dl, VTy);

    SDValue Constant1 = DAG.getConstant(1, dl, VTy);

    SDValue Constant31 = DAG.getConstant(31, dl, VTy);

    SDValue SRAHi = DAG.getNode(ISD::SRA, dl, VTy, Hi, Constant31);

    SDValue XORHi = DAG.getNode(ISD::XOR, dl, VTy, SRAHi, Hi);

    SDValue SHLHi = DAG.getNode(ISD::SHL, dl, VTy, XORHi, Constant1);

    SDValue ORHi = DAG.getNode(ISD::OR, dl, VTy, SHLHi, Constant1);

    SDValue CLSHi = DAG.getNode(ISD::CTLZ, dl, VTy, ORHi);

    SDValue CheckLo =

        DAG.getSetCC(dl, MVT::i1, CLSHi, Constant31, ISD::CondCode::SETEQ);

    SDValue HiIsZero =

        DAG.getSetCC(dl, MVT::i1, Hi, Constant0, ISD::CondCode::SETEQ);

    SDValue AdjustedLo =

        DAG.getSelect(dl, VTy, HiIsZero, Lo, DAG.getNOT(dl, Lo, VTy));

    SDValue CLZAdjustedLo = DAG.getNode(ISD::CTLZ, dl, VTy, AdjustedLo);

    SDValue Result =

        DAG.getSelect(dl, VTy, CheckLo,

                      DAG.getNode(ISD::ADD, dl, VTy, CLZAdjustedLo, Constant31), CLSHi);

    return Result;

  }

  case Intrinsic::eh_sjlj_lsda: {

    MachineFunction &MF = DAG.getMachineFunction();

    ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();

    unsigned ARMPCLabelIndex = AFI->createPICLabelUId();

    EVT PtrVT = getPointerTy(DAG.getDataLayout());

    SDValue CPAddr;

    bool IsPositionIndependent = isPositionIndependent();

    unsigned PCAdj = IsPositionIndependent ? (Subtarget->isThumb() ? 4 : 8) : 0;

    ARMConstantPoolValue *CPV =

      ARMConstantPoolConstant::Create(&MF.getFunction(), ARMPCLabelIndex,

                                      ARMCP::CPLSDA, PCAdj);

    CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));

    CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);

    SDValue Result = DAG.getLoad(

        PtrVT, dl, DAG.getEntryNode(), CPAddr,

        MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));


    if (IsPositionIndependent) {

      SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);

      Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);

    }

    return Result;

  }

  case Intrinsic::arm_neon_vabs:

    return DAG.getNode(ISD::ABS, SDLoc(Op), Op.getValueType(),

                       Op.getOperand(1));

  case Intrinsic::arm_neon_vabds:

    if (Op.getValueType().isInteger())

      return DAG.getNode(ISD::ABDS, SDLoc(Op), Op.getValueType(),

                         Op.getOperand(1), Op.getOperand(2));

    return SDValue();

  case Intrinsic::arm_neon_vabdu:

    return DAG.getNode(ISD::ABDU, SDLoc(Op), Op.getValueType(),

                       Op.getOperand(1), Op.getOperand(2));

  case Intrinsic::arm_neon_vmulls:

  case Intrinsic::arm_neon_vmullu: {

    unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls)

      ? ARMISD::VMULLs : ARMISD::VMULLu;

    return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),

                       Op.getOperand(1), Op.getOperand(2));

  }

  case Intrinsic::arm_neon_vminnm:

  case Intrinsic::arm_neon_vmaxnm: {

    unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminnm)

      ? ISD::FMINNUM : ISD::FMAXNUM;

    return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),

                       Op.getOperand(1), Op.getOperand(2));

  }

  case Intrinsic::arm_neon_vminu:

  case Intrinsic::arm_neon_vmaxu: {

    if (Op.getValueType().isFloatingPoint())

      return SDValue();

    unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminu)

      ? ISD::UMIN : ISD::UMAX;

    return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),

                         Op.getOperand(1), Op.getOperand(2));

  }

  case Intrinsic::arm_neon_vmins:

  case Intrinsic::arm_neon_vmaxs: {

    // v{min,max}s is overloaded between signed integers and floats.

    if (!Op.getValueType().isFloatingPoint()) {

      unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)

        ? ISD::SMIN : ISD::SMAX;

      return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),

                         Op.getOperand(1), Op.getOperand(2));

    }

    unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)

      ? ISD::FMINIMUM : ISD::FMAXIMUM;

    return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),

                       Op.getOperand(1), Op.getOperand(2));

  }

  case Intrinsic::arm_neon_vtbl1:

    return DAG.getNode(ARMISD::VTBL1, SDLoc(Op), Op.getValueType(),

                       Op.getOperand(1), Op.getOperand(2));

  case Intrinsic::arm_neon_vtbl2:

    return DAG.getNode(ARMISD::VTBL2, SDLoc(Op), Op.getValueType(),

                       Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));

  case Intrinsic::arm_mve_pred_i2v:

  case Intrinsic::arm_mve_pred_v2i:

    return DAG.getNode(ARMISD::PREDICATE_CAST, SDLoc(Op), Op.getValueType(),

                       Op.getOperand(1));

  case Intrinsic::arm_mve_vreinterpretq:

    return DAG.getNode(ARMISD::VECTOR_REG_CAST, SDLoc(Op), Op.getValueType(),

                       Op.getOperand(1));

  case Intrinsic::arm_mve_lsll:

    return DAG.getNode(ARMISD::LSLL, SDLoc(Op), Op->getVTList(),

                       Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));

  case Intrinsic::arm_mve_asrl:

    return DAG.getNode(ARMISD::ASRL, SDLoc(Op), Op->getVTList(),

                       Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));

  }

}


static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,

                                 const ARMSubtarget *Subtarget) {

  SDLoc dl(Op);

  auto SSID = static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));

  if (SSID == SyncScope::SingleThread)

    return Op;


  if (!Subtarget->hasDataBarrier()) {

    // Some ARMv6 cpus can support data barriers with an mcr instruction.

    // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get

    // here.

    assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&

           "Unexpected ISD::ATOMIC_FENCE encountered. Should be libcall!");

    return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0),

                       DAG.getConstant(0, dl, MVT::i32));

  }


  AtomicOrdering Ord =

      static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));

  ARM_MB::MemBOpt Domain = ARM_MB::ISH;

  if (Subtarget->isMClass()) {

    // Only a full system barrier exists in the M-class architectures.

    Domain = ARM_MB::SY;

  } else if (Subtarget->preferISHSTBarriers() &&

             Ord == AtomicOrdering::Release) {

    // Swift happens to implement ISHST barriers in a way that's compatible with

    // Release semantics but weaker than ISH so we'd be fools not to use

    // it. Beware: other processors probably don't!

    Domain = ARM_MB::ISHST;

  }


  return DAG.getNode(ISD::INTRINSIC_VOID, dl, MVT::Other, Op.getOperand(0),

                     DAG.getConstant(Intrinsic::arm_dmb, dl, MVT::i32),

                     DAG.getConstant(Domain, dl, MVT::i32));

}


static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG,

                             const ARMSubtarget *Subtarget) {

  // ARM pre v5TE and Thumb1 does not have preload instructions.

  if (!(Subtarget->isThumb2() ||

        (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps())))

    // Just preserve the chain.

    return Op.getOperand(0);


  SDLoc dl(Op);

  unsigned isRead = ~Op.getConstantOperandVal(2) & 1;

  if (!isRead &&

      (!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension()))

    // ARMv7 with MP extension has PLDW.

    return Op.getOperand(0);


  unsigned isData = Op.getConstantOperandVal(4);

  if (Subtarget->isThumb()) {

    // Invert the bits.

    isRead = ~isRead & 1;

    isData = ~isData & 1;

  }


  return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0),

                     Op.getOperand(1), DAG.getConstant(isRead, dl, MVT::i32),

                     DAG.getConstant(isData, dl, MVT::i32));

}


static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) {

  MachineFunction &MF = DAG.getMachineFunction();

  ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>();


  // vastart just stores the address of the VarArgsFrameIndex slot into the

  // memory location argument.

  SDLoc dl(Op);

  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());

  SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);

  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();

  return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),

                      MachinePointerInfo(SV));

}


SDValue ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA,

                                                CCValAssign &NextVA,

                                                SDValue &Root,

                                                SelectionDAG &DAG,

                                                const SDLoc &dl) const {

  MachineFunction &MF = DAG.getMachineFunction();

  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();


  const TargetRegisterClass *RC;

  if (AFI->isThumb1OnlyFunction())

    RC = &ARM::tGPRRegClass;

  else

    RC = &ARM::GPRRegClass;


  // Transform the arguments stored in physical registers into virtual ones.

  Register Reg = MF.addLiveIn(VA.getLocReg(), RC);

  SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);


  SDValue ArgValue2;

  if (NextVA.isMemLoc()) {

    MachineFrameInfo &MFI = MF.getFrameInfo();

    int FI = MFI.CreateFixedObject(4, NextVA.getLocMemOffset(), true);


    // Create load node to retrieve arguments from the stack.

    SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));

    ArgValue2 = DAG.getLoad(

        MVT::i32, dl, Root, FIN,

        MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));

  } else {

    Reg = MF.addLiveIn(NextVA.getLocReg(), RC);

    ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);

  }

  if (!Subtarget->isLittle())

    std::swap (ArgValue, ArgValue2);

  return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2);

}


// The remaining GPRs hold either the beginning of variable-argument

// data, or the beginning of an aggregate passed by value (usually

// byval).  Either way, we allocate stack slots adjacent to the data

// provided by our caller, and store the unallocated registers there.

// If this is a variadic function, the va_list pointer will begin with

// these values; otherwise, this reassembles a (byval) structure that

// was split between registers and memory.

// Return: The frame index registers were stored into.

int ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,

                                      const SDLoc &dl, SDValue &Chain,

                                      const Value *OrigArg,

                                      unsigned InRegsParamRecordIdx,

                                      int ArgOffset, unsigned ArgSize) const {

  // Currently, two use-cases possible:

  // Case #1. Non-var-args function, and we meet first byval parameter.

  //          Setup first unallocated register as first byval register;

  //          eat all remained registers

  //          (these two actions are performed by HandleByVal method).

  //          Then, here, we initialize stack frame with

  //          "store-reg" instructions.

  // Case #2. Var-args function, that doesn't contain byval parameters.

  //          The same: eat all remained unallocated registers,

  //          initialize stack frame.


  MachineFunction &MF = DAG.getMachineFunction();

  MachineFrameInfo &MFI = MF.getFrameInfo();

  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();

  unsigned RBegin, REnd;

  if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) {

    CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd);

  } else {

    unsigned RBeginIdx = CCInfo.getFirstUnallocated(GPRArgRegs);

    RBegin = RBeginIdx == 4 ? (unsigned)ARM::R4 : GPRArgRegs[RBeginIdx];

    REnd = ARM::R4;

  }


  if (REnd != RBegin)

    ArgOffset = -4 * (ARM::R4 - RBegin);


  auto PtrVT = getPointerTy(DAG.getDataLayout());

  int FrameIndex = MFI.CreateFixedObject(ArgSize, ArgOffset, false);

  SDValue FIN = DAG.getFrameIndex(FrameIndex, PtrVT);


  SmallVector<SDValue, 4> MemOps;

  const TargetRegisterClass *RC =

      AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass : &ARM::GPRRegClass;


  for (unsigned Reg = RBegin, i = 0; Reg < REnd; ++Reg, ++i) {

    Register VReg = MF.addLiveIn(Reg, RC);

    SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);

    SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,

                                 MachinePointerInfo(OrigArg, 4 * i));

    MemOps.push_back(Store);

    FIN = DAG.getNode(ISD::ADD, dl, PtrVT, FIN, DAG.getConstant(4, dl, PtrVT));

  }


  if (!MemOps.empty())

    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);

  return FrameIndex;

}


// Setup stack frame, the va_list pointer will start from.

void ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,

                                             const SDLoc &dl, SDValue &Chain,

                                             unsigned ArgOffset,

                                             unsigned TotalArgRegsSaveSize,

                                             bool ForceMutable) const {

  MachineFunction &MF = DAG.getMachineFunction();

  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();


  // Try to store any remaining integer argument regs

  // to their spots on the stack so that they may be loaded by dereferencing

  // the result of va_next.

  // If there is no regs to be stored, just point address after last

  // argument passed via stack.

  int FrameIndex = StoreByValRegs(

      CCInfo, DAG, dl, Chain, nullptr, CCInfo.getInRegsParamsCount(),

      CCInfo.getStackSize(), std::max(4U, TotalArgRegsSaveSize));

  AFI->setVarArgsFrameIndex(FrameIndex);

}


bool ARMTargetLowering::splitValueIntoRegisterParts(

    SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,

    unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {

  EVT ValueVT = Val.getValueType();

  if ((ValueVT == MVT::f16 || ValueVT == MVT::bf16) && PartVT == MVT::f32) {

    unsigned ValueBits = ValueVT.getSizeInBits();

    unsigned PartBits = PartVT.getSizeInBits();

    Val = DAG.getNode(ISD::BITCAST, DL, MVT::getIntegerVT(ValueBits), Val);

    Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::getIntegerVT(PartBits), Val);

    Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);

    Parts[0] = Val;

    return true;

  }

  return false;

}


SDValue ARMTargetLowering::joinRegisterPartsIntoValue(

    SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,

    MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {

  if ((ValueVT == MVT::f16 || ValueVT == MVT::bf16) && PartVT == MVT::f32) {

    unsigned ValueBits = ValueVT.getSizeInBits();

    unsigned PartBits = PartVT.getSizeInBits();

    SDValue Val = Parts[0];


    Val = DAG.getNode(ISD::BITCAST, DL, MVT::getIntegerVT(PartBits), Val);

    Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::getIntegerVT(ValueBits), Val);

    Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);

    return Val;

  }

  return SDValue();

}


SDValue ARMTargetLowering::LowerFormalArguments(

    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,

    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,

    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {

  MachineFunction &MF = DAG.getMachineFunction();

  MachineFrameInfo &MFI = MF.getFrameInfo();


  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();


  // Assign locations to all of the incoming arguments.

  SmallVector<CCValAssign, 16> ArgLocs;

  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,

                 *DAG.getContext());

  CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForCall(CallConv, isVarArg));


  Function::const_arg_iterator CurOrigArg = MF.getFunction().arg_begin();

  unsigned CurArgIdx = 0;


  // Initially ArgRegsSaveSize is zero.

  // Then we increase this value each time we meet byval parameter.

  // We also increase this value in case of varargs function.

  AFI->setArgRegsSaveSize(0);


  // Calculate the amount of stack space that we need to allocate to store

  // byval and variadic arguments that are passed in registers.

  // We need to know this before we allocate the first byval or variadic

  // argument, as they will be allocated a stack slot below the CFA (Canonical

  // Frame Address, the stack pointer at entry to the function).

  unsigned ArgRegBegin = ARM::R4;

  for (const CCValAssign &VA : ArgLocs) {

    if (CCInfo.getInRegsParamsProcessed() >= CCInfo.getInRegsParamsCount())

      break;


    unsigned Index = VA.getValNo();

    ISD::ArgFlagsTy Flags = Ins[Index].Flags;

    if (!Flags.isByVal())

      continue;


    assert(VA.isMemLoc() && "unexpected byval pointer in reg");

    unsigned RBegin, REnd;

    CCInfo.getInRegsParamInfo(CCInfo.getInRegsParamsProcessed(), RBegin, REnd);

    ArgRegBegin = std::min(ArgRegBegin, RBegin);


    CCInfo.nextInRegsParam();

  }

  CCInfo.rewindByValRegsInfo();


  int lastInsIndex = -1;

  if (isVarArg && MFI.hasVAStart()) {

    unsigned RegIdx = CCInfo.getFirstUnallocated(GPRArgRegs);

    if (RegIdx != std::size(GPRArgRegs))

      ArgRegBegin = std::min(ArgRegBegin, (unsigned)GPRArgRegs[RegIdx]);

  }


  unsigned TotalArgRegsSaveSize = 4 * (ARM::R4 - ArgRegBegin);

  AFI->setArgRegsSaveSize(TotalArgRegsSaveSize);

  auto PtrVT = getPointerTy(DAG.getDataLayout());


  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {

    CCValAssign &VA = ArgLocs[i];

    if (Ins[VA.getValNo()].isOrigArg()) {

      std::advance(CurOrigArg,

                   Ins[VA.getValNo()].getOrigArgIndex() - CurArgIdx);

      CurArgIdx = Ins[VA.getValNo()].getOrigArgIndex();

    }

    // Arguments stored in registers.

    if (VA.isRegLoc()) {

      EVT RegVT = VA.getLocVT();

      SDValue ArgValue;


      if (VA.needsCustom() && VA.getLocVT() == MVT::v2f64) {

        // f64 and vector types are split up into multiple registers or

        // combinations of registers and stack slots.

        SDValue ArgValue1 =

            GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);

        VA = ArgLocs[++i]; // skip ahead to next loc

        SDValue ArgValue2;

        if (VA.isMemLoc()) {

          int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), true);

          SDValue FIN = DAG.getFrameIndex(FI, PtrVT);

          ArgValue2 = DAG.getLoad(

              MVT::f64, dl, Chain, FIN,

              MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));

        } else {

          ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);

        }

        ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);

        ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, ArgValue,

                               ArgValue1, DAG.getIntPtrConstant(0, dl));

        ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, ArgValue,

                               ArgValue2, DAG.getIntPtrConstant(1, dl));

      } else if (VA.needsCustom() && VA.getLocVT() == MVT::f64) {

        ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);

      } else {

        const TargetRegisterClass *RC;


        if (RegVT == MVT::f16 || RegVT == MVT::bf16)

          RC = &ARM::HPRRegClass;

        else if (RegVT == MVT::f32)

          RC = &ARM::SPRRegClass;

        else if (RegVT == MVT::f64 || RegVT == MVT::v4f16 ||

                 RegVT == MVT::v4bf16)

          RC = &ARM::DPRRegClass;

        else if (RegVT == MVT::v2f64 || RegVT == MVT::v8f16 ||

                 RegVT == MVT::v8bf16)

          RC = &ARM::QPRRegClass;

        else if (RegVT == MVT::i32)

          RC = AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass

                                           : &ARM::GPRRegClass;

        else

          llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");


        // Transform the arguments in physical registers into virtual ones.

        Register Reg = MF.addLiveIn(VA.getLocReg(), RC);

        ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);


        // If this value is passed in r0 and has the returned attribute (e.g.

        // C++ 'structors), record this fact for later use.

        if (VA.getLocReg() == ARM::R0 && Ins[VA.getValNo()].Flags.isReturned()) {

          AFI->setPreservesR0();

        }

      }


      // If this is an 8 or 16-bit value, it is really passed promoted

      // to 32 bits.  Insert an assert[sz]ext to capture this, then

      // truncate to the right size.

      switch (VA.getLocInfo()) {

      default: llvm_unreachable("Unknown loc info!");

      case CCValAssign::Full: break;

      case CCValAssign::BCvt:

        ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);

        break;

      }


      // f16 arguments have their size extended to 4 bytes and passed as if they

      // had been copied to the LSBs of a 32-bit register.

      // For that, it's passed extended to i32 (soft ABI) or to f32 (hard ABI)

      if (VA.needsCustom() &&

          (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))

        ArgValue = MoveToHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), ArgValue);


      // On CMSE Entry Functions, formal integer arguments whose bitwidth is

      // less than 32 bits must be sign- or zero-extended in the callee for

      // security reasons. Although the ABI mandates an extension done by the

      // caller, the latter cannot be trusted to follow the rules of the ABI.

      const ISD::InputArg &Arg = Ins[VA.getValNo()];

      if (AFI->isCmseNSEntryFunction() && Arg.ArgVT.isScalarInteger() &&

          RegVT.isScalarInteger() && Arg.ArgVT.bitsLT(MVT::i32))

        ArgValue = handleCMSEValue(ArgValue, Arg, DAG, dl);


      InVals.push_back(ArgValue);

    } else { // VA.isRegLoc()

      // Only arguments passed on the stack should make it here.

      assert(VA.isMemLoc());

      assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered");


      int index = VA.getValNo();


      // Some Ins[] entries become multiple ArgLoc[] entries.

      // Process them only once.

      if (index != lastInsIndex)

        {

          ISD::ArgFlagsTy Flags = Ins[index].Flags;

          // FIXME: For now, all byval parameter objects are marked mutable.

          // This can be changed with more analysis.

          // In case of tail call optimization mark all arguments mutable.

          // Since they could be overwritten by lowering of arguments in case of

          // a tail call.

          if (Flags.isByVal()) {

            assert(Ins[index].isOrigArg() &&

                   "Byval arguments cannot be implicit");

            unsigned CurByValIndex = CCInfo.getInRegsParamsProcessed();


            int FrameIndex = StoreByValRegs(

                CCInfo, DAG, dl, Chain, &*CurOrigArg, CurByValIndex,

                VA.getLocMemOffset(), Flags.getByValSize());

            InVals.push_back(DAG.getFrameIndex(FrameIndex, PtrVT));

            CCInfo.nextInRegsParam();

          } else if (VA.needsCustom() && (VA.getValVT() == MVT::f16 ||

                                          VA.getValVT() == MVT::bf16)) {

            // f16 and bf16 values are passed in the least-significant half of

            // a 4 byte stack slot. This is done as-if the extension was done

            // in a 32-bit register, so the actual bytes used for the value

            // differ between little and big endian.

            assert(VA.getLocVT().getSizeInBits() == 32);

            unsigned FIOffset = VA.getLocMemOffset();

            int FI = MFI.CreateFixedObject(VA.getLocVT().getSizeInBits() / 8,

                                           FIOffset, true);


            SDValue Addr = DAG.getFrameIndex(FI, PtrVT);

            if (DAG.getDataLayout().isBigEndian())

              Addr = DAG.getObjectPtrOffset(dl, Addr, TypeSize::getFixed(2));


            InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, Addr,

                                         MachinePointerInfo::getFixedStack(

                                             DAG.getMachineFunction(), FI)));


          } else {

            unsigned FIOffset = VA.getLocMemOffset();

            int FI = MFI.CreateFixedObject(VA.getLocVT().getSizeInBits()/8,

                                           FIOffset, true);


            // Create load nodes to retrieve arguments from the stack.

            SDValue FIN = DAG.getFrameIndex(FI, PtrVT);

            InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,

                                         MachinePointerInfo::getFixedStack(

                                             DAG.getMachineFunction(), FI)));

          }

          lastInsIndex = index;

        }

    }

  }


  // varargs

  if (isVarArg && MFI.hasVAStart()) {

    VarArgStyleRegisters(CCInfo, DAG, dl, Chain, CCInfo.getStackSize(),

                         TotalArgRegsSaveSize);

    if (AFI->isCmseNSEntryFunction()) {

      DAG.getContext()->diagnose(DiagnosticInfoUnsupported(

          DAG.getMachineFunction().getFunction(),

          "secure entry function must not be variadic", dl.getDebugLoc()));

    }

  }


  unsigned StackArgSize = CCInfo.getStackSize();

  bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;

  if (canGuaranteeTCO(CallConv, TailCallOpt)) {

    // The only way to guarantee a tail call is if the callee restores its

    // argument area, but it must also keep the stack aligned when doing so.

    MaybeAlign StackAlign = DAG.getDataLayout().getStackAlignment();

    assert(StackAlign && "data layout string is missing stack alignment");

    StackArgSize = alignTo(StackArgSize, *StackAlign);


    AFI->setArgumentStackToRestore(StackArgSize);

  }

  AFI->setArgumentStackSize(StackArgSize);


  if (CCInfo.getStackSize() > 0 && AFI->isCmseNSEntryFunction()) {

    DAG.getContext()->diagnose(DiagnosticInfoUnsupported(

        DAG.getMachineFunction().getFunction(),

        "secure entry function requires arguments on stack", dl.getDebugLoc()));

  }


  return Chain;

}


/// isFloatingPointZero - Return true if this is +0.0.


static bool isFloatingPointZero(SDValue Op) {

  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))

    return CFP->getValueAPF().isPosZero();

  else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {

    // Maybe this has already been legalized into the constant pool?

    if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) {

      SDValue WrapperOp = Op.getOperand(1).getOperand(0);

      if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp))

        if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))

          return CFP->getValueAPF().isPosZero();

    }

  } else if (Op->getOpcode() == ISD::BITCAST &&

             Op->getValueType(0) == MVT::f64) {

    // Handle (ISD::BITCAST (ARMISD::VMOVIMM (ISD::TargetConstant 0)) MVT::f64)

    // created by LowerConstantFP().

    SDValue BitcastOp = Op->getOperand(0);

    if (BitcastOp->getOpcode() == ARMISD::VMOVIMM &&

        isNullConstant(BitcastOp->getOperand(0)))

      return true;

  }

  return false;

}


/// Returns appropriate ARM CMP (cmp) and corresponding condition code for

/// the given operands.

SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,

                                     SDValue &ARMcc, SelectionDAG &DAG,

                                     const SDLoc &dl) const {

  if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {

    unsigned C = RHSC->getZExtValue();

    if (!isLegalICmpImmediate((int32_t)C)) {

      // Constant does not fit, try adjusting it by one.

      switch (CC) {

      default: break;

      case ISD::SETLT:

      case ISD::SETGE:

        if (C != 0x80000000 && isLegalICmpImmediate(C-1)) {

          CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;

          RHS = DAG.getConstant(C - 1, dl, MVT::i32);

        }

        break;

      case ISD::SETULT:

      case ISD::SETUGE:

        if (C != 0 && isLegalICmpImmediate(C-1)) {

          CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;

          RHS = DAG.getConstant(C - 1, dl, MVT::i32);

        }

        break;

      case ISD::SETLE:

      case ISD::SETGT:

        if (C != 0x7fffffff && isLegalICmpImmediate(C+1)) {

          CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;

          RHS = DAG.getConstant(C + 1, dl, MVT::i32);

        }

        break;

      case ISD::SETULE:

      case ISD::SETUGT:

        if (C != 0xffffffff && isLegalICmpImmediate(C+1)) {

          CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;

          RHS = DAG.getConstant(C + 1, dl, MVT::i32);

        }

        break;

      }

    }

  } else if ((ARM_AM::getShiftOpcForNode(LHS.getOpcode()) != ARM_AM::no_shift) &&

             (ARM_AM::getShiftOpcForNode(RHS.getOpcode()) == ARM_AM::no_shift)) {

    // In ARM and Thumb-2, the compare instructions can shift their second

    // operand.

    CC = ISD::getSetCCSwappedOperands(CC);

    std::swap(LHS, RHS);

  }


  // Thumb1 has very limited immediate modes, so turning an "and" into a

  // shift can save multiple instructions.

  //

  // If we have (x & C1), and C1 is an appropriate mask, we can transform it

  // into "((x << n) >> n)".  But that isn't necessarily profitable on its

  // own. If it's the operand to an unsigned comparison with an immediate,

  // we can eliminate one of the shifts: we transform

  // "((x << n) >> n) == C2" to "(x << n) == (C2 << n)".

  //

  // We avoid transforming cases which aren't profitable due to encoding

  // details:

  //

  // 1. C2 fits into the immediate field of a cmp, and the transformed version

  // would not; in that case, we're essentially trading one immediate load for

  // another.

  // 2. C1 is 255 or 65535, so we can use uxtb or uxth.

  // 3. C2 is zero; we have other code for this special case.

  //

  // FIXME: Figure out profitability for Thumb2; we usually can't save an

  // instruction, since the AND is always one instruction anyway, but we could

  // use narrow instructions in some cases.

  if (Subtarget->isThumb1Only() && LHS->getOpcode() == ISD::AND &&

      LHS->hasOneUse() && isa<ConstantSDNode>(LHS.getOperand(1)) &&

      LHS.getValueType() == MVT::i32 && isa<ConstantSDNode>(RHS) &&

      !isSignedIntSetCC(CC)) {

    unsigned Mask = LHS.getConstantOperandVal(1);

    auto *RHSC = cast<ConstantSDNode>(RHS.getNode());

    uint64_t RHSV = RHSC->getZExtValue();

    if (isMask_32(Mask) && (RHSV & ~Mask) == 0 && Mask != 255 && Mask != 65535) {

      unsigned ShiftBits = llvm::countl_zero(Mask);

      if (RHSV && (RHSV > 255 || (RHSV << ShiftBits) <= 255)) {

        SDValue ShiftAmt = DAG.getConstant(ShiftBits, dl, MVT::i32);

        LHS = DAG.getNode(ISD::SHL, dl, MVT::i32, LHS.getOperand(0), ShiftAmt);

        RHS = DAG.getConstant(RHSV << ShiftBits, dl, MVT::i32);

      }

    }

  }


  // The specific comparison "(x<<c) > 0x80000000U" can be optimized to a

  // single "lsls x, c+1".  The shift sets the "C" and "Z" flags the same

  // way a cmp would.

  // FIXME: Add support for ARM/Thumb2; this would need isel patterns, and

  // some tweaks to the heuristics for the previous and->shift transform.

  // FIXME: Optimize cases where the LHS isn't a shift.

  if (Subtarget->isThumb1Only() && LHS->getOpcode() == ISD::SHL &&

      isa<ConstantSDNode>(RHS) && RHS->getAsZExtVal() == 0x80000000U &&

      CC == ISD::SETUGT && isa<ConstantSDNode>(LHS.getOperand(1)) &&

      LHS.getConstantOperandVal(1) < 31) {

    unsigned ShiftAmt = LHS.getConstantOperandVal(1) + 1;

    SDValue Shift =

        DAG.getNode(ARMISD::LSLS, dl, DAG.getVTList(MVT::i32, FlagsVT),

                    LHS.getOperand(0), DAG.getConstant(ShiftAmt, dl, MVT::i32));

    ARMcc = DAG.getConstant(ARMCC::HI, dl, MVT::i32);

    return Shift.getValue(1);

  }


  ARMCC::CondCodes CondCode = IntCCToARMCC(CC);


  // If the RHS is a constant zero then the V (overflow) flag will never be

  // set. This can allow us to simplify GE to PL or LT to MI, which can be

  // simpler for other passes (like the peephole optimiser) to deal with.

  if (isNullConstant(RHS)) {

    switch (CondCode) {

      default: break;

      case ARMCC::GE:

        CondCode = ARMCC::PL;

        break;

      case ARMCC::LT:

        CondCode = ARMCC::MI;

        break;

    }

  }


  unsigned CompareType;

  switch (CondCode) {

  default:

    CompareType = ARMISD::CMP;

    break;

  case ARMCC::EQ:

  case ARMCC::NE:

    // Uses only Z Flag

    CompareType = ARMISD::CMPZ;

    break;

  }

  ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);

  return DAG.getNode(CompareType, dl, FlagsVT, LHS, RHS);

}


/// Returns a appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.

SDValue ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS,

                                     SelectionDAG &DAG, const SDLoc &dl,

                                     bool Signaling) const {

  assert(Subtarget->hasFP64() || RHS.getValueType() != MVT::f64);

  SDValue Flags;

  if (!isFloatingPointZero(RHS))

    Flags = DAG.getNode(Signaling ? ARMISD::CMPFPE : ARMISD::CMPFP, dl, FlagsVT,

                        LHS, RHS);

  else

    Flags = DAG.getNode(Signaling ? ARMISD::CMPFPEw0 : ARMISD::CMPFPw0, dl,

                        FlagsVT, LHS);

  return DAG.getNode(ARMISD::FMSTAT, dl, FlagsVT, Flags);

}


// This function returns three things: the arithmetic computation itself

// (Value), a comparison (OverflowCmp), and a condition code (ARMcc).  The

// comparison and the condition code define the case in which the arithmetic

// computation *does not* overflow.

std::pair<SDValue, SDValue>

ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG,

                                 SDValue &ARMcc) const {

  assert(Op.getValueType() == MVT::i32 &&  "Unsupported value type");


  SDValue Value, OverflowCmp;

  SDValue LHS = Op.getOperand(0);

  SDValue RHS = Op.getOperand(1);

  SDLoc dl(Op);


  // FIXME: We are currently always generating CMPs because we don't support

  // generating CMN through the backend. This is not as good as the natural

  // CMP case because it causes a register dependency and cannot be folded

  // later.


  switch (Op.getOpcode()) {

  default:

    llvm_unreachable("Unknown overflow instruction!");

  case ISD::SADDO:

    ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32);

    Value = DAG.getNode(ISD::ADD, dl, Op.getValueType(), LHS, RHS);

    OverflowCmp = DAG.getNode(ARMISD::CMP, dl, FlagsVT, Value, LHS);

    break;

  case ISD::UADDO:

    ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32);

    // We use ADDC here to correspond to its use in LowerUnsignedALUO.

    // We do not use it in the USUBO case as Value may not be used.

    Value = DAG.getNode(ARMISD::ADDC, dl,

                        DAG.getVTList(Op.getValueType(), MVT::i32), LHS, RHS)

                .getValue(0);

    OverflowCmp = DAG.getNode(ARMISD::CMP, dl, FlagsVT, Value, LHS);

    break;

  case ISD::SSUBO:

    ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32);

    Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS);

    OverflowCmp = DAG.getNode(ARMISD::CMP, dl, FlagsVT, LHS, RHS);

    break;

  case ISD::USUBO:

    ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32);

    Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS);

    OverflowCmp = DAG.getNode(ARMISD::CMP, dl, FlagsVT, LHS, RHS);

    break;

  case ISD::UMULO:

    // We generate a UMUL_LOHI and then check if the high word is 0.

    ARMcc = DAG.getConstant(ARMCC::EQ, dl, MVT::i32);

    Value = DAG.getNode(ISD::UMUL_LOHI, dl,

                        DAG.getVTList(Op.getValueType(), Op.getValueType()),

                        LHS, RHS);

    OverflowCmp = DAG.getNode(ARMISD::CMP, dl, FlagsVT, Value.getValue(1),

                              DAG.getConstant(0, dl, MVT::i32));

    Value = Value.getValue(0); // We only want the low 32 bits for the result.

    break;

  case ISD::SMULO:

    // We generate a SMUL_LOHI and then check if all the bits of the high word

    // are the same as the sign bit of the low word.

    ARMcc = DAG.getConstant(ARMCC::EQ, dl, MVT::i32);

    Value = DAG.getNode(ISD::SMUL_LOHI, dl,

                        DAG.getVTList(Op.getValueType(), Op.getValueType()),

                        LHS, RHS);

    OverflowCmp = DAG.getNode(ARMISD::CMP, dl, FlagsVT, Value.getValue(1),

                              DAG.getNode(ISD::SRA, dl, Op.getValueType(),

                                          Value.getValue(0),

                                          DAG.getConstant(31, dl, MVT::i32)));

    Value = Value.getValue(0); // We only want the low 32 bits for the result.

    break;

  } // switch (...)


  return std::make_pair(Value, OverflowCmp);

}


SDValue

ARMTargetLowering::LowerSignedALUO(SDValue Op, SelectionDAG &DAG) const {

  // Let legalize expand this if it isn't a legal type yet.

  if (!isTypeLegal(Op.getValueType()))

    return SDValue();


  SDValue Value, OverflowCmp;

  SDValue ARMcc;

  std::tie(Value, OverflowCmp) = getARMXALUOOp(Op, DAG, ARMcc);

  SDLoc dl(Op);

  // We use 0 and 1 as false and true values.

  SDValue TVal = DAG.getConstant(1, dl, MVT::i32);

  SDValue FVal = DAG.getConstant(0, dl, MVT::i32);

  EVT VT = Op.getValueType();


  SDValue Overflow =

      DAG.getNode(ARMISD::CMOV, dl, VT, TVal, FVal, ARMcc, OverflowCmp);


  SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);

  return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);

}


static SDValue ConvertBooleanCarryToCarryFlag(SDValue BoolCarry,

                                              SelectionDAG &DAG) {

  SDLoc DL(BoolCarry);

  EVT CarryVT = BoolCarry.getValueType();


  // This converts the boolean value carry into the carry flag by doing

  // ARMISD::SUBC Carry, 1

  SDValue Carry = DAG.getNode(ARMISD::SUBC, DL,

                              DAG.getVTList(CarryVT, MVT::i32),

                              BoolCarry, DAG.getConstant(1, DL, CarryVT));

  return Carry.getValue(1);

}


static SDValue ConvertCarryFlagToBooleanCarry(SDValue Flags, EVT VT,

                                              SelectionDAG &DAG) {

  SDLoc DL(Flags);


  // Now convert the carry flag into a boolean carry. We do this

  // using ARMISD:ADDE 0, 0, Carry

  return DAG.getNode(ARMISD::ADDE, DL, DAG.getVTList(VT, MVT::i32),

                     DAG.getConstant(0, DL, MVT::i32),

                     DAG.getConstant(0, DL, MVT::i32), Flags);

}


SDValue ARMTargetLowering::LowerUnsignedALUO(SDValue Op,

                                             SelectionDAG &DAG) const {

  // Let legalize expand this if it isn't a legal type yet.

  if (!isTypeLegal(Op.getValueType()))

    return SDValue();


  SDValue LHS = Op.getOperand(0);

  SDValue RHS = Op.getOperand(1);

  SDLoc dl(Op);


  EVT VT = Op.getValueType();

  SDVTList VTs = DAG.getVTList(VT, MVT::i32);

  SDValue Value;

  SDValue Overflow;

  switch (Op.getOpcode()) {

  default:

    llvm_unreachable("Unknown overflow instruction!");

  case ISD::UADDO:

    Value = DAG.getNode(ARMISD::ADDC, dl, VTs, LHS, RHS);

    // Convert the carry flag into a boolean value.

    Overflow = ConvertCarryFlagToBooleanCarry(Value.getValue(1), VT, DAG);

    break;

  case ISD::USUBO: {

    Value = DAG.getNode(ARMISD::SUBC, dl, VTs, LHS, RHS);

    // Convert the carry flag into a boolean value.

    Overflow = ConvertCarryFlagToBooleanCarry(Value.getValue(1), VT, DAG);

    // ARMISD::SUBC returns 0 when we have to borrow, so make it an overflow

    // value. So compute 1 - C.

    Overflow = DAG.getNode(ISD::SUB, dl, MVT::i32,

                           DAG.getConstant(1, dl, MVT::i32), Overflow);

    break;

  }

  }


  return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);

}


static SDValue LowerADDSUBSAT(SDValue Op, SelectionDAG &DAG,

                              const ARMSubtarget *Subtarget) {

  EVT VT = Op.getValueType();

  if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP() || Subtarget->isThumb1Only())

    return SDValue();

  if (!VT.isSimple())

    return SDValue();


  unsigned NewOpcode;

  switch (VT.getSimpleVT().SimpleTy) {

  default:

    return SDValue();

  case MVT::i8:

    switch (Op->getOpcode()) {

    case ISD::UADDSAT:

      NewOpcode = ARMISD::UQADD8b;

      break;

    case ISD::SADDSAT:

      NewOpcode = ARMISD::QADD8b;

      break;

    case ISD::USUBSAT:

      NewOpcode = ARMISD::UQSUB8b;

      break;

    case ISD::SSUBSAT:

      NewOpcode = ARMISD::QSUB8b;

      break;

    }

    break;

  case MVT::i16:

    switch (Op->getOpcode()) {

    case ISD::UADDSAT:

      NewOpcode = ARMISD::UQADD16b;

      break;

    case ISD::SADDSAT:

      NewOpcode = ARMISD::QADD16b;

      break;

    case ISD::USUBSAT:

      NewOpcode = ARMISD::UQSUB16b;

      break;

    case ISD::SSUBSAT:

      NewOpcode = ARMISD::QSUB16b;

      break;

    }

    break;

  }


  SDLoc dl(Op);

  SDValue Add =

      DAG.getNode(NewOpcode, dl, MVT::i32,

                  DAG.getSExtOrTrunc(Op->getOperand(0), dl, MVT::i32),

                  DAG.getSExtOrTrunc(Op->getOperand(1), dl, MVT::i32));

  return DAG.getNode(ISD::TRUNCATE, dl, VT, Add);

}


SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {

  SDValue Cond = Op.getOperand(0);

  SDValue SelectTrue = Op.getOperand(1);

  SDValue SelectFalse = Op.getOperand(2);

  SDLoc dl(Op);

  unsigned Opc = Cond.getOpcode();


  if (Cond.getResNo() == 1 &&

      (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||

       Opc == ISD::USUBO)) {

    if (!isTypeLegal(Cond->getValueType(0)))

      return SDValue();


    SDValue Value, OverflowCmp;

    SDValue ARMcc;

    std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc);

    EVT VT = Op.getValueType();


    return getCMOV(dl, VT, SelectTrue, SelectFalse, ARMcc, OverflowCmp, DAG);

  }


  // Convert:

  //

  //   (select (cmov 1, 0, cond), t, f) -> (cmov t, f, cond)

  //   (select (cmov 0, 1, cond), t, f) -> (cmov f, t, cond)

  //

  if (Cond.getOpcode() == ARMISD::CMOV && Cond.hasOneUse()) {

    const ConstantSDNode *CMOVTrue =

      dyn_cast<ConstantSDNode>(Cond.getOperand(0));

    const ConstantSDNode *CMOVFalse =

      dyn_cast<ConstantSDNode>(Cond.getOperand(1));


    if (CMOVTrue && CMOVFalse) {

      unsigned CMOVTrueVal = CMOVTrue->getZExtValue();

      unsigned CMOVFalseVal = CMOVFalse->getZExtValue();


      SDValue True;

      SDValue False;

      if (CMOVTrueVal == 1 && CMOVFalseVal == 0) {

        True = SelectTrue;

        False = SelectFalse;

      } else if (CMOVTrueVal == 0 && CMOVFalseVal == 1) {

        True = SelectFalse;

        False = SelectTrue;

      }


      if (True.getNode() && False.getNode())

        return getCMOV(dl, Op.getValueType(), True, False, Cond.getOperand(2),

                       Cond.getOperand(3), DAG);

    }

  }


  // ARM's BooleanContents value is UndefinedBooleanContent. Mask out the

  // undefined bits before doing a full-word comparison with zero.

  Cond = DAG.getNode(ISD::AND, dl, Cond.getValueType(), Cond,

                     DAG.getConstant(1, dl, Cond.getValueType()));


  return DAG.getSelectCC(dl, Cond,

                         DAG.getConstant(0, dl, Cond.getValueType()),

                         SelectTrue, SelectFalse, ISD::SETNE);

}


static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode,

                                 bool &swpCmpOps, bool &swpVselOps) {

  // Start by selecting the GE condition code for opcodes that return true for

  // 'equality'

  if (CC == ISD::SETUGE || CC == ISD::SETOGE || CC == ISD::SETOLE ||

      CC == ISD::SETULE || CC == ISD::SETGE  || CC == ISD::SETLE)

    CondCode = ARMCC::GE;


  // and GT for opcodes that return false for 'equality'.

  else if (CC == ISD::SETUGT || CC == ISD::SETOGT || CC == ISD::SETOLT ||

           CC == ISD::SETULT || CC == ISD::SETGT  || CC == ISD::SETLT)

    CondCode = ARMCC::GT;


  // Since we are constrained to GE/GT, if the opcode contains 'less', we need

  // to swap the compare operands.

  if (CC == ISD::SETOLE || CC == ISD::SETULE || CC == ISD::SETOLT ||

      CC == ISD::SETULT || CC == ISD::SETLE  || CC == ISD::SETLT)

    swpCmpOps = true;


  // Both GT and GE are ordered comparisons, and return false for 'unordered'.

  // If we have an unordered opcode, we need to swap the operands to the VSEL

  // instruction (effectively negating the condition).

  //

  // This also has the effect of swapping which one of 'less' or 'greater'

  // returns true, so we also swap the compare operands. It also switches

  // whether we return true for 'equality', so we compensate by picking the

  // opposite condition code to our original choice.

  if (CC == ISD::SETULE || CC == ISD::SETULT || CC == ISD::SETUGE ||

      CC == ISD::SETUGT) {

    swpCmpOps = !swpCmpOps;

    swpVselOps = !swpVselOps;

    CondCode = CondCode == ARMCC::GT ? ARMCC::GE : ARMCC::GT;

  }


  // 'ordered' is 'anything but unordered', so use the VS condition code and

  // swap the VSEL operands.

  if (CC == ISD::SETO) {

    CondCode = ARMCC::VS;

    swpVselOps = true;

  }


  // 'unordered or not equal' is 'anything but equal', so use the EQ condition

  // code and swap the VSEL operands. Also do this if we don't care about the

  // unordered case.

  if (CC == ISD::SETUNE || CC == ISD::SETNE) {

    CondCode = ARMCC::EQ;

    swpVselOps = true;

  }

}


SDValue ARMTargetLowering::getCMOV(const SDLoc &dl, EVT VT, SDValue FalseVal,

                                   SDValue TrueVal, SDValue ARMcc,

                                   SDValue Flags, SelectionDAG &DAG) const {

  if (!Subtarget->hasFP64() && VT == MVT::f64) {

    FalseVal = DAG.getNode(ARMISD::VMOVRRD, dl,

                           DAG.getVTList(MVT::i32, MVT::i32), FalseVal);

    TrueVal = DAG.getNode(ARMISD::VMOVRRD, dl,

                          DAG.getVTList(MVT::i32, MVT::i32), TrueVal);


    SDValue TrueLow = TrueVal.getValue(0);

    SDValue TrueHigh = TrueVal.getValue(1);

    SDValue FalseLow = FalseVal.getValue(0);

    SDValue FalseHigh = FalseVal.getValue(1);


    SDValue Low = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseLow, TrueLow,

                              ARMcc, Flags);

    SDValue High = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseHigh, TrueHigh,

                               ARMcc, Flags);


    return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Low, High);

  }

  return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, Flags);

}


static bool isGTorGE(ISD::CondCode CC) {

  return CC == ISD::SETGT || CC == ISD::SETGE;

}


static bool isLTorLE(ISD::CondCode CC) {

  return CC == ISD::SETLT || CC == ISD::SETLE;

}


// See if a conditional (LHS CC RHS ? TrueVal : FalseVal) is lower-saturating.

// All of these conditions (and their <= and >= counterparts) will do:

//          x < k ? k : x

//          x > k ? x : k

//          k < x ? x : k

//          k > x ? k : x


static bool isLowerSaturate(const SDValue LHS, const SDValue RHS,

                            const SDValue TrueVal, const SDValue FalseVal,

                            const ISD::CondCode CC, const SDValue K) {

  return (isGTorGE(CC) &&

          ((K == LHS && K == TrueVal) || (K == RHS && K == FalseVal))) ||

         (isLTorLE(CC) &&

          ((K == RHS && K == TrueVal) || (K == LHS && K == FalseVal)));

}


// Check if two chained conditionals could be converted into SSAT or USAT.

//

// SSAT can replace a set of two conditional selectors that bound a number to an

// interval of type [k, ~k] when k + 1 is a power of 2. Here are some examples:

//

//     x < -k ? -k : (x > k ? k : x)

//     x < -k ? -k : (x < k ? x : k)

//     x > -k ? (x > k ? k : x) : -k

//     x < k ? (x < -k ? -k : x) : k

//     etc.

//

// LLVM canonicalizes these to either a min(max()) or a max(min())

// pattern. This function tries to match one of these and will return a SSAT

// node if successful.

//

// USAT works similarily to SSAT but bounds on the interval [0, k] where k + 1

// is a power of 2.


static SDValue LowerSaturatingConditional(SDValue Op, SelectionDAG &DAG) {

  EVT VT = Op.getValueType();

  SDValue V1 = Op.getOperand(0);

  SDValue K1 = Op.getOperand(1);

  SDValue TrueVal1 = Op.getOperand(2);

  SDValue FalseVal1 = Op.getOperand(3);

  ISD::CondCode CC1 = cast<CondCodeSDNode>(Op.getOperand(4))->get();


  const SDValue Op2 = isa<ConstantSDNode>(TrueVal1) ? FalseVal1 : TrueVal1;

  if (Op2.getOpcode() != ISD::SELECT_CC)

    return SDValue();


  SDValue V2 = Op2.getOperand(0);

  SDValue K2 = Op2.getOperand(1);

  SDValue TrueVal2 = Op2.getOperand(2);

  SDValue FalseVal2 = Op2.getOperand(3);

  ISD::CondCode CC2 = cast<CondCodeSDNode>(Op2.getOperand(4))->get();


  SDValue V1Tmp = V1;

  SDValue V2Tmp = V2;


  // Check that the registers and the constants match a max(min()) or min(max())

  // pattern

  if (V1Tmp != TrueVal1 || V2Tmp != TrueVal2 || K1 != FalseVal1 ||

      K2 != FalseVal2 ||

      !((isGTorGE(CC1) && isLTorLE(CC2)) || (isLTorLE(CC1) && isGTorGE(CC2))))

    return SDValue();


  // Check that the constant in the lower-bound check is

  // the opposite of the constant in the upper-bound check

  // in 1's complement.

  if (!isa<ConstantSDNode>(K1) || !isa<ConstantSDNode>(K2))

    return SDValue();


  int64_t Val1 = cast<ConstantSDNode>(K1)->getSExtValue();

  int64_t Val2 = cast<ConstantSDNode>(K2)->getSExtValue();

  int64_t PosVal = std::max(Val1, Val2);

  int64_t NegVal = std::min(Val1, Val2);


  if (!((Val1 > Val2 && isLTorLE(CC1)) || (Val1 < Val2 && isLTorLE(CC2))) ||

      !isPowerOf2_64(PosVal + 1))

    return SDValue();


  // Handle the difference between USAT (unsigned) and SSAT (signed)

  // saturation

  // At this point, PosVal is guaranteed to be positive

  uint64_t K = PosVal;

  SDLoc dl(Op);

  if (Val1 == ~Val2)

    return DAG.getNode(ARMISD::SSAT, dl, VT, V2Tmp,

                       DAG.getConstant(llvm::countr_one(K), dl, VT));

  if (NegVal == 0)

    return DAG.getNode(ARMISD::USAT, dl, VT, V2Tmp,

                       DAG.getConstant(llvm::countr_one(K), dl, VT));


  return SDValue();

}


// Check if a condition of the type x < k ? k : x can be converted into a

// bit operation instead of conditional moves.

// Currently this is allowed given:

// - The conditions and values match up

// - k is 0 or -1 (all ones)

// This function will not check the last condition, thats up to the caller

// It returns true if the transformation can be made, and in such case

// returns x in V, and k in SatK.


static bool isLowerSaturatingConditional(const SDValue &Op, SDValue &V,

                                         SDValue &SatK)

{

  SDValue LHS = Op.getOperand(0);

  SDValue RHS = Op.getOperand(1);

  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();

  SDValue TrueVal = Op.getOperand(2);

  SDValue FalseVal = Op.getOperand(3);


  SDValue *K = isa<ConstantSDNode>(LHS) ? &LHS : isa<ConstantSDNode>(RHS)

                                               ? &RHS

                                               : nullptr;


  // No constant operation in comparison, early out

  if (!K)

    return false;


  SDValue KTmp = isa<ConstantSDNode>(TrueVal) ? TrueVal : FalseVal;

  V = (KTmp == TrueVal) ? FalseVal : TrueVal;

  SDValue VTmp = (K && *K == LHS) ? RHS : LHS;


  // If the constant on left and right side, or variable on left and right,

  // does not match, early out

  if (*K != KTmp || V != VTmp)

    return false;


  if (isLowerSaturate(LHS, RHS, TrueVal, FalseVal, CC, *K)) {

    SatK = *K;

    return true;

  }


  return false;

}


bool ARMTargetLowering::isUnsupportedFloatingType(EVT VT) const {

  if (VT == MVT::f32)

    return !Subtarget->hasVFP2Base();

  if (VT == MVT::f64)

    return !Subtarget->hasFP64();

  if (VT == MVT::f16)

    return !Subtarget->hasFullFP16();

  return false;

}


SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {

  EVT VT = Op.getValueType();

  SDLoc dl(Op);


  // Try to convert two saturating conditional selects into a single SSAT

  if ((!Subtarget->isThumb() && Subtarget->hasV6Ops()) || Subtarget->isThumb2())

    if (SDValue SatValue = LowerSaturatingConditional(Op, DAG))

      return SatValue;


  // Try to convert expressions of the form x < k ? k : x (and similar forms)

  // into more efficient bit operations, which is possible when k is 0 or -1

  // On ARM and Thumb-2 which have flexible operand 2 this will result in

  // single instructions. On Thumb the shift and the bit operation will be two

  // instructions.

  // Only allow this transformation on full-width (32-bit) operations

  SDValue LowerSatConstant;

  SDValue SatValue;

  if (VT == MVT::i32 &&

      isLowerSaturatingConditional(Op, SatValue, LowerSatConstant)) {

    SDValue ShiftV = DAG.getNode(ISD::SRA, dl, VT, SatValue,

                                 DAG.getConstant(31, dl, VT));

    if (isNullConstant(LowerSatConstant)) {

      SDValue NotShiftV = DAG.getNode(ISD::XOR, dl, VT, ShiftV,

                                      DAG.getAllOnesConstant(dl, VT));

      return DAG.getNode(ISD::AND, dl, VT, SatValue, NotShiftV);

    } else if (isAllOnesConstant(LowerSatConstant))

      return DAG.getNode(ISD::OR, dl, VT, SatValue, ShiftV);

  }


  SDValue LHS = Op.getOperand(0);

  SDValue RHS = Op.getOperand(1);

  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();

  SDValue TrueVal = Op.getOperand(2);

  SDValue FalseVal = Op.getOperand(3);

  ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FalseVal);

  ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TrueVal);

  ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS);

  if (Op.getValueType().isInteger()) {


    // Check for SMAX(lhs, 0) and SMIN(lhs, 0) patterns.

    // (SELECT_CC setgt, lhs, 0, lhs, 0) -> (BIC lhs, (SRA lhs, typesize-1))

    // (SELECT_CC setlt, lhs, 0, lhs, 0) -> (AND lhs, (SRA lhs, typesize-1))

    // Both require less instructions than compare and conditional select.

    if ((CC == ISD::SETGT || CC == ISD::SETLT) && LHS == TrueVal && RHSC &&

        RHSC->isZero() && CFVal && CFVal->isZero() &&

        LHS.getValueType() == RHS.getValueType()) {

      EVT VT = LHS.getValueType();

      SDValue Shift =

          DAG.getNode(ISD::SRA, dl, VT, LHS,

                      DAG.getConstant(VT.getSizeInBits() - 1, dl, VT));


      if (CC == ISD::SETGT)

        Shift = DAG.getNOT(dl, Shift, VT);


      return DAG.getNode(ISD::AND, dl, VT, LHS, Shift);

    }

  }


  if (Subtarget->hasV8_1MMainlineOps() && CFVal && CTVal &&

      LHS.getValueType() == MVT::i32 && RHS.getValueType() == MVT::i32) {

    unsigned TVal = CTVal->getZExtValue();

    unsigned FVal = CFVal->getZExtValue();

    unsigned Opcode = 0;


    if (TVal == ~FVal) {

      Opcode = ARMISD::CSINV;

    } else if (TVal == ~FVal + 1) {

      Opcode = ARMISD::CSNEG;

    } else if (TVal + 1 == FVal) {

      Opcode = ARMISD::CSINC;

    } else if (TVal == FVal + 1) {

      Opcode = ARMISD::CSINC;

      std::swap(TrueVal, FalseVal);

      std::swap(TVal, FVal);

      CC = ISD::getSetCCInverse(CC, LHS.getValueType());

    }


    if (Opcode) {

      // If one of the constants is cheaper than another, materialise the

      // cheaper one and let the csel generate the other.

      if (Opcode != ARMISD::CSINC &&

          HasLowerConstantMaterializationCost(FVal, TVal, Subtarget)) {

        std::swap(TrueVal, FalseVal);

        std::swap(TVal, FVal);

        CC = ISD::getSetCCInverse(CC, LHS.getValueType());

      }


      // Attempt to use ZR checking TVal is 0, possibly inverting the condition

      // to get there. CSINC not is invertable like the other two (~(~a) == a,

      // -(-a) == a, but (a+1)+1 != a).

      if (FVal == 0 && Opcode != ARMISD::CSINC) {

        std::swap(TrueVal, FalseVal);

        std::swap(TVal, FVal);

        CC = ISD::getSetCCInverse(CC, LHS.getValueType());

      }


      // Drops F's value because we can get it by inverting/negating TVal.

      FalseVal = TrueVal;


      SDValue ARMcc;

      SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);

      EVT VT = TrueVal.getValueType();

      return DAG.getNode(Opcode, dl, VT, TrueVal, FalseVal, ARMcc, Cmp);

    }

  }


  if (isUnsupportedFloatingType(LHS.getValueType())) {

    softenSetCCOperands(DAG, LHS.getValueType(), LHS, RHS, CC, dl, LHS, RHS);


    // If softenSetCCOperands only returned one value, we should compare it to

    // zero.

    if (!RHS.getNode()) {

      RHS = DAG.getConstant(0, dl, LHS.getValueType());

      CC = ISD::SETNE;

    }

  }


  if (LHS.getValueType() == MVT::i32) {

    // Try to generate VSEL on ARMv8.

    // The VSEL instruction can't use all the usual ARM condition

    // codes: it only has two bits to select the condition code, so it's

    // constrained to use only GE, GT, VS and EQ.

    //

    // To implement all the various ISD::SETXXX opcodes, we sometimes need to

    // swap the operands of the previous compare instruction (effectively

    // inverting the compare condition, swapping 'less' and 'greater') and

    // sometimes need to swap the operands to the VSEL (which inverts the

    // condition in the sense of firing whenever the previous condition didn't)

    if (Subtarget->hasFPARMv8Base() && (TrueVal.getValueType() == MVT::f16 ||

                                        TrueVal.getValueType() == MVT::f32 ||

                                        TrueVal.getValueType() == MVT::f64)) {

      ARMCC::CondCodes CondCode = IntCCToARMCC(CC);

      if (CondCode == ARMCC::LT || CondCode == ARMCC::LE ||

          CondCode == ARMCC::VC || CondCode == ARMCC::NE) {

        CC = ISD::getSetCCInverse(CC, LHS.getValueType());

        std::swap(TrueVal, FalseVal);

      }

    }


    SDValue ARMcc;

    SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);

    // Choose GE over PL, which vsel does now support

    if (ARMcc->getAsZExtVal() == ARMCC::PL)

      ARMcc = DAG.getConstant(ARMCC::GE, dl, MVT::i32);

    return getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, Cmp, DAG);

  }


  ARMCC::CondCodes CondCode, CondCode2;

  FPCCToARMCC(CC, CondCode, CondCode2);


  // Normalize the fp compare. If RHS is zero we prefer to keep it there so we

  // match CMPFPw0 instead of CMPFP, though we don't do this for f16 because we

  // must use VSEL (limited condition codes), due to not having conditional f16

  // moves.

  if (Subtarget->hasFPARMv8Base() &&

      !(isFloatingPointZero(RHS) && TrueVal.getValueType() != MVT::f16) &&

      (TrueVal.getValueType() == MVT::f16 ||

       TrueVal.getValueType() == MVT::f32 ||

       TrueVal.getValueType() == MVT::f64)) {

    bool swpCmpOps = false;

    bool swpVselOps = false;

    checkVSELConstraints(CC, CondCode, swpCmpOps, swpVselOps);


    if (CondCode == ARMCC::GT || CondCode == ARMCC::GE ||

        CondCode == ARMCC::VS || CondCode == ARMCC::EQ) {

      if (swpCmpOps)

        std::swap(LHS, RHS);

      if (swpVselOps)

        std::swap(TrueVal, FalseVal);

    }

  }


  SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);

  SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);

  SDValue Result = getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, Cmp, DAG);

  if (CondCode2 != ARMCC::AL) {

    SDValue ARMcc2 = DAG.getConstant(CondCode2, dl, MVT::i32);

    Result = getCMOV(dl, VT, Result, TrueVal, ARMcc2, Cmp, DAG);

  }

  return Result;

}


/// canChangeToInt - Given the fp compare operand, return true if it is suitable

/// to morph to an integer compare sequence.


static bool canChangeToInt(SDValue Op, bool &SeenZero,

                           const ARMSubtarget *Subtarget) {

  SDNode *N = Op.getNode();

  if (!N->hasOneUse())

    // Otherwise it requires moving the value from fp to integer registers.

    return false;

  if (!N->getNumValues())

    return false;

  EVT VT = Op.getValueType();

  if (VT != MVT::f32 && !Subtarget->isFPBrccSlow())

    // f32 case is generally profitable. f64 case only makes sense when vcmpe +

    // vmrs are very slow, e.g. cortex-a8.

    return false;


  if (isFloatingPointZero(Op)) {

    SeenZero = true;

    return true;

  }

  return ISD::isNormalLoad(N);

}


static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) {

  if (isFloatingPointZero(Op))

    return DAG.getConstant(0, SDLoc(Op), MVT::i32);


  if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op))

    return DAG.getLoad(MVT::i32, SDLoc(Op), Ld->getChain(), Ld->getBasePtr(),

                       Ld->getPointerInfo(), Ld->getAlign(),

                       Ld->getMemOperand()->getFlags());


  llvm_unreachable("Unknown VFP cmp argument!");

}


static void expandf64Toi32(SDValue Op, SelectionDAG &DAG,

                           SDValue &RetVal1, SDValue &RetVal2) {

  SDLoc dl(Op);


  if (isFloatingPointZero(Op)) {

    RetVal1 = DAG.getConstant(0, dl, MVT::i32);

    RetVal2 = DAG.getConstant(0, dl, MVT::i32);

    return;

  }


  if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) {

    SDValue Ptr = Ld->getBasePtr();

    RetVal1 =

        DAG.getLoad(MVT::i32, dl, Ld->getChain(), Ptr, Ld->getPointerInfo(),

                    Ld->getAlign(), Ld->getMemOperand()->getFlags());


    EVT PtrType = Ptr.getValueType();

    SDValue NewPtr = DAG.getNode(ISD::ADD, dl,

                                 PtrType, Ptr, DAG.getConstant(4, dl, PtrType));

    RetVal2 = DAG.getLoad(MVT::i32, dl, Ld->getChain(), NewPtr,

                          Ld->getPointerInfo().getWithOffset(4),

                          commonAlignment(Ld->getAlign(), 4),

                          Ld->getMemOperand()->getFlags());

    return;

  }


  llvm_unreachable("Unknown VFP cmp argument!");

}


/// OptimizeVFPBrcond - With nnan and without daz, it's legal to optimize some

/// f32 and even f64 comparisons to integer ones.

SDValue

ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {

  SDValue Chain = Op.getOperand(0);

  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();

  SDValue LHS = Op.getOperand(2);

  SDValue RHS = Op.getOperand(3);

  SDValue Dest = Op.getOperand(4);

  SDLoc dl(Op);


  bool LHSSeenZero = false;

  bool LHSOk = canChangeToInt(LHS, LHSSeenZero, Subtarget);

  bool RHSSeenZero = false;

  bool RHSOk = canChangeToInt(RHS, RHSSeenZero, Subtarget);

  if (LHSOk && RHSOk && (LHSSeenZero || RHSSeenZero)) {

    // If unsafe fp math optimization is enabled and there are no other uses of

    // the CMP operands, and the condition code is EQ or NE, we can optimize it

    // to an integer comparison.

    if (CC == ISD::SETOEQ)

      CC = ISD::SETEQ;

    else if (CC == ISD::SETUNE)

      CC = ISD::SETNE;


    SDValue Mask = DAG.getConstant(0x7fffffff, dl, MVT::i32);

    SDValue ARMcc;

    if (LHS.getValueType() == MVT::f32) {

      LHS = DAG.getNode(ISD::AND, dl, MVT::i32,

                        bitcastf32Toi32(LHS, DAG), Mask);

      RHS = DAG.getNode(ISD::AND, dl, MVT::i32,

                        bitcastf32Toi32(RHS, DAG), Mask);

      SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);

      return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc,

                         Cmp);

    }


    SDValue LHS1, LHS2;

    SDValue RHS1, RHS2;

    expandf64Toi32(LHS, DAG, LHS1, LHS2);

    expandf64Toi32(RHS, DAG, RHS1, RHS2);

    LHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, LHS2, Mask);

    RHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, RHS2, Mask);

    ARMCC::CondCodes CondCode = IntCCToARMCC(CC);

    ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);

    SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest };

    return DAG.getNode(ARMISD::BCC_i64, dl, MVT::Other, Ops);

  }


  return SDValue();

}


// Generate CMP + CMOV for integer abs.

SDValue ARMTargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const {

  SDLoc DL(Op);


  SDValue Neg = DAG.getNegative(Op.getOperand(0), DL, MVT::i32);


  // Generate CMP & CMOV.

  SDValue Cmp = DAG.getNode(ARMISD::CMP, DL, FlagsVT, Op.getOperand(0),

                            DAG.getConstant(0, DL, MVT::i32));

  return DAG.getNode(ARMISD::CMOV, DL, MVT::i32, Op.getOperand(0), Neg,

                     DAG.getConstant(ARMCC::MI, DL, MVT::i32), Cmp);

}


SDValue ARMTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {

  SDValue Chain = Op.getOperand(0);

  SDValue Cond = Op.getOperand(1);

  SDValue Dest = Op.getOperand(2);

  SDLoc dl(Op);


  // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch

  // instruction.

  unsigned Opc = Cond.getOpcode();

  bool OptimizeMul = (Opc == ISD::SMULO || Opc == ISD::UMULO) &&

                      !Subtarget->isThumb1Only();

  if (Cond.getResNo() == 1 &&

      (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||

       Opc == ISD::USUBO || OptimizeMul)) {

    // Only lower legal XALUO ops.

    if (!isTypeLegal(Cond->getValueType(0)))

      return SDValue();


    // The actual operation with overflow check.

    SDValue Value, OverflowCmp;

    SDValue ARMcc;

    std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc);


    // Reverse the condition code.

    ARMCC::CondCodes CondCode =

        (ARMCC::CondCodes)cast<const ConstantSDNode>(ARMcc)->getZExtValue();

    CondCode = ARMCC::getOppositeCondition(CondCode);

    ARMcc = DAG.getConstant(CondCode, SDLoc(ARMcc), MVT::i32);


    return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc,

                       OverflowCmp);

  }


  return SDValue();

}


SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {

  SDValue Chain = Op.getOperand(0);

  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();

  SDValue LHS = Op.getOperand(2);

  SDValue RHS = Op.getOperand(3);

  SDValue Dest = Op.getOperand(4);

  SDLoc dl(Op);


  if (isUnsupportedFloatingType(LHS.getValueType())) {

    softenSetCCOperands(DAG, LHS.getValueType(), LHS, RHS, CC, dl, LHS, RHS);


    // If softenSetCCOperands only returned one value, we should compare it to

    // zero.

    if (!RHS.getNode()) {

      RHS = DAG.getConstant(0, dl, LHS.getValueType());

      CC = ISD::SETNE;

    }

  }


  // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch

  // instruction.

  unsigned Opc = LHS.getOpcode();

  bool OptimizeMul = (Opc == ISD::SMULO || Opc == ISD::UMULO) &&

                      !Subtarget->isThumb1Only();

  if (LHS.getResNo() == 1 && (isOneConstant(RHS) || isNullConstant(RHS)) &&

      (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||

       Opc == ISD::USUBO || OptimizeMul) &&

      (CC == ISD::SETEQ || CC == ISD::SETNE)) {

    // Only lower legal XALUO ops.

    if (!isTypeLegal(LHS->getValueType(0)))

      return SDValue();


    // The actual operation with overflow check.

    SDValue Value, OverflowCmp;

    SDValue ARMcc;

    std::tie(Value, OverflowCmp) = getARMXALUOOp(LHS.getValue(0), DAG, ARMcc);


    if ((CC == ISD::SETNE) != isOneConstant(RHS)) {

      // Reverse the condition code.

      ARMCC::CondCodes CondCode =

          (ARMCC::CondCodes)cast<const ConstantSDNode>(ARMcc)->getZExtValue();

      CondCode = ARMCC::getOppositeCondition(CondCode);

      ARMcc = DAG.getConstant(CondCode, SDLoc(ARMcc), MVT::i32);

    }


    return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc,

                       OverflowCmp);

  }


  if (LHS.getValueType() == MVT::i32) {

    SDValue ARMcc;

    SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);

    return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc, Cmp);

  }


  SDNodeFlags Flags = Op->getFlags();

  if (Flags.hasNoNaNs() &&

      DAG.getDenormalMode(MVT::f32) == DenormalMode::getIEEE() &&

      DAG.getDenormalMode(MVT::f64) == DenormalMode::getIEEE() &&

      (CC == ISD::SETEQ || CC == ISD::SETOEQ || CC == ISD::SETNE ||

       CC == ISD::SETUNE)) {

    if (SDValue Result = OptimizeVFPBrcond(Op, DAG))

      return Result;

  }


  ARMCC::CondCodes CondCode, CondCode2;

  FPCCToARMCC(CC, CondCode, CondCode2);


  SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);

  SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);

  SDValue Ops[] = {Chain, Dest, ARMcc, Cmp};

  SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Ops);

  if (CondCode2 != ARMCC::AL) {

    ARMcc = DAG.getConstant(CondCode2, dl, MVT::i32);

    SDValue Ops[] = {Res, Dest, ARMcc, Cmp};

    Res = DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Ops);

  }

  return Res;

}


SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {

  SDValue Chain = Op.getOperand(0);

  SDValue Table = Op.getOperand(1);

  SDValue Index = Op.getOperand(2);

  SDLoc dl(Op);


  EVT PTy = getPointerTy(DAG.getDataLayout());

  JumpTableSDNode *JT = cast<JumpTableSDNode>(Table);

  SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy);

  Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI);

  Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, dl, PTy));

  SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Table, Index);

  if (Subtarget->isThumb2() || (Subtarget->hasV8MBaselineOps() && Subtarget->isThumb())) {

    // Thumb2 and ARMv8-M use a two-level jump. That is, it jumps into the jump table

    // which does another jump to the destination. This also makes it easier

    // to translate it to TBB / TBH later (Thumb2 only).

    // FIXME: This might not work if the function is extremely large.

    return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain,

                       Addr, Op.getOperand(2), JTI);

  }

  if (isPositionIndependent() || Subtarget->isROPI()) {

    Addr =

        DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr,

                    MachinePointerInfo::getJumpTable(DAG.getMachineFunction()));

    Chain = Addr.getValue(1);

    Addr = DAG.getNode(ISD::ADD, dl, PTy, Table, Addr);

    return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI);

  } else {

    Addr =

        DAG.getLoad(PTy, dl, Chain, Addr,

                    MachinePointerInfo::getJumpTable(DAG.getMachineFunction()));

    Chain = Addr.getValue(1);

    return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI);

  }

}


static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) {

  EVT VT = Op.getValueType();

  SDLoc dl(Op);


  if (Op.getValueType().getVectorElementType() == MVT::i32) {

    if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::f32)

      return Op;

    return DAG.UnrollVectorOp(Op.getNode());

  }


  const bool HasFullFP16 = DAG.getSubtarget<ARMSubtarget>().hasFullFP16();


  EVT NewTy;

  const EVT OpTy = Op.getOperand(0).getValueType();

  if (OpTy == MVT::v4f32)

    NewTy = MVT::v4i32;

  else if (OpTy == MVT::v4f16 && HasFullFP16)

    NewTy = MVT::v4i16;

  else if (OpTy == MVT::v8f16 && HasFullFP16)

    NewTy = MVT::v8i16;

  else

    llvm_unreachable("Invalid type for custom lowering!");


  if (VT != MVT::v4i16 && VT != MVT::v8i16)

    return DAG.UnrollVectorOp(Op.getNode());


  Op = DAG.getNode(Op.getOpcode(), dl, NewTy, Op.getOperand(0));

  return DAG.getNode(ISD::TRUNCATE, dl, VT, Op);

}


SDValue ARMTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {

  EVT VT = Op.getValueType();

  if (VT.isVector())

    return LowerVectorFP_TO_INT(Op, DAG);


  bool IsStrict = Op->isStrictFPOpcode();

  SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);


  if (isUnsupportedFloatingType(SrcVal.getValueType())) {

    RTLIB::Libcall LC;

    if (Op.getOpcode() == ISD::FP_TO_SINT ||

        Op.getOpcode() == ISD::STRICT_FP_TO_SINT)

      LC = RTLIB::getFPTOSINT(SrcVal.getValueType(),

                              Op.getValueType());

    else

      LC = RTLIB::getFPTOUINT(SrcVal.getValueType(),

                              Op.getValueType());

    SDLoc Loc(Op);

    MakeLibCallOptions CallOptions;

    SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();

    SDValue Result;

    std::tie(Result, Chain) = makeLibCall(DAG, LC, Op.getValueType(), SrcVal,

                                          CallOptions, Loc, Chain);

    return IsStrict ? DAG.getMergeValues({Result, Chain}, Loc) : Result;

  }


  // FIXME: Remove this when we have strict fp instruction selection patterns

  if (IsStrict) {

    SDLoc Loc(Op);

    SDValue Result =

        DAG.getNode(Op.getOpcode() == ISD::STRICT_FP_TO_SINT ? ISD::FP_TO_SINT

                                                             : ISD::FP_TO_UINT,

                    Loc, Op.getValueType(), SrcVal);

    return DAG.getMergeValues({Result, Op.getOperand(0)}, Loc);

  }


  return Op;

}


static SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG,

                                  const ARMSubtarget *Subtarget) {

  EVT VT = Op.getValueType();

  EVT ToVT = cast<VTSDNode>(Op.getOperand(1))->getVT();

  EVT FromVT = Op.getOperand(0).getValueType();


  if (VT == MVT::i32 && ToVT == MVT::i32 && FromVT == MVT::f32)

    return Op;

  if (VT == MVT::i32 && ToVT == MVT::i32 && FromVT == MVT::f64 &&

      Subtarget->hasFP64())

    return Op;

  if (VT == MVT::i32 && ToVT == MVT::i32 && FromVT == MVT::f16 &&

      Subtarget->hasFullFP16())

    return Op;

  if (VT == MVT::v4i32 && ToVT == MVT::i32 && FromVT == MVT::v4f32 &&

      Subtarget->hasMVEFloatOps())

    return Op;

  if (VT == MVT::v8i16 && ToVT == MVT::i16 && FromVT == MVT::v8f16 &&

      Subtarget->hasMVEFloatOps())

    return Op;


  if (FromVT != MVT::v4f32 && FromVT != MVT::v8f16)

    return SDValue();


  SDLoc DL(Op);

  bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT;

  unsigned BW = ToVT.getScalarSizeInBits() - IsSigned;

  SDValue CVT = DAG.getNode(Op.getOpcode(), DL, VT, Op.getOperand(0),

                            DAG.getValueType(VT.getScalarType()));

  SDValue Max = DAG.getNode(IsSigned ? ISD::SMIN : ISD::UMIN, DL, VT, CVT,

                            DAG.getConstant((1 << BW) - 1, DL, VT));

  if (IsSigned)

    Max = DAG.getNode(ISD::SMAX, DL, VT, Max,

                      DAG.getSignedConstant(-(1 << BW), DL, VT));

  return Max;

}


static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {

  EVT VT = Op.getValueType();

  SDLoc dl(Op);


  if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i32) {

    if (VT.getVectorElementType() == MVT::f32)

      return Op;

    return DAG.UnrollVectorOp(Op.getNode());

  }


  assert((Op.getOperand(0).getValueType() == MVT::v4i16 ||

          Op.getOperand(0).getValueType() == MVT::v8i16) &&

         "Invalid type for custom lowering!");


  const bool HasFullFP16 = DAG.getSubtarget<ARMSubtarget>().hasFullFP16();


  EVT DestVecType;

  if (VT == MVT::v4f32)

    DestVecType = MVT::v4i32;

  else if (VT == MVT::v4f16 && HasFullFP16)

    DestVecType = MVT::v4i16;

  else if (VT == MVT::v8f16 && HasFullFP16)

    DestVecType = MVT::v8i16;

  else

    return DAG.UnrollVectorOp(Op.getNode());


  unsigned CastOpc;

  unsigned Opc;

  switch (Op.getOpcode()) {

  default: llvm_unreachable("Invalid opcode!");

  case ISD::SINT_TO_FP:

    CastOpc = ISD::SIGN_EXTEND;

    Opc = ISD::SINT_TO_FP;

    break;

  case ISD::UINT_TO_FP:

    CastOpc = ISD::ZERO_EXTEND;

    Opc = ISD::UINT_TO_FP;

    break;

  }


  Op = DAG.getNode(CastOpc, dl, DestVecType, Op.getOperand(0));

  return DAG.getNode(Opc, dl, VT, Op);

}


SDValue ARMTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const {

  EVT VT = Op.getValueType();

  if (VT.isVector())

    return LowerVectorINT_TO_FP(Op, DAG);

  if (isUnsupportedFloatingType(VT)) {

    RTLIB::Libcall LC;

    if (Op.getOpcode() == ISD::SINT_TO_FP)

      LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(),

                              Op.getValueType());

    else

      LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(),

                              Op.getValueType());

    MakeLibCallOptions CallOptions;

    return makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(0),

                       CallOptions, SDLoc(Op)).first;

  }


  return Op;

}


SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {

  // Implement fcopysign with a fabs and a conditional fneg.

  SDValue Tmp0 = Op.getOperand(0);

  SDValue Tmp1 = Op.getOperand(1);

  SDLoc dl(Op);

  EVT VT = Op.getValueType();

  EVT SrcVT = Tmp1.getValueType();

  bool InGPR = Tmp0.getOpcode() == ISD::BITCAST ||

    Tmp0.getOpcode() == ARMISD::VMOVDRR;

  bool UseNEON = !InGPR && Subtarget->hasNEON();


  if (UseNEON) {

    // Use VBSL to copy the sign bit.

    unsigned EncodedVal = ARM_AM::createVMOVModImm(0x6, 0x80);

    SDValue Mask = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v2i32,

                               DAG.getTargetConstant(EncodedVal, dl, MVT::i32));

    EVT OpVT = (VT == MVT::f32) ? MVT::v2i32 : MVT::v1i64;

    if (VT == MVT::f64)

      Mask = DAG.getNode(ARMISD::VSHLIMM, dl, OpVT,

                         DAG.getNode(ISD::BITCAST, dl, OpVT, Mask),

                         DAG.getConstant(32, dl, MVT::i32));

    else /*if (VT == MVT::f32)*/

      Tmp0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp0);

    if (SrcVT == MVT::f32) {

      Tmp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp1);

      if (VT == MVT::f64)

        Tmp1 = DAG.getNode(ARMISD::VSHLIMM, dl, OpVT,

                           DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1),

                           DAG.getConstant(32, dl, MVT::i32));

    } else if (VT == MVT::f32)

      Tmp1 = DAG.getNode(ARMISD::VSHRuIMM, dl, MVT::v1i64,

                         DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, Tmp1),

                         DAG.getConstant(32, dl, MVT::i32));

    Tmp0 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp0);

    Tmp1 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1);


    SDValue AllOnes = DAG.getTargetConstant(ARM_AM::createVMOVModImm(0xe, 0xff),

                                            dl, MVT::i32);

    AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v8i8, AllOnes);

    SDValue MaskNot = DAG.getNode(ISD::XOR, dl, OpVT, Mask,

                                  DAG.getNode(ISD::BITCAST, dl, OpVT, AllOnes));


    SDValue Res = DAG.getNode(ISD::OR, dl, OpVT,

                              DAG.getNode(ISD::AND, dl, OpVT, Tmp1, Mask),

                              DAG.getNode(ISD::AND, dl, OpVT, Tmp0, MaskNot));

    if (VT == MVT::f32) {

      Res = DAG.getNode(ISD::BITCAST, dl, MVT::v2f32, Res);

      Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, Res,

                        DAG.getConstant(0, dl, MVT::i32));

    } else {

      Res = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Res);

    }


    return Res;

  }


  // Bitcast operand 1 to i32.

  if (SrcVT == MVT::f64)

    Tmp1 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),

                       Tmp1).getValue(1);

  Tmp1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp1);


  // Or in the signbit with integer operations.

  SDValue Mask1 = DAG.getConstant(0x80000000, dl, MVT::i32);

  SDValue Mask2 = DAG.getConstant(0x7fffffff, dl, MVT::i32);

  Tmp1 = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp1, Mask1);

  if (VT == MVT::f32) {

    Tmp0 = DAG.getNode(ISD::AND, dl, MVT::i32,

                       DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp0), Mask2);

    return DAG.getNode(ISD::BITCAST, dl, MVT::f32,

                       DAG.getNode(ISD::OR, dl, MVT::i32, Tmp0, Tmp1));

  }


  // f64: Or the high part with signbit and then combine two parts.

  Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),

                     Tmp0);

  SDValue Lo = Tmp0.getValue(0);

  SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2);

  Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1);

  return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);

}


SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{

  MachineFunction &MF = DAG.getMachineFunction();

  MachineFrameInfo &MFI = MF.getFrameInfo();

  MFI.setReturnAddressIsTaken(true);


  EVT VT = Op.getValueType();

  SDLoc dl(Op);

  unsigned Depth = Op.getConstantOperandVal(0);

  if (Depth) {

    SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);

    SDValue Offset = DAG.getConstant(4, dl, MVT::i32);

    return DAG.getLoad(VT, dl, DAG.getEntryNode(),

                       DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),

                       MachinePointerInfo());

  }


  // Return LR, which contains the return address. Mark it an implicit live-in.

  Register Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));

  return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);

}


SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {

  const ARMBaseRegisterInfo &ARI =

    *static_cast<const ARMBaseRegisterInfo*>(RegInfo);

  MachineFunction &MF = DAG.getMachineFunction();

  MachineFrameInfo &MFI = MF.getFrameInfo();

  MFI.setFrameAddressIsTaken(true);


  EVT VT = Op.getValueType();

  SDLoc dl(Op);  // FIXME probably not meaningful

  unsigned Depth = Op.getConstantOperandVal(0);

  Register FrameReg = ARI.getFrameRegister(MF);

  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);

  while (Depth--)

    FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,

                            MachinePointerInfo());

  return FrameAddr;

}


// FIXME? Maybe this could be a TableGen attribute on some registers and

// this table could be generated automatically from RegInfo.

Register ARMTargetLowering::getRegisterByName(const char* RegName, LLT VT,

                                              const MachineFunction &MF) const {

  return StringSwitch<Register>(RegName)

      .Case("sp", ARM::SP)

      .Default(Register());

}


// Result is 64 bit value so split into two 32 bit values and return as a

// pair of values.


static void ExpandREAD_REGISTER(SDNode *N, SmallVectorImpl<SDValue> &Results,

                                SelectionDAG &DAG) {

  SDLoc DL(N);


  // This function is only supposed to be called for i64 type destination.

  assert(N->getValueType(0) == MVT::i64

          && "ExpandREAD_REGISTER called for non-i64 type result.");


  SDValue Read = DAG.getNode(ISD::READ_REGISTER, DL,

                             DAG.getVTList(MVT::i32, MVT::i32, MVT::Other),

                             N->getOperand(0),

                             N->getOperand(1));


  Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Read.getValue(0),

                    Read.getValue(1)));

  Results.push_back(Read.getValue(2)); // Chain

}


/// \p BC is a bitcast that is about to be turned into a VMOVDRR.

/// When \p DstVT, the destination type of \p BC, is on the vector

/// register bank and the source of bitcast, \p Op, operates on the same bank,

/// it might be possible to combine them, such that everything stays on the

/// vector register bank.

/// \p return The node that would replace \p BT, if the combine

/// is possible.


static SDValue CombineVMOVDRRCandidateWithVecOp(const SDNode *BC,

                                                SelectionDAG &DAG) {

  SDValue Op = BC->getOperand(0);

  EVT DstVT = BC->getValueType(0);


  // The only vector instruction that can produce a scalar (remember,

  // since the bitcast was about to be turned into VMOVDRR, the source

  // type is i64) from a vector is EXTRACT_VECTOR_ELT.

  // Moreover, we can do this combine only if there is one use.

  // Finally, if the destination type is not a vector, there is not

  // much point on forcing everything on the vector bank.

  if (!DstVT.isVector() || Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||

      !Op.hasOneUse())

    return SDValue();


  // If the index is not constant, we will introduce an additional

  // multiply that will stick.

  // Give up in that case.

  ConstantSDNode *Index = dyn_cast<ConstantSDNode>(Op.getOperand(1));

  if (!Index)

    return SDValue();

  unsigned DstNumElt = DstVT.getVectorNumElements();


  // Compute the new index.

  const APInt &APIntIndex = Index->getAPIntValue();

  APInt NewIndex(APIntIndex.getBitWidth(), DstNumElt);

  NewIndex *= APIntIndex;

  // Check if the new constant index fits into i32.

  if (NewIndex.getBitWidth() > 32)

    return SDValue();


  // vMTy bitcast(i64 extractelt vNi64 src, i32 index) ->

  // vMTy extractsubvector vNxMTy (bitcast vNi64 src), i32 index*M)

  SDLoc dl(Op);

  SDValue ExtractSrc = Op.getOperand(0);

  EVT VecVT = EVT::getVectorVT(

      *DAG.getContext(), DstVT.getScalarType(),

      ExtractSrc.getValueType().getVectorNumElements() * DstNumElt);

  SDValue BitCast = DAG.getNode(ISD::BITCAST, dl, VecVT, ExtractSrc);

  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DstVT, BitCast,

                     DAG.getConstant(NewIndex.getZExtValue(), dl, MVT::i32));

}


/// ExpandBITCAST - If the target supports VFP, this function is called to

/// expand a bit convert where either the source or destination type is i64 to

/// use a VMOVDRR or VMOVRRD node.  This should not be done when the non-i64

/// operand type is illegal (e.g., v2f32 for a target that doesn't support

/// vectors), since the legalizer won't know what to do with that.

SDValue ARMTargetLowering::ExpandBITCAST(SDNode *N, SelectionDAG &DAG,

                                         const ARMSubtarget *Subtarget) const {

  SDLoc dl(N);

  SDValue Op = N->getOperand(0);


  // This function is only supposed to be called for i16 and i64 types, either

  // as the source or destination of the bit convert.

  EVT SrcVT = Op.getValueType();

  EVT DstVT = N->getValueType(0);


  if ((SrcVT == MVT::i16 || SrcVT == MVT::i32) &&

      (DstVT == MVT::f16 || DstVT == MVT::bf16))

    return MoveToHPR(SDLoc(N), DAG, MVT::i32, DstVT.getSimpleVT(),

                     DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), MVT::i32, Op));


  if ((DstVT == MVT::i16 || DstVT == MVT::i32) &&

      (SrcVT == MVT::f16 || SrcVT == MVT::bf16)) {

    if (Subtarget->hasFullFP16() && !Subtarget->hasBF16())

      Op = DAG.getBitcast(MVT::f16, Op);

    return DAG.getNode(

        ISD::TRUNCATE, SDLoc(N), DstVT,

        MoveFromHPR(SDLoc(N), DAG, MVT::i32, SrcVT.getSimpleVT(), Op));

  }


  if (!(SrcVT == MVT::i64 || DstVT == MVT::i64))

    return SDValue();


  // Turn i64->f64 into VMOVDRR.

  if (SrcVT == MVT::i64 && isTypeLegal(DstVT)) {

    // Do not force values to GPRs (this is what VMOVDRR does for the inputs)

    // if we can combine the bitcast with its source.

    if (SDValue Val = CombineVMOVDRRCandidateWithVecOp(N, DAG))

      return Val;

    SDValue Lo, Hi;

    std::tie(Lo, Hi) = DAG.SplitScalar(Op, dl, MVT::i32, MVT::i32);

    return DAG.getNode(ISD::BITCAST, dl, DstVT,

                       DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi));

  }


  // Turn f64->i64 into VMOVRRD.

  if (DstVT == MVT::i64 && isTypeLegal(SrcVT)) {

    SDValue Cvt;

    if (DAG.getDataLayout().isBigEndian() && SrcVT.isVector() &&

        SrcVT.getVectorNumElements() > 1)

      Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,

                        DAG.getVTList(MVT::i32, MVT::i32),

                        DAG.getNode(ARMISD::VREV64, dl, SrcVT, Op));

    else

      Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,

                        DAG.getVTList(MVT::i32, MVT::i32), Op);

    // Merge the pieces into a single i64 value.

    return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1));

  }


  return SDValue();

}


/// getZeroVector - Returns a vector of specified type with all zero elements.

/// Zero vectors are used to represent vector negation and in those cases

/// will be implemented with the NEON VNEG instruction.  However, VNEG does

/// not support i64 elements, so sometimes the zero vectors will need to be

/// explicitly constructed.  Regardless, use a canonical VMOV to create the

/// zero vector.


static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, const SDLoc &dl) {

  assert(VT.isVector() && "Expected a vector type");

  // The canonical modified immediate encoding of a zero vector is....0!

  SDValue EncodedVal = DAG.getTargetConstant(0, dl, MVT::i32);

  EVT VmovVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;

  SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, EncodedVal);

  return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);

}


/// LowerShiftRightParts - Lower SRA_PARTS, which returns two

/// i32 values and take a 2 x i32 value to shift plus a shift amount.

SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op,

                                                SelectionDAG &DAG) const {

  assert(Op.getNumOperands() == 3 && "Not a double-shift!");

  EVT VT = Op.getValueType();

  unsigned VTBits = VT.getSizeInBits();

  SDLoc dl(Op);

  SDValue ShOpLo = Op.getOperand(0);

  SDValue ShOpHi = Op.getOperand(1);

  SDValue ShAmt  = Op.getOperand(2);

  SDValue ARMcc;

  unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;


  assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);


  SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,

                                 DAG.getConstant(VTBits, dl, MVT::i32), ShAmt);

  SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);

  SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,

                                   DAG.getConstant(VTBits, dl, MVT::i32));

  SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);

  SDValue LoSmallShift = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);

  SDValue LoBigShift = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);

  SDValue CmpLo = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),

                            ISD::SETGE, ARMcc, DAG, dl);

  SDValue Lo =

      DAG.getNode(ARMISD::CMOV, dl, VT, LoSmallShift, LoBigShift, ARMcc, CmpLo);


  SDValue HiSmallShift = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);

  SDValue HiBigShift = Opc == ISD::SRA

                           ? DAG.getNode(Opc, dl, VT, ShOpHi,

                                         DAG.getConstant(VTBits - 1, dl, VT))

                           : DAG.getConstant(0, dl, VT);

  SDValue CmpHi = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),

                            ISD::SETGE, ARMcc, DAG, dl);

  SDValue Hi =

      DAG.getNode(ARMISD::CMOV, dl, VT, HiSmallShift, HiBigShift, ARMcc, CmpHi);


  SDValue Ops[2] = { Lo, Hi };

  return DAG.getMergeValues(Ops, dl);

}


/// LowerShiftLeftParts - Lower SHL_PARTS, which returns two

/// i32 values and take a 2 x i32 value to shift plus a shift amount.

SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op,

                                               SelectionDAG &DAG) const {

  assert(Op.getNumOperands() == 3 && "Not a double-shift!");

  EVT VT = Op.getValueType();

  unsigned VTBits = VT.getSizeInBits();

  SDLoc dl(Op);

  SDValue ShOpLo = Op.getOperand(0);

  SDValue ShOpHi = Op.getOperand(1);

  SDValue ShAmt  = Op.getOperand(2);

  SDValue ARMcc;


  assert(Op.getOpcode() == ISD::SHL_PARTS);

  SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,

                                 DAG.getConstant(VTBits, dl, MVT::i32), ShAmt);

  SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);

  SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);

  SDValue HiSmallShift = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);


  SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,

                                   DAG.getConstant(VTBits, dl, MVT::i32));

  SDValue HiBigShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);

  SDValue CmpHi = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),

                            ISD::SETGE, ARMcc, DAG, dl);

  SDValue Hi =

      DAG.getNode(ARMISD::CMOV, dl, VT, HiSmallShift, HiBigShift, ARMcc, CmpHi);


  SDValue CmpLo = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),

                          ISD::SETGE, ARMcc, DAG, dl);

  SDValue LoSmallShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);

  SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, LoSmallShift,

                           DAG.getConstant(0, dl, VT), ARMcc, CmpLo);


  SDValue Ops[2] = { Lo, Hi };

  return DAG.getMergeValues(Ops, dl);

}


SDValue ARMTargetLowering::LowerGET_ROUNDING(SDValue Op,

                                             SelectionDAG &DAG) const {

  // The rounding mode is in bits 23:22 of the FPSCR.

  // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0

  // The formula we use to implement this is (((FPSCR + 1 << 22) >> 22) & 3)

  // so that the shift + and get folded into a bitfield extract.

  SDLoc dl(Op);

  SDValue Chain = Op.getOperand(0);

  SDValue Ops[] = {Chain,

                   DAG.getConstant(Intrinsic::arm_get_fpscr, dl, MVT::i32)};


  SDValue FPSCR =

      DAG.getNode(ISD::INTRINSIC_W_CHAIN, dl, {MVT::i32, MVT::Other}, Ops);

  Chain = FPSCR.getValue(1);

  SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR,

                                  DAG.getConstant(1U << 22, dl, MVT::i32));

  SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,

                              DAG.getConstant(22, dl, MVT::i32));

  SDValue And = DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,

                            DAG.getConstant(3, dl, MVT::i32));

  return DAG.getMergeValues({And, Chain}, dl);

}


SDValue ARMTargetLowering::LowerSET_ROUNDING(SDValue Op,

                                             SelectionDAG &DAG) const {

  SDLoc DL(Op);

  SDValue Chain = Op->getOperand(0);

  SDValue RMValue = Op->getOperand(1);


  // The rounding mode is in bits 23:22 of the FPSCR.

  // The llvm.set.rounding argument value to ARM rounding mode value mapping

  // is 0->3, 1->0, 2->1, 3->2. The formula we use to implement this is

  // ((arg - 1) & 3) << 22).

  //

  // It is expected that the argument of llvm.set.rounding is within the

  // segment [0, 3], so NearestTiesToAway (4) is not handled here. It is

  // responsibility of the code generated llvm.set.rounding to ensure this

  // condition.


  // Calculate new value of FPSCR[23:22].

  RMValue = DAG.getNode(ISD::SUB, DL, MVT::i32, RMValue,

                        DAG.getConstant(1, DL, MVT::i32));

  RMValue = DAG.getNode(ISD::AND, DL, MVT::i32, RMValue,

                        DAG.getConstant(0x3, DL, MVT::i32));

  RMValue = DAG.getNode(ISD::SHL, DL, MVT::i32, RMValue,

                        DAG.getConstant(ARM::RoundingBitsPos, DL, MVT::i32));


  // Get current value of FPSCR.

  SDValue Ops[] = {Chain,

                   DAG.getConstant(Intrinsic::arm_get_fpscr, DL, MVT::i32)};

  SDValue FPSCR =

      DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, {MVT::i32, MVT::Other}, Ops);

  Chain = FPSCR.getValue(1);

  FPSCR = FPSCR.getValue(0);


  // Put new rounding mode into FPSCR[23:22].

  const unsigned RMMask = ~(ARM::Rounding::rmMask << ARM::RoundingBitsPos);

  FPSCR = DAG.getNode(ISD::AND, DL, MVT::i32, FPSCR,

                      DAG.getConstant(RMMask, DL, MVT::i32));

  FPSCR = DAG.getNode(ISD::OR, DL, MVT::i32, FPSCR, RMValue);

  SDValue Ops2[] = {

      Chain, DAG.getConstant(Intrinsic::arm_set_fpscr, DL, MVT::i32), FPSCR};

  return DAG.getNode(ISD::INTRINSIC_VOID, DL, MVT::Other, Ops2);

}


SDValue ARMTargetLowering::LowerSET_FPMODE(SDValue Op,

                                           SelectionDAG &DAG) const {

  SDLoc DL(Op);

  SDValue Chain = Op->getOperand(0);

  SDValue Mode = Op->getOperand(1);


  // Generate nodes to build:

  // FPSCR = (FPSCR & FPStatusBits) | (Mode & ~FPStatusBits)

  SDValue Ops[] = {Chain,

                   DAG.getConstant(Intrinsic::arm_get_fpscr, DL, MVT::i32)};

  SDValue FPSCR =

      DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, {MVT::i32, MVT::Other}, Ops);

  Chain = FPSCR.getValue(1);

  FPSCR = FPSCR.getValue(0);


  SDValue FPSCRMasked =

      DAG.getNode(ISD::AND, DL, MVT::i32, FPSCR,

                  DAG.getConstant(ARM::FPStatusBits, DL, MVT::i32));

  SDValue InputMasked =

      DAG.getNode(ISD::AND, DL, MVT::i32, Mode,

                  DAG.getConstant(~ARM::FPStatusBits, DL, MVT::i32));

  FPSCR = DAG.getNode(ISD::OR, DL, MVT::i32, FPSCRMasked, InputMasked);


  SDValue Ops2[] = {

      Chain, DAG.getConstant(Intrinsic::arm_set_fpscr, DL, MVT::i32), FPSCR};

  return DAG.getNode(ISD::INTRINSIC_VOID, DL, MVT::Other, Ops2);

}


SDValue ARMTargetLowering::LowerRESET_FPMODE(SDValue Op,

                                             SelectionDAG &DAG) const {

  SDLoc DL(Op);

  SDValue Chain = Op->getOperand(0);


  // To get the default FP mode all control bits are cleared:

  // FPSCR = FPSCR & (FPStatusBits | FPReservedBits)

  SDValue Ops[] = {Chain,

                   DAG.getConstant(Intrinsic::arm_get_fpscr, DL, MVT::i32)};

  SDValue FPSCR =

      DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, {MVT::i32, MVT::Other}, Ops);

  Chain = FPSCR.getValue(1);

  FPSCR = FPSCR.getValue(0);


  SDValue FPSCRMasked = DAG.getNode(

      ISD::AND, DL, MVT::i32, FPSCR,

      DAG.getConstant(ARM::FPStatusBits | ARM::FPReservedBits, DL, MVT::i32));

  SDValue Ops2[] = {Chain,

                    DAG.getConstant(Intrinsic::arm_set_fpscr, DL, MVT::i32),

                    FPSCRMasked};

  return DAG.getNode(ISD::INTRINSIC_VOID, DL, MVT::Other, Ops2);

}


static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,

                         const ARMSubtarget *ST) {

  SDLoc dl(N);

  EVT VT = N->getValueType(0);

  if (VT.isVector() && ST->hasNEON()) {


    // Compute the least significant set bit: LSB = X & -X

    SDValue X = N->getOperand(0);

    SDValue NX = DAG.getNode(ISD::SUB, dl, VT, getZeroVector(VT, DAG, dl), X);

    SDValue LSB = DAG.getNode(ISD::AND, dl, VT, X, NX);


    EVT ElemTy = VT.getVectorElementType();


    if (ElemTy == MVT::i8) {

      // Compute with: cttz(x) = ctpop(lsb - 1)

      SDValue One = DAG.getNode(ARMISD::VMOVIMM, dl, VT,

                                DAG.getTargetConstant(1, dl, ElemTy));

      SDValue Bits = DAG.getNode(ISD::SUB, dl, VT, LSB, One);

      return DAG.getNode(ISD::CTPOP, dl, VT, Bits);

    }


    if ((ElemTy == MVT::i16 || ElemTy == MVT::i32) &&

        (N->getOpcode() == ISD::CTTZ_ZERO_UNDEF)) {

      // Compute with: cttz(x) = (width - 1) - ctlz(lsb), if x != 0

      unsigned NumBits = ElemTy.getSizeInBits();

      SDValue WidthMinus1 =

          DAG.getNode(ARMISD::VMOVIMM, dl, VT,

                      DAG.getTargetConstant(NumBits - 1, dl, ElemTy));

      SDValue CTLZ = DAG.getNode(ISD::CTLZ, dl, VT, LSB);

      return DAG.getNode(ISD::SUB, dl, VT, WidthMinus1, CTLZ);

    }


    // Compute with: cttz(x) = ctpop(lsb - 1)


    // Compute LSB - 1.

    SDValue Bits;

    if (ElemTy == MVT::i64) {

      // Load constant 0xffff'ffff'ffff'ffff to register.

      SDValue FF = DAG.getNode(ARMISD::VMOVIMM, dl, VT,

                               DAG.getTargetConstant(0x1eff, dl, MVT::i32));

      Bits = DAG.getNode(ISD::ADD, dl, VT, LSB, FF);

    } else {

      SDValue One = DAG.getNode(ARMISD::VMOVIMM, dl, VT,

                                DAG.getTargetConstant(1, dl, ElemTy));

      Bits = DAG.getNode(ISD::SUB, dl, VT, LSB, One);

    }

    return DAG.getNode(ISD::CTPOP, dl, VT, Bits);

  }


  if (!ST->hasV6T2Ops())

    return SDValue();


  SDValue rbit = DAG.getNode(ISD::BITREVERSE, dl, VT, N->getOperand(0));

  return DAG.getNode(ISD::CTLZ, dl, VT, rbit);

}


static SDValue LowerCTPOP(SDNode *N, SelectionDAG &DAG,

                          const ARMSubtarget *ST) {

  EVT VT = N->getValueType(0);

  SDLoc DL(N);


  assert(ST->hasNEON() && "Custom ctpop lowering requires NEON.");

  assert((VT == MVT::v1i64 || VT == MVT::v2i64 || VT == MVT::v2i32 ||

          VT == MVT::v4i32 || VT == MVT::v4i16 || VT == MVT::v8i16) &&

         "Unexpected type for custom ctpop lowering");


  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8;

  SDValue Res = DAG.getBitcast(VT8Bit, N->getOperand(0));

  Res = DAG.getNode(ISD::CTPOP, DL, VT8Bit, Res);


  // Widen v8i8/v16i8 CTPOP result to VT by repeatedly widening pairwise adds.

  unsigned EltSize = 8;

  unsigned NumElts = VT.is64BitVector() ? 8 : 16;

  while (EltSize != VT.getScalarSizeInBits()) {

    SmallVector<SDValue, 8> Ops;

    Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpaddlu, DL,

                                  TLI.getPointerTy(DAG.getDataLayout())));

    Ops.push_back(Res);


    EltSize *= 2;

    NumElts /= 2;

    MVT WidenVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize), NumElts);

    Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, WidenVT, Ops);

  }


  return Res;

}


/// Getvshiftimm - Check if this is a valid build_vector for the immediate

/// operand of a vector shift operation, where all the elements of the

/// build_vector must have the same constant integer value.


static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {

  // Ignore bit_converts.

  while (Op.getOpcode() == ISD::BITCAST)

    Op = Op.getOperand(0);

  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());

  APInt SplatBits, SplatUndef;

  unsigned SplatBitSize;

  bool HasAnyUndefs;

  if (!BVN ||

      !BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,

                            ElementBits) ||

      SplatBitSize > ElementBits)

    return false;

  Cnt = SplatBits.getSExtValue();

  return true;

}


/// isVShiftLImm - Check if this is a valid build_vector for the immediate

/// operand of a vector shift left operation.  That value must be in the range:

///   0 <= Value < ElementBits for a left shift; or

///   0 <= Value <= ElementBits for a long left shift.


static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) {

  assert(VT.isVector() && "vector shift count is not a vector type");

  int64_t ElementBits = VT.getScalarSizeInBits();

  if (!getVShiftImm(Op, ElementBits, Cnt))

    return false;

  return (Cnt >= 0 && (isLong ? Cnt - 1 : Cnt) < ElementBits);

}


/// isVShiftRImm - Check if this is a valid build_vector for the immediate

/// operand of a vector shift right operation.  For a shift opcode, the value

/// is positive, but for an intrinsic the value count must be negative. The

/// absolute value must be in the range:

///   1 <= |Value| <= ElementBits for a right shift; or

///   1 <= |Value| <= ElementBits/2 for a narrow right shift.


static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic,

                         int64_t &Cnt) {

  assert(VT.isVector() && "vector shift count is not a vector type");

  int64_t ElementBits = VT.getScalarSizeInBits();

  if (!getVShiftImm(Op, ElementBits, Cnt))

    return false;

  if (!isIntrinsic)

    return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits / 2 : ElementBits));

  if (Cnt >= -(isNarrow ? ElementBits / 2 : ElementBits) && Cnt <= -1) {

    Cnt = -Cnt;

    return true;

  }

  return false;

}


static SDValue LowerShift(SDNode *N, SelectionDAG &DAG,

                          const ARMSubtarget *ST) {

  EVT VT = N->getValueType(0);

  SDLoc dl(N);

  int64_t Cnt;


  if (!VT.isVector())

    return SDValue();


  // We essentially have two forms here. Shift by an immediate and shift by a

  // vector register (there are also shift by a gpr, but that is just handled

  // with a tablegen pattern). We cannot easily match shift by an immediate in

  // tablegen so we do that here and generate a VSHLIMM/VSHRsIMM/VSHRuIMM.

  // For shifting by a vector, we don't have VSHR, only VSHL (which can be

  // signed or unsigned, and a negative shift indicates a shift right).

  if (N->getOpcode() == ISD::SHL) {

    if (isVShiftLImm(N->getOperand(1), VT, false, Cnt))

      return DAG.getNode(ARMISD::VSHLIMM, dl, VT, N->getOperand(0),

                         DAG.getConstant(Cnt, dl, MVT::i32));

    return DAG.getNode(ARMISD::VSHLu, dl, VT, N->getOperand(0),

                       N->getOperand(1));

  }


  assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&

         "unexpected vector shift opcode");


  if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) {

    unsigned VShiftOpc =

        (N->getOpcode() == ISD::SRA ? ARMISD::VSHRsIMM : ARMISD::VSHRuIMM);

    return DAG.getNode(VShiftOpc, dl, VT, N->getOperand(0),

                       DAG.getConstant(Cnt, dl, MVT::i32));

  }


  // Other right shifts we don't have operations for (we use a shift left by a

  // negative number).

  EVT ShiftVT = N->getOperand(1).getValueType();

  SDValue NegatedCount = DAG.getNode(

      ISD::SUB, dl, ShiftVT, getZeroVector(ShiftVT, DAG, dl), N->getOperand(1));

  unsigned VShiftOpc =

      (N->getOpcode() == ISD::SRA ? ARMISD::VSHLs : ARMISD::VSHLu);

  return DAG.getNode(VShiftOpc, dl, VT, N->getOperand(0), NegatedCount);

}


static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG,

                                const ARMSubtarget *ST) {

  EVT VT = N->getValueType(0);

  SDLoc dl(N);


  // We can get here for a node like i32 = ISD::SHL i32, i64

  if (VT != MVT::i64)

    return SDValue();


  assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA ||

          N->getOpcode() == ISD::SHL) &&

         "Unknown shift to lower!");


  unsigned ShOpc = N->getOpcode();

  if (ST->hasMVEIntegerOps()) {

    SDValue ShAmt = N->getOperand(1);

    unsigned ShPartsOpc = ARMISD::LSLL;

    ConstantSDNode *Con = dyn_cast<ConstantSDNode>(ShAmt);


    // If the shift amount is greater than 32 or has a greater bitwidth than 64

    // then do the default optimisation

    if ((!Con && ShAmt->getValueType(0).getSizeInBits() > 64) ||

        (Con && (Con->getAPIntValue() == 0 || Con->getAPIntValue().uge(32))))

      return SDValue();


    // Extract the lower 32 bits of the shift amount if it's not an i32

    if (ShAmt->getValueType(0) != MVT::i32)

      ShAmt = DAG.getZExtOrTrunc(ShAmt, dl, MVT::i32);


    if (ShOpc == ISD::SRL) {

      if (!Con)

        // There is no t2LSRLr instruction so negate and perform an lsll if the

        // shift amount is in a register, emulating a right shift.

        ShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,

                            DAG.getConstant(0, dl, MVT::i32), ShAmt);

      else

        // Else generate an lsrl on the immediate shift amount

        ShPartsOpc = ARMISD::LSRL;

    } else if (ShOpc == ISD::SRA)

      ShPartsOpc = ARMISD::ASRL;


    // Split Lower/Upper 32 bits of the destination/source

    SDValue Lo, Hi;

    std::tie(Lo, Hi) =

        DAG.SplitScalar(N->getOperand(0), dl, MVT::i32, MVT::i32);

    // Generate the shift operation as computed above

    Lo = DAG.getNode(ShPartsOpc, dl, DAG.getVTList(MVT::i32, MVT::i32), Lo, Hi,

                     ShAmt);

    // The upper 32 bits come from the second return value of lsll

    Hi = SDValue(Lo.getNode(), 1);

    return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);

  }


  // We only lower SRA, SRL of 1 here, all others use generic lowering.

  if (!isOneConstant(N->getOperand(1)) || N->getOpcode() == ISD::SHL)

    return SDValue();


  // If we are in thumb mode, we don't have RRX.

  if (ST->isThumb1Only())

    return SDValue();


  // Okay, we have a 64-bit SRA or SRL of 1.  Lower this to an RRX expr.

  SDValue Lo, Hi;

  std::tie(Lo, Hi) = DAG.SplitScalar(N->getOperand(0), dl, MVT::i32, MVT::i32);


  // First, build a LSRS1/ASRS1 op, which shifts the top part by one and

  // captures the shifted out bit into a carry flag.

  unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::LSRS1 : ARMISD::ASRS1;

  Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, FlagsVT), Hi);


  // The low part is an ARMISD::RRX operand, which shifts the carry in.

  Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1));


  // Merge the pieces into a single i64 value.

 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);

}


static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG,

                           const ARMSubtarget *ST) {

  bool Invert = false;

  bool Swap = false;

  unsigned Opc = ARMCC::AL;


  SDValue Op0 = Op.getOperand(0);

  SDValue Op1 = Op.getOperand(1);

  SDValue CC = Op.getOperand(2);

  EVT VT = Op.getValueType();

  ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();

  SDLoc dl(Op);


  EVT CmpVT;

  if (ST->hasNEON())

    CmpVT = Op0.getValueType().changeVectorElementTypeToInteger();

  else {

    assert(ST->hasMVEIntegerOps() &&

           "No hardware support for integer vector comparison!");


    if (Op.getValueType().getVectorElementType() != MVT::i1)

      return SDValue();


    // Make sure we expand floating point setcc to scalar if we do not have

    // mve.fp, so that we can handle them from there.

    if (Op0.getValueType().isFloatingPoint() && !ST->hasMVEFloatOps())

      return SDValue();


    CmpVT = VT;

  }


  if (Op0.getValueType().getVectorElementType() == MVT::i64 &&

      (SetCCOpcode == ISD::SETEQ || SetCCOpcode == ISD::SETNE)) {

    // Special-case integer 64-bit equality comparisons. They aren't legal,

    // but they can be lowered with a few vector instructions.

    unsigned CmpElements = CmpVT.getVectorNumElements() * 2;

    EVT SplitVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, CmpElements);

    SDValue CastOp0 = DAG.getNode(ISD::BITCAST, dl, SplitVT, Op0);

    SDValue CastOp1 = DAG.getNode(ISD::BITCAST, dl, SplitVT, Op1);

    SDValue Cmp = DAG.getNode(ISD::SETCC, dl, SplitVT, CastOp0, CastOp1,

                              DAG.getCondCode(ISD::SETEQ));

    SDValue Reversed = DAG.getNode(ARMISD::VREV64, dl, SplitVT, Cmp);

    SDValue Merged = DAG.getNode(ISD::AND, dl, SplitVT, Cmp, Reversed);

    Merged = DAG.getNode(ISD::BITCAST, dl, CmpVT, Merged);

    if (SetCCOpcode == ISD::SETNE)

      Merged = DAG.getNOT(dl, Merged, CmpVT);

    Merged = DAG.getSExtOrTrunc(Merged, dl, VT);

    return Merged;

  }


  if (CmpVT.getVectorElementType() == MVT::i64)

    // 64-bit comparisons are not legal in general.

    return SDValue();


  if (Op1.getValueType().isFloatingPoint()) {

    switch (SetCCOpcode) {

    default: llvm_unreachable("Illegal FP comparison");

    case ISD::SETUNE:

    case ISD::SETNE:

      if (ST->hasMVEFloatOps()) {

        Opc = ARMCC::NE; break;

      } else {

        Invert = true; [[fallthrough]];

      }

    case ISD::SETOEQ:

    case ISD::SETEQ:  Opc = ARMCC::EQ; break;

    case ISD::SETOLT:

    case ISD::SETLT: Swap = true; [[fallthrough]];

    case ISD::SETOGT:

    case ISD::SETGT:  Opc = ARMCC::GT; break;

    case ISD::SETOLE:

    case ISD::SETLE:  Swap = true; [[fallthrough]];

    case ISD::SETOGE:

    case ISD::SETGE: Opc = ARMCC::GE; break;

    case ISD::SETUGE: Swap = true; [[fallthrough]];

    case ISD::SETULE: Invert = true; Opc = ARMCC::GT; break;

    case ISD::SETUGT: Swap = true; [[fallthrough]];

    case ISD::SETULT: Invert = true; Opc = ARMCC::GE; break;

    case ISD::SETUEQ: Invert = true; [[fallthrough]];

    case ISD::SETONE: {

      // Expand this to (OLT | OGT).

      SDValue TmpOp0 = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op1, Op0,

                                   DAG.getConstant(ARMCC::GT, dl, MVT::i32));

      SDValue TmpOp1 = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op0, Op1,

                                   DAG.getConstant(ARMCC::GT, dl, MVT::i32));

      SDValue Result = DAG.getNode(ISD::OR, dl, CmpVT, TmpOp0, TmpOp1);

      if (Invert)

        Result = DAG.getNOT(dl, Result, VT);

      return Result;

    }

    case ISD::SETUO: Invert = true; [[fallthrough]];

    case ISD::SETO: {

      // Expand this to (OLT | OGE).

      SDValue TmpOp0 = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op1, Op0,

                                   DAG.getConstant(ARMCC::GT, dl, MVT::i32));

      SDValue TmpOp1 = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op0, Op1,

                                   DAG.getConstant(ARMCC::GE, dl, MVT::i32));

      SDValue Result = DAG.getNode(ISD::OR, dl, CmpVT, TmpOp0, TmpOp1);

      if (Invert)

        Result = DAG.getNOT(dl, Result, VT);

      return Result;

    }

    }

  } else {

    // Integer comparisons.

    switch (SetCCOpcode) {

    default: llvm_unreachable("Illegal integer comparison");

    case ISD::SETNE:

      if (ST->hasMVEIntegerOps()) {

        Opc = ARMCC::NE; break;

      } else {

        Invert = true; [[fallthrough]];

      }

    case ISD::SETEQ:  Opc = ARMCC::EQ; break;

    case ISD::SETLT:  Swap = true; [[fallthrough]];

    case ISD::SETGT:  Opc = ARMCC::GT; break;

    case ISD::SETLE:  Swap = true; [[fallthrough]];

    case ISD::SETGE:  Opc = ARMCC::GE; break;

    case ISD::SETULT: Swap = true; [[fallthrough]];

    case ISD::SETUGT: Opc = ARMCC::HI; break;

    case ISD::SETULE: Swap = true; [[fallthrough]];

    case ISD::SETUGE: Opc = ARMCC::HS; break;

    }


    // Detect VTST (Vector Test Bits) = icmp ne (and (op0, op1), zero).

    if (ST->hasNEON() && Opc == ARMCC::EQ) {

      SDValue AndOp;

      if (ISD::isBuildVectorAllZeros(Op1.getNode()))

        AndOp = Op0;

      else if (ISD::isBuildVectorAllZeros(Op0.getNode()))

        AndOp = Op1;


      // Ignore bitconvert.

      if (AndOp.getNode() && AndOp.getOpcode() == ISD::BITCAST)

        AndOp = AndOp.getOperand(0);


      if (AndOp.getNode() && AndOp.getOpcode() == ISD::AND) {

        Op0 = DAG.getNode(ISD::BITCAST, dl, CmpVT, AndOp.getOperand(0));

        Op1 = DAG.getNode(ISD::BITCAST, dl, CmpVT, AndOp.getOperand(1));

        SDValue Result = DAG.getNode(ARMISD::VTST, dl, CmpVT, Op0, Op1);

        if (!Invert)

          Result = DAG.getNOT(dl, Result, VT);

        return Result;

      }

    }

  }


  if (Swap)

    std::swap(Op0, Op1);


  // If one of the operands is a constant vector zero, attempt to fold the

  // comparison to a specialized compare-against-zero form.

  if (ISD::isBuildVectorAllZeros(Op0.getNode()) &&

      (Opc == ARMCC::GE || Opc == ARMCC::GT || Opc == ARMCC::EQ ||

       Opc == ARMCC::NE)) {

    if (Opc == ARMCC::GE)

      Opc = ARMCC::LE;

    else if (Opc == ARMCC::GT)

      Opc = ARMCC::LT;

    std::swap(Op0, Op1);

  }


  SDValue Result;

  if (ISD::isBuildVectorAllZeros(Op1.getNode()) &&

      (Opc == ARMCC::GE || Opc == ARMCC::GT || Opc == ARMCC::LE ||

       Opc == ARMCC::LT || Opc == ARMCC::NE || Opc == ARMCC::EQ))

    Result = DAG.getNode(ARMISD::VCMPZ, dl, CmpVT, Op0,

                         DAG.getConstant(Opc, dl, MVT::i32));

  else

    Result = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op0, Op1,

                         DAG.getConstant(Opc, dl, MVT::i32));


  Result = DAG.getSExtOrTrunc(Result, dl, VT);


  if (Invert)

    Result = DAG.getNOT(dl, Result, VT);


  return Result;

}


static SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) {

  SDValue LHS = Op.getOperand(0);

  SDValue RHS = Op.getOperand(1);

  SDValue Carry = Op.getOperand(2);

  SDValue Cond = Op.getOperand(3);

  SDLoc DL(Op);


  assert(LHS.getSimpleValueType().isInteger() && "SETCCCARRY is integer only.");


  // ARMISD::SUBE expects a carry not a borrow like ISD::USUBO_CARRY so we

  // have to invert the carry first.

  Carry = DAG.getNode(ISD::SUB, DL, MVT::i32,

                      DAG.getConstant(1, DL, MVT::i32), Carry);

  // This converts the boolean value carry into the carry flag.

  Carry = ConvertBooleanCarryToCarryFlag(Carry, DAG);


  SDVTList VTs = DAG.getVTList(LHS.getValueType(), MVT::i32);

  SDValue Cmp = DAG.getNode(ARMISD::SUBE, DL, VTs, LHS, RHS, Carry);


  SDValue FVal = DAG.getConstant(0, DL, MVT::i32);

  SDValue TVal = DAG.getConstant(1, DL, MVT::i32);

  SDValue ARMcc = DAG.getConstant(

      IntCCToARMCC(cast<CondCodeSDNode>(Cond)->get()), DL, MVT::i32);

  return DAG.getNode(ARMISD::CMOV, DL, Op.getValueType(), FVal, TVal, ARMcc,

                     Cmp.getValue(1));

}


/// isVMOVModifiedImm - Check if the specified splat value corresponds to a

/// valid vector constant for a NEON or MVE instruction with a "modified

/// immediate" operand (e.g., VMOV).  If so, return the encoded value.


static SDValue isVMOVModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,

                                 unsigned SplatBitSize, SelectionDAG &DAG,

                                 const SDLoc &dl, EVT &VT, EVT VectorVT,

                                 VMOVModImmType type) {

  unsigned OpCmode, Imm;

  bool is128Bits = VectorVT.is128BitVector();


  // SplatBitSize is set to the smallest size that splats the vector, so a

  // zero vector will always have SplatBitSize == 8.  However, NEON modified

  // immediate instructions others than VMOV do not support the 8-bit encoding

  // of a zero vector, and the default encoding of zero is supposed to be the

  // 32-bit version.

  if (SplatBits == 0)

    SplatBitSize = 32;


  switch (SplatBitSize) {

  case 8:

    if (type != VMOVModImm)

      return SDValue();

    // Any 1-byte value is OK.  Op=0, Cmode=1110.

    assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big");

    OpCmode = 0xe;

    Imm = SplatBits;

    VT = is128Bits ? MVT::v16i8 : MVT::v8i8;

    break;


  case 16:

    // NEON's 16-bit VMOV supports splat values where only one byte is nonzero.

    VT = is128Bits ? MVT::v8i16 : MVT::v4i16;

    if ((SplatBits & ~0xff) == 0) {

      // Value = 0x00nn: Op=x, Cmode=100x.

      OpCmode = 0x8;

      Imm = SplatBits;

      break;

    }

    if ((SplatBits & ~0xff00) == 0) {

      // Value = 0xnn00: Op=x, Cmode=101x.

      OpCmode = 0xa;

      Imm = SplatBits >> 8;

      break;

    }

    return SDValue();


  case 32:

    // NEON's 32-bit VMOV supports splat values where:

    // * only one byte is nonzero, or

    // * the least significant byte is 0xff and the second byte is nonzero, or

    // * the least significant 2 bytes are 0xff and the third is nonzero.

    VT = is128Bits ? MVT::v4i32 : MVT::v2i32;

    if ((SplatBits & ~0xff) == 0) {

      // Value = 0x000000nn: Op=x, Cmode=000x.

      OpCmode = 0;

      Imm = SplatBits;

      break;

    }

    if ((SplatBits & ~0xff00) == 0) {

      // Value = 0x0000nn00: Op=x, Cmode=001x.

      OpCmode = 0x2;

      Imm = SplatBits >> 8;

      break;

    }

    if ((SplatBits & ~0xff0000) == 0) {

      // Value = 0x00nn0000: Op=x, Cmode=010x.

      OpCmode = 0x4;

      Imm = SplatBits >> 16;

      break;

    }

    if ((SplatBits & ~0xff000000) == 0) {

      // Value = 0xnn000000: Op=x, Cmode=011x.

      OpCmode = 0x6;

      Imm = SplatBits >> 24;

      break;

    }


    // cmode == 0b1100 and cmode == 0b1101 are not supported for VORR or VBIC

    if (type == OtherModImm) return SDValue();


    if ((SplatBits & ~0xffff) == 0 &&

        ((SplatBits | SplatUndef) & 0xff) == 0xff) {

      // Value = 0x0000nnff: Op=x, Cmode=1100.

      OpCmode = 0xc;

      Imm = SplatBits >> 8;

      break;

    }


    // cmode == 0b1101 is not supported for MVE VMVN

    if (type == MVEVMVNModImm)

      return SDValue();


    if ((SplatBits & ~0xffffff) == 0 &&

        ((SplatBits | SplatUndef) & 0xffff) == 0xffff) {

      // Value = 0x00nnffff: Op=x, Cmode=1101.

      OpCmode = 0xd;

      Imm = SplatBits >> 16;

      break;

    }


    // Note: there are a few 32-bit splat values (specifically: 00ffff00,

    // ff000000, ff0000ff, and ffff00ff) that are valid for VMOV.I64 but not

    // VMOV.I32.  A (very) minor optimization would be to replicate the value

    // and fall through here to test for a valid 64-bit splat.  But, then the

    // caller would also need to check and handle the change in size.

    return SDValue();


  case 64: {

    if (type != VMOVModImm)

      return SDValue();

    // NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff.

    uint64_t BitMask = 0xff;

    unsigned ImmMask = 1;

    Imm = 0;

    for (int ByteNum = 0; ByteNum < 8; ++ByteNum) {

      if (((SplatBits | SplatUndef) & BitMask) == BitMask) {

        Imm |= ImmMask;

      } else if ((SplatBits & BitMask) != 0) {

        return SDValue();

      }

      BitMask <<= 8;

      ImmMask <<= 1;

    }


    // Op=1, Cmode=1110.

    OpCmode = 0x1e;

    VT = is128Bits ? MVT::v2i64 : MVT::v1i64;

    break;

  }


  default:

    llvm_unreachable("unexpected size for isVMOVModifiedImm");

  }


  unsigned EncodedVal = ARM_AM::createVMOVModImm(OpCmode, Imm);

  return DAG.getTargetConstant(EncodedVal, dl, MVT::i32);

}


SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,

                                           const ARMSubtarget *ST) const {

  EVT VT = Op.getValueType();

  bool IsDouble = (VT == MVT::f64);

  ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op);

  const APFloat &FPVal = CFP->getValueAPF();


  // Prevent floating-point constants from using literal loads

  // when execute-only is enabled.

  if (ST->genExecuteOnly()) {

    // We shouldn't trigger this for v6m execute-only

    assert((!ST->isThumb1Only() || ST->hasV8MBaselineOps()) &&

           "Unexpected architecture");


    // If we can represent the constant as an immediate, don't lower it

    if (isFPImmLegal(FPVal, VT))

      return Op;

    // Otherwise, construct as integer, and move to float register

    APInt INTVal = FPVal.bitcastToAPInt();

    SDLoc DL(CFP);

    switch (VT.getSimpleVT().SimpleTy) {

      default:

        llvm_unreachable("Unknown floating point type!");

        break;

      case MVT::f64: {

        SDValue Lo = DAG.getConstant(INTVal.trunc(32), DL, MVT::i32);

        SDValue Hi = DAG.getConstant(INTVal.lshr(32).trunc(32), DL, MVT::i32);

        return DAG.getNode(ARMISD::VMOVDRR, DL, MVT::f64, Lo, Hi);

      }

      case MVT::f32:

          return DAG.getNode(ARMISD::VMOVSR, DL, VT,

              DAG.getConstant(INTVal, DL, MVT::i32));

    }

  }


  if (!ST->hasVFP3Base())

    return SDValue();


  // Use the default (constant pool) lowering for double constants when we have

  // an SP-only FPU

  if (IsDouble && !Subtarget->hasFP64())

    return SDValue();


  // Try splatting with a VMOV.f32...

  int ImmVal = IsDouble ? ARM_AM::getFP64Imm(FPVal) : ARM_AM::getFP32Imm(FPVal);


  if (ImmVal != -1) {

    if (IsDouble || !ST->useNEONForSinglePrecisionFP()) {

      // We have code in place to select a valid ConstantFP already, no need to

      // do any mangling.

      return Op;

    }


    // It's a float and we are trying to use NEON operations where

    // possible. Lower it to a splat followed by an extract.

    SDLoc DL(Op);

    SDValue NewVal = DAG.getTargetConstant(ImmVal, DL, MVT::i32);

    SDValue VecConstant = DAG.getNode(ARMISD::VMOVFPIMM, DL, MVT::v2f32,

                                      NewVal);

    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecConstant,

                       DAG.getConstant(0, DL, MVT::i32));

  }


  // The rest of our options are NEON only, make sure that's allowed before

  // proceeding..

  if (!ST->hasNEON() || (!IsDouble && !ST->useNEONForSinglePrecisionFP()))

    return SDValue();


  EVT VMovVT;

  uint64_t iVal = FPVal.bitcastToAPInt().getZExtValue();


  // It wouldn't really be worth bothering for doubles except for one very

  // important value, which does happen to match: 0.0. So make sure we don't do

  // anything stupid.

  if (IsDouble && (iVal & 0xffffffff) != (iVal >> 32))

    return SDValue();


  // Try a VMOV.i32 (FIXME: i8, i16, or i64 could work too).

  SDValue NewVal = isVMOVModifiedImm(iVal & 0xffffffffU, 0, 32, DAG, SDLoc(Op),

                                     VMovVT, VT, VMOVModImm);

  if (NewVal != SDValue()) {

    SDLoc DL(Op);

    SDValue VecConstant = DAG.getNode(ARMISD::VMOVIMM, DL, VMovVT,

                                      NewVal);

    if (IsDouble)

      return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant);


    // It's a float: cast and extract a vector element.

    SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,

                                       VecConstant);

    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,

                       DAG.getConstant(0, DL, MVT::i32));

  }


  // Finally, try a VMVN.i32

  NewVal = isVMOVModifiedImm(~iVal & 0xffffffffU, 0, 32, DAG, SDLoc(Op), VMovVT,

                             VT, VMVNModImm);

  if (NewVal != SDValue()) {

    SDLoc DL(Op);

    SDValue VecConstant = DAG.getNode(ARMISD::VMVNIMM, DL, VMovVT, NewVal);


    if (IsDouble)

      return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant);


    // It's a float: cast and extract a vector element.

    SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,

                                       VecConstant);

    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,

                       DAG.getConstant(0, DL, MVT::i32));

  }


  return SDValue();

}


// check if an VEXT instruction can handle the shuffle mask when the

// vector sources of the shuffle are the same.


static bool isSingletonVEXTMask(ArrayRef<int> M, EVT VT, unsigned &Imm) {

  unsigned NumElts = VT.getVectorNumElements();


  // Assume that the first shuffle index is not UNDEF.  Fail if it is.

  if (M[0] < 0)

    return false;


  Imm = M[0];


  // If this is a VEXT shuffle, the immediate value is the index of the first

  // element.  The other shuffle indices must be the successive elements after

  // the first one.

  unsigned ExpectedElt = Imm;

  for (unsigned i = 1; i < NumElts; ++i) {

    // Increment the expected index.  If it wraps around, just follow it

    // back to index zero and keep going.

    ++ExpectedElt;

    if (ExpectedElt == NumElts)

      ExpectedElt = 0;


    if (M[i] < 0) continue; // ignore UNDEF indices

    if (ExpectedElt != static_cast<unsigned>(M[i]))

      return false;

  }


  return true;

}


static bool isVEXTMask(ArrayRef<int> M, EVT VT,

                       bool &ReverseVEXT, unsigned &Imm) {

  unsigned NumElts = VT.getVectorNumElements();

  ReverseVEXT = false;


  // Assume that the first shuffle index is not UNDEF.  Fail if it is.

  if (M[0] < 0)

    return false;


  Imm = M[0];


  // If this is a VEXT shuffle, the immediate value is the index of the first

  // element.  The other shuffle indices must be the successive elements after

  // the first one.

  unsigned ExpectedElt = Imm;

  for (unsigned i = 1; i < NumElts; ++i) {

    // Increment the expected index.  If it wraps around, it may still be

    // a VEXT but the source vectors must be swapped.

    ExpectedElt += 1;

    if (ExpectedElt == NumElts * 2) {

      ExpectedElt = 0;

      ReverseVEXT = true;

    }


    if (M[i] < 0) continue; // ignore UNDEF indices

    if (ExpectedElt != static_cast<unsigned>(M[i]))

      return false;

  }


  // Adjust the index value if the source operands will be swapped.

  if (ReverseVEXT)

    Imm -= NumElts;


  return true;

}


static bool isVTBLMask(ArrayRef<int> M, EVT VT) {

  // We can handle <8 x i8> vector shuffles. If the index in the mask is out of

  // range, then 0 is placed into the resulting vector. So pretty much any mask

  // of 8 elements can work here.

  return VT == MVT::v8i8 && M.size() == 8;

}


static unsigned SelectPairHalf(unsigned Elements, ArrayRef<int> Mask,

                               unsigned Index) {

  if (Mask.size() == Elements * 2)

    return Index / Elements;

  return Mask[Index] == 0 ? 0 : 1;

}


// Checks whether the shuffle mask represents a vector transpose (VTRN) by

// checking that pairs of elements in the shuffle mask represent the same index

// in each vector, incrementing the expected index by 2 at each step.

// e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 4, 2, 6]

//  v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,e,c,g}

//  v2={e,f,g,h}

// WhichResult gives the offset for each element in the mask based on which

// of the two results it belongs to.

//

// The transpose can be represented either as:

// result1 = shufflevector v1, v2, result1_shuffle_mask

// result2 = shufflevector v1, v2, result2_shuffle_mask

// where v1/v2 and the shuffle masks have the same number of elements

// (here WhichResult (see below) indicates which result is being checked)

//

// or as:

// results = shufflevector v1, v2, shuffle_mask

// where both results are returned in one vector and the shuffle mask has twice

// as many elements as v1/v2 (here WhichResult will always be 0 if true) here we

// want to check the low half and high half of the shuffle mask as if it were

// the other case


static bool isVTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {

  unsigned EltSz = VT.getScalarSizeInBits();

  if (EltSz == 64)

    return false;


  unsigned NumElts = VT.getVectorNumElements();

  if ((M.size() != NumElts && M.size() != NumElts * 2) || NumElts % 2 != 0)

    return false;


  // If the mask is twice as long as the input vector then we need to check the

  // upper and lower parts of the mask with a matching value for WhichResult

  // FIXME: A mask with only even values will be rejected in case the first

  // element is undefined, e.g. [-1, 4, 2, 6] will be rejected, because only

  // M[0] is used to determine WhichResult

  for (unsigned i = 0; i < M.size(); i += NumElts) {

    WhichResult = SelectPairHalf(NumElts, M, i);

    for (unsigned j = 0; j < NumElts; j += 2) {

      if ((M[i+j] >= 0 && (unsigned) M[i+j] != j + WhichResult) ||

          (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != j + NumElts + WhichResult))

        return false;

    }

  }


  if (M.size() == NumElts*2)

    WhichResult = 0;


  return true;

}


/// isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of

/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".

/// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>.


static bool isVTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){

  unsigned EltSz = VT.getScalarSizeInBits();

  if (EltSz == 64)

    return false;


  unsigned NumElts = VT.getVectorNumElements();

  if ((M.size() != NumElts && M.size() != NumElts * 2) || NumElts % 2 != 0)

    return false;


  for (unsigned i = 0; i < M.size(); i += NumElts) {

    WhichResult = SelectPairHalf(NumElts, M, i);

    for (unsigned j = 0; j < NumElts; j += 2) {

      if ((M[i+j] >= 0 && (unsigned) M[i+j] != j + WhichResult) ||

          (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != j + WhichResult))

        return false;

    }

  }


  if (M.size() == NumElts*2)

    WhichResult = 0;


  return true;

}


// Checks whether the shuffle mask represents a vector unzip (VUZP) by checking

// that the mask elements are either all even and in steps of size 2 or all odd

// and in steps of size 2.

// e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 2, 4, 6]

//  v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,c,e,g}

//  v2={e,f,g,h}

// Requires similar checks to that of isVTRNMask with

// respect the how results are returned.


static bool isVUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {

  unsigned EltSz = VT.getScalarSizeInBits();

  if (EltSz == 64)

    return false;


  unsigned NumElts = VT.getVectorNumElements();

  if (M.size() != NumElts && M.size() != NumElts*2)

    return false;


  for (unsigned i = 0; i < M.size(); i += NumElts) {

    WhichResult = SelectPairHalf(NumElts, M, i);

    for (unsigned j = 0; j < NumElts; ++j) {

      if (M[i+j] >= 0 && (unsigned) M[i+j] != 2 * j + WhichResult)

        return false;

    }

  }


  if (M.size() == NumElts*2)

    WhichResult = 0;


  // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.

  if (VT.is64BitVector() && EltSz == 32)

    return false;


  return true;

}


/// isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of

/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".

/// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>,


static bool isVUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){

  unsigned EltSz = VT.getScalarSizeInBits();

  if (EltSz == 64)

    return false;


  unsigned NumElts = VT.getVectorNumElements();

  if (M.size() != NumElts && M.size() != NumElts*2)

    return false;


  unsigned Half = NumElts / 2;

  for (unsigned i = 0; i < M.size(); i += NumElts) {

    WhichResult = SelectPairHalf(NumElts, M, i);

    for (unsigned j = 0; j < NumElts; j += Half) {

      unsigned Idx = WhichResult;

      for (unsigned k = 0; k < Half; ++k) {

        int MIdx = M[i + j + k];

        if (MIdx >= 0 && (unsigned) MIdx != Idx)

          return false;

        Idx += 2;

      }

    }

  }


  if (M.size() == NumElts*2)

    WhichResult = 0;


  // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.

  if (VT.is64BitVector() && EltSz == 32)

    return false;


  return true;

}


// Checks whether the shuffle mask represents a vector zip (VZIP) by checking

// that pairs of elements of the shufflemask represent the same index in each

// vector incrementing sequentially through the vectors.

// e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 4, 1, 5]

//  v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,e,b,f}

//  v2={e,f,g,h}

// Requires similar checks to that of isVTRNMask with respect the how results

// are returned.


static bool isVZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {

  unsigned EltSz = VT.getScalarSizeInBits();

  if (EltSz == 64)

    return false;


  unsigned NumElts = VT.getVectorNumElements();

  if ((M.size() != NumElts && M.size() != NumElts * 2) || NumElts % 2 != 0)

    return false;


  for (unsigned i = 0; i < M.size(); i += NumElts) {

    WhichResult = SelectPairHalf(NumElts, M, i);

    unsigned Idx = WhichResult * NumElts / 2;

    for (unsigned j = 0; j < NumElts; j += 2) {

      if ((M[i+j] >= 0 && (unsigned) M[i+j] != Idx) ||

          (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != Idx + NumElts))

        return false;

      Idx += 1;

    }

  }


  if (M.size() == NumElts*2)

    WhichResult = 0;


  // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.

  if (VT.is64BitVector() && EltSz == 32)

    return false;


  return true;

}


/// isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of

/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".

/// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>.


static bool isVZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){

  unsigned EltSz = VT.getScalarSizeInBits();

  if (EltSz == 64)

    return false;


  unsigned NumElts = VT.getVectorNumElements();

  if ((M.size() != NumElts && M.size() != NumElts * 2) || NumElts % 2 != 0)

    return false;


  for (unsigned i = 0; i < M.size(); i += NumElts) {

    WhichResult = SelectPairHalf(NumElts, M, i);

    unsigned Idx = WhichResult * NumElts / 2;

    for (unsigned j = 0; j < NumElts; j += 2) {

      if ((M[i+j] >= 0 && (unsigned) M[i+j] != Idx) ||

          (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != Idx))

        return false;

      Idx += 1;

    }

  }


  if (M.size() == NumElts*2)

    WhichResult = 0;


  // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.

  if (VT.is64BitVector() && EltSz == 32)

    return false;


  return true;

}


/// Check if \p ShuffleMask is a NEON two-result shuffle (VZIP, VUZP, VTRN),

/// and return the corresponding ARMISD opcode if it is, or 0 if it isn't.


static unsigned isNEONTwoResultShuffleMask(ArrayRef<int> ShuffleMask, EVT VT,

                                           unsigned &WhichResult,

                                           bool &isV_UNDEF) {

  isV_UNDEF = false;

  if (isVTRNMask(ShuffleMask, VT, WhichResult))

    return ARMISD::VTRN;

  if (isVUZPMask(ShuffleMask, VT, WhichResult))

    return ARMISD::VUZP;

  if (isVZIPMask(ShuffleMask, VT, WhichResult))

    return ARMISD::VZIP;


  isV_UNDEF = true;

  if (isVTRN_v_undef_Mask(ShuffleMask, VT, WhichResult))

    return ARMISD::VTRN;

  if (isVUZP_v_undef_Mask(ShuffleMask, VT, WhichResult))

    return ARMISD::VUZP;

  if (isVZIP_v_undef_Mask(ShuffleMask, VT, WhichResult))

    return ARMISD::VZIP;


  return 0;

}


/// \return true if this is a reverse operation on an vector.


static bool isReverseMask(ArrayRef<int> M, EVT VT) {

  unsigned NumElts = VT.getVectorNumElements();

  // Make sure the mask has the right size.

  if (NumElts != M.size())

      return false;


  // Look for <15, ..., 3, -1, 1, 0>.

  for (unsigned i = 0; i != NumElts; ++i)

    if (M[i] >= 0 && M[i] != (int) (NumElts - 1 - i))

      return false;


  return true;

}


static bool isTruncMask(ArrayRef<int> M, EVT VT, bool Top, bool SingleSource) {

  unsigned NumElts = VT.getVectorNumElements();

  // Make sure the mask has the right size.

  if (NumElts != M.size() || (VT != MVT::v8i16 && VT != MVT::v16i8))

    return false;


  // Half-width truncation patterns (e.g. v4i32 -> v8i16):

  // !Top &&  SingleSource: <0, 2, 4, 6, 0, 2, 4, 6>

  // !Top && !SingleSource: <0, 2, 4, 6, 8, 10, 12, 14>

  //  Top &&  SingleSource: <1, 3, 5, 7, 1, 3, 5, 7>

  //  Top && !SingleSource: <1, 3, 5, 7, 9, 11, 13, 15>

  int Ofs = Top ? 1 : 0;

  int Upper = SingleSource ? 0 : NumElts;

  for (int i = 0, e = NumElts / 2; i != e; ++i) {

    if (M[i] >= 0 && M[i] != (i * 2) + Ofs)

      return false;

    if (M[i + e] >= 0 && M[i + e] != (i * 2) + Ofs + Upper)

      return false;

  }

  return true;

}


static bool isVMOVNMask(ArrayRef<int> M, EVT VT, bool Top, bool SingleSource) {

  unsigned NumElts = VT.getVectorNumElements();

  // Make sure the mask has the right size.

  if (NumElts != M.size() || (VT != MVT::v8i16 && VT != MVT::v16i8))

    return false;


  // If Top

  //   Look for <0, N, 2, N+2, 4, N+4, ..>.

  //   This inserts Input2 into Input1

  // else if not Top

  //   Look for <0, N+1, 2, N+3, 4, N+5, ..>

  //   This inserts Input1 into Input2

  unsigned Offset = Top ? 0 : 1;

  unsigned N = SingleSource ? 0 : NumElts;

  for (unsigned i = 0; i < NumElts; i += 2) {

    if (M[i] >= 0 && M[i] != (int)i)

      return false;

    if (M[i + 1] >= 0 && M[i + 1] != (int)(N + i + Offset))

      return false;

  }


  return true;

}


static bool isVMOVNTruncMask(ArrayRef<int> M, EVT ToVT, bool rev) {

  unsigned NumElts = ToVT.getVectorNumElements();

  if (NumElts != M.size())

    return false;


  // Test if the Trunc can be convertable to a VMOVN with this shuffle. We are

  // looking for patterns of:

  // !rev: 0 N/2 1 N/2+1 2 N/2+2 ...

  //  rev: N/2 0 N/2+1 1 N/2+2 2 ...


  unsigned Off0 = rev ? NumElts / 2 : 0;

  unsigned Off1 = rev ? 0 : NumElts / 2;

  for (unsigned i = 0; i < NumElts; i += 2) {

    if (M[i] >= 0 && M[i] != (int)(Off0 + i / 2))

      return false;

    if (M[i + 1] >= 0 && M[i + 1] != (int)(Off1 + i / 2))

      return false;

  }


  return true;

}


// Reconstruct an MVE VCVT from a BuildVector of scalar fptrunc, all extracted

// from a pair of inputs. For example:

// BUILDVECTOR(FP_ROUND(EXTRACT_ELT(X, 0),

//             FP_ROUND(EXTRACT_ELT(Y, 0),

//             FP_ROUND(EXTRACT_ELT(X, 1),

//             FP_ROUND(EXTRACT_ELT(Y, 1), ...)


static SDValue LowerBuildVectorOfFPTrunc(SDValue BV, SelectionDAG &DAG,

                                         const ARMSubtarget *ST) {

  assert(BV.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");

  if (!ST->hasMVEFloatOps())

    return SDValue();


  SDLoc dl(BV);

  EVT VT = BV.getValueType();

  if (VT != MVT::v8f16)

    return SDValue();


  // We are looking for a buildvector of fptrunc elements, where all the

  // elements are interleavingly extracted from two sources. Check the first two

  // items are valid enough and extract some info from them (they are checked

  // properly in the loop below).

  if (BV.getOperand(0).getOpcode() != ISD::FP_ROUND ||

      BV.getOperand(0).getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||

      BV.getOperand(0).getOperand(0).getConstantOperandVal(1) != 0)

    return SDValue();

  if (BV.getOperand(1).getOpcode() != ISD::FP_ROUND ||

      BV.getOperand(1).getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||

      BV.getOperand(1).getOperand(0).getConstantOperandVal(1) != 0)

    return SDValue();

  SDValue Op0 = BV.getOperand(0).getOperand(0).getOperand(0);

  SDValue Op1 = BV.getOperand(1).getOperand(0).getOperand(0);

  if (Op0.getValueType() != MVT::v4f32 || Op1.getValueType() != MVT::v4f32)

    return SDValue();


  // Check all the values in the BuildVector line up with our expectations.

  for (unsigned i = 1; i < 4; i++) {

    auto Check = [](SDValue Trunc, SDValue Op, unsigned Idx) {

      return Trunc.getOpcode() == ISD::FP_ROUND &&

             Trunc.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&

             Trunc.getOperand(0).getOperand(0) == Op &&

             Trunc.getOperand(0).getConstantOperandVal(1) == Idx;

    };

    if (!Check(BV.getOperand(i * 2 + 0), Op0, i))

      return SDValue();

    if (!Check(BV.getOperand(i * 2 + 1), Op1, i))

      return SDValue();

  }


  SDValue N1 = DAG.getNode(ARMISD::VCVTN, dl, VT, DAG.getUNDEF(VT), Op0,

                           DAG.getConstant(0, dl, MVT::i32));

  return DAG.getNode(ARMISD::VCVTN, dl, VT, N1, Op1,

                     DAG.getConstant(1, dl, MVT::i32));

}


// Reconstruct an MVE VCVT from a BuildVector of scalar fpext, all extracted

// from a single input on alternating lanes. For example:

// BUILDVECTOR(FP_ROUND(EXTRACT_ELT(X, 0),

//             FP_ROUND(EXTRACT_ELT(X, 2),

//             FP_ROUND(EXTRACT_ELT(X, 4), ...)


static SDValue LowerBuildVectorOfFPExt(SDValue BV, SelectionDAG &DAG,

                                       const ARMSubtarget *ST) {

  assert(BV.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");

  if (!ST->hasMVEFloatOps())

    return SDValue();


  SDLoc dl(BV);

  EVT VT = BV.getValueType();

  if (VT != MVT::v4f32)

    return SDValue();


  // We are looking for a buildvector of fptext elements, where all the

  // elements are alternating lanes from a single source. For example <0,2,4,6>

  // or <1,3,5,7>. Check the first two items are valid enough and extract some

  // info from them (they are checked properly in the loop below).

  if (BV.getOperand(0).getOpcode() != ISD::FP_EXTEND ||

      BV.getOperand(0).getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT)

    return SDValue();

  SDValue Op0 = BV.getOperand(0).getOperand(0).getOperand(0);

  int Offset = BV.getOperand(0).getOperand(0).getConstantOperandVal(1);

  if (Op0.getValueType() != MVT::v8f16 || (Offset != 0 && Offset != 1))

    return SDValue();


  // Check all the values in the BuildVector line up with our expectations.

  for (unsigned i = 1; i < 4; i++) {

    auto Check = [](SDValue Trunc, SDValue Op, unsigned Idx) {

      return Trunc.getOpcode() == ISD::FP_EXTEND &&

             Trunc.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&

             Trunc.getOperand(0).getOperand(0) == Op &&

             Trunc.getOperand(0).getConstantOperandVal(1) == Idx;

    };

    if (!Check(BV.getOperand(i), Op0, 2 * i + Offset))

      return SDValue();

  }


  return DAG.getNode(ARMISD::VCVTL, dl, VT, Op0,

                     DAG.getConstant(Offset, dl, MVT::i32));

}


// If N is an integer constant that can be moved into a register in one

// instruction, return an SDValue of such a constant (will become a MOV

// instruction).  Otherwise return null.


static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG,

                                     const ARMSubtarget *ST, const SDLoc &dl) {

  uint64_t Val;

  if (!isa<ConstantSDNode>(N))

    return SDValue();

  Val = N->getAsZExtVal();


  if (ST->isThumb1Only()) {

    if (Val <= 255 || ~Val <= 255)

      return DAG.getConstant(Val, dl, MVT::i32);

  } else {

    if (ARM_AM::getSOImmVal(Val) != -1 || ARM_AM::getSOImmVal(~Val) != -1)

      return DAG.getConstant(Val, dl, MVT::i32);

  }

  return SDValue();

}


static SDValue LowerBUILD_VECTOR_i1(SDValue Op, SelectionDAG &DAG,

                                    const ARMSubtarget *ST) {

  SDLoc dl(Op);

  EVT VT = Op.getValueType();


  assert(ST->hasMVEIntegerOps() && "LowerBUILD_VECTOR_i1 called without MVE!");


  unsigned NumElts = VT.getVectorNumElements();

  unsigned BoolMask;

  unsigned BitsPerBool;

  if (NumElts == 2) {

    BitsPerBool = 8;

    BoolMask = 0xff;

  } else if (NumElts == 4) {

    BitsPerBool = 4;

    BoolMask = 0xf;

  } else if (NumElts == 8) {

    BitsPerBool = 2;

    BoolMask = 0x3;

  } else if (NumElts == 16) {

    BitsPerBool = 1;

    BoolMask = 0x1;

  } else

    return SDValue();


  // If this is a single value copied into all lanes (a splat), we can just sign

  // extend that single value

  SDValue FirstOp = Op.getOperand(0);

  if (!isa<ConstantSDNode>(FirstOp) &&

      llvm::all_of(llvm::drop_begin(Op->ops()), [&FirstOp](const SDUse &U) {

        return U.get().isUndef() || U.get() == FirstOp;

      })) {

    SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::i32, FirstOp,

                              DAG.getValueType(MVT::i1));

    return DAG.getNode(ARMISD::PREDICATE_CAST, dl, Op.getValueType(), Ext);

  }


  // First create base with bits set where known

  unsigned Bits32 = 0;

  for (unsigned i = 0; i < NumElts; ++i) {

    SDValue V = Op.getOperand(i);

    if (!isa<ConstantSDNode>(V) && !V.isUndef())

      continue;

    bool BitSet = V.isUndef() ? false : V->getAsZExtVal();

    if (BitSet)

      Bits32 |= BoolMask << (i * BitsPerBool);

  }


  // Add in unknown nodes

  SDValue Base = DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT,

                             DAG.getConstant(Bits32, dl, MVT::i32));

  for (unsigned i = 0; i < NumElts; ++i) {

    SDValue V = Op.getOperand(i);

    if (isa<ConstantSDNode>(V) || V.isUndef())

      continue;

    Base = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Base, V,

                       DAG.getConstant(i, dl, MVT::i32));

  }


  return Base;

}


static SDValue LowerBUILD_VECTORToVIDUP(SDValue Op, SelectionDAG &DAG,

                                        const ARMSubtarget *ST) {

  if (!ST->hasMVEIntegerOps())

    return SDValue();


  // We are looking for a buildvector where each element is Op[0] + i*N

  EVT VT = Op.getValueType();

  SDValue Op0 = Op.getOperand(0);

  unsigned NumElts = VT.getVectorNumElements();


  // Get the increment value from operand 1

  SDValue Op1 = Op.getOperand(1);

  if (Op1.getOpcode() != ISD::ADD || Op1.getOperand(0) != Op0 ||

      !isa<ConstantSDNode>(Op1.getOperand(1)))

    return SDValue();

  unsigned N = Op1.getConstantOperandVal(1);

  if (N != 1 && N != 2 && N != 4 && N != 8)

    return SDValue();


  // Check that each other operand matches

  for (unsigned I = 2; I < NumElts; I++) {

    SDValue OpI = Op.getOperand(I);

    if (OpI.getOpcode() != ISD::ADD || OpI.getOperand(0) != Op0 ||

        !isa<ConstantSDNode>(OpI.getOperand(1)) ||

        OpI.getConstantOperandVal(1) != I * N)

      return SDValue();

  }


  SDLoc DL(Op);

  return DAG.getNode(ARMISD::VIDUP, DL, DAG.getVTList(VT, MVT::i32), Op0,

                     DAG.getConstant(N, DL, MVT::i32));

}


// Returns true if the operation N can be treated as qr instruction variant at

// operand Op.


static bool IsQRMVEInstruction(const SDNode *N, const SDNode *Op) {

  switch (N->getOpcode()) {

  case ISD::ADD:

  case ISD::MUL:

  case ISD::SADDSAT:

  case ISD::UADDSAT:

  case ISD::AVGFLOORS:

  case ISD::AVGFLOORU:

    return true;

  case ISD::SUB:

  case ISD::SSUBSAT:

  case ISD::USUBSAT:

    return N->getOperand(1).getNode() == Op;

  case ISD::INTRINSIC_WO_CHAIN:

    switch (N->getConstantOperandVal(0)) {

    case Intrinsic::arm_mve_add_predicated:

    case Intrinsic::arm_mve_mul_predicated:

    case Intrinsic::arm_mve_qadd_predicated:

    case Intrinsic::arm_mve_vhadd:

    case Intrinsic::arm_mve_hadd_predicated:

    case Intrinsic::arm_mve_vqdmulh:

    case Intrinsic::arm_mve_qdmulh_predicated:

    case Intrinsic::arm_mve_vqrdmulh:

    case Intrinsic::arm_mve_qrdmulh_predicated:

    case Intrinsic::arm_mve_vqdmull:

    case Intrinsic::arm_mve_vqdmull_predicated:

      return true;

    case Intrinsic::arm_mve_sub_predicated:

    case Intrinsic::arm_mve_qsub_predicated:

    case Intrinsic::arm_mve_vhsub:

    case Intrinsic::arm_mve_hsub_predicated:

      return N->getOperand(2).getNode() == Op;

    default:

      return false;

    }

  default:

    return false;

  }

}


// If this is a case we can't handle, return null and let the default

// expansion code take care of it.

SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,

                                             const ARMSubtarget *ST) const {

  BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());

  SDLoc dl(Op);

  EVT VT = Op.getValueType();


  if (ST->hasMVEIntegerOps() && VT.getScalarSizeInBits() == 1)

    return LowerBUILD_VECTOR_i1(Op, DAG, ST);


  if (SDValue R = LowerBUILD_VECTORToVIDUP(Op, DAG, ST))

    return R;


  APInt SplatBits, SplatUndef;

  unsigned SplatBitSize;

  bool HasAnyUndefs;

  if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {

    if (SplatUndef.isAllOnes())

      return DAG.getUNDEF(VT);


    // If all the users of this constant splat are qr instruction variants,

    // generate a vdup of the constant.

    if (ST->hasMVEIntegerOps() && VT.getScalarSizeInBits() == SplatBitSize &&

        (SplatBitSize == 8 || SplatBitSize == 16 || SplatBitSize == 32) &&

        all_of(BVN->users(),

               [BVN](const SDNode *U) { return IsQRMVEInstruction(U, BVN); })) {

      EVT DupVT = SplatBitSize == 32   ? MVT::v4i32

                  : SplatBitSize == 16 ? MVT::v8i16

                                       : MVT::v16i8;

      SDValue Const = DAG.getConstant(SplatBits.getZExtValue(), dl, MVT::i32);

      SDValue VDup = DAG.getNode(ARMISD::VDUP, dl, DupVT, Const);

      return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, VDup);

    }


    if ((ST->hasNEON() && SplatBitSize <= 64) ||

        (ST->hasMVEIntegerOps() && SplatBitSize <= 64)) {

      // Check if an immediate VMOV works.

      EVT VmovVT;

      SDValue Val =

          isVMOVModifiedImm(SplatBits.getZExtValue(), SplatUndef.getZExtValue(),

                            SplatBitSize, DAG, dl, VmovVT, VT, VMOVModImm);


      if (Val.getNode()) {

        SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, Val);

        return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Vmov);

      }


      // Try an immediate VMVN.

      uint64_t NegatedImm = (~SplatBits).getZExtValue();

      Val = isVMOVModifiedImm(

          NegatedImm, SplatUndef.getZExtValue(), SplatBitSize, DAG, dl, VmovVT,

          VT, ST->hasMVEIntegerOps() ? MVEVMVNModImm : VMVNModImm);

      if (Val.getNode()) {

        SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val);

        return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Vmov);

      }


      // Use vmov.f32 to materialize other v2f32 and v4f32 splats.

      if ((VT == MVT::v2f32 || VT == MVT::v4f32) && SplatBitSize == 32) {

        int ImmVal = ARM_AM::getFP32Imm(SplatBits);

        if (ImmVal != -1) {

          SDValue Val = DAG.getTargetConstant(ImmVal, dl, MVT::i32);

          return DAG.getNode(ARMISD::VMOVFPIMM, dl, VT, Val);

        }

      }


      // If we are under MVE, generate a VDUP(constant), bitcast to the original

      // type.

      if (ST->hasMVEIntegerOps() &&

          (SplatBitSize == 8 || SplatBitSize == 16 || SplatBitSize == 32)) {

        EVT DupVT = SplatBitSize == 32   ? MVT::v4i32

                    : SplatBitSize == 16 ? MVT::v8i16

                                         : MVT::v16i8;

        SDValue Const = DAG.getConstant(SplatBits.getZExtValue(), dl, MVT::i32);

        SDValue VDup = DAG.getNode(ARMISD::VDUP, dl, DupVT, Const);

        return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, VDup);

      }

    }

  }


  // Scan through the operands to see if only one value is used.

  //

  // As an optimisation, even if more than one value is used it may be more

  // profitable to splat with one value then change some lanes.

  //

  // Heuristically we decide to do this if the vector has a "dominant" value,

  // defined as splatted to more than half of the lanes.

  unsigned NumElts = VT.getVectorNumElements();

  bool isOnlyLowElement = true;

  bool usesOnlyOneValue = true;

  bool hasDominantValue = false;

  bool isConstant = true;


  // Map of the number of times a particular SDValue appears in the

  // element list.

  DenseMap<SDValue, unsigned> ValueCounts;

  SDValue Value;

  for (unsigned i = 0; i < NumElts; ++i) {

    SDValue V = Op.getOperand(i);

    if (V.isUndef())

      continue;

    if (i > 0)

      isOnlyLowElement = false;

    if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))

      isConstant = false;


    unsigned &Count = ValueCounts[V];


    // Is this value dominant? (takes up more than half of the lanes)

    if (++Count > (NumElts / 2)) {

      hasDominantValue = true;

      Value = V;

    }

  }

  if (ValueCounts.size() != 1)

    usesOnlyOneValue = false;

  if (!Value.getNode() && !ValueCounts.empty())

    Value = ValueCounts.begin()->first;


  if (ValueCounts.empty())

    return DAG.getUNDEF(VT);


  // Loads are better lowered with insert_vector_elt/ARMISD::BUILD_VECTOR.

  // Keep going if we are hitting this case.

  if (isOnlyLowElement && !ISD::isNormalLoad(Value.getNode()))

    return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value);


  unsigned EltSize = VT.getScalarSizeInBits();


  // Use VDUP for non-constant splats.  For f32 constant splats, reduce to

  // i32 and try again.

  if (hasDominantValue && EltSize <= 32) {

    if (!isConstant) {

      SDValue N;


      // If we are VDUPing a value that comes directly from a vector, that will

      // cause an unnecessary move to and from a GPR, where instead we could

      // just use VDUPLANE. We can only do this if the lane being extracted

      // is at a constant index, as the VDUP from lane instructions only have

      // constant-index forms.

      ConstantSDNode *constIndex;

      if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&

          (constIndex = dyn_cast<ConstantSDNode>(Value->getOperand(1)))) {

        // We need to create a new undef vector to use for the VDUPLANE if the

        // size of the vector from which we get the value is different than the

        // size of the vector that we need to create. We will insert the element

        // such that the register coalescer will remove unnecessary copies.

        if (VT != Value->getOperand(0).getValueType()) {

          unsigned index = constIndex->getAPIntValue().getLimitedValue() %

                             VT.getVectorNumElements();

          N =  DAG.getNode(ARMISD::VDUPLANE, dl, VT,

                 DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DAG.getUNDEF(VT),

                        Value, DAG.getConstant(index, dl, MVT::i32)),

                           DAG.getConstant(index, dl, MVT::i32));

        } else

          N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,

                        Value->getOperand(0), Value->getOperand(1));

      } else

        N = DAG.getNode(ARMISD::VDUP, dl, VT, Value);


      if (!usesOnlyOneValue) {

        // The dominant value was splatted as 'N', but we now have to insert

        // all differing elements.

        for (unsigned I = 0; I < NumElts; ++I) {

          if (Op.getOperand(I) == Value)

            continue;

          SmallVector<SDValue, 3> Ops;

          Ops.push_back(N);

          Ops.push_back(Op.getOperand(I));

          Ops.push_back(DAG.getConstant(I, dl, MVT::i32));

          N = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Ops);

        }

      }

      return N;

    }

    if (VT.getVectorElementType().isFloatingPoint()) {

      SmallVector<SDValue, 8> Ops;

      MVT FVT = VT.getVectorElementType().getSimpleVT();

      assert(FVT == MVT::f32 || FVT == MVT::f16);

      MVT IVT = (FVT == MVT::f32) ? MVT::i32 : MVT::i16;

      for (unsigned i = 0; i < NumElts; ++i)

        Ops.push_back(DAG.getNode(ISD::BITCAST, dl, IVT,

                                  Op.getOperand(i)));

      EVT VecVT = EVT::getVectorVT(*DAG.getContext(), IVT, NumElts);

      SDValue Val = DAG.getBuildVector(VecVT, dl, Ops);

      Val = LowerBUILD_VECTOR(Val, DAG, ST);

      if (Val.getNode())

        return DAG.getNode(ISD::BITCAST, dl, VT, Val);

    }

    if (usesOnlyOneValue) {

      SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl);

      if (isConstant && Val.getNode())

        return DAG.getNode(ARMISD::VDUP, dl, VT, Val);

    }

  }


  // If all elements are constants and the case above didn't get hit, fall back

  // to the default expansion, which will generate a load from the constant

  // pool.

  if (isConstant)

    return SDValue();


  // Reconstruct the BUILDVECTOR to one of the legal shuffles (such as vext and

  // vmovn). Empirical tests suggest this is rarely worth it for vectors of

  // length <= 2.

  if (NumElts >= 4)

    if (SDValue shuffle = ReconstructShuffle(Op, DAG))

      return shuffle;


  // Attempt to turn a buildvector of scalar fptrunc's or fpext's back into

  // VCVT's

  if (SDValue VCVT = LowerBuildVectorOfFPTrunc(Op, DAG, Subtarget))

    return VCVT;

  if (SDValue VCVT = LowerBuildVectorOfFPExt(Op, DAG, Subtarget))

    return VCVT;


  if (ST->hasNEON() && VT.is128BitVector() && VT != MVT::v2f64 && VT != MVT::v4f32) {

    // If we haven't found an efficient lowering, try splitting a 128-bit vector

    // into two 64-bit vectors; we might discover a better way to lower it.

    SmallVector<SDValue, 64> Ops(Op->op_begin(), Op->op_begin() + NumElts);

    EVT ExtVT = VT.getVectorElementType();

    EVT HVT = EVT::getVectorVT(*DAG.getContext(), ExtVT, NumElts / 2);

    SDValue Lower = DAG.getBuildVector(HVT, dl, ArrayRef(&Ops[0], NumElts / 2));

    if (Lower.getOpcode() == ISD::BUILD_VECTOR)

      Lower = LowerBUILD_VECTOR(Lower, DAG, ST);

    SDValue Upper =

        DAG.getBuildVector(HVT, dl, ArrayRef(&Ops[NumElts / 2], NumElts / 2));

    if (Upper.getOpcode() == ISD::BUILD_VECTOR)

      Upper = LowerBUILD_VECTOR(Upper, DAG, ST);

    if (Lower && Upper)

      return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lower, Upper);

  }


  // Vectors with 32- or 64-bit elements can be built by directly assigning

  // the subregisters.  Lower it to an ARMISD::BUILD_VECTOR so the operands

  // will be legalized.

  if (EltSize >= 32) {

    // Do the expansion with floating-point types, since that is what the VFP

    // registers are defined to use, and since i64 is not legal.

    EVT EltVT = EVT::getFloatingPointVT(EltSize);

    EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);

    SmallVector<SDValue, 8> Ops;

    for (unsigned i = 0; i < NumElts; ++i)

      Ops.push_back(DAG.getNode(ISD::BITCAST, dl, EltVT, Op.getOperand(i)));

    SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, Ops);

    return DAG.getNode(ISD::BITCAST, dl, VT, Val);

  }


  // If all else fails, just use a sequence of INSERT_VECTOR_ELT when we

  // know the default expansion would otherwise fall back on something even

  // worse. For a vector with one or two non-undef values, that's

  // scalar_to_vector for the elements followed by a shuffle (provided the

  // shuffle is valid for the target) and materialization element by element

  // on the stack followed by a load for everything else.

  if (!isConstant && !usesOnlyOneValue) {

    SDValue Vec = DAG.getUNDEF(VT);

    for (unsigned i = 0 ; i < NumElts; ++i) {

      SDValue V = Op.getOperand(i);

      if (V.isUndef())

        continue;

      SDValue LaneIdx = DAG.getConstant(i, dl, MVT::i32);

      Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Vec, V, LaneIdx);

    }

    return Vec;

  }


  return SDValue();

}


// Gather data to see if the operation can be modelled as a

// shuffle in combination with VEXTs.

SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,

                                              SelectionDAG &DAG) const {

  assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");

  SDLoc dl(Op);

  EVT VT = Op.getValueType();

  unsigned NumElts = VT.getVectorNumElements();


  struct ShuffleSourceInfo {

    SDValue Vec;

    unsigned MinElt = std::numeric_limits<unsigned>::max();

    unsigned MaxElt = 0;


    // We may insert some combination of BITCASTs and VEXT nodes to force Vec to

    // be compatible with the shuffle we intend to construct. As a result

    // ShuffleVec will be some sliding window into the original Vec.

    SDValue ShuffleVec;


    // Code should guarantee that element i in Vec starts at element "WindowBase

    // + i * WindowScale in ShuffleVec".

    int WindowBase = 0;

    int WindowScale = 1;


    ShuffleSourceInfo(SDValue Vec) : Vec(Vec), ShuffleVec(Vec) {}


    bool operator ==(SDValue OtherVec) { return Vec == OtherVec; }

  };


  // First gather all vectors used as an immediate source for this BUILD_VECTOR

  // node.

  SmallVector<ShuffleSourceInfo, 2> Sources;

  for (unsigned i = 0; i < NumElts; ++i) {

    SDValue V = Op.getOperand(i);

    if (V.isUndef())

      continue;

    else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT) {

      // A shuffle can only come from building a vector from various

      // elements of other vectors.

      return SDValue();

    } else if (!isa<ConstantSDNode>(V.getOperand(1))) {

      // Furthermore, shuffles require a constant mask, whereas extractelts

      // accept variable indices.

      return SDValue();

    }


    // Add this element source to the list if it's not already there.

    SDValue SourceVec = V.getOperand(0);

    auto Source = llvm::find(Sources, SourceVec);

    if (Source == Sources.end())

      Source = Sources.insert(Sources.end(), ShuffleSourceInfo(SourceVec));


    // Update the minimum and maximum lane number seen.

    unsigned EltNo = V.getConstantOperandVal(1);

    Source->MinElt = std::min(Source->MinElt, EltNo);

    Source->MaxElt = std::max(Source->MaxElt, EltNo);

  }


  // Currently only do something sane when at most two source vectors

  // are involved.

  if (Sources.size() > 2)

    return SDValue();


  // Find out the smallest element size among result and two sources, and use

  // it as element size to build the shuffle_vector.

  EVT SmallestEltTy = VT.getVectorElementType();

  for (auto &Source : Sources) {

    EVT SrcEltTy = Source.Vec.getValueType().getVectorElementType();

    if (SrcEltTy.bitsLT(SmallestEltTy))

      SmallestEltTy = SrcEltTy;

  }

  unsigned ResMultiplier =

      VT.getScalarSizeInBits() / SmallestEltTy.getSizeInBits();

  NumElts = VT.getSizeInBits() / SmallestEltTy.getSizeInBits();

  EVT ShuffleVT = EVT::getVectorVT(*DAG.getContext(), SmallestEltTy, NumElts);


  // If the source vector is too wide or too narrow, we may nevertheless be able

  // to construct a compatible shuffle either by concatenating it with UNDEF or

  // extracting a suitable range of elements.

  for (auto &Src : Sources) {

    EVT SrcVT = Src.ShuffleVec.getValueType();


    uint64_t SrcVTSize = SrcVT.getFixedSizeInBits();

    uint64_t VTSize = VT.getFixedSizeInBits();

    if (SrcVTSize == VTSize)

      continue;


    // This stage of the search produces a source with the same element type as

    // the original, but with a total width matching the BUILD_VECTOR output.

    EVT EltVT = SrcVT.getVectorElementType();

    unsigned NumSrcElts = VTSize / EltVT.getFixedSizeInBits();

    EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumSrcElts);


    if (SrcVTSize < VTSize) {

      if (2 * SrcVTSize != VTSize)

        return SDValue();

      // We can pad out the smaller vector for free, so if it's part of a

      // shuffle...

      Src.ShuffleVec =

          DAG.getNode(ISD::CONCAT_VECTORS, dl, DestVT, Src.ShuffleVec,

                      DAG.getUNDEF(Src.ShuffleVec.getValueType()));

      continue;

    }


    if (SrcVTSize != 2 * VTSize)

      return SDValue();


    if (Src.MaxElt - Src.MinElt >= NumSrcElts) {

      // Span too large for a VEXT to cope

      return SDValue();

    }


    if (Src.MinElt >= NumSrcElts) {

      // The extraction can just take the second half

      Src.ShuffleVec =

          DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,

                      DAG.getConstant(NumSrcElts, dl, MVT::i32));

      Src.WindowBase = -NumSrcElts;

    } else if (Src.MaxElt < NumSrcElts) {

      // The extraction can just take the first half

      Src.ShuffleVec =

          DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,

                      DAG.getConstant(0, dl, MVT::i32));

    } else {

      // An actual VEXT is needed

      SDValue VEXTSrc1 =

          DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,

                      DAG.getConstant(0, dl, MVT::i32));

      SDValue VEXTSrc2 =

          DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,

                      DAG.getConstant(NumSrcElts, dl, MVT::i32));


      Src.ShuffleVec = DAG.getNode(ARMISD::VEXT, dl, DestVT, VEXTSrc1,

                                   VEXTSrc2,

                                   DAG.getConstant(Src.MinElt, dl, MVT::i32));

      Src.WindowBase = -Src.MinElt;

    }

  }


  // Another possible incompatibility occurs from the vector element types. We

  // can fix this by bitcasting the source vectors to the same type we intend

  // for the shuffle.

  for (auto &Src : Sources) {

    EVT SrcEltTy = Src.ShuffleVec.getValueType().getVectorElementType();

    if (SrcEltTy == SmallestEltTy)

      continue;

    assert(ShuffleVT.getVectorElementType() == SmallestEltTy);

    Src.ShuffleVec = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, ShuffleVT, Src.ShuffleVec);

    Src.WindowScale = SrcEltTy.getSizeInBits() / SmallestEltTy.getSizeInBits();

    Src.WindowBase *= Src.WindowScale;

  }


  // Final check before we try to actually produce a shuffle.

  LLVM_DEBUG({

    for (auto Src : Sources)

      assert(Src.ShuffleVec.getValueType() == ShuffleVT);

  });


  // The stars all align, our next step is to produce the mask for the shuffle.

  SmallVector<int, 8> Mask(ShuffleVT.getVectorNumElements(), -1);

  int BitsPerShuffleLane = ShuffleVT.getScalarSizeInBits();

  for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) {

    SDValue Entry = Op.getOperand(i);

    if (Entry.isUndef())

      continue;


    auto Src = llvm::find(Sources, Entry.getOperand(0));

    int EltNo = cast<ConstantSDNode>(Entry.getOperand(1))->getSExtValue();


    // EXTRACT_VECTOR_ELT performs an implicit any_ext; BUILD_VECTOR an implicit

    // trunc. So only std::min(SrcBits, DestBits) actually get defined in this

    // segment.

    EVT OrigEltTy = Entry.getOperand(0).getValueType().getVectorElementType();

    int BitsDefined = std::min(OrigEltTy.getScalarSizeInBits(),

                               VT.getScalarSizeInBits());

    int LanesDefined = BitsDefined / BitsPerShuffleLane;


    // This source is expected to fill ResMultiplier lanes of the final shuffle,

    // starting at the appropriate offset.

    int *LaneMask = &Mask[i * ResMultiplier];


    int ExtractBase = EltNo * Src->WindowScale + Src->WindowBase;

    ExtractBase += NumElts * (Src - Sources.begin());

    for (int j = 0; j < LanesDefined; ++j)

      LaneMask[j] = ExtractBase + j;

  }


  // We can't handle more than two sources. This should have already

  // been checked before this point.

  assert(Sources.size() <= 2 && "Too many sources!");


  SDValue ShuffleOps[] = { DAG.getUNDEF(ShuffleVT), DAG.getUNDEF(ShuffleVT) };

  for (unsigned i = 0; i < Sources.size(); ++i)

    ShuffleOps[i] = Sources[i].ShuffleVec;


  SDValue Shuffle = buildLegalVectorShuffle(ShuffleVT, dl, ShuffleOps[0],

                                            ShuffleOps[1], Mask, DAG);

  if (!Shuffle)

    return SDValue();

  return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Shuffle);

}


enum ShuffleOpCodes {

  OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>

  OP_VREV,

  OP_VDUP0,

  OP_VDUP1,

  OP_VDUP2,

  OP_VDUP3,

  OP_VEXT1,

  OP_VEXT2,

  OP_VEXT3,

  OP_VUZPL, // VUZP, left result

  OP_VUZPR, // VUZP, right result

  OP_VZIPL, // VZIP, left result

  OP_VZIPR, // VZIP, right result

  OP_VTRNL, // VTRN, left result

  OP_VTRNR  // VTRN, right result

};


static bool isLegalMVEShuffleOp(unsigned PFEntry) {

  unsigned OpNum = (PFEntry >> 26) & 0x0F;

  switch (OpNum) {

  case OP_COPY:

  case OP_VREV:

  case OP_VDUP0:

  case OP_VDUP1:

  case OP_VDUP2:

  case OP_VDUP3:

    return true;

  }

  return false;

}


/// isShuffleMaskLegal - Targets can use this to indicate that they only

/// support *some* VECTOR_SHUFFLE operations, those with specific masks.

/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values

/// are assumed to be legal.


bool ARMTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {

  if (VT.getVectorNumElements() == 4 &&

      (VT.is128BitVector() || VT.is64BitVector())) {

    unsigned PFIndexes[4];

    for (unsigned i = 0; i != 4; ++i) {

      if (M[i] < 0)

        PFIndexes[i] = 8;

      else

        PFIndexes[i] = M[i];

    }


    // Compute the index in the perfect shuffle table.

    unsigned PFTableIndex =

      PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];

    unsigned PFEntry = PerfectShuffleTable[PFTableIndex];

    unsigned Cost = (PFEntry >> 30);


    if (Cost <= 4 && (Subtarget->hasNEON() || isLegalMVEShuffleOp(PFEntry)))

      return true;

  }


  bool ReverseVEXT, isV_UNDEF;

  unsigned Imm, WhichResult;


  unsigned EltSize = VT.getScalarSizeInBits();

  if (EltSize >= 32 ||

      ShuffleVectorSDNode::isSplatMask(M) ||

      ShuffleVectorInst::isIdentityMask(M, M.size()) ||

      isVREVMask(M, VT, 64) ||

      isVREVMask(M, VT, 32) ||

      isVREVMask(M, VT, 16))

    return true;

  else if (Subtarget->hasNEON() &&

           (isVEXTMask(M, VT, ReverseVEXT, Imm) ||

            isVTBLMask(M, VT) ||

            isNEONTwoResultShuffleMask(M, VT, WhichResult, isV_UNDEF)))

    return true;

  else if ((VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v16i8) &&

           isReverseMask(M, VT))

    return true;

  else if (Subtarget->hasMVEIntegerOps() &&

           (isVMOVNMask(M, VT, true, false) ||

            isVMOVNMask(M, VT, false, false) || isVMOVNMask(M, VT, true, true)))

    return true;

  else if (Subtarget->hasMVEIntegerOps() &&

           (isTruncMask(M, VT, false, false) ||

            isTruncMask(M, VT, false, true) ||

            isTruncMask(M, VT, true, false) || isTruncMask(M, VT, true, true)))

    return true;

  else

    return false;

}


/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit

/// the specified operations to build the shuffle.


static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,

                                      SDValue RHS, SelectionDAG &DAG,

                                      const SDLoc &dl) {

  unsigned OpNum = (PFEntry >> 26) & 0x0F;

  unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);

  unsigned RHSID = (PFEntry >>  0) & ((1 << 13)-1);


  if (OpNum == OP_COPY) {

    if (LHSID == (1*9+2)*9+3) return LHS;

    assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");

    return RHS;

  }


  SDValue OpLHS, OpRHS;

  OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);

  OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);

  EVT VT = OpLHS.getValueType();


  switch (OpNum) {

  default: llvm_unreachable("Unknown shuffle opcode!");

  case OP_VREV:

    // VREV divides the vector in half and swaps within the half.

    if (VT.getScalarSizeInBits() == 32)

      return DAG.getNode(ARMISD::VREV64, dl, VT, OpLHS);

    // vrev <4 x i16> -> VREV32

    if (VT.getScalarSizeInBits() == 16)

      return DAG.getNode(ARMISD::VREV32, dl, VT, OpLHS);

    // vrev <4 x i8> -> VREV16

    assert(VT.getScalarSizeInBits() == 8);

    return DAG.getNode(ARMISD::VREV16, dl, VT, OpLHS);

  case OP_VDUP0:

  case OP_VDUP1:

  case OP_VDUP2:

  case OP_VDUP3:

    return DAG.getNode(ARMISD::VDUPLANE, dl, VT,

                       OpLHS, DAG.getConstant(OpNum-OP_VDUP0, dl, MVT::i32));

  case OP_VEXT1:

  case OP_VEXT2:

  case OP_VEXT3:

    return DAG.getNode(ARMISD::VEXT, dl, VT,

                       OpLHS, OpRHS,

                       DAG.getConstant(OpNum - OP_VEXT1 + 1, dl, MVT::i32));

  case OP_VUZPL:

  case OP_VUZPR:

    return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),

                       OpLHS, OpRHS).getValue(OpNum-OP_VUZPL);

  case OP_VZIPL:

  case OP_VZIPR:

    return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),

                       OpLHS, OpRHS).getValue(OpNum-OP_VZIPL);

  case OP_VTRNL:

  case OP_VTRNR:

    return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),

                       OpLHS, OpRHS).getValue(OpNum-OP_VTRNL);

  }

}


static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op,

                                       ArrayRef<int> ShuffleMask,

                                       SelectionDAG &DAG) {

  // Check to see if we can use the VTBL instruction.

  SDValue V1 = Op.getOperand(0);

  SDValue V2 = Op.getOperand(1);

  SDLoc DL(Op);


  SmallVector<SDValue, 8> VTBLMask;

  for (int I : ShuffleMask)

    VTBLMask.push_back(DAG.getSignedConstant(I, DL, MVT::i32));


  if (V2.getNode()->isUndef())

    return DAG.getNode(ARMISD::VTBL1, DL, MVT::v8i8, V1,

                       DAG.getBuildVector(MVT::v8i8, DL, VTBLMask));


  return DAG.getNode(ARMISD::VTBL2, DL, MVT::v8i8, V1, V2,

                     DAG.getBuildVector(MVT::v8i8, DL, VTBLMask));

}


static SDValue LowerReverse_VECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {

  SDLoc DL(Op);

  EVT VT = Op.getValueType();


  assert((VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v16i8) &&

         "Expect an v8i16/v16i8 type");

  SDValue OpLHS = DAG.getNode(ARMISD::VREV64, DL, VT, Op.getOperand(0));

  // For a v16i8 type: After the VREV, we have got <7, ..., 0, 15, ..., 8>. Now,

  // extract the first 8 bytes into the top double word and the last 8 bytes

  // into the bottom double word, through a new vector shuffle that will be

  // turned into a VEXT on Neon, or a couple of VMOVDs on MVE.

  std::vector<int> NewMask;

  for (unsigned i = 0; i < VT.getVectorNumElements() / 2; i++)

    NewMask.push_back(VT.getVectorNumElements() / 2 + i);

  for (unsigned i = 0; i < VT.getVectorNumElements() / 2; i++)

    NewMask.push_back(i);

  return DAG.getVectorShuffle(VT, DL, OpLHS, OpLHS, NewMask);

}


static EVT getVectorTyFromPredicateVector(EVT VT) {

  switch (VT.getSimpleVT().SimpleTy) {

  case MVT::v2i1:

    return MVT::v2f64;

  case MVT::v4i1:

    return MVT::v4i32;

  case MVT::v8i1:

    return MVT::v8i16;

  case MVT::v16i1:

    return MVT::v16i8;

  default:

    llvm_unreachable("Unexpected vector predicate type");

  }

}


static SDValue PromoteMVEPredVector(SDLoc dl, SDValue Pred, EVT VT,

                                    SelectionDAG &DAG) {

  // Converting from boolean predicates to integers involves creating a vector

  // of all ones or all zeroes and selecting the lanes based upon the real

  // predicate.

  SDValue AllOnes =

      DAG.getTargetConstant(ARM_AM::createVMOVModImm(0xe, 0xff), dl, MVT::i32);

  AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v16i8, AllOnes);


  SDValue AllZeroes =

      DAG.getTargetConstant(ARM_AM::createVMOVModImm(0xe, 0x0), dl, MVT::i32);

  AllZeroes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v16i8, AllZeroes);


  // Get full vector type from predicate type

  EVT NewVT = getVectorTyFromPredicateVector(VT);


  SDValue RecastV1;

  // If the real predicate is an v8i1 or v4i1 (not v16i1) then we need to recast

  // this to a v16i1. This cannot be done with an ordinary bitcast because the

  // sizes are not the same. We have to use a MVE specific PREDICATE_CAST node,

  // since we know in hardware the sizes are really the same.

  if (VT != MVT::v16i1)

    RecastV1 = DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::v16i1, Pred);

  else

    RecastV1 = Pred;


  // Select either all ones or zeroes depending upon the real predicate bits.

  SDValue PredAsVector =

      DAG.getNode(ISD::VSELECT, dl, MVT::v16i8, RecastV1, AllOnes, AllZeroes);


  // Recast our new predicate-as-integer v16i8 vector into something

  // appropriate for the shuffle, i.e. v4i32 for a real v4i1 predicate.

  return DAG.getNode(ISD::BITCAST, dl, NewVT, PredAsVector);

}


static SDValue LowerVECTOR_SHUFFLE_i1(SDValue Op, SelectionDAG &DAG,

                                      const ARMSubtarget *ST) {

  EVT VT = Op.getValueType();

  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());

  ArrayRef<int> ShuffleMask = SVN->getMask();


  assert(ST->hasMVEIntegerOps() &&

         "No support for vector shuffle of boolean predicates");


  SDValue V1 = Op.getOperand(0);

  SDValue V2 = Op.getOperand(1);

  SDLoc dl(Op);

  if (isReverseMask(ShuffleMask, VT)) {

    SDValue cast = DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::i32, V1);

    SDValue rbit = DAG.getNode(ISD::BITREVERSE, dl, MVT::i32, cast);

    SDValue srl = DAG.getNode(ISD::SRL, dl, MVT::i32, rbit,

                              DAG.getConstant(16, dl, MVT::i32));

    return DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT, srl);

  }


  // Until we can come up with optimised cases for every single vector

  // shuffle in existence we have chosen the least painful strategy. This is

  // to essentially promote the boolean predicate to a 8-bit integer, where

  // each predicate represents a byte. Then we fall back on a normal integer

  // vector shuffle and convert the result back into a predicate vector. In

  // many cases the generated code might be even better than scalar code

  // operating on bits. Just imagine trying to shuffle 8 arbitrary 2-bit

  // fields in a register into 8 other arbitrary 2-bit fields!

  SDValue PredAsVector1 = PromoteMVEPredVector(dl, V1, VT, DAG);

  EVT NewVT = PredAsVector1.getValueType();

  SDValue PredAsVector2 = V2.isUndef() ? DAG.getUNDEF(NewVT)

                                       : PromoteMVEPredVector(dl, V2, VT, DAG);

  assert(PredAsVector2.getValueType() == NewVT &&

         "Expected identical vector type in expanded i1 shuffle!");


  // Do the shuffle!

  SDValue Shuffled = DAG.getVectorShuffle(NewVT, dl, PredAsVector1,

                                          PredAsVector2, ShuffleMask);


  // Now return the result of comparing the shuffled vector with zero,

  // which will generate a real predicate, i.e. v4i1, v8i1 or v16i1. For a v2i1

  // we convert to a v4i1 compare to fill in the two halves of the i64 as i32s.

  if (VT == MVT::v2i1) {

    SDValue BC = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, Shuffled);

    SDValue Cmp = DAG.getNode(ARMISD::VCMPZ, dl, MVT::v4i1, BC,

                              DAG.getConstant(ARMCC::NE, dl, MVT::i32));

    return DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::v2i1, Cmp);

  }

  return DAG.getNode(ARMISD::VCMPZ, dl, VT, Shuffled,

                     DAG.getConstant(ARMCC::NE, dl, MVT::i32));

}


static SDValue LowerVECTOR_SHUFFLEUsingMovs(SDValue Op,

                                            ArrayRef<int> ShuffleMask,

                                            SelectionDAG &DAG) {

  // Attempt to lower the vector shuffle using as many whole register movs as

  // possible. This is useful for types smaller than 32bits, which would

  // often otherwise become a series for grp movs.

  SDLoc dl(Op);

  EVT VT = Op.getValueType();

  if (VT.getScalarSizeInBits() >= 32)

    return SDValue();


  assert((VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v16i8) &&

         "Unexpected vector type");

  int NumElts = VT.getVectorNumElements();

  int QuarterSize = NumElts / 4;

  // The four final parts of the vector, as i32's

  SDValue Parts[4];


  // Look for full lane vmovs like <0,1,2,3> or <u,5,6,7> etc, (but not

  // <u,u,u,u>), returning the vmov lane index

  auto getMovIdx = [](ArrayRef<int> ShuffleMask, int Start, int Length) {

    // Detect which mov lane this would be from the first non-undef element.

    int MovIdx = -1;

    for (int i = 0; i < Length; i++) {

      if (ShuffleMask[Start + i] >= 0) {

        if (ShuffleMask[Start + i] % Length != i)

          return -1;

        MovIdx = ShuffleMask[Start + i] / Length;

        break;

      }

    }

    // If all items are undef, leave this for other combines

    if (MovIdx == -1)

      return -1;

    // Check the remaining values are the correct part of the same mov

    for (int i = 1; i < Length; i++) {

      if (ShuffleMask[Start + i] >= 0 &&

          (ShuffleMask[Start + i] / Length != MovIdx ||

           ShuffleMask[Start + i] % Length != i))

        return -1;

    }

    return MovIdx;

  };


  for (int Part = 0; Part < 4; ++Part) {

    // Does this part look like a mov

    int Elt = getMovIdx(ShuffleMask, Part * QuarterSize, QuarterSize);

    if (Elt != -1) {

      SDValue Input = Op->getOperand(0);

      if (Elt >= 4) {

        Input = Op->getOperand(1);

        Elt -= 4;

      }

      SDValue BitCast = DAG.getBitcast(MVT::v4f32, Input);

      Parts[Part] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, BitCast,

                                DAG.getConstant(Elt, dl, MVT::i32));

    }

  }


  // Nothing interesting found, just return

  if (!Parts[0] && !Parts[1] && !Parts[2] && !Parts[3])

    return SDValue();


  // The other parts need to be built with the old shuffle vector, cast to a

  // v4i32 and extract_vector_elts

  if (!Parts[0] || !Parts[1] || !Parts[2] || !Parts[3]) {

    SmallVector<int, 16> NewShuffleMask;

    for (int Part = 0; Part < 4; ++Part)

      for (int i = 0; i < QuarterSize; i++)

        NewShuffleMask.push_back(

            Parts[Part] ? -1 : ShuffleMask[Part * QuarterSize + i]);

    SDValue NewShuffle = DAG.getVectorShuffle(

        VT, dl, Op->getOperand(0), Op->getOperand(1), NewShuffleMask);

    SDValue BitCast = DAG.getBitcast(MVT::v4f32, NewShuffle);


    for (int Part = 0; Part < 4; ++Part)

      if (!Parts[Part])

        Parts[Part] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32,

                                  BitCast, DAG.getConstant(Part, dl, MVT::i32));

  }

  // Build a vector out of the various parts and bitcast it back to the original

  // type.

  SDValue NewVec = DAG.getNode(ARMISD::BUILD_VECTOR, dl, MVT::v4f32, Parts);

  return DAG.getBitcast(VT, NewVec);

}


static SDValue LowerVECTOR_SHUFFLEUsingOneOff(SDValue Op,

                                              ArrayRef<int> ShuffleMask,

                                              SelectionDAG &DAG) {

  SDValue V1 = Op.getOperand(0);

  SDValue V2 = Op.getOperand(1);

  EVT VT = Op.getValueType();

  unsigned NumElts = VT.getVectorNumElements();


  // An One-Off Identity mask is one that is mostly an identity mask from as

  // single source but contains a single element out-of-place, either from a

  // different vector or from another position in the same vector. As opposed to

  // lowering this via a ARMISD::BUILD_VECTOR we can generate an extract/insert

  // pair directly.

  auto isOneOffIdentityMask = [](ArrayRef<int> Mask, EVT VT, int BaseOffset,

                                 int &OffElement) {

    OffElement = -1;

    int NonUndef = 0;

    for (int i = 0, NumMaskElts = Mask.size(); i < NumMaskElts; ++i) {

      if (Mask[i] == -1)

        continue;

      NonUndef++;

      if (Mask[i] != i + BaseOffset) {

        if (OffElement == -1)

          OffElement = i;

        else

          return false;

      }

    }

    return NonUndef > 2 && OffElement != -1;

  };

  int OffElement;

  SDValue VInput;

  if (isOneOffIdentityMask(ShuffleMask, VT, 0, OffElement))

    VInput = V1;

  else if (isOneOffIdentityMask(ShuffleMask, VT, NumElts, OffElement))

    VInput = V2;

  else

    return SDValue();


  SDLoc dl(Op);

  EVT SVT = VT.getScalarType() == MVT::i8 || VT.getScalarType() == MVT::i16

                ? MVT::i32

                : VT.getScalarType();

  SDValue Elt = DAG.getNode(

      ISD::EXTRACT_VECTOR_ELT, dl, SVT,

      ShuffleMask[OffElement] < (int)NumElts ? V1 : V2,

      DAG.getVectorIdxConstant(ShuffleMask[OffElement] % NumElts, dl));

  return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, VInput, Elt,

                     DAG.getVectorIdxConstant(OffElement % NumElts, dl));

}


static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,

                                   const ARMSubtarget *ST) {

  SDValue V1 = Op.getOperand(0);

  SDValue V2 = Op.getOperand(1);

  SDLoc dl(Op);

  EVT VT = Op.getValueType();

  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());

  unsigned EltSize = VT.getScalarSizeInBits();


  if (ST->hasMVEIntegerOps() && EltSize == 1)

    return LowerVECTOR_SHUFFLE_i1(Op, DAG, ST);


  // Convert shuffles that are directly supported on NEON to target-specific

  // DAG nodes, instead of keeping them as shuffles and matching them again

  // during code selection.  This is more efficient and avoids the possibility

  // of inconsistencies between legalization and selection.

  // FIXME: floating-point vectors should be canonicalized to integer vectors

  // of the same time so that they get CSEd properly.

  ArrayRef<int> ShuffleMask = SVN->getMask();


  if (EltSize <= 32) {

    if (SVN->isSplat()) {

      int Lane = SVN->getSplatIndex();

      // If this is undef splat, generate it via "just" vdup, if possible.

      if (Lane == -1) Lane = 0;


      // Test if V1 is a SCALAR_TO_VECTOR.

      if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) {

        return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));

      }

      // Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR

      // (and probably will turn into a SCALAR_TO_VECTOR once legalization

      // reaches it).

      if (Lane == 0 && V1.getOpcode() == ISD::BUILD_VECTOR &&

          !isa<ConstantSDNode>(V1.getOperand(0))) {

        bool IsScalarToVector = true;

        for (unsigned i = 1, e = V1.getNumOperands(); i != e; ++i)

          if (!V1.getOperand(i).isUndef()) {

            IsScalarToVector = false;

            break;

          }

        if (IsScalarToVector)

          return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));

      }

      return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1,

                         DAG.getConstant(Lane, dl, MVT::i32));

    }


    bool ReverseVEXT = false;

    unsigned Imm = 0;

    if (ST->hasNEON() && isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) {

      if (ReverseVEXT)

        std::swap(V1, V2);

      return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V2,

                         DAG.getConstant(Imm, dl, MVT::i32));

    }


    if (isVREVMask(ShuffleMask, VT, 64))

      return DAG.getNode(ARMISD::VREV64, dl, VT, V1);

    if (isVREVMask(ShuffleMask, VT, 32))

      return DAG.getNode(ARMISD::VREV32, dl, VT, V1);

    if (isVREVMask(ShuffleMask, VT, 16))

      return DAG.getNode(ARMISD::VREV16, dl, VT, V1);


    if (ST->hasNEON() && V2->isUndef() && isSingletonVEXTMask(ShuffleMask, VT, Imm)) {

      return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V1,

                         DAG.getConstant(Imm, dl, MVT::i32));

    }


    // Check for Neon shuffles that modify both input vectors in place.

    // If both results are used, i.e., if there are two shuffles with the same

    // source operands and with masks corresponding to both results of one of

    // these operations, DAG memoization will ensure that a single node is

    // used for both shuffles.

    unsigned WhichResult = 0;

    bool isV_UNDEF = false;

    if (ST->hasNEON()) {

      if (unsigned ShuffleOpc = isNEONTwoResultShuffleMask(

              ShuffleMask, VT, WhichResult, isV_UNDEF)) {

        if (isV_UNDEF)

          V2 = V1;

        return DAG.getNode(ShuffleOpc, dl, DAG.getVTList(VT, VT), V1, V2)

            .getValue(WhichResult);

      }

    }

    if (ST->hasMVEIntegerOps()) {

      if (isVMOVNMask(ShuffleMask, VT, false, false))

        return DAG.getNode(ARMISD::VMOVN, dl, VT, V2, V1,

                           DAG.getConstant(0, dl, MVT::i32));

      if (isVMOVNMask(ShuffleMask, VT, true, false))

        return DAG.getNode(ARMISD::VMOVN, dl, VT, V1, V2,

                           DAG.getConstant(1, dl, MVT::i32));

      if (isVMOVNMask(ShuffleMask, VT, true, true))

        return DAG.getNode(ARMISD::VMOVN, dl, VT, V1, V1,

                           DAG.getConstant(1, dl, MVT::i32));

    }


    // Also check for these shuffles through CONCAT_VECTORS: we canonicalize

    // shuffles that produce a result larger than their operands with:

    //   shuffle(concat(v1, undef), concat(v2, undef))

    // ->

    //   shuffle(concat(v1, v2), undef)

    // because we can access quad vectors (see PerformVECTOR_SHUFFLECombine).

    //

    // This is useful in the general case, but there are special cases where

    // native shuffles produce larger results: the two-result ops.

    //

    // Look through the concat when lowering them:

    //   shuffle(concat(v1, v2), undef)

    // ->

    //   concat(VZIP(v1, v2):0, :1)

    //

    if (ST->hasNEON() && V1->getOpcode() == ISD::CONCAT_VECTORS && V2->isUndef()) {

      SDValue SubV1 = V1->getOperand(0);

      SDValue SubV2 = V1->getOperand(1);

      EVT SubVT = SubV1.getValueType();


      // We expect these to have been canonicalized to -1.

      assert(llvm::all_of(ShuffleMask, [&](int i) {

        return i < (int)VT.getVectorNumElements();

      }) && "Unexpected shuffle index into UNDEF operand!");


      if (unsigned ShuffleOpc = isNEONTwoResultShuffleMask(

              ShuffleMask, SubVT, WhichResult, isV_UNDEF)) {

        if (isV_UNDEF)

          SubV2 = SubV1;

        assert((WhichResult == 0) &&

               "In-place shuffle of concat can only have one result!");

        SDValue Res = DAG.getNode(ShuffleOpc, dl, DAG.getVTList(SubVT, SubVT),

                                  SubV1, SubV2);

        return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Res.getValue(0),

                           Res.getValue(1));

      }

    }

  }


  if (ST->hasMVEIntegerOps() && EltSize <= 32) {

    if (SDValue V = LowerVECTOR_SHUFFLEUsingOneOff(Op, ShuffleMask, DAG))

      return V;


    for (bool Top : {false, true}) {

      for (bool SingleSource : {false, true}) {

        if (isTruncMask(ShuffleMask, VT, Top, SingleSource)) {

          MVT FromSVT = MVT::getIntegerVT(EltSize * 2);

          MVT FromVT = MVT::getVectorVT(FromSVT, ShuffleMask.size() / 2);

          SDValue Lo = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, FromVT, V1);

          SDValue Hi = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, FromVT,

                                   SingleSource ? V1 : V2);

          if (Top) {

            SDValue Amt = DAG.getConstant(EltSize, dl, FromVT);

            Lo = DAG.getNode(ISD::SRL, dl, FromVT, Lo, Amt);

            Hi = DAG.getNode(ISD::SRL, dl, FromVT, Hi, Amt);

          }

          return DAG.getNode(ARMISD::MVETRUNC, dl, VT, Lo, Hi);

        }

      }

    }

  }


  // If the shuffle is not directly supported and it has 4 elements, use

  // the PerfectShuffle-generated table to synthesize it from other shuffles.

  unsigned NumElts = VT.getVectorNumElements();

  if (NumElts == 4) {

    unsigned PFIndexes[4];

    for (unsigned i = 0; i != 4; ++i) {

      if (ShuffleMask[i] < 0)

        PFIndexes[i] = 8;

      else

        PFIndexes[i] = ShuffleMask[i];

    }


    // Compute the index in the perfect shuffle table.

    unsigned PFTableIndex =

      PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];

    unsigned PFEntry = PerfectShuffleTable[PFTableIndex];

    unsigned Cost = (PFEntry >> 30);


    if (Cost <= 4) {

      if (ST->hasNEON())

        return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);

      else if (isLegalMVEShuffleOp(PFEntry)) {

        unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);

        unsigned RHSID = (PFEntry >>  0) & ((1 << 13)-1);

        unsigned PFEntryLHS = PerfectShuffleTable[LHSID];

        unsigned PFEntryRHS = PerfectShuffleTable[RHSID];

        if (isLegalMVEShuffleOp(PFEntryLHS) && isLegalMVEShuffleOp(PFEntryRHS))

          return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);

      }

    }

  }


  // Implement shuffles with 32- or 64-bit elements as ARMISD::BUILD_VECTORs.

  if (EltSize >= 32) {

    // Do the expansion with floating-point types, since that is what the VFP

    // registers are defined to use, and since i64 is not legal.

    EVT EltVT = EVT::getFloatingPointVT(EltSize);

    EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);

    V1 = DAG.getNode(ISD::BITCAST, dl, VecVT, V1);

    V2 = DAG.getNode(ISD::BITCAST, dl, VecVT, V2);

    SmallVector<SDValue, 8> Ops;

    for (unsigned i = 0; i < NumElts; ++i) {

      if (ShuffleMask[i] < 0)

        Ops.push_back(DAG.getUNDEF(EltVT));

      else

        Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,

                                  ShuffleMask[i] < (int)NumElts ? V1 : V2,

                                  DAG.getConstant(ShuffleMask[i] & (NumElts-1),

                                                  dl, MVT::i32)));

    }

    SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, Ops);

    return DAG.getNode(ISD::BITCAST, dl, VT, Val);

  }


  if ((VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v16i8) &&

      isReverseMask(ShuffleMask, VT))

    return LowerReverse_VECTOR_SHUFFLE(Op, DAG);


  if (ST->hasNEON() && VT == MVT::v8i8)

    if (SDValue NewOp = LowerVECTOR_SHUFFLEv8i8(Op, ShuffleMask, DAG))

      return NewOp;


  if (ST->hasMVEIntegerOps())

    if (SDValue NewOp = LowerVECTOR_SHUFFLEUsingMovs(Op, ShuffleMask, DAG))

      return NewOp;


  return SDValue();

}


static SDValue LowerINSERT_VECTOR_ELT_i1(SDValue Op, SelectionDAG &DAG,

                                         const ARMSubtarget *ST) {

  EVT VecVT = Op.getOperand(0).getValueType();

  SDLoc dl(Op);


  assert(ST->hasMVEIntegerOps() &&

         "LowerINSERT_VECTOR_ELT_i1 called without MVE!");


  SDValue Conv =

      DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::i32, Op->getOperand(0));

  unsigned Lane = Op.getConstantOperandVal(2);

  unsigned LaneWidth =

      getVectorTyFromPredicateVector(VecVT).getScalarSizeInBits() / 8;

  unsigned Mask = ((1 << LaneWidth) - 1) << Lane * LaneWidth;

  SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::i32,

                            Op.getOperand(1), DAG.getValueType(MVT::i1));

  SDValue BFI = DAG.getNode(ARMISD::BFI, dl, MVT::i32, Conv, Ext,

                            DAG.getConstant(~Mask, dl, MVT::i32));

  return DAG.getNode(ARMISD::PREDICATE_CAST, dl, Op.getValueType(), BFI);

}


SDValue ARMTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,

                                                  SelectionDAG &DAG) const {

  // INSERT_VECTOR_ELT is legal only for immediate indexes.

  SDValue Lane = Op.getOperand(2);

  if (!isa<ConstantSDNode>(Lane))

    return SDValue();


  SDValue Elt = Op.getOperand(1);

  EVT EltVT = Elt.getValueType();


  if (Subtarget->hasMVEIntegerOps() &&

      Op.getValueType().getScalarSizeInBits() == 1)

    return LowerINSERT_VECTOR_ELT_i1(Op, DAG, Subtarget);


  if (getTypeAction(*DAG.getContext(), EltVT) ==

      TargetLowering::TypeSoftPromoteHalf) {

    // INSERT_VECTOR_ELT doesn't want f16 operands promoting to f32,

    // but the type system will try to do that if we don't intervene.

    // Reinterpret any such vector-element insertion as one with the

    // corresponding integer types.


    SDLoc dl(Op);


    EVT IEltVT = MVT::getIntegerVT(EltVT.getScalarSizeInBits());

    assert(getTypeAction(*DAG.getContext(), IEltVT) !=

           TargetLowering::TypeSoftPromoteHalf);


    SDValue VecIn = Op.getOperand(0);

    EVT VecVT = VecIn.getValueType();

    EVT IVecVT = EVT::getVectorVT(*DAG.getContext(), IEltVT,

                                  VecVT.getVectorNumElements());


    SDValue IElt = DAG.getNode(ISD::BITCAST, dl, IEltVT, Elt);

    SDValue IVecIn = DAG.getNode(ISD::BITCAST, dl, IVecVT, VecIn);

    SDValue IVecOut = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, IVecVT,

                                  IVecIn, IElt, Lane);

    return DAG.getNode(ISD::BITCAST, dl, VecVT, IVecOut);

  }


  return Op;

}


static SDValue LowerEXTRACT_VECTOR_ELT_i1(SDValue Op, SelectionDAG &DAG,

                                          const ARMSubtarget *ST) {

  EVT VecVT = Op.getOperand(0).getValueType();

  SDLoc dl(Op);


  assert(ST->hasMVEIntegerOps() &&

         "LowerINSERT_VECTOR_ELT_i1 called without MVE!");


  SDValue Conv =

      DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::i32, Op->getOperand(0));

  unsigned Lane = Op.getConstantOperandVal(1);

  unsigned LaneWidth =

      getVectorTyFromPredicateVector(VecVT).getScalarSizeInBits() / 8;

  SDValue Shift = DAG.getNode(ISD::SRL, dl, MVT::i32, Conv,

                              DAG.getConstant(Lane * LaneWidth, dl, MVT::i32));

  return Shift;

}


static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG,

                                       const ARMSubtarget *ST) {

  // EXTRACT_VECTOR_ELT is legal only for immediate indexes.

  SDValue Lane = Op.getOperand(1);

  if (!isa<ConstantSDNode>(Lane))

    return SDValue();


  SDValue Vec = Op.getOperand(0);

  EVT VT = Vec.getValueType();


  if (ST->hasMVEIntegerOps() && VT.getScalarSizeInBits() == 1)

    return LowerEXTRACT_VECTOR_ELT_i1(Op, DAG, ST);


  if (Op.getValueType() == MVT::i32 && Vec.getScalarValueSizeInBits() < 32) {

    SDLoc dl(Op);

    return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane);

  }


  return Op;

}


static SDValue LowerCONCAT_VECTORS_i1(SDValue Op, SelectionDAG &DAG,

                                      const ARMSubtarget *ST) {

  SDLoc dl(Op);

  assert(Op.getValueType().getScalarSizeInBits() == 1 &&

         "Unexpected custom CONCAT_VECTORS lowering");

  assert(isPowerOf2_32(Op.getNumOperands()) &&

         "Unexpected custom CONCAT_VECTORS lowering");

  assert(ST->hasMVEIntegerOps() &&

         "CONCAT_VECTORS lowering only supported for MVE");


  auto ConcatPair = [&](SDValue V1, SDValue V2) {

    EVT Op1VT = V1.getValueType();

    EVT Op2VT = V2.getValueType();

    assert(Op1VT == Op2VT && "Operand types don't match!");

    assert((Op1VT == MVT::v2i1 || Op1VT == MVT::v4i1 || Op1VT == MVT::v8i1) &&

           "Unexpected i1 concat operations!");

    EVT VT = Op1VT.getDoubleNumVectorElementsVT(*DAG.getContext());


    SDValue NewV1 = PromoteMVEPredVector(dl, V1, Op1VT, DAG);

    SDValue NewV2 = PromoteMVEPredVector(dl, V2, Op2VT, DAG);


    // We now have Op1 + Op2 promoted to vectors of integers, where v8i1 gets

    // promoted to v8i16, etc.

    MVT ElType =

        getVectorTyFromPredicateVector(VT).getScalarType().getSimpleVT();

    unsigned NumElts = 2 * Op1VT.getVectorNumElements();


    EVT ConcatVT = MVT::getVectorVT(ElType, NumElts);

    if (Op1VT == MVT::v4i1 || Op1VT == MVT::v8i1) {

      // Use MVETRUNC to truncate the combined NewV1::NewV2 into the smaller

      // ConcatVT.

      SDValue ConVec =

          DAG.getNode(ARMISD::MVETRUNC, dl, ConcatVT, NewV1, NewV2);

      return DAG.getNode(ARMISD::VCMPZ, dl, VT, ConVec,

                         DAG.getConstant(ARMCC::NE, dl, MVT::i32));

    }


    // Extract the vector elements from Op1 and Op2 one by one and truncate them

    // to be the right size for the destination. For example, if Op1 is v4i1

    // then the promoted vector is v4i32. The result of concatenation gives a

    // v8i1, which when promoted is v8i16. That means each i32 element from Op1

    // needs truncating to i16 and inserting in the result.

    auto ExtractInto = [&DAG, &dl](SDValue NewV, SDValue ConVec, unsigned &j) {

      EVT NewVT = NewV.getValueType();

      EVT ConcatVT = ConVec.getValueType();

      unsigned ExtScale = 1;

      if (NewVT == MVT::v2f64) {

        NewV = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, NewV);

        ExtScale = 2;

      }

      for (unsigned i = 0, e = NewVT.getVectorNumElements(); i < e; i++, j++) {

        SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, NewV,

                                  DAG.getIntPtrConstant(i * ExtScale, dl));

        ConVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, ConcatVT, ConVec, Elt,

                             DAG.getConstant(j, dl, MVT::i32));

      }

      return ConVec;

    };

    unsigned j = 0;

    SDValue ConVec = DAG.getNode(ISD::UNDEF, dl, ConcatVT);

    ConVec = ExtractInto(NewV1, ConVec, j);

    ConVec = ExtractInto(NewV2, ConVec, j);


    // Now return the result of comparing the subvector with zero, which will

    // generate a real predicate, i.e. v4i1, v8i1 or v16i1.

    return DAG.getNode(ARMISD::VCMPZ, dl, VT, ConVec,

                       DAG.getConstant(ARMCC::NE, dl, MVT::i32));

  };


  // Concat each pair of subvectors and pack into the lower half of the array.

  SmallVector<SDValue> ConcatOps(Op->ops());

  while (ConcatOps.size() > 1) {

    for (unsigned I = 0, E = ConcatOps.size(); I != E; I += 2) {

      SDValue V1 = ConcatOps[I];

      SDValue V2 = ConcatOps[I + 1];

      ConcatOps[I / 2] = ConcatPair(V1, V2);

    }

    ConcatOps.resize(ConcatOps.size() / 2);

  }

  return ConcatOps[0];

}


static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG,

                                   const ARMSubtarget *ST) {

  EVT VT = Op->getValueType(0);

  if (ST->hasMVEIntegerOps() && VT.getScalarSizeInBits() == 1)

    return LowerCONCAT_VECTORS_i1(Op, DAG, ST);


  // The only time a CONCAT_VECTORS operation can have legal types is when

  // two 64-bit vectors are concatenated to a 128-bit vector.

  assert(Op.getValueType().is128BitVector() && Op.getNumOperands() == 2 &&

         "unexpected CONCAT_VECTORS");

  SDLoc dl(Op);

  SDValue Val = DAG.getUNDEF(MVT::v2f64);

  SDValue Op0 = Op.getOperand(0);

  SDValue Op1 = Op.getOperand(1);

  if (!Op0.isUndef())

    Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,

                      DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op0),

                      DAG.getIntPtrConstant(0, dl));

  if (!Op1.isUndef())

    Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,

                      DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op1),

                      DAG.getIntPtrConstant(1, dl));

  return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Val);

}


static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG,

                                      const ARMSubtarget *ST) {

  SDValue V1 = Op.getOperand(0);

  SDValue V2 = Op.getOperand(1);

  SDLoc dl(Op);

  EVT VT = Op.getValueType();

  EVT Op1VT = V1.getValueType();

  unsigned NumElts = VT.getVectorNumElements();

  unsigned Index = V2->getAsZExtVal();


  assert(VT.getScalarSizeInBits() == 1 &&

         "Unexpected custom EXTRACT_SUBVECTOR lowering");

  assert(ST->hasMVEIntegerOps() &&

         "EXTRACT_SUBVECTOR lowering only supported for MVE");


  SDValue NewV1 = PromoteMVEPredVector(dl, V1, Op1VT, DAG);


  // We now have Op1 promoted to a vector of integers, where v8i1 gets

  // promoted to v8i16, etc.


  MVT ElType = getVectorTyFromPredicateVector(VT).getScalarType().getSimpleVT();


  if (NumElts == 2) {

    EVT SubVT = MVT::v4i32;

    SDValue SubVec = DAG.getNode(ISD::UNDEF, dl, SubVT);

    for (unsigned i = Index, j = 0; i < (Index + NumElts); i++, j += 2) {

      SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, NewV1,

                                DAG.getIntPtrConstant(i, dl));

      SubVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, SubVT, SubVec, Elt,

                           DAG.getConstant(j, dl, MVT::i32));

      SubVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, SubVT, SubVec, Elt,

                           DAG.getConstant(j + 1, dl, MVT::i32));

    }

    SDValue Cmp = DAG.getNode(ARMISD::VCMPZ, dl, MVT::v4i1, SubVec,

                              DAG.getConstant(ARMCC::NE, dl, MVT::i32));

    return DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::v2i1, Cmp);

  }


  EVT SubVT = MVT::getVectorVT(ElType, NumElts);

  SDValue SubVec = DAG.getNode(ISD::UNDEF, dl, SubVT);

  for (unsigned i = Index, j = 0; i < (Index + NumElts); i++, j++) {

    SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, NewV1,

                              DAG.getIntPtrConstant(i, dl));

    SubVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, SubVT, SubVec, Elt,

                         DAG.getConstant(j, dl, MVT::i32));

  }


  // Now return the result of comparing the subvector with zero,

  // which will generate a real predicate, i.e. v4i1, v8i1 or v16i1.

  return DAG.getNode(ARMISD::VCMPZ, dl, VT, SubVec,

                     DAG.getConstant(ARMCC::NE, dl, MVT::i32));

}


// Turn a truncate into a predicate (an i1 vector) into icmp(and(x, 1), 0).


static SDValue LowerTruncatei1(SDNode *N, SelectionDAG &DAG,

                               const ARMSubtarget *ST) {

  assert(ST->hasMVEIntegerOps() && "Expected MVE!");

  EVT VT = N->getValueType(0);

  assert((VT == MVT::v16i1 || VT == MVT::v8i1 || VT == MVT::v4i1) &&

         "Expected a vector i1 type!");

  SDValue Op = N->getOperand(0);

  EVT FromVT = Op.getValueType();

  SDLoc DL(N);


  SDValue And =

      DAG.getNode(ISD::AND, DL, FromVT, Op, DAG.getConstant(1, DL, FromVT));

  return DAG.getNode(ISD::SETCC, DL, VT, And, DAG.getConstant(0, DL, FromVT),

                     DAG.getCondCode(ISD::SETNE));

}


static SDValue LowerTruncate(SDNode *N, SelectionDAG &DAG,

                             const ARMSubtarget *Subtarget) {

  if (!Subtarget->hasMVEIntegerOps())

    return SDValue();


  EVT ToVT = N->getValueType(0);

  if (ToVT.getScalarType() == MVT::i1)

    return LowerTruncatei1(N, DAG, Subtarget);


  // MVE does not have a single instruction to perform the truncation of a v4i32

  // into the lower half of a v8i16, in the same way that a NEON vmovn would.

  // Most of the instructions in MVE follow the 'Beats' system, where moving

  // values from different lanes is usually something that the instructions

  // avoid.

  //

  // Instead it has top/bottom instructions such as VMOVLT/B and VMOVNT/B,

  // which take a the top/bottom half of a larger lane and extend it (or do the

  // opposite, truncating into the top/bottom lane from a larger lane). Note

  // that because of the way we widen lanes, a v4i16 is really a v4i32 using the

  // bottom 16bits from each vector lane. This works really well with T/B

  // instructions, but that doesn't extend to v8i32->v8i16 where the lanes need

  // to move order.

  //

  // But truncates and sext/zext are always going to be fairly common from llvm.

  // We have several options for how to deal with them:

  // - Wherever possible combine them into an instruction that makes them

  //   "free". This includes loads/stores, which can perform the trunc as part

  //   of the memory operation. Or certain shuffles that can be turned into

  //   VMOVN/VMOVL.

  // - Lane Interleaving to transform blocks surrounded by ext/trunc. So

  //   trunc(mul(sext(a), sext(b))) may become

  //   VMOVNT(VMUL(VMOVLB(a), VMOVLB(b)), VMUL(VMOVLT(a), VMOVLT(b))). (Which in

  //   this case can use VMULL). This is performed in the

  //   MVELaneInterleavingPass.

  // - Otherwise we have an option. By default we would expand the

  //   zext/sext/trunc into a series of lane extract/inserts going via GPR

  //   registers. One for each vector lane in the vector. This can obviously be

  //   very expensive.

  // - The other option is to use the fact that loads/store can extend/truncate

  //   to turn a trunc into two truncating stack stores and a stack reload. This

  //   becomes 3 back-to-back memory operations, but at least that is less than

  //   all the insert/extracts.

  //

  // In order to do the last, we convert certain trunc's into MVETRUNC, which

  // are either optimized where they can be, or eventually lowered into stack

  // stores/loads. This prevents us from splitting a v8i16 trunc into two stores

  // two early, where other instructions would be better, and stops us from

  // having to reconstruct multiple buildvector shuffles into loads/stores.

  if (ToVT != MVT::v8i16 && ToVT != MVT::v16i8)

    return SDValue();

  EVT FromVT = N->getOperand(0).getValueType();

  if (FromVT != MVT::v8i32 && FromVT != MVT::v16i16)

    return SDValue();


  SDValue Lo, Hi;

  std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0);

  SDLoc DL(N);

  return DAG.getNode(ARMISD::MVETRUNC, DL, ToVT, Lo, Hi);

}


static SDValue LowerVectorExtend(SDNode *N, SelectionDAG &DAG,

                                 const ARMSubtarget *Subtarget) {

  if (!Subtarget->hasMVEIntegerOps())

    return SDValue();


  // See LowerTruncate above for an explanation of MVEEXT/MVETRUNC.


  EVT ToVT = N->getValueType(0);

  if (ToVT != MVT::v16i32 && ToVT != MVT::v8i32 && ToVT != MVT::v16i16)

    return SDValue();

  SDValue Op = N->getOperand(0);

  EVT FromVT = Op.getValueType();

  if (FromVT != MVT::v8i16 && FromVT != MVT::v16i8)

    return SDValue();


  SDLoc DL(N);

  EVT ExtVT = ToVT.getHalfNumVectorElementsVT(*DAG.getContext());

  if (ToVT.getScalarType() == MVT::i32 && FromVT.getScalarType() == MVT::i8)

    ExtVT = MVT::v8i16;


  unsigned Opcode =

      N->getOpcode() == ISD::SIGN_EXTEND ? ARMISD::MVESEXT : ARMISD::MVEZEXT;

  SDValue Ext = DAG.getNode(Opcode, DL, DAG.getVTList(ExtVT, ExtVT), Op);

  SDValue Ext1 = Ext.getValue(1);


  if (ToVT.getScalarType() == MVT::i32 && FromVT.getScalarType() == MVT::i8) {

    Ext = DAG.getNode(N->getOpcode(), DL, MVT::v8i32, Ext);

    Ext1 = DAG.getNode(N->getOpcode(), DL, MVT::v8i32, Ext1);

  }


  return DAG.getNode(ISD::CONCAT_VECTORS, DL, ToVT, Ext, Ext1);

}


/// isExtendedBUILD_VECTOR - Check if N is a constant BUILD_VECTOR where each

/// element has been zero/sign-extended, depending on the isSigned parameter,

/// from an integer type half its size.


static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,

                                   bool isSigned) {

  // A v2i64 BUILD_VECTOR will have been legalized to a BITCAST from v4i32.

  EVT VT = N->getValueType(0);

  if (VT == MVT::v2i64 && N->getOpcode() == ISD::BITCAST) {

    SDNode *BVN = N->getOperand(0).getNode();

    if (BVN->getValueType(0) != MVT::v4i32 ||

        BVN->getOpcode() != ISD::BUILD_VECTOR)

      return false;

    unsigned LoElt = DAG.getDataLayout().isBigEndian() ? 1 : 0;

    unsigned HiElt = 1 - LoElt;

    ConstantSDNode *Lo0 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt));

    ConstantSDNode *Hi0 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt));

    ConstantSDNode *Lo1 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt+2));

    ConstantSDNode *Hi1 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt+2));

    if (!Lo0 || !Hi0 || !Lo1 || !Hi1)

      return false;

    if (isSigned) {

      if (Hi0->getSExtValue() == Lo0->getSExtValue() >> 32 &&

          Hi1->getSExtValue() == Lo1->getSExtValue() >> 32)

        return true;

    } else {

      if (Hi0->isZero() && Hi1->isZero())

        return true;

    }

    return false;

  }


  if (N->getOpcode() != ISD::BUILD_VECTOR)

    return false;


  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {

    SDNode *Elt = N->getOperand(i).getNode();

    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {

      unsigned EltSize = VT.getScalarSizeInBits();

      unsigned HalfSize = EltSize / 2;

      if (isSigned) {

        if (!isIntN(HalfSize, C->getSExtValue()))

          return false;

      } else {

        if (!isUIntN(HalfSize, C->getZExtValue()))

          return false;

      }

      continue;

    }

    return false;

  }


  return true;

}


/// isSignExtended - Check if a node is a vector value that is sign-extended

/// or a constant BUILD_VECTOR with sign-extended elements.


static bool isSignExtended(SDNode *N, SelectionDAG &DAG) {

  if (N->getOpcode() == ISD::SIGN_EXTEND || ISD::isSEXTLoad(N))

    return true;

  if (isExtendedBUILD_VECTOR(N, DAG, true))

    return true;

  return false;

}


/// isZeroExtended - Check if a node is a vector value that is zero-extended (or

/// any-extended) or a constant BUILD_VECTOR with zero-extended elements.


static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) {

  if (N->getOpcode() == ISD::ZERO_EXTEND || N->getOpcode() == ISD::ANY_EXTEND ||

      ISD::isZEXTLoad(N))

    return true;

  if (isExtendedBUILD_VECTOR(N, DAG, false))

    return true;

  return false;

}


static EVT getExtensionTo64Bits(const EVT &OrigVT) {

  if (OrigVT.getSizeInBits() >= 64)

    return OrigVT;


  assert(OrigVT.isSimple() && "Expecting a simple value type");


  MVT::SimpleValueType OrigSimpleTy = OrigVT.getSimpleVT().SimpleTy;

  switch (OrigSimpleTy) {

  default: llvm_unreachable("Unexpected Vector Type");

  case MVT::v2i8:

  case MVT::v2i16:

     return MVT::v2i32;

  case MVT::v4i8:

    return  MVT::v4i16;

  }

}


/// AddRequiredExtensionForVMULL - Add a sign/zero extension to extend the total

/// value size to 64 bits. We need a 64-bit D register as an operand to VMULL.

/// We insert the required extension here to get the vector to fill a D register.


static SDValue AddRequiredExtensionForVMULL(SDValue N, SelectionDAG &DAG,

                                            const EVT &OrigTy,

                                            const EVT &ExtTy,

                                            unsigned ExtOpcode) {

  // The vector originally had a size of OrigTy. It was then extended to ExtTy.

  // We expect the ExtTy to be 128-bits total. If the OrigTy is less than

  // 64-bits we need to insert a new extension so that it will be 64-bits.

  assert(ExtTy.is128BitVector() && "Unexpected extension size");

  if (OrigTy.getSizeInBits() >= 64)

    return N;


  // Must extend size to at least 64 bits to be used as an operand for VMULL.

  EVT NewVT = getExtensionTo64Bits(OrigTy);


  return DAG.getNode(ExtOpcode, SDLoc(N), NewVT, N);

}


/// SkipLoadExtensionForVMULL - return a load of the original vector size that

/// does not do any sign/zero extension. If the original vector is less

/// than 64 bits, an appropriate extension will be added after the load to

/// reach a total size of 64 bits. We have to add the extension separately

/// because ARM does not have a sign/zero extending load for vectors.


static SDValue SkipLoadExtensionForVMULL(LoadSDNode *LD, SelectionDAG& DAG) {

  EVT ExtendedTy = getExtensionTo64Bits(LD->getMemoryVT());


  // The load already has the right type.

  if (ExtendedTy == LD->getMemoryVT())

    return DAG.getLoad(LD->getMemoryVT(), SDLoc(LD), LD->getChain(),

                       LD->getBasePtr(), LD->getPointerInfo(), LD->getAlign(),

                       LD->getMemOperand()->getFlags());


  // We need to create a zextload/sextload. We cannot just create a load

  // followed by a zext/zext node because LowerMUL is also run during normal

  // operation legalization where we can't create illegal types.

  return DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD), ExtendedTy,

                        LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(),

                        LD->getMemoryVT(), LD->getAlign(),

                        LD->getMemOperand()->getFlags());

}


/// SkipExtensionForVMULL - For a node that is a SIGN_EXTEND, ZERO_EXTEND,

/// ANY_EXTEND, extending load, or BUILD_VECTOR with extended elements, return

/// the unextended value. The unextended vector should be 64 bits so that it can

/// be used as an operand to a VMULL instruction. If the original vector size

/// before extension is less than 64 bits we add a an extension to resize

/// the vector to 64 bits.


static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG) {

  if (N->getOpcode() == ISD::SIGN_EXTEND ||

      N->getOpcode() == ISD::ZERO_EXTEND || N->getOpcode() == ISD::ANY_EXTEND)

    return AddRequiredExtensionForVMULL(N->getOperand(0), DAG,

                                        N->getOperand(0)->getValueType(0),

                                        N->getValueType(0),

                                        N->getOpcode());


  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {

    assert((ISD::isSEXTLoad(LD) || ISD::isZEXTLoad(LD)) &&

           "Expected extending load");


    SDValue newLoad = SkipLoadExtensionForVMULL(LD, DAG);

    DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), newLoad.getValue(1));

    unsigned Opcode = ISD::isSEXTLoad(LD) ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;

    SDValue extLoad =

        DAG.getNode(Opcode, SDLoc(newLoad), LD->getValueType(0), newLoad);

    DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 0), extLoad);


    return newLoad;

  }


  // Otherwise, the value must be a BUILD_VECTOR.  For v2i64, it will

  // have been legalized as a BITCAST from v4i32.

  if (N->getOpcode() == ISD::BITCAST) {

    SDNode *BVN = N->getOperand(0).getNode();

    assert(BVN->getOpcode() == ISD::BUILD_VECTOR &&

           BVN->getValueType(0) == MVT::v4i32 && "expected v4i32 BUILD_VECTOR");

    unsigned LowElt = DAG.getDataLayout().isBigEndian() ? 1 : 0;

    return DAG.getBuildVector(

        MVT::v2i32, SDLoc(N),

        {BVN->getOperand(LowElt), BVN->getOperand(LowElt + 2)});

  }

  // Construct a new BUILD_VECTOR with elements truncated to half the size.

  assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR");

  EVT VT = N->getValueType(0);

  unsigned EltSize = VT.getScalarSizeInBits() / 2;

  unsigned NumElts = VT.getVectorNumElements();

  MVT TruncVT = MVT::getIntegerVT(EltSize);

  SmallVector<SDValue, 8> Ops;

  SDLoc dl(N);

  for (unsigned i = 0; i != NumElts; ++i) {

    const APInt &CInt = N->getConstantOperandAPInt(i);

    // Element types smaller than 32 bits are not legal, so use i32 elements.

    // The values are implicitly truncated so sext vs. zext doesn't matter.

    Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), dl, MVT::i32));

  }

  return DAG.getBuildVector(MVT::getVectorVT(TruncVT, NumElts), dl, Ops);

}


static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) {

  unsigned Opcode = N->getOpcode();

  if (Opcode == ISD::ADD || Opcode == ISD::SUB) {

    SDNode *N0 = N->getOperand(0).getNode();

    SDNode *N1 = N->getOperand(1).getNode();

    return N0->hasOneUse() && N1->hasOneUse() &&

      isSignExtended(N0, DAG) && isSignExtended(N1, DAG);

  }

  return false;

}


static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) {

  unsigned Opcode = N->getOpcode();

  if (Opcode == ISD::ADD || Opcode == ISD::SUB) {

    SDNode *N0 = N->getOperand(0).getNode();

    SDNode *N1 = N->getOperand(1).getNode();

    return N0->hasOneUse() && N1->hasOneUse() &&

      isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG);

  }

  return false;

}


static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {

  // Multiplications are only custom-lowered for 128-bit vectors so that

  // VMULL can be detected.  Otherwise v2i64 multiplications are not legal.

  EVT VT = Op.getValueType();

  assert(VT.is128BitVector() && VT.isInteger() &&

         "unexpected type for custom-lowering ISD::MUL");

  SDNode *N0 = Op.getOperand(0).getNode();

  SDNode *N1 = Op.getOperand(1).getNode();

  unsigned NewOpc = 0;

  bool isMLA = false;

  bool isN0SExt = isSignExtended(N0, DAG);

  bool isN1SExt = isSignExtended(N1, DAG);

  if (isN0SExt && isN1SExt)

    NewOpc = ARMISD::VMULLs;

  else {

    bool isN0ZExt = isZeroExtended(N0, DAG);

    bool isN1ZExt = isZeroExtended(N1, DAG);

    if (isN0ZExt && isN1ZExt)

      NewOpc = ARMISD::VMULLu;

    else if (isN1SExt || isN1ZExt) {

      // Look for (s/zext A + s/zext B) * (s/zext C). We want to turn these

      // into (s/zext A * s/zext C) + (s/zext B * s/zext C)

      if (isN1SExt && isAddSubSExt(N0, DAG)) {

        NewOpc = ARMISD::VMULLs;

        isMLA = true;

      } else if (isN1ZExt && isAddSubZExt(N0, DAG)) {

        NewOpc = ARMISD::VMULLu;

        isMLA = true;

      } else if (isN0ZExt && isAddSubZExt(N1, DAG)) {

        std::swap(N0, N1);

        NewOpc = ARMISD::VMULLu;

        isMLA = true;

      }

    }


    if (!NewOpc) {

      if (VT == MVT::v2i64)

        // Fall through to expand this.  It is not legal.

        return SDValue();

      else

        // Other vector multiplications are legal.

        return Op;

    }

  }


  // Legalize to a VMULL instruction.

  SDLoc DL(Op);

  SDValue Op0;

  SDValue Op1 = SkipExtensionForVMULL(N1, DAG);

  if (!isMLA) {

    Op0 = SkipExtensionForVMULL(N0, DAG);

    assert(Op0.getValueType().is64BitVector() &&

           Op1.getValueType().is64BitVector() &&

           "unexpected types for extended operands to VMULL");

    return DAG.getNode(NewOpc, DL, VT, Op0, Op1);

  }


  // Optimizing (zext A + zext B) * C, to (VMULL A, C) + (VMULL B, C) during

  // isel lowering to take advantage of no-stall back to back vmul + vmla.

  //   vmull q0, d4, d6

  //   vmlal q0, d5, d6

  // is faster than

  //   vaddl q0, d4, d5

  //   vmovl q1, d6

  //   vmul  q0, q0, q1

  SDValue N00 = SkipExtensionForVMULL(N0->getOperand(0).getNode(), DAG);

  SDValue N01 = SkipExtensionForVMULL(N0->getOperand(1).getNode(), DAG);

  EVT Op1VT = Op1.getValueType();

  return DAG.getNode(N0->getOpcode(), DL, VT,

                     DAG.getNode(NewOpc, DL, VT,

                               DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1),

                     DAG.getNode(NewOpc, DL, VT,

                               DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1));

}


static SDValue LowerSDIV_v4i8(SDValue X, SDValue Y, const SDLoc &dl,

                              SelectionDAG &DAG) {

  // TODO: Should this propagate fast-math-flags?


  // Convert to float

  // float4 xf = vcvt_f32_s32(vmovl_s16(a.lo));

  // float4 yf = vcvt_f32_s32(vmovl_s16(b.lo));

  X = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, X);

  Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Y);

  X = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, X);

  Y = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, Y);

  // Get reciprocal estimate.

  // float4 recip = vrecpeq_f32(yf);

  Y = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,

                   DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),

                   Y);

  // Because char has a smaller range than uchar, we can actually get away

  // without any newton steps.  This requires that we use a weird bias

  // of 0xb000, however (again, this has been exhaustively tested).

  // float4 result = as_float4(as_int4(xf*recip) + 0xb000);

  X = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, X, Y);

  X = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, X);

  Y = DAG.getConstant(0xb000, dl, MVT::v4i32);

  X = DAG.getNode(ISD::ADD, dl, MVT::v4i32, X, Y);

  X = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, X);

  // Convert back to short.

  X = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, X);

  X = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, X);

  return X;

}


static SDValue LowerSDIV_v4i16(SDValue N0, SDValue N1, const SDLoc &dl,

                               SelectionDAG &DAG) {

  // TODO: Should this propagate fast-math-flags?


  SDValue N2;

  // Convert to float.

  // float4 yf = vcvt_f32_s32(vmovl_s16(y));

  // float4 xf = vcvt_f32_s32(vmovl_s16(x));

  N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N0);

  N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N1);

  N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0);

  N1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1);


  // Use reciprocal estimate and one refinement step.

  // float4 recip = vrecpeq_f32(yf);

  // recip *= vrecpsq_f32(yf, recip);

  N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,

                   DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),

                   N1);

  N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,

                   DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),

                   N1, N2);

  N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);

  // Because short has a smaller range than ushort, we can actually get away

  // with only a single newton step.  This requires that we use a weird bias

  // of 89, however (again, this has been exhaustively tested).

  // float4 result = as_float4(as_int4(xf*recip) + 0x89);

  N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);

  N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);

  N1 = DAG.getConstant(0x89, dl, MVT::v4i32);

  N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);

  N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);

  // Convert back to integer and return.

  // return vmovn_s32(vcvt_s32_f32(result));

  N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0);

  N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0);

  return N0;

}


static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG,

                         const ARMSubtarget *ST) {

  EVT VT = Op.getValueType();

  assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&

         "unexpected type for custom-lowering ISD::SDIV");


  SDLoc dl(Op);

  SDValue N0 = Op.getOperand(0);

  SDValue N1 = Op.getOperand(1);

  SDValue N2, N3;


  if (VT == MVT::v8i8) {

    N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N0);

    N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N1);


    N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,

                     DAG.getIntPtrConstant(4, dl));

    N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,

                     DAG.getIntPtrConstant(4, dl));

    N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,

                     DAG.getIntPtrConstant(0, dl));

    N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,

                     DAG.getIntPtrConstant(0, dl));


    N0 = LowerSDIV_v4i8(N0, N1, dl, DAG); // v4i16

    N2 = LowerSDIV_v4i8(N2, N3, dl, DAG); // v4i16


    N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);

    N0 = LowerCONCAT_VECTORS(N0, DAG, ST);


    N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v8i8, N0);

    return N0;

  }

  return LowerSDIV_v4i16(N0, N1, dl, DAG);

}


static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG,

                         const ARMSubtarget *ST) {

  // TODO: Should this propagate fast-math-flags?

  EVT VT = Op.getValueType();

  assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&

         "unexpected type for custom-lowering ISD::UDIV");


  SDLoc dl(Op);

  SDValue N0 = Op.getOperand(0);

  SDValue N1 = Op.getOperand(1);

  SDValue N2, N3;


  if (VT == MVT::v8i8) {

    N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N0);

    N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N1);


    N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,

                     DAG.getIntPtrConstant(4, dl));

    N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,

                     DAG.getIntPtrConstant(4, dl));

    N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,

                     DAG.getIntPtrConstant(0, dl));

    N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,

                     DAG.getIntPtrConstant(0, dl));


    N0 = LowerSDIV_v4i16(N0, N1, dl, DAG); // v4i16

    N2 = LowerSDIV_v4i16(N2, N3, dl, DAG); // v4i16


    N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);

    N0 = LowerCONCAT_VECTORS(N0, DAG, ST);


    N0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v8i8,

                     DAG.getConstant(Intrinsic::arm_neon_vqmovnsu, dl,

                                     MVT::i32),

                     N0);

    return N0;

  }


  // v4i16 sdiv ... Convert to float.

  // float4 yf = vcvt_f32_s32(vmovl_u16(y));

  // float4 xf = vcvt_f32_s32(vmovl_u16(x));

  N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N0);

  N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N1);

  N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0);

  SDValue BN1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1);


  // Use reciprocal estimate and two refinement steps.

  // float4 recip = vrecpeq_f32(yf);

  // recip *= vrecpsq_f32(yf, recip);

  // recip *= vrecpsq_f32(yf, recip);

  N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,

                   DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),

                   BN1);

  N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,

                   DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),

                   BN1, N2);

  N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);

  N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,

                   DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),

                   BN1, N2);

  N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);

  // Simply multiplying by the reciprocal estimate can leave us a few ulps

  // too low, so we add 2 ulps (exhaustive testing shows that this is enough,

  // and that it will never cause us to return an answer too large).

  // float4 result = as_float4(as_int4(xf*recip) + 2);

  N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);

  N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);

  N1 = DAG.getConstant(2, dl, MVT::v4i32);

  N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);

  N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);

  // Convert back to integer and return.

  // return vmovn_u32(vcvt_s32_f32(result));

  N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0);

  N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0);

  return N0;

}


static SDValue LowerUADDSUBO_CARRY(SDValue Op, SelectionDAG &DAG) {

  SDNode *N = Op.getNode();

  EVT VT = N->getValueType(0);

  SDVTList VTs = DAG.getVTList(VT, MVT::i32);


  SDValue Carry = Op.getOperand(2);


  SDLoc DL(Op);


  SDValue Result;

  if (Op.getOpcode() == ISD::UADDO_CARRY) {

    // This converts the boolean value carry into the carry flag.

    Carry = ConvertBooleanCarryToCarryFlag(Carry, DAG);


    // Do the addition proper using the carry flag we wanted.

    Result = DAG.getNode(ARMISD::ADDE, DL, VTs, Op.getOperand(0),

                         Op.getOperand(1), Carry);


    // Now convert the carry flag into a boolean value.

    Carry = ConvertCarryFlagToBooleanCarry(Result.getValue(1), VT, DAG);

  } else {

    // ARMISD::SUBE expects a carry not a borrow like ISD::USUBO_CARRY so we

    // have to invert the carry first.

    Carry = DAG.getNode(ISD::SUB, DL, MVT::i32,

                        DAG.getConstant(1, DL, MVT::i32), Carry);

    // This converts the boolean value carry into the carry flag.

    Carry = ConvertBooleanCarryToCarryFlag(Carry, DAG);


    // Do the subtraction proper using the carry flag we wanted.

    Result = DAG.getNode(ARMISD::SUBE, DL, VTs, Op.getOperand(0),

                         Op.getOperand(1), Carry);


    // Now convert the carry flag into a boolean value.

    Carry = ConvertCarryFlagToBooleanCarry(Result.getValue(1), VT, DAG);

    // But the carry returned by ARMISD::SUBE is not a borrow as expected

    // by ISD::USUBO_CARRY, so compute 1 - C.

    Carry = DAG.getNode(ISD::SUB, DL, MVT::i32,

                        DAG.getConstant(1, DL, MVT::i32), Carry);

  }


  // Return both values.

  return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Carry);

}


SDValue ARMTargetLowering::LowerWindowsDIVLibCall(SDValue Op, SelectionDAG &DAG,

                                                  bool Signed,

                                                  SDValue &Chain) const {

  EVT VT = Op.getValueType();

  assert((VT == MVT::i32 || VT == MVT::i64) &&

         "unexpected type for custom lowering DIV");

  SDLoc dl(Op);


  const auto &DL = DAG.getDataLayout();

  RTLIB::Libcall LC;

  if (Signed)

    LC = VT == MVT::i32 ? RTLIB::SDIVREM_I32 : RTLIB::SDIVREM_I64;

  else

    LC = VT == MVT::i32 ? RTLIB::UDIVREM_I32 : RTLIB::UDIVREM_I64;


  const char *Name = getLibcallName(LC);

  SDValue ES = DAG.getExternalSymbol(Name, getPointerTy(DL));


  ARMTargetLowering::ArgListTy Args;


  for (auto AI : {1, 0}) {

    SDValue Operand = Op.getOperand(AI);

    Args.emplace_back(Operand,

                      Operand.getValueType().getTypeForEVT(*DAG.getContext()));

  }


  CallLoweringInfo CLI(DAG);

  CLI.setDebugLoc(dl)

    .setChain(Chain)

    .setCallee(CallingConv::ARM_AAPCS_VFP, VT.getTypeForEVT(*DAG.getContext()),

               ES, std::move(Args));


  return LowerCallTo(CLI).first;

}


// This is a code size optimisation: return the original SDIV node to

// DAGCombiner when we don't want to expand SDIV into a sequence of

// instructions, and an empty node otherwise which will cause the

// SDIV to be expanded in DAGCombine.

SDValue

ARMTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,

                                 SelectionDAG &DAG,

                                 SmallVectorImpl<SDNode *> &Created) const {

  // TODO: Support SREM

  if (N->getOpcode() != ISD::SDIV)

    return SDValue();


  const auto &ST = DAG.getSubtarget<ARMSubtarget>();

  const bool MinSize = ST.hasMinSize();

  const bool HasDivide = ST.isThumb() ? ST.hasDivideInThumbMode()

                                      : ST.hasDivideInARMMode();


  // Don't touch vector types; rewriting this may lead to scalarizing

  // the int divs.

  if (N->getOperand(0).getValueType().isVector())

    return SDValue();


  // Bail if MinSize is not set, and also for both ARM and Thumb mode we need

  // hwdiv support for this to be really profitable.

  if (!(MinSize && HasDivide))

    return SDValue();


  // ARM mode is a bit simpler than Thumb: we can handle large power

  // of 2 immediates with 1 mov instruction; no further checks required,

  // just return the sdiv node.

  if (!ST.isThumb())

    return SDValue(N, 0);


  // In Thumb mode, immediates larger than 128 need a wide 4-byte MOV,

  // and thus lose the code size benefits of a MOVS that requires only 2.

  // TargetTransformInfo and 'getIntImmCodeSizeCost' could be helpful here,

  // but as it's doing exactly this, it's not worth the trouble to get TTI.

  if (Divisor.sgt(128))

    return SDValue();


  return SDValue(N, 0);

}


SDValue ARMTargetLowering::LowerDIV_Windows(SDValue Op, SelectionDAG &DAG,

                                            bool Signed) const {

  assert(Op.getValueType() == MVT::i32 &&

         "unexpected type for custom lowering DIV");

  SDLoc dl(Op);


  SDValue DBZCHK = DAG.getNode(ARMISD::WIN__DBZCHK, dl, MVT::Other,

                               DAG.getEntryNode(), Op.getOperand(1));


  return LowerWindowsDIVLibCall(Op, DAG, Signed, DBZCHK);

}


static SDValue WinDBZCheckDenominator(SelectionDAG &DAG, SDNode *N, SDValue InChain) {

  SDLoc DL(N);

  SDValue Op = N->getOperand(1);

  if (N->getValueType(0) == MVT::i32)

    return DAG.getNode(ARMISD::WIN__DBZCHK, DL, MVT::Other, InChain, Op);

  SDValue Lo, Hi;

  std::tie(Lo, Hi) = DAG.SplitScalar(Op, DL, MVT::i32, MVT::i32);

  return DAG.getNode(ARMISD::WIN__DBZCHK, DL, MVT::Other, InChain,

                     DAG.getNode(ISD::OR, DL, MVT::i32, Lo, Hi));

}


void ARMTargetLowering::ExpandDIV_Windows(

    SDValue Op, SelectionDAG &DAG, bool Signed,

    SmallVectorImpl<SDValue> &Results) const {

  const auto &DL = DAG.getDataLayout();


  assert(Op.getValueType() == MVT::i64 &&

         "unexpected type for custom lowering DIV");

  SDLoc dl(Op);


  SDValue DBZCHK = WinDBZCheckDenominator(DAG, Op.getNode(), DAG.getEntryNode());


  SDValue Result = LowerWindowsDIVLibCall(Op, DAG, Signed, DBZCHK);


  SDValue Lower = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Result);

  SDValue Upper = DAG.getNode(ISD::SRL, dl, MVT::i64, Result,

                              DAG.getConstant(32, dl, getPointerTy(DL)));

  Upper = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Upper);


  Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lower, Upper));

}


static SDValue LowerPredicateLoad(SDValue Op, SelectionDAG &DAG) {

  LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());

  EVT MemVT = LD->getMemoryVT();

  assert((MemVT == MVT::v2i1 || MemVT == MVT::v4i1 || MemVT == MVT::v8i1 ||

          MemVT == MVT::v16i1) &&

         "Expected a predicate type!");

  assert(MemVT == Op.getValueType());

  assert(LD->getExtensionType() == ISD::NON_EXTLOAD &&

         "Expected a non-extending load");

  assert(LD->isUnindexed() && "Expected a unindexed load");


  // The basic MVE VLDR on a v2i1/v4i1/v8i1 actually loads the entire 16bit

  // predicate, with the "v4i1" bits spread out over the 16 bits loaded. We

  // need to make sure that 8/4/2 bits are actually loaded into the correct

  // place, which means loading the value and then shuffling the values into

  // the bottom bits of the predicate.

  // Equally, VLDR for an v16i1 will actually load 32bits (so will be incorrect

  // for BE).

  // Speaking of BE, apparently the rest of llvm will assume a reverse order to

  // a natural VMSR(load), so needs to be reversed.


  SDLoc dl(Op);

  SDValue Load = DAG.getExtLoad(

      ISD::EXTLOAD, dl, MVT::i32, LD->getChain(), LD->getBasePtr(),

      EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits()),

      LD->getMemOperand());

  SDValue Val = Load;

  if (DAG.getDataLayout().isBigEndian())

    Val = DAG.getNode(ISD::SRL, dl, MVT::i32,

                      DAG.getNode(ISD::BITREVERSE, dl, MVT::i32, Load),

                      DAG.getConstant(32 - MemVT.getSizeInBits(), dl, MVT::i32));

  SDValue Pred = DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::v16i1, Val);

  if (MemVT != MVT::v16i1)

    Pred = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MemVT, Pred,

                       DAG.getConstant(0, dl, MVT::i32));

  return DAG.getMergeValues({Pred, Load.getValue(1)}, dl);

}


void ARMTargetLowering::LowerLOAD(SDNode *N, SmallVectorImpl<SDValue> &Results,

                                  SelectionDAG &DAG) const {

  LoadSDNode *LD = cast<LoadSDNode>(N);

  EVT MemVT = LD->getMemoryVT();

  assert(LD->isUnindexed() && "Loads should be unindexed at this point.");


  if (MemVT == MVT::i64 && Subtarget->hasV5TEOps() &&

      !Subtarget->isThumb1Only() && LD->isVolatile() &&

      LD->getAlign() >= Subtarget->getDualLoadStoreAlignment()) {

    SDLoc dl(N);

    SDValue Result = DAG.getMemIntrinsicNode(

        ARMISD::LDRD, dl, DAG.getVTList({MVT::i32, MVT::i32, MVT::Other}),

        {LD->getChain(), LD->getBasePtr()}, MemVT, LD->getMemOperand());

    SDValue Lo = Result.getValue(DAG.getDataLayout().isLittleEndian() ? 0 : 1);

    SDValue Hi = Result.getValue(DAG.getDataLayout().isLittleEndian() ? 1 : 0);

    SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);

    Results.append({Pair, Result.getValue(2)});

  }

}


static SDValue LowerPredicateStore(SDValue Op, SelectionDAG &DAG) {

  StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());

  EVT MemVT = ST->getMemoryVT();

  assert((MemVT == MVT::v2i1 || MemVT == MVT::v4i1 || MemVT == MVT::v8i1 ||

          MemVT == MVT::v16i1) &&

         "Expected a predicate type!");

  assert(MemVT == ST->getValue().getValueType());

  assert(!ST->isTruncatingStore() && "Expected a non-extending store");

  assert(ST->isUnindexed() && "Expected a unindexed store");


  // Only store the v2i1 or v4i1 or v8i1 worth of bits, via a buildvector with

  // top bits unset and a scalar store.

  SDLoc dl(Op);

  SDValue Build = ST->getValue();

  if (MemVT != MVT::v16i1) {

    SmallVector<SDValue, 16> Ops;

    for (unsigned I = 0; I < MemVT.getVectorNumElements(); I++) {

      unsigned Elt = DAG.getDataLayout().isBigEndian()

                         ? MemVT.getVectorNumElements() - I - 1

                         : I;

      Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, Build,

                                DAG.getConstant(Elt, dl, MVT::i32)));

    }

    for (unsigned I = MemVT.getVectorNumElements(); I < 16; I++)

      Ops.push_back(DAG.getUNDEF(MVT::i32));

    Build = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i1, Ops);

  }

  SDValue GRP = DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::i32, Build);

  if (MemVT == MVT::v16i1 && DAG.getDataLayout().isBigEndian())

    GRP = DAG.getNode(ISD::SRL, dl, MVT::i32,

                      DAG.getNode(ISD::BITREVERSE, dl, MVT::i32, GRP),

                      DAG.getConstant(16, dl, MVT::i32));

  return DAG.getTruncStore(

      ST->getChain(), dl, GRP, ST->getBasePtr(),

      EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits()),

      ST->getMemOperand());

}


static SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG,

                          const ARMSubtarget *Subtarget) {

  StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());

  EVT MemVT = ST->getMemoryVT();

  assert(ST->isUnindexed() && "Stores should be unindexed at this point.");


  if (MemVT == MVT::i64 && Subtarget->hasV5TEOps() &&

      !Subtarget->isThumb1Only() && ST->isVolatile() &&

      ST->getAlign() >= Subtarget->getDualLoadStoreAlignment()) {

    SDNode *N = Op.getNode();

    SDLoc dl(N);


    SDValue Lo = DAG.getNode(

        ISD::EXTRACT_ELEMENT, dl, MVT::i32, ST->getValue(),

        DAG.getTargetConstant(DAG.getDataLayout().isLittleEndian() ? 0 : 1, dl,

                              MVT::i32));

    SDValue Hi = DAG.getNode(

        ISD::EXTRACT_ELEMENT, dl, MVT::i32, ST->getValue(),

        DAG.getTargetConstant(DAG.getDataLayout().isLittleEndian() ? 1 : 0, dl,

                              MVT::i32));


    return DAG.getMemIntrinsicNode(ARMISD::STRD, dl, DAG.getVTList(MVT::Other),

                                   {ST->getChain(), Lo, Hi, ST->getBasePtr()},

                                   MemVT, ST->getMemOperand());

  } else if (Subtarget->hasMVEIntegerOps() &&

             ((MemVT == MVT::v2i1 || MemVT == MVT::v4i1 || MemVT == MVT::v8i1 ||

               MemVT == MVT::v16i1))) {

    return LowerPredicateStore(Op, DAG);

  }


  return SDValue();

}


static bool isZeroVector(SDValue N) {

  return (ISD::isBuildVectorAllZeros(N.getNode()) ||

          (N->getOpcode() == ARMISD::VMOVIMM &&

           isNullConstant(N->getOperand(0))));

}


static SDValue LowerMLOAD(SDValue Op, SelectionDAG &DAG) {

  MaskedLoadSDNode *N = cast<MaskedLoadSDNode>(Op.getNode());

  MVT VT = Op.getSimpleValueType();

  SDValue Mask = N->getMask();

  SDValue PassThru = N->getPassThru();

  SDLoc dl(Op);


  if (isZeroVector(PassThru))

    return Op;


  // MVE Masked loads use zero as the passthru value. Here we convert undef to

  // zero too, and other values are lowered to a select.

  SDValue ZeroVec = DAG.getNode(ARMISD::VMOVIMM, dl, VT,

                                DAG.getTargetConstant(0, dl, MVT::i32));

  SDValue NewLoad = DAG.getMaskedLoad(

      VT, dl, N->getChain(), N->getBasePtr(), N->getOffset(), Mask, ZeroVec,

      N->getMemoryVT(), N->getMemOperand(), N->getAddressingMode(),

      N->getExtensionType(), N->isExpandingLoad());

  SDValue Combo = NewLoad;

  bool PassThruIsCastZero = (PassThru.getOpcode() == ISD::BITCAST ||

                             PassThru.getOpcode() == ARMISD::VECTOR_REG_CAST) &&

                            isZeroVector(PassThru->getOperand(0));

  if (!PassThru.isUndef() && !PassThruIsCastZero)

    Combo = DAG.getNode(ISD::VSELECT, dl, VT, Mask, NewLoad, PassThru);

  return DAG.getMergeValues({Combo, NewLoad.getValue(1)}, dl);

}


static SDValue LowerVecReduce(SDValue Op, SelectionDAG &DAG,

                              const ARMSubtarget *ST) {

  if (!ST->hasMVEIntegerOps())

    return SDValue();


  SDLoc dl(Op);

  unsigned BaseOpcode = 0;

  switch (Op->getOpcode()) {

  default: llvm_unreachable("Expected VECREDUCE opcode");

  case ISD::VECREDUCE_FADD: BaseOpcode = ISD::FADD; break;

  case ISD::VECREDUCE_FMUL: BaseOpcode = ISD::FMUL; break;

  case ISD::VECREDUCE_MUL:  BaseOpcode = ISD::MUL; break;

  case ISD::VECREDUCE_AND:  BaseOpcode = ISD::AND; break;

  case ISD::VECREDUCE_OR:   BaseOpcode = ISD::OR; break;

  case ISD::VECREDUCE_XOR:  BaseOpcode = ISD::XOR; break;

  case ISD::VECREDUCE_FMAX: BaseOpcode = ISD::FMAXNUM; break;

  case ISD::VECREDUCE_FMIN: BaseOpcode = ISD::FMINNUM; break;

  }


  SDValue Op0 = Op->getOperand(0);

  EVT VT = Op0.getValueType();

  EVT EltVT = VT.getVectorElementType();

  unsigned NumElts = VT.getVectorNumElements();

  unsigned NumActiveLanes = NumElts;


  assert((NumActiveLanes == 16 || NumActiveLanes == 8 || NumActiveLanes == 4 ||

          NumActiveLanes == 2) &&

         "Only expected a power 2 vector size");


  // Use Mul(X, Rev(X)) until 4 items remain. Going down to 4 vector elements

  // allows us to easily extract vector elements from the lanes.

  while (NumActiveLanes > 4) {

    unsigned RevOpcode = NumActiveLanes == 16 ? ARMISD::VREV16 : ARMISD::VREV32;

    SDValue Rev = DAG.getNode(RevOpcode, dl, VT, Op0);

    Op0 = DAG.getNode(BaseOpcode, dl, VT, Op0, Rev);

    NumActiveLanes /= 2;

  }


  SDValue Res;

  if (NumActiveLanes == 4) {

    // The remaining 4 elements are summed sequentially

    SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,

                              DAG.getConstant(0 * NumElts / 4, dl, MVT::i32));

    SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,

                              DAG.getConstant(1 * NumElts / 4, dl, MVT::i32));

    SDValue Ext2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,

                              DAG.getConstant(2 * NumElts / 4, dl, MVT::i32));

    SDValue Ext3 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,

                              DAG.getConstant(3 * NumElts / 4, dl, MVT::i32));

    SDValue Res0 = DAG.getNode(BaseOpcode, dl, EltVT, Ext0, Ext1, Op->getFlags());

    SDValue Res1 = DAG.getNode(BaseOpcode, dl, EltVT, Ext2, Ext3, Op->getFlags());

    Res = DAG.getNode(BaseOpcode, dl, EltVT, Res0, Res1, Op->getFlags());

  } else {

    SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,

                              DAG.getConstant(0, dl, MVT::i32));

    SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,

                              DAG.getConstant(1, dl, MVT::i32));

    Res = DAG.getNode(BaseOpcode, dl, EltVT, Ext0, Ext1, Op->getFlags());

  }


  // Result type may be wider than element type.

  if (EltVT != Op->getValueType(0))

    Res = DAG.getNode(ISD::ANY_EXTEND, dl, Op->getValueType(0), Res);

  return Res;

}


static SDValue LowerVecReduceF(SDValue Op, SelectionDAG &DAG,

                               const ARMSubtarget *ST) {

  if (!ST->hasMVEFloatOps())

    return SDValue();

  return LowerVecReduce(Op, DAG, ST);

}


static SDValue LowerVecReduceMinMax(SDValue Op, SelectionDAG &DAG,

                                    const ARMSubtarget *ST) {

  if (!ST->hasNEON())

    return SDValue();


  SDLoc dl(Op);

  SDValue Op0 = Op->getOperand(0);

  EVT VT = Op0.getValueType();

  EVT EltVT = VT.getVectorElementType();


  unsigned PairwiseIntrinsic = 0;

  switch (Op->getOpcode()) {

  default:

    llvm_unreachable("Expected VECREDUCE opcode");

  case ISD::VECREDUCE_UMIN:

    PairwiseIntrinsic = Intrinsic::arm_neon_vpminu;

    break;

  case ISD::VECREDUCE_UMAX:

    PairwiseIntrinsic = Intrinsic::arm_neon_vpmaxu;

    break;

  case ISD::VECREDUCE_SMIN:

    PairwiseIntrinsic = Intrinsic::arm_neon_vpmins;

    break;

  case ISD::VECREDUCE_SMAX:

    PairwiseIntrinsic = Intrinsic::arm_neon_vpmaxs;

    break;

  }

  SDValue PairwiseOp = DAG.getConstant(PairwiseIntrinsic, dl, MVT::i32);


  unsigned NumElts = VT.getVectorNumElements();

  unsigned NumActiveLanes = NumElts;


  assert((NumActiveLanes == 16 || NumActiveLanes == 8 || NumActiveLanes == 4 ||

          NumActiveLanes == 2) &&

         "Only expected a power 2 vector size");


  // Split 128-bit vectors, since vpmin/max takes 2 64-bit vectors.

  if (VT.is128BitVector()) {

    SDValue Lo, Hi;

    std::tie(Lo, Hi) = DAG.SplitVector(Op0, dl);

    VT = Lo.getValueType();

    Op0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, {PairwiseOp, Lo, Hi});

    NumActiveLanes /= 2;

  }


  // Use pairwise reductions until one lane remains

  while (NumActiveLanes > 1) {

    Op0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, {PairwiseOp, Op0, Op0});

    NumActiveLanes /= 2;

  }


  SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,

                            DAG.getConstant(0, dl, MVT::i32));


  // Result type may be wider than element type.

  if (EltVT != Op.getValueType()) {

    unsigned Extend = 0;

    switch (Op->getOpcode()) {

    default:

      llvm_unreachable("Expected VECREDUCE opcode");

    case ISD::VECREDUCE_UMIN:

    case ISD::VECREDUCE_UMAX:

      Extend = ISD::ZERO_EXTEND;

      break;

    case ISD::VECREDUCE_SMIN:

    case ISD::VECREDUCE_SMAX:

      Extend = ISD::SIGN_EXTEND;

      break;

    }

    Res = DAG.getNode(Extend, dl, Op.getValueType(), Res);

  }

  return Res;

}


static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) {

  if (isStrongerThanMonotonic(cast<AtomicSDNode>(Op)->getSuccessOrdering()))

    // Acquire/Release load/store is not legal for targets without a dmb or

    // equivalent available.

    return SDValue();


  // Monotonic load/store is legal for all targets.

  return Op;

}


static void ReplaceREADCYCLECOUNTER(SDNode *N,

                                    SmallVectorImpl<SDValue> &Results,

                                    SelectionDAG &DAG,

                                    const ARMSubtarget *Subtarget) {

  SDLoc DL(N);

  // Under Power Management extensions, the cycle-count is:

  //    mrc p15, #0, <Rt>, c9, c13, #0

  SDValue Ops[] = { N->getOperand(0), // Chain

                    DAG.getTargetConstant(Intrinsic::arm_mrc, DL, MVT::i32),

                    DAG.getTargetConstant(15, DL, MVT::i32),

                    DAG.getTargetConstant(0, DL, MVT::i32),

                    DAG.getTargetConstant(9, DL, MVT::i32),

                    DAG.getTargetConstant(13, DL, MVT::i32),

                    DAG.getTargetConstant(0, DL, MVT::i32)

  };


  SDValue Cycles32 = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,

                                 DAG.getVTList(MVT::i32, MVT::Other), Ops);

  Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Cycles32,

                                DAG.getConstant(0, DL, MVT::i32)));

  Results.push_back(Cycles32.getValue(1));

}


static SDValue createGPRPairNode2xi32(SelectionDAG &DAG, SDValue V0,

                                      SDValue V1) {

  SDLoc dl(V0.getNode());

  SDValue RegClass =

      DAG.getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);

  SDValue SubReg0 = DAG.getTargetConstant(ARM::gsub_0, dl, MVT::i32);

  SDValue SubReg1 = DAG.getTargetConstant(ARM::gsub_1, dl, MVT::i32);

  const SDValue Ops[] = {RegClass, V0, SubReg0, V1, SubReg1};

  return SDValue(

      DAG.getMachineNode(TargetOpcode::REG_SEQUENCE, dl, MVT::Untyped, Ops), 0);

}


static SDValue createGPRPairNodei64(SelectionDAG &DAG, SDValue V) {

  SDLoc dl(V.getNode());

  auto [VLo, VHi] = DAG.SplitScalar(V, dl, MVT::i32, MVT::i32);

  bool isBigEndian = DAG.getDataLayout().isBigEndian();

  if (isBigEndian)

    std::swap(VLo, VHi);

  return createGPRPairNode2xi32(DAG, VLo, VHi);

}


static void ReplaceCMP_SWAP_64Results(SDNode *N,

                                      SmallVectorImpl<SDValue> &Results,

                                      SelectionDAG &DAG) {

  assert(N->getValueType(0) == MVT::i64 &&

         "AtomicCmpSwap on types less than 64 should be legal");

  SDValue Ops[] = {

      createGPRPairNode2xi32(DAG, N->getOperand(1),

                             DAG.getUNDEF(MVT::i32)), // pointer, temp

      createGPRPairNodei64(DAG, N->getOperand(2)),    // expected

      createGPRPairNodei64(DAG, N->getOperand(3)),    // new

      N->getOperand(0),                               // chain in

  };

  SDNode *CmpSwap = DAG.getMachineNode(

      ARM::CMP_SWAP_64, SDLoc(N),

      DAG.getVTList(MVT::Untyped, MVT::Untyped, MVT::Other), Ops);


  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();

  DAG.setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});


  bool isBigEndian = DAG.getDataLayout().isBigEndian();


  SDValue Lo =

      DAG.getTargetExtractSubreg(isBigEndian ? ARM::gsub_1 : ARM::gsub_0,

                                 SDLoc(N), MVT::i32, SDValue(CmpSwap, 0));

  SDValue Hi =

      DAG.getTargetExtractSubreg(isBigEndian ? ARM::gsub_0 : ARM::gsub_1,

                                 SDLoc(N), MVT::i32, SDValue(CmpSwap, 0));

  Results.push_back(DAG.getNode(ISD::BUILD_PAIR, SDLoc(N), MVT::i64, Lo, Hi));

  Results.push_back(SDValue(CmpSwap, 2));

}


SDValue ARMTargetLowering::LowerFSETCC(SDValue Op, SelectionDAG &DAG) const {

  SDLoc dl(Op);

  EVT VT = Op.getValueType();

  SDValue Chain = Op.getOperand(0);

  SDValue LHS = Op.getOperand(1);

  SDValue RHS = Op.getOperand(2);

  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(3))->get();

  bool IsSignaling = Op.getOpcode() == ISD::STRICT_FSETCCS;


  // If we don't have instructions of this float type then soften to a libcall

  // and use SETCC instead.

  if (isUnsupportedFloatingType(LHS.getValueType())) {

    softenSetCCOperands(DAG, LHS.getValueType(), LHS, RHS, CC, dl, LHS, RHS,

                        Chain, IsSignaling);

    if (!RHS.getNode()) {

      RHS = DAG.getConstant(0, dl, LHS.getValueType());

      CC = ISD::SETNE;

    }

    SDValue Result = DAG.getNode(ISD::SETCC, dl, VT, LHS, RHS,

                                 DAG.getCondCode(CC));

    return DAG.getMergeValues({Result, Chain}, dl);

  }


  ARMCC::CondCodes CondCode, CondCode2;

  FPCCToARMCC(CC, CondCode, CondCode2);


  SDValue True = DAG.getConstant(1, dl, VT);

  SDValue False =  DAG.getConstant(0, dl, VT);

  SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);

  SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl, IsSignaling);

  SDValue Result = getCMOV(dl, VT, False, True, ARMcc, Cmp, DAG);

  if (CondCode2 != ARMCC::AL) {

    ARMcc = DAG.getConstant(CondCode2, dl, MVT::i32);

    Result = getCMOV(dl, VT, Result, True, ARMcc, Cmp, DAG);

  }

  return DAG.getMergeValues({Result, Chain}, dl);

}


SDValue ARMTargetLowering::LowerSPONENTRY(SDValue Op, SelectionDAG &DAG) const {

  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();


  EVT VT = getPointerTy(DAG.getDataLayout());

  int FI = MFI.CreateFixedObject(4, 0, false);

  return DAG.getFrameIndex(FI, VT);

}


SDValue ARMTargetLowering::LowerFP_TO_BF16(SDValue Op,

                                           SelectionDAG &DAG) const {

  SDLoc DL(Op);

  MakeLibCallOptions CallOptions;

  MVT SVT = Op.getOperand(0).getSimpleValueType();

  RTLIB::Libcall LC = RTLIB::getFPROUND(SVT, MVT::bf16);

  SDValue Res =

      makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;

  return DAG.getBitcast(MVT::i32, Res);

}


SDValue ARMTargetLowering::LowerCMP(SDValue Op, SelectionDAG &DAG) const {

  SDLoc dl(Op);

  SDValue LHS = Op.getOperand(0);

  SDValue RHS = Op.getOperand(1);


  // Determine if this is signed or unsigned comparison

  bool IsSigned = (Op.getOpcode() == ISD::SCMP);


  // Special case for Thumb1 UCMP only

  if (!IsSigned && Subtarget->isThumb1Only()) {

    // For Thumb unsigned comparison, use this sequence:

    // subs r2, r0, r1   ; r2 = LHS - RHS, sets flags

    // sbc r2, r2        ; r2 = r2 - r2 - !carry

    // cmp r1, r0        ; compare RHS with LHS

    // sbc r1, r1        ; r1 = r1 - r1 - !carry

    // subs r0, r2, r1   ; r0 = r2 - r1 (final result)


    // First subtraction: LHS - RHS

    SDValue Sub1WithFlags = DAG.getNode(

        ARMISD::SUBC, dl, DAG.getVTList(MVT::i32, FlagsVT), LHS, RHS);

    SDValue Sub1Result = Sub1WithFlags.getValue(0);

    SDValue Flags1 = Sub1WithFlags.getValue(1);


    // SUBE: Sub1Result - Sub1Result - !carry

    // This gives 0 if LHS >= RHS (unsigned), -1 if LHS < RHS (unsigned)

    SDValue Sbc1 =

        DAG.getNode(ARMISD::SUBE, dl, DAG.getVTList(MVT::i32, FlagsVT),

                    Sub1Result, Sub1Result, Flags1);

    SDValue Sbc1Result = Sbc1.getValue(0);


    // Second comparison: RHS vs LHS (reverse comparison)

    SDValue CmpFlags = DAG.getNode(ARMISD::CMP, dl, FlagsVT, RHS, LHS);


    // SUBE: RHS - RHS - !carry

    // This gives 0 if RHS <= LHS (unsigned), -1 if RHS > LHS (unsigned)

    SDValue Sbc2 = DAG.getNode(

        ARMISD::SUBE, dl, DAG.getVTList(MVT::i32, FlagsVT), RHS, RHS, CmpFlags);

    SDValue Sbc2Result = Sbc2.getValue(0);


    // Final subtraction: Sbc1Result - Sbc2Result (no flags needed)

    SDValue Result =

        DAG.getNode(ISD::SUB, dl, MVT::i32, Sbc1Result, Sbc2Result);

    if (Op.getValueType() != MVT::i32)

      Result = DAG.getSExtOrTrunc(Result, dl, Op.getValueType());


    return Result;

  }


  // For the ARM assembly pattern:

  // subs r0, r0, r1   ; subtract RHS from LHS and set flags

  // movgt r0, #1      ; if LHS > RHS, set result to 1 (GT for signed, HI for

  // unsigned) mvnlt r0, #0      ; if LHS < RHS, set result to -1 (LT for

  // signed, LO for unsigned)

  //                   ; if LHS == RHS, result remains 0 from the subs


  // Optimization: if RHS is a subtraction against 0, use ADDC instead of SUBC

  unsigned Opcode = ARMISD::SUBC;


  // Check if RHS is a subtraction against 0: (0 - X)

  if (RHS.getOpcode() == ISD::SUB) {

    SDValue SubLHS = RHS.getOperand(0);

    SDValue SubRHS = RHS.getOperand(1);


    // Check if it's 0 - X

    if (isNullConstant(SubLHS)) {

      bool CanUseAdd = false;

      if (IsSigned) {

        // For SCMP: only if X is known to never be INT_MIN (to avoid overflow)

        if (RHS->getFlags().hasNoSignedWrap() || !DAG.computeKnownBits(SubRHS)

                                                      .getSignedMinValue()

                                                      .isMinSignedValue()) {

          CanUseAdd = true;

        }

      } else {

        // For UCMP: only if X is known to never be zero

        if (DAG.isKnownNeverZero(SubRHS)) {

          CanUseAdd = true;

        }

      }


      if (CanUseAdd) {

        Opcode = ARMISD::ADDC;

        RHS = SubRHS; // Replace RHS with X, so we do LHS + X instead of

                      // LHS - (0 - X)

      }

    }

  }


  // Generate the operation with flags

  SDValue OpWithFlags =

      DAG.getNode(Opcode, dl, DAG.getVTList(MVT::i32, FlagsVT), LHS, RHS);


  SDValue OpResult = OpWithFlags.getValue(0);

  SDValue Flags = OpWithFlags.getValue(1);


  // Constants for conditional moves

  SDValue One = DAG.getConstant(1, dl, MVT::i32);

  SDValue MinusOne = DAG.getAllOnesConstant(dl, MVT::i32);


  // Select condition codes based on signed vs unsigned

  ARMCC::CondCodes GTCond = IsSigned ? ARMCC::GT : ARMCC::HI;

  ARMCC::CondCodes LTCond = IsSigned ? ARMCC::LT : ARMCC::LO;


  // First conditional move: if greater than, set to 1

  SDValue GTCondValue = DAG.getConstant(GTCond, dl, MVT::i32);

  SDValue Result1 = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, OpResult, One,

                                GTCondValue, Flags);


  // Second conditional move: if less than, set to -1

  SDValue LTCondValue = DAG.getConstant(LTCond, dl, MVT::i32);

  SDValue Result2 = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, Result1, MinusOne,

                                LTCondValue, Flags);


  if (Op.getValueType() != MVT::i32)

    Result2 = DAG.getSExtOrTrunc(Result2, dl, Op.getValueType());


  return Result2;

}


SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {

  LLVM_DEBUG(dbgs() << "Lowering node: "; Op.dump());

  switch (Op.getOpcode()) {

  default: llvm_unreachable("Don't know how to custom lower this!");

  case ISD::WRITE_REGISTER: return LowerWRITE_REGISTER(Op, DAG);

  case ISD::ConstantPool: return LowerConstantPool(Op, DAG);

  case ISD::BlockAddress:  return LowerBlockAddress(Op, DAG);

  case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);

  case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);

  case ISD::SELECT:        return LowerSELECT(Op, DAG);

  case ISD::SELECT_CC:     return LowerSELECT_CC(Op, DAG);

  case ISD::BRCOND:        return LowerBRCOND(Op, DAG);

  case ISD::BR_CC:         return LowerBR_CC(Op, DAG);

  case ISD::BR_JT:         return LowerBR_JT(Op, DAG);

  case ISD::VASTART:       return LowerVASTART(Op, DAG);

  case ISD::ATOMIC_FENCE:  return LowerATOMIC_FENCE(Op, DAG, Subtarget);

  case ISD::PREFETCH:      return LowerPREFETCH(Op, DAG, Subtarget);

  case ISD::SINT_TO_FP:

  case ISD::UINT_TO_FP:    return LowerINT_TO_FP(Op, DAG);

  case ISD::STRICT_FP_TO_SINT:

  case ISD::STRICT_FP_TO_UINT:

  case ISD::FP_TO_SINT:

  case ISD::FP_TO_UINT:    return LowerFP_TO_INT(Op, DAG);

  case ISD::FP_TO_SINT_SAT:

  case ISD::FP_TO_UINT_SAT: return LowerFP_TO_INT_SAT(Op, DAG, Subtarget);

  case ISD::FCOPYSIGN:     return LowerFCOPYSIGN(Op, DAG);

  case ISD::RETURNADDR:    return LowerRETURNADDR(Op, DAG);

  case ISD::FRAMEADDR:     return LowerFRAMEADDR(Op, DAG);

  case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG);

  case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG);

  case ISD::EH_SJLJ_SETUP_DISPATCH: return LowerEH_SJLJ_SETUP_DISPATCH(Op, DAG);

  case ISD::INTRINSIC_VOID: return LowerINTRINSIC_VOID(Op, DAG, Subtarget);

  case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG,

                                                               Subtarget);

  case ISD::BITCAST:       return ExpandBITCAST(Op.getNode(), DAG, Subtarget);

  case ISD::SHL:

  case ISD::SRL:

  case ISD::SRA:           return LowerShift(Op.getNode(), DAG, Subtarget);

  case ISD::SREM:          return LowerREM(Op.getNode(), DAG);

  case ISD::UREM:          return LowerREM(Op.getNode(), DAG);

  case ISD::SHL_PARTS:     return LowerShiftLeftParts(Op, DAG);

  case ISD::SRL_PARTS:

  case ISD::SRA_PARTS:     return LowerShiftRightParts(Op, DAG);

  case ISD::CTTZ:

  case ISD::CTTZ_ZERO_UNDEF: return LowerCTTZ(Op.getNode(), DAG, Subtarget);

  case ISD::CTPOP:         return LowerCTPOP(Op.getNode(), DAG, Subtarget);

  case ISD::SETCC:         return LowerVSETCC(Op, DAG, Subtarget);

  case ISD::SETCCCARRY:    return LowerSETCCCARRY(Op, DAG);

  case ISD::ConstantFP:    return LowerConstantFP(Op, DAG, Subtarget);

  case ISD::BUILD_VECTOR:  return LowerBUILD_VECTOR(Op, DAG, Subtarget);

  case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG, Subtarget);

  case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG, Subtarget);

  case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);

  case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG, Subtarget);

  case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG, Subtarget);

  case ISD::TRUNCATE:      return LowerTruncate(Op.getNode(), DAG, Subtarget);

  case ISD::SIGN_EXTEND:

  case ISD::ZERO_EXTEND:   return LowerVectorExtend(Op.getNode(), DAG, Subtarget);

  case ISD::GET_ROUNDING:  return LowerGET_ROUNDING(Op, DAG);

  case ISD::SET_ROUNDING:  return LowerSET_ROUNDING(Op, DAG);

  case ISD::SET_FPMODE:

    return LowerSET_FPMODE(Op, DAG);

  case ISD::RESET_FPMODE:

    return LowerRESET_FPMODE(Op, DAG);

  case ISD::MUL:           return LowerMUL(Op, DAG);

  case ISD::SDIV:

    if (Subtarget->isTargetWindows() && !Op.getValueType().isVector())

      return LowerDIV_Windows(Op, DAG, /* Signed */ true);

    return LowerSDIV(Op, DAG, Subtarget);

  case ISD::UDIV:

    if (Subtarget->isTargetWindows() && !Op.getValueType().isVector())

      return LowerDIV_Windows(Op, DAG, /* Signed */ false);

    return LowerUDIV(Op, DAG, Subtarget);

  case ISD::UADDO_CARRY:

  case ISD::USUBO_CARRY:

    return LowerUADDSUBO_CARRY(Op, DAG);

  case ISD::SADDO:

  case ISD::SSUBO:

    return LowerSignedALUO(Op, DAG);

  case ISD::UADDO:

  case ISD::USUBO:

    return LowerUnsignedALUO(Op, DAG);

  case ISD::SADDSAT:

  case ISD::SSUBSAT:

  case ISD::UADDSAT:

  case ISD::USUBSAT:

    return LowerADDSUBSAT(Op, DAG, Subtarget);

  case ISD::LOAD:

    return LowerPredicateLoad(Op, DAG);

  case ISD::STORE:

    return LowerSTORE(Op, DAG, Subtarget);

  case ISD::MLOAD:

    return LowerMLOAD(Op, DAG);

  case ISD::VECREDUCE_MUL:

  case ISD::VECREDUCE_AND:

  case ISD::VECREDUCE_OR:

  case ISD::VECREDUCE_XOR:

    return LowerVecReduce(Op, DAG, Subtarget);

  case ISD::VECREDUCE_FADD:

  case ISD::VECREDUCE_FMUL:

  case ISD::VECREDUCE_FMIN:

  case ISD::VECREDUCE_FMAX:

    return LowerVecReduceF(Op, DAG, Subtarget);

  case ISD::VECREDUCE_UMIN:

  case ISD::VECREDUCE_UMAX:

  case ISD::VECREDUCE_SMIN:

  case ISD::VECREDUCE_SMAX:

    return LowerVecReduceMinMax(Op, DAG, Subtarget);

  case ISD::ATOMIC_LOAD:

  case ISD::ATOMIC_STORE:

    return LowerAtomicLoadStore(Op, DAG);

  case ISD::SDIVREM:

  case ISD::UDIVREM:       return LowerDivRem(Op, DAG);

  case ISD::DYNAMIC_STACKALLOC:

    if (Subtarget->isTargetWindows())

      return LowerDYNAMIC_STACKALLOC(Op, DAG);

    llvm_unreachable("Don't know how to custom lower this!");

  case ISD::STRICT_FP_ROUND:

  case ISD::FP_ROUND: return LowerFP_ROUND(Op, DAG);

  case ISD::STRICT_FP_EXTEND:

  case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);

  case ISD::STRICT_FSETCC:

  case ISD::STRICT_FSETCCS: return LowerFSETCC(Op, DAG);

  case ISD::SPONENTRY:

    return LowerSPONENTRY(Op, DAG);

  case ISD::FP_TO_BF16:

    return LowerFP_TO_BF16(Op, DAG);

  case ARMISD::WIN__DBZCHK: return SDValue();

  case ISD::UCMP:

  case ISD::SCMP:

    return LowerCMP(Op, DAG);

  case ISD::ABS:

    return LowerABS(Op, DAG);

  case ISD::STRICT_LROUND:

  case ISD::STRICT_LLROUND:

  case ISD::STRICT_LRINT:

  case ISD::STRICT_LLRINT: {

    assert((Op.getOperand(1).getValueType() == MVT::f16 ||

            Op.getOperand(1).getValueType() == MVT::bf16) &&

           "Expected custom lowering of rounding operations only for f16");

    SDLoc DL(Op);

    SDValue Ext = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},

                              {Op.getOperand(0), Op.getOperand(1)});

    return DAG.getNode(Op.getOpcode(), DL, {Op.getValueType(), MVT::Other},

                       {Ext.getValue(1), Ext.getValue(0)});

  }

  }

}


static void ReplaceLongIntrinsic(SDNode *N, SmallVectorImpl<SDValue> &Results,

                                 SelectionDAG &DAG) {

  unsigned IntNo = N->getConstantOperandVal(0);

  unsigned Opc = 0;

  if (IntNo == Intrinsic::arm_smlald)

    Opc = ARMISD::SMLALD;

  else if (IntNo == Intrinsic::arm_smlaldx)

    Opc = ARMISD::SMLALDX;

  else if (IntNo == Intrinsic::arm_smlsld)

    Opc = ARMISD::SMLSLD;

  else if (IntNo == Intrinsic::arm_smlsldx)

    Opc = ARMISD::SMLSLDX;

  else

    return;


  SDLoc dl(N);

  SDValue Lo, Hi;

  std::tie(Lo, Hi) = DAG.SplitScalar(N->getOperand(3), dl, MVT::i32, MVT::i32);


  SDValue LongMul = DAG.getNode(Opc, dl,

                                DAG.getVTList(MVT::i32, MVT::i32),

                                N->getOperand(1), N->getOperand(2),

                                Lo, Hi);

  Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64,

                                LongMul.getValue(0), LongMul.getValue(1)));

}


/// ReplaceNodeResults - Replace the results of node with an illegal result

/// type with new values built out of custom code.


void ARMTargetLowering::ReplaceNodeResults(SDNode *N,

                                           SmallVectorImpl<SDValue> &Results,

                                           SelectionDAG &DAG) const {

  SDValue Res;

  switch (N->getOpcode()) {

  default:

    llvm_unreachable("Don't know how to custom expand this!");

  case ISD::READ_REGISTER:

    ExpandREAD_REGISTER(N, Results, DAG);

    break;

  case ISD::BITCAST:

    Res = ExpandBITCAST(N, DAG, Subtarget);

    break;

  case ISD::SRL:

  case ISD::SRA:

  case ISD::SHL:

    Res = Expand64BitShift(N, DAG, Subtarget);

    break;

  case ISD::SREM:

  case ISD::UREM:

    Res = LowerREM(N, DAG);

    break;

  case ISD::SDIVREM:

  case ISD::UDIVREM:

    Res = LowerDivRem(SDValue(N, 0), DAG);

    assert(Res.getNumOperands() == 2 && "DivRem needs two values");

    Results.push_back(Res.getValue(0));

    Results.push_back(Res.getValue(1));

    return;

  case ISD::SADDSAT:

  case ISD::SSUBSAT:

  case ISD::UADDSAT:

  case ISD::USUBSAT:

    Res = LowerADDSUBSAT(SDValue(N, 0), DAG, Subtarget);

    break;

  case ISD::READCYCLECOUNTER:

    ReplaceREADCYCLECOUNTER(N, Results, DAG, Subtarget);

    return;

  case ISD::UDIV:

  case ISD::SDIV:

    assert(Subtarget->isTargetWindows() && "can only expand DIV on Windows");

    return ExpandDIV_Windows(SDValue(N, 0), DAG, N->getOpcode() == ISD::SDIV,

                             Results);

  case ISD::ATOMIC_CMP_SWAP:

    ReplaceCMP_SWAP_64Results(N, Results, DAG);

    return;

  case ISD::INTRINSIC_WO_CHAIN:

    return ReplaceLongIntrinsic(N, Results, DAG);

  case ISD::LOAD:

    LowerLOAD(N, Results, DAG);

    break;

  case ISD::TRUNCATE:

    Res = LowerTruncate(N, DAG, Subtarget);

    break;

  case ISD::SIGN_EXTEND:

  case ISD::ZERO_EXTEND:

    Res = LowerVectorExtend(N, DAG, Subtarget);

    break;

  case ISD::FP_TO_SINT_SAT:

  case ISD::FP_TO_UINT_SAT:

    Res = LowerFP_TO_INT_SAT(SDValue(N, 0), DAG, Subtarget);

    break;

  }

  if (Res.getNode())

    Results.push_back(Res);

}


//===----------------------------------------------------------------------===//

//                           ARM Scheduler Hooks

//===----------------------------------------------------------------------===//


/// SetupEntryBlockForSjLj - Insert code into the entry block that creates and

/// registers the function context.

void ARMTargetLowering::SetupEntryBlockForSjLj(MachineInstr &MI,

                                               MachineBasicBlock *MBB,

                                               MachineBasicBlock *DispatchBB,

                                               int FI) const {

  assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&

         "ROPI/RWPI not currently supported with SjLj");

  const TargetInstrInfo *TII = Subtarget->getInstrInfo();

  DebugLoc dl = MI.getDebugLoc();

  MachineFunction *MF = MBB->getParent();

  MachineRegisterInfo *MRI = &MF->getRegInfo();

  MachineConstantPool *MCP = MF->getConstantPool();

  ARMFunctionInfo *AFI = MF->getInfo<ARMFunctionInfo>();

  const Function &F = MF->getFunction();


  bool isThumb = Subtarget->isThumb();

  bool isThumb2 = Subtarget->isThumb2();


  unsigned PCLabelId = AFI->createPICLabelUId();

  unsigned PCAdj = (isThumb || isThumb2) ? 4 : 8;

  ARMConstantPoolValue *CPV =

    ARMConstantPoolMBB::Create(F.getContext(), DispatchBB, PCLabelId, PCAdj);

  unsigned CPI = MCP->getConstantPoolIndex(CPV, Align(4));


  const TargetRegisterClass *TRC = isThumb ? &ARM::tGPRRegClass

                                           : &ARM::GPRRegClass;


  // Grab constant pool and fixed stack memory operands.

  MachineMemOperand *CPMMO =

      MF->getMachineMemOperand(MachinePointerInfo::getConstantPool(*MF),

                               MachineMemOperand::MOLoad, 4, Align(4));


  MachineMemOperand *FIMMOSt =

      MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(*MF, FI),

                               MachineMemOperand::MOStore, 4, Align(4));


  // Load the address of the dispatch MBB into the jump buffer.

  if (isThumb2) {

    // Incoming value: jbuf

    //   ldr.n  r5, LCPI1_1

    //   orr    r5, r5, #1

    //   add    r5, pc

    //   str    r5, [$jbuf, #+4] ; &jbuf[1]

    Register NewVReg1 = MRI->createVirtualRegister(TRC);

    BuildMI(*MBB, MI, dl, TII->get(ARM::t2LDRpci), NewVReg1)

        .addConstantPoolIndex(CPI)

        .addMemOperand(CPMMO)

        .add(predOps(ARMCC::AL));

    // Set the low bit because of thumb mode.

    Register NewVReg2 = MRI->createVirtualRegister(TRC);

    BuildMI(*MBB, MI, dl, TII->get(ARM::t2ORRri), NewVReg2)

        .addReg(NewVReg1, RegState::Kill)

        .addImm(0x01)

        .add(predOps(ARMCC::AL))

        .add(condCodeOp());

    Register NewVReg3 = MRI->createVirtualRegister(TRC);

    BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg3)

      .addReg(NewVReg2, RegState::Kill)

      .addImm(PCLabelId);

    BuildMI(*MBB, MI, dl, TII->get(ARM::t2STRi12))

        .addReg(NewVReg3, RegState::Kill)

        .addFrameIndex(FI)

        .addImm(36) // &jbuf[1] :: pc

        .addMemOperand(FIMMOSt)

        .add(predOps(ARMCC::AL));

  } else if (isThumb) {

    // Incoming value: jbuf

    //   ldr.n  r1, LCPI1_4

    //   add    r1, pc

    //   mov    r2, #1

    //   orrs   r1, r2

    //   add    r2, $jbuf, #+4 ; &jbuf[1]

    //   str    r1, [r2]

    Register NewVReg1 = MRI->createVirtualRegister(TRC);

    BuildMI(*MBB, MI, dl, TII->get(ARM::tLDRpci), NewVReg1)

        .addConstantPoolIndex(CPI)

        .addMemOperand(CPMMO)

        .add(predOps(ARMCC::AL));

    Register NewVReg2 = MRI->createVirtualRegister(TRC);

    BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg2)

      .addReg(NewVReg1, RegState::Kill)

      .addImm(PCLabelId);

    // Set the low bit because of thumb mode.

    Register NewVReg3 = MRI->createVirtualRegister(TRC);

    BuildMI(*MBB, MI, dl, TII->get(ARM::tMOVi8), NewVReg3)

        .addReg(ARM::CPSR, RegState::Define)

        .addImm(1)

        .add(predOps(ARMCC::AL));

    Register NewVReg4 = MRI->createVirtualRegister(TRC);

    BuildMI(*MBB, MI, dl, TII->get(ARM::tORR), NewVReg4)

        .addReg(ARM::CPSR, RegState::Define)

        .addReg(NewVReg2, RegState::Kill)

        .addReg(NewVReg3, RegState::Kill)

        .add(predOps(ARMCC::AL));

    Register NewVReg5 = MRI->createVirtualRegister(TRC);

    BuildMI(*MBB, MI, dl, TII->get(ARM::tADDframe), NewVReg5)

            .addFrameIndex(FI)

            .addImm(36); // &jbuf[1] :: pc

    BuildMI(*MBB, MI, dl, TII->get(ARM::tSTRi))

        .addReg(NewVReg4, RegState::Kill)

        .addReg(NewVReg5, RegState::Kill)

        .addImm(0)

        .addMemOperand(FIMMOSt)

        .add(predOps(ARMCC::AL));

  } else {

    // Incoming value: jbuf

    //   ldr  r1, LCPI1_1

    //   add  r1, pc, r1

    //   str  r1, [$jbuf, #+4] ; &jbuf[1]

    Register NewVReg1 = MRI->createVirtualRegister(TRC);

    BuildMI(*MBB, MI, dl, TII->get(ARM::LDRi12), NewVReg1)

        .addConstantPoolIndex(CPI)

        .addImm(0)

        .addMemOperand(CPMMO)

        .add(predOps(ARMCC::AL));

    Register NewVReg2 = MRI->createVirtualRegister(TRC);

    BuildMI(*MBB, MI, dl, TII->get(ARM::PICADD), NewVReg2)

        .addReg(NewVReg1, RegState::Kill)

        .addImm(PCLabelId)

        .add(predOps(ARMCC::AL));

    BuildMI(*MBB, MI, dl, TII->get(ARM::STRi12))

        .addReg(NewVReg2, RegState::Kill)

        .addFrameIndex(FI)

        .addImm(36) // &jbuf[1] :: pc

        .addMemOperand(FIMMOSt)

        .add(predOps(ARMCC::AL));

  }

}


void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,

                                              MachineBasicBlock *MBB) const {

  const TargetInstrInfo *TII = Subtarget->getInstrInfo();

  DebugLoc dl = MI.getDebugLoc();

  MachineFunction *MF = MBB->getParent();

  MachineRegisterInfo *MRI = &MF->getRegInfo();

  MachineFrameInfo &MFI = MF->getFrameInfo();

  int FI = MFI.getFunctionContextIndex();


  const TargetRegisterClass *TRC = Subtarget->isThumb() ? &ARM::tGPRRegClass

                                                        : &ARM::GPRnopcRegClass;


  // Get a mapping of the call site numbers to all of the landing pads they're

  // associated with.

  DenseMap<unsigned, SmallVector<MachineBasicBlock*, 2>> CallSiteNumToLPad;

  unsigned MaxCSNum = 0;

  for (MachineBasicBlock &BB : *MF) {

    if (!BB.isEHPad())

      continue;


    // FIXME: We should assert that the EH_LABEL is the first MI in the landing

    // pad.

    for (MachineInstr &II : BB) {

      if (!II.isEHLabel())

        continue;


      MCSymbol *Sym = II.getOperand(0).getMCSymbol();

      if (!MF->hasCallSiteLandingPad(Sym)) continue;


      SmallVectorImpl<unsigned> &CallSiteIdxs = MF->getCallSiteLandingPad(Sym);

      for (unsigned Idx : CallSiteIdxs) {

        CallSiteNumToLPad[Idx].push_back(&BB);

        MaxCSNum = std::max(MaxCSNum, Idx);

      }

      break;

    }

  }


  // Get an ordered list of the machine basic blocks for the jump table.

  std::vector<MachineBasicBlock*> LPadList;

  SmallPtrSet<MachineBasicBlock*, 32> InvokeBBs;

  LPadList.reserve(CallSiteNumToLPad.size());

  for (unsigned I = 1; I <= MaxCSNum; ++I) {

    SmallVectorImpl<MachineBasicBlock*> &MBBList = CallSiteNumToLPad[I];

    for (MachineBasicBlock *MBB : MBBList) {

      LPadList.push_back(MBB);

      InvokeBBs.insert_range(MBB->predecessors());

    }

  }


  assert(!LPadList.empty() &&

         "No landing pad destinations for the dispatch jump table!");


  // Create the jump table and associated information.

  MachineJumpTableInfo *JTI =

    MF->getOrCreateJumpTableInfo(MachineJumpTableInfo::EK_Inline);

  unsigned MJTI = JTI->createJumpTableIndex(LPadList);


  // Create the MBBs for the dispatch code.


  // Shove the dispatch's address into the return slot in the function context.

  MachineBasicBlock *DispatchBB = MF->CreateMachineBasicBlock();

  DispatchBB->setIsEHPad();


  MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();


  BuildMI(TrapBB, dl, TII->get(Subtarget->isThumb() ? ARM::tTRAP : ARM::TRAP));

  DispatchBB->addSuccessor(TrapBB);


  MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock();

  DispatchBB->addSuccessor(DispContBB);


  // Insert and MBBs.

  MF->insert(MF->end(), DispatchBB);

  MF->insert(MF->end(), DispContBB);

  MF->insert(MF->end(), TrapBB);


  // Insert code into the entry block that creates and registers the function

  // context.

  SetupEntryBlockForSjLj(MI, MBB, DispatchBB, FI);


  MachineMemOperand *FIMMOLd = MF->getMachineMemOperand(

      MachinePointerInfo::getFixedStack(*MF, FI),

      MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile, 4, Align(4));


  MachineInstrBuilder MIB;

  MIB = BuildMI(DispatchBB, dl, TII->get(ARM::Int_eh_sjlj_dispatchsetup));


  const ARMBaseInstrInfo *AII = static_cast<const ARMBaseInstrInfo*>(TII);

  const ARMBaseRegisterInfo &RI = AII->getRegisterInfo();


  // Add a register mask with no preserved registers.  This results in all

  // registers being marked as clobbered. This can't work if the dispatch block

  // is in a Thumb1 function and is linked with ARM code which uses the FP

  // registers, as there is no way to preserve the FP registers in Thumb1 mode.

  MIB.addRegMask(RI.getSjLjDispatchPreservedMask(*MF));


  bool IsPositionIndependent = isPositionIndependent();

  unsigned NumLPads = LPadList.size();

  if (Subtarget->isThumb2()) {

    Register NewVReg1 = MRI->createVirtualRegister(TRC);

    BuildMI(DispatchBB, dl, TII->get(ARM::t2LDRi12), NewVReg1)

        .addFrameIndex(FI)

        .addImm(4)

        .addMemOperand(FIMMOLd)

        .add(predOps(ARMCC::AL));


    if (NumLPads < 256) {

      BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPri))

          .addReg(NewVReg1)

          .addImm(LPadList.size())

          .add(predOps(ARMCC::AL));

    } else {

      Register VReg1 = MRI->createVirtualRegister(TRC);

      BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVi16), VReg1)

          .addImm(NumLPads & 0xFFFF)

          .add(predOps(ARMCC::AL));


      unsigned VReg2 = VReg1;

      if ((NumLPads & 0xFFFF0000) != 0) {

        VReg2 = MRI->createVirtualRegister(TRC);

        BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVTi16), VReg2)

            .addReg(VReg1)

            .addImm(NumLPads >> 16)

            .add(predOps(ARMCC::AL));

      }


      BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPrr))

          .addReg(NewVReg1)

          .addReg(VReg2)

          .add(predOps(ARMCC::AL));

    }


    BuildMI(DispatchBB, dl, TII->get(ARM::t2Bcc))

      .addMBB(TrapBB)

      .addImm(ARMCC::HI)

      .addReg(ARM::CPSR);


    Register NewVReg3 = MRI->createVirtualRegister(TRC);

    BuildMI(DispContBB, dl, TII->get(ARM::t2LEApcrelJT), NewVReg3)

        .addJumpTableIndex(MJTI)

        .add(predOps(ARMCC::AL));


    Register NewVReg4 = MRI->createVirtualRegister(TRC);

    BuildMI(DispContBB, dl, TII->get(ARM::t2ADDrs), NewVReg4)

        .addReg(NewVReg3, RegState::Kill)

        .addReg(NewVReg1)

        .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2))

        .add(predOps(ARMCC::AL))

        .add(condCodeOp());


    BuildMI(DispContBB, dl, TII->get(ARM::t2BR_JT))

      .addReg(NewVReg4, RegState::Kill)

      .addReg(NewVReg1)

      .addJumpTableIndex(MJTI);

  } else if (Subtarget->isThumb()) {

    Register NewVReg1 = MRI->createVirtualRegister(TRC);

    BuildMI(DispatchBB, dl, TII->get(ARM::tLDRspi), NewVReg1)

        .addFrameIndex(FI)

        .addImm(1)

        .addMemOperand(FIMMOLd)

        .add(predOps(ARMCC::AL));


    if (NumLPads < 256) {

      BuildMI(DispatchBB, dl, TII->get(ARM::tCMPi8))

          .addReg(NewVReg1)

          .addImm(NumLPads)

          .add(predOps(ARMCC::AL));

    } else {

      MachineConstantPool *ConstantPool = MF->getConstantPool();

      Type *Int32Ty = Type::getInt32Ty(MF->getFunction().getContext());

      const Constant *C = ConstantInt::get(Int32Ty, NumLPads);


      // MachineConstantPool wants an explicit alignment.

      Align Alignment = MF->getDataLayout().getPrefTypeAlign(Int32Ty);

      unsigned Idx = ConstantPool->getConstantPoolIndex(C, Alignment);


      Register VReg1 = MRI->createVirtualRegister(TRC);

      BuildMI(DispatchBB, dl, TII->get(ARM::tLDRpci))

          .addReg(VReg1, RegState::Define)

          .addConstantPoolIndex(Idx)

          .add(predOps(ARMCC::AL));

      BuildMI(DispatchBB, dl, TII->get(ARM::tCMPr))

          .addReg(NewVReg1)

          .addReg(VReg1)

          .add(predOps(ARMCC::AL));

    }


    BuildMI(DispatchBB, dl, TII->get(ARM::tBcc))

      .addMBB(TrapBB)

      .addImm(ARMCC::HI)

      .addReg(ARM::CPSR);


    Register NewVReg2 = MRI->createVirtualRegister(TRC);

    BuildMI(DispContBB, dl, TII->get(ARM::tLSLri), NewVReg2)

        .addReg(ARM::CPSR, RegState::Define)

        .addReg(NewVReg1)

        .addImm(2)

        .add(predOps(ARMCC::AL));


    Register NewVReg3 = MRI->createVirtualRegister(TRC);

    BuildMI(DispContBB, dl, TII->get(ARM::tLEApcrelJT), NewVReg3)

        .addJumpTableIndex(MJTI)

        .add(predOps(ARMCC::AL));


    Register NewVReg4 = MRI->createVirtualRegister(TRC);

    BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg4)

        .addReg(ARM::CPSR, RegState::Define)

        .addReg(NewVReg2, RegState::Kill)

        .addReg(NewVReg3)

        .add(predOps(ARMCC::AL));


    MachineMemOperand *JTMMOLd =

        MF->getMachineMemOperand(MachinePointerInfo::getJumpTable(*MF),

                                 MachineMemOperand::MOLoad, 4, Align(4));


    Register NewVReg5 = MRI->createVirtualRegister(TRC);

    BuildMI(DispContBB, dl, TII->get(ARM::tLDRi), NewVReg5)

        .addReg(NewVReg4, RegState::Kill)

        .addImm(0)

        .addMemOperand(JTMMOLd)

        .add(predOps(ARMCC::AL));


    unsigned NewVReg6 = NewVReg5;

    if (IsPositionIndependent) {

      NewVReg6 = MRI->createVirtualRegister(TRC);

      BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg6)

          .addReg(ARM::CPSR, RegState::Define)

          .addReg(NewVReg5, RegState::Kill)

          .addReg(NewVReg3)

          .add(predOps(ARMCC::AL));

    }


    BuildMI(DispContBB, dl, TII->get(ARM::tBR_JTr))

      .addReg(NewVReg6, RegState::Kill)

      .addJumpTableIndex(MJTI);

  } else {

    Register NewVReg1 = MRI->createVirtualRegister(TRC);

    BuildMI(DispatchBB, dl, TII->get(ARM::LDRi12), NewVReg1)

        .addFrameIndex(FI)

        .addImm(4)

        .addMemOperand(FIMMOLd)

        .add(predOps(ARMCC::AL));


    if (NumLPads < 256) {

      BuildMI(DispatchBB, dl, TII->get(ARM::CMPri))

          .addReg(NewVReg1)

          .addImm(NumLPads)

          .add(predOps(ARMCC::AL));

    } else if (Subtarget->hasV6T2Ops() && isUInt<16>(NumLPads)) {

      Register VReg1 = MRI->createVirtualRegister(TRC);

      BuildMI(DispatchBB, dl, TII->get(ARM::MOVi16), VReg1)

          .addImm(NumLPads & 0xFFFF)

          .add(predOps(ARMCC::AL));


      unsigned VReg2 = VReg1;

      if ((NumLPads & 0xFFFF0000) != 0) {

        VReg2 = MRI->createVirtualRegister(TRC);

        BuildMI(DispatchBB, dl, TII->get(ARM::MOVTi16), VReg2)

            .addReg(VReg1)

            .addImm(NumLPads >> 16)

            .add(predOps(ARMCC::AL));

      }


      BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr))

          .addReg(NewVReg1)

          .addReg(VReg2)

          .add(predOps(ARMCC::AL));

    } else {

      MachineConstantPool *ConstantPool = MF->getConstantPool();

      Type *Int32Ty = Type::getInt32Ty(MF->getFunction().getContext());

      const Constant *C = ConstantInt::get(Int32Ty, NumLPads);


      // MachineConstantPool wants an explicit alignment.

      Align Alignment = MF->getDataLayout().getPrefTypeAlign(Int32Ty);

      unsigned Idx = ConstantPool->getConstantPoolIndex(C, Alignment);


      Register VReg1 = MRI->createVirtualRegister(TRC);

      BuildMI(DispatchBB, dl, TII->get(ARM::LDRcp))

          .addReg(VReg1, RegState::Define)

          .addConstantPoolIndex(Idx)

          .addImm(0)

          .add(predOps(ARMCC::AL));

      BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr))

          .addReg(NewVReg1)

          .addReg(VReg1, RegState::Kill)

          .add(predOps(ARMCC::AL));

    }


    BuildMI(DispatchBB, dl, TII->get(ARM::Bcc))

      .addMBB(TrapBB)

      .addImm(ARMCC::HI)

      .addReg(ARM::CPSR);


    Register NewVReg3 = MRI->createVirtualRegister(TRC);

    BuildMI(DispContBB, dl, TII->get(ARM::MOVsi), NewVReg3)

        .addReg(NewVReg1)

        .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2))

        .add(predOps(ARMCC::AL))

        .add(condCodeOp());

    Register NewVReg4 = MRI->createVirtualRegister(TRC);

    BuildMI(DispContBB, dl, TII->get(ARM::LEApcrelJT), NewVReg4)

        .addJumpTableIndex(MJTI)

        .add(predOps(ARMCC::AL));


    MachineMemOperand *JTMMOLd =

        MF->getMachineMemOperand(MachinePointerInfo::getJumpTable(*MF),

                                 MachineMemOperand::MOLoad, 4, Align(4));

    Register NewVReg5 = MRI->createVirtualRegister(TRC);

    BuildMI(DispContBB, dl, TII->get(ARM::LDRrs), NewVReg5)

        .addReg(NewVReg3, RegState::Kill)

        .addReg(NewVReg4)

        .addImm(0)

        .addMemOperand(JTMMOLd)

        .add(predOps(ARMCC::AL));


    if (IsPositionIndependent) {

      BuildMI(DispContBB, dl, TII->get(ARM::BR_JTadd))

        .addReg(NewVReg5, RegState::Kill)

        .addReg(NewVReg4)

        .addJumpTableIndex(MJTI);

    } else {

      BuildMI(DispContBB, dl, TII->get(ARM::BR_JTr))

        .addReg(NewVReg5, RegState::Kill)

        .addJumpTableIndex(MJTI);

    }

  }


  // Add the jump table entries as successors to the MBB.

  SmallPtrSet<MachineBasicBlock*, 8> SeenMBBs;

  for (MachineBasicBlock *CurMBB : LPadList) {

    if (SeenMBBs.insert(CurMBB).second)

      DispContBB->addSuccessor(CurMBB);

  }


  // N.B. the order the invoke BBs are processed in doesn't matter here.

  const MCPhysReg *SavedRegs = RI.getCalleeSavedRegs(MF);

  SmallVector<MachineBasicBlock*, 64> MBBLPads;

  for (MachineBasicBlock *BB : InvokeBBs) {


    // Remove the landing pad successor from the invoke block and replace it

    // with the new dispatch block.

    SmallVector<MachineBasicBlock*, 4> Successors(BB->successors());

    while (!Successors.empty()) {

      MachineBasicBlock *SMBB = Successors.pop_back_val();

      if (SMBB->isEHPad()) {

        BB->removeSuccessor(SMBB);

        MBBLPads.push_back(SMBB);

      }

    }


    BB->addSuccessor(DispatchBB, BranchProbability::getZero());

    BB->normalizeSuccProbs();


    // Find the invoke call and mark all of the callee-saved registers as

    // 'implicit defined' so that they're spilled. This prevents code from

    // moving instructions to before the EH block, where they will never be

    // executed.

    for (MachineBasicBlock::reverse_iterator

           II = BB->rbegin(), IE = BB->rend(); II != IE; ++II) {

      if (!II->isCall()) continue;


      DenseSet<unsigned> DefRegs;

      for (MachineInstr::mop_iterator

             OI = II->operands_begin(), OE = II->operands_end();

           OI != OE; ++OI) {

        if (!OI->isReg()) continue;

        DefRegs.insert(OI->getReg());

      }


      MachineInstrBuilder MIB(*MF, &*II);


      for (unsigned i = 0; SavedRegs[i] != 0; ++i) {

        unsigned Reg = SavedRegs[i];

        if (Subtarget->isThumb2() &&

            !ARM::tGPRRegClass.contains(Reg) &&

            !ARM::hGPRRegClass.contains(Reg))

          continue;

        if (Subtarget->isThumb1Only() && !ARM::tGPRRegClass.contains(Reg))

          continue;

        if (!Subtarget->isThumb() && !ARM::GPRRegClass.contains(Reg))

          continue;

        if (!DefRegs.contains(Reg))

          MIB.addReg(Reg, RegState::ImplicitDefine | RegState::Dead);

      }


      break;

    }

  }


  // Mark all former landing pads as non-landing pads. The dispatch is the only

  // landing pad now.

  for (MachineBasicBlock *MBBLPad : MBBLPads)

    MBBLPad->setIsEHPad(false);


  // The instruction is gone now.

  MI.eraseFromParent();

}


static


MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) {

  for (MachineBasicBlock *S : MBB->successors())

    if (S != Succ)

      return S;

  llvm_unreachable("Expecting a BB with two successors!");

}


/// Return the load opcode for a given load size. If load size >= 8,

/// neon opcode will be returned.


static unsigned getLdOpcode(unsigned LdSize, bool IsThumb1, bool IsThumb2) {

  if (LdSize >= 8)

    return LdSize == 16 ? ARM::VLD1q32wb_fixed

                        : LdSize == 8 ? ARM::VLD1d32wb_fixed : 0;

  if (IsThumb1)

    return LdSize == 4 ? ARM::tLDRi

                       : LdSize == 2 ? ARM::tLDRHi

                                     : LdSize == 1 ? ARM::tLDRBi : 0;

  if (IsThumb2)

    return LdSize == 4 ? ARM::t2LDR_POST

                       : LdSize == 2 ? ARM::t2LDRH_POST

                                     : LdSize == 1 ? ARM::t2LDRB_POST : 0;

  return LdSize == 4 ? ARM::LDR_POST_IMM

                     : LdSize == 2 ? ARM::LDRH_POST

                                   : LdSize == 1 ? ARM::LDRB_POST_IMM : 0;

}


/// Return the store opcode for a given store size. If store size >= 8,

/// neon opcode will be returned.


static unsigned getStOpcode(unsigned StSize, bool IsThumb1, bool IsThumb2) {

  if (StSize >= 8)

    return StSize == 16 ? ARM::VST1q32wb_fixed

                        : StSize == 8 ? ARM::VST1d32wb_fixed : 0;

  if (IsThumb1)

    return StSize == 4 ? ARM::tSTRi

                       : StSize == 2 ? ARM::tSTRHi

                                     : StSize == 1 ? ARM::tSTRBi : 0;

  if (IsThumb2)

    return StSize == 4 ? ARM::t2STR_POST

                       : StSize == 2 ? ARM::t2STRH_POST

                                     : StSize == 1 ? ARM::t2STRB_POST : 0;

  return StSize == 4 ? ARM::STR_POST_IMM

                     : StSize == 2 ? ARM::STRH_POST

                                   : StSize == 1 ? ARM::STRB_POST_IMM : 0;

}


/// Emit a post-increment load operation with given size. The instructions

/// will be added to BB at Pos.


static void emitPostLd(MachineBasicBlock *BB, MachineBasicBlock::iterator Pos,

                       const TargetInstrInfo *TII, const DebugLoc &dl,

                       unsigned LdSize, unsigned Data, unsigned AddrIn,

                       unsigned AddrOut, bool IsThumb1, bool IsThumb2) {

  unsigned LdOpc = getLdOpcode(LdSize, IsThumb1, IsThumb2);

  assert(LdOpc != 0 && "Should have a load opcode");

  if (LdSize >= 8) {

    BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)

        .addReg(AddrOut, RegState::Define)

        .addReg(AddrIn)

        .addImm(0)

        .add(predOps(ARMCC::AL));

  } else if (IsThumb1) {

    // load + update AddrIn

    BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)

        .addReg(AddrIn)

        .addImm(0)

        .add(predOps(ARMCC::AL));

    BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut)

        .add(t1CondCodeOp())

        .addReg(AddrIn)

        .addImm(LdSize)

        .add(predOps(ARMCC::AL));

  } else if (IsThumb2) {

    BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)

        .addReg(AddrOut, RegState::Define)

        .addReg(AddrIn)

        .addImm(LdSize)

        .add(predOps(ARMCC::AL));

  } else { // arm

    BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)

        .addReg(AddrOut, RegState::Define)

        .addReg(AddrIn)

        .addReg(0)

        .addImm(LdSize)

        .add(predOps(ARMCC::AL));

  }

}


/// Emit a post-increment store operation with given size. The instructions

/// will be added to BB at Pos.


static void emitPostSt(MachineBasicBlock *BB, MachineBasicBlock::iterator Pos,

                       const TargetInstrInfo *TII, const DebugLoc &dl,

                       unsigned StSize, unsigned Data, unsigned AddrIn,

                       unsigned AddrOut, bool IsThumb1, bool IsThumb2) {

  unsigned StOpc = getStOpcode(StSize, IsThumb1, IsThumb2);

  assert(StOpc != 0 && "Should have a store opcode");

  if (StSize >= 8) {

    BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)

        .addReg(AddrIn)

        .addImm(0)

        .addReg(Data)

        .add(predOps(ARMCC::AL));

  } else if (IsThumb1) {

    // store + update AddrIn

    BuildMI(*BB, Pos, dl, TII->get(StOpc))

        .addReg(Data)

        .addReg(AddrIn)

        .addImm(0)

        .add(predOps(ARMCC::AL));

    BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut)

        .add(t1CondCodeOp())

        .addReg(AddrIn)

        .addImm(StSize)

        .add(predOps(ARMCC::AL));

  } else if (IsThumb2) {

    BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)

        .addReg(Data)

        .addReg(AddrIn)

        .addImm(StSize)

        .add(predOps(ARMCC::AL));

  } else { // arm

    BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)

        .addReg(Data)

        .addReg(AddrIn)

        .addReg(0)

        .addImm(StSize)

        .add(predOps(ARMCC::AL));

  }

}


MachineBasicBlock *

ARMTargetLowering::EmitStructByval(MachineInstr &MI,

                                   MachineBasicBlock *BB) const {

  // This pseudo instruction has 3 operands: dst, src, size

  // We expand it to a loop if size > Subtarget->getMaxInlineSizeThreshold().

  // Otherwise, we will generate unrolled scalar copies.

  const TargetInstrInfo *TII = Subtarget->getInstrInfo();

  const BasicBlock *LLVM_BB = BB->getBasicBlock();

  MachineFunction::iterator It = ++BB->getIterator();


  Register dest = MI.getOperand(0).getReg();

  Register src = MI.getOperand(1).getReg();

  unsigned SizeVal = MI.getOperand(2).getImm();

  unsigned Alignment = MI.getOperand(3).getImm();

  DebugLoc dl = MI.getDebugLoc();


  MachineFunction *MF = BB->getParent();

  MachineRegisterInfo &MRI = MF->getRegInfo();

  unsigned UnitSize = 0;

  const TargetRegisterClass *TRC = nullptr;

  const TargetRegisterClass *VecTRC = nullptr;


  bool IsThumb1 = Subtarget->isThumb1Only();

  bool IsThumb2 = Subtarget->isThumb2();

  bool IsThumb = Subtarget->isThumb();


  if (Alignment & 1) {

    UnitSize = 1;

  } else if (Alignment & 2) {

    UnitSize = 2;

  } else {

    // Check whether we can use NEON instructions.

    if (!MF->getFunction().hasFnAttribute(Attribute::NoImplicitFloat) &&

        Subtarget->hasNEON()) {

      if ((Alignment % 16 == 0) && SizeVal >= 16)

        UnitSize = 16;

      else if ((Alignment % 8 == 0) && SizeVal >= 8)

        UnitSize = 8;

    }

    // Can't use NEON instructions.

    if (UnitSize == 0)

      UnitSize = 4;

  }


  // Select the correct opcode and register class for unit size load/store

  bool IsNeon = UnitSize >= 8;

  TRC = IsThumb ? &ARM::tGPRRegClass : &ARM::GPRRegClass;

  if (IsNeon)

    VecTRC = UnitSize == 16 ? &ARM::DPairRegClass

                            : UnitSize == 8 ? &ARM::DPRRegClass

                                            : nullptr;


  unsigned BytesLeft = SizeVal % UnitSize;

  unsigned LoopSize = SizeVal - BytesLeft;


  if (SizeVal <= Subtarget->getMaxInlineSizeThreshold()) {

    // Use LDR and STR to copy.

    // [scratch, srcOut] = LDR_POST(srcIn, UnitSize)

    // [destOut] = STR_POST(scratch, destIn, UnitSize)

    unsigned srcIn = src;

    unsigned destIn = dest;

    for (unsigned i = 0; i < LoopSize; i+=UnitSize) {

      Register srcOut = MRI.createVirtualRegister(TRC);

      Register destOut = MRI.createVirtualRegister(TRC);

      Register scratch = MRI.createVirtualRegister(IsNeon ? VecTRC : TRC);

      emitPostLd(BB, MI, TII, dl, UnitSize, scratch, srcIn, srcOut,

                 IsThumb1, IsThumb2);

      emitPostSt(BB, MI, TII, dl, UnitSize, scratch, destIn, destOut,

                 IsThumb1, IsThumb2);

      srcIn = srcOut;

      destIn = destOut;

    }


    // Handle the leftover bytes with LDRB and STRB.

    // [scratch, srcOut] = LDRB_POST(srcIn, 1)

    // [destOut] = STRB_POST(scratch, destIn, 1)

    for (unsigned i = 0; i < BytesLeft; i++) {

      Register srcOut = MRI.createVirtualRegister(TRC);

      Register destOut = MRI.createVirtualRegister(TRC);

      Register scratch = MRI.createVirtualRegister(TRC);

      emitPostLd(BB, MI, TII, dl, 1, scratch, srcIn, srcOut,

                 IsThumb1, IsThumb2);

      emitPostSt(BB, MI, TII, dl, 1, scratch, destIn, destOut,

                 IsThumb1, IsThumb2);

      srcIn = srcOut;

      destIn = destOut;

    }

    MI.eraseFromParent(); // The instruction is gone now.

    return BB;

  }


  // Expand the pseudo op to a loop.

  // thisMBB:

  //   ...

  //   movw varEnd, # --> with thumb2

  //   movt varEnd, #

  //   ldrcp varEnd, idx --> without thumb2

  //   fallthrough --> loopMBB

  // loopMBB:

  //   PHI varPhi, varEnd, varLoop

  //   PHI srcPhi, src, srcLoop

  //   PHI destPhi, dst, destLoop

  //   [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize)

  //   [destLoop] = STR_POST(scratch, destPhi, UnitSize)

  //   subs varLoop, varPhi, #UnitSize

  //   bne loopMBB

  //   fallthrough --> exitMBB

  // exitMBB:

  //   epilogue to handle left-over bytes

  //   [scratch, srcOut] = LDRB_POST(srcLoop, 1)

  //   [destOut] = STRB_POST(scratch, destLoop, 1)

  MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);

  MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);

  MF->insert(It, loopMBB);

  MF->insert(It, exitMBB);


  // Set the call frame size on entry to the new basic blocks.

  unsigned CallFrameSize = TII->getCallFrameSizeAt(MI);

  loopMBB->setCallFrameSize(CallFrameSize);

  exitMBB->setCallFrameSize(CallFrameSize);


  // Transfer the remainder of BB and its successor edges to exitMBB.

  exitMBB->splice(exitMBB->begin(), BB,

                  std::next(MachineBasicBlock::iterator(MI)), BB->end());

  exitMBB->transferSuccessorsAndUpdatePHIs(BB);


  // Load an immediate to varEnd.

  Register varEnd = MRI.createVirtualRegister(TRC);

  if (Subtarget->useMovt()) {

    BuildMI(BB, dl, TII->get(IsThumb ? ARM::t2MOVi32imm : ARM::MOVi32imm),

            varEnd)

        .addImm(LoopSize);

  } else if (Subtarget->genExecuteOnly()) {

    assert(IsThumb && "Non-thumb expected to have used movt");

    BuildMI(BB, dl, TII->get(ARM::tMOVi32imm), varEnd).addImm(LoopSize);

  } else {

    MachineConstantPool *ConstantPool = MF->getConstantPool();

    Type *Int32Ty = Type::getInt32Ty(MF->getFunction().getContext());

    const Constant *C = ConstantInt::get(Int32Ty, LoopSize);


    // MachineConstantPool wants an explicit alignment.

    Align Alignment = MF->getDataLayout().getPrefTypeAlign(Int32Ty);

    unsigned Idx = ConstantPool->getConstantPoolIndex(C, Alignment);

    MachineMemOperand *CPMMO =

        MF->getMachineMemOperand(MachinePointerInfo::getConstantPool(*MF),

                                 MachineMemOperand::MOLoad, 4, Align(4));


    if (IsThumb)

      BuildMI(*BB, MI, dl, TII->get(ARM::tLDRpci))

          .addReg(varEnd, RegState::Define)

          .addConstantPoolIndex(Idx)

          .add(predOps(ARMCC::AL))

          .addMemOperand(CPMMO);

    else

      BuildMI(*BB, MI, dl, TII->get(ARM::LDRcp))

          .addReg(varEnd, RegState::Define)

          .addConstantPoolIndex(Idx)

          .addImm(0)

          .add(predOps(ARMCC::AL))

          .addMemOperand(CPMMO);

  }

  BB->addSuccessor(loopMBB);


  // Generate the loop body:

  //   varPhi = PHI(varLoop, varEnd)

  //   srcPhi = PHI(srcLoop, src)

  //   destPhi = PHI(destLoop, dst)

  MachineBasicBlock *entryBB = BB;

  BB = loopMBB;

  Register varLoop = MRI.createVirtualRegister(TRC);

  Register varPhi = MRI.createVirtualRegister(TRC);

  Register srcLoop = MRI.createVirtualRegister(TRC);

  Register srcPhi = MRI.createVirtualRegister(TRC);

  Register destLoop = MRI.createVirtualRegister(TRC);

  Register destPhi = MRI.createVirtualRegister(TRC);


  BuildMI(*BB, BB->begin(), dl, TII->get(ARM::PHI), varPhi)

    .addReg(varLoop).addMBB(loopMBB)

    .addReg(varEnd).addMBB(entryBB);

  BuildMI(BB, dl, TII->get(ARM::PHI), srcPhi)

    .addReg(srcLoop).addMBB(loopMBB)

    .addReg(src).addMBB(entryBB);

  BuildMI(BB, dl, TII->get(ARM::PHI), destPhi)

    .addReg(destLoop).addMBB(loopMBB)

    .addReg(dest).addMBB(entryBB);


  //   [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize)

  //   [destLoop] = STR_POST(scratch, destPhi, UnitSiz)

  Register scratch = MRI.createVirtualRegister(IsNeon ? VecTRC : TRC);

  emitPostLd(BB, BB->end(), TII, dl, UnitSize, scratch, srcPhi, srcLoop,

             IsThumb1, IsThumb2);

  emitPostSt(BB, BB->end(), TII, dl, UnitSize, scratch, destPhi, destLoop,

             IsThumb1, IsThumb2);


  // Decrement loop variable by UnitSize.

  if (IsThumb1) {

    BuildMI(*BB, BB->end(), dl, TII->get(ARM::tSUBi8), varLoop)

        .add(t1CondCodeOp())

        .addReg(varPhi)

        .addImm(UnitSize)

        .add(predOps(ARMCC::AL));

  } else {

    MachineInstrBuilder MIB =

        BuildMI(*BB, BB->end(), dl,

                TII->get(IsThumb2 ? ARM::t2SUBri : ARM::SUBri), varLoop);

    MIB.addReg(varPhi)

        .addImm(UnitSize)

        .add(predOps(ARMCC::AL))

        .add(condCodeOp());

    MIB->getOperand(5).setReg(ARM::CPSR);

    MIB->getOperand(5).setIsDef(true);

  }

  BuildMI(*BB, BB->end(), dl,

          TII->get(IsThumb1 ? ARM::tBcc : IsThumb2 ? ARM::t2Bcc : ARM::Bcc))

      .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);


  // loopMBB can loop back to loopMBB or fall through to exitMBB.

  BB->addSuccessor(loopMBB);

  BB->addSuccessor(exitMBB);


  // Add epilogue to handle BytesLeft.

  BB = exitMBB;

  auto StartOfExit = exitMBB->begin();


  //   [scratch, srcOut] = LDRB_POST(srcLoop, 1)

  //   [destOut] = STRB_POST(scratch, destLoop, 1)

  unsigned srcIn = srcLoop;

  unsigned destIn = destLoop;

  for (unsigned i = 0; i < BytesLeft; i++) {

    Register srcOut = MRI.createVirtualRegister(TRC);

    Register destOut = MRI.createVirtualRegister(TRC);

    Register scratch = MRI.createVirtualRegister(TRC);

    emitPostLd(BB, StartOfExit, TII, dl, 1, scratch, srcIn, srcOut,

               IsThumb1, IsThumb2);

    emitPostSt(BB, StartOfExit, TII, dl, 1, scratch, destIn, destOut,

               IsThumb1, IsThumb2);

    srcIn = srcOut;

    destIn = destOut;

  }


  MI.eraseFromParent(); // The instruction is gone now.

  return BB;

}


MachineBasicBlock *

ARMTargetLowering::EmitLowered__chkstk(MachineInstr &MI,

                                       MachineBasicBlock *MBB) const {

  const TargetMachine &TM = getTargetMachine();

  const TargetInstrInfo &TII = *Subtarget->getInstrInfo();

  DebugLoc DL = MI.getDebugLoc();


  assert(Subtarget->isTargetWindows() &&

         "__chkstk is only supported on Windows");

  assert(Subtarget->isThumb2() && "Windows on ARM requires Thumb-2 mode");


  // __chkstk takes the number of words to allocate on the stack in R4, and

  // returns the stack adjustment in number of bytes in R4.  This will not

  // clober any other registers (other than the obvious lr).

  //

  // Although, technically, IP should be considered a register which may be

  // clobbered, the call itself will not touch it.  Windows on ARM is a pure

  // thumb-2 environment, so there is no interworking required.  As a result, we

  // do not expect a veneer to be emitted by the linker, clobbering IP.

  //

  // Each module receives its own copy of __chkstk, so no import thunk is

  // required, again, ensuring that IP is not clobbered.

  //

  // Finally, although some linkers may theoretically provide a trampoline for

  // out of range calls (which is quite common due to a 32M range limitation of

  // branches for Thumb), we can generate the long-call version via

  // -mcmodel=large, alleviating the need for the trampoline which may clobber

  // IP.


  switch (TM.getCodeModel()) {

  case CodeModel::Tiny:

    llvm_unreachable("Tiny code model not available on ARM.");

  case CodeModel::Small:

  case CodeModel::Medium:

  case CodeModel::Kernel:

    BuildMI(*MBB, MI, DL, TII.get(ARM::tBL))

        .add(predOps(ARMCC::AL))

        .addExternalSymbol("__chkstk")

        .addReg(ARM::R4, RegState::Implicit | RegState::Kill)

        .addReg(ARM::R4, RegState::Implicit | RegState::Define)

        .addReg(ARM::R12,

                RegState::Implicit | RegState::Define | RegState::Dead)

        .addReg(ARM::CPSR,

                RegState::Implicit | RegState::Define | RegState::Dead);

    break;

  case CodeModel::Large: {

    MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();

    Register Reg = MRI.createVirtualRegister(&ARM::rGPRRegClass);


    BuildMI(*MBB, MI, DL, TII.get(ARM::t2MOVi32imm), Reg)

      .addExternalSymbol("__chkstk");

    BuildMI(*MBB, MI, DL, TII.get(gettBLXrOpcode(*MBB->getParent())))

        .add(predOps(ARMCC::AL))

        .addReg(Reg, RegState::Kill)

        .addReg(ARM::R4, RegState::Implicit | RegState::Kill)

        .addReg(ARM::R4, RegState::Implicit | RegState::Define)

        .addReg(ARM::R12,

                RegState::Implicit | RegState::Define | RegState::Dead)

        .addReg(ARM::CPSR,

                RegState::Implicit | RegState::Define | RegState::Dead);

    break;

  }

  }


  BuildMI(*MBB, MI, DL, TII.get(ARM::t2SUBrr), ARM::SP)

      .addReg(ARM::SP, RegState::Kill)

      .addReg(ARM::R4, RegState::Kill)

      .setMIFlags(MachineInstr::FrameSetup)

      .add(predOps(ARMCC::AL))

      .add(condCodeOp());


  MI.eraseFromParent();

  return MBB;

}


MachineBasicBlock *

ARMTargetLowering::EmitLowered__dbzchk(MachineInstr &MI,

                                       MachineBasicBlock *MBB) const {

  DebugLoc DL = MI.getDebugLoc();

  MachineFunction *MF = MBB->getParent();

  const TargetInstrInfo *TII = Subtarget->getInstrInfo();


  MachineBasicBlock *ContBB = MF->CreateMachineBasicBlock();

  MF->insert(++MBB->getIterator(), ContBB);

  ContBB->splice(ContBB->begin(), MBB,

                 std::next(MachineBasicBlock::iterator(MI)), MBB->end());

  ContBB->transferSuccessorsAndUpdatePHIs(MBB);

  MBB->addSuccessor(ContBB);


  MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();

  BuildMI(TrapBB, DL, TII->get(ARM::t__brkdiv0));

  MF->push_back(TrapBB);

  MBB->addSuccessor(TrapBB);


  BuildMI(*MBB, MI, DL, TII->get(ARM::tCMPi8))

      .addReg(MI.getOperand(0).getReg())

      .addImm(0)

      .add(predOps(ARMCC::AL));

  BuildMI(*MBB, MI, DL, TII->get(ARM::t2Bcc))

      .addMBB(TrapBB)

      .addImm(ARMCC::EQ)

      .addReg(ARM::CPSR);


  MI.eraseFromParent();

  return ContBB;

}


// The CPSR operand of SelectItr might be missing a kill marker

// because there were multiple uses of CPSR, and ISel didn't know

// which to mark. Figure out whether SelectItr should have had a

// kill marker, and set it if it should. Returns the correct kill

// marker value.


static bool checkAndUpdateCPSRKill(MachineBasicBlock::iterator SelectItr,

                                   MachineBasicBlock* BB,

                                   const TargetRegisterInfo* TRI) {

  // Scan forward through BB for a use/def of CPSR.

  MachineBasicBlock::iterator miI(std::next(SelectItr));

  for (MachineBasicBlock::iterator miE = BB->end(); miI != miE; ++miI) {

    const MachineInstr& mi = *miI;

    if (mi.readsRegister(ARM::CPSR, /*TRI=*/nullptr))

      return false;

    if (mi.definesRegister(ARM::CPSR, /*TRI=*/nullptr))

      break; // Should have kill-flag - update below.

  }


  // If we hit the end of the block, check whether CPSR is live into a

  // successor.

  if (miI == BB->end()) {

    for (MachineBasicBlock *Succ : BB->successors())

      if (Succ->isLiveIn(ARM::CPSR))

        return false;

  }


  // We found a def, or hit the end of the basic block and CPSR wasn't live

  // out. SelectMI should have a kill flag on CPSR.

  SelectItr->addRegisterKilled(ARM::CPSR, TRI);

  return true;

}


/// Adds logic in loop entry MBB to calculate loop iteration count and adds

/// t2WhileLoopSetup and t2WhileLoopStart to generate WLS loop


static Register genTPEntry(MachineBasicBlock *TpEntry,

                           MachineBasicBlock *TpLoopBody,

                           MachineBasicBlock *TpExit, Register OpSizeReg,

                           const TargetInstrInfo *TII, DebugLoc Dl,

                           MachineRegisterInfo &MRI) {

  // Calculates loop iteration count = ceil(n/16) = (n + 15) >> 4.

  Register AddDestReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);

  BuildMI(TpEntry, Dl, TII->get(ARM::t2ADDri), AddDestReg)

      .addUse(OpSizeReg)

      .addImm(15)

      .add(predOps(ARMCC::AL))

      .addReg(0);


  Register LsrDestReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);

  BuildMI(TpEntry, Dl, TII->get(ARM::t2LSRri), LsrDestReg)

      .addUse(AddDestReg, RegState::Kill)

      .addImm(4)

      .add(predOps(ARMCC::AL))

      .addReg(0);


  Register TotalIterationsReg = MRI.createVirtualRegister(&ARM::GPRlrRegClass);

  BuildMI(TpEntry, Dl, TII->get(ARM::t2WhileLoopSetup), TotalIterationsReg)

      .addUse(LsrDestReg, RegState::Kill);


  BuildMI(TpEntry, Dl, TII->get(ARM::t2WhileLoopStart))

      .addUse(TotalIterationsReg)

      .addMBB(TpExit);


  BuildMI(TpEntry, Dl, TII->get(ARM::t2B))

      .addMBB(TpLoopBody)

      .add(predOps(ARMCC::AL));


  return TotalIterationsReg;

}


/// Adds logic in the loopBody MBB to generate MVE_VCTP, t2DoLoopDec and

/// t2DoLoopEnd. These are used by later passes to generate tail predicated

/// loops.


static void genTPLoopBody(MachineBasicBlock *TpLoopBody,

                          MachineBasicBlock *TpEntry, MachineBasicBlock *TpExit,

                          const TargetInstrInfo *TII, DebugLoc Dl,

                          MachineRegisterInfo &MRI, Register OpSrcReg,

                          Register OpDestReg, Register ElementCountReg,

                          Register TotalIterationsReg, bool IsMemcpy) {

  // First insert 4 PHI nodes for: Current pointer to Src (if memcpy), Dest

  // array, loop iteration counter, predication counter.


  Register SrcPhiReg, CurrSrcReg;

  if (IsMemcpy) {

    //  Current position in the src array

    SrcPhiReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);

    CurrSrcReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);

    BuildMI(TpLoopBody, Dl, TII->get(ARM::PHI), SrcPhiReg)

        .addUse(OpSrcReg)

        .addMBB(TpEntry)

        .addUse(CurrSrcReg)

        .addMBB(TpLoopBody);

  }


  // Current position in the dest array

  Register DestPhiReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);

  Register CurrDestReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);

  BuildMI(TpLoopBody, Dl, TII->get(ARM::PHI), DestPhiReg)

      .addUse(OpDestReg)

      .addMBB(TpEntry)

      .addUse(CurrDestReg)

      .addMBB(TpLoopBody);


  // Current loop counter

  Register LoopCounterPhiReg = MRI.createVirtualRegister(&ARM::GPRlrRegClass);

  Register RemainingLoopIterationsReg =

      MRI.createVirtualRegister(&ARM::GPRlrRegClass);

  BuildMI(TpLoopBody, Dl, TII->get(ARM::PHI), LoopCounterPhiReg)

      .addUse(TotalIterationsReg)

      .addMBB(TpEntry)

      .addUse(RemainingLoopIterationsReg)

      .addMBB(TpLoopBody);


  // Predication counter

  Register PredCounterPhiReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);

  Register RemainingElementsReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);

  BuildMI(TpLoopBody, Dl, TII->get(ARM::PHI), PredCounterPhiReg)

      .addUse(ElementCountReg)

      .addMBB(TpEntry)

      .addUse(RemainingElementsReg)

      .addMBB(TpLoopBody);


  // Pass predication counter to VCTP

  Register VccrReg = MRI.createVirtualRegister(&ARM::VCCRRegClass);

  BuildMI(TpLoopBody, Dl, TII->get(ARM::MVE_VCTP8), VccrReg)

      .addUse(PredCounterPhiReg)

      .addImm(ARMVCC::None)

      .addReg(0)

      .addReg(0);


  BuildMI(TpLoopBody, Dl, TII->get(ARM::t2SUBri), RemainingElementsReg)

      .addUse(PredCounterPhiReg)

      .addImm(16)

      .add(predOps(ARMCC::AL))

      .addReg(0);


  // VLDRB (only if memcpy) and VSTRB instructions, predicated using VPR

  Register SrcValueReg;

  if (IsMemcpy) {

    SrcValueReg = MRI.createVirtualRegister(&ARM::MQPRRegClass);

    BuildMI(TpLoopBody, Dl, TII->get(ARM::MVE_VLDRBU8_post))

        .addDef(CurrSrcReg)

        .addDef(SrcValueReg)

        .addReg(SrcPhiReg)

        .addImm(16)

        .addImm(ARMVCC::Then)

        .addUse(VccrReg)

        .addReg(0);

  } else

    SrcValueReg = OpSrcReg;


  BuildMI(TpLoopBody, Dl, TII->get(ARM::MVE_VSTRBU8_post))

      .addDef(CurrDestReg)

      .addUse(SrcValueReg)

      .addReg(DestPhiReg)

      .addImm(16)

      .addImm(ARMVCC::Then)

      .addUse(VccrReg)

      .addReg(0);


  // Add the pseudoInstrs for decrementing the loop counter and marking the

  // end:t2DoLoopDec and t2DoLoopEnd

  BuildMI(TpLoopBody, Dl, TII->get(ARM::t2LoopDec), RemainingLoopIterationsReg)

      .addUse(LoopCounterPhiReg)

      .addImm(1);


  BuildMI(TpLoopBody, Dl, TII->get(ARM::t2LoopEnd))

      .addUse(RemainingLoopIterationsReg)

      .addMBB(TpLoopBody);


  BuildMI(TpLoopBody, Dl, TII->get(ARM::t2B))

      .addMBB(TpExit)

      .add(predOps(ARMCC::AL));

}


bool ARMTargetLowering::supportKCFIBundles() const {

  // KCFI is supported in all ARM/Thumb modes

  return true;

}


MachineInstr *


ARMTargetLowering::EmitKCFICheck(MachineBasicBlock &MBB,

                                 MachineBasicBlock::instr_iterator &MBBI,

                                 const TargetInstrInfo *TII) const {

  assert(MBBI->isCall() && MBBI->getCFIType() &&

         "Invalid call instruction for a KCFI check");


  MachineOperand *TargetOp = nullptr;

  switch (MBBI->getOpcode()) {

  // ARM mode opcodes

  case ARM::BLX:

  case ARM::BLX_pred:

  case ARM::BLX_noip:

  case ARM::BLX_pred_noip:

  case ARM::BX_CALL:

    TargetOp = &MBBI->getOperand(0);

    break;

  case ARM::TCRETURNri:

  case ARM::TCRETURNrinotr12:

  case ARM::TAILJMPr:

  case ARM::TAILJMPr4:

    TargetOp = &MBBI->getOperand(0);

    break;

  // Thumb mode opcodes (Thumb1 and Thumb2)

  // Note: Most Thumb call instructions have predicate operands before the

  // target register Format: tBLXr pred, predreg, target_register, ...

  case ARM::tBLXr:      // Thumb1/Thumb2: BLX register (requires V5T)

  case ARM::tBLXr_noip: // Thumb1/Thumb2: BLX register, no IP clobber

  case ARM::tBX_CALL:   // Thumb1 only: BX call (push LR, BX)

    TargetOp = &MBBI->getOperand(2);

    break;

  // Tail call instructions don't have predicates, target is operand 0

  case ARM::tTAILJMPr: // Thumb1/Thumb2: Tail call via register

    TargetOp = &MBBI->getOperand(0);

    break;

  default:

    llvm_unreachable("Unexpected CFI call opcode");

  }


  assert(TargetOp && TargetOp->isReg() && "Invalid target operand");

  TargetOp->setIsRenamable(false);


  // Select the appropriate KCFI_CHECK variant based on the instruction set

  unsigned KCFICheckOpcode;

  if (Subtarget->isThumb()) {

    if (Subtarget->isThumb2()) {

      KCFICheckOpcode = ARM::KCFI_CHECK_Thumb2;

    } else {

      KCFICheckOpcode = ARM::KCFI_CHECK_Thumb1;

    }

  } else {

    KCFICheckOpcode = ARM::KCFI_CHECK_ARM;

  }


  return BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(KCFICheckOpcode))

      .addReg(TargetOp->getReg())

      .addImm(MBBI->getCFIType())

      .getInstr();

}


MachineBasicBlock *


ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,

                                               MachineBasicBlock *BB) const {

  const TargetInstrInfo *TII = Subtarget->getInstrInfo();

  DebugLoc dl = MI.getDebugLoc();

  bool isThumb2 = Subtarget->isThumb2();

  switch (MI.getOpcode()) {

  default: {

    MI.print(errs());

    llvm_unreachable("Unexpected instr type to insert");

  }


  // Thumb1 post-indexed loads are really just single-register LDMs.

  case ARM::tLDR_postidx: {

    MachineOperand Def(MI.getOperand(1));

    BuildMI(*BB, MI, dl, TII->get(ARM::tLDMIA_UPD))

        .add(Def)  // Rn_wb

        .add(MI.getOperand(2))  // Rn

        .add(MI.getOperand(3))  // PredImm

        .add(MI.getOperand(4))  // PredReg

        .add(MI.getOperand(0))  // Rt

        .cloneMemRefs(MI);

    MI.eraseFromParent();

    return BB;

  }


  case ARM::MVE_MEMCPYLOOPINST:

  case ARM::MVE_MEMSETLOOPINST: {


    // Transformation below expands MVE_MEMCPYLOOPINST/MVE_MEMSETLOOPINST Pseudo

    // into a Tail Predicated (TP) Loop. It adds the instructions to calculate

    // the iteration count =ceil(size_in_bytes/16)) in the TP entry block and

    // adds the relevant instructions in the TP loop Body for generation of a

    // WLSTP loop.


    // Below is relevant portion of the CFG after the transformation.

    // The Machine Basic Blocks are shown along with branch conditions (in

    // brackets). Note that TP entry/exit MBBs depict the entry/exit of this

    // portion of the CFG and may not necessarily be the entry/exit of the

    // function.


    //             (Relevant) CFG after transformation:

    //               TP entry MBB

    //                   |

    //          |-----------------|

    //       (n <= 0)          (n > 0)

    //          |                 |

    //          |         TP loop Body MBB<--|

    //          |                |           |

    //           \               |___________|

    //            \             /

    //              TP exit MBB


    MachineFunction *MF = BB->getParent();

    MachineFunctionProperties &Properties = MF->getProperties();

    MachineRegisterInfo &MRI = MF->getRegInfo();


    Register OpDestReg = MI.getOperand(0).getReg();

    Register OpSrcReg = MI.getOperand(1).getReg();

    Register OpSizeReg = MI.getOperand(2).getReg();


    // Allocate the required MBBs and add to parent function.

    MachineBasicBlock *TpEntry = BB;

    MachineBasicBlock *TpLoopBody = MF->CreateMachineBasicBlock();

    MachineBasicBlock *TpExit;


    MF->push_back(TpLoopBody);


    // If any instructions are present in the current block after

    // MVE_MEMCPYLOOPINST or MVE_MEMSETLOOPINST, split the current block and

    // move the instructions into the newly created exit block. If there are no

    // instructions add an explicit branch to the FallThrough block and then

    // split.

    //

    // The split is required for two reasons:

    // 1) A terminator(t2WhileLoopStart) will be placed at that site.

    // 2) Since a TPLoopBody will be added later, any phis in successive blocks

    //    need to be updated. splitAt() already handles this.

    TpExit = BB->splitAt(MI, false);

    if (TpExit == BB) {

      assert(BB->canFallThrough() && "Exit Block must be Fallthrough of the "

                                     "block containing memcpy/memset Pseudo");

      TpExit = BB->getFallThrough();

      BuildMI(BB, dl, TII->get(ARM::t2B))

          .addMBB(TpExit)

          .add(predOps(ARMCC::AL));

      TpExit = BB->splitAt(MI, false);

    }


    // Add logic for iteration count

    Register TotalIterationsReg =

        genTPEntry(TpEntry, TpLoopBody, TpExit, OpSizeReg, TII, dl, MRI);


    // Add the vectorized (and predicated) loads/store instructions

    bool IsMemcpy = MI.getOpcode() == ARM::MVE_MEMCPYLOOPINST;

    genTPLoopBody(TpLoopBody, TpEntry, TpExit, TII, dl, MRI, OpSrcReg,

                  OpDestReg, OpSizeReg, TotalIterationsReg, IsMemcpy);


    // Required to avoid conflict with the MachineVerifier during testing.

    Properties.resetNoPHIs();


    // Connect the blocks

    TpEntry->addSuccessor(TpLoopBody);

    TpLoopBody->addSuccessor(TpLoopBody);

    TpLoopBody->addSuccessor(TpExit);


    // Reorder for a more natural layout

    TpLoopBody->moveAfter(TpEntry);

    TpExit->moveAfter(TpLoopBody);


    // Finally, remove the memcpy Pseudo Instruction

    MI.eraseFromParent();


    // Return the exit block as it may contain other instructions requiring a

    // custom inserter

    return TpExit;

  }


  // The Thumb2 pre-indexed stores have the same MI operands, they just

  // define them differently in the .td files from the isel patterns, so

  // they need pseudos.

  case ARM::t2STR_preidx:

    MI.setDesc(TII->get(ARM::t2STR_PRE));

    return BB;

  case ARM::t2STRB_preidx:

    MI.setDesc(TII->get(ARM::t2STRB_PRE));

    return BB;

  case ARM::t2STRH_preidx:

    MI.setDesc(TII->get(ARM::t2STRH_PRE));

    return BB;


  case ARM::STRi_preidx:

  case ARM::STRBi_preidx: {

    unsigned NewOpc = MI.getOpcode() == ARM::STRi_preidx ? ARM::STR_PRE_IMM

                                                         : ARM::STRB_PRE_IMM;

    // Decode the offset.

    unsigned Offset = MI.getOperand(4).getImm();

    bool isSub = ARM_AM::getAM2Op(Offset) == ARM_AM::sub;

    Offset = ARM_AM::getAM2Offset(Offset);

    if (isSub)

      Offset = -Offset;


    MachineMemOperand *MMO = *MI.memoperands_begin();

    BuildMI(*BB, MI, dl, TII->get(NewOpc))

        .add(MI.getOperand(0)) // Rn_wb

        .add(MI.getOperand(1)) // Rt

        .add(MI.getOperand(2)) // Rn

        .addImm(Offset)        // offset (skip GPR==zero_reg)

        .add(MI.getOperand(5)) // pred

        .add(MI.getOperand(6))

        .addMemOperand(MMO);

    MI.eraseFromParent();

    return BB;

  }

  case ARM::STRr_preidx:

  case ARM::STRBr_preidx:

  case ARM::STRH_preidx: {

    unsigned NewOpc;

    switch (MI.getOpcode()) {

    default: llvm_unreachable("unexpected opcode!");

    case ARM::STRr_preidx: NewOpc = ARM::STR_PRE_REG; break;

    case ARM::STRBr_preidx: NewOpc = ARM::STRB_PRE_REG; break;

    case ARM::STRH_preidx: NewOpc = ARM::STRH_PRE; break;

    }

    MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(NewOpc));

    for (const MachineOperand &MO : MI.operands())

      MIB.add(MO);

    MI.eraseFromParent();

    return BB;

  }


  case ARM::tMOVCCr_pseudo: {

    // To "insert" a SELECT_CC instruction, we actually have to insert the

    // diamond control-flow pattern.  The incoming instruction knows the

    // destination vreg to set, the condition code register to branch on, the

    // true/false values to select between, and a branch opcode to use.

    const BasicBlock *LLVM_BB = BB->getBasicBlock();

    MachineFunction::iterator It = ++BB->getIterator();


    //  thisMBB:

    //  ...

    //   TrueVal = ...

    //   cmpTY ccX, r1, r2

    //   bCC copy1MBB

    //   fallthrough --> copy0MBB

    MachineBasicBlock *thisMBB  = BB;

    MachineFunction *F = BB->getParent();

    MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);

    MachineBasicBlock *sinkMBB  = F->CreateMachineBasicBlock(LLVM_BB);

    F->insert(It, copy0MBB);

    F->insert(It, sinkMBB);


    // Set the call frame size on entry to the new basic blocks.

    unsigned CallFrameSize = TII->getCallFrameSizeAt(MI);

    copy0MBB->setCallFrameSize(CallFrameSize);

    sinkMBB->setCallFrameSize(CallFrameSize);


    // Check whether CPSR is live past the tMOVCCr_pseudo.

    const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();

    if (!MI.killsRegister(ARM::CPSR, /*TRI=*/nullptr) &&

        !checkAndUpdateCPSRKill(MI, thisMBB, TRI)) {

      copy0MBB->addLiveIn(ARM::CPSR);

      sinkMBB->addLiveIn(ARM::CPSR);

    }


    // Transfer the remainder of BB and its successor edges to sinkMBB.

    sinkMBB->splice(sinkMBB->begin(), BB,

                    std::next(MachineBasicBlock::iterator(MI)), BB->end());

    sinkMBB->transferSuccessorsAndUpdatePHIs(BB);


    BB->addSuccessor(copy0MBB);

    BB->addSuccessor(sinkMBB);


    BuildMI(BB, dl, TII->get(ARM::tBcc))

        .addMBB(sinkMBB)

        .addImm(MI.getOperand(3).getImm())

        .addReg(MI.getOperand(4).getReg());


    //  copy0MBB:

    //   %FalseValue = ...

    //   # fallthrough to sinkMBB

    BB = copy0MBB;


    // Update machine-CFG edges

    BB->addSuccessor(sinkMBB);


    //  sinkMBB:

    //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]

    //  ...

    BB = sinkMBB;

    BuildMI(*BB, BB->begin(), dl, TII->get(ARM::PHI), MI.getOperand(0).getReg())

        .addReg(MI.getOperand(1).getReg())

        .addMBB(copy0MBB)

        .addReg(MI.getOperand(2).getReg())

        .addMBB(thisMBB);


    MI.eraseFromParent(); // The pseudo instruction is gone now.

    return BB;

  }


  case ARM::BCCi64:

  case ARM::BCCZi64: {

    // If there is an unconditional branch to the other successor, remove it.

    BB->erase(std::next(MachineBasicBlock::iterator(MI)), BB->end());


    // Compare both parts that make up the double comparison separately for

    // equality.

    bool RHSisZero = MI.getOpcode() == ARM::BCCZi64;


    Register LHS1 = MI.getOperand(1).getReg();

    Register LHS2 = MI.getOperand(2).getReg();

    if (RHSisZero) {

      BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))

          .addReg(LHS1)

          .addImm(0)

          .add(predOps(ARMCC::AL));

      BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))

        .addReg(LHS2).addImm(0)

        .addImm(ARMCC::EQ).addReg(ARM::CPSR);

    } else {

      Register RHS1 = MI.getOperand(3).getReg();

      Register RHS2 = MI.getOperand(4).getReg();

      BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))

          .addReg(LHS1)

          .addReg(RHS1)

          .add(predOps(ARMCC::AL));

      BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))

        .addReg(LHS2).addReg(RHS2)

        .addImm(ARMCC::EQ).addReg(ARM::CPSR);

    }


    MachineBasicBlock *destMBB = MI.getOperand(RHSisZero ? 3 : 5).getMBB();

    MachineBasicBlock *exitMBB = OtherSucc(BB, destMBB);

    if (MI.getOperand(0).getImm() == ARMCC::NE)

      std::swap(destMBB, exitMBB);


    BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))

      .addMBB(destMBB).addImm(ARMCC::EQ).addReg(ARM::CPSR);

    if (isThumb2)

      BuildMI(BB, dl, TII->get(ARM::t2B))

          .addMBB(exitMBB)

          .add(predOps(ARMCC::AL));

    else

      BuildMI(BB, dl, TII->get(ARM::B)) .addMBB(exitMBB);


    MI.eraseFromParent(); // The pseudo instruction is gone now.

    return BB;

  }


  case ARM::Int_eh_sjlj_setjmp:

  case ARM::Int_eh_sjlj_setjmp_nofp:

  case ARM::tInt_eh_sjlj_setjmp:

  case ARM::t2Int_eh_sjlj_setjmp:

  case ARM::t2Int_eh_sjlj_setjmp_nofp:

    return BB;


  case ARM::Int_eh_sjlj_setup_dispatch:

    EmitSjLjDispatchBlock(MI, BB);

    return BB;

  case ARM::COPY_STRUCT_BYVAL_I32:

    ++NumLoopByVals;

    return EmitStructByval(MI, BB);

  case ARM::WIN__CHKSTK:

    return EmitLowered__chkstk(MI, BB);

  case ARM::WIN__DBZCHK:

    return EmitLowered__dbzchk(MI, BB);

  }

}


/// Attaches vregs to MEMCPY that it will use as scratch registers

/// when it is expanded into LDM/STM. This is done as a post-isel lowering

/// instead of as a custom inserter because we need the use list from the SDNode.


static void attachMEMCPYScratchRegs(const ARMSubtarget *Subtarget,

                                    MachineInstr &MI, const SDNode *Node) {

  bool isThumb1 = Subtarget->isThumb1Only();


  MachineFunction *MF = MI.getParent()->getParent();

  MachineRegisterInfo &MRI = MF->getRegInfo();

  MachineInstrBuilder MIB(*MF, MI);


  // If the new dst/src is unused mark it as dead.

  if (!Node->hasAnyUseOfValue(0)) {

    MI.getOperand(0).setIsDead(true);

  }

  if (!Node->hasAnyUseOfValue(1)) {

    MI.getOperand(1).setIsDead(true);

  }


  // The MEMCPY both defines and kills the scratch registers.

  for (unsigned I = 0; I != MI.getOperand(4).getImm(); ++I) {

    Register TmpReg = MRI.createVirtualRegister(isThumb1 ? &ARM::tGPRRegClass

                                                         : &ARM::GPRRegClass);

    MIB.addReg(TmpReg, RegState::Define|RegState::Dead);

  }

}


void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,

                                                      SDNode *Node) const {

  if (MI.getOpcode() == ARM::MEMCPY) {

    attachMEMCPYScratchRegs(Subtarget, MI, Node);

    return;

  }


  const MCInstrDesc *MCID = &MI.getDesc();

  // Adjust potentially 's' setting instructions after isel, i.e. ADC, SBC, RSB,

  // RSC. Coming out of isel, they have an implicit CPSR def, but the optional

  // operand is still set to noreg. If needed, set the optional operand's

  // register to CPSR, and remove the redundant implicit def.

  //

  // e.g. ADCS (..., implicit-def CPSR) -> ADC (... opt:def CPSR).


  // Rename pseudo opcodes.

  unsigned NewOpc = convertAddSubFlagsOpcode(MI.getOpcode());

  unsigned ccOutIdx;

  if (NewOpc) {

    const ARMBaseInstrInfo *TII = Subtarget->getInstrInfo();

    MCID = &TII->get(NewOpc);


    assert(MCID->getNumOperands() ==

           MI.getDesc().getNumOperands() + 5 - MI.getDesc().getSize()

        && "converted opcode should be the same except for cc_out"

           " (and, on Thumb1, pred)");


    MI.setDesc(*MCID);


    // Add the optional cc_out operand

    MI.addOperand(MachineOperand::CreateReg(0, /*isDef=*/true));


    // On Thumb1, move all input operands to the end, then add the predicate

    if (Subtarget->isThumb1Only()) {

      for (unsigned c = MCID->getNumOperands() - 4; c--;) {

        MI.addOperand(MI.getOperand(1));

        MI.removeOperand(1);

      }


      // Restore the ties

      for (unsigned i = MI.getNumOperands(); i--;) {

        const MachineOperand& op = MI.getOperand(i);

        if (op.isReg() && op.isUse()) {

          int DefIdx = MCID->getOperandConstraint(i, MCOI::TIED_TO);

          if (DefIdx != -1)

            MI.tieOperands(DefIdx, i);

        }

      }


      MI.addOperand(MachineOperand::CreateImm(ARMCC::AL));

      MI.addOperand(MachineOperand::CreateReg(0, /*isDef=*/false));

      ccOutIdx = 1;

    } else

      ccOutIdx = MCID->getNumOperands() - 1;

  } else

    ccOutIdx = MCID->getNumOperands() - 1;


  // Any ARM instruction that sets the 's' bit should specify an optional

  // "cc_out" operand in the last operand position.

  if (!MI.hasOptionalDef() || !MCID->operands()[ccOutIdx].isOptionalDef()) {

    assert(!NewOpc && "Optional cc_out operand required");

    return;

  }

  // Look for an implicit def of CPSR added by MachineInstr ctor. Remove it

  // since we already have an optional CPSR def.

  bool definesCPSR = false;

  bool deadCPSR = false;

  for (unsigned i = MCID->getNumOperands(), e = MI.getNumOperands(); i != e;

       ++i) {

    const MachineOperand &MO = MI.getOperand(i);

    if (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR) {

      definesCPSR = true;

      if (MO.isDead())

        deadCPSR = true;

      MI.removeOperand(i);

      break;

    }

  }

  if (!definesCPSR) {

    assert(!NewOpc && "Optional cc_out operand required");

    return;

  }

  assert(deadCPSR == !Node->hasAnyUseOfValue(1) && "inconsistent dead flag");

  if (deadCPSR) {

    assert(!MI.getOperand(ccOutIdx).getReg() &&

           "expect uninitialized optional cc_out operand");

    // Thumb1 instructions must have the S bit even if the CPSR is dead.

    if (!Subtarget->isThumb1Only())

      return;

  }


  // If this instruction was defined with an optional CPSR def and its dag node

  // had a live implicit CPSR def, then activate the optional CPSR def.

  MachineOperand &MO = MI.getOperand(ccOutIdx);

  MO.setReg(ARM::CPSR);

  MO.setIsDef(true);

}


//===----------------------------------------------------------------------===//

//                           ARM Optimization Hooks

//===----------------------------------------------------------------------===//


// Helper function that checks if N is a null or all ones constant.


static inline bool isZeroOrAllOnes(SDValue N, bool AllOnes) {

  return AllOnes ? isAllOnesConstant(N) : isNullConstant(N);

}


// Return true if N is conditionally 0 or all ones.

// Detects these expressions where cc is an i1 value:

//

//   (select cc 0, y)   [AllOnes=0]

//   (select cc y, 0)   [AllOnes=0]

//   (zext cc)          [AllOnes=0]

//   (sext cc)          [AllOnes=0/1]

//   (select cc -1, y)  [AllOnes=1]

//   (select cc y, -1)  [AllOnes=1]

//

// Invert is set when N is the null/all ones constant when CC is false.

// OtherOp is set to the alternative value of N.


static bool isConditionalZeroOrAllOnes(SDNode *N, bool AllOnes,

                                       SDValue &CC, bool &Invert,

                                       SDValue &OtherOp,

                                       SelectionDAG &DAG) {

  switch (N->getOpcode()) {

  default: return false;

  case ISD::SELECT: {

    CC = N->getOperand(0);

    SDValue N1 = N->getOperand(1);

    SDValue N2 = N->getOperand(2);

    if (isZeroOrAllOnes(N1, AllOnes)) {

      Invert = false;

      OtherOp = N2;

      return true;

    }

    if (isZeroOrAllOnes(N2, AllOnes)) {

      Invert = true;

      OtherOp = N1;

      return true;

    }

    return false;

  }

  case ISD::ZERO_EXTEND:

    // (zext cc) can never be the all ones value.

    if (AllOnes)

      return false;

    [[fallthrough]];

  case ISD::SIGN_EXTEND: {

    SDLoc dl(N);

    EVT VT = N->getValueType(0);

    CC = N->getOperand(0);

    if (CC.getValueType() != MVT::i1 || CC.getOpcode() != ISD::SETCC)

      return false;

    Invert = !AllOnes;

    if (AllOnes)

      // When looking for an AllOnes constant, N is an sext, and the 'other'

      // value is 0.

      OtherOp = DAG.getConstant(0, dl, VT);

    else if (N->getOpcode() == ISD::ZERO_EXTEND)

      // When looking for a 0 constant, N can be zext or sext.

      OtherOp = DAG.getConstant(1, dl, VT);

    else

      OtherOp = DAG.getAllOnesConstant(dl, VT);

    return true;

  }

  }

}


// Combine a constant select operand into its use:

//

//   (add (select cc, 0, c), x)  -> (select cc, x, (add, x, c))

//   (sub x, (select cc, 0, c))  -> (select cc, x, (sub, x, c))

//   (and (select cc, -1, c), x) -> (select cc, x, (and, x, c))  [AllOnes=1]

//   (or  (select cc, 0, c), x)  -> (select cc, x, (or, x, c))

//   (xor (select cc, 0, c), x)  -> (select cc, x, (xor, x, c))

//

// The transform is rejected if the select doesn't have a constant operand that

// is null, or all ones when AllOnes is set.

//

// Also recognize sext/zext from i1:

//

//   (add (zext cc), x) -> (select cc (add x, 1), x)

//   (add (sext cc), x) -> (select cc (add x, -1), x)

//

// These transformations eventually create predicated instructions.

//

// @param N       The node to transform.

// @param Slct    The N operand that is a select.

// @param OtherOp The other N operand (x above).

// @param DCI     Context.

// @param AllOnes Require the select constant to be all ones instead of null.

// @returns The new node, or SDValue() on failure.

static


SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,

                            TargetLowering::DAGCombinerInfo &DCI,

                            bool AllOnes = false) {

  SelectionDAG &DAG = DCI.DAG;

  EVT VT = N->getValueType(0);

  SDValue NonConstantVal;

  SDValue CCOp;

  bool SwapSelectOps;

  if (!isConditionalZeroOrAllOnes(Slct.getNode(), AllOnes, CCOp, SwapSelectOps,

                                  NonConstantVal, DAG))

    return SDValue();


  // Slct is now know to be the desired identity constant when CC is true.

  SDValue TrueVal = OtherOp;

  SDValue FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT,

                                 OtherOp, NonConstantVal);

  // Unless SwapSelectOps says CC should be false.

  if (SwapSelectOps)

    std::swap(TrueVal, FalseVal);


  return DAG.getNode(ISD::SELECT, SDLoc(N), VT,

                     CCOp, TrueVal, FalseVal);

}


// Attempt combineSelectAndUse on each operand of a commutative operator N.

static


SDValue combineSelectAndUseCommutative(SDNode *N, bool AllOnes,

                                       TargetLowering::DAGCombinerInfo &DCI) {

  SDValue N0 = N->getOperand(0);

  SDValue N1 = N->getOperand(1);

  if (N0.getNode()->hasOneUse())

    if (SDValue Result = combineSelectAndUse(N, N0, N1, DCI, AllOnes))

      return Result;

  if (N1.getNode()->hasOneUse())

    if (SDValue Result = combineSelectAndUse(N, N1, N0, DCI, AllOnes))

      return Result;

  return SDValue();

}


static bool IsVUZPShuffleNode(SDNode *N) {

  // VUZP shuffle node.

  if (N->getOpcode() == ARMISD::VUZP)

    return true;


  // "VUZP" on i32 is an alias for VTRN.

  if (N->getOpcode() == ARMISD::VTRN && N->getValueType(0) == MVT::v2i32)

    return true;


  return false;

}


static SDValue AddCombineToVPADD(SDNode *N, SDValue N0, SDValue N1,

                                 TargetLowering::DAGCombinerInfo &DCI,

                                 const ARMSubtarget *Subtarget) {

  // Look for ADD(VUZP.0, VUZP.1).

  if (!IsVUZPShuffleNode(N0.getNode()) || N0.getNode() != N1.getNode() ||

      N0 == N1)

   return SDValue();


  // Make sure the ADD is a 64-bit add; there is no 128-bit VPADD.

  if (!N->getValueType(0).is64BitVector())

    return SDValue();


  // Generate vpadd.

  SelectionDAG &DAG = DCI.DAG;

  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  SDLoc dl(N);

  SDNode *Unzip = N0.getNode();

  EVT VT = N->getValueType(0);


  SmallVector<SDValue, 8> Ops;

  Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpadd, dl,

                                TLI.getPointerTy(DAG.getDataLayout())));

  Ops.push_back(Unzip->getOperand(0));

  Ops.push_back(Unzip->getOperand(1));


  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, Ops);

}


static SDValue AddCombineVUZPToVPADDL(SDNode *N, SDValue N0, SDValue N1,

                                      TargetLowering::DAGCombinerInfo &DCI,

                                      const ARMSubtarget *Subtarget) {

  // Check for two extended operands.

  if (!(N0.getOpcode() == ISD::SIGN_EXTEND &&

        N1.getOpcode() == ISD::SIGN_EXTEND) &&

      !(N0.getOpcode() == ISD::ZERO_EXTEND &&

        N1.getOpcode() == ISD::ZERO_EXTEND))

    return SDValue();


  SDValue N00 = N0.getOperand(0);

  SDValue N10 = N1.getOperand(0);


  // Look for ADD(SEXT(VUZP.0), SEXT(VUZP.1))

  if (!IsVUZPShuffleNode(N00.getNode()) || N00.getNode() != N10.getNode() ||

      N00 == N10)

    return SDValue();


  // We only recognize Q register paddl here; this can't be reached until

  // after type legalization.

  if (!N00.getValueType().is64BitVector() ||

      !N0.getValueType().is128BitVector())

    return SDValue();


  // Generate vpaddl.

  SelectionDAG &DAG = DCI.DAG;

  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  SDLoc dl(N);

  EVT VT = N->getValueType(0);


  SmallVector<SDValue, 8> Ops;

  // Form vpaddl.sN or vpaddl.uN depending on the kind of extension.

  unsigned Opcode;

  if (N0.getOpcode() == ISD::SIGN_EXTEND)

    Opcode = Intrinsic::arm_neon_vpaddls;

  else

    Opcode = Intrinsic::arm_neon_vpaddlu;

  Ops.push_back(DAG.getConstant(Opcode, dl,

                                TLI.getPointerTy(DAG.getDataLayout())));

  EVT ElemTy = N00.getValueType().getVectorElementType();

  unsigned NumElts = VT.getVectorNumElements();

  EVT ConcatVT = EVT::getVectorVT(*DAG.getContext(), ElemTy, NumElts * 2);

  SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), ConcatVT,

                               N00.getOperand(0), N00.getOperand(1));

  Ops.push_back(Concat);


  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, Ops);

}


// FIXME: This function shouldn't be necessary; if we lower BUILD_VECTOR in

// an appropriate manner, we end up with ADD(VUZP(ZEXT(N))), which is

// much easier to match.

static SDValue


AddCombineBUILD_VECTORToVPADDL(SDNode *N, SDValue N0, SDValue N1,

                               TargetLowering::DAGCombinerInfo &DCI,

                               const ARMSubtarget *Subtarget) {

  // Only perform optimization if after legalize, and if NEON is available. We

  // also expected both operands to be BUILD_VECTORs.

  if (DCI.isBeforeLegalize() || !Subtarget->hasNEON()

      || N0.getOpcode() != ISD::BUILD_VECTOR

      || N1.getOpcode() != ISD::BUILD_VECTOR)

    return SDValue();


  // Check output type since VPADDL operand elements can only be 8, 16, or 32.

  EVT VT = N->getValueType(0);

  if (!VT.isInteger() || VT.getVectorElementType() == MVT::i64)

    return SDValue();


  // Check that the vector operands are of the right form.

  // N0 and N1 are BUILD_VECTOR nodes with N number of EXTRACT_VECTOR

  // operands, where N is the size of the formed vector.

  // Each EXTRACT_VECTOR should have the same input vector and odd or even

  // index such that we have a pair wise add pattern.


  // Grab the vector that all EXTRACT_VECTOR nodes should be referencing.

  if (N0->getOperand(0)->getOpcode() != ISD::EXTRACT_VECTOR_ELT)

    return SDValue();

  SDValue Vec = N0->getOperand(0)->getOperand(0);

  SDNode *V = Vec.getNode();

  unsigned nextIndex = 0;


  // For each operands to the ADD which are BUILD_VECTORs,

  // check to see if each of their operands are an EXTRACT_VECTOR with

  // the same vector and appropriate index.

  for (unsigned i = 0, e = N0->getNumOperands(); i != e; ++i) {

    if (N0->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT

        && N1->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {


      SDValue ExtVec0 = N0->getOperand(i);

      SDValue ExtVec1 = N1->getOperand(i);


      // First operand is the vector, verify its the same.

      if (V != ExtVec0->getOperand(0).getNode() ||

          V != ExtVec1->getOperand(0).getNode())

        return SDValue();


      // Second is the constant, verify its correct.

      ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(ExtVec0->getOperand(1));

      ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(ExtVec1->getOperand(1));


      // For the constant, we want to see all the even or all the odd.

      if (!C0 || !C1 || C0->getZExtValue() != nextIndex

          || C1->getZExtValue() != nextIndex+1)

        return SDValue();


      // Increment index.

      nextIndex+=2;

    } else

      return SDValue();

  }


  // Don't generate vpaddl+vmovn; we'll match it to vpadd later. Also make sure

  // we're using the entire input vector, otherwise there's a size/legality

  // mismatch somewhere.

  if (nextIndex != Vec.getValueType().getVectorNumElements() ||

      Vec.getValueType().getVectorElementType() == VT.getVectorElementType())

    return SDValue();


  // Create VPADDL node.

  SelectionDAG &DAG = DCI.DAG;

  const TargetLowering &TLI = DAG.getTargetLoweringInfo();


  SDLoc dl(N);


  // Build operand list.

  SmallVector<SDValue, 8> Ops;

  Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpaddls, dl,

                                TLI.getPointerTy(DAG.getDataLayout())));


  // Input is the vector.

  Ops.push_back(Vec);


  // Get widened type and narrowed type.

  MVT widenType;

  unsigned numElem = VT.getVectorNumElements();


  EVT inputLaneType = Vec.getValueType().getVectorElementType();

  switch (inputLaneType.getSimpleVT().SimpleTy) {

    case MVT::i8: widenType = MVT::getVectorVT(MVT::i16, numElem); break;

    case MVT::i16: widenType = MVT::getVectorVT(MVT::i32, numElem); break;

    case MVT::i32: widenType = MVT::getVectorVT(MVT::i64, numElem); break;

    default:

      llvm_unreachable("Invalid vector element type for padd optimization.");

  }


  SDValue tmp = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, widenType, Ops);

  unsigned ExtOp = VT.bitsGT(tmp.getValueType()) ? ISD::ANY_EXTEND : ISD::TRUNCATE;

  return DAG.getNode(ExtOp, dl, VT, tmp);

}


static SDValue findMUL_LOHI(SDValue V) {

  if (V->getOpcode() == ISD::UMUL_LOHI ||

      V->getOpcode() == ISD::SMUL_LOHI)

    return V;

  return SDValue();

}


static SDValue AddCombineTo64BitSMLAL16(SDNode *AddcNode, SDNode *AddeNode,

                                        TargetLowering::DAGCombinerInfo &DCI,

                                        const ARMSubtarget *Subtarget) {

  if (!Subtarget->hasBaseDSP())

    return SDValue();


  // SMLALBB, SMLALBT, SMLALTB, SMLALTT multiply two 16-bit values and

  // accumulates the product into a 64-bit value. The 16-bit values will

  // be sign extended somehow or SRA'd into 32-bit values

  // (addc (adde (mul 16bit, 16bit), lo), hi)

  SDValue Mul = AddcNode->getOperand(0);

  SDValue Lo = AddcNode->getOperand(1);

  if (Mul.getOpcode() != ISD::MUL) {

    Lo = AddcNode->getOperand(0);

    Mul = AddcNode->getOperand(1);

    if (Mul.getOpcode() != ISD::MUL)

      return SDValue();

  }


  SDValue SRA = AddeNode->getOperand(0);

  SDValue Hi = AddeNode->getOperand(1);

  if (SRA.getOpcode() != ISD::SRA) {

    SRA = AddeNode->getOperand(1);

    Hi = AddeNode->getOperand(0);

    if (SRA.getOpcode() != ISD::SRA)

      return SDValue();

  }

  if (auto Const = dyn_cast<ConstantSDNode>(SRA.getOperand(1))) {

    if (Const->getZExtValue() != 31)

      return SDValue();

  } else

    return SDValue();


  if (SRA.getOperand(0) != Mul)

    return SDValue();


  SelectionDAG &DAG = DCI.DAG;

  SDLoc dl(AddcNode);

  unsigned Opcode = 0;

  SDValue Op0;

  SDValue Op1;


  if (isS16(Mul.getOperand(0), DAG) && isS16(Mul.getOperand(1), DAG)) {

    Opcode = ARMISD::SMLALBB;

    Op0 = Mul.getOperand(0);

    Op1 = Mul.getOperand(1);

  } else if (isS16(Mul.getOperand(0), DAG) && isSRA16(Mul.getOperand(1))) {

    Opcode = ARMISD::SMLALBT;

    Op0 = Mul.getOperand(0);

    Op1 = Mul.getOperand(1).getOperand(0);

  } else if (isSRA16(Mul.getOperand(0)) && isS16(Mul.getOperand(1), DAG)) {

    Opcode = ARMISD::SMLALTB;

    Op0 = Mul.getOperand(0).getOperand(0);

    Op1 = Mul.getOperand(1);

  } else if (isSRA16(Mul.getOperand(0)) && isSRA16(Mul.getOperand(1))) {

    Opcode = ARMISD::SMLALTT;

    Op0 = Mul->getOperand(0).getOperand(0);

    Op1 = Mul->getOperand(1).getOperand(0);

  }


  if (!Op0 || !Op1)

    return SDValue();


  SDValue SMLAL = DAG.getNode(Opcode, dl, DAG.getVTList(MVT::i32, MVT::i32),

                              Op0, Op1, Lo, Hi);

  // Replace the ADDs' nodes uses by the MLA node's values.

  SDValue HiMLALResult(SMLAL.getNode(), 1);

  SDValue LoMLALResult(SMLAL.getNode(), 0);


  DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0), LoMLALResult);

  DAG.ReplaceAllUsesOfValueWith(SDValue(AddeNode, 0), HiMLALResult);


  // Return original node to notify the driver to stop replacing.

  SDValue resNode(AddcNode, 0);

  return resNode;

}


static SDValue AddCombineTo64bitMLAL(SDNode *AddeSubeNode,

                                     TargetLowering::DAGCombinerInfo &DCI,

                                     const ARMSubtarget *Subtarget) {

  // Look for multiply add opportunities.

  // The pattern is a ISD::UMUL_LOHI followed by two add nodes, where

  // each add nodes consumes a value from ISD::UMUL_LOHI and there is

  // a glue link from the first add to the second add.

  // If we find this pattern, we can replace the U/SMUL_LOHI, ADDC, and ADDE by

  // a S/UMLAL instruction.

  //                  UMUL_LOHI

  //                 / :lo    \ :hi

  //                V          \          [no multiline comment]

  //    loAdd ->  ADDC         |

  //                 \ :carry /

  //                  V      V

  //                    ADDE   <- hiAdd

  //

  // In the special case where only the higher part of a signed result is used

  // and the add to the low part of the result of ISD::UMUL_LOHI adds or subtracts

  // a constant with the exact value of 0x80000000, we recognize we are dealing

  // with a "rounded multiply and add" (or subtract) and transform it into

  // either a ARMISD::SMMLAR or ARMISD::SMMLSR respectively.


  assert((AddeSubeNode->getOpcode() == ARMISD::ADDE ||

          AddeSubeNode->getOpcode() == ARMISD::SUBE) &&

         "Expect an ADDE or SUBE");


  assert(AddeSubeNode->getNumOperands() == 3 &&

         AddeSubeNode->getOperand(2).getValueType() == MVT::i32 &&

         "ADDE node has the wrong inputs");


  // Check that we are chained to the right ADDC or SUBC node.

  SDNode *AddcSubcNode = AddeSubeNode->getOperand(2).getNode();

  if ((AddeSubeNode->getOpcode() == ARMISD::ADDE &&

       AddcSubcNode->getOpcode() != ARMISD::ADDC) ||

      (AddeSubeNode->getOpcode() == ARMISD::SUBE &&

       AddcSubcNode->getOpcode() != ARMISD::SUBC))

    return SDValue();


  SDValue AddcSubcOp0 = AddcSubcNode->getOperand(0);

  SDValue AddcSubcOp1 = AddcSubcNode->getOperand(1);


  // Check if the two operands are from the same mul_lohi node.

  if (AddcSubcOp0.getNode() == AddcSubcOp1.getNode())

    return SDValue();


  assert(AddcSubcNode->getNumValues() == 2 &&

         AddcSubcNode->getValueType(0) == MVT::i32 &&

         "Expect ADDC with two result values. First: i32");


  // Check that the ADDC adds the low result of the S/UMUL_LOHI. If not, it

  // maybe a SMLAL which multiplies two 16-bit values.

  if (AddeSubeNode->getOpcode() == ARMISD::ADDE &&

      AddcSubcOp0->getOpcode() != ISD::UMUL_LOHI &&

      AddcSubcOp0->getOpcode() != ISD::SMUL_LOHI &&

      AddcSubcOp1->getOpcode() != ISD::UMUL_LOHI &&

      AddcSubcOp1->getOpcode() != ISD::SMUL_LOHI)

    return AddCombineTo64BitSMLAL16(AddcSubcNode, AddeSubeNode, DCI, Subtarget);


  // Check for the triangle shape.

  SDValue AddeSubeOp0 = AddeSubeNode->getOperand(0);

  SDValue AddeSubeOp1 = AddeSubeNode->getOperand(1);


  // Make sure that the ADDE/SUBE operands are not coming from the same node.

  if (AddeSubeOp0.getNode() == AddeSubeOp1.getNode())

    return SDValue();


  // Find the MUL_LOHI node walking up ADDE/SUBE's operands.

  bool IsLeftOperandMUL = false;

  SDValue MULOp = findMUL_LOHI(AddeSubeOp0);

  if (MULOp == SDValue())

    MULOp = findMUL_LOHI(AddeSubeOp1);

  else

    IsLeftOperandMUL = true;

  if (MULOp == SDValue())

    return SDValue();


  // Figure out the right opcode.

  unsigned Opc = MULOp->getOpcode();

  unsigned FinalOpc = (Opc == ISD::SMUL_LOHI) ? ARMISD::SMLAL : ARMISD::UMLAL;


  // Figure out the high and low input values to the MLAL node.

  SDValue *HiAddSub = nullptr;

  SDValue *LoMul = nullptr;

  SDValue *LowAddSub = nullptr;


  // Ensure that ADDE/SUBE is from high result of ISD::xMUL_LOHI.

  if ((AddeSubeOp0 != MULOp.getValue(1)) && (AddeSubeOp1 != MULOp.getValue(1)))

    return SDValue();


  if (IsLeftOperandMUL)

    HiAddSub = &AddeSubeOp1;

  else

    HiAddSub = &AddeSubeOp0;


  // Ensure that LoMul and LowAddSub are taken from correct ISD::SMUL_LOHI node

  // whose low result is fed to the ADDC/SUBC we are checking.


  if (AddcSubcOp0 == MULOp.getValue(0)) {

    LoMul = &AddcSubcOp0;

    LowAddSub = &AddcSubcOp1;

  }

  if (AddcSubcOp1 == MULOp.getValue(0)) {

    LoMul = &AddcSubcOp1;

    LowAddSub = &AddcSubcOp0;

  }


  if (!LoMul)

    return SDValue();


  // If HiAddSub is the same node as ADDC/SUBC or is a predecessor of ADDC/SUBC

  // the replacement below will create a cycle.

  if (AddcSubcNode == HiAddSub->getNode() ||

      AddcSubcNode->isPredecessorOf(HiAddSub->getNode()))

    return SDValue();


  // Create the merged node.

  SelectionDAG &DAG = DCI.DAG;


  // Start building operand list.

  SmallVector<SDValue, 8> Ops;

  Ops.push_back(LoMul->getOperand(0));

  Ops.push_back(LoMul->getOperand(1));


  // Check whether we can use SMMLAR, SMMLSR or SMMULR instead.  For this to be

  // the case, we must be doing signed multiplication and only use the higher

  // part of the result of the MLAL, furthermore the LowAddSub must be a constant

  // addition or subtraction with the value of 0x800000.

  if (Subtarget->hasV6Ops() && Subtarget->hasDSP() && Subtarget->useMulOps() &&

      FinalOpc == ARMISD::SMLAL && !AddeSubeNode->hasAnyUseOfValue(1) &&

      LowAddSub->getNode()->getOpcode() == ISD::Constant &&

      static_cast<ConstantSDNode *>(LowAddSub->getNode())->getZExtValue() ==

          0x80000000) {

    Ops.push_back(*HiAddSub);

    if (AddcSubcNode->getOpcode() == ARMISD::SUBC) {

      FinalOpc = ARMISD::SMMLSR;

    } else {

      FinalOpc = ARMISD::SMMLAR;

    }

    SDValue NewNode = DAG.getNode(FinalOpc, SDLoc(AddcSubcNode), MVT::i32, Ops);

    DAG.ReplaceAllUsesOfValueWith(SDValue(AddeSubeNode, 0), NewNode);


    return SDValue(AddeSubeNode, 0);

  } else if (AddcSubcNode->getOpcode() == ARMISD::SUBC)

    // SMMLS is generated during instruction selection and the rest of this

    // function can not handle the case where AddcSubcNode is a SUBC.

    return SDValue();


  // Finish building the operand list for {U/S}MLAL

  Ops.push_back(*LowAddSub);

  Ops.push_back(*HiAddSub);


  SDValue MLALNode = DAG.getNode(FinalOpc, SDLoc(AddcSubcNode),

                                 DAG.getVTList(MVT::i32, MVT::i32), Ops);


  // Replace the ADDs' nodes uses by the MLA node's values.

  SDValue HiMLALResult(MLALNode.getNode(), 1);

  DAG.ReplaceAllUsesOfValueWith(SDValue(AddeSubeNode, 0), HiMLALResult);


  SDValue LoMLALResult(MLALNode.getNode(), 0);

  DAG.ReplaceAllUsesOfValueWith(SDValue(AddcSubcNode, 0), LoMLALResult);


  // Return original node to notify the driver to stop replacing.

  return SDValue(AddeSubeNode, 0);

}


static SDValue AddCombineTo64bitUMAAL(SDNode *AddeNode,

                                      TargetLowering::DAGCombinerInfo &DCI,

                                      const ARMSubtarget *Subtarget) {

  // UMAAL is similar to UMLAL except that it adds two unsigned values.

  // While trying to combine for the other MLAL nodes, first search for the

  // chance to use UMAAL. Check if Addc uses a node which has already

  // been combined into a UMLAL. The other pattern is UMLAL using Addc/Adde

  // as the addend, and it's handled in PerformUMLALCombine.


  if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())

    return AddCombineTo64bitMLAL(AddeNode, DCI, Subtarget);


  // Check that we have a glued ADDC node.

  SDNode* AddcNode = AddeNode->getOperand(2).getNode();

  if (AddcNode->getOpcode() != ARMISD::ADDC)

    return SDValue();


  // Find the converted UMAAL or quit if it doesn't exist.

  SDNode *UmlalNode = nullptr;

  SDValue AddHi;

  if (AddcNode->getOperand(0).getOpcode() == ARMISD::UMLAL) {

    UmlalNode = AddcNode->getOperand(0).getNode();

    AddHi = AddcNode->getOperand(1);

  } else if (AddcNode->getOperand(1).getOpcode() == ARMISD::UMLAL) {

    UmlalNode = AddcNode->getOperand(1).getNode();

    AddHi = AddcNode->getOperand(0);

  } else {

    return AddCombineTo64bitMLAL(AddeNode, DCI, Subtarget);

  }


  // The ADDC should be glued to an ADDE node, which uses the same UMLAL as

  // the ADDC as well as Zero.

  if (!isNullConstant(UmlalNode->getOperand(3)))

    return SDValue();


  if ((isNullConstant(AddeNode->getOperand(0)) &&

       AddeNode->getOperand(1).getNode() == UmlalNode) ||

      (AddeNode->getOperand(0).getNode() == UmlalNode &&

       isNullConstant(AddeNode->getOperand(1)))) {

    SelectionDAG &DAG = DCI.DAG;

    SDValue Ops[] = { UmlalNode->getOperand(0), UmlalNode->getOperand(1),

                      UmlalNode->getOperand(2), AddHi };

    SDValue UMAAL =  DAG.getNode(ARMISD::UMAAL, SDLoc(AddcNode),

                                 DAG.getVTList(MVT::i32, MVT::i32), Ops);


    // Replace the ADDs' nodes uses by the UMAAL node's values.

    DAG.ReplaceAllUsesOfValueWith(SDValue(AddeNode, 0), SDValue(UMAAL.getNode(), 1));

    DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0), SDValue(UMAAL.getNode(), 0));


    // Return original node to notify the driver to stop replacing.

    return SDValue(AddeNode, 0);

  }

  return SDValue();

}


static SDValue PerformUMLALCombine(SDNode *N, SelectionDAG &DAG,

                                   const ARMSubtarget *Subtarget) {

  if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())

    return SDValue();


  // Check that we have a pair of ADDC and ADDE as operands.

  // Both addends of the ADDE must be zero.

  SDNode* AddcNode = N->getOperand(2).getNode();

  SDNode* AddeNode = N->getOperand(3).getNode();

  if ((AddcNode->getOpcode() == ARMISD::ADDC) &&

      (AddeNode->getOpcode() == ARMISD::ADDE) &&

      isNullConstant(AddeNode->getOperand(0)) &&

      isNullConstant(AddeNode->getOperand(1)) &&

      (AddeNode->getOperand(2).getNode() == AddcNode))

    return DAG.getNode(ARMISD::UMAAL, SDLoc(N),

                       DAG.getVTList(MVT::i32, MVT::i32),

                       {N->getOperand(0), N->getOperand(1),

                        AddcNode->getOperand(0), AddcNode->getOperand(1)});

  else

    return SDValue();

}


static SDValue PerformAddcSubcCombine(SDNode *N,

                                      TargetLowering::DAGCombinerInfo &DCI,

                                      const ARMSubtarget *Subtarget) {

  SelectionDAG &DAG(DCI.DAG);


  if (N->getOpcode() == ARMISD::SUBC && N->hasAnyUseOfValue(1)) {

    // (SUBC (ADDE 0, 0, C), 1) -> C

    SDValue LHS = N->getOperand(0);

    SDValue RHS = N->getOperand(1);

    if (LHS->getOpcode() == ARMISD::ADDE &&

        isNullConstant(LHS->getOperand(0)) &&

        isNullConstant(LHS->getOperand(1)) && isOneConstant(RHS)) {

      return DCI.CombineTo(N, SDValue(N, 0), LHS->getOperand(2));

    }

  }


  if (Subtarget->isThumb1Only()) {

    SDValue RHS = N->getOperand(1);

    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {

      int32_t imm = C->getSExtValue();

      if (imm < 0 && imm > std::numeric_limits<int>::min()) {

        SDLoc DL(N);

        RHS = DAG.getConstant(-imm, DL, MVT::i32);

        unsigned Opcode = (N->getOpcode() == ARMISD::ADDC) ? ARMISD::SUBC

                                                           : ARMISD::ADDC;

        return DAG.getNode(Opcode, DL, N->getVTList(), N->getOperand(0), RHS);

      }

    }

  }


  return SDValue();

}


static SDValue PerformAddeSubeCombine(SDNode *N,

                                      TargetLowering::DAGCombinerInfo &DCI,

                                      const ARMSubtarget *Subtarget) {

  if (Subtarget->isThumb1Only()) {

    SelectionDAG &DAG = DCI.DAG;

    SDValue RHS = N->getOperand(1);

    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {

      int64_t imm = C->getSExtValue();

      if (imm < 0) {

        SDLoc DL(N);


        // The with-carry-in form matches bitwise not instead of the negation.

        // Effectively, the inverse interpretation of the carry flag already

        // accounts for part of the negation.

        RHS = DAG.getConstant(~imm, DL, MVT::i32);


        unsigned Opcode = (N->getOpcode() == ARMISD::ADDE) ? ARMISD::SUBE

                                                           : ARMISD::ADDE;

        return DAG.getNode(Opcode, DL, N->getVTList(),

                           N->getOperand(0), RHS, N->getOperand(2));

      }

    }

  } else if (N->getOperand(1)->getOpcode() == ISD::SMUL_LOHI) {

    return AddCombineTo64bitMLAL(N, DCI, Subtarget);

  }

  return SDValue();

}


static SDValue PerformSELECTCombine(SDNode *N,

                                    TargetLowering::DAGCombinerInfo &DCI,

                                    const ARMSubtarget *Subtarget) {

  if (!Subtarget->hasMVEIntegerOps())

    return SDValue();


  SDLoc dl(N);

  SDValue SetCC;

  SDValue LHS;

  SDValue RHS;

  ISD::CondCode CC;

  SDValue TrueVal;

  SDValue FalseVal;


  if (N->getOpcode() == ISD::SELECT &&

      N->getOperand(0)->getOpcode() == ISD::SETCC) {

    SetCC = N->getOperand(0);

    LHS = SetCC->getOperand(0);

    RHS = SetCC->getOperand(1);

    CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();

    TrueVal = N->getOperand(1);

    FalseVal = N->getOperand(2);

  } else if (N->getOpcode() == ISD::SELECT_CC) {

    LHS = N->getOperand(0);

    RHS = N->getOperand(1);

    CC = cast<CondCodeSDNode>(N->getOperand(4))->get();

    TrueVal = N->getOperand(2);

    FalseVal = N->getOperand(3);

  } else {

    return SDValue();

  }


  unsigned int Opcode = 0;

  if ((TrueVal->getOpcode() == ISD::VECREDUCE_UMIN ||

       FalseVal->getOpcode() == ISD::VECREDUCE_UMIN) &&

      (CC == ISD::SETULT || CC == ISD::SETUGT)) {

    Opcode = ARMISD::VMINVu;

    if (CC == ISD::SETUGT)

      std::swap(TrueVal, FalseVal);

  } else if ((TrueVal->getOpcode() == ISD::VECREDUCE_SMIN ||

              FalseVal->getOpcode() == ISD::VECREDUCE_SMIN) &&

             (CC == ISD::SETLT || CC == ISD::SETGT)) {

    Opcode = ARMISD::VMINVs;

    if (CC == ISD::SETGT)

      std::swap(TrueVal, FalseVal);

  } else if ((TrueVal->getOpcode() == ISD::VECREDUCE_UMAX ||

              FalseVal->getOpcode() == ISD::VECREDUCE_UMAX) &&

             (CC == ISD::SETUGT || CC == ISD::SETULT)) {

    Opcode = ARMISD::VMAXVu;

    if (CC == ISD::SETULT)

      std::swap(TrueVal, FalseVal);

  } else if ((TrueVal->getOpcode() == ISD::VECREDUCE_SMAX ||

              FalseVal->getOpcode() == ISD::VECREDUCE_SMAX) &&

             (CC == ISD::SETGT || CC == ISD::SETLT)) {

    Opcode = ARMISD::VMAXVs;

    if (CC == ISD::SETLT)

      std::swap(TrueVal, FalseVal);

  } else

    return SDValue();


  // Normalise to the right hand side being the vector reduction

  switch (TrueVal->getOpcode()) {

  case ISD::VECREDUCE_UMIN:

  case ISD::VECREDUCE_SMIN:

  case ISD::VECREDUCE_UMAX:

  case ISD::VECREDUCE_SMAX:

    std::swap(LHS, RHS);

    std::swap(TrueVal, FalseVal);

    break;

  }


  EVT VectorType = FalseVal->getOperand(0).getValueType();


  if (VectorType != MVT::v16i8 && VectorType != MVT::v8i16 &&

      VectorType != MVT::v4i32)

    return SDValue();


  EVT VectorScalarType = VectorType.getVectorElementType();


  // The values being selected must also be the ones being compared

  if (TrueVal != LHS || FalseVal != RHS)

    return SDValue();


  EVT LeftType = LHS->getValueType(0);

  EVT RightType = RHS->getValueType(0);


  // The types must match the reduced type too

  if (LeftType != VectorScalarType || RightType != VectorScalarType)

    return SDValue();


  // Legalise the scalar to an i32

  if (VectorScalarType != MVT::i32)

    LHS = DCI.DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, LHS);


  // Generate the reduction as an i32 for legalisation purposes

  auto Reduction =

      DCI.DAG.getNode(Opcode, dl, MVT::i32, LHS, RHS->getOperand(0));


  // The result isn't actually an i32 so truncate it back to its original type

  if (VectorScalarType != MVT::i32)

    Reduction = DCI.DAG.getNode(ISD::TRUNCATE, dl, VectorScalarType, Reduction);


  return Reduction;

}


// A special combine for the vqdmulh family of instructions. This is one of the

// potential set of patterns that could patch this instruction. The base pattern

// you would expect to be min(max(ashr(mul(mul(sext(x), 2), sext(y)), 16))).

// This matches the different min(max(ashr(mul(mul(sext(x), sext(y)), 2), 16))),

// which llvm will have optimized to min(ashr(mul(sext(x), sext(y)), 15))) as

// the max is unnecessary.


static SDValue PerformVQDMULHCombine(SDNode *N, SelectionDAG &DAG) {

  EVT VT = N->getValueType(0);

  SDValue Shft;

  ConstantSDNode *Clamp;


  if (!VT.isVector() || VT.getScalarSizeInBits() > 64)

    return SDValue();


  if (N->getOpcode() == ISD::SMIN) {

    Shft = N->getOperand(0);

    Clamp = isConstOrConstSplat(N->getOperand(1));

  } else if (N->getOpcode() == ISD::VSELECT) {

    // Detect a SMIN, which for an i64 node will be a vselect/setcc, not a smin.

    SDValue Cmp = N->getOperand(0);

    if (Cmp.getOpcode() != ISD::SETCC ||

        cast<CondCodeSDNode>(Cmp.getOperand(2))->get() != ISD::SETLT ||

        Cmp.getOperand(0) != N->getOperand(1) ||

        Cmp.getOperand(1) != N->getOperand(2))

      return SDValue();

    Shft = N->getOperand(1);

    Clamp = isConstOrConstSplat(N->getOperand(2));

  } else

    return SDValue();


  if (!Clamp)

    return SDValue();


  MVT ScalarType;

  int ShftAmt = 0;

  switch (Clamp->getSExtValue()) {

  case (1 << 7) - 1:

    ScalarType = MVT::i8;

    ShftAmt = 7;

    break;

  case (1 << 15) - 1:

    ScalarType = MVT::i16;

    ShftAmt = 15;

    break;

  case (1ULL << 31) - 1:

    ScalarType = MVT::i32;

    ShftAmt = 31;

    break;

  default:

    return SDValue();

  }


  if (Shft.getOpcode() != ISD::SRA)

    return SDValue();

  ConstantSDNode *N1 = isConstOrConstSplat(Shft.getOperand(1));

  if (!N1 || N1->getSExtValue() != ShftAmt)

    return SDValue();


  SDValue Mul = Shft.getOperand(0);

  if (Mul.getOpcode() != ISD::MUL)

    return SDValue();


  SDValue Ext0 = Mul.getOperand(0);

  SDValue Ext1 = Mul.getOperand(1);

  if (Ext0.getOpcode() != ISD::SIGN_EXTEND ||

      Ext1.getOpcode() != ISD::SIGN_EXTEND)

    return SDValue();

  EVT VecVT = Ext0.getOperand(0).getValueType();

  if (!VecVT.isPow2VectorType() || VecVT.getVectorNumElements() == 1)

    return SDValue();

  if (Ext1.getOperand(0).getValueType() != VecVT ||

      VecVT.getScalarType() != ScalarType ||

      VT.getScalarSizeInBits() < ScalarType.getScalarSizeInBits() * 2)

    return SDValue();


  SDLoc DL(Mul);

  unsigned LegalLanes = 128 / (ShftAmt + 1);

  EVT LegalVecVT = MVT::getVectorVT(ScalarType, LegalLanes);

  // For types smaller than legal vectors extend to be legal and only use needed

  // lanes.

  if (VecVT.getSizeInBits() < 128) {

    EVT ExtVecVT =

        MVT::getVectorVT(MVT::getIntegerVT(128 / VecVT.getVectorNumElements()),

                         VecVT.getVectorNumElements());

    SDValue Inp0 =

        DAG.getNode(ISD::ANY_EXTEND, DL, ExtVecVT, Ext0.getOperand(0));

    SDValue Inp1 =

        DAG.getNode(ISD::ANY_EXTEND, DL, ExtVecVT, Ext1.getOperand(0));

    Inp0 = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, LegalVecVT, Inp0);

    Inp1 = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, LegalVecVT, Inp1);

    SDValue VQDMULH = DAG.getNode(ARMISD::VQDMULH, DL, LegalVecVT, Inp0, Inp1);

    SDValue Trunc = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, ExtVecVT, VQDMULH);

    Trunc = DAG.getNode(ISD::TRUNCATE, DL, VecVT, Trunc);

    return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Trunc);

  }


  // For larger types, split into legal sized chunks.

  assert(VecVT.getSizeInBits() % 128 == 0 && "Expected a power2 type");

  unsigned NumParts = VecVT.getSizeInBits() / 128;

  SmallVector<SDValue> Parts;

  for (unsigned I = 0; I < NumParts; ++I) {

    SDValue Inp0 =

        DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LegalVecVT, Ext0.getOperand(0),

                    DAG.getVectorIdxConstant(I * LegalLanes, DL));

    SDValue Inp1 =

        DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LegalVecVT, Ext1.getOperand(0),

                    DAG.getVectorIdxConstant(I * LegalLanes, DL));

    SDValue VQDMULH = DAG.getNode(ARMISD::VQDMULH, DL, LegalVecVT, Inp0, Inp1);

    Parts.push_back(VQDMULH);

  }

  return DAG.getNode(ISD::SIGN_EXTEND, DL, VT,

                     DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, Parts));

}


static SDValue PerformVSELECTCombine(SDNode *N,

                                     TargetLowering::DAGCombinerInfo &DCI,

                                     const ARMSubtarget *Subtarget) {

  if (!Subtarget->hasMVEIntegerOps())

    return SDValue();


  if (SDValue V = PerformVQDMULHCombine(N, DCI.DAG))

    return V;


  // Transforms vselect(not(cond), lhs, rhs) into vselect(cond, rhs, lhs).

  //

  // We need to re-implement this optimization here as the implementation in the

  // Target-Independent DAGCombiner does not handle the kind of constant we make

  // (it calls isConstOrConstSplat with AllowTruncation set to false - and for

  // good reason, allowing truncation there would break other targets).

  //

  // Currently, this is only done for MVE, as it's the only target that benefits

  // from this transformation (e.g. VPNOT+VPSEL becomes a single VPSEL).

  if (N->getOperand(0).getOpcode() != ISD::XOR)

    return SDValue();

  SDValue XOR = N->getOperand(0);


  // Check if the XOR's RHS is either a 1, or a BUILD_VECTOR of 1s.

  // It is important to check with truncation allowed as the BUILD_VECTORs we

  // generate in those situations will truncate their operands.

  ConstantSDNode *Const =

      isConstOrConstSplat(XOR->getOperand(1), /*AllowUndefs*/ false,

                          /*AllowTruncation*/ true);

  if (!Const || !Const->isOne())

    return SDValue();


  // Rewrite into vselect(cond, rhs, lhs).

  SDValue Cond = XOR->getOperand(0);

  SDValue LHS = N->getOperand(1);

  SDValue RHS = N->getOperand(2);

  EVT Type = N->getValueType(0);

  return DCI.DAG.getNode(ISD::VSELECT, SDLoc(N), Type, Cond, RHS, LHS);

}


// Convert vsetcc([0,1,2,..], splat(n), ult) -> vctp n


static SDValue PerformVSetCCToVCTPCombine(SDNode *N,

                                          TargetLowering::DAGCombinerInfo &DCI,

                                          const ARMSubtarget *Subtarget) {

  SDValue Op0 = N->getOperand(0);

  SDValue Op1 = N->getOperand(1);

  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();

  EVT VT = N->getValueType(0);


  if (!Subtarget->hasMVEIntegerOps() ||

      !DCI.DAG.getTargetLoweringInfo().isTypeLegal(VT))

    return SDValue();


  if (CC == ISD::SETUGE) {

    std::swap(Op0, Op1);

    CC = ISD::SETULT;

  }


  if (CC != ISD::SETULT || VT.getScalarSizeInBits() != 1 ||

      Op0.getOpcode() != ISD::BUILD_VECTOR)

    return SDValue();


  // Check first operand is BuildVector of 0,1,2,...

  for (unsigned I = 0; I < VT.getVectorNumElements(); I++) {

    if (!Op0.getOperand(I).isUndef() &&

        !(isa<ConstantSDNode>(Op0.getOperand(I)) &&

          Op0.getConstantOperandVal(I) == I))

      return SDValue();

  }


  // The second is a Splat of Op1S

  SDValue Op1S = DCI.DAG.getSplatValue(Op1);

  if (!Op1S)

    return SDValue();


  unsigned Opc;

  switch (VT.getVectorNumElements()) {

  case 2:

    Opc = Intrinsic::arm_mve_vctp64;

    break;

  case 4:

    Opc = Intrinsic::arm_mve_vctp32;

    break;

  case 8:

    Opc = Intrinsic::arm_mve_vctp16;

    break;

  case 16:

    Opc = Intrinsic::arm_mve_vctp8;

    break;

  default:

    return SDValue();

  }


  SDLoc DL(N);

  return DCI.DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,

                         DCI.DAG.getConstant(Opc, DL, MVT::i32),

                         DCI.DAG.getZExtOrTrunc(Op1S, DL, MVT::i32));

}


/// PerformADDECombine - Target-specific dag combine transform from

/// ARMISD::ADDC, ARMISD::ADDE, and ISD::MUL_LOHI to MLAL or

/// ARMISD::ADDC, ARMISD::ADDE and ARMISD::UMLAL to ARMISD::UMAAL


static SDValue PerformADDECombine(SDNode *N,

                                  TargetLowering::DAGCombinerInfo &DCI,

                                  const ARMSubtarget *Subtarget) {

  // Only ARM and Thumb2 support UMLAL/SMLAL.

  if (Subtarget->isThumb1Only())

    return PerformAddeSubeCombine(N, DCI, Subtarget);


  // Only perform the checks after legalize when the pattern is available.

  if (DCI.isBeforeLegalize()) return SDValue();


  return AddCombineTo64bitUMAAL(N, DCI, Subtarget);

}


/// PerformADDCombineWithOperands - Try DAG combinations for an ADD with

/// operands N0 and N1.  This is a helper for PerformADDCombine that is

/// called with the default operands, and if that fails, with commuted

/// operands.


static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,

                                          TargetLowering::DAGCombinerInfo &DCI,

                                          const ARMSubtarget *Subtarget){

  // Attempt to create vpadd for this add.

  if (SDValue Result = AddCombineToVPADD(N, N0, N1, DCI, Subtarget))

    return Result;


  // Attempt to create vpaddl for this add.

  if (SDValue Result = AddCombineVUZPToVPADDL(N, N0, N1, DCI, Subtarget))

    return Result;

  if (SDValue Result = AddCombineBUILD_VECTORToVPADDL(N, N0, N1, DCI,

                                                      Subtarget))

    return Result;


  // fold (add (select cc, 0, c), x) -> (select cc, x, (add, x, c))

  if (N0.getNode()->hasOneUse())

    if (SDValue Result = combineSelectAndUse(N, N0, N1, DCI))

      return Result;

  return SDValue();

}


static SDValue TryDistrubutionADDVecReduce(SDNode *N, SelectionDAG &DAG) {

  EVT VT = N->getValueType(0);

  SDValue N0 = N->getOperand(0);

  SDValue N1 = N->getOperand(1);

  SDLoc dl(N);


  auto IsVecReduce = [](SDValue Op) {

    switch (Op.getOpcode()) {

    case ISD::VECREDUCE_ADD:

    case ARMISD::VADDVs:

    case ARMISD::VADDVu:

    case ARMISD::VMLAVs:

    case ARMISD::VMLAVu:

      return true;

    }

    return false;

  };


  auto DistrubuteAddAddVecReduce = [&](SDValue N0, SDValue N1) {

    // Distribute add(X, add(vecreduce(Y), vecreduce(Z))) ->

    //   add(add(X, vecreduce(Y)), vecreduce(Z))

    // to make better use of vaddva style instructions.

    if (VT == MVT::i32 && N1.getOpcode() == ISD::ADD && !IsVecReduce(N0) &&

        IsVecReduce(N1.getOperand(0)) && IsVecReduce(N1.getOperand(1)) &&

        !isa<ConstantSDNode>(N0) && N1->hasOneUse()) {

      SDValue Add0 = DAG.getNode(ISD::ADD, dl, VT, N0, N1.getOperand(0));

      return DAG.getNode(ISD::ADD, dl, VT, Add0, N1.getOperand(1));

    }

    // And turn add(add(A, reduce(B)), add(C, reduce(D))) ->

    //   add(add(add(A, C), reduce(B)), reduce(D))

    if (VT == MVT::i32 && N0.getOpcode() == ISD::ADD &&

        N1.getOpcode() == ISD::ADD && N0->hasOneUse() && N1->hasOneUse()) {

      unsigned N0RedOp = 0;

      if (!IsVecReduce(N0.getOperand(N0RedOp))) {

        N0RedOp = 1;

        if (!IsVecReduce(N0.getOperand(N0RedOp)))

          return SDValue();

      }


      unsigned N1RedOp = 0;

      if (!IsVecReduce(N1.getOperand(N1RedOp)))

        N1RedOp = 1;

      if (!IsVecReduce(N1.getOperand(N1RedOp)))

        return SDValue();


      SDValue Add0 = DAG.getNode(ISD::ADD, dl, VT, N0.getOperand(1 - N0RedOp),

                                 N1.getOperand(1 - N1RedOp));

      SDValue Add1 =

          DAG.getNode(ISD::ADD, dl, VT, Add0, N0.getOperand(N0RedOp));

      return DAG.getNode(ISD::ADD, dl, VT, Add1, N1.getOperand(N1RedOp));

    }

    return SDValue();

  };

  if (SDValue R = DistrubuteAddAddVecReduce(N0, N1))

    return R;

  if (SDValue R = DistrubuteAddAddVecReduce(N1, N0))

    return R;


  // Distribute add(vecreduce(load(Y)), vecreduce(load(Z)))

  // Or add(add(X, vecreduce(load(Y))), vecreduce(load(Z)))

  // by ascending load offsets. This can help cores prefetch if the order of

  // loads is more predictable.

  auto DistrubuteVecReduceLoad = [&](SDValue N0, SDValue N1, bool IsForward) {

    // Check if two reductions are known to load data where one is before/after

    // another. Return negative if N0 loads data before N1, positive if N1 is

    // before N0 and 0 otherwise if nothing is known.

    auto IsKnownOrderedLoad = [&](SDValue N0, SDValue N1) {

      // Look through to the first operand of a MUL, for the VMLA case.

      // Currently only looks at the first operand, in the hope they are equal.

      if (N0.getOpcode() == ISD::MUL)

        N0 = N0.getOperand(0);

      if (N1.getOpcode() == ISD::MUL)

        N1 = N1.getOperand(0);


      // Return true if the two operands are loads to the same object and the

      // offset of the first is known to be less than the offset of the second.

      LoadSDNode *Load0 = dyn_cast<LoadSDNode>(N0);

      LoadSDNode *Load1 = dyn_cast<LoadSDNode>(N1);

      if (!Load0 || !Load1 || Load0->getChain() != Load1->getChain() ||

          !Load0->isSimple() || !Load1->isSimple() || Load0->isIndexed() ||

          Load1->isIndexed())

        return 0;


      auto BaseLocDecomp0 = BaseIndexOffset::match(Load0, DAG);

      auto BaseLocDecomp1 = BaseIndexOffset::match(Load1, DAG);


      if (!BaseLocDecomp0.getBase() ||

          BaseLocDecomp0.getBase() != BaseLocDecomp1.getBase() ||

          !BaseLocDecomp0.hasValidOffset() || !BaseLocDecomp1.hasValidOffset())

        return 0;

      if (BaseLocDecomp0.getOffset() < BaseLocDecomp1.getOffset())

        return -1;

      if (BaseLocDecomp0.getOffset() > BaseLocDecomp1.getOffset())

        return 1;

      return 0;

    };


    SDValue X;

    if (N0.getOpcode() == ISD::ADD && N0->hasOneUse()) {

      if (IsVecReduce(N0.getOperand(0)) && IsVecReduce(N0.getOperand(1))) {

        int IsBefore = IsKnownOrderedLoad(N0.getOperand(0).getOperand(0),

                                         N0.getOperand(1).getOperand(0));

        if (IsBefore < 0) {

          X = N0.getOperand(0);

          N0 = N0.getOperand(1);

        } else if (IsBefore > 0) {

          X = N0.getOperand(1);

          N0 = N0.getOperand(0);

        } else

          return SDValue();

      } else if (IsVecReduce(N0.getOperand(0))) {

        X = N0.getOperand(1);

        N0 = N0.getOperand(0);

      } else if (IsVecReduce(N0.getOperand(1))) {

        X = N0.getOperand(0);

        N0 = N0.getOperand(1);

      } else

        return SDValue();

    } else if (IsForward && IsVecReduce(N0) && IsVecReduce(N1) &&

               IsKnownOrderedLoad(N0.getOperand(0), N1.getOperand(0)) < 0) {

      // Note this is backward to how you would expect. We create

      // add(reduce(load + 16), reduce(load + 0)) so that the

      // add(reduce(load+16), X) is combined into VADDVA(X, load+16)), leaving

      // the X as VADDV(load + 0)

      return DAG.getNode(ISD::ADD, dl, VT, N1, N0);

    } else

      return SDValue();


    if (!IsVecReduce(N0) || !IsVecReduce(N1))

      return SDValue();


    if (IsKnownOrderedLoad(N1.getOperand(0), N0.getOperand(0)) >= 0)

      return SDValue();


    // Switch from add(add(X, N0), N1) to add(add(X, N1), N0)

    SDValue Add0 = DAG.getNode(ISD::ADD, dl, VT, X, N1);

    return DAG.getNode(ISD::ADD, dl, VT, Add0, N0);

  };

  if (SDValue R = DistrubuteVecReduceLoad(N0, N1, true))

    return R;

  if (SDValue R = DistrubuteVecReduceLoad(N1, N0, false))

    return R;

  return SDValue();

}


static SDValue PerformADDVecReduce(SDNode *N, SelectionDAG &DAG,

                                   const ARMSubtarget *Subtarget) {

  if (!Subtarget->hasMVEIntegerOps())

    return SDValue();


  if (SDValue R = TryDistrubutionADDVecReduce(N, DAG))

    return R;


  EVT VT = N->getValueType(0);

  SDValue N0 = N->getOperand(0);

  SDValue N1 = N->getOperand(1);

  SDLoc dl(N);


  if (VT != MVT::i64)

    return SDValue();


  // We are looking for a i64 add of a VADDLVx. Due to these being i64's, this

  // will look like:

  //   t1: i32,i32 = ARMISD::VADDLVs x

  //   t2: i64 = build_pair t1, t1:1

  //   t3: i64 = add t2, y

  // Otherwise we try to push the add up above VADDLVAx, to potentially allow

  // the add to be simplified separately.

  // We also need to check for sext / zext and commutitive adds.

  auto MakeVecReduce = [&](unsigned Opcode, unsigned OpcodeA, SDValue NA,

                           SDValue NB) {

    if (NB->getOpcode() != ISD::BUILD_PAIR)

      return SDValue();

    SDValue VecRed = NB->getOperand(0);

    if ((VecRed->getOpcode() != Opcode && VecRed->getOpcode() != OpcodeA) ||

        VecRed.getResNo() != 0 ||

        NB->getOperand(1) != SDValue(VecRed.getNode(), 1))

      return SDValue();


    if (VecRed->getOpcode() == OpcodeA) {

      // add(NA, VADDLVA(Inp), Y) -> VADDLVA(add(NA, Inp), Y)

      SDValue Inp = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64,

                                VecRed.getOperand(0), VecRed.getOperand(1));

      NA = DAG.getNode(ISD::ADD, dl, MVT::i64, Inp, NA);

    }


    SmallVector<SDValue, 4> Ops(2);

    std::tie(Ops[0], Ops[1]) = DAG.SplitScalar(NA, dl, MVT::i32, MVT::i32);


    unsigned S = VecRed->getOpcode() == OpcodeA ? 2 : 0;

    for (unsigned I = S, E = VecRed.getNumOperands(); I < E; I++)

      Ops.push_back(VecRed->getOperand(I));

    SDValue Red =

        DAG.getNode(OpcodeA, dl, DAG.getVTList({MVT::i32, MVT::i32}), Ops);

    return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Red,

                       SDValue(Red.getNode(), 1));

  };


  if (SDValue M = MakeVecReduce(ARMISD::VADDLVs, ARMISD::VADDLVAs, N0, N1))

    return M;

  if (SDValue M = MakeVecReduce(ARMISD::VADDLVu, ARMISD::VADDLVAu, N0, N1))

    return M;

  if (SDValue M = MakeVecReduce(ARMISD::VADDLVs, ARMISD::VADDLVAs, N1, N0))

    return M;

  if (SDValue M = MakeVecReduce(ARMISD::VADDLVu, ARMISD::VADDLVAu, N1, N0))

    return M;

  if (SDValue M = MakeVecReduce(ARMISD::VADDLVps, ARMISD::VADDLVAps, N0, N1))

    return M;

  if (SDValue M = MakeVecReduce(ARMISD::VADDLVpu, ARMISD::VADDLVApu, N0, N1))

    return M;

  if (SDValue M = MakeVecReduce(ARMISD::VADDLVps, ARMISD::VADDLVAps, N1, N0))

    return M;

  if (SDValue M = MakeVecReduce(ARMISD::VADDLVpu, ARMISD::VADDLVApu, N1, N0))

    return M;

  if (SDValue M = MakeVecReduce(ARMISD::VMLALVs, ARMISD::VMLALVAs, N0, N1))

    return M;

  if (SDValue M = MakeVecReduce(ARMISD::VMLALVu, ARMISD::VMLALVAu, N0, N1))

    return M;

  if (SDValue M = MakeVecReduce(ARMISD::VMLALVs, ARMISD::VMLALVAs, N1, N0))

    return M;

  if (SDValue M = MakeVecReduce(ARMISD::VMLALVu, ARMISD::VMLALVAu, N1, N0))

    return M;

  if (SDValue M = MakeVecReduce(ARMISD::VMLALVps, ARMISD::VMLALVAps, N0, N1))

    return M;

  if (SDValue M = MakeVecReduce(ARMISD::VMLALVpu, ARMISD::VMLALVApu, N0, N1))

    return M;

  if (SDValue M = MakeVecReduce(ARMISD::VMLALVps, ARMISD::VMLALVAps, N1, N0))

    return M;

  if (SDValue M = MakeVecReduce(ARMISD::VMLALVpu, ARMISD::VMLALVApu, N1, N0))

    return M;

  return SDValue();

}


bool


ARMTargetLowering::isDesirableToCommuteWithShift(const SDNode *N,

                                                 CombineLevel Level) const {

  assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||

          N->getOpcode() == ISD::SRL) &&

         "Expected shift op");


  SDValue ShiftLHS = N->getOperand(0);

  if (!ShiftLHS->hasOneUse())

    return false;


  if (ShiftLHS.getOpcode() == ISD::SIGN_EXTEND &&

      !ShiftLHS.getOperand(0)->hasOneUse())

    return false;


  if (Level == BeforeLegalizeTypes)

    return true;


  if (N->getOpcode() != ISD::SHL)

    return true;


  if (Subtarget->isThumb1Only()) {

    // Avoid making expensive immediates by commuting shifts. (This logic

    // only applies to Thumb1 because ARM and Thumb2 immediates can be shifted

    // for free.)

    if (N->getOpcode() != ISD::SHL)

      return true;

    SDValue N1 = N->getOperand(0);

    if (N1->getOpcode() != ISD::ADD && N1->getOpcode() != ISD::AND &&

        N1->getOpcode() != ISD::OR && N1->getOpcode() != ISD::XOR)

      return true;

    if (auto *Const = dyn_cast<ConstantSDNode>(N1->getOperand(1))) {

      if (Const->getAPIntValue().ult(256))

        return false;

      if (N1->getOpcode() == ISD::ADD && Const->getAPIntValue().slt(0) &&

          Const->getAPIntValue().sgt(-256))

        return false;

    }

    return true;

  }


  // Turn off commute-with-shift transform after legalization, so it doesn't

  // conflict with PerformSHLSimplify.  (We could try to detect when

  // PerformSHLSimplify would trigger more precisely, but it isn't

  // really necessary.)

  return false;

}


bool ARMTargetLowering::isDesirableToCommuteXorWithShift(

    const SDNode *N) const {

  assert(N->getOpcode() == ISD::XOR &&

         (N->getOperand(0).getOpcode() == ISD::SHL ||

          N->getOperand(0).getOpcode() == ISD::SRL) &&

         "Expected XOR(SHIFT) pattern");


  // Only commute if the entire NOT mask is a hidden shifted mask.

  auto *XorC = dyn_cast<ConstantSDNode>(N->getOperand(1));

  auto *ShiftC = dyn_cast<ConstantSDNode>(N->getOperand(0).getOperand(1));

  if (XorC && ShiftC) {

    unsigned MaskIdx, MaskLen;

    if (XorC->getAPIntValue().isShiftedMask(MaskIdx, MaskLen)) {

      unsigned ShiftAmt = ShiftC->getZExtValue();

      unsigned BitWidth = N->getValueType(0).getScalarSizeInBits();

      if (N->getOperand(0).getOpcode() == ISD::SHL)

        return MaskIdx == ShiftAmt && MaskLen == (BitWidth - ShiftAmt);

      return MaskIdx == 0 && MaskLen == (BitWidth - ShiftAmt);

    }

  }


  return false;

}


bool ARMTargetLowering::shouldFoldConstantShiftPairToMask(

    const SDNode *N) const {

  assert(((N->getOpcode() == ISD::SHL &&

           N->getOperand(0).getOpcode() == ISD::SRL) ||

          (N->getOpcode() == ISD::SRL &&

           N->getOperand(0).getOpcode() == ISD::SHL)) &&

         "Expected shift-shift mask");


  if (!Subtarget->isThumb1Only())

    return true;


  EVT VT = N->getValueType(0);

  if (VT.getScalarSizeInBits() > 32)

    return true;


  return false;

}


bool ARMTargetLowering::shouldFoldSelectWithIdentityConstant(

    unsigned BinOpcode, EVT VT, unsigned SelectOpcode, SDValue X,

    SDValue Y) const {

  return Subtarget->hasMVEIntegerOps() && isTypeLegal(VT) &&

         SelectOpcode == ISD::VSELECT;

}


bool ARMTargetLowering::preferIncOfAddToSubOfNot(EVT VT) const {

  if (!Subtarget->hasNEON()) {

    if (Subtarget->isThumb1Only())

      return VT.getScalarSizeInBits() <= 32;

    return true;

  }

  return VT.isScalarInteger();

}


bool ARMTargetLowering::shouldConvertFpToSat(unsigned Op, EVT FPVT,

                                             EVT VT) const {

  if (!isOperationLegalOrCustom(Op, VT) || !FPVT.isSimple())

    return false;


  switch (FPVT.getSimpleVT().SimpleTy) {

  case MVT::f16:

    return Subtarget->hasVFP2Base();

  case MVT::f32:

    return Subtarget->hasVFP2Base();

  case MVT::f64:

    return Subtarget->hasFP64();

  case MVT::v4f32:

  case MVT::v8f16:

    return Subtarget->hasMVEFloatOps();

  default:

    return false;

  }

}


static SDValue PerformSHLSimplify(SDNode *N,

                                TargetLowering::DAGCombinerInfo &DCI,

                                const ARMSubtarget *ST) {

  // Allow the generic combiner to identify potential bswaps.

  if (DCI.isBeforeLegalize())

    return SDValue();


  // DAG combiner will fold:

  // (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)

  // (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2

  // Other code patterns that can be also be modified have the following form:

  // b + ((a << 1) | 510)

  // b + ((a << 1) & 510)

  // b + ((a << 1) ^ 510)

  // b + ((a << 1) + 510)


  // Many instructions can  perform the shift for free, but it requires both

  // the operands to be registers. If c1 << c2 is too large, a mov immediate

  // instruction will needed. So, unfold back to the original pattern if:

  // - if c1 and c2 are small enough that they don't require mov imms.

  // - the user(s) of the node can perform an shl


  // No shifted operands for 16-bit instructions.

  if (ST->isThumb() && ST->isThumb1Only())

    return SDValue();


  // Check that all the users could perform the shl themselves.

  for (auto *U : N->users()) {

    switch(U->getOpcode()) {

    default:

      return SDValue();

    case ISD::SUB:

    case ISD::ADD:

    case ISD::AND:

    case ISD::OR:

    case ISD::XOR:

    case ISD::SETCC:

    case ARMISD::CMP:

      // Check that the user isn't already using a constant because there

      // aren't any instructions that support an immediate operand and a

      // shifted operand.

      if (isa<ConstantSDNode>(U->getOperand(0)) ||

          isa<ConstantSDNode>(U->getOperand(1)))

        return SDValue();


      // Check that it's not already using a shift.

      if (U->getOperand(0).getOpcode() == ISD::SHL ||

          U->getOperand(1).getOpcode() == ISD::SHL)

        return SDValue();

      break;

    }

  }


  if (N->getOpcode() != ISD::ADD && N->getOpcode() != ISD::OR &&

      N->getOpcode() != ISD::XOR && N->getOpcode() != ISD::AND)

    return SDValue();


  if (N->getOperand(0).getOpcode() != ISD::SHL)

    return SDValue();


  SDValue SHL = N->getOperand(0);


  auto *C1ShlC2 = dyn_cast<ConstantSDNode>(N->getOperand(1));

  auto *C2 = dyn_cast<ConstantSDNode>(SHL.getOperand(1));

  if (!C1ShlC2 || !C2)

    return SDValue();


  APInt C2Int = C2->getAPIntValue();

  APInt C1Int = C1ShlC2->getAPIntValue();

  unsigned C2Width = C2Int.getBitWidth();

  if (C2Int.uge(C2Width))

    return SDValue();

  uint64_t C2Value = C2Int.getZExtValue();


  // Check that performing a lshr will not lose any information.

  APInt Mask = APInt::getHighBitsSet(C2Width, C2Width - C2Value);

  if ((C1Int & Mask) != C1Int)

    return SDValue();


  // Shift the first constant.

  C1Int.lshrInPlace(C2Int);


  // The immediates are encoded as an 8-bit value that can be rotated.

  auto LargeImm = [](const APInt &Imm) {

    unsigned Zeros = Imm.countl_zero() + Imm.countr_zero();

    return Imm.getBitWidth() - Zeros > 8;

  };


  if (LargeImm(C1Int) || LargeImm(C2Int))

    return SDValue();


  SelectionDAG &DAG = DCI.DAG;

  SDLoc dl(N);

  SDValue X = SHL.getOperand(0);

  SDValue BinOp = DAG.getNode(N->getOpcode(), dl, MVT::i32, X,

                              DAG.getConstant(C1Int, dl, MVT::i32));

  // Shift left to compensate for the lshr of C1Int.

  SDValue Res = DAG.getNode(ISD::SHL, dl, MVT::i32, BinOp, SHL.getOperand(1));


  LLVM_DEBUG(dbgs() << "Simplify shl use:\n"; SHL.getOperand(0).dump();

             SHL.dump(); N->dump());

  LLVM_DEBUG(dbgs() << "Into:\n"; X.dump(); BinOp.dump(); Res.dump());

  return Res;

}


/// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.

///


static SDValue PerformADDCombine(SDNode *N,

                                 TargetLowering::DAGCombinerInfo &DCI,

                                 const ARMSubtarget *Subtarget) {

  SDValue N0 = N->getOperand(0);

  SDValue N1 = N->getOperand(1);


  // Only works one way, because it needs an immediate operand.

  if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget))

    return Result;


  if (SDValue Result = PerformADDVecReduce(N, DCI.DAG, Subtarget))

    return Result;


  // First try with the default operand order.

  if (SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI, Subtarget))

    return Result;


  // If that didn't work, try again with the operands commuted.

  return PerformADDCombineWithOperands(N, N1, N0, DCI, Subtarget);

}


// Combine (sub 0, (csinc X, Y, CC)) -> (csinv -X, Y, CC)

//   providing -X is as cheap as X (currently, just a constant).


static SDValue PerformSubCSINCCombine(SDNode *N, SelectionDAG &DAG) {

  if (N->getValueType(0) != MVT::i32 || !isNullConstant(N->getOperand(0)))

    return SDValue();

  SDValue CSINC = N->getOperand(1);

  if (CSINC.getOpcode() != ARMISD::CSINC || !CSINC.hasOneUse())

    return SDValue();


  ConstantSDNode *X = dyn_cast<ConstantSDNode>(CSINC.getOperand(0));

  if (!X)

    return SDValue();


  return DAG.getNode(ARMISD::CSINV, SDLoc(N), MVT::i32,

                     DAG.getNode(ISD::SUB, SDLoc(N), MVT::i32, N->getOperand(0),

                                 CSINC.getOperand(0)),

                     CSINC.getOperand(1), CSINC.getOperand(2),

                     CSINC.getOperand(3));

}


static bool isNegatedInteger(SDValue Op) {

  return Op.getOpcode() == ISD::SUB && isNullConstant(Op.getOperand(0));

}


// Try to fold

//

// (neg (cmov X, Y)) -> (cmov (neg X), (neg Y))

//

// The folding helps cmov to be matched with csneg without generating

// redundant neg instruction.


static SDValue performNegCMovCombine(SDNode *N, SelectionDAG &DAG) {

  if (!isNegatedInteger(SDValue(N, 0)))

    return SDValue();


  SDValue CMov = N->getOperand(1);

  if (CMov.getOpcode() != ARMISD::CMOV || !CMov->hasOneUse())

    return SDValue();


  SDValue N0 = CMov.getOperand(0);

  SDValue N1 = CMov.getOperand(1);


  // If neither of them are negations, it's not worth the folding as it

  // introduces two additional negations while reducing one negation.

  if (!isNegatedInteger(N0) && !isNegatedInteger(N1))

    return SDValue();


  SDLoc DL(N);

  EVT VT = CMov.getValueType();


  SDValue N0N = DAG.getNegative(N0, DL, VT);

  SDValue N1N = DAG.getNegative(N1, DL, VT);

  return DAG.getNode(ARMISD::CMOV, DL, VT, N0N, N1N, CMov.getOperand(2),

                     CMov.getOperand(3));

}


/// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB.

///


static SDValue PerformSUBCombine(SDNode *N,

                                 TargetLowering::DAGCombinerInfo &DCI,

                                 const ARMSubtarget *Subtarget) {

  SDValue N0 = N->getOperand(0);

  SDValue N1 = N->getOperand(1);


  // fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c))

  if (N1.getNode()->hasOneUse())

    if (SDValue Result = combineSelectAndUse(N, N1, N0, DCI))

      return Result;


  if (SDValue R = PerformSubCSINCCombine(N, DCI.DAG))

    return R;


  if (SDValue Val = performNegCMovCombine(N, DCI.DAG))

    return Val;


  if (!Subtarget->hasMVEIntegerOps() || !N->getValueType(0).isVector())

    return SDValue();


  // Fold (sub (ARMvmovImm 0), (ARMvdup x)) -> (ARMvdup (sub 0, x))

  // so that we can readily pattern match more mve instructions which can use

  // a scalar operand.

  SDValue VDup = N->getOperand(1);

  if (VDup->getOpcode() != ARMISD::VDUP)

    return SDValue();


  SDValue VMov = N->getOperand(0);

  if (VMov->getOpcode() == ISD::BITCAST)

    VMov = VMov->getOperand(0);


  if (VMov->getOpcode() != ARMISD::VMOVIMM || !isZeroVector(VMov))

    return SDValue();


  SDLoc dl(N);

  SDValue Negate = DCI.DAG.getNode(ISD::SUB, dl, MVT::i32,

                                   DCI.DAG.getConstant(0, dl, MVT::i32),

                                   VDup->getOperand(0));

  return DCI.DAG.getNode(ARMISD::VDUP, dl, N->getValueType(0), Negate);

}


/// PerformVMULCombine

/// Distribute (A + B) * C to (A * C) + (B * C) to take advantage of the

/// special multiplier accumulator forwarding.

///   vmul d3, d0, d2

///   vmla d3, d1, d2

/// is faster than

///   vadd d3, d0, d1

///   vmul d3, d3, d2

//  However, for (A + B) * (A + B),

//    vadd d2, d0, d1

//    vmul d3, d0, d2

//    vmla d3, d1, d2

//  is slower than

//    vadd d2, d0, d1

//    vmul d3, d2, d2


static SDValue PerformVMULCombine(SDNode *N,

                                  TargetLowering::DAGCombinerInfo &DCI,

                                  const ARMSubtarget *Subtarget) {

  if (!Subtarget->hasVMLxForwarding())

    return SDValue();


  SelectionDAG &DAG = DCI.DAG;

  SDValue N0 = N->getOperand(0);

  SDValue N1 = N->getOperand(1);

  unsigned Opcode = N0.getOpcode();

  if (Opcode != ISD::ADD && Opcode != ISD::SUB &&

      Opcode != ISD::FADD && Opcode != ISD::FSUB) {

    Opcode = N1.getOpcode();

    if (Opcode != ISD::ADD && Opcode != ISD::SUB &&

        Opcode != ISD::FADD && Opcode != ISD::FSUB)

      return SDValue();

    std::swap(N0, N1);

  }


  if (N0 == N1)

    return SDValue();


  EVT VT = N->getValueType(0);

  SDLoc DL(N);

  SDValue N00 = N0->getOperand(0);

  SDValue N01 = N0->getOperand(1);

  return DAG.getNode(Opcode, DL, VT,

                     DAG.getNode(ISD::MUL, DL, VT, N00, N1),

                     DAG.getNode(ISD::MUL, DL, VT, N01, N1));

}


static SDValue PerformMVEVMULLCombine(SDNode *N, SelectionDAG &DAG,

                                      const ARMSubtarget *Subtarget) {

  EVT VT = N->getValueType(0);

  if (VT != MVT::v2i64)

    return SDValue();


  SDValue N0 = N->getOperand(0);

  SDValue N1 = N->getOperand(1);


  auto IsSignExt = [&](SDValue Op) {

    if (Op->getOpcode() != ISD::SIGN_EXTEND_INREG)

      return SDValue();

    EVT VT = cast<VTSDNode>(Op->getOperand(1))->getVT();

    if (VT.getScalarSizeInBits() == 32)

      return Op->getOperand(0);

    return SDValue();

  };

  auto IsZeroExt = [&](SDValue Op) {

    // Zero extends are a little more awkward. At the point we are matching

    // this, we are looking for an AND with a (-1, 0, -1, 0) buildvector mask.

    // That might be before of after a bitcast depending on how the and is

    // placed. Because this has to look through bitcasts, it is currently only

    // supported on LE.

    if (!Subtarget->isLittle())

      return SDValue();


    SDValue And = Op;

    if (And->getOpcode() == ISD::BITCAST)

      And = And->getOperand(0);

    if (And->getOpcode() != ISD::AND)

      return SDValue();

    SDValue Mask = And->getOperand(1);

    if (Mask->getOpcode() == ISD::BITCAST)

      Mask = Mask->getOperand(0);


    if (Mask->getOpcode() != ISD::BUILD_VECTOR ||

        Mask.getValueType() != MVT::v4i32)

      return SDValue();

    if (isAllOnesConstant(Mask->getOperand(0)) &&

        isNullConstant(Mask->getOperand(1)) &&

        isAllOnesConstant(Mask->getOperand(2)) &&

        isNullConstant(Mask->getOperand(3)))

      return And->getOperand(0);

    return SDValue();

  };


  SDLoc dl(N);

  if (SDValue Op0 = IsSignExt(N0)) {

    if (SDValue Op1 = IsSignExt(N1)) {

      SDValue New0a = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, Op0);

      SDValue New1a = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, Op1);

      return DAG.getNode(ARMISD::VMULLs, dl, VT, New0a, New1a);

    }

  }

  if (SDValue Op0 = IsZeroExt(N0)) {

    if (SDValue Op1 = IsZeroExt(N1)) {

      SDValue New0a = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, Op0);

      SDValue New1a = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, Op1);

      return DAG.getNode(ARMISD::VMULLu, dl, VT, New0a, New1a);

    }

  }


  return SDValue();

}


static SDValue PerformMULCombine(SDNode *N,

                                 TargetLowering::DAGCombinerInfo &DCI,

                                 const ARMSubtarget *Subtarget) {

  SelectionDAG &DAG = DCI.DAG;


  EVT VT = N->getValueType(0);

  if (Subtarget->hasMVEIntegerOps() && VT == MVT::v2i64)

    return PerformMVEVMULLCombine(N, DAG, Subtarget);


  if (Subtarget->isThumb1Only())

    return SDValue();


  if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())

    return SDValue();


  if (VT.is64BitVector() || VT.is128BitVector())

    return PerformVMULCombine(N, DCI, Subtarget);

  if (VT != MVT::i32)

    return SDValue();


  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));

  if (!C)

    return SDValue();


  int64_t MulAmt = C->getSExtValue();

  unsigned ShiftAmt = llvm::countr_zero<uint64_t>(MulAmt);


  ShiftAmt = ShiftAmt & (32 - 1);

  SDValue V = N->getOperand(0);

  SDLoc DL(N);


  SDValue Res;

  MulAmt >>= ShiftAmt;


  if (MulAmt >= 0) {

    if (llvm::has_single_bit<uint32_t>(MulAmt - 1)) {

      // (mul x, 2^N + 1) => (add (shl x, N), x)

      Res = DAG.getNode(ISD::ADD, DL, VT,

                        V,

                        DAG.getNode(ISD::SHL, DL, VT,

                                    V,

                                    DAG.getConstant(Log2_32(MulAmt - 1), DL,

                                                    MVT::i32)));

    } else if (llvm::has_single_bit<uint32_t>(MulAmt + 1)) {

      // (mul x, 2^N - 1) => (sub (shl x, N), x)

      Res = DAG.getNode(ISD::SUB, DL, VT,

                        DAG.getNode(ISD::SHL, DL, VT,

                                    V,

                                    DAG.getConstant(Log2_32(MulAmt + 1), DL,

                                                    MVT::i32)),

                        V);

    } else

      return SDValue();

  } else {

    uint64_t MulAmtAbs = -MulAmt;

    if (llvm::has_single_bit<uint32_t>(MulAmtAbs + 1)) {

      // (mul x, -(2^N - 1)) => (sub x, (shl x, N))

      Res = DAG.getNode(ISD::SUB, DL, VT,

                        V,

                        DAG.getNode(ISD::SHL, DL, VT,

                                    V,

                                    DAG.getConstant(Log2_32(MulAmtAbs + 1), DL,

                                                    MVT::i32)));

    } else if (llvm::has_single_bit<uint32_t>(MulAmtAbs - 1)) {

      // (mul x, -(2^N + 1)) => - (add (shl x, N), x)

      Res = DAG.getNode(ISD::ADD, DL, VT,

                        V,

                        DAG.getNode(ISD::SHL, DL, VT,

                                    V,

                                    DAG.getConstant(Log2_32(MulAmtAbs - 1), DL,

                                                    MVT::i32)));

      Res = DAG.getNode(ISD::SUB, DL, VT,

                        DAG.getConstant(0, DL, MVT::i32), Res);

    } else

      return SDValue();

  }


  if (ShiftAmt != 0)

    Res = DAG.getNode(ISD::SHL, DL, VT,

                      Res, DAG.getConstant(ShiftAmt, DL, MVT::i32));


  // Do not add new nodes to DAG combiner worklist.

  DCI.CombineTo(N, Res, false);

  return SDValue();

}


static SDValue CombineANDShift(SDNode *N,

                               TargetLowering::DAGCombinerInfo &DCI,

                               const ARMSubtarget *Subtarget) {

  // Allow DAGCombine to pattern-match before we touch the canonical form.

  if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())

    return SDValue();


  if (N->getValueType(0) != MVT::i32)

    return SDValue();


  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));

  if (!N1C)

    return SDValue();


  uint32_t C1 = (uint32_t)N1C->getZExtValue();

  // Don't transform uxtb/uxth.

  if (C1 == 255 || C1 == 65535)

    return SDValue();


  SDNode *N0 = N->getOperand(0).getNode();

  if (!N0->hasOneUse())

    return SDValue();


  if (N0->getOpcode() != ISD::SHL && N0->getOpcode() != ISD::SRL)

    return SDValue();


  bool LeftShift = N0->getOpcode() == ISD::SHL;


  ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0->getOperand(1));

  if (!N01C)

    return SDValue();


  uint32_t C2 = (uint32_t)N01C->getZExtValue();

  if (!C2 || C2 >= 32)

    return SDValue();


  // Clear irrelevant bits in the mask.

  if (LeftShift)

    C1 &= (-1U << C2);

  else

    C1 &= (-1U >> C2);


  SelectionDAG &DAG = DCI.DAG;

  SDLoc DL(N);


  // We have a pattern of the form "(and (shl x, c2) c1)" or

  // "(and (srl x, c2) c1)", where c1 is a shifted mask. Try to

  // transform to a pair of shifts, to save materializing c1.


  // First pattern: right shift, then mask off leading bits.

  // FIXME: Use demanded bits?

  if (!LeftShift && isMask_32(C1)) {

    uint32_t C3 = llvm::countl_zero(C1);

    if (C2 < C3) {

      SDValue SHL = DAG.getNode(ISD::SHL, DL, MVT::i32, N0->getOperand(0),

                                DAG.getConstant(C3 - C2, DL, MVT::i32));

      return DAG.getNode(ISD::SRL, DL, MVT::i32, SHL,

                         DAG.getConstant(C3, DL, MVT::i32));

    }

  }


  // First pattern, reversed: left shift, then mask off trailing bits.

  if (LeftShift && isMask_32(~C1)) {

    uint32_t C3 = llvm::countr_zero(C1);

    if (C2 < C3) {

      SDValue SHL = DAG.getNode(ISD::SRL, DL, MVT::i32, N0->getOperand(0),

                                DAG.getConstant(C3 - C2, DL, MVT::i32));

      return DAG.getNode(ISD::SHL, DL, MVT::i32, SHL,

                         DAG.getConstant(C3, DL, MVT::i32));

    }

  }


  // Second pattern: left shift, then mask off leading bits.

  // FIXME: Use demanded bits?

  if (LeftShift && isShiftedMask_32(C1)) {

    uint32_t Trailing = llvm::countr_zero(C1);

    uint32_t C3 = llvm::countl_zero(C1);

    if (Trailing == C2 && C2 + C3 < 32) {

      SDValue SHL = DAG.getNode(ISD::SHL, DL, MVT::i32, N0->getOperand(0),

                                DAG.getConstant(C2 + C3, DL, MVT::i32));

      return DAG.getNode(ISD::SRL, DL, MVT::i32, SHL,

                        DAG.getConstant(C3, DL, MVT::i32));

    }

  }


  // Second pattern, reversed: right shift, then mask off trailing bits.

  // FIXME: Handle other patterns of known/demanded bits.

  if (!LeftShift && isShiftedMask_32(C1)) {

    uint32_t Leading = llvm::countl_zero(C1);

    uint32_t C3 = llvm::countr_zero(C1);

    if (Leading == C2 && C2 + C3 < 32) {

      SDValue SHL = DAG.getNode(ISD::SRL, DL, MVT::i32, N0->getOperand(0),

                                DAG.getConstant(C2 + C3, DL, MVT::i32));

      return DAG.getNode(ISD::SHL, DL, MVT::i32, SHL,

                         DAG.getConstant(C3, DL, MVT::i32));

    }

  }


  // Transform "(and (shl x, c2) c1)" into "(shl (and x, c1>>c2), c2)"

  // if "c1 >> c2" is a cheaper immediate than "c1"

  if (LeftShift &&

      HasLowerConstantMaterializationCost(C1 >> C2, C1, Subtarget)) {


    SDValue And = DAG.getNode(ISD::AND, DL, MVT::i32, N0->getOperand(0),

                              DAG.getConstant(C1 >> C2, DL, MVT::i32));

    return DAG.getNode(ISD::SHL, DL, MVT::i32, And,

                       DAG.getConstant(C2, DL, MVT::i32));

  }


  return SDValue();

}


static SDValue PerformANDCombine(SDNode *N,

                                 TargetLowering::DAGCombinerInfo &DCI,

                                 const ARMSubtarget *Subtarget) {

  // Attempt to use immediate-form VBIC

  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));

  SDLoc dl(N);

  EVT VT = N->getValueType(0);

  SelectionDAG &DAG = DCI.DAG;


  if (!DAG.getTargetLoweringInfo().isTypeLegal(VT) || VT == MVT::v2i1 ||

      VT == MVT::v4i1 || VT == MVT::v8i1 || VT == MVT::v16i1)

    return SDValue();


  APInt SplatBits, SplatUndef;

  unsigned SplatBitSize;

  bool HasAnyUndefs;

  if (BVN && (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) &&

      BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {

    if (SplatBitSize == 8 || SplatBitSize == 16 || SplatBitSize == 32 ||

        SplatBitSize == 64) {

      EVT VbicVT;

      SDValue Val = isVMOVModifiedImm((~SplatBits).getZExtValue(),

                                      SplatUndef.getZExtValue(), SplatBitSize,

                                      DAG, dl, VbicVT, VT, OtherModImm);

      if (Val.getNode()) {

        SDValue Input =

            DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VbicVT, N->getOperand(0));

        SDValue Vbic = DAG.getNode(ARMISD::VBICIMM, dl, VbicVT, Input, Val);

        return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Vbic);

      }

    }

  }


  if (!Subtarget->isThumb1Only()) {

    // fold (and (select cc, -1, c), x) -> (select cc, x, (and, x, c))

    if (SDValue Result = combineSelectAndUseCommutative(N, true, DCI))

      return Result;


    if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget))

      return Result;

  }


  if (Subtarget->isThumb1Only())

    if (SDValue Result = CombineANDShift(N, DCI, Subtarget))

      return Result;


  return SDValue();

}


// Try combining OR nodes to SMULWB, SMULWT.


static SDValue PerformORCombineToSMULWBT(SDNode *OR,

                                         TargetLowering::DAGCombinerInfo &DCI,

                                         const ARMSubtarget *Subtarget) {

  if (!Subtarget->hasV6Ops() ||

      (Subtarget->isThumb() &&

       (!Subtarget->hasThumb2() || !Subtarget->hasDSP())))

    return SDValue();


  SDValue SRL = OR->getOperand(0);

  SDValue SHL = OR->getOperand(1);


  if (SRL.getOpcode() != ISD::SRL || SHL.getOpcode() != ISD::SHL) {

    SRL = OR->getOperand(1);

    SHL = OR->getOperand(0);

  }

  if (!isSRL16(SRL) || !isSHL16(SHL))

    return SDValue();


  // The first operands to the shifts need to be the two results from the

  // same smul_lohi node.

  if ((SRL.getOperand(0).getNode() != SHL.getOperand(0).getNode()) ||

       SRL.getOperand(0).getOpcode() != ISD::SMUL_LOHI)

    return SDValue();


  SDNode *SMULLOHI = SRL.getOperand(0).getNode();

  if (SRL.getOperand(0) != SDValue(SMULLOHI, 0) ||

      SHL.getOperand(0) != SDValue(SMULLOHI, 1))

    return SDValue();


  // Now we have:

  // (or (srl (smul_lohi ?, ?), 16), (shl (smul_lohi ?, ?), 16)))

  // For SMUL[B|T] smul_lohi will take a 32-bit and a 16-bit arguments.

  // For SMUWB the 16-bit value will signed extended somehow.

  // For SMULWT only the SRA is required.

  // Check both sides of SMUL_LOHI

  SDValue OpS16 = SMULLOHI->getOperand(0);

  SDValue OpS32 = SMULLOHI->getOperand(1);


  SelectionDAG &DAG = DCI.DAG;

  if (!isS16(OpS16, DAG) && !isSRA16(OpS16)) {

    OpS16 = OpS32;

    OpS32 = SMULLOHI->getOperand(0);

  }


  SDLoc dl(OR);

  unsigned Opcode = 0;

  if (isS16(OpS16, DAG))

    Opcode = ARMISD::SMULWB;

  else if (isSRA16(OpS16)) {

    Opcode = ARMISD::SMULWT;

    OpS16 = OpS16->getOperand(0);

  }

  else

    return SDValue();


  SDValue Res = DAG.getNode(Opcode, dl, MVT::i32, OpS32, OpS16);

  DAG.ReplaceAllUsesOfValueWith(SDValue(OR, 0), Res);

  return SDValue(OR, 0);

}


static SDValue PerformORCombineToBFI(SDNode *N,

                                     TargetLowering::DAGCombinerInfo &DCI,

                                     const ARMSubtarget *Subtarget) {

  // BFI is only available on V6T2+

  if (Subtarget->isThumb1Only() || !Subtarget->hasV6T2Ops())

    return SDValue();


  EVT VT = N->getValueType(0);

  SDValue N0 = N->getOperand(0);

  SDValue N1 = N->getOperand(1);

  SelectionDAG &DAG = DCI.DAG;

  SDLoc DL(N);

  // 1) or (and A, mask), val => ARMbfi A, val, mask

  //      iff (val & mask) == val

  //

  // 2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask

  //  2a) iff isBitFieldInvertedMask(mask) && isBitFieldInvertedMask(~mask2)

  //          && mask == ~mask2

  //  2b) iff isBitFieldInvertedMask(~mask) && isBitFieldInvertedMask(mask2)

  //          && ~mask == mask2

  //  (i.e., copy a bitfield value into another bitfield of the same width)


  if (VT != MVT::i32)

    return SDValue();


  SDValue N00 = N0.getOperand(0);


  // The value and the mask need to be constants so we can verify this is

  // actually a bitfield set. If the mask is 0xffff, we can do better

  // via a movt instruction, so don't use BFI in that case.

  SDValue MaskOp = N0.getOperand(1);

  ConstantSDNode *MaskC = dyn_cast<ConstantSDNode>(MaskOp);

  if (!MaskC)

    return SDValue();

  unsigned Mask = MaskC->getZExtValue();

  if (Mask == 0xffff)

    return SDValue();

  SDValue Res;

  // Case (1): or (and A, mask), val => ARMbfi A, val, mask

  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);

  if (N1C) {

    unsigned Val = N1C->getZExtValue();

    if ((Val & ~Mask) != Val)

      return SDValue();


    if (ARM::isBitFieldInvertedMask(Mask)) {

      Val >>= llvm::countr_zero(~Mask);


      Res = DAG.getNode(ARMISD::BFI, DL, VT, N00,

                        DAG.getConstant(Val, DL, MVT::i32),

                        DAG.getConstant(Mask, DL, MVT::i32));


      DCI.CombineTo(N, Res, false);

      // Return value from the original node to inform the combiner than N is

      // now dead.

      return SDValue(N, 0);

    }

  } else if (N1.getOpcode() == ISD::AND) {

    // case (2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask

    ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));

    if (!N11C)

      return SDValue();

    unsigned Mask2 = N11C->getZExtValue();


    // Mask and ~Mask2 (or reverse) must be equivalent for the BFI pattern

    // as is to match.

    if (ARM::isBitFieldInvertedMask(Mask) &&

        (Mask == ~Mask2)) {

      // The pack halfword instruction works better for masks that fit it,

      // so use that when it's available.

      if (Subtarget->hasDSP() &&

          (Mask == 0xffff || Mask == 0xffff0000))

        return SDValue();

      // 2a

      unsigned amt = llvm::countr_zero(Mask2);

      Res = DAG.getNode(ISD::SRL, DL, VT, N1.getOperand(0),

                        DAG.getConstant(amt, DL, MVT::i32));

      Res = DAG.getNode(ARMISD::BFI, DL, VT, N00, Res,

                        DAG.getConstant(Mask, DL, MVT::i32));

      DCI.CombineTo(N, Res, false);

      // Return value from the original node to inform the combiner than N is

      // now dead.

      return SDValue(N, 0);

    } else if (ARM::isBitFieldInvertedMask(~Mask) &&

               (~Mask == Mask2)) {

      // The pack halfword instruction works better for masks that fit it,

      // so use that when it's available.

      if (Subtarget->hasDSP() &&

          (Mask2 == 0xffff || Mask2 == 0xffff0000))

        return SDValue();

      // 2b

      unsigned lsb = llvm::countr_zero(Mask);

      Res = DAG.getNode(ISD::SRL, DL, VT, N00,

                        DAG.getConstant(lsb, DL, MVT::i32));

      Res = DAG.getNode(ARMISD::BFI, DL, VT, N1.getOperand(0), Res,

                        DAG.getConstant(Mask2, DL, MVT::i32));

      DCI.CombineTo(N, Res, false);

      // Return value from the original node to inform the combiner than N is

      // now dead.

      return SDValue(N, 0);

    }

  }


  if (DAG.MaskedValueIsZero(N1, MaskC->getAPIntValue()) &&

      N00.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N00.getOperand(1)) &&

      ARM::isBitFieldInvertedMask(~Mask)) {

    // Case (3): or (and (shl A, #shamt), mask), B => ARMbfi B, A, ~mask

    // where lsb(mask) == #shamt and masked bits of B are known zero.

    SDValue ShAmt = N00.getOperand(1);

    unsigned ShAmtC = ShAmt->getAsZExtVal();

    unsigned LSB = llvm::countr_zero(Mask);

    if (ShAmtC != LSB)

      return SDValue();


    Res = DAG.getNode(ARMISD::BFI, DL, VT, N1, N00.getOperand(0),

                      DAG.getConstant(~Mask, DL, MVT::i32));


    DCI.CombineTo(N, Res, false);

    // Return value from the original node to inform the combiner than N is

    // now dead.

    return SDValue(N, 0);

  }


  return SDValue();

}


static bool isValidMVECond(unsigned CC, bool IsFloat) {

  switch (CC) {

  case ARMCC::EQ:

  case ARMCC::NE:

  case ARMCC::LE:

  case ARMCC::GT:

  case ARMCC::GE:

  case ARMCC::LT:

    return true;

  case ARMCC::HS:

  case ARMCC::HI:

    return !IsFloat;

  default:

    return false;

  };

}


static ARMCC::CondCodes getVCMPCondCode(SDValue N) {

  if (N->getOpcode() == ARMISD::VCMP)

    return (ARMCC::CondCodes)N->getConstantOperandVal(2);

  else if (N->getOpcode() == ARMISD::VCMPZ)

    return (ARMCC::CondCodes)N->getConstantOperandVal(1);

  else

    llvm_unreachable("Not a VCMP/VCMPZ!");

}


static bool CanInvertMVEVCMP(SDValue N) {

  ARMCC::CondCodes CC = ARMCC::getOppositeCondition(getVCMPCondCode(N));

  return isValidMVECond(CC, N->getOperand(0).getValueType().isFloatingPoint());

}


static SDValue PerformORCombine_i1(SDNode *N, SelectionDAG &DAG,

                                   const ARMSubtarget *Subtarget) {

  // Try to invert "or A, B" -> "and ~A, ~B", as the "and" is easier to chain

  // together with predicates

  EVT VT = N->getValueType(0);

  SDLoc DL(N);

  SDValue N0 = N->getOperand(0);

  SDValue N1 = N->getOperand(1);


  auto IsFreelyInvertable = [&](SDValue V) {

    if (V->getOpcode() == ARMISD::VCMP || V->getOpcode() == ARMISD::VCMPZ)

      return CanInvertMVEVCMP(V);

    return false;

  };


  // At least one operand must be freely invertable.

  if (!(IsFreelyInvertable(N0) || IsFreelyInvertable(N1)))

    return SDValue();


  SDValue NewN0 = DAG.getLogicalNOT(DL, N0, VT);

  SDValue NewN1 = DAG.getLogicalNOT(DL, N1, VT);

  SDValue And = DAG.getNode(ISD::AND, DL, VT, NewN0, NewN1);

  return DAG.getLogicalNOT(DL, And, VT);

}


/// PerformORCombine - Target-specific dag combine xforms for ISD::OR


static SDValue PerformORCombine(SDNode *N,

                                TargetLowering::DAGCombinerInfo &DCI,

                                const ARMSubtarget *Subtarget) {

  // Attempt to use immediate-form VORR

  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));

  SDLoc dl(N);

  EVT VT = N->getValueType(0);

  SelectionDAG &DAG = DCI.DAG;


  if(!DAG.getTargetLoweringInfo().isTypeLegal(VT))

    return SDValue();


  if (Subtarget->hasMVEIntegerOps() && (VT == MVT::v2i1 || VT == MVT::v4i1 ||

                                        VT == MVT::v8i1 || VT == MVT::v16i1))

    return PerformORCombine_i1(N, DAG, Subtarget);


  APInt SplatBits, SplatUndef;

  unsigned SplatBitSize;

  bool HasAnyUndefs;

  if (BVN && (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) &&

      BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {

    if (SplatBitSize == 8 || SplatBitSize == 16 || SplatBitSize == 32 ||

        SplatBitSize == 64) {

      EVT VorrVT;

      SDValue Val =

          isVMOVModifiedImm(SplatBits.getZExtValue(), SplatUndef.getZExtValue(),

                            SplatBitSize, DAG, dl, VorrVT, VT, OtherModImm);

      if (Val.getNode()) {

        SDValue Input =

            DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VorrVT, N->getOperand(0));

        SDValue Vorr = DAG.getNode(ARMISD::VORRIMM, dl, VorrVT, Input, Val);

        return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Vorr);

      }

    }

  }


  if (!Subtarget->isThumb1Only()) {

    // fold (or (select cc, 0, c), x) -> (select cc, x, (or, x, c))

    if (SDValue Result = combineSelectAndUseCommutative(N, false, DCI))

      return Result;

    if (SDValue Result = PerformORCombineToSMULWBT(N, DCI, Subtarget))

      return Result;

  }


  SDValue N0 = N->getOperand(0);

  SDValue N1 = N->getOperand(1);


  // (or (and B, A), (and C, ~A)) => (VBSL A, B, C) when A is a constant.

  if (Subtarget->hasNEON() && N1.getOpcode() == ISD::AND && VT.isVector() &&

      DAG.getTargetLoweringInfo().isTypeLegal(VT)) {


    // The code below optimizes (or (and X, Y), Z).

    // The AND operand needs to have a single user to make these optimizations

    // profitable.

    if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())

      return SDValue();


    APInt SplatUndef;

    unsigned SplatBitSize;

    bool HasAnyUndefs;


    APInt SplatBits0, SplatBits1;

    BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(1));

    BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(1));

    // Ensure that the second operand of both ands are constants

    if (BVN0 && BVN0->isConstantSplat(SplatBits0, SplatUndef, SplatBitSize,

                                      HasAnyUndefs) && !HasAnyUndefs) {

        if (BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize,

                                          HasAnyUndefs) && !HasAnyUndefs) {

            // Ensure that the bit width of the constants are the same and that

            // the splat arguments are logical inverses as per the pattern we

            // are trying to simplify.

            if (SplatBits0.getBitWidth() == SplatBits1.getBitWidth() &&

                SplatBits0 == ~SplatBits1) {

                // Canonicalize the vector type to make instruction selection

                // simpler.

                EVT CanonicalVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;

                SDValue Result = DAG.getNode(ARMISD::VBSP, dl, CanonicalVT,

                                             N0->getOperand(1),

                                             N0->getOperand(0),

                                             N1->getOperand(0));

                return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Result);

            }

        }

    }

  }


  // Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when

  // reasonable.

  if (N0.getOpcode() == ISD::AND && N0.hasOneUse()) {

    if (SDValue Res = PerformORCombineToBFI(N, DCI, Subtarget))

      return Res;

  }


  if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget))

    return Result;


  return SDValue();

}


static SDValue PerformXORCombine(SDNode *N,

                                 TargetLowering::DAGCombinerInfo &DCI,

                                 const ARMSubtarget *Subtarget) {

  EVT VT = N->getValueType(0);

  SelectionDAG &DAG = DCI.DAG;


  if(!DAG.getTargetLoweringInfo().isTypeLegal(VT))

    return SDValue();


  if (!Subtarget->isThumb1Only()) {

    // fold (xor (select cc, 0, c), x) -> (select cc, x, (xor, x, c))

    if (SDValue Result = combineSelectAndUseCommutative(N, false, DCI))

      return Result;


    if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget))

      return Result;

  }


  if (Subtarget->hasMVEIntegerOps()) {

    // fold (xor(vcmp/z, 1)) into a vcmp with the opposite condition.

    SDValue N0 = N->getOperand(0);

    SDValue N1 = N->getOperand(1);

    const TargetLowering *TLI = Subtarget->getTargetLowering();

    if (TLI->isConstTrueVal(N1) &&

        (N0->getOpcode() == ARMISD::VCMP || N0->getOpcode() == ARMISD::VCMPZ)) {

      if (CanInvertMVEVCMP(N0)) {

        SDLoc DL(N0);

        ARMCC::CondCodes CC = ARMCC::getOppositeCondition(getVCMPCondCode(N0));


        SmallVector<SDValue, 4> Ops;

        Ops.push_back(N0->getOperand(0));

        if (N0->getOpcode() == ARMISD::VCMP)

          Ops.push_back(N0->getOperand(1));

        Ops.push_back(DAG.getConstant(CC, DL, MVT::i32));

        return DAG.getNode(N0->getOpcode(), DL, N0->getValueType(0), Ops);

      }

    }

  }


  return SDValue();

}


// ParseBFI - given a BFI instruction in N, extract the "from" value (Rn) and return it,

// and fill in FromMask and ToMask with (consecutive) bits in "from" to be extracted and

// their position in "to" (Rd).


static SDValue ParseBFI(SDNode *N, APInt &ToMask, APInt &FromMask) {

  assert(N->getOpcode() == ARMISD::BFI);


  SDValue From = N->getOperand(1);

  ToMask = ~N->getConstantOperandAPInt(2);

  FromMask = APInt::getLowBitsSet(ToMask.getBitWidth(), ToMask.popcount());


  // If the Base came from a SHR #C, we can deduce that it is really testing bit

  // #C in the base of the SHR.

  if (From->getOpcode() == ISD::SRL &&

      isa<ConstantSDNode>(From->getOperand(1))) {

    APInt Shift = From->getConstantOperandAPInt(1);

    assert(Shift.getLimitedValue() < 32 && "Shift too large!");

    FromMask <<= Shift.getLimitedValue(31);

    From = From->getOperand(0);

  }


  return From;

}


// If A and B contain one contiguous set of bits, does A | B == A . B?

//

// Neither A nor B must be zero.


static bool BitsProperlyConcatenate(const APInt &A, const APInt &B) {

  unsigned LastActiveBitInA = A.countr_zero();

  unsigned FirstActiveBitInB = B.getBitWidth() - B.countl_zero() - 1;

  return LastActiveBitInA - 1 == FirstActiveBitInB;

}


static SDValue FindBFIToCombineWith(SDNode *N) {

  // We have a BFI in N. Find a BFI it can combine with, if one exists.

  APInt ToMask, FromMask;

  SDValue From = ParseBFI(N, ToMask, FromMask);

  SDValue To = N->getOperand(0);


  SDValue V = To;

  if (V.getOpcode() != ARMISD::BFI)

    return SDValue();


  APInt NewToMask, NewFromMask;

  SDValue NewFrom = ParseBFI(V.getNode(), NewToMask, NewFromMask);

  if (NewFrom != From)

    return SDValue();


  // Do the written bits conflict with any we've seen so far?

  if ((NewToMask & ToMask).getBoolValue())

    // Conflicting bits.

    return SDValue();


  // Are the new bits contiguous when combined with the old bits?

  if (BitsProperlyConcatenate(ToMask, NewToMask) &&

      BitsProperlyConcatenate(FromMask, NewFromMask))

    return V;

  if (BitsProperlyConcatenate(NewToMask, ToMask) &&

      BitsProperlyConcatenate(NewFromMask, FromMask))

    return V;


  return SDValue();

}


static SDValue PerformBFICombine(SDNode *N, SelectionDAG &DAG) {

  SDValue N0 = N->getOperand(0);

  SDValue N1 = N->getOperand(1);


  if (N1.getOpcode() == ISD::AND) {

    // (bfi A, (and B, Mask1), Mask2) -> (bfi A, B, Mask2) iff

    // the bits being cleared by the AND are not demanded by the BFI.

    ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));

    if (!N11C)

      return SDValue();

    unsigned InvMask = N->getConstantOperandVal(2);

    unsigned LSB = llvm::countr_zero(~InvMask);

    unsigned Width = llvm::bit_width<unsigned>(~InvMask) - LSB;

    assert(Width <

               static_cast<unsigned>(std::numeric_limits<unsigned>::digits) &&

           "undefined behavior");

    unsigned Mask = (1u << Width) - 1;

    unsigned Mask2 = N11C->getZExtValue();

    if ((Mask & (~Mask2)) == 0)

      return DAG.getNode(ARMISD::BFI, SDLoc(N), N->getValueType(0),

                         N->getOperand(0), N1.getOperand(0), N->getOperand(2));

    return SDValue();

  }


  // Look for another BFI to combine with.

  if (SDValue CombineBFI = FindBFIToCombineWith(N)) {

    // We've found a BFI.

    APInt ToMask1, FromMask1;

    SDValue From1 = ParseBFI(N, ToMask1, FromMask1);


    APInt ToMask2, FromMask2;

    SDValue From2 = ParseBFI(CombineBFI.getNode(), ToMask2, FromMask2);

    assert(From1 == From2);

    (void)From2;


    // Create a new BFI, combining the two together.

    APInt NewFromMask = FromMask1 | FromMask2;

    APInt NewToMask = ToMask1 | ToMask2;


    EVT VT = N->getValueType(0);

    SDLoc dl(N);


    if (NewFromMask[0] == 0)

      From1 = DAG.getNode(ISD::SRL, dl, VT, From1,

                          DAG.getConstant(NewFromMask.countr_zero(), dl, VT));

    return DAG.getNode(ARMISD::BFI, dl, VT, CombineBFI.getOperand(0), From1,

                       DAG.getConstant(~NewToMask, dl, VT));

  }


  // Reassociate BFI(BFI (A, B, M1), C, M2) to BFI(BFI (A, C, M2), B, M1) so

  // that lower bit insertions are performed first, providing that M1 and M2

  // do no overlap. This can allow multiple BFI instructions to be combined

  // together by the other folds above.

  if (N->getOperand(0).getOpcode() == ARMISD::BFI) {

    APInt ToMask1 = ~N->getConstantOperandAPInt(2);

    APInt ToMask2 = ~N0.getConstantOperandAPInt(2);


    if (!N0.hasOneUse() || (ToMask1 & ToMask2) != 0 ||

        ToMask1.countl_zero() < ToMask2.countl_zero())

      return SDValue();


    EVT VT = N->getValueType(0);

    SDLoc dl(N);

    SDValue BFI1 = DAG.getNode(ARMISD::BFI, dl, VT, N0.getOperand(0),

                               N->getOperand(1), N->getOperand(2));

    return DAG.getNode(ARMISD::BFI, dl, VT, BFI1, N0.getOperand(1),

                       N0.getOperand(2));

  }


  return SDValue();

}


// Check that N is CMPZ(CSINC(0, 0, CC, X)),

//              or CMPZ(CMOV(1, 0, CC, X))

// return X if valid.


static SDValue IsCMPZCSINC(SDNode *Cmp, ARMCC::CondCodes &CC) {

  if (Cmp->getOpcode() != ARMISD::CMPZ || !isNullConstant(Cmp->getOperand(1)))

    return SDValue();

  SDValue CSInc = Cmp->getOperand(0);


  // Ignore any `And 1` nodes that may not yet have been removed. We are

  // looking for a value that produces 1/0, so these have no effect on the

  // code.

  while (CSInc.getOpcode() == ISD::AND &&

         isa<ConstantSDNode>(CSInc.getOperand(1)) &&

         CSInc.getConstantOperandVal(1) == 1 && CSInc->hasOneUse())

    CSInc = CSInc.getOperand(0);


  if (CSInc.getOpcode() == ARMISD::CSINC &&

      isNullConstant(CSInc.getOperand(0)) &&

      isNullConstant(CSInc.getOperand(1)) && CSInc->hasOneUse()) {

    CC = (ARMCC::CondCodes)CSInc.getConstantOperandVal(2);

    return CSInc.getOperand(3);

  }

  if (CSInc.getOpcode() == ARMISD::CMOV && isOneConstant(CSInc.getOperand(0)) &&

      isNullConstant(CSInc.getOperand(1)) && CSInc->hasOneUse()) {

    CC = (ARMCC::CondCodes)CSInc.getConstantOperandVal(2);

    return CSInc.getOperand(3);

  }

  if (CSInc.getOpcode() == ARMISD::CMOV && isOneConstant(CSInc.getOperand(1)) &&

      isNullConstant(CSInc.getOperand(0)) && CSInc->hasOneUse()) {

    CC = ARMCC::getOppositeCondition(

        (ARMCC::CondCodes)CSInc.getConstantOperandVal(2));

    return CSInc.getOperand(3);

  }

  return SDValue();

}


static SDValue PerformCMPZCombine(SDNode *N, SelectionDAG &DAG) {

  // Given CMPZ(CSINC(C, 0, 0, EQ), 0), we can just use C directly. As in

  //       t92: flags = ARMISD::CMPZ t74, 0

  //     t93: i32 = ARMISD::CSINC 0, 0, 1, t92

  //   t96: flags = ARMISD::CMPZ t93, 0

  // t114: i32 = ARMISD::CSINV 0, 0, 0, t96

  ARMCC::CondCodes Cond;

  if (SDValue C = IsCMPZCSINC(N, Cond))

    if (Cond == ARMCC::EQ)

      return C;

  return SDValue();

}


static SDValue PerformCSETCombine(SDNode *N, SelectionDAG &DAG) {

  // Fold away an unneccessary CMPZ/CSINC

  // CSXYZ A, B, C1 (CMPZ (CSINC 0, 0, C2, D), 0) ->

  // if C1==EQ -> CSXYZ A, B, C2, D

  // if C1==NE -> CSXYZ A, B, NOT(C2), D

  ARMCC::CondCodes Cond;

  if (SDValue C = IsCMPZCSINC(N->getOperand(3).getNode(), Cond)) {

    if (N->getConstantOperandVal(2) == ARMCC::EQ)

      return DAG.getNode(N->getOpcode(), SDLoc(N), MVT::i32, N->getOperand(0),

                         N->getOperand(1),

                         DAG.getConstant(Cond, SDLoc(N), MVT::i32), C);

    if (N->getConstantOperandVal(2) == ARMCC::NE)

      return DAG.getNode(

          N->getOpcode(), SDLoc(N), MVT::i32, N->getOperand(0),

          N->getOperand(1),

          DAG.getConstant(ARMCC::getOppositeCondition(Cond), SDLoc(N), MVT::i32), C);

  }

  return SDValue();

}


/// PerformVMOVRRDCombine - Target-specific dag combine xforms for

/// ARMISD::VMOVRRD.


static SDValue PerformVMOVRRDCombine(SDNode *N,

                                     TargetLowering::DAGCombinerInfo &DCI,

                                     const ARMSubtarget *Subtarget) {

  // vmovrrd(vmovdrr x, y) -> x,y

  SDValue InDouble = N->getOperand(0);

  if (InDouble.getOpcode() == ARMISD::VMOVDRR && Subtarget->hasFP64())

    return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1));


  // vmovrrd(load f64) -> (load i32), (load i32)

  SDNode *InNode = InDouble.getNode();

  if (ISD::isNormalLoad(InNode) && InNode->hasOneUse() &&

      InNode->getValueType(0) == MVT::f64 &&

      InNode->getOperand(1).getOpcode() == ISD::FrameIndex &&

      !cast<LoadSDNode>(InNode)->isVolatile()) {

    // TODO: Should this be done for non-FrameIndex operands?

    LoadSDNode *LD = cast<LoadSDNode>(InNode);


    SelectionDAG &DAG = DCI.DAG;

    SDLoc DL(LD);

    SDValue BasePtr = LD->getBasePtr();

    SDValue NewLD1 =

        DAG.getLoad(MVT::i32, DL, LD->getChain(), BasePtr, LD->getPointerInfo(),

                    LD->getAlign(), LD->getMemOperand()->getFlags());


    SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,

                                    DAG.getConstant(4, DL, MVT::i32));


    SDValue NewLD2 = DAG.getLoad(MVT::i32, DL, LD->getChain(), OffsetPtr,

                                 LD->getPointerInfo().getWithOffset(4),

                                 commonAlignment(LD->getAlign(), 4),

                                 LD->getMemOperand()->getFlags());


    DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLD2.getValue(1));

    if (DCI.DAG.getDataLayout().isBigEndian())

      std::swap (NewLD1, NewLD2);

    SDValue Result = DCI.CombineTo(N, NewLD1, NewLD2);

    return Result;

  }


  // VMOVRRD(extract(..(build_vector(a, b, c, d)))) -> a,b or c,d

  // VMOVRRD(extract(insert_vector(insert_vector(.., a, l1), b, l2))) -> a,b

  if (InDouble.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&

      isa<ConstantSDNode>(InDouble.getOperand(1))) {

    SDValue BV = InDouble.getOperand(0);

    // Look up through any nop bitcasts and vector_reg_casts. bitcasts may

    // change lane order under big endian.

    bool BVSwap = BV.getOpcode() == ISD::BITCAST;

    while (

        (BV.getOpcode() == ISD::BITCAST ||

         BV.getOpcode() == ARMISD::VECTOR_REG_CAST) &&

        (BV.getValueType() == MVT::v2f64 || BV.getValueType() == MVT::v2i64)) {

      BVSwap = BV.getOpcode() == ISD::BITCAST;

      BV = BV.getOperand(0);

    }

    if (BV.getValueType() != MVT::v4i32)

      return SDValue();


    // Handle buildvectors, pulling out the correct lane depending on

    // endianness.

    unsigned Offset = InDouble.getConstantOperandVal(1) == 1 ? 2 : 0;

    if (BV.getOpcode() == ISD::BUILD_VECTOR) {

      SDValue Op0 = BV.getOperand(Offset);

      SDValue Op1 = BV.getOperand(Offset + 1);

      if (!Subtarget->isLittle() && BVSwap)

        std::swap(Op0, Op1);


      return DCI.DAG.getMergeValues({Op0, Op1}, SDLoc(N));

    }


    // A chain of insert_vectors, grabbing the correct value of the chain of

    // inserts.

    SDValue Op0, Op1;

    while (BV.getOpcode() == ISD::INSERT_VECTOR_ELT) {

      if (isa<ConstantSDNode>(BV.getOperand(2))) {

        if (BV.getConstantOperandVal(2) == Offset && !Op0)

          Op0 = BV.getOperand(1);

        if (BV.getConstantOperandVal(2) == Offset + 1 && !Op1)

          Op1 = BV.getOperand(1);

      }

      BV = BV.getOperand(0);

    }

    if (!Subtarget->isLittle() && BVSwap)

      std::swap(Op0, Op1);

    if (Op0 && Op1)

      return DCI.DAG.getMergeValues({Op0, Op1}, SDLoc(N));

  }


  return SDValue();

}


/// PerformVMOVDRRCombine - Target-specific dag combine xforms for

/// ARMISD::VMOVDRR.  This is also used for BUILD_VECTORs with 2 operands.


static SDValue PerformVMOVDRRCombine(SDNode *N, SelectionDAG &DAG) {

  // N=vmovrrd(X); vmovdrr(N:0, N:1) -> bit_convert(X)

  SDValue Op0 = N->getOperand(0);

  SDValue Op1 = N->getOperand(1);

  if (Op0.getOpcode() == ISD::BITCAST)

    Op0 = Op0.getOperand(0);

  if (Op1.getOpcode() == ISD::BITCAST)

    Op1 = Op1.getOperand(0);

  if (Op0.getOpcode() == ARMISD::VMOVRRD &&

      Op0.getNode() == Op1.getNode() &&

      Op0.getResNo() == 0 && Op1.getResNo() == 1)

    return DAG.getNode(ISD::BITCAST, SDLoc(N),

                       N->getValueType(0), Op0.getOperand(0));

  return SDValue();

}


static SDValue PerformVMOVhrCombine(SDNode *N,

                                    TargetLowering::DAGCombinerInfo &DCI) {

  SDValue Op0 = N->getOperand(0);


  // VMOVhr (VMOVrh (X)) -> X

  if (Op0->getOpcode() == ARMISD::VMOVrh)

    return Op0->getOperand(0);


  // FullFP16: half values are passed in S-registers, and we don't

  // need any of the bitcast and moves:

  //

  //     t2: f32,ch1,gl1? = CopyFromReg ch, Register:f32 %0, gl?

  //   t5: i32 = bitcast t2

  // t18: f16 = ARMISD::VMOVhr t5

  // =>

  // tN: f16,ch2,gl2? = CopyFromReg ch, Register::f32 %0, gl?

  if (Op0->getOpcode() == ISD::BITCAST) {

    SDValue Copy = Op0->getOperand(0);

    if (Copy.getValueType() == MVT::f32 &&

        Copy->getOpcode() == ISD::CopyFromReg) {

      bool HasGlue = Copy->getNumOperands() == 3;

      SDValue Ops[] = {Copy->getOperand(0), Copy->getOperand(1),

                       HasGlue ? Copy->getOperand(2) : SDValue()};

      EVT OutTys[] = {N->getValueType(0), MVT::Other, MVT::Glue};

      SDValue NewCopy =

          DCI.DAG.getNode(ISD::CopyFromReg, SDLoc(N),

                          DCI.DAG.getVTList(ArrayRef(OutTys, HasGlue ? 3 : 2)),

                          ArrayRef(Ops, HasGlue ? 3 : 2));


      // Update Users, Chains, and Potential Glue.

      DCI.DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), NewCopy.getValue(0));

      DCI.DAG.ReplaceAllUsesOfValueWith(Copy.getValue(1), NewCopy.getValue(1));

      if (HasGlue)

        DCI.DAG.ReplaceAllUsesOfValueWith(Copy.getValue(2),

                                          NewCopy.getValue(2));


      return NewCopy;

    }

  }


  // fold (VMOVhr (load x)) -> (load (f16*)x)

  if (LoadSDNode *LN0 = dyn_cast<LoadSDNode>(Op0)) {

    if (LN0->hasOneUse() && LN0->isUnindexed() &&

        LN0->getMemoryVT() == MVT::i16) {

      SDValue Load =

          DCI.DAG.getLoad(N->getValueType(0), SDLoc(N), LN0->getChain(),

                          LN0->getBasePtr(), LN0->getMemOperand());

      DCI.DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Load.getValue(0));

      DCI.DAG.ReplaceAllUsesOfValueWith(Op0.getValue(1), Load.getValue(1));

      return Load;

    }

  }


  // Only the bottom 16 bits of the source register are used.

  APInt DemandedMask = APInt::getLowBitsSet(32, 16);

  const TargetLowering &TLI = DCI.DAG.getTargetLoweringInfo();

  if (TLI.SimplifyDemandedBits(Op0, DemandedMask, DCI))

    return SDValue(N, 0);


  return SDValue();

}


static SDValue PerformVMOVrhCombine(SDNode *N, SelectionDAG &DAG) {

  SDValue N0 = N->getOperand(0);

  EVT VT = N->getValueType(0);


  // fold (VMOVrh (fpconst x)) -> const x

  if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N0)) {

    APFloat V = C->getValueAPF();

    return DAG.getConstant(V.bitcastToAPInt().getZExtValue(), SDLoc(N), VT);

  }


  // fold (VMOVrh (load x)) -> (zextload (i16*)x)

  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse()) {

    LoadSDNode *LN0 = cast<LoadSDNode>(N0);


    SDValue Load =

        DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT, LN0->getChain(),

                       LN0->getBasePtr(), MVT::i16, LN0->getMemOperand());

    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Load.getValue(0));

    DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));

    return Load;

  }


  // Fold VMOVrh(extract(x, n)) -> vgetlaneu(x, n)

  if (N0->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&

      isa<ConstantSDNode>(N0->getOperand(1)))

    return DAG.getNode(ARMISD::VGETLANEu, SDLoc(N), VT, N0->getOperand(0),

                       N0->getOperand(1));


  return SDValue();

}


/// hasNormalLoadOperand - Check if any of the operands of a BUILD_VECTOR node

/// are normal, non-volatile loads.  If so, it is profitable to bitcast an

/// i64 vector to have f64 elements, since the value can then be loaded

/// directly into a VFP register.


static bool hasNormalLoadOperand(SDNode *N) {

  unsigned NumElts = N->getValueType(0).getVectorNumElements();

  for (unsigned i = 0; i < NumElts; ++i) {

    SDNode *Elt = N->getOperand(i).getNode();

    if (ISD::isNormalLoad(Elt) && !cast<LoadSDNode>(Elt)->isVolatile())

      return true;

  }

  return false;

}


/// PerformBUILD_VECTORCombine - Target-specific dag combine xforms for

/// ISD::BUILD_VECTOR.


static SDValue PerformBUILD_VECTORCombine(SDNode *N,

                                          TargetLowering::DAGCombinerInfo &DCI,

                                          const ARMSubtarget *Subtarget) {

  // build_vector(N=ARMISD::VMOVRRD(X), N:1) -> bit_convert(X):

  // VMOVRRD is introduced when legalizing i64 types.  It forces the i64 value

  // into a pair of GPRs, which is fine when the value is used as a scalar,

  // but if the i64 value is converted to a vector, we need to undo the VMOVRRD.

  SelectionDAG &DAG = DCI.DAG;

  if (N->getNumOperands() == 2)

    if (SDValue RV = PerformVMOVDRRCombine(N, DAG))

      return RV;


  // Load i64 elements as f64 values so that type legalization does not split

  // them up into i32 values.

  EVT VT = N->getValueType(0);

  if (VT.getVectorElementType() != MVT::i64 || !hasNormalLoadOperand(N))

    return SDValue();

  SDLoc dl(N);

  SmallVector<SDValue, 8> Ops;

  unsigned NumElts = VT.getVectorNumElements();

  for (unsigned i = 0; i < NumElts; ++i) {

    SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(i));

    Ops.push_back(V);

    // Make the DAGCombiner fold the bitcast.

    DCI.AddToWorklist(V.getNode());

  }

  EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, NumElts);

  SDValue BV = DAG.getBuildVector(FloatVT, dl, Ops);

  return DAG.getNode(ISD::BITCAST, dl, VT, BV);

}


/// Target-specific dag combine xforms for ARMISD::BUILD_VECTOR.

static SDValue


PerformARMBUILD_VECTORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {

  // ARMISD::BUILD_VECTOR is introduced when legalizing ISD::BUILD_VECTOR.

  // At that time, we may have inserted bitcasts from integer to float.

  // If these bitcasts have survived DAGCombine, change the lowering of this

  // BUILD_VECTOR in something more vector friendly, i.e., that does not

  // force to use floating point types.


  // Make sure we can change the type of the vector.

  // This is possible iff:

  // 1. The vector is only used in a bitcast to a integer type. I.e.,

  //    1.1. Vector is used only once.

  //    1.2. Use is a bit convert to an integer type.

  // 2. The size of its operands are 32-bits (64-bits are not legal).

  EVT VT = N->getValueType(0);

  EVT EltVT = VT.getVectorElementType();


  // Check 1.1. and 2.

  if (EltVT.getSizeInBits() != 32 || !N->hasOneUse())

    return SDValue();


  // By construction, the input type must be float.

  assert(EltVT == MVT::f32 && "Unexpected type!");


  // Check 1.2.

  SDNode *Use = *N->user_begin();

  if (Use->getOpcode() != ISD::BITCAST ||

      Use->getValueType(0).isFloatingPoint())

    return SDValue();


  // Check profitability.

  // Model is, if more than half of the relevant operands are bitcast from

  // i32, turn the build_vector into a sequence of insert_vector_elt.

  // Relevant operands are everything that is not statically

  // (i.e., at compile time) bitcasted.

  unsigned NumOfBitCastedElts = 0;

  unsigned NumElts = VT.getVectorNumElements();

  unsigned NumOfRelevantElts = NumElts;

  for (unsigned Idx = 0; Idx < NumElts; ++Idx) {

    SDValue Elt = N->getOperand(Idx);

    if (Elt->getOpcode() == ISD::BITCAST) {

      // Assume only bit cast to i32 will go away.

      if (Elt->getOperand(0).getValueType() == MVT::i32)

        ++NumOfBitCastedElts;

    } else if (Elt.isUndef() || isa<ConstantSDNode>(Elt))

      // Constants are statically casted, thus do not count them as

      // relevant operands.

      --NumOfRelevantElts;

  }


  // Check if more than half of the elements require a non-free bitcast.

  if (NumOfBitCastedElts <= NumOfRelevantElts / 2)

    return SDValue();


  SelectionDAG &DAG = DCI.DAG;

  // Create the new vector type.

  EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts);

  // Check if the type is legal.

  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  if (!TLI.isTypeLegal(VecVT))

    return SDValue();


  // Combine:

  // ARMISD::BUILD_VECTOR E1, E2, ..., EN.

  // => BITCAST INSERT_VECTOR_ELT

  //                      (INSERT_VECTOR_ELT (...), (BITCAST EN-1), N-1),

  //                      (BITCAST EN), N.

  SDValue Vec = DAG.getUNDEF(VecVT);

  SDLoc dl(N);

  for (unsigned Idx = 0 ; Idx < NumElts; ++Idx) {

    SDValue V = N->getOperand(Idx);

    if (V.isUndef())

      continue;

    if (V.getOpcode() == ISD::BITCAST &&

        V->getOperand(0).getValueType() == MVT::i32)

      // Fold obvious case.

      V = V.getOperand(0);

    else {

      V = DAG.getNode(ISD::BITCAST, SDLoc(V), MVT::i32, V);

      // Make the DAGCombiner fold the bitcasts.

      DCI.AddToWorklist(V.getNode());

    }

    SDValue LaneIdx = DAG.getConstant(Idx, dl, MVT::i32);

    Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VecVT, Vec, V, LaneIdx);

  }

  Vec = DAG.getNode(ISD::BITCAST, dl, VT, Vec);

  // Make the DAGCombiner fold the bitcasts.

  DCI.AddToWorklist(Vec.getNode());

  return Vec;

}


static SDValue


PerformPREDICATE_CASTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {

  EVT VT = N->getValueType(0);

  SDValue Op = N->getOperand(0);

  SDLoc dl(N);


  // PREDICATE_CAST(PREDICATE_CAST(x)) == PREDICATE_CAST(x)

  if (Op->getOpcode() == ARMISD::PREDICATE_CAST) {

    // If the valuetypes are the same, we can remove the cast entirely.

    if (Op->getOperand(0).getValueType() == VT)

      return Op->getOperand(0);

    return DCI.DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT, Op->getOperand(0));

  }


  // Turn pred_cast(xor x, -1) into xor(pred_cast x, -1), in order to produce

  // more VPNOT which might get folded as else predicates.

  if (Op.getValueType() == MVT::i32 && isBitwiseNot(Op)) {

    SDValue X =

        DCI.DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT, Op->getOperand(0));

    SDValue C = DCI.DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT,

                                DCI.DAG.getConstant(65535, dl, MVT::i32));

    return DCI.DAG.getNode(ISD::XOR, dl, VT, X, C);

  }


  // Only the bottom 16 bits of the source register are used.

  if (Op.getValueType() == MVT::i32) {

    APInt DemandedMask = APInt::getLowBitsSet(32, 16);

    const TargetLowering &TLI = DCI.DAG.getTargetLoweringInfo();

    if (TLI.SimplifyDemandedBits(Op, DemandedMask, DCI))

      return SDValue(N, 0);

  }

  return SDValue();

}


static SDValue PerformVECTOR_REG_CASTCombine(SDNode *N, SelectionDAG &DAG,

                                             const ARMSubtarget *ST) {

  EVT VT = N->getValueType(0);

  SDValue Op = N->getOperand(0);

  SDLoc dl(N);


  // Under Little endian, a VECTOR_REG_CAST is equivalent to a BITCAST

  if (ST->isLittle())

    return DAG.getNode(ISD::BITCAST, dl, VT, Op);


  // VT VECTOR_REG_CAST (VT Op) -> Op

  if (Op.getValueType() == VT)

    return Op;

  // VECTOR_REG_CAST undef -> undef

  if (Op.isUndef())

    return DAG.getUNDEF(VT);


  // VECTOR_REG_CAST(VECTOR_REG_CAST(x)) == VECTOR_REG_CAST(x)

  if (Op->getOpcode() == ARMISD::VECTOR_REG_CAST) {

    // If the valuetypes are the same, we can remove the cast entirely.

    if (Op->getOperand(0).getValueType() == VT)

      return Op->getOperand(0);

    return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Op->getOperand(0));

  }


  return SDValue();

}


static SDValue PerformVCMPCombine(SDNode *N, SelectionDAG &DAG,

                                  const ARMSubtarget *Subtarget) {

  if (!Subtarget->hasMVEIntegerOps())

    return SDValue();


  EVT VT = N->getValueType(0);

  SDValue Op0 = N->getOperand(0);

  SDValue Op1 = N->getOperand(1);

  ARMCC::CondCodes Cond = (ARMCC::CondCodes)N->getConstantOperandVal(2);

  SDLoc dl(N);


  // vcmp X, 0, cc -> vcmpz X, cc

  if (isZeroVector(Op1))

    return DAG.getNode(ARMISD::VCMPZ, dl, VT, Op0, N->getOperand(2));


  unsigned SwappedCond = getSwappedCondition(Cond);

  if (isValidMVECond(SwappedCond, VT.isFloatingPoint())) {

    // vcmp 0, X, cc -> vcmpz X, reversed(cc)

    if (isZeroVector(Op0))

      return DAG.getNode(ARMISD::VCMPZ, dl, VT, Op1,

                         DAG.getConstant(SwappedCond, dl, MVT::i32));

    // vcmp vdup(Y), X, cc -> vcmp X, vdup(Y), reversed(cc)

    if (Op0->getOpcode() == ARMISD::VDUP && Op1->getOpcode() != ARMISD::VDUP)

      return DAG.getNode(ARMISD::VCMP, dl, VT, Op1, Op0,

                         DAG.getConstant(SwappedCond, dl, MVT::i32));

  }


  return SDValue();

}


/// PerformInsertEltCombine - Target-specific dag combine xforms for

/// ISD::INSERT_VECTOR_ELT.


static SDValue PerformInsertEltCombine(SDNode *N,

                                       TargetLowering::DAGCombinerInfo &DCI) {

  // Bitcast an i64 load inserted into a vector to f64.

  // Otherwise, the i64 value will be legalized to a pair of i32 values.

  EVT VT = N->getValueType(0);

  SDNode *Elt = N->getOperand(1).getNode();

  if (VT.getVectorElementType() != MVT::i64 ||

      !ISD::isNormalLoad(Elt) || cast<LoadSDNode>(Elt)->isVolatile())

    return SDValue();


  SelectionDAG &DAG = DCI.DAG;

  SDLoc dl(N);

  EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64,

                                 VT.getVectorNumElements());

  SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, N->getOperand(0));

  SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(1));

  // Make the DAGCombiner fold the bitcasts.

  DCI.AddToWorklist(Vec.getNode());

  DCI.AddToWorklist(V.getNode());

  SDValue InsElt = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, FloatVT,

                               Vec, V, N->getOperand(2));

  return DAG.getNode(ISD::BITCAST, dl, VT, InsElt);

}


// Convert a pair of extracts from the same base vector to a VMOVRRD. Either

// directly or bitcast to an integer if the original is a float vector.

// extract(x, n); extract(x, n+1)  ->  VMOVRRD(extract v2f64 x, n/2)

// bitcast(extract(x, n)); bitcast(extract(x, n+1))  ->  VMOVRRD(extract x, n/2)

static SDValue


PerformExtractEltToVMOVRRD(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {

  EVT VT = N->getValueType(0);

  SDLoc dl(N);


  if (!DCI.isAfterLegalizeDAG() || VT != MVT::i32 ||

      !DCI.DAG.getTargetLoweringInfo().isTypeLegal(MVT::f64))

    return SDValue();


  SDValue Ext = SDValue(N, 0);

  if (Ext.getOpcode() == ISD::BITCAST &&

      Ext.getOperand(0).getValueType() == MVT::f32)

    Ext = Ext.getOperand(0);

  if (Ext.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||

      !isa<ConstantSDNode>(Ext.getOperand(1)) ||

      Ext.getConstantOperandVal(1) % 2 != 0)

    return SDValue();

  if (Ext->hasOneUse() && (Ext->user_begin()->getOpcode() == ISD::SINT_TO_FP ||

                           Ext->user_begin()->getOpcode() == ISD::UINT_TO_FP))

    return SDValue();


  SDValue Op0 = Ext.getOperand(0);

  EVT VecVT = Op0.getValueType();

  unsigned ResNo = Op0.getResNo();

  unsigned Lane = Ext.getConstantOperandVal(1);

  if (VecVT.getVectorNumElements() != 4)

    return SDValue();


  // Find another extract, of Lane + 1

  auto OtherIt = find_if(Op0->users(), [&](SDNode *V) {

    return V->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&

           isa<ConstantSDNode>(V->getOperand(1)) &&

           V->getConstantOperandVal(1) == Lane + 1 &&

           V->getOperand(0).getResNo() == ResNo;

  });

  if (OtherIt == Op0->users().end())

    return SDValue();


  // For float extracts, we need to be converting to a i32 for both vector

  // lanes.

  SDValue OtherExt(*OtherIt, 0);

  if (OtherExt.getValueType() != MVT::i32) {

    if (!OtherExt->hasOneUse() ||

        OtherExt->user_begin()->getOpcode() != ISD::BITCAST ||

        OtherExt->user_begin()->getValueType(0) != MVT::i32)

      return SDValue();

    OtherExt = SDValue(*OtherExt->user_begin(), 0);

  }


  // Convert the type to a f64 and extract with a VMOVRRD.

  SDValue F64 = DCI.DAG.getNode(

      ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,

      DCI.DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v2f64, Op0),

      DCI.DAG.getConstant(Ext.getConstantOperandVal(1) / 2, dl, MVT::i32));

  SDValue VMOVRRD =

      DCI.DAG.getNode(ARMISD::VMOVRRD, dl, {MVT::i32, MVT::i32}, F64);


  DCI.CombineTo(OtherExt.getNode(), SDValue(VMOVRRD.getNode(), 1));

  return VMOVRRD;

}


static SDValue PerformExtractEltCombine(SDNode *N,

                                        TargetLowering::DAGCombinerInfo &DCI,

                                        const ARMSubtarget *ST) {

  SDValue Op0 = N->getOperand(0);

  EVT VT = N->getValueType(0);

  SDLoc dl(N);


  // extract (vdup x) -> x

  if (Op0->getOpcode() == ARMISD::VDUP) {

    SDValue X = Op0->getOperand(0);

    if (VT == MVT::f16 && X.getValueType() == MVT::i32)

      return DCI.DAG.getNode(ARMISD::VMOVhr, dl, VT, X);

    if (VT == MVT::i32 && X.getValueType() == MVT::f16)

      return DCI.DAG.getNode(ARMISD::VMOVrh, dl, VT, X);

    if (VT == MVT::f32 && X.getValueType() == MVT::i32)

      return DCI.DAG.getNode(ISD::BITCAST, dl, VT, X);


    while (X.getValueType() != VT && X->getOpcode() == ISD::BITCAST)

      X = X->getOperand(0);

    if (X.getValueType() == VT)

      return X;

  }


  // extract ARM_BUILD_VECTOR -> x

  if (Op0->getOpcode() == ARMISD::BUILD_VECTOR &&

      isa<ConstantSDNode>(N->getOperand(1)) &&

      N->getConstantOperandVal(1) < Op0.getNumOperands()) {

    return Op0.getOperand(N->getConstantOperandVal(1));

  }


  // extract(bitcast(BUILD_VECTOR(VMOVDRR(a, b), ..))) -> a or b

  if (Op0.getValueType() == MVT::v4i32 &&

      isa<ConstantSDNode>(N->getOperand(1)) &&

      Op0.getOpcode() == ISD::BITCAST &&

      Op0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&

      Op0.getOperand(0).getValueType() == MVT::v2f64) {

    SDValue BV = Op0.getOperand(0);

    unsigned Offset = N->getConstantOperandVal(1);

    SDValue MOV = BV.getOperand(Offset < 2 ? 0 : 1);

    if (MOV.getOpcode() == ARMISD::VMOVDRR)

      return MOV.getOperand(ST->isLittle() ? Offset % 2 : 1 - Offset % 2);

  }


  // extract x, n; extract x, n+1  ->  VMOVRRD x

  if (SDValue R = PerformExtractEltToVMOVRRD(N, DCI))

    return R;


  // extract (MVETrunc(x)) -> extract x

  if (Op0->getOpcode() == ARMISD::MVETRUNC) {

    unsigned Idx = N->getConstantOperandVal(1);

    unsigned Vec =

        Idx / Op0->getOperand(0).getValueType().getVectorNumElements();

    unsigned SubIdx =

        Idx % Op0->getOperand(0).getValueType().getVectorNumElements();

    return DCI.DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Op0.getOperand(Vec),

                           DCI.DAG.getConstant(SubIdx, dl, MVT::i32));

  }


  return SDValue();

}


static SDValue PerformSignExtendInregCombine(SDNode *N, SelectionDAG &DAG) {

  SDValue Op = N->getOperand(0);

  EVT VT = N->getValueType(0);


  // sext_inreg(VGETLANEu) -> VGETLANEs

  if (Op.getOpcode() == ARMISD::VGETLANEu &&

      cast<VTSDNode>(N->getOperand(1))->getVT() ==

          Op.getOperand(0).getValueType().getScalarType())

    return DAG.getNode(ARMISD::VGETLANEs, SDLoc(N), VT, Op.getOperand(0),

                       Op.getOperand(1));


  return SDValue();

}


static SDValue


PerformInsertSubvectorCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {

  SDValue Vec = N->getOperand(0);

  SDValue SubVec = N->getOperand(1);

  uint64_t IdxVal = N->getConstantOperandVal(2);

  EVT VecVT = Vec.getValueType();

  EVT SubVT = SubVec.getValueType();


  // Only do this for legal fixed vector types.

  if (!VecVT.isFixedLengthVector() ||

      !DCI.DAG.getTargetLoweringInfo().isTypeLegal(VecVT) ||

      !DCI.DAG.getTargetLoweringInfo().isTypeLegal(SubVT))

    return SDValue();


  // Ignore widening patterns.

  if (IdxVal == 0 && Vec.isUndef())

    return SDValue();


  // Subvector must be half the width and an "aligned" insertion.

  unsigned NumSubElts = SubVT.getVectorNumElements();

  if ((SubVT.getSizeInBits() * 2) != VecVT.getSizeInBits() ||

      (IdxVal != 0 && IdxVal != NumSubElts))

    return SDValue();


  // Fold insert_subvector -> concat_vectors

  // insert_subvector(Vec,Sub,lo) -> concat_vectors(Sub,extract(Vec,hi))

  // insert_subvector(Vec,Sub,hi) -> concat_vectors(extract(Vec,lo),Sub)

  SDLoc DL(N);

  SDValue Lo, Hi;

  if (IdxVal == 0) {

    Lo = SubVec;

    Hi = DCI.DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, Vec,

                         DCI.DAG.getVectorIdxConstant(NumSubElts, DL));

  } else {

    Lo = DCI.DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, Vec,

                         DCI.DAG.getVectorIdxConstant(0, DL));

    Hi = SubVec;

  }

  return DCI.DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, Lo, Hi);

}


// shuffle(MVETrunc(x, y)) -> VMOVN(x, y)


static SDValue PerformShuffleVMOVNCombine(ShuffleVectorSDNode *N,

                                          SelectionDAG &DAG) {

  SDValue Trunc = N->getOperand(0);

  EVT VT = Trunc.getValueType();

  if (Trunc.getOpcode() != ARMISD::MVETRUNC || !N->getOperand(1).isUndef())

    return SDValue();


  SDLoc DL(Trunc);

  if (isVMOVNTruncMask(N->getMask(), VT, false))

    return DAG.getNode(

        ARMISD::VMOVN, DL, VT,

        DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, Trunc.getOperand(0)),

        DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, Trunc.getOperand(1)),

        DAG.getConstant(1, DL, MVT::i32));

  else if (isVMOVNTruncMask(N->getMask(), VT, true))

    return DAG.getNode(

        ARMISD::VMOVN, DL, VT,

        DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, Trunc.getOperand(1)),

        DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, Trunc.getOperand(0)),

        DAG.getConstant(1, DL, MVT::i32));

  return SDValue();

}


/// PerformVECTOR_SHUFFLECombine - Target-specific dag combine xforms for

/// ISD::VECTOR_SHUFFLE.


static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG) {

  if (SDValue R = PerformShuffleVMOVNCombine(cast<ShuffleVectorSDNode>(N), DAG))

    return R;


  // The LLVM shufflevector instruction does not require the shuffle mask

  // length to match the operand vector length, but ISD::VECTOR_SHUFFLE does

  // have that requirement.  When translating to ISD::VECTOR_SHUFFLE, if the

  // operands do not match the mask length, they are extended by concatenating

  // them with undef vectors.  That is probably the right thing for other

  // targets, but for NEON it is better to concatenate two double-register

  // size vector operands into a single quad-register size vector.  Do that

  // transformation here:

  //   shuffle(concat(v1, undef), concat(v2, undef)) ->

  //   shuffle(concat(v1, v2), undef)

  SDValue Op0 = N->getOperand(0);

  SDValue Op1 = N->getOperand(1);

  if (Op0.getOpcode() != ISD::CONCAT_VECTORS ||

      Op1.getOpcode() != ISD::CONCAT_VECTORS ||

      Op0.getNumOperands() != 2 ||

      Op1.getNumOperands() != 2)

    return SDValue();

  SDValue Concat0Op1 = Op0.getOperand(1);

  SDValue Concat1Op1 = Op1.getOperand(1);

  if (!Concat0Op1.isUndef() || !Concat1Op1.isUndef())

    return SDValue();

  // Skip the transformation if any of the types are illegal.

  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  EVT VT = N->getValueType(0);

  if (!TLI.isTypeLegal(VT) ||

      !TLI.isTypeLegal(Concat0Op1.getValueType()) ||

      !TLI.isTypeLegal(Concat1Op1.getValueType()))

    return SDValue();


  SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,

                                  Op0.getOperand(0), Op1.getOperand(0));

  // Translate the shuffle mask.

  SmallVector<int, 16> NewMask;

  unsigned NumElts = VT.getVectorNumElements();

  unsigned HalfElts = NumElts/2;

  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);

  for (unsigned n = 0; n < NumElts; ++n) {

    int MaskElt = SVN->getMaskElt(n);

    int NewElt = -1;

    if (MaskElt < (int)HalfElts)

      NewElt = MaskElt;

    else if (MaskElt >= (int)NumElts && MaskElt < (int)(NumElts + HalfElts))

      NewElt = HalfElts + MaskElt - NumElts;

    NewMask.push_back(NewElt);

  }

  return DAG.getVectorShuffle(VT, SDLoc(N), NewConcat,

                              DAG.getUNDEF(VT), NewMask);

}


/// Load/store instruction that can be merged with a base address

/// update


struct BaseUpdateTarget {

  SDNode *N;

  bool isIntrinsic;

  bool isStore;

  unsigned AddrOpIdx;

};


struct BaseUpdateUser {

  /// Instruction that updates a pointer

  SDNode *N;

  /// Pointer increment operand

  SDValue Inc;

  /// Pointer increment value if it is a constant, or 0 otherwise

  unsigned ConstInc;

};


static bool isValidBaseUpdate(SDNode *N, SDNode *User) {

  // Check that the add is independent of the load/store.

  // Otherwise, folding it would create a cycle. Search through Addr

  // as well, since the User may not be a direct user of Addr and

  // only share a base pointer.

  SmallPtrSet<const SDNode *, 32> Visited;

  SmallVector<const SDNode *, 16> Worklist;

  Worklist.push_back(N);

  Worklist.push_back(User);

  const unsigned MaxSteps = 1024;

  if (SDNode::hasPredecessorHelper(N, Visited, Worklist, MaxSteps) ||

      SDNode::hasPredecessorHelper(User, Visited, Worklist, MaxSteps))

    return false;

  return true;

}


static bool TryCombineBaseUpdate(struct BaseUpdateTarget &Target,

                                 struct BaseUpdateUser &User,

                                 bool SimpleConstIncOnly,

                                 TargetLowering::DAGCombinerInfo &DCI) {

  SelectionDAG &DAG = DCI.DAG;

  SDNode *N = Target.N;

  MemSDNode *MemN = cast<MemSDNode>(N);

  SDLoc dl(N);


  // Find the new opcode for the updating load/store.

  bool isLoadOp = true;

  bool isLaneOp = false;

  // Workaround for vst1x and vld1x intrinsics which do not have alignment

  // as an operand.

  bool hasAlignment = true;

  unsigned NewOpc = 0;

  unsigned NumVecs = 0;

  if (Target.isIntrinsic) {

    unsigned IntNo = N->getConstantOperandVal(1);

    switch (IntNo) {

    default:

      llvm_unreachable("unexpected intrinsic for Neon base update");

    case Intrinsic::arm_neon_vld1:

      NewOpc = ARMISD::VLD1_UPD;

      NumVecs = 1;

      break;

    case Intrinsic::arm_neon_vld2:

      NewOpc = ARMISD::VLD2_UPD;

      NumVecs = 2;

      break;

    case Intrinsic::arm_neon_vld3:

      NewOpc = ARMISD::VLD3_UPD;

      NumVecs = 3;

      break;

    case Intrinsic::arm_neon_vld4:

      NewOpc = ARMISD::VLD4_UPD;

      NumVecs = 4;

      break;

    case Intrinsic::arm_neon_vld1x2:

      NewOpc = ARMISD::VLD1x2_UPD;

      NumVecs = 2;

      hasAlignment = false;

      break;

    case Intrinsic::arm_neon_vld1x3:

      NewOpc = ARMISD::VLD1x3_UPD;

      NumVecs = 3;

      hasAlignment = false;

      break;

    case Intrinsic::arm_neon_vld1x4:

      NewOpc = ARMISD::VLD1x4_UPD;

      NumVecs = 4;

      hasAlignment = false;

      break;

    case Intrinsic::arm_neon_vld2dup:

      NewOpc = ARMISD::VLD2DUP_UPD;

      NumVecs = 2;

      break;

    case Intrinsic::arm_neon_vld3dup:

      NewOpc = ARMISD::VLD3DUP_UPD;

      NumVecs = 3;

      break;

    case Intrinsic::arm_neon_vld4dup:

      NewOpc = ARMISD::VLD4DUP_UPD;

      NumVecs = 4;

      break;

    case Intrinsic::arm_neon_vld2lane:

      NewOpc = ARMISD::VLD2LN_UPD;

      NumVecs = 2;

      isLaneOp = true;

      break;

    case Intrinsic::arm_neon_vld3lane:

      NewOpc = ARMISD::VLD3LN_UPD;

      NumVecs = 3;

      isLaneOp = true;

      break;

    case Intrinsic::arm_neon_vld4lane:

      NewOpc = ARMISD::VLD4LN_UPD;

      NumVecs = 4;

      isLaneOp = true;

      break;

    case Intrinsic::arm_neon_vst1:

      NewOpc = ARMISD::VST1_UPD;

      NumVecs = 1;

      isLoadOp = false;

      break;

    case Intrinsic::arm_neon_vst2:

      NewOpc = ARMISD::VST2_UPD;

      NumVecs = 2;

      isLoadOp = false;

      break;

    case Intrinsic::arm_neon_vst3:

      NewOpc = ARMISD::VST3_UPD;

      NumVecs = 3;

      isLoadOp = false;

      break;

    case Intrinsic::arm_neon_vst4:

      NewOpc = ARMISD::VST4_UPD;

      NumVecs = 4;

      isLoadOp = false;

      break;

    case Intrinsic::arm_neon_vst2lane:

      NewOpc = ARMISD::VST2LN_UPD;

      NumVecs = 2;

      isLoadOp = false;

      isLaneOp = true;

      break;

    case Intrinsic::arm_neon_vst3lane:

      NewOpc = ARMISD::VST3LN_UPD;

      NumVecs = 3;

      isLoadOp = false;

      isLaneOp = true;

      break;

    case Intrinsic::arm_neon_vst4lane:

      NewOpc = ARMISD::VST4LN_UPD;

      NumVecs = 4;

      isLoadOp = false;

      isLaneOp = true;

      break;

    case Intrinsic::arm_neon_vst1x2:

      NewOpc = ARMISD::VST1x2_UPD;

      NumVecs = 2;

      isLoadOp = false;

      hasAlignment = false;

      break;

    case Intrinsic::arm_neon_vst1x3:

      NewOpc = ARMISD::VST1x3_UPD;

      NumVecs = 3;

      isLoadOp = false;

      hasAlignment = false;

      break;

    case Intrinsic::arm_neon_vst1x4:

      NewOpc = ARMISD::VST1x4_UPD;

      NumVecs = 4;

      isLoadOp = false;

      hasAlignment = false;

      break;

    }

  } else {

    isLaneOp = true;

    switch (N->getOpcode()) {

    default:

      llvm_unreachable("unexpected opcode for Neon base update");

    case ARMISD::VLD1DUP:

      NewOpc = ARMISD::VLD1DUP_UPD;

      NumVecs = 1;

      break;

    case ARMISD::VLD2DUP:

      NewOpc = ARMISD::VLD2DUP_UPD;

      NumVecs = 2;

      break;

    case ARMISD::VLD3DUP:

      NewOpc = ARMISD::VLD3DUP_UPD;

      NumVecs = 3;

      break;

    case ARMISD::VLD4DUP:

      NewOpc = ARMISD::VLD4DUP_UPD;

      NumVecs = 4;

      break;

    case ISD::LOAD:

      NewOpc = ARMISD::VLD1_UPD;

      NumVecs = 1;

      isLaneOp = false;

      break;

    case ISD::STORE:

      NewOpc = ARMISD::VST1_UPD;

      NumVecs = 1;

      isLaneOp = false;

      isLoadOp = false;

      break;

    }

  }


  // Find the size of memory referenced by the load/store.

  EVT VecTy;

  if (isLoadOp) {

    VecTy = N->getValueType(0);

  } else if (Target.isIntrinsic) {

    VecTy = N->getOperand(Target.AddrOpIdx + 1).getValueType();

  } else {

    assert(Target.isStore &&

           "Node has to be a load, a store, or an intrinsic!");

    VecTy = N->getOperand(1).getValueType();

  }


  bool isVLDDUPOp =

      NewOpc == ARMISD::VLD1DUP_UPD || NewOpc == ARMISD::VLD2DUP_UPD ||

      NewOpc == ARMISD::VLD3DUP_UPD || NewOpc == ARMISD::VLD4DUP_UPD;


  unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;

  if (isLaneOp || isVLDDUPOp)

    NumBytes /= VecTy.getVectorNumElements();


  if (NumBytes >= 3 * 16 && User.ConstInc != NumBytes) {

    // VLD3/4 and VST3/4 for 128-bit vectors are implemented with two

    // separate instructions that make it harder to use a non-constant update.

    return false;

  }


  if (SimpleConstIncOnly && User.ConstInc != NumBytes)

    return false;


  if (!isValidBaseUpdate(N, User.N))

    return false;


  // OK, we found an ADD we can fold into the base update.

  // Now, create a _UPD node, taking care of not breaking alignment.


  EVT AlignedVecTy = VecTy;

  Align Alignment = MemN->getAlign();


  // If this is a less-than-standard-aligned load/store, change the type to

  // match the standard alignment.

  // The alignment is overlooked when selecting _UPD variants; and it's

  // easier to introduce bitcasts here than fix that.

  // There are 3 ways to get to this base-update combine:

  // - intrinsics: they are assumed to be properly aligned (to the standard

  //   alignment of the memory type), so we don't need to do anything.

  // - ARMISD::VLDx nodes: they are only generated from the aforementioned

  //   intrinsics, so, likewise, there's nothing to do.

  // - generic load/store instructions: the alignment is specified as an

  //   explicit operand, rather than implicitly as the standard alignment

  //   of the memory type (like the intrisics).  We need to change the

  //   memory type to match the explicit alignment.  That way, we don't

  //   generate non-standard-aligned ARMISD::VLDx nodes.

  if (isa<LSBaseSDNode>(N)) {

    if (Alignment.value() < VecTy.getScalarSizeInBits() / 8) {

      MVT EltTy = MVT::getIntegerVT(Alignment.value() * 8);

      assert(NumVecs == 1 && "Unexpected multi-element generic load/store.");

      assert(!isLaneOp && "Unexpected generic load/store lane.");

      unsigned NumElts = NumBytes / (EltTy.getSizeInBits() / 8);

      AlignedVecTy = MVT::getVectorVT(EltTy, NumElts);

    }

    // Don't set an explicit alignment on regular load/stores that we want

    // to transform to VLD/VST 1_UPD nodes.

    // This matches the behavior of regular load/stores, which only get an

    // explicit alignment if the MMO alignment is larger than the standard

    // alignment of the memory type.

    // Intrinsics, however, always get an explicit alignment, set to the

    // alignment of the MMO.

    Alignment = Align(1);

  }


  // Create the new updating load/store node.

  // First, create an SDVTList for the new updating node's results.

  EVT Tys[6];

  unsigned NumResultVecs = (isLoadOp ? NumVecs : 0);

  unsigned n;

  for (n = 0; n < NumResultVecs; ++n)

    Tys[n] = AlignedVecTy;

  Tys[n++] = MVT::i32;

  Tys[n] = MVT::Other;

  SDVTList SDTys = DAG.getVTList(ArrayRef(Tys, NumResultVecs + 2));


  // Then, gather the new node's operands.

  SmallVector<SDValue, 8> Ops;

  Ops.push_back(N->getOperand(0)); // incoming chain

  Ops.push_back(N->getOperand(Target.AddrOpIdx));

  Ops.push_back(User.Inc);


  if (StoreSDNode *StN = dyn_cast<StoreSDNode>(N)) {

    // Try to match the intrinsic's signature

    Ops.push_back(StN->getValue());

  } else {

    // Loads (and of course intrinsics) match the intrinsics' signature,

    // so just add all but the alignment operand.

    unsigned LastOperand =

        hasAlignment ? N->getNumOperands() - 1 : N->getNumOperands();

    for (unsigned i = Target.AddrOpIdx + 1; i < LastOperand; ++i)

      Ops.push_back(N->getOperand(i));

  }


  // For all node types, the alignment operand is always the last one.

  Ops.push_back(DAG.getConstant(Alignment.value(), dl, MVT::i32));


  // If this is a non-standard-aligned STORE, the penultimate operand is the

  // stored value.  Bitcast it to the aligned type.

  if (AlignedVecTy != VecTy && N->getOpcode() == ISD::STORE) {

    SDValue &StVal = Ops[Ops.size() - 2];

    StVal = DAG.getNode(ISD::BITCAST, dl, AlignedVecTy, StVal);

  }


  EVT LoadVT = isLaneOp ? VecTy.getVectorElementType() : AlignedVecTy;

  SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, dl, SDTys, Ops, LoadVT,

                                         MemN->getMemOperand());


  // Update the uses.

  SmallVector<SDValue, 5> NewResults;

  for (unsigned i = 0; i < NumResultVecs; ++i)

    NewResults.push_back(SDValue(UpdN.getNode(), i));


  // If this is an non-standard-aligned LOAD, the first result is the loaded

  // value.  Bitcast it to the expected result type.

  if (AlignedVecTy != VecTy && N->getOpcode() == ISD::LOAD) {

    SDValue &LdVal = NewResults[0];

    LdVal = DAG.getNode(ISD::BITCAST, dl, VecTy, LdVal);

  }


  NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs + 1)); // chain

  DCI.CombineTo(N, NewResults);

  DCI.CombineTo(User.N, SDValue(UpdN.getNode(), NumResultVecs));


  return true;

}


// If (opcode ptr inc) is and ADD-like instruction, return the

// increment value. Otherwise return 0.


static unsigned getPointerConstIncrement(unsigned Opcode, SDValue Ptr,

                                         SDValue Inc, const SelectionDAG &DAG) {

  ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode());

  if (!CInc)

    return 0;


  switch (Opcode) {

  case ARMISD::VLD1_UPD:

  case ISD::ADD:

    return CInc->getZExtValue();

  case ISD::OR: {

    if (DAG.haveNoCommonBitsSet(Ptr, Inc)) {

      // (OR ptr inc) is the same as (ADD ptr inc)

      return CInc->getZExtValue();

    }

    return 0;

  }

  default:

    return 0;

  }

}


static bool findPointerConstIncrement(SDNode *N, SDValue *Ptr, SDValue *CInc) {

  switch (N->getOpcode()) {

  case ISD::ADD:

  case ISD::OR: {

    if (isa<ConstantSDNode>(N->getOperand(1))) {

      *Ptr = N->getOperand(0);

      *CInc = N->getOperand(1);

      return true;

    }

    return false;

  }

  case ARMISD::VLD1_UPD: {

    if (isa<ConstantSDNode>(N->getOperand(2))) {

      *Ptr = N->getOperand(1);

      *CInc = N->getOperand(2);

      return true;

    }

    return false;

  }

  default:

    return false;

  }

}


/// CombineBaseUpdate - Target-specific DAG combine function for VLDDUP,

/// NEON load/store intrinsics, and generic vector load/stores, to merge

/// base address updates.

/// For generic load/stores, the memory type is assumed to be a vector.

/// The caller is assumed to have checked legality.


static SDValue CombineBaseUpdate(SDNode *N,

                                 TargetLowering::DAGCombinerInfo &DCI) {

  const bool isIntrinsic = (N->getOpcode() == ISD::INTRINSIC_VOID ||

                            N->getOpcode() == ISD::INTRINSIC_W_CHAIN);

  const bool isStore = N->getOpcode() == ISD::STORE;

  const unsigned AddrOpIdx = ((isIntrinsic || isStore) ? 2 : 1);

  BaseUpdateTarget Target = {N, isIntrinsic, isStore, AddrOpIdx};


  // Limit the number of possible base-updates we look at to prevent degenerate

  // cases.

  unsigned MaxBaseUpdates = ArmMaxBaseUpdatesToCheck;


  SDValue Addr = N->getOperand(AddrOpIdx);


  SmallVector<BaseUpdateUser, 8> BaseUpdates;


  // Search for a use of the address operand that is an increment.

  for (SDUse &Use : Addr->uses()) {

    SDNode *User = Use.getUser();

    if (Use.getResNo() != Addr.getResNo() || User->getNumOperands() != 2)

      continue;


    SDValue Inc = User->getOperand(Use.getOperandNo() == 1 ? 0 : 1);

    unsigned ConstInc =

        getPointerConstIncrement(User->getOpcode(), Addr, Inc, DCI.DAG);


    if (ConstInc || User->getOpcode() == ISD::ADD) {

      BaseUpdates.push_back({User, Inc, ConstInc});

      if (BaseUpdates.size() >= MaxBaseUpdates)

        break;

    }

  }


  // If the address is a constant pointer increment itself, find

  // another constant increment that has the same base operand

  SDValue Base;

  SDValue CInc;

  if (findPointerConstIncrement(Addr.getNode(), &Base, &CInc)) {

    unsigned Offset =

        getPointerConstIncrement(Addr->getOpcode(), Base, CInc, DCI.DAG);

    for (SDUse &Use : Base->uses()) {


      SDNode *User = Use.getUser();

      if (Use.getResNo() != Base.getResNo() || User == Addr.getNode() ||

          User->getNumOperands() != 2)

        continue;


      SDValue UserInc = User->getOperand(Use.getOperandNo() == 0 ? 1 : 0);

      unsigned UserOffset =

          getPointerConstIncrement(User->getOpcode(), Base, UserInc, DCI.DAG);


      if (!UserOffset || UserOffset <= Offset)

        continue;


      unsigned NewConstInc = UserOffset - Offset;

      SDValue NewInc = DCI.DAG.getConstant(NewConstInc, SDLoc(N), MVT::i32);

      BaseUpdates.push_back({User, NewInc, NewConstInc});

      if (BaseUpdates.size() >= MaxBaseUpdates)

        break;

    }

  }


  // Try to fold the load/store with an update that matches memory

  // access size. This should work well for sequential loads.

  unsigned NumValidUpd = BaseUpdates.size();

  for (unsigned I = 0; I < NumValidUpd; I++) {

    BaseUpdateUser &User = BaseUpdates[I];

    if (TryCombineBaseUpdate(Target, User, /*SimpleConstIncOnly=*/true, DCI))

      return SDValue();

  }


  // Try to fold with other users. Non-constant updates are considered

  // first, and constant updates are sorted to not break a sequence of

  // strided accesses (if there is any).

  llvm::stable_sort(BaseUpdates,

                    [](const BaseUpdateUser &LHS, const BaseUpdateUser &RHS) {

                      return LHS.ConstInc < RHS.ConstInc;

                    });

  for (BaseUpdateUser &User : BaseUpdates) {

    if (TryCombineBaseUpdate(Target, User, /*SimpleConstIncOnly=*/false, DCI))

      return SDValue();

  }

  return SDValue();

}


static SDValue PerformVLDCombine(SDNode *N,

                                 TargetLowering::DAGCombinerInfo &DCI) {

  if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())

    return SDValue();


  return CombineBaseUpdate(N, DCI);

}


static SDValue PerformMVEVLDCombine(SDNode *N,

                                    TargetLowering::DAGCombinerInfo &DCI) {

  if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())

    return SDValue();


  SelectionDAG &DAG = DCI.DAG;

  SDValue Addr = N->getOperand(2);

  MemSDNode *MemN = cast<MemSDNode>(N);

  SDLoc dl(N);


  // For the stores, where there are multiple intrinsics we only actually want

  // to post-inc the last of the them.

  unsigned IntNo = N->getConstantOperandVal(1);

  if (IntNo == Intrinsic::arm_mve_vst2q && N->getConstantOperandVal(5) != 1)

    return SDValue();

  if (IntNo == Intrinsic::arm_mve_vst4q && N->getConstantOperandVal(7) != 3)

    return SDValue();


  // Search for a use of the address operand that is an increment.

  for (SDUse &Use : Addr->uses()) {

    SDNode *User = Use.getUser();

    if (User->getOpcode() != ISD::ADD || Use.getResNo() != Addr.getResNo())

      continue;


    // Check that the add is independent of the load/store.  Otherwise, folding

    // it would create a cycle. We can avoid searching through Addr as it's a

    // predecessor to both.

    SmallPtrSet<const SDNode *, 32> Visited;

    SmallVector<const SDNode *, 16> Worklist;

    Visited.insert(Addr.getNode());

    Worklist.push_back(N);

    Worklist.push_back(User);

    const unsigned MaxSteps = 1024;

    if (SDNode::hasPredecessorHelper(N, Visited, Worklist, MaxSteps) ||

        SDNode::hasPredecessorHelper(User, Visited, Worklist, MaxSteps))

      continue;


    // Find the new opcode for the updating load/store.

    bool isLoadOp = true;

    unsigned NewOpc = 0;

    unsigned NumVecs = 0;

    switch (IntNo) {

    default:

      llvm_unreachable("unexpected intrinsic for MVE VLDn combine");

    case Intrinsic::arm_mve_vld2q:

      NewOpc = ARMISD::VLD2_UPD;

      NumVecs = 2;

      break;

    case Intrinsic::arm_mve_vld4q:

      NewOpc = ARMISD::VLD4_UPD;

      NumVecs = 4;

      break;

    case Intrinsic::arm_mve_vst2q:

      NewOpc = ARMISD::VST2_UPD;

      NumVecs = 2;

      isLoadOp = false;

      break;

    case Intrinsic::arm_mve_vst4q:

      NewOpc = ARMISD::VST4_UPD;

      NumVecs = 4;

      isLoadOp = false;

      break;

    }


    // Find the size of memory referenced by the load/store.

    EVT VecTy;

    if (isLoadOp) {

      VecTy = N->getValueType(0);

    } else {

      VecTy = N->getOperand(3).getValueType();

    }


    unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;


    // If the increment is a constant, it must match the memory ref size.

    SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);

    ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode());

    if (!CInc || CInc->getZExtValue() != NumBytes)

      continue;


    // Create the new updating load/store node.

    // First, create an SDVTList for the new updating node's results.

    EVT Tys[6];

    unsigned NumResultVecs = (isLoadOp ? NumVecs : 0);

    unsigned n;

    for (n = 0; n < NumResultVecs; ++n)

      Tys[n] = VecTy;

    Tys[n++] = MVT::i32;

    Tys[n] = MVT::Other;

    SDVTList SDTys = DAG.getVTList(ArrayRef(Tys, NumResultVecs + 2));


    // Then, gather the new node's operands.

    SmallVector<SDValue, 8> Ops;

    Ops.push_back(N->getOperand(0)); // incoming chain

    Ops.push_back(N->getOperand(2)); // ptr

    Ops.push_back(Inc);


    for (unsigned i = 3; i < N->getNumOperands(); ++i)

      Ops.push_back(N->getOperand(i));


    SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, dl, SDTys, Ops, VecTy,

                                           MemN->getMemOperand());


    // Update the uses.

    SmallVector<SDValue, 5> NewResults;

    for (unsigned i = 0; i < NumResultVecs; ++i)

      NewResults.push_back(SDValue(UpdN.getNode(), i));


    NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs + 1)); // chain

    DCI.CombineTo(N, NewResults);

    DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs));


    break;

  }


  return SDValue();

}


/// CombineVLDDUP - For a VDUPLANE node N, check if its source operand is a

/// vldN-lane (N > 1) intrinsic, and if all the other uses of that intrinsic

/// are also VDUPLANEs.  If so, combine them to a vldN-dup operation and

/// return true.


static bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {

  SelectionDAG &DAG = DCI.DAG;

  EVT VT = N->getValueType(0);

  // vldN-dup instructions only support 64-bit vectors for N > 1.

  if (!VT.is64BitVector())

    return false;


  // Check if the VDUPLANE operand is a vldN-dup intrinsic.

  SDNode *VLD = N->getOperand(0).getNode();

  if (VLD->getOpcode() != ISD::INTRINSIC_W_CHAIN)

    return false;

  unsigned NumVecs = 0;

  unsigned NewOpc = 0;

  unsigned IntNo = VLD->getConstantOperandVal(1);

  if (IntNo == Intrinsic::arm_neon_vld2lane) {

    NumVecs = 2;

    NewOpc = ARMISD::VLD2DUP;

  } else if (IntNo == Intrinsic::arm_neon_vld3lane) {

    NumVecs = 3;

    NewOpc = ARMISD::VLD3DUP;

  } else if (IntNo == Intrinsic::arm_neon_vld4lane) {

    NumVecs = 4;

    NewOpc = ARMISD::VLD4DUP;

  } else {

    return false;

  }


  // First check that all the vldN-lane uses are VDUPLANEs and that the lane

  // numbers match the load.

  unsigned VLDLaneNo = VLD->getConstantOperandVal(NumVecs + 3);

  for (SDUse &Use : VLD->uses()) {

    // Ignore uses of the chain result.

    if (Use.getResNo() == NumVecs)

      continue;

    SDNode *User = Use.getUser();

    if (User->getOpcode() != ARMISD::VDUPLANE ||

        VLDLaneNo != User->getConstantOperandVal(1))

      return false;

  }


  // Create the vldN-dup node.

  EVT Tys[5];

  unsigned n;

  for (n = 0; n < NumVecs; ++n)

    Tys[n] = VT;

  Tys[n] = MVT::Other;

  SDVTList SDTys = DAG.getVTList(ArrayRef(Tys, NumVecs + 1));

  SDValue Ops[] = { VLD->getOperand(0), VLD->getOperand(2) };

  MemIntrinsicSDNode *VLDMemInt = cast<MemIntrinsicSDNode>(VLD);

  SDValue VLDDup = DAG.getMemIntrinsicNode(NewOpc, SDLoc(VLD), SDTys,

                                           Ops, VLDMemInt->getMemoryVT(),

                                           VLDMemInt->getMemOperand());


  // Update the uses.

  for (SDUse &Use : VLD->uses()) {

    unsigned ResNo = Use.getResNo();

    // Ignore uses of the chain result.

    if (ResNo == NumVecs)

      continue;

    DCI.CombineTo(Use.getUser(), SDValue(VLDDup.getNode(), ResNo));

  }


  // Now the vldN-lane intrinsic is dead except for its chain result.

  // Update uses of the chain.

  std::vector<SDValue> VLDDupResults;

  for (unsigned n = 0; n < NumVecs; ++n)

    VLDDupResults.push_back(SDValue(VLDDup.getNode(), n));

  VLDDupResults.push_back(SDValue(VLDDup.getNode(), NumVecs));

  DCI.CombineTo(VLD, VLDDupResults);


  return true;

}


/// PerformVDUPLANECombine - Target-specific dag combine xforms for

/// ARMISD::VDUPLANE.


static SDValue PerformVDUPLANECombine(SDNode *N,

                                      TargetLowering::DAGCombinerInfo &DCI,

                                      const ARMSubtarget *Subtarget) {

  SDValue Op = N->getOperand(0);

  EVT VT = N->getValueType(0);


  // On MVE, we just convert the VDUPLANE to a VDUP with an extract.

  if (Subtarget->hasMVEIntegerOps()) {

    EVT ExtractVT = VT.getVectorElementType();

    // We need to ensure we are creating a legal type.

    if (!DCI.DAG.getTargetLoweringInfo().isTypeLegal(ExtractVT))

      ExtractVT = MVT::i32;

    SDValue Extract = DCI.DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), ExtractVT,

                              N->getOperand(0), N->getOperand(1));

    return DCI.DAG.getNode(ARMISD::VDUP, SDLoc(N), VT, Extract);

  }


  // If the source is a vldN-lane (N > 1) intrinsic, and all the other uses

  // of that intrinsic are also VDUPLANEs, combine them to a vldN-dup operation.

  if (CombineVLDDUP(N, DCI))

    return SDValue(N, 0);


  // If the source is already a VMOVIMM or VMVNIMM splat, the VDUPLANE is

  // redundant.  Ignore bit_converts for now; element sizes are checked below.

  while (Op.getOpcode() == ISD::BITCAST)

    Op = Op.getOperand(0);

  if (Op.getOpcode() != ARMISD::VMOVIMM && Op.getOpcode() != ARMISD::VMVNIMM)

    return SDValue();


  // Make sure the VMOV element size is not bigger than the VDUPLANE elements.

  unsigned EltSize = Op.getScalarValueSizeInBits();

  // The canonical VMOV for a zero vector uses a 32-bit element size.

  unsigned Imm = Op.getConstantOperandVal(0);

  unsigned EltBits;

  if (ARM_AM::decodeVMOVModImm(Imm, EltBits) == 0)

    EltSize = 8;

  if (EltSize > VT.getScalarSizeInBits())

    return SDValue();


  return DCI.DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);

}


/// PerformVDUPCombine - Target-specific dag combine xforms for ARMISD::VDUP.


static SDValue PerformVDUPCombine(SDNode *N, SelectionDAG &DAG,

                                  const ARMSubtarget *Subtarget) {

  SDValue Op = N->getOperand(0);

  SDLoc dl(N);


  if (Subtarget->hasMVEIntegerOps()) {

    // Convert VDUP f32 -> VDUP BITCAST i32 under MVE, as we know the value will

    // need to come from a GPR.

    if (Op.getValueType() == MVT::f32)

      return DAG.getNode(ARMISD::VDUP, dl, N->getValueType(0),

                         DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op));

    else if (Op.getValueType() == MVT::f16)

      return DAG.getNode(ARMISD::VDUP, dl, N->getValueType(0),

                         DAG.getNode(ARMISD::VMOVrh, dl, MVT::i32, Op));

  }


  if (!Subtarget->hasNEON())

    return SDValue();


  // Match VDUP(LOAD) -> VLD1DUP.

  // We match this pattern here rather than waiting for isel because the

  // transform is only legal for unindexed loads.

  LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode());

  if (LD && Op.hasOneUse() && LD->isUnindexed() &&

      LD->getMemoryVT() == N->getValueType(0).getVectorElementType()) {

    SDValue Ops[] = {LD->getOperand(0), LD->getOperand(1),

                     DAG.getConstant(LD->getAlign().value(), SDLoc(N), MVT::i32)};

    SDVTList SDTys = DAG.getVTList(N->getValueType(0), MVT::Other);

    SDValue VLDDup =

        DAG.getMemIntrinsicNode(ARMISD::VLD1DUP, SDLoc(N), SDTys, Ops,

                                LD->getMemoryVT(), LD->getMemOperand());

    DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), VLDDup.getValue(1));

    return VLDDup;

  }


  return SDValue();

}


static SDValue PerformLOADCombine(SDNode *N,

                                  TargetLowering::DAGCombinerInfo &DCI,

                                  const ARMSubtarget *Subtarget) {

  EVT VT = N->getValueType(0);


  // If this is a legal vector load, try to combine it into a VLD1_UPD.

  if (Subtarget->hasNEON() && ISD::isNormalLoad(N) && VT.isVector() &&

      DCI.DAG.getTargetLoweringInfo().isTypeLegal(VT))

    return CombineBaseUpdate(N, DCI);


  return SDValue();

}


// Optimize trunc store (of multiple scalars) to shuffle and store.  First,

// pack all of the elements in one place.  Next, store to memory in fewer

// chunks.


static SDValue PerformTruncatingStoreCombine(StoreSDNode *St,

                                             SelectionDAG &DAG) {

  SDValue StVal = St->getValue();

  EVT VT = StVal.getValueType();

  if (!St->isTruncatingStore() || !VT.isVector())

    return SDValue();

  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  EVT StVT = St->getMemoryVT();

  unsigned NumElems = VT.getVectorNumElements();

  assert(StVT != VT && "Cannot truncate to the same type");

  unsigned FromEltSz = VT.getScalarSizeInBits();

  unsigned ToEltSz = StVT.getScalarSizeInBits();


  // From, To sizes and ElemCount must be pow of two

  if (!isPowerOf2_32(NumElems * FromEltSz * ToEltSz))

    return SDValue();


  // We are going to use the original vector elt for storing.

  // Accumulated smaller vector elements must be a multiple of the store size.

  if (0 != (NumElems * FromEltSz) % ToEltSz)

    return SDValue();


  unsigned SizeRatio = FromEltSz / ToEltSz;

  assert(SizeRatio * NumElems * ToEltSz == VT.getSizeInBits());


  // Create a type on which we perform the shuffle.

  EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), StVT.getScalarType(),

                                   NumElems * SizeRatio);

  assert(WideVecVT.getSizeInBits() == VT.getSizeInBits());


  SDLoc DL(St);

  SDValue WideVec = DAG.getNode(ISD::BITCAST, DL, WideVecVT, StVal);

  SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1);

  for (unsigned i = 0; i < NumElems; ++i)

    ShuffleVec[i] = DAG.getDataLayout().isBigEndian() ? (i + 1) * SizeRatio - 1

                                                      : i * SizeRatio;


  // Can't shuffle using an illegal type.

  if (!TLI.isTypeLegal(WideVecVT))

    return SDValue();


  SDValue Shuff = DAG.getVectorShuffle(

      WideVecVT, DL, WideVec, DAG.getUNDEF(WideVec.getValueType()), ShuffleVec);

  // At this point all of the data is stored at the bottom of the

  // register. We now need to save it to mem.


  // Find the largest store unit

  MVT StoreType = MVT::i8;

  for (MVT Tp : MVT::integer_valuetypes()) {

    if (TLI.isTypeLegal(Tp) && Tp.getSizeInBits() <= NumElems * ToEltSz)

      StoreType = Tp;

  }

  // Didn't find a legal store type.

  if (!TLI.isTypeLegal(StoreType))

    return SDValue();


  // Bitcast the original vector into a vector of store-size units

  EVT StoreVecVT =

      EVT::getVectorVT(*DAG.getContext(), StoreType,

                       VT.getSizeInBits() / EVT(StoreType).getSizeInBits());

  assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits());

  SDValue ShuffWide = DAG.getNode(ISD::BITCAST, DL, StoreVecVT, Shuff);

  SmallVector<SDValue, 8> Chains;

  SDValue Increment = DAG.getConstant(StoreType.getSizeInBits() / 8, DL,

                                      TLI.getPointerTy(DAG.getDataLayout()));

  SDValue BasePtr = St->getBasePtr();


  // Perform one or more big stores into memory.

  unsigned E = (ToEltSz * NumElems) / StoreType.getSizeInBits();

  for (unsigned I = 0; I < E; I++) {

    SDValue SubVec = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, StoreType,

                                 ShuffWide, DAG.getIntPtrConstant(I, DL));

    SDValue Ch =

        DAG.getStore(St->getChain(), DL, SubVec, BasePtr, St->getPointerInfo(),

                     St->getAlign(), St->getMemOperand()->getFlags());

    BasePtr =

        DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr, Increment);

    Chains.push_back(Ch);

  }

  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);

}


// Try taking a single vector store from an fpround (which would otherwise turn

// into an expensive buildvector) and splitting it into a series of narrowing

// stores.


static SDValue PerformSplittingToNarrowingStores(StoreSDNode *St,

                                                 SelectionDAG &DAG) {

  if (!St->isSimple() || St->isTruncatingStore() || !St->isUnindexed())

    return SDValue();

  SDValue Trunc = St->getValue();

  if (Trunc->getOpcode() != ISD::FP_ROUND)

    return SDValue();

  EVT FromVT = Trunc->getOperand(0).getValueType();

  EVT ToVT = Trunc.getValueType();

  if (!ToVT.isVector())

    return SDValue();

  assert(FromVT.getVectorNumElements() == ToVT.getVectorNumElements());

  EVT ToEltVT = ToVT.getVectorElementType();

  EVT FromEltVT = FromVT.getVectorElementType();


  if (FromEltVT != MVT::f32 || ToEltVT != MVT::f16)

    return SDValue();


  unsigned NumElements = 4;

  if (FromVT.getVectorNumElements() % NumElements != 0)

    return SDValue();


  // Test if the Trunc will be convertable to a VMOVN with a shuffle, and if so

  // use the VMOVN over splitting the store. We are looking for patterns of:

  // !rev: 0 N 1 N+1 2 N+2 ...

  //  rev: N 0 N+1 1 N+2 2 ...

  // The shuffle may either be a single source (in which case N = NumElts/2) or

  // two inputs extended with concat to the same size (in which case N =

  // NumElts).

  auto isVMOVNShuffle = [&](ShuffleVectorSDNode *SVN, bool Rev) {

    ArrayRef<int> M = SVN->getMask();

    unsigned NumElts = ToVT.getVectorNumElements();

    if (SVN->getOperand(1).isUndef())

      NumElts /= 2;


    unsigned Off0 = Rev ? NumElts : 0;

    unsigned Off1 = Rev ? 0 : NumElts;


    for (unsigned I = 0; I < NumElts; I += 2) {

      if (M[I] >= 0 && M[I] != (int)(Off0 + I / 2))

        return false;

      if (M[I + 1] >= 0 && M[I + 1] != (int)(Off1 + I / 2))

        return false;

    }


    return true;

  };


  if (auto *Shuffle = dyn_cast<ShuffleVectorSDNode>(Trunc.getOperand(0)))

    if (isVMOVNShuffle(Shuffle, false) || isVMOVNShuffle(Shuffle, true))

      return SDValue();


  LLVMContext &C = *DAG.getContext();

  SDLoc DL(St);

  // Details about the old store

  SDValue Ch = St->getChain();

  SDValue BasePtr = St->getBasePtr();

  Align Alignment = St->getBaseAlign();

  MachineMemOperand::Flags MMOFlags = St->getMemOperand()->getFlags();

  AAMDNodes AAInfo = St->getAAInfo();


  // We split the store into slices of NumElements. fp16 trunc stores are vcvt

  // and then stored as truncating integer stores.

  EVT NewFromVT = EVT::getVectorVT(C, FromEltVT, NumElements);

  EVT NewToVT = EVT::getVectorVT(

      C, EVT::getIntegerVT(C, ToEltVT.getSizeInBits()), NumElements);


  SmallVector<SDValue, 4> Stores;

  for (unsigned i = 0; i < FromVT.getVectorNumElements() / NumElements; i++) {

    unsigned NewOffset = i * NumElements * ToEltVT.getSizeInBits() / 8;

    SDValue NewPtr =

        DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::getFixed(NewOffset));


    SDValue Extract =

        DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewFromVT, Trunc.getOperand(0),

                    DAG.getConstant(i * NumElements, DL, MVT::i32));


    SDValue FPTrunc =

        DAG.getNode(ARMISD::VCVTN, DL, MVT::v8f16, DAG.getUNDEF(MVT::v8f16),

                    Extract, DAG.getConstant(0, DL, MVT::i32));

    Extract = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, MVT::v4i32, FPTrunc);


    SDValue Store = DAG.getTruncStore(

        Ch, DL, Extract, NewPtr, St->getPointerInfo().getWithOffset(NewOffset),

        NewToVT, Alignment, MMOFlags, AAInfo);

    Stores.push_back(Store);

  }

  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores);

}


// Try taking a single vector store from an MVETRUNC (which would otherwise turn

// into an expensive buildvector) and splitting it into a series of narrowing

// stores.


static SDValue PerformSplittingMVETruncToNarrowingStores(StoreSDNode *St,

                                                         SelectionDAG &DAG) {

  if (!St->isSimple() || St->isTruncatingStore() || !St->isUnindexed())

    return SDValue();

  SDValue Trunc = St->getValue();

  if (Trunc->getOpcode() != ARMISD::MVETRUNC)

    return SDValue();

  EVT FromVT = Trunc->getOperand(0).getValueType();

  EVT ToVT = Trunc.getValueType();


  LLVMContext &C = *DAG.getContext();

  SDLoc DL(St);

  // Details about the old store

  SDValue Ch = St->getChain();

  SDValue BasePtr = St->getBasePtr();

  Align Alignment = St->getBaseAlign();

  MachineMemOperand::Flags MMOFlags = St->getMemOperand()->getFlags();

  AAMDNodes AAInfo = St->getAAInfo();


  EVT NewToVT = EVT::getVectorVT(C, ToVT.getVectorElementType(),

                                 FromVT.getVectorNumElements());


  SmallVector<SDValue, 4> Stores;

  for (unsigned i = 0; i < Trunc.getNumOperands(); i++) {

    unsigned NewOffset =

        i * FromVT.getVectorNumElements() * ToVT.getScalarSizeInBits() / 8;

    SDValue NewPtr =

        DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::getFixed(NewOffset));


    SDValue Extract = Trunc.getOperand(i);

    SDValue Store = DAG.getTruncStore(

        Ch, DL, Extract, NewPtr, St->getPointerInfo().getWithOffset(NewOffset),

        NewToVT, Alignment, MMOFlags, AAInfo);

    Stores.push_back(Store);

  }

  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores);

}


// Given a floating point store from an extracted vector, with an integer

// VGETLANE that already exists, store the existing VGETLANEu directly. This can

// help reduce fp register pressure, doesn't require the fp extract and allows

// use of more integer post-inc stores not available with vstr.


static SDValue PerformExtractFpToIntStores(StoreSDNode *St, SelectionDAG &DAG) {

  if (!St->isSimple() || St->isTruncatingStore() || !St->isUnindexed())

    return SDValue();

  SDValue Extract = St->getValue();

  EVT VT = Extract.getValueType();

  // For now only uses f16. This may be useful for f32 too, but that will

  // be bitcast(extract), not the VGETLANEu we currently check here.

  if (VT != MVT::f16 || Extract->getOpcode() != ISD::EXTRACT_VECTOR_ELT)

    return SDValue();


  SDNode *GetLane =

      DAG.getNodeIfExists(ARMISD::VGETLANEu, DAG.getVTList(MVT::i32),

                          {Extract.getOperand(0), Extract.getOperand(1)});

  if (!GetLane)

    return SDValue();


  LLVMContext &C = *DAG.getContext();

  SDLoc DL(St);

  // Create a new integer store to replace the existing floating point version.

  SDValue Ch = St->getChain();

  SDValue BasePtr = St->getBasePtr();

  Align Alignment = St->getBaseAlign();

  MachineMemOperand::Flags MMOFlags = St->getMemOperand()->getFlags();

  AAMDNodes AAInfo = St->getAAInfo();

  EVT NewToVT = EVT::getIntegerVT(C, VT.getSizeInBits());

  SDValue Store = DAG.getTruncStore(Ch, DL, SDValue(GetLane, 0), BasePtr,

                                    St->getPointerInfo(), NewToVT, Alignment,

                                    MMOFlags, AAInfo);


  return Store;

}


/// PerformSTORECombine - Target-specific dag combine xforms for

/// ISD::STORE.


static SDValue PerformSTORECombine(SDNode *N,

                                   TargetLowering::DAGCombinerInfo &DCI,

                                   const ARMSubtarget *Subtarget) {

  StoreSDNode *St = cast<StoreSDNode>(N);

  if (St->isVolatile())

    return SDValue();

  SDValue StVal = St->getValue();

  EVT VT = StVal.getValueType();


  if (Subtarget->hasNEON())

    if (SDValue Store = PerformTruncatingStoreCombine(St, DCI.DAG))

      return Store;


  if (Subtarget->hasMVEFloatOps())

    if (SDValue NewToken = PerformSplittingToNarrowingStores(St, DCI.DAG))

      return NewToken;


  if (Subtarget->hasMVEIntegerOps()) {

    if (SDValue NewChain = PerformExtractFpToIntStores(St, DCI.DAG))

      return NewChain;

    if (SDValue NewToken =

            PerformSplittingMVETruncToNarrowingStores(St, DCI.DAG))

      return NewToken;

  }


  if (!ISD::isNormalStore(St))

    return SDValue();


  // Split a store of a VMOVDRR into two integer stores to avoid mixing NEON and

  // ARM stores of arguments in the same cache line.

  if (StVal.getNode()->getOpcode() == ARMISD::VMOVDRR &&

      StVal.getNode()->hasOneUse()) {

    SelectionDAG  &DAG = DCI.DAG;

    bool isBigEndian = DAG.getDataLayout().isBigEndian();

    SDLoc DL(St);

    SDValue BasePtr = St->getBasePtr();

    SDValue NewST1 = DAG.getStore(

        St->getChain(), DL, StVal.getNode()->getOperand(isBigEndian ? 1 : 0),

        BasePtr, St->getPointerInfo(), St->getBaseAlign(),

        St->getMemOperand()->getFlags());


    SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,

                                    DAG.getConstant(4, DL, MVT::i32));

    return DAG.getStore(NewST1.getValue(0), DL,

                        StVal.getNode()->getOperand(isBigEndian ? 0 : 1),

                        OffsetPtr, St->getPointerInfo().getWithOffset(4),

                        St->getBaseAlign(), St->getMemOperand()->getFlags());

  }


  if (StVal.getValueType() == MVT::i64 &&

      StVal.getNode()->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {


    // Bitcast an i64 store extracted from a vector to f64.

    // Otherwise, the i64 value will be legalized to a pair of i32 values.

    SelectionDAG &DAG = DCI.DAG;

    SDLoc dl(StVal);

    SDValue IntVec = StVal.getOperand(0);

    EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64,

                                   IntVec.getValueType().getVectorNumElements());

    SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, IntVec);

    SDValue ExtElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,

                                 Vec, StVal.getOperand(1));

    dl = SDLoc(N);

    SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::i64, ExtElt);

    // Make the DAGCombiner fold the bitcasts.

    DCI.AddToWorklist(Vec.getNode());

    DCI.AddToWorklist(ExtElt.getNode());

    DCI.AddToWorklist(V.getNode());

    return DAG.getStore(St->getChain(), dl, V, St->getBasePtr(),

                        St->getPointerInfo(), St->getAlign(),

                        St->getMemOperand()->getFlags(), St->getAAInfo());

  }


  // If this is a legal vector store, try to combine it into a VST1_UPD.

  if (Subtarget->hasNEON() && ISD::isNormalStore(N) && VT.isVector() &&

      DCI.DAG.getTargetLoweringInfo().isTypeLegal(VT))

    return CombineBaseUpdate(N, DCI);


  return SDValue();

}


/// PerformVCVTCombine - VCVT (floating-point to fixed-point, Advanced SIMD)

/// can replace combinations of VMUL and VCVT (floating-point to integer)

/// when the VMUL has a constant operand that is a power of 2.

///

/// Example (assume d17 = <float 8.000000e+00, float 8.000000e+00>):

///  vmul.f32        d16, d17, d16

///  vcvt.s32.f32    d16, d16

/// becomes:

///  vcvt.s32.f32    d16, d16, #3


static SDValue PerformVCVTCombine(SDNode *N, SelectionDAG &DAG,

                                  const ARMSubtarget *Subtarget) {

  if (!Subtarget->hasNEON())

    return SDValue();


  SDValue Op = N->getOperand(0);

  if (!Op.getValueType().isVector() || !Op.getValueType().isSimple() ||

      Op.getOpcode() != ISD::FMUL)

    return SDValue();


  SDValue ConstVec = Op->getOperand(1);

  if (!isa<BuildVectorSDNode>(ConstVec))

    return SDValue();


  MVT FloatTy = Op.getSimpleValueType().getVectorElementType();

  uint32_t FloatBits = FloatTy.getSizeInBits();

  MVT IntTy = N->getSimpleValueType(0).getVectorElementType();

  uint32_t IntBits = IntTy.getSizeInBits();

  unsigned NumLanes = Op.getValueType().getVectorNumElements();

  if (FloatBits != 32 || IntBits > 32 || (NumLanes != 4 && NumLanes != 2)) {

    // These instructions only exist converting from f32 to i32. We can handle

    // smaller integers by generating an extra truncate, but larger ones would

    // be lossy. We also can't handle anything other than 2 or 4 lanes, since

    // these intructions only support v2i32/v4i32 types.

    return SDValue();

  }


  BitVector UndefElements;

  BuildVectorSDNode *BV = cast<BuildVectorSDNode>(ConstVec);

  int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, 33);

  if (C == -1 || C == 0 || C > 32)

    return SDValue();


  SDLoc dl(N);

  bool isSigned = N->getOpcode() == ISD::FP_TO_SINT;

  unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfp2fxs :

    Intrinsic::arm_neon_vcvtfp2fxu;

  SDValue FixConv = DAG.getNode(

      ISD::INTRINSIC_WO_CHAIN, dl, NumLanes == 2 ? MVT::v2i32 : MVT::v4i32,

      DAG.getConstant(IntrinsicOpcode, dl, MVT::i32), Op->getOperand(0),

      DAG.getConstant(C, dl, MVT::i32));


  if (IntBits < FloatBits)

    FixConv = DAG.getNode(ISD::TRUNCATE, dl, N->getValueType(0), FixConv);


  return FixConv;

}


static SDValue PerformFAddVSelectCombine(SDNode *N, SelectionDAG &DAG,

                                         const ARMSubtarget *Subtarget) {

  if (!Subtarget->hasMVEFloatOps())

    return SDValue();


  // Turn (fadd x, (vselect c, y, -0.0)) into (vselect c, (fadd x, y), x)

  // The second form can be more easily turned into a predicated vadd, and

  // possibly combined into a fma to become a predicated vfma.

  SDValue Op0 = N->getOperand(0);

  SDValue Op1 = N->getOperand(1);

  EVT VT = N->getValueType(0);

  SDLoc DL(N);


  // The identity element for a fadd is -0.0 or +0.0 when the nsz flag is set,

  // which these VMOV's represent.

  auto isIdentitySplat = [&](SDValue Op, bool NSZ) {

    if (Op.getOpcode() != ISD::BITCAST ||

        Op.getOperand(0).getOpcode() != ARMISD::VMOVIMM)

      return false;

    uint64_t ImmVal = Op.getOperand(0).getConstantOperandVal(0);

    if (VT == MVT::v4f32 && (ImmVal == 1664 || (ImmVal == 0 && NSZ)))

      return true;

    if (VT == MVT::v8f16 && (ImmVal == 2688 || (ImmVal == 0 && NSZ)))

      return true;

    return false;

  };


  if (Op0.getOpcode() == ISD::VSELECT && Op1.getOpcode() != ISD::VSELECT)

    std::swap(Op0, Op1);


  if (Op1.getOpcode() != ISD::VSELECT)

    return SDValue();


  SDNodeFlags FaddFlags = N->getFlags();

  bool NSZ = FaddFlags.hasNoSignedZeros();

  if (!isIdentitySplat(Op1.getOperand(2), NSZ))

    return SDValue();


  SDValue FAdd =

      DAG.getNode(ISD::FADD, DL, VT, Op0, Op1.getOperand(1), FaddFlags);

  return DAG.getNode(ISD::VSELECT, DL, VT, Op1.getOperand(0), FAdd, Op0, FaddFlags);

}


static SDValue PerformFADDVCMLACombine(SDNode *N, SelectionDAG &DAG) {

  SDValue LHS = N->getOperand(0);

  SDValue RHS = N->getOperand(1);

  EVT VT = N->getValueType(0);

  SDLoc DL(N);


  if (!N->getFlags().hasAllowReassociation())

    return SDValue();


  // Combine fadd(a, vcmla(b, c, d)) -> vcmla(fadd(a, b), b, c)

  auto ReassocComplex = [&](SDValue A, SDValue B) {

    if (A.getOpcode() != ISD::INTRINSIC_WO_CHAIN)

      return SDValue();

    unsigned Opc = A.getConstantOperandVal(0);

    if (Opc != Intrinsic::arm_mve_vcmlaq)

      return SDValue();

    SDValue VCMLA = DAG.getNode(

        ISD::INTRINSIC_WO_CHAIN, DL, VT, A.getOperand(0), A.getOperand(1),

        DAG.getNode(ISD::FADD, DL, VT, A.getOperand(2), B, N->getFlags()),

        A.getOperand(3), A.getOperand(4));

    VCMLA->setFlags(A->getFlags());

    return VCMLA;

  };

  if (SDValue R = ReassocComplex(LHS, RHS))

    return R;

  if (SDValue R = ReassocComplex(RHS, LHS))

    return R;


  return SDValue();

}


static SDValue PerformFADDCombine(SDNode *N, SelectionDAG &DAG,

                                  const ARMSubtarget *Subtarget) {

  if (SDValue S = PerformFAddVSelectCombine(N, DAG, Subtarget))

    return S;

  if (SDValue S = PerformFADDVCMLACombine(N, DAG))

    return S;

  return SDValue();

}


/// PerformVMulVCTPCombine - VCVT (fixed-point to floating-point, Advanced SIMD)

/// can replace combinations of VCVT (integer to floating-point) and VMUL

/// when the VMUL has a constant operand that is a power of 2.

///

/// Example (assume d17 = <float 0.125, float 0.125>):

///  vcvt.f32.s32    d16, d16

///  vmul.f32        d16, d16, d17

/// becomes:

///  vcvt.f32.s32    d16, d16, #3


static SDValue PerformVMulVCTPCombine(SDNode *N, SelectionDAG &DAG,

                                      const ARMSubtarget *Subtarget) {

  if (!Subtarget->hasNEON())

    return SDValue();


  SDValue Op = N->getOperand(0);

  unsigned OpOpcode = Op.getNode()->getOpcode();

  if (!N->getValueType(0).isVector() || !N->getValueType(0).isSimple() ||

      (OpOpcode != ISD::SINT_TO_FP && OpOpcode != ISD::UINT_TO_FP))

    return SDValue();


  SDValue ConstVec = N->getOperand(1);

  if (!isa<BuildVectorSDNode>(ConstVec))

    return SDValue();


  MVT FloatTy = N->getSimpleValueType(0).getVectorElementType();

  uint32_t FloatBits = FloatTy.getSizeInBits();

  MVT IntTy = Op.getOperand(0).getSimpleValueType().getVectorElementType();

  uint32_t IntBits = IntTy.getSizeInBits();

  unsigned NumLanes = Op.getValueType().getVectorNumElements();

  if (FloatBits != 32 || IntBits > 32 || (NumLanes != 4 && NumLanes != 2)) {

    // These instructions only exist converting from i32 to f32. We can handle

    // smaller integers by generating an extra extend, but larger ones would

    // be lossy. We also can't handle anything other than 2 or 4 lanes, since

    // these intructions only support v2i32/v4i32 types.

    return SDValue();

  }


  ConstantFPSDNode *CN = isConstOrConstSplatFP(ConstVec, true);

  APFloat Recip(0.0f);

  if (!CN || !CN->getValueAPF().getExactInverse(&Recip))

    return SDValue();


  bool IsExact;

  APSInt IntVal(33);

  if (Recip.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact) !=

          APFloat::opOK ||

      !IsExact)

    return SDValue();


  int32_t C = IntVal.exactLogBase2();

  if (C == -1 || C == 0 || C > 32)

    return SDValue();


  SDLoc DL(N);

  bool isSigned = OpOpcode == ISD::SINT_TO_FP;

  SDValue ConvInput = Op.getOperand(0);

  if (IntBits < FloatBits)

    ConvInput = DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, DL,

                            NumLanes == 2 ? MVT::v2i32 : MVT::v4i32, ConvInput);


  unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfxs2fp

                                      : Intrinsic::arm_neon_vcvtfxu2fp;

  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, Op.getValueType(),

                     DAG.getConstant(IntrinsicOpcode, DL, MVT::i32), ConvInput,

                     DAG.getConstant(C, DL, MVT::i32));

}


static SDValue PerformVECREDUCE_ADDCombine(SDNode *N, SelectionDAG &DAG,

                                           const ARMSubtarget *ST) {

  if (!ST->hasMVEIntegerOps())

    return SDValue();


  assert(N->getOpcode() == ISD::VECREDUCE_ADD);

  EVT ResVT = N->getValueType(0);

  SDValue N0 = N->getOperand(0);

  SDLoc dl(N);


  // Try to turn vecreduce_add(add(x, y)) into vecreduce(x) + vecreduce(y)

  if (ResVT == MVT::i32 && N0.getOpcode() == ISD::ADD &&

      (N0.getValueType() == MVT::v4i32 || N0.getValueType() == MVT::v8i16 ||

       N0.getValueType() == MVT::v16i8)) {

    SDValue Red0 = DAG.getNode(ISD::VECREDUCE_ADD, dl, ResVT, N0.getOperand(0));

    SDValue Red1 = DAG.getNode(ISD::VECREDUCE_ADD, dl, ResVT, N0.getOperand(1));

    return DAG.getNode(ISD::ADD, dl, ResVT, Red0, Red1);

  }


  // We are looking for something that will have illegal types if left alone,

  // but that we can convert to a single instruction under MVE. For example

  // vecreduce_add(sext(A, v8i32)) => VADDV.s16 A

  // or

  // vecreduce_add(mul(zext(A, v16i32), zext(B, v16i32))) => VMLADAV.u8 A, B


  // The legal cases are:

  //   VADDV u/s 8/16/32

  //   VMLAV u/s 8/16/32

  //   VADDLV u/s 32

  //   VMLALV u/s 16/32


  // If the input vector is smaller than legal (v4i8/v4i16 for example) we can

  // extend it and use v4i32 instead.

  auto ExtTypeMatches = [](SDValue A, ArrayRef<MVT> ExtTypes) {

    EVT AVT = A.getValueType();

    return any_of(ExtTypes, [&](MVT Ty) {

      return AVT.getVectorNumElements() == Ty.getVectorNumElements() &&

             AVT.bitsLE(Ty);

    });

  };

  auto ExtendIfNeeded = [&](SDValue A, unsigned ExtendCode) {

    EVT AVT = A.getValueType();

    if (!AVT.is128BitVector())

      A = DAG.getNode(ExtendCode, dl,

                      AVT.changeVectorElementType(MVT::getIntegerVT(

                          128 / AVT.getVectorMinNumElements())),

                      A);

    return A;

  };

  auto IsVADDV = [&](MVT RetTy, unsigned ExtendCode, ArrayRef<MVT> ExtTypes) {

    if (ResVT != RetTy || N0->getOpcode() != ExtendCode)

      return SDValue();

    SDValue A = N0->getOperand(0);

    if (ExtTypeMatches(A, ExtTypes))

      return ExtendIfNeeded(A, ExtendCode);

    return SDValue();

  };

  auto IsPredVADDV = [&](MVT RetTy, unsigned ExtendCode,

                         ArrayRef<MVT> ExtTypes, SDValue &Mask) {

    if (ResVT != RetTy || N0->getOpcode() != ISD::VSELECT ||

        !ISD::isBuildVectorAllZeros(N0->getOperand(2).getNode()))

      return SDValue();

    Mask = N0->getOperand(0);

    SDValue Ext = N0->getOperand(1);

    if (Ext->getOpcode() != ExtendCode)

      return SDValue();

    SDValue A = Ext->getOperand(0);

    if (ExtTypeMatches(A, ExtTypes))

      return ExtendIfNeeded(A, ExtendCode);

    return SDValue();

  };

  auto IsVMLAV = [&](MVT RetTy, unsigned ExtendCode, ArrayRef<MVT> ExtTypes,

                     SDValue &A, SDValue &B) {

    // For a vmla we are trying to match a larger pattern:

    // ExtA = sext/zext A

    // ExtB = sext/zext B

    // Mul = mul ExtA, ExtB

    // vecreduce.add Mul

    // There might also be en extra extend between the mul and the addreduce, so

    // long as the bitwidth is high enough to make them equivalent (for example

    // original v8i16 might be mul at v8i32 and the reduce happens at v8i64).

    if (ResVT != RetTy)

      return false;

    SDValue Mul = N0;

    if (Mul->getOpcode() == ExtendCode &&

        Mul->getOperand(0).getScalarValueSizeInBits() * 2 >=

            ResVT.getScalarSizeInBits())

      Mul = Mul->getOperand(0);

    if (Mul->getOpcode() != ISD::MUL)

      return false;

    SDValue ExtA = Mul->getOperand(0);

    SDValue ExtB = Mul->getOperand(1);

    if (ExtA->getOpcode() != ExtendCode || ExtB->getOpcode() != ExtendCode)

      return false;

    A = ExtA->getOperand(0);

    B = ExtB->getOperand(0);

    if (ExtTypeMatches(A, ExtTypes) && ExtTypeMatches(B, ExtTypes)) {

      A = ExtendIfNeeded(A, ExtendCode);

      B = ExtendIfNeeded(B, ExtendCode);

      return true;

    }

    return false;

  };

  auto IsPredVMLAV = [&](MVT RetTy, unsigned ExtendCode, ArrayRef<MVT> ExtTypes,

                     SDValue &A, SDValue &B, SDValue &Mask) {

    // Same as the pattern above with a select for the zero predicated lanes

    // ExtA = sext/zext A

    // ExtB = sext/zext B

    // Mul = mul ExtA, ExtB

    // N0 = select Mask, Mul, 0

    // vecreduce.add N0

    if (ResVT != RetTy || N0->getOpcode() != ISD::VSELECT ||

        !ISD::isBuildVectorAllZeros(N0->getOperand(2).getNode()))

      return false;

    Mask = N0->getOperand(0);

    SDValue Mul = N0->getOperand(1);

    if (Mul->getOpcode() == ExtendCode &&

        Mul->getOperand(0).getScalarValueSizeInBits() * 2 >=

            ResVT.getScalarSizeInBits())

      Mul = Mul->getOperand(0);

    if (Mul->getOpcode() != ISD::MUL)

      return false;

    SDValue ExtA = Mul->getOperand(0);

    SDValue ExtB = Mul->getOperand(1);

    if (ExtA->getOpcode() != ExtendCode || ExtB->getOpcode() != ExtendCode)

      return false;

    A = ExtA->getOperand(0);

    B = ExtB->getOperand(0);

    if (ExtTypeMatches(A, ExtTypes) && ExtTypeMatches(B, ExtTypes)) {

      A = ExtendIfNeeded(A, ExtendCode);

      B = ExtendIfNeeded(B, ExtendCode);

      return true;

    }

    return false;

  };

  auto Create64bitNode = [&](unsigned Opcode, ArrayRef<SDValue> Ops) {

    // Split illegal MVT::v16i8->i64 vector reductions into two legal v8i16->i64

    // reductions. The operands are extended with MVEEXT, but as they are

    // reductions the lane orders do not matter. MVEEXT may be combined with

    // loads to produce two extending loads, or else they will be expanded to

    // VREV/VMOVL.

    EVT VT = Ops[0].getValueType();

    if (VT == MVT::v16i8) {

      assert((Opcode == ARMISD::VMLALVs || Opcode == ARMISD::VMLALVu) &&

             "Unexpected illegal long reduction opcode");

      bool IsUnsigned = Opcode == ARMISD::VMLALVu;


      SDValue Ext0 =

          DAG.getNode(IsUnsigned ? ARMISD::MVEZEXT : ARMISD::MVESEXT, dl,

                      DAG.getVTList(MVT::v8i16, MVT::v8i16), Ops[0]);

      SDValue Ext1 =

          DAG.getNode(IsUnsigned ? ARMISD::MVEZEXT : ARMISD::MVESEXT, dl,

                      DAG.getVTList(MVT::v8i16, MVT::v8i16), Ops[1]);


      SDValue MLA0 = DAG.getNode(Opcode, dl, DAG.getVTList(MVT::i32, MVT::i32),

                                 Ext0, Ext1);

      SDValue MLA1 =

          DAG.getNode(IsUnsigned ? ARMISD::VMLALVAu : ARMISD::VMLALVAs, dl,

                      DAG.getVTList(MVT::i32, MVT::i32), MLA0, MLA0.getValue(1),

                      Ext0.getValue(1), Ext1.getValue(1));

      return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, MLA1, MLA1.getValue(1));

    }

    SDValue Node = DAG.getNode(Opcode, dl, {MVT::i32, MVT::i32}, Ops);

    return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Node,

                       SDValue(Node.getNode(), 1));

  };


  SDValue A, B;

  SDValue Mask;

  if (IsVMLAV(MVT::i32, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v16i8}, A, B))

    return DAG.getNode(ARMISD::VMLAVs, dl, ResVT, A, B);

  if (IsVMLAV(MVT::i32, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v16i8}, A, B))

    return DAG.getNode(ARMISD::VMLAVu, dl, ResVT, A, B);

  if (IsVMLAV(MVT::i64, ISD::SIGN_EXTEND, {MVT::v16i8, MVT::v8i16, MVT::v4i32},

              A, B))

    return Create64bitNode(ARMISD::VMLALVs, {A, B});

  if (IsVMLAV(MVT::i64, ISD::ZERO_EXTEND, {MVT::v16i8, MVT::v8i16, MVT::v4i32},

              A, B))

    return Create64bitNode(ARMISD::VMLALVu, {A, B});

  if (IsVMLAV(MVT::i16, ISD::SIGN_EXTEND, {MVT::v16i8}, A, B))

    return DAG.getNode(ISD::TRUNCATE, dl, ResVT,

                       DAG.getNode(ARMISD::VMLAVs, dl, MVT::i32, A, B));

  if (IsVMLAV(MVT::i16, ISD::ZERO_EXTEND, {MVT::v16i8}, A, B))

    return DAG.getNode(ISD::TRUNCATE, dl, ResVT,

                       DAG.getNode(ARMISD::VMLAVu, dl, MVT::i32, A, B));


  if (IsPredVMLAV(MVT::i32, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v16i8}, A, B,

                  Mask))

    return DAG.getNode(ARMISD::VMLAVps, dl, ResVT, A, B, Mask);

  if (IsPredVMLAV(MVT::i32, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v16i8}, A, B,

                  Mask))

    return DAG.getNode(ARMISD::VMLAVpu, dl, ResVT, A, B, Mask);

  if (IsPredVMLAV(MVT::i64, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v4i32}, A, B,

                  Mask))

    return Create64bitNode(ARMISD::VMLALVps, {A, B, Mask});

  if (IsPredVMLAV(MVT::i64, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v4i32}, A, B,

                  Mask))

    return Create64bitNode(ARMISD::VMLALVpu, {A, B, Mask});

  if (IsPredVMLAV(MVT::i16, ISD::SIGN_EXTEND, {MVT::v16i8}, A, B, Mask))

    return DAG.getNode(ISD::TRUNCATE, dl, ResVT,

                       DAG.getNode(ARMISD::VMLAVps, dl, MVT::i32, A, B, Mask));

  if (IsPredVMLAV(MVT::i16, ISD::ZERO_EXTEND, {MVT::v16i8}, A, B, Mask))

    return DAG.getNode(ISD::TRUNCATE, dl, ResVT,

                       DAG.getNode(ARMISD::VMLAVpu, dl, MVT::i32, A, B, Mask));


  if (SDValue A = IsVADDV(MVT::i32, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v16i8}))

    return DAG.getNode(ARMISD::VADDVs, dl, ResVT, A);

  if (SDValue A = IsVADDV(MVT::i32, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v16i8}))

    return DAG.getNode(ARMISD::VADDVu, dl, ResVT, A);

  if (SDValue A = IsVADDV(MVT::i64, ISD::SIGN_EXTEND, {MVT::v4i32}))

    return Create64bitNode(ARMISD::VADDLVs, {A});

  if (SDValue A = IsVADDV(MVT::i64, ISD::ZERO_EXTEND, {MVT::v4i32}))

    return Create64bitNode(ARMISD::VADDLVu, {A});

  if (SDValue A = IsVADDV(MVT::i16, ISD::SIGN_EXTEND, {MVT::v16i8}))

    return DAG.getNode(ISD::TRUNCATE, dl, ResVT,

                       DAG.getNode(ARMISD::VADDVs, dl, MVT::i32, A));

  if (SDValue A = IsVADDV(MVT::i16, ISD::ZERO_EXTEND, {MVT::v16i8}))

    return DAG.getNode(ISD::TRUNCATE, dl, ResVT,

                       DAG.getNode(ARMISD::VADDVu, dl, MVT::i32, A));


  if (SDValue A = IsPredVADDV(MVT::i32, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v16i8}, Mask))

    return DAG.getNode(ARMISD::VADDVps, dl, ResVT, A, Mask);

  if (SDValue A = IsPredVADDV(MVT::i32, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v16i8}, Mask))

    return DAG.getNode(ARMISD::VADDVpu, dl, ResVT, A, Mask);

  if (SDValue A = IsPredVADDV(MVT::i64, ISD::SIGN_EXTEND, {MVT::v4i32}, Mask))

    return Create64bitNode(ARMISD::VADDLVps, {A, Mask});

  if (SDValue A = IsPredVADDV(MVT::i64, ISD::ZERO_EXTEND, {MVT::v4i32}, Mask))

    return Create64bitNode(ARMISD::VADDLVpu, {A, Mask});

  if (SDValue A = IsPredVADDV(MVT::i16, ISD::SIGN_EXTEND, {MVT::v16i8}, Mask))

    return DAG.getNode(ISD::TRUNCATE, dl, ResVT,

                       DAG.getNode(ARMISD::VADDVps, dl, MVT::i32, A, Mask));

  if (SDValue A = IsPredVADDV(MVT::i16, ISD::ZERO_EXTEND, {MVT::v16i8}, Mask))

    return DAG.getNode(ISD::TRUNCATE, dl, ResVT,

                       DAG.getNode(ARMISD::VADDVpu, dl, MVT::i32, A, Mask));


  // Some complications. We can get a case where the two inputs of the mul are

  // the same, then the output sext will have been helpfully converted to a

  // zext. Turn it back.

  SDValue Op = N0;

  if (Op->getOpcode() == ISD::VSELECT)

    Op = Op->getOperand(1);

  if (Op->getOpcode() == ISD::ZERO_EXTEND &&

      Op->getOperand(0)->getOpcode() == ISD::MUL) {

    SDValue Mul = Op->getOperand(0);

    if (Mul->getOperand(0) == Mul->getOperand(1) &&

        Mul->getOperand(0)->getOpcode() == ISD::SIGN_EXTEND) {

      SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND, dl, N0->getValueType(0), Mul);

      if (Op != N0)

        Ext = DAG.getNode(ISD::VSELECT, dl, N0->getValueType(0),

                          N0->getOperand(0), Ext, N0->getOperand(2));

      return DAG.getNode(ISD::VECREDUCE_ADD, dl, ResVT, Ext);

    }

  }


  return SDValue();

}


// Looks for vaddv(shuffle) or vmlav(shuffle, shuffle), with a shuffle where all

// the lanes are used. Due to the reduction being commutative the shuffle can be

// removed.


static SDValue PerformReduceShuffleCombine(SDNode *N, SelectionDAG &DAG) {

  unsigned VecOp = N->getOperand(0).getValueType().isVector() ? 0 : 2;

  auto *Shuf = dyn_cast<ShuffleVectorSDNode>(N->getOperand(VecOp));

  if (!Shuf || !Shuf->getOperand(1).isUndef())

    return SDValue();


  // Check all elements are used once in the mask.

  ArrayRef<int> Mask = Shuf->getMask();

  APInt SetElts(Mask.size(), 0);

  for (int E : Mask) {

    if (E < 0 || E >= (int)Mask.size())

      return SDValue();

    SetElts.setBit(E);

  }

  if (!SetElts.isAllOnes())

    return SDValue();


  if (N->getNumOperands() != VecOp + 1) {

    auto *Shuf2 = dyn_cast<ShuffleVectorSDNode>(N->getOperand(VecOp + 1));

    if (!Shuf2 || !Shuf2->getOperand(1).isUndef() || Shuf2->getMask() != Mask)

      return SDValue();

  }


  SmallVector<SDValue> Ops;

  for (SDValue Op : N->ops()) {

    if (Op.getValueType().isVector())

      Ops.push_back(Op.getOperand(0));

    else

      Ops.push_back(Op);

  }

  return DAG.getNode(N->getOpcode(), SDLoc(N), N->getVTList(), Ops);

}


static SDValue PerformVMOVNCombine(SDNode *N,

                                   TargetLowering::DAGCombinerInfo &DCI) {

  SDValue Op0 = N->getOperand(0);

  SDValue Op1 = N->getOperand(1);

  unsigned IsTop = N->getConstantOperandVal(2);


  // VMOVNT a undef -> a

  // VMOVNB a undef -> a

  // VMOVNB undef a -> a

  if (Op1->isUndef())

    return Op0;

  if (Op0->isUndef() && !IsTop)

    return Op1;


  // VMOVNt(c, VQMOVNb(a, b)) => VQMOVNt(c, b)

  // VMOVNb(c, VQMOVNb(a, b)) => VQMOVNb(c, b)

  if ((Op1->getOpcode() == ARMISD::VQMOVNs ||

       Op1->getOpcode() == ARMISD::VQMOVNu) &&

      Op1->getConstantOperandVal(2) == 0)

    return DCI.DAG.getNode(Op1->getOpcode(), SDLoc(Op1), N->getValueType(0),

                           Op0, Op1->getOperand(1), N->getOperand(2));


  // Only the bottom lanes from Qm (Op1) and either the top or bottom lanes from

  // Qd (Op0) are demanded from a VMOVN, depending on whether we are inserting

  // into the top or bottom lanes.

  unsigned NumElts = N->getValueType(0).getVectorNumElements();

  APInt Op1DemandedElts = APInt::getSplat(NumElts, APInt::getLowBitsSet(2, 1));

  APInt Op0DemandedElts =

      IsTop ? Op1DemandedElts

            : APInt::getSplat(NumElts, APInt::getHighBitsSet(2, 1));


  const TargetLowering &TLI = DCI.DAG.getTargetLoweringInfo();

  if (TLI.SimplifyDemandedVectorElts(Op0, Op0DemandedElts, DCI))

    return SDValue(N, 0);

  if (TLI.SimplifyDemandedVectorElts(Op1, Op1DemandedElts, DCI))

    return SDValue(N, 0);


  return SDValue();

}


static SDValue PerformVQMOVNCombine(SDNode *N,

                                    TargetLowering::DAGCombinerInfo &DCI) {

  SDValue Op0 = N->getOperand(0);

  unsigned IsTop = N->getConstantOperandVal(2);


  unsigned NumElts = N->getValueType(0).getVectorNumElements();

  APInt Op0DemandedElts =

      APInt::getSplat(NumElts, IsTop ? APInt::getLowBitsSet(2, 1)

                                     : APInt::getHighBitsSet(2, 1));


  const TargetLowering &TLI = DCI.DAG.getTargetLoweringInfo();

  if (TLI.SimplifyDemandedVectorElts(Op0, Op0DemandedElts, DCI))

    return SDValue(N, 0);

  return SDValue();

}


static SDValue PerformVQDMULHCombine(SDNode *N,

                                     TargetLowering::DAGCombinerInfo &DCI) {

  EVT VT = N->getValueType(0);

  SDValue LHS = N->getOperand(0);

  SDValue RHS = N->getOperand(1);


  auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(LHS);

  auto *Shuf1 = dyn_cast<ShuffleVectorSDNode>(RHS);

  // Turn VQDMULH(shuffle, shuffle) -> shuffle(VQDMULH)

  if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) &&

      LHS.getOperand(1).isUndef() && RHS.getOperand(1).isUndef() &&

      (LHS.hasOneUse() || RHS.hasOneUse() || LHS == RHS)) {

    SDLoc DL(N);

    SDValue NewBinOp = DCI.DAG.getNode(N->getOpcode(), DL, VT,

                                       LHS.getOperand(0), RHS.getOperand(0));

    SDValue UndefV = LHS.getOperand(1);

    return DCI.DAG.getVectorShuffle(VT, DL, NewBinOp, UndefV, Shuf0->getMask());

  }

  return SDValue();

}


static SDValue PerformLongShiftCombine(SDNode *N, SelectionDAG &DAG) {

  SDLoc DL(N);

  SDValue Op0 = N->getOperand(0);

  SDValue Op1 = N->getOperand(1);


  // Turn X << -C -> X >> C and viceversa. The negative shifts can come up from

  // uses of the intrinsics.

  if (auto C = dyn_cast<ConstantSDNode>(N->getOperand(2))) {

    int ShiftAmt = C->getSExtValue();

    if (ShiftAmt == 0) {

      SDValue Merge = DAG.getMergeValues({Op0, Op1}, DL);

      DAG.ReplaceAllUsesWith(N, Merge.getNode());

      return SDValue();

    }


    if (ShiftAmt >= -32 && ShiftAmt < 0) {

      unsigned NewOpcode =

          N->getOpcode() == ARMISD::LSLL ? ARMISD::LSRL : ARMISD::LSLL;

      SDValue NewShift = DAG.getNode(NewOpcode, DL, N->getVTList(), Op0, Op1,

                                     DAG.getConstant(-ShiftAmt, DL, MVT::i32));

      DAG.ReplaceAllUsesWith(N, NewShift.getNode());

      return NewShift;

    }

  }


  return SDValue();

}


/// PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics.


SDValue ARMTargetLowering::PerformIntrinsicCombine(SDNode *N,

                                                   DAGCombinerInfo &DCI) const {

  SelectionDAG &DAG = DCI.DAG;

  unsigned IntNo = N->getConstantOperandVal(0);

  switch (IntNo) {

  default:

    // Don't do anything for most intrinsics.

    break;


  // Vector shifts: check for immediate versions and lower them.

  // Note: This is done during DAG combining instead of DAG legalizing because

  // the build_vectors for 64-bit vector element shift counts are generally

  // not legal, and it is hard to see their values after they get legalized to

  // loads from a constant pool.

  case Intrinsic::arm_neon_vshifts:

  case Intrinsic::arm_neon_vshiftu:

  case Intrinsic::arm_neon_vrshifts:

  case Intrinsic::arm_neon_vrshiftu:

  case Intrinsic::arm_neon_vrshiftn:

  case Intrinsic::arm_neon_vqshifts:

  case Intrinsic::arm_neon_vqshiftu:

  case Intrinsic::arm_neon_vqshiftsu:

  case Intrinsic::arm_neon_vqshiftns:

  case Intrinsic::arm_neon_vqshiftnu:

  case Intrinsic::arm_neon_vqshiftnsu:

  case Intrinsic::arm_neon_vqrshiftns:

  case Intrinsic::arm_neon_vqrshiftnu:

  case Intrinsic::arm_neon_vqrshiftnsu: {

    EVT VT = N->getOperand(1).getValueType();

    int64_t Cnt;

    unsigned VShiftOpc = 0;


    switch (IntNo) {

    case Intrinsic::arm_neon_vshifts:

    case Intrinsic::arm_neon_vshiftu:

      if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) {

        VShiftOpc = ARMISD::VSHLIMM;

        break;

      }

      if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) {

        VShiftOpc = (IntNo == Intrinsic::arm_neon_vshifts ? ARMISD::VSHRsIMM

                                                          : ARMISD::VSHRuIMM);

        break;

      }

      return SDValue();


    case Intrinsic::arm_neon_vrshifts:

    case Intrinsic::arm_neon_vrshiftu:

      if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt))

        break;

      return SDValue();


    case Intrinsic::arm_neon_vqshifts:

    case Intrinsic::arm_neon_vqshiftu:

      if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))

        break;

      return SDValue();


    case Intrinsic::arm_neon_vqshiftsu:

      if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))

        break;

      llvm_unreachable("invalid shift count for vqshlu intrinsic");


    case Intrinsic::arm_neon_vrshiftn:

    case Intrinsic::arm_neon_vqshiftns:

    case Intrinsic::arm_neon_vqshiftnu:

    case Intrinsic::arm_neon_vqshiftnsu:

    case Intrinsic::arm_neon_vqrshiftns:

    case Intrinsic::arm_neon_vqrshiftnu:

    case Intrinsic::arm_neon_vqrshiftnsu:

      // Narrowing shifts require an immediate right shift.

      if (isVShiftRImm(N->getOperand(2), VT, true, true, Cnt))

        break;

      llvm_unreachable("invalid shift count for narrowing vector shift "

                       "intrinsic");


    default:

      llvm_unreachable("unhandled vector shift");

    }


    switch (IntNo) {

    case Intrinsic::arm_neon_vshifts:

    case Intrinsic::arm_neon_vshiftu:

      // Opcode already set above.

      break;

    case Intrinsic::arm_neon_vrshifts:

      VShiftOpc = ARMISD::VRSHRsIMM;

      break;

    case Intrinsic::arm_neon_vrshiftu:

      VShiftOpc = ARMISD::VRSHRuIMM;

      break;

    case Intrinsic::arm_neon_vrshiftn:

      VShiftOpc = ARMISD::VRSHRNIMM;

      break;

    case Intrinsic::arm_neon_vqshifts:

      VShiftOpc = ARMISD::VQSHLsIMM;

      break;

    case Intrinsic::arm_neon_vqshiftu:

      VShiftOpc = ARMISD::VQSHLuIMM;

      break;

    case Intrinsic::arm_neon_vqshiftsu:

      VShiftOpc = ARMISD::VQSHLsuIMM;

      break;

    case Intrinsic::arm_neon_vqshiftns:

      VShiftOpc = ARMISD::VQSHRNsIMM;

      break;

    case Intrinsic::arm_neon_vqshiftnu:

      VShiftOpc = ARMISD::VQSHRNuIMM;

      break;

    case Intrinsic::arm_neon_vqshiftnsu:

      VShiftOpc = ARMISD::VQSHRNsuIMM;

      break;

    case Intrinsic::arm_neon_vqrshiftns:

      VShiftOpc = ARMISD::VQRSHRNsIMM;

      break;

    case Intrinsic::arm_neon_vqrshiftnu:

      VShiftOpc = ARMISD::VQRSHRNuIMM;

      break;

    case Intrinsic::arm_neon_vqrshiftnsu:

      VShiftOpc = ARMISD::VQRSHRNsuIMM;

      break;

    }


    SDLoc dl(N);

    return DAG.getNode(VShiftOpc, dl, N->getValueType(0),

                       N->getOperand(1), DAG.getConstant(Cnt, dl, MVT::i32));

  }


  case Intrinsic::arm_neon_vshiftins: {

    EVT VT = N->getOperand(1).getValueType();

    int64_t Cnt;

    unsigned VShiftOpc = 0;


    if (isVShiftLImm(N->getOperand(3), VT, false, Cnt))

      VShiftOpc = ARMISD::VSLIIMM;

    else if (isVShiftRImm(N->getOperand(3), VT, false, true, Cnt))

      VShiftOpc = ARMISD::VSRIIMM;

    else {

      llvm_unreachable("invalid shift count for vsli/vsri intrinsic");

    }


    SDLoc dl(N);

    return DAG.getNode(VShiftOpc, dl, N->getValueType(0),

                       N->getOperand(1), N->getOperand(2),

                       DAG.getConstant(Cnt, dl, MVT::i32));

  }


  case Intrinsic::arm_neon_vqrshifts:

  case Intrinsic::arm_neon_vqrshiftu:

    // No immediate versions of these to check for.

    break;


  case Intrinsic::arm_neon_vbsl: {

    SDLoc dl(N);

    return DAG.getNode(ARMISD::VBSP, dl, N->getValueType(0), N->getOperand(1),

                       N->getOperand(2), N->getOperand(3));

  }

  case Intrinsic::arm_mve_vqdmlah:

  case Intrinsic::arm_mve_vqdmlash:

  case Intrinsic::arm_mve_vqrdmlah:

  case Intrinsic::arm_mve_vqrdmlash:

  case Intrinsic::arm_mve_vmla_n_predicated:

  case Intrinsic::arm_mve_vmlas_n_predicated:

  case Intrinsic::arm_mve_vqdmlah_predicated:

  case Intrinsic::arm_mve_vqdmlash_predicated:

  case Intrinsic::arm_mve_vqrdmlah_predicated:

  case Intrinsic::arm_mve_vqrdmlash_predicated: {

    // These intrinsics all take an i32 scalar operand which is narrowed to the

    // size of a single lane of the vector type they return. So we don't need

    // any bits of that operand above that point, which allows us to eliminate

    // uxth/sxth.

    unsigned BitWidth = N->getValueType(0).getScalarSizeInBits();

    APInt DemandedMask = APInt::getLowBitsSet(32, BitWidth);

    if (SimplifyDemandedBits(N->getOperand(3), DemandedMask, DCI))

      return SDValue();

    break;

  }


  case Intrinsic::arm_mve_minv:

  case Intrinsic::arm_mve_maxv:

  case Intrinsic::arm_mve_minav:

  case Intrinsic::arm_mve_maxav:

  case Intrinsic::arm_mve_minv_predicated:

  case Intrinsic::arm_mve_maxv_predicated:

  case Intrinsic::arm_mve_minav_predicated:

  case Intrinsic::arm_mve_maxav_predicated: {

    // These intrinsics all take an i32 scalar operand which is narrowed to the

    // size of a single lane of the vector type they take as the other input.

    unsigned BitWidth = N->getOperand(2)->getValueType(0).getScalarSizeInBits();

    APInt DemandedMask = APInt::getLowBitsSet(32, BitWidth);

    if (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI))

      return SDValue();

    break;

  }


  case Intrinsic::arm_mve_addv: {

    // Turn this intrinsic straight into the appropriate ARMISD::VADDV node,

    // which allow PerformADDVecReduce to turn it into VADDLV when possible.

    bool Unsigned = N->getConstantOperandVal(2);

    unsigned Opc = Unsigned ? ARMISD::VADDVu : ARMISD::VADDVs;

    return DAG.getNode(Opc, SDLoc(N), N->getVTList(), N->getOperand(1));

  }


  case Intrinsic::arm_mve_addlv:

  case Intrinsic::arm_mve_addlv_predicated: {

    // Same for these, but ARMISD::VADDLV has to be followed by a BUILD_PAIR

    // which recombines the two outputs into an i64

    bool Unsigned = N->getConstantOperandVal(2);

    unsigned Opc = IntNo == Intrinsic::arm_mve_addlv ?

                    (Unsigned ? ARMISD::VADDLVu : ARMISD::VADDLVs) :

                    (Unsigned ? ARMISD::VADDLVpu : ARMISD::VADDLVps);


    SmallVector<SDValue, 4> Ops;

    for (unsigned i = 1, e = N->getNumOperands(); i < e; i++)

      if (i != 2)                      // skip the unsigned flag

        Ops.push_back(N->getOperand(i));


    SDLoc dl(N);

    SDValue val = DAG.getNode(Opc, dl, {MVT::i32, MVT::i32}, Ops);

    return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, val.getValue(0),

                       val.getValue(1));

  }

  }


  return SDValue();

}


/// PerformShiftCombine - Checks for immediate versions of vector shifts and

/// lowers them.  As with the vector shift intrinsics, this is done during DAG

/// combining instead of DAG legalizing because the build_vectors for 64-bit

/// vector element shift counts are generally not legal, and it is hard to see

/// their values after they get legalized to loads from a constant pool.


static SDValue PerformShiftCombine(SDNode *N,

                                   TargetLowering::DAGCombinerInfo &DCI,

                                   const ARMSubtarget *ST) {

  SelectionDAG &DAG = DCI.DAG;

  EVT VT = N->getValueType(0);


  if (ST->isThumb1Only() && N->getOpcode() == ISD::SHL && VT == MVT::i32 &&

      N->getOperand(0)->getOpcode() == ISD::AND &&

      N->getOperand(0)->hasOneUse()) {

    if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())

      return SDValue();

    // Look for the pattern (shl (and x, AndMask), ShiftAmt). This doesn't

    // usually show up because instcombine prefers to canonicalize it to

    // (and (shl x, ShiftAmt) (shl AndMask, ShiftAmt)), but the shift can come

    // out of GEP lowering in some cases.

    SDValue N0 = N->getOperand(0);

    ConstantSDNode *ShiftAmtNode = dyn_cast<ConstantSDNode>(N->getOperand(1));

    if (!ShiftAmtNode)

      return SDValue();

    uint32_t ShiftAmt = static_cast<uint32_t>(ShiftAmtNode->getZExtValue());

    ConstantSDNode *AndMaskNode = dyn_cast<ConstantSDNode>(N0->getOperand(1));

    if (!AndMaskNode)

      return SDValue();

    uint32_t AndMask = static_cast<uint32_t>(AndMaskNode->getZExtValue());

    // Don't transform uxtb/uxth.

    if (AndMask == 255 || AndMask == 65535)

      return SDValue();

    if (isMask_32(AndMask)) {

      uint32_t MaskedBits = llvm::countl_zero(AndMask);

      if (MaskedBits > ShiftAmt) {

        SDLoc DL(N);

        SDValue SHL = DAG.getNode(ISD::SHL, DL, MVT::i32, N0->getOperand(0),

                                  DAG.getConstant(MaskedBits, DL, MVT::i32));

        return DAG.getNode(

            ISD::SRL, DL, MVT::i32, SHL,

            DAG.getConstant(MaskedBits - ShiftAmt, DL, MVT::i32));

      }

    }

  }


  // Nothing to be done for scalar shifts.

  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  if (!VT.isVector() || !TLI.isTypeLegal(VT))

    return SDValue();

  if (ST->hasMVEIntegerOps())

    return SDValue();


  int64_t Cnt;


  switch (N->getOpcode()) {

  default: llvm_unreachable("unexpected shift opcode");


  case ISD::SHL:

    if (isVShiftLImm(N->getOperand(1), VT, false, Cnt)) {

      SDLoc dl(N);

      return DAG.getNode(ARMISD::VSHLIMM, dl, VT, N->getOperand(0),

                         DAG.getConstant(Cnt, dl, MVT::i32));

    }

    break;


  case ISD::SRA:

  case ISD::SRL:

    if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) {

      unsigned VShiftOpc =

          (N->getOpcode() == ISD::SRA ? ARMISD::VSHRsIMM : ARMISD::VSHRuIMM);

      SDLoc dl(N);

      return DAG.getNode(VShiftOpc, dl, VT, N->getOperand(0),

                         DAG.getConstant(Cnt, dl, MVT::i32));

    }

  }

  return SDValue();

}


// Look for a sign/zero/fpextend extend of a larger than legal load. This can be

// split into multiple extending loads, which are simpler to deal with than an

// arbitrary extend. For fp extends we use an integer extending load and a VCVTL

// to convert the type to an f32.


static SDValue PerformSplittingToWideningLoad(SDNode *N, SelectionDAG &DAG) {

  SDValue N0 = N->getOperand(0);

  if (N0.getOpcode() != ISD::LOAD)

    return SDValue();

  LoadSDNode *LD = cast<LoadSDNode>(N0.getNode());

  if (!LD->isSimple() || !N0.hasOneUse() || LD->isIndexed() ||

      LD->getExtensionType() != ISD::NON_EXTLOAD)

    return SDValue();

  EVT FromVT = LD->getValueType(0);

  EVT ToVT = N->getValueType(0);

  if (!ToVT.isVector())

    return SDValue();

  assert(FromVT.getVectorNumElements() == ToVT.getVectorNumElements());

  EVT ToEltVT = ToVT.getVectorElementType();

  EVT FromEltVT = FromVT.getVectorElementType();


  unsigned NumElements = 0;

  if (ToEltVT == MVT::i32 && FromEltVT == MVT::i8)

    NumElements = 4;

  if (ToEltVT == MVT::f32 && FromEltVT == MVT::f16)

    NumElements = 4;

  if (NumElements == 0 ||

      (FromEltVT != MVT::f16 && FromVT.getVectorNumElements() == NumElements) ||

      FromVT.getVectorNumElements() % NumElements != 0 ||

      !isPowerOf2_32(NumElements))

    return SDValue();


  LLVMContext &C = *DAG.getContext();

  SDLoc DL(LD);

  // Details about the old load

  SDValue Ch = LD->getChain();

  SDValue BasePtr = LD->getBasePtr();

  Align Alignment = LD->getBaseAlign();

  MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags();

  AAMDNodes AAInfo = LD->getAAInfo();


  ISD::LoadExtType NewExtType =

      N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;

  SDValue Offset = DAG.getUNDEF(BasePtr.getValueType());

  EVT NewFromVT = EVT::getVectorVT(

      C, EVT::getIntegerVT(C, FromEltVT.getScalarSizeInBits()), NumElements);

  EVT NewToVT = EVT::getVectorVT(

      C, EVT::getIntegerVT(C, ToEltVT.getScalarSizeInBits()), NumElements);


  SmallVector<SDValue, 4> Loads;

  SmallVector<SDValue, 4> Chains;

  for (unsigned i = 0; i < FromVT.getVectorNumElements() / NumElements; i++) {

    unsigned NewOffset = (i * NewFromVT.getSizeInBits()) / 8;

    SDValue NewPtr =

        DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::getFixed(NewOffset));


    SDValue NewLoad =

        DAG.getLoad(ISD::UNINDEXED, NewExtType, NewToVT, DL, Ch, NewPtr, Offset,

                    LD->getPointerInfo().getWithOffset(NewOffset), NewFromVT,

                    Alignment, MMOFlags, AAInfo);

    Loads.push_back(NewLoad);

    Chains.push_back(SDValue(NewLoad.getNode(), 1));

  }


  // Float truncs need to extended with VCVTB's into their floating point types.

  if (FromEltVT == MVT::f16) {

    SmallVector<SDValue, 4> Extends;


    for (unsigned i = 0; i < Loads.size(); i++) {

      SDValue LoadBC =

          DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, MVT::v8f16, Loads[i]);

      SDValue FPExt = DAG.getNode(ARMISD::VCVTL, DL, MVT::v4f32, LoadBC,

                                  DAG.getConstant(0, DL, MVT::i32));

      Extends.push_back(FPExt);

    }


    Loads = Extends;

  }


  SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);

  DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewChain);

  return DAG.getNode(ISD::CONCAT_VECTORS, DL, ToVT, Loads);

}


/// PerformExtendCombine - Target-specific DAG combining for ISD::SIGN_EXTEND,

/// ISD::ZERO_EXTEND, and ISD::ANY_EXTEND.


static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG,

                                    const ARMSubtarget *ST) {

  SDValue N0 = N->getOperand(0);


  // Check for sign- and zero-extensions of vector extract operations of 8- and

  // 16-bit vector elements. NEON and MVE support these directly. They are

  // handled during DAG combining because type legalization will promote them

  // to 32-bit types and it is messy to recognize the operations after that.

  if ((ST->hasNEON() || ST->hasMVEIntegerOps()) &&

      N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {

    SDValue Vec = N0.getOperand(0);

    SDValue Lane = N0.getOperand(1);

    EVT VT = N->getValueType(0);

    EVT EltVT = N0.getValueType();

    const TargetLowering &TLI = DAG.getTargetLoweringInfo();


    if (VT == MVT::i32 &&

        (EltVT == MVT::i8 || EltVT == MVT::i16) &&

        TLI.isTypeLegal(Vec.getValueType()) &&

        isa<ConstantSDNode>(Lane)) {


      unsigned Opc = 0;

      switch (N->getOpcode()) {

      default: llvm_unreachable("unexpected opcode");

      case ISD::SIGN_EXTEND:

        Opc = ARMISD::VGETLANEs;

        break;

      case ISD::ZERO_EXTEND:

      case ISD::ANY_EXTEND:

        Opc = ARMISD::VGETLANEu;

        break;

      }

      return DAG.getNode(Opc, SDLoc(N), VT, Vec, Lane);

    }

  }


  if (ST->hasMVEIntegerOps())

    if (SDValue NewLoad = PerformSplittingToWideningLoad(N, DAG))

      return NewLoad;


  return SDValue();

}


static SDValue PerformFPExtendCombine(SDNode *N, SelectionDAG &DAG,

                                      const ARMSubtarget *ST) {

  if (ST->hasMVEFloatOps())

    if (SDValue NewLoad = PerformSplittingToWideningLoad(N, DAG))

      return NewLoad;


  return SDValue();

}


// Lower smin(smax(x, C1), C2) to ssat or usat, if they have saturating

// constant bounds.


static SDValue PerformMinMaxToSatCombine(SDValue Op, SelectionDAG &DAG,

                                         const ARMSubtarget *Subtarget) {

  if ((Subtarget->isThumb() || !Subtarget->hasV6Ops()) &&

      !Subtarget->isThumb2())

    return SDValue();


  EVT VT = Op.getValueType();

  SDValue Op0 = Op.getOperand(0);


  if (VT != MVT::i32 ||

      (Op0.getOpcode() != ISD::SMIN && Op0.getOpcode() != ISD::SMAX) ||

      !isa<ConstantSDNode>(Op.getOperand(1)) ||

      !isa<ConstantSDNode>(Op0.getOperand(1)))

    return SDValue();


  SDValue Min = Op;

  SDValue Max = Op0;

  SDValue Input = Op0.getOperand(0);

  if (Min.getOpcode() == ISD::SMAX)

    std::swap(Min, Max);


  APInt MinC = Min.getConstantOperandAPInt(1);

  APInt MaxC = Max.getConstantOperandAPInt(1);


  if (Min.getOpcode() != ISD::SMIN || Max.getOpcode() != ISD::SMAX ||

      !(MinC + 1).isPowerOf2())

    return SDValue();


  SDLoc DL(Op);

  if (MinC == ~MaxC)

    return DAG.getNode(ARMISD::SSAT, DL, VT, Input,

                       DAG.getConstant(MinC.countr_one(), DL, VT));

  if (MaxC == 0)

    return DAG.getNode(ARMISD::USAT, DL, VT, Input,

                       DAG.getConstant(MinC.countr_one(), DL, VT));


  return SDValue();

}


/// PerformMinMaxCombine - Target-specific DAG combining for creating truncating

/// saturates.


static SDValue PerformMinMaxCombine(SDNode *N, SelectionDAG &DAG,

                                    const ARMSubtarget *ST) {

  EVT VT = N->getValueType(0);

  SDValue N0 = N->getOperand(0);


  if (VT == MVT::i32)

    return PerformMinMaxToSatCombine(SDValue(N, 0), DAG, ST);


  if (!ST->hasMVEIntegerOps())

    return SDValue();


  if (SDValue V = PerformVQDMULHCombine(N, DAG))

    return V;


  if (VT != MVT::v4i32 && VT != MVT::v8i16)

    return SDValue();


  auto IsSignedSaturate = [&](SDNode *Min, SDNode *Max) {

    // Check one is a smin and the other is a smax

    if (Min->getOpcode() != ISD::SMIN)

      std::swap(Min, Max);

    if (Min->getOpcode() != ISD::SMIN || Max->getOpcode() != ISD::SMAX)

      return false;


    APInt SaturateC;

    if (VT == MVT::v4i32)

      SaturateC = APInt(32, (1 << 15) - 1, true);

    else //if (VT == MVT::v8i16)

      SaturateC = APInt(16, (1 << 7) - 1, true);


    APInt MinC, MaxC;

    if (!ISD::isConstantSplatVector(Min->getOperand(1).getNode(), MinC) ||

        MinC != SaturateC)

      return false;

    if (!ISD::isConstantSplatVector(Max->getOperand(1).getNode(), MaxC) ||

        MaxC != ~SaturateC)

      return false;

    return true;

  };


  if (IsSignedSaturate(N, N0.getNode())) {

    SDLoc DL(N);

    MVT ExtVT, HalfVT;

    if (VT == MVT::v4i32) {

      HalfVT = MVT::v8i16;

      ExtVT = MVT::v4i16;

    } else { // if (VT == MVT::v8i16)

      HalfVT = MVT::v16i8;

      ExtVT = MVT::v8i8;

    }


    // Create a VQMOVNB with undef top lanes, then signed extended into the top

    // half. That extend will hopefully be removed if only the bottom bits are

    // demanded (though a truncating store, for example).

    SDValue VQMOVN =

        DAG.getNode(ARMISD::VQMOVNs, DL, HalfVT, DAG.getUNDEF(HalfVT),

                    N0->getOperand(0), DAG.getConstant(0, DL, MVT::i32));

    SDValue Bitcast = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, VQMOVN);

    return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Bitcast,

                       DAG.getValueType(ExtVT));

  }


  auto IsUnsignedSaturate = [&](SDNode *Min) {

    // For unsigned, we just need to check for <= 0xffff

    if (Min->getOpcode() != ISD::UMIN)

      return false;


    APInt SaturateC;

    if (VT == MVT::v4i32)

      SaturateC = APInt(32, (1 << 16) - 1, true);

    else //if (VT == MVT::v8i16)

      SaturateC = APInt(16, (1 << 8) - 1, true);


    APInt MinC;

    if (!ISD::isConstantSplatVector(Min->getOperand(1).getNode(), MinC) ||

        MinC != SaturateC)

      return false;

    return true;

  };


  if (IsUnsignedSaturate(N)) {

    SDLoc DL(N);

    MVT HalfVT;

    unsigned ExtConst;

    if (VT == MVT::v4i32) {

      HalfVT = MVT::v8i16;

      ExtConst = 0x0000FFFF;

    } else { //if (VT == MVT::v8i16)

      HalfVT = MVT::v16i8;

      ExtConst = 0x00FF;

    }


    // Create a VQMOVNB with undef top lanes, then ZExt into the top half with

    // an AND. That extend will hopefully be removed if only the bottom bits are

    // demanded (though a truncating store, for example).

    SDValue VQMOVN =

        DAG.getNode(ARMISD::VQMOVNu, DL, HalfVT, DAG.getUNDEF(HalfVT), N0,

                    DAG.getConstant(0, DL, MVT::i32));

    SDValue Bitcast = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, VQMOVN);

    return DAG.getNode(ISD::AND, DL, VT, Bitcast,

                       DAG.getConstant(ExtConst, DL, VT));

  }


  return SDValue();

}


static const APInt *isPowerOf2Constant(SDValue V) {

  ConstantSDNode *C = dyn_cast<ConstantSDNode>(V);

  if (!C)

    return nullptr;

  const APInt *CV = &C->getAPIntValue();

  return CV->isPowerOf2() ? CV : nullptr;

}


SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &DAG) const {

  // If we have a CMOV, OR and AND combination such as:

  //   if (x & CN)

  //     y |= CM;

  //

  // And:

  //   * CN is a single bit;

  //   * All bits covered by CM are known zero in y

  //

  // Then we can convert this into a sequence of BFI instructions. This will

  // always be a win if CM is a single bit, will always be no worse than the

  // TST&OR sequence if CM is two bits, and for thumb will be no worse if CM is

  // three bits (due to the extra IT instruction).


  SDValue Op0 = CMOV->getOperand(0);

  SDValue Op1 = CMOV->getOperand(1);

  auto CC = CMOV->getConstantOperandAPInt(2).getLimitedValue();

  SDValue CmpZ = CMOV->getOperand(3);


  // The compare must be against zero.

  if (!isNullConstant(CmpZ->getOperand(1)))

    return SDValue();


  assert(CmpZ->getOpcode() == ARMISD::CMPZ);

  SDValue And = CmpZ->getOperand(0);

  if (And->getOpcode() != ISD::AND)

    return SDValue();

  const APInt *AndC = isPowerOf2Constant(And->getOperand(1));

  if (!AndC)

    return SDValue();

  SDValue X = And->getOperand(0);


  if (CC == ARMCC::EQ) {

    // We're performing an "equal to zero" compare. Swap the operands so we

    // canonicalize on a "not equal to zero" compare.

    std::swap(Op0, Op1);

  } else {

    assert(CC == ARMCC::NE && "How can a CMPZ node not be EQ or NE?");

  }


  if (Op1->getOpcode() != ISD::OR)

    return SDValue();


  ConstantSDNode *OrC = dyn_cast<ConstantSDNode>(Op1->getOperand(1));

  if (!OrC)

    return SDValue();

  SDValue Y = Op1->getOperand(0);


  if (Op0 != Y)

    return SDValue();


  // Now, is it profitable to continue?

  APInt OrCI = OrC->getAPIntValue();

  unsigned Heuristic = Subtarget->isThumb() ? 3 : 2;

  if (OrCI.popcount() > Heuristic)

    return SDValue();


  // Lastly, can we determine that the bits defined by OrCI

  // are zero in Y?

  KnownBits Known = DAG.computeKnownBits(Y);

  if ((OrCI & Known.Zero) != OrCI)

    return SDValue();


  // OK, we can do the combine.

  SDValue V = Y;

  SDLoc dl(X);

  EVT VT = X.getValueType();

  unsigned BitInX = AndC->logBase2();


  if (BitInX != 0) {

    // We must shift X first.

    X = DAG.getNode(ISD::SRL, dl, VT, X,

                    DAG.getConstant(BitInX, dl, VT));

  }


  for (unsigned BitInY = 0, NumActiveBits = OrCI.getActiveBits();

       BitInY < NumActiveBits; ++BitInY) {

    if (OrCI[BitInY] == 0)

      continue;

    APInt Mask(VT.getSizeInBits(), 0);

    Mask.setBit(BitInY);

    V = DAG.getNode(ARMISD::BFI, dl, VT, V, X,

                    // Confusingly, the operand is an *inverted* mask.

                    DAG.getConstant(~Mask, dl, VT));

  }


  return V;

}


// Given N, the value controlling the conditional branch, search for the loop

// intrinsic, returning it, along with how the value is used. We need to handle

// patterns such as the following:

// (brcond (xor (setcc (loop.decrement), 0, ne), 1), exit)

// (brcond (setcc (loop.decrement), 0, eq), exit)

// (brcond (setcc (loop.decrement), 0, ne), header)


static SDValue SearchLoopIntrinsic(SDValue N, ISD::CondCode &CC, int &Imm,

                                   bool &Negate) {

  switch (N->getOpcode()) {

  default:

    break;

  case ISD::XOR: {

    if (!isa<ConstantSDNode>(N.getOperand(1)))

      return SDValue();

    if (!cast<ConstantSDNode>(N.getOperand(1))->isOne())

      return SDValue();

    Negate = !Negate;

    return SearchLoopIntrinsic(N.getOperand(0), CC, Imm, Negate);

  }

  case ISD::SETCC: {

    auto *Const = dyn_cast<ConstantSDNode>(N.getOperand(1));

    if (!Const)

      return SDValue();

    if (Const->isZero())

      Imm = 0;

    else if (Const->isOne())

      Imm = 1;

    else

      return SDValue();

    CC = cast<CondCodeSDNode>(N.getOperand(2))->get();

    return SearchLoopIntrinsic(N->getOperand(0), CC, Imm, Negate);

  }

  case ISD::INTRINSIC_W_CHAIN: {

    unsigned IntOp = N.getConstantOperandVal(1);

    if (IntOp != Intrinsic::test_start_loop_iterations &&

        IntOp != Intrinsic::loop_decrement_reg)

      return SDValue();

    return N;

  }

  }

  return SDValue();

}


static SDValue PerformHWLoopCombine(SDNode *N,

                                    TargetLowering::DAGCombinerInfo &DCI,

                                    const ARMSubtarget *ST) {


  // The hwloop intrinsics that we're interested are used for control-flow,

  // either for entering or exiting the loop:

  // - test.start.loop.iterations will test whether its operand is zero. If it

  //   is zero, the proceeding branch should not enter the loop.

  // - loop.decrement.reg also tests whether its operand is zero. If it is

  //   zero, the proceeding branch should not branch back to the beginning of

  //   the loop.

  // So here, we need to check that how the brcond is using the result of each

  // of the intrinsics to ensure that we're branching to the right place at the

  // right time.


  ISD::CondCode CC;

  SDValue Cond;

  int Imm = 1;

  bool Negate = false;

  SDValue Chain = N->getOperand(0);

  SDValue Dest;


  if (N->getOpcode() == ISD::BRCOND) {

    CC = ISD::SETEQ;

    Cond = N->getOperand(1);

    Dest = N->getOperand(2);

  } else {

    assert(N->getOpcode() == ISD::BR_CC && "Expected BRCOND or BR_CC!");

    CC = cast<CondCodeSDNode>(N->getOperand(1))->get();

    Cond = N->getOperand(2);

    Dest = N->getOperand(4);

    if (auto *Const = dyn_cast<ConstantSDNode>(N->getOperand(3))) {

      if (!Const->isOne() && !Const->isZero())

        return SDValue();

      Imm = Const->getZExtValue();

    } else

      return SDValue();

  }


  SDValue Int = SearchLoopIntrinsic(Cond, CC, Imm, Negate);

  if (!Int)

    return SDValue();


  if (Negate)

    CC = ISD::getSetCCInverse(CC, /* Integer inverse */ MVT::i32);


  auto IsTrueIfZero = [](ISD::CondCode CC, int Imm) {

    return (CC == ISD::SETEQ && Imm == 0) ||

           (CC == ISD::SETNE && Imm == 1) ||

           (CC == ISD::SETLT && Imm == 1) ||

           (CC == ISD::SETULT && Imm == 1);

  };


  auto IsFalseIfZero = [](ISD::CondCode CC, int Imm) {

    return (CC == ISD::SETEQ && Imm == 1) ||

           (CC == ISD::SETNE && Imm == 0) ||

           (CC == ISD::SETGT && Imm == 0) ||

           (CC == ISD::SETUGT && Imm == 0) ||

           (CC == ISD::SETGE && Imm == 1) ||

           (CC == ISD::SETUGE && Imm == 1);

  };


  assert((IsTrueIfZero(CC, Imm) || IsFalseIfZero(CC, Imm)) &&

         "unsupported condition");


  SDLoc dl(Int);

  SelectionDAG &DAG = DCI.DAG;

  SDValue Elements = Int.getOperand(2);

  unsigned IntOp = Int->getConstantOperandVal(1);

  assert((N->hasOneUse() && N->user_begin()->getOpcode() == ISD::BR) &&

         "expected single br user");

  SDNode *Br = *N->user_begin();

  SDValue OtherTarget = Br->getOperand(1);


  // Update the unconditional branch to branch to the given Dest.

  auto UpdateUncondBr = [](SDNode *Br, SDValue Dest, SelectionDAG &DAG) {

    SDValue NewBrOps[] = { Br->getOperand(0), Dest };

    SDValue NewBr = DAG.getNode(ISD::BR, SDLoc(Br), MVT::Other, NewBrOps);

    DAG.ReplaceAllUsesOfValueWith(SDValue(Br, 0), NewBr);

  };


  if (IntOp == Intrinsic::test_start_loop_iterations) {

    SDValue Res;

    SDValue Setup = DAG.getNode(ARMISD::WLSSETUP, dl, MVT::i32, Elements);

    // We expect this 'instruction' to branch when the counter is zero.

    if (IsTrueIfZero(CC, Imm)) {

      SDValue Ops[] = {Chain, Setup, Dest};

      Res = DAG.getNode(ARMISD::WLS, dl, MVT::Other, Ops);

    } else {

      // The logic is the reverse of what we need for WLS, so find the other

      // basic block target: the target of the proceeding br.

      UpdateUncondBr(Br, Dest, DAG);


      SDValue Ops[] = {Chain, Setup, OtherTarget};

      Res = DAG.getNode(ARMISD::WLS, dl, MVT::Other, Ops);

    }

    // Update LR count to the new value

    DAG.ReplaceAllUsesOfValueWith(Int.getValue(0), Setup);

    // Update chain

    DAG.ReplaceAllUsesOfValueWith(Int.getValue(2), Int.getOperand(0));

    return Res;

  } else {

    SDValue Size =

        DAG.getTargetConstant(Int.getConstantOperandVal(3), dl, MVT::i32);

    SDValue Args[] = { Int.getOperand(0), Elements, Size, };

    SDValue LoopDec = DAG.getNode(ARMISD::LOOP_DEC, dl,

                                  DAG.getVTList(MVT::i32, MVT::Other), Args);

    DAG.ReplaceAllUsesWith(Int.getNode(), LoopDec.getNode());


    // We expect this instruction to branch when the count is not zero.

    SDValue Target = IsFalseIfZero(CC, Imm) ? Dest : OtherTarget;


    // Update the unconditional branch to target the loop preheader if we've

    // found the condition has been reversed.

    if (Target == OtherTarget)

      UpdateUncondBr(Br, Dest, DAG);


    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,

                        SDValue(LoopDec.getNode(), 1), Chain);


    SDValue EndArgs[] = { Chain, SDValue(LoopDec.getNode(), 0), Target };

    return DAG.getNode(ARMISD::LE, dl, MVT::Other, EndArgs);

  }

  return SDValue();

}


/// PerformBRCONDCombine - Target-specific DAG combining for ARMISD::BRCOND.

SDValue


ARMTargetLowering::PerformBRCONDCombine(SDNode *N, SelectionDAG &DAG) const {

  SDValue Cmp = N->getOperand(3);

  if (Cmp.getOpcode() != ARMISD::CMPZ)

    // Only looking at NE cases.

    return SDValue();


  SDLoc dl(N);

  SDValue LHS = Cmp.getOperand(0);

  SDValue RHS = Cmp.getOperand(1);

  SDValue Chain = N->getOperand(0);

  SDValue BB = N->getOperand(1);

  SDValue ARMcc = N->getOperand(2);

  ARMCC::CondCodes CC = (ARMCC::CondCodes)ARMcc->getAsZExtVal();


  // (brcond Chain BB ne (cmpz (and (cmov 0 1 CC Flags) 1) 0))

  // -> (brcond Chain BB CC Flags)

  if (CC == ARMCC::NE && LHS.getOpcode() == ISD::AND && LHS->hasOneUse() &&

      LHS->getOperand(0)->getOpcode() == ARMISD::CMOV &&

      LHS->getOperand(0)->hasOneUse() &&

      isNullConstant(LHS->getOperand(0)->getOperand(0)) &&

      isOneConstant(LHS->getOperand(0)->getOperand(1)) &&

      isOneConstant(LHS->getOperand(1)) && isNullConstant(RHS)) {

    return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, BB,

                       LHS->getOperand(0)->getOperand(2),

                       LHS->getOperand(0)->getOperand(3));

  }


  return SDValue();

}


/// PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV.

SDValue


ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {

  SDValue Cmp = N->getOperand(3);

  if (Cmp.getOpcode() != ARMISD::CMPZ)

    // Only looking at EQ and NE cases.

    return SDValue();


  EVT VT = N->getValueType(0);

  SDLoc dl(N);

  SDValue LHS = Cmp.getOperand(0);

  SDValue RHS = Cmp.getOperand(1);

  SDValue FalseVal = N->getOperand(0);

  SDValue TrueVal = N->getOperand(1);

  SDValue ARMcc = N->getOperand(2);

  ARMCC::CondCodes CC = (ARMCC::CondCodes)ARMcc->getAsZExtVal();


  // BFI is only available on V6T2+.

  if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops()) {

    SDValue R = PerformCMOVToBFICombine(N, DAG);

    if (R)

      return R;

  }


  // Simplify

  //   mov     r1, r0

  //   cmp     r1, x

  //   mov     r0, y

  //   moveq   r0, x

  // to

  //   cmp     r0, x

  //   movne   r0, y

  //

  //   mov     r1, r0

  //   cmp     r1, x

  //   mov     r0, x

  //   movne   r0, y

  // to

  //   cmp     r0, x

  //   movne   r0, y

  /// FIXME: Turn this into a target neutral optimization?

  SDValue Res;

  if (CC == ARMCC::NE && FalseVal == RHS && FalseVal != LHS) {

    Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, TrueVal, ARMcc, Cmp);

  } else if (CC == ARMCC::EQ && TrueVal == RHS) {

    SDValue ARMcc;

    SDValue NewCmp = getARMCmp(LHS, RHS, ISD::SETNE, ARMcc, DAG, dl);

    Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, FalseVal, ARMcc, NewCmp);

  }


  // (cmov F T ne (cmpz (cmov 0 1 CC Flags) 0))

  // -> (cmov F T CC Flags)

  if (CC == ARMCC::NE && LHS.getOpcode() == ARMISD::CMOV && LHS->hasOneUse() &&

      isNullConstant(LHS->getOperand(0)) && isOneConstant(LHS->getOperand(1)) &&

      isNullConstant(RHS)) {

    return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal,

                       LHS->getOperand(2), LHS->getOperand(3));

  }


  if (!VT.isInteger())

      return SDValue();


  // Fold away an unneccessary CMPZ/CMOV

  // CMOV A, B, C1, (CMPZ (CMOV 1, 0, C2, D), 0) ->

  // if C1==EQ -> CMOV A, B, C2, D

  // if C1==NE -> CMOV A, B, NOT(C2), D

  if (N->getConstantOperandVal(2) == ARMCC::EQ ||

      N->getConstantOperandVal(2) == ARMCC::NE) {

    ARMCC::CondCodes Cond;

    if (SDValue C = IsCMPZCSINC(N->getOperand(3).getNode(), Cond)) {

      if (N->getConstantOperandVal(2) == ARMCC::NE)

        Cond = ARMCC::getOppositeCondition(Cond);

      return DAG.getNode(N->getOpcode(), SDLoc(N), MVT::i32, N->getOperand(0),

                         N->getOperand(1),

                         DAG.getConstant(Cond, SDLoc(N), MVT::i32), C);

    }

  }


  // Materialize a boolean comparison for integers so we can avoid branching.

  if (isNullConstant(FalseVal)) {

    if (CC == ARMCC::EQ && isOneConstant(TrueVal)) {

      if (!Subtarget->isThumb1Only() && Subtarget->hasV5TOps()) {

        // If x == y then x - y == 0 and ARM's CLZ will return 32, shifting it

        // right 5 bits will make that 32 be 1, otherwise it will be 0.

        // CMOV 0, 1, ==, (CMPZ x, y) -> SRL (CTLZ (SUB x, y)), 5

        SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);

        Res = DAG.getNode(ISD::SRL, dl, VT, DAG.getNode(ISD::CTLZ, dl, VT, Sub),

                          DAG.getConstant(5, dl, MVT::i32));

      } else {

        // CMOV 0, 1, ==, (CMPZ x, y) ->

        //     (UADDO_CARRY (SUB x, y), t:0, t:1)

        // where t = (USUBO_CARRY 0, (SUB x, y), 0)

        //

        // The USUBO_CARRY computes 0 - (x - y) and this will give a borrow when

        // x != y. In other words, a carry C == 1 when x == y, C == 0

        // otherwise.

        // The final UADDO_CARRY computes

        //     x - y + (0 - (x - y)) + C == C

        SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);

        SDVTList VTs = DAG.getVTList(VT, MVT::i32);

        SDValue Neg = DAG.getNode(ISD::USUBO, dl, VTs, FalseVal, Sub);

        // ISD::USUBO_CARRY returns a borrow but we want the carry here

        // actually.

        SDValue Carry =

            DAG.getNode(ISD::SUB, dl, MVT::i32,

                        DAG.getConstant(1, dl, MVT::i32), Neg.getValue(1));

        Res = DAG.getNode(ISD::UADDO_CARRY, dl, VTs, Sub, Neg, Carry);

      }

    } else if (CC == ARMCC::NE && !isNullConstant(RHS) &&

               (!Subtarget->isThumb1Only() || isPowerOf2Constant(TrueVal))) {

      // This seems pointless but will allow us to combine it further below.

      // CMOV 0, z, !=, (CMPZ x, y) -> CMOV (SUBC x, y), z, !=, (SUBC x, y):1

      SDValue Sub =

          DAG.getNode(ARMISD::SUBC, dl, DAG.getVTList(VT, MVT::i32), LHS, RHS);

      Res = DAG.getNode(ARMISD::CMOV, dl, VT, Sub, TrueVal, ARMcc,

                        Sub.getValue(1));

      FalseVal = Sub;

    }

  } else if (isNullConstant(TrueVal)) {

    if (CC == ARMCC::EQ && !isNullConstant(RHS) &&

        (!Subtarget->isThumb1Only() || isPowerOf2Constant(FalseVal))) {

      // This seems pointless but will allow us to combine it further below

      // Note that we change == for != as this is the dual for the case above.

      // CMOV z, 0, ==, (CMPZ x, y) -> CMOV (SUBC x, y), z, !=, (SUBC x, y):1

      SDValue Sub =

          DAG.getNode(ARMISD::SUBC, dl, DAG.getVTList(VT, MVT::i32), LHS, RHS);

      Res = DAG.getNode(ARMISD::CMOV, dl, VT, Sub, FalseVal,

                        DAG.getConstant(ARMCC::NE, dl, MVT::i32),

                        Sub.getValue(1));

      FalseVal = Sub;

    }

  }


  // On Thumb1, the DAG above may be further combined if z is a power of 2

  // (z == 2 ^ K).

  // CMOV (SUBC x, y), z, !=, (SUBC x, y):1 ->

  // t1 = (USUBO (SUB x, y), 1)

  // t2 = (USUBO_CARRY (SUB x, y), t1:0, t1:1)

  // Result = if K != 0 then (SHL t2:0, K) else t2:0

  //

  // This also handles the special case of comparing against zero; it's

  // essentially, the same pattern, except there's no SUBC:

  // CMOV x, z, !=, (CMPZ x, 0) ->

  // t1 = (USUBO x, 1)

  // t2 = (USUBO_CARRY x, t1:0, t1:1)

  // Result = if K != 0 then (SHL t2:0, K) else t2:0

  const APInt *TrueConst;

  if (Subtarget->isThumb1Only() && CC == ARMCC::NE &&

      ((FalseVal.getOpcode() == ARMISD::SUBC && FalseVal.getOperand(0) == LHS &&

        FalseVal.getOperand(1) == RHS) ||

       (FalseVal == LHS && isNullConstant(RHS))) &&

      (TrueConst = isPowerOf2Constant(TrueVal))) {

    SDVTList VTs = DAG.getVTList(VT, MVT::i32);

    unsigned ShiftAmount = TrueConst->logBase2();

    if (ShiftAmount)

      TrueVal = DAG.getConstant(1, dl, VT);

    SDValue Subc = DAG.getNode(ISD::USUBO, dl, VTs, FalseVal, TrueVal);

    Res = DAG.getNode(ISD::USUBO_CARRY, dl, VTs, FalseVal, Subc,

                      Subc.getValue(1));


    if (ShiftAmount)

      Res = DAG.getNode(ISD::SHL, dl, VT, Res,

                        DAG.getConstant(ShiftAmount, dl, MVT::i32));

  }


  if (Res.getNode()) {

    KnownBits Known = DAG.computeKnownBits(SDValue(N,0));

    // Capture demanded bits information that would be otherwise lost.

    if (Known.Zero == 0xfffffffe)

      Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,

                        DAG.getValueType(MVT::i1));

    else if (Known.Zero == 0xffffff00)

      Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,

                        DAG.getValueType(MVT::i8));

    else if (Known.Zero == 0xffff0000)

      Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,

                        DAG.getValueType(MVT::i16));

  }


  return Res;

}


static SDValue PerformBITCASTCombine(SDNode *N,

                                     TargetLowering::DAGCombinerInfo &DCI,

                                     const ARMSubtarget *ST) {

  SelectionDAG &DAG = DCI.DAG;

  SDValue Src = N->getOperand(0);

  EVT DstVT = N->getValueType(0);


  // Convert v4f32 bitcast (v4i32 vdup (i32)) -> v4f32 vdup (i32) under MVE.

  if (ST->hasMVEIntegerOps() && Src.getOpcode() == ARMISD::VDUP) {

    EVT SrcVT = Src.getValueType();

    if (SrcVT.getScalarSizeInBits() == DstVT.getScalarSizeInBits())

      return DAG.getNode(ARMISD::VDUP, SDLoc(N), DstVT, Src.getOperand(0));

  }


  // We may have a bitcast of something that has already had this bitcast

  // combine performed on it, so skip past any VECTOR_REG_CASTs.

  if (Src.getOpcode() == ARMISD::VECTOR_REG_CAST &&

      Src.getOperand(0).getValueType().getScalarSizeInBits() <=

          Src.getValueType().getScalarSizeInBits())

    Src = Src.getOperand(0);


  // Bitcast from element-wise VMOV or VMVN doesn't need VREV if the VREV that

  // would be generated is at least the width of the element type.

  EVT SrcVT = Src.getValueType();

  if ((Src.getOpcode() == ARMISD::VMOVIMM ||

       Src.getOpcode() == ARMISD::VMVNIMM ||

       Src.getOpcode() == ARMISD::VMOVFPIMM) &&

      SrcVT.getScalarSizeInBits() <= DstVT.getScalarSizeInBits() &&

      DAG.getDataLayout().isBigEndian())

    return DAG.getNode(ARMISD::VECTOR_REG_CAST, SDLoc(N), DstVT, Src);


  // bitcast(extract(x, n)); bitcast(extract(x, n+1))  ->  VMOVRRD x

  if (SDValue R = PerformExtractEltToVMOVRRD(N, DCI))

    return R;


  return SDValue();

}


// Some combines for the MVETrunc truncations legalizer helper. Also lowers the

// node into stack operations after legalizeOps.


SDValue ARMTargetLowering::PerformMVETruncCombine(

    SDNode *N, TargetLowering::DAGCombinerInfo &DCI) const {

  SelectionDAG &DAG = DCI.DAG;

  EVT VT = N->getValueType(0);

  SDLoc DL(N);


  // MVETrunc(Undef, Undef) -> Undef

  if (all_of(N->ops(), [](SDValue Op) { return Op.isUndef(); }))

    return DAG.getUNDEF(VT);


  // MVETrunc(MVETrunc a b, MVETrunc c, d) -> MVETrunc

  if (N->getNumOperands() == 2 &&

      N->getOperand(0).getOpcode() == ARMISD::MVETRUNC &&

      N->getOperand(1).getOpcode() == ARMISD::MVETRUNC)

    return DAG.getNode(ARMISD::MVETRUNC, DL, VT, N->getOperand(0).getOperand(0),

                       N->getOperand(0).getOperand(1),

                       N->getOperand(1).getOperand(0),

                       N->getOperand(1).getOperand(1));


  // MVETrunc(shuffle, shuffle) -> VMOVN

  if (N->getNumOperands() == 2 &&

      N->getOperand(0).getOpcode() == ISD::VECTOR_SHUFFLE &&

      N->getOperand(1).getOpcode() == ISD::VECTOR_SHUFFLE) {

    auto *S0 = cast<ShuffleVectorSDNode>(N->getOperand(0).getNode());

    auto *S1 = cast<ShuffleVectorSDNode>(N->getOperand(1).getNode());


    if (S0->getOperand(0) == S1->getOperand(0) &&

        S0->getOperand(1) == S1->getOperand(1)) {

      // Construct complete shuffle mask

      SmallVector<int, 8> Mask(S0->getMask());

      Mask.append(S1->getMask().begin(), S1->getMask().end());


      if (isVMOVNTruncMask(Mask, VT, false))

        return DAG.getNode(

            ARMISD::VMOVN, DL, VT,

            DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, S0->getOperand(0)),

            DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, S0->getOperand(1)),

            DAG.getConstant(1, DL, MVT::i32));

      if (isVMOVNTruncMask(Mask, VT, true))

        return DAG.getNode(

            ARMISD::VMOVN, DL, VT,

            DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, S0->getOperand(1)),

            DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, S0->getOperand(0)),

            DAG.getConstant(1, DL, MVT::i32));

    }

  }


  // For MVETrunc of a buildvector or shuffle, it can be beneficial to lower the

  // truncate to a buildvector to allow the generic optimisations to kick in.

  if (all_of(N->ops(), [](SDValue Op) {

        return Op.getOpcode() == ISD::BUILD_VECTOR ||

               Op.getOpcode() == ISD::VECTOR_SHUFFLE ||

               (Op.getOpcode() == ISD::BITCAST &&

                Op.getOperand(0).getOpcode() == ISD::BUILD_VECTOR);

      })) {

    SmallVector<SDValue, 8> Extracts;

    for (unsigned Op = 0; Op < N->getNumOperands(); Op++) {

      SDValue O = N->getOperand(Op);

      for (unsigned i = 0; i < O.getValueType().getVectorNumElements(); i++) {

        SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, O,

                                  DAG.getConstant(i, DL, MVT::i32));

        Extracts.push_back(Ext);

      }

    }

    return DAG.getBuildVector(VT, DL, Extracts);

  }


  // If we are late in the legalization process and nothing has optimised

  // the trunc to anything better, lower it to a stack store and reload,

  // performing the truncation whilst keeping the lanes in the correct order:

  //   VSTRH.32 a, stack; VSTRH.32 b, stack+8; VLDRW.32 stack;

  if (!DCI.isAfterLegalizeDAG())

    return SDValue();


  SDValue StackPtr = DAG.CreateStackTemporary(TypeSize::getFixed(16), Align(4));

  int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();

  int NumIns = N->getNumOperands();

  assert((NumIns == 2 || NumIns == 4) &&

         "Expected 2 or 4 inputs to an MVETrunc");

  EVT StoreVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());

  if (N->getNumOperands() == 4)

    StoreVT = StoreVT.getHalfNumVectorElementsVT(*DAG.getContext());


  SmallVector<SDValue> Chains;

  for (int I = 0; I < NumIns; I++) {

    SDValue Ptr = DAG.getNode(

        ISD::ADD, DL, StackPtr.getValueType(), StackPtr,

        DAG.getConstant(I * 16 / NumIns, DL, StackPtr.getValueType()));

    MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(

        DAG.getMachineFunction(), SPFI, I * 16 / NumIns);

    SDValue Ch = DAG.getTruncStore(DAG.getEntryNode(), DL, N->getOperand(I),

                                   Ptr, MPI, StoreVT, Align(4));

    Chains.push_back(Ch);

  }


  SDValue Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);

  MachinePointerInfo MPI =

      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI, 0);

  return DAG.getLoad(VT, DL, Chain, StackPtr, MPI, Align(4));

}


// Take a MVEEXT(load x) and split that into (extload x, extload x+8)


static SDValue PerformSplittingMVEEXTToWideningLoad(SDNode *N,

                                                    SelectionDAG &DAG) {

  SDValue N0 = N->getOperand(0);

  LoadSDNode *LD = dyn_cast<LoadSDNode>(N0.getNode());

  if (!LD || !LD->isSimple() || !N0.hasOneUse() || LD->isIndexed())

    return SDValue();


  EVT FromVT = LD->getMemoryVT();

  EVT ToVT = N->getValueType(0);

  if (!ToVT.isVector())

    return SDValue();

  assert(FromVT.getVectorNumElements() == ToVT.getVectorNumElements() * 2);

  EVT ToEltVT = ToVT.getVectorElementType();

  EVT FromEltVT = FromVT.getVectorElementType();


  unsigned NumElements = 0;

  if (ToEltVT == MVT::i32 && (FromEltVT == MVT::i16 || FromEltVT == MVT::i8))

    NumElements = 4;

  if (ToEltVT == MVT::i16 && FromEltVT == MVT::i8)

    NumElements = 8;

  assert(NumElements != 0);


  ISD::LoadExtType NewExtType =

      N->getOpcode() == ARMISD::MVESEXT ? ISD::SEXTLOAD : ISD::ZEXTLOAD;

  if (LD->getExtensionType() != ISD::NON_EXTLOAD &&

      LD->getExtensionType() != ISD::EXTLOAD &&

      LD->getExtensionType() != NewExtType)

    return SDValue();


  LLVMContext &C = *DAG.getContext();

  SDLoc DL(LD);

  // Details about the old load

  SDValue Ch = LD->getChain();

  SDValue BasePtr = LD->getBasePtr();

  Align Alignment = LD->getBaseAlign();

  MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags();

  AAMDNodes AAInfo = LD->getAAInfo();


  SDValue Offset = DAG.getUNDEF(BasePtr.getValueType());

  EVT NewFromVT = EVT::getVectorVT(

      C, EVT::getIntegerVT(C, FromEltVT.getScalarSizeInBits()), NumElements);

  EVT NewToVT = EVT::getVectorVT(

      C, EVT::getIntegerVT(C, ToEltVT.getScalarSizeInBits()), NumElements);


  SmallVector<SDValue, 4> Loads;

  SmallVector<SDValue, 4> Chains;

  for (unsigned i = 0; i < FromVT.getVectorNumElements() / NumElements; i++) {

    unsigned NewOffset = (i * NewFromVT.getSizeInBits()) / 8;

    SDValue NewPtr =

        DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::getFixed(NewOffset));


    SDValue NewLoad =

        DAG.getLoad(ISD::UNINDEXED, NewExtType, NewToVT, DL, Ch, NewPtr, Offset,

                    LD->getPointerInfo().getWithOffset(NewOffset), NewFromVT,

                    Alignment, MMOFlags, AAInfo);

    Loads.push_back(NewLoad);

    Chains.push_back(SDValue(NewLoad.getNode(), 1));

  }


  SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);

  DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewChain);

  return DAG.getMergeValues(Loads, DL);

}


// Perform combines for MVEEXT. If it has not be optimized to anything better

// before lowering, it gets converted to stack store and extloads performing the

// extend whilst still keeping the same lane ordering.


SDValue ARMTargetLowering::PerformMVEExtCombine(

    SDNode *N, TargetLowering::DAGCombinerInfo &DCI) const {

  SelectionDAG &DAG = DCI.DAG;

  EVT VT = N->getValueType(0);

  SDLoc DL(N);

  assert(N->getNumValues() == 2 && "Expected MVEEXT with 2 elements");

  assert((VT == MVT::v4i32 || VT == MVT::v8i16) && "Unexpected MVEEXT type");


  EVT ExtVT = N->getOperand(0).getValueType().getHalfNumVectorElementsVT(

      *DAG.getContext());

  auto Extend = [&](SDValue V) {

    SDValue VVT = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, V);

    return N->getOpcode() == ARMISD::MVESEXT

               ? DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, VVT,

                             DAG.getValueType(ExtVT))

               : DAG.getZeroExtendInReg(VVT, DL, ExtVT);

  };


  // MVEEXT(VDUP) -> SIGN_EXTEND_INREG(VDUP)

  if (N->getOperand(0).getOpcode() == ARMISD::VDUP) {

    SDValue Ext = Extend(N->getOperand(0));

    return DAG.getMergeValues({Ext, Ext}, DL);

  }


  // MVEEXT(shuffle) -> SIGN_EXTEND_INREG/ZERO_EXTEND_INREG

  if (auto *SVN = dyn_cast<ShuffleVectorSDNode>(N->getOperand(0))) {

    ArrayRef<int> Mask = SVN->getMask();

    assert(Mask.size() == 2 * VT.getVectorNumElements());

    assert(Mask.size() == SVN->getValueType(0).getVectorNumElements());

    unsigned Rev = VT == MVT::v4i32 ? ARMISD::VREV32 : ARMISD::VREV16;

    SDValue Op0 = SVN->getOperand(0);

    SDValue Op1 = SVN->getOperand(1);


    auto CheckInregMask = [&](int Start, int Offset) {

      for (int Idx = 0, E = VT.getVectorNumElements(); Idx < E; ++Idx)

        if (Mask[Start + Idx] >= 0 && Mask[Start + Idx] != Idx * 2 + Offset)

          return false;

      return true;

    };

    SDValue V0 = SDValue(N, 0);

    SDValue V1 = SDValue(N, 1);

    if (CheckInregMask(0, 0))

      V0 = Extend(Op0);

    else if (CheckInregMask(0, 1))

      V0 = Extend(DAG.getNode(Rev, DL, SVN->getValueType(0), Op0));

    else if (CheckInregMask(0, Mask.size()))

      V0 = Extend(Op1);

    else if (CheckInregMask(0, Mask.size() + 1))

      V0 = Extend(DAG.getNode(Rev, DL, SVN->getValueType(0), Op1));


    if (CheckInregMask(VT.getVectorNumElements(), Mask.size()))

      V1 = Extend(Op1);

    else if (CheckInregMask(VT.getVectorNumElements(), Mask.size() + 1))

      V1 = Extend(DAG.getNode(Rev, DL, SVN->getValueType(0), Op1));

    else if (CheckInregMask(VT.getVectorNumElements(), 0))

      V1 = Extend(Op0);

    else if (CheckInregMask(VT.getVectorNumElements(), 1))

      V1 = Extend(DAG.getNode(Rev, DL, SVN->getValueType(0), Op0));


    if (V0.getNode() != N || V1.getNode() != N)

      return DAG.getMergeValues({V0, V1}, DL);

  }


  // MVEEXT(load) -> extload, extload

  if (N->getOperand(0)->getOpcode() == ISD::LOAD)

    if (SDValue L = PerformSplittingMVEEXTToWideningLoad(N, DAG))

      return L;


  if (!DCI.isAfterLegalizeDAG())

    return SDValue();


  // Lower to a stack store and reload:

  //  VSTRW.32 a, stack; VLDRH.32 stack; VLDRH.32 stack+8;

  SDValue StackPtr = DAG.CreateStackTemporary(TypeSize::getFixed(16), Align(4));

  int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();

  int NumOuts = N->getNumValues();

  assert((NumOuts == 2 || NumOuts == 4) &&

         "Expected 2 or 4 outputs to an MVEEXT");

  EVT LoadVT = N->getOperand(0).getValueType().getHalfNumVectorElementsVT(

      *DAG.getContext());

  if (N->getNumOperands() == 4)

    LoadVT = LoadVT.getHalfNumVectorElementsVT(*DAG.getContext());


  MachinePointerInfo MPI =

      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI, 0);

  SDValue Chain = DAG.getStore(DAG.getEntryNode(), DL, N->getOperand(0),

                               StackPtr, MPI, Align(4));


  SmallVector<SDValue> Loads;

  for (int I = 0; I < NumOuts; I++) {

    SDValue Ptr = DAG.getNode(

        ISD::ADD, DL, StackPtr.getValueType(), StackPtr,

        DAG.getConstant(I * 16 / NumOuts, DL, StackPtr.getValueType()));

    MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(

        DAG.getMachineFunction(), SPFI, I * 16 / NumOuts);

    SDValue Load = DAG.getExtLoad(

        N->getOpcode() == ARMISD::MVESEXT ? ISD::SEXTLOAD : ISD::ZEXTLOAD, DL,

        VT, Chain, Ptr, MPI, LoadVT, Align(4));

    Loads.push_back(Load);

  }


  return DAG.getMergeValues(Loads, DL);

}


SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,

                                             DAGCombinerInfo &DCI) const {

  switch (N->getOpcode()) {

  default: break;

  case ISD::SELECT_CC:

  case ISD::SELECT:     return PerformSELECTCombine(N, DCI, Subtarget);

  case ISD::VSELECT:    return PerformVSELECTCombine(N, DCI, Subtarget);

  case ISD::SETCC:      return PerformVSetCCToVCTPCombine(N, DCI, Subtarget);

  case ARMISD::ADDE:    return PerformADDECombine(N, DCI, Subtarget);

  case ARMISD::UMLAL:   return PerformUMLALCombine(N, DCI.DAG, Subtarget);

  case ISD::ADD:        return PerformADDCombine(N, DCI, Subtarget);

  case ISD::SUB:        return PerformSUBCombine(N, DCI, Subtarget);

  case ISD::MUL:        return PerformMULCombine(N, DCI, Subtarget);

  case ISD::OR:         return PerformORCombine(N, DCI, Subtarget);

  case ISD::XOR:        return PerformXORCombine(N, DCI, Subtarget);

  case ISD::AND:        return PerformANDCombine(N, DCI, Subtarget);

  case ISD::BRCOND:

  case ISD::BR_CC:      return PerformHWLoopCombine(N, DCI, Subtarget);

  case ARMISD::ADDC:

  case ARMISD::SUBC:    return PerformAddcSubcCombine(N, DCI, Subtarget);

  case ARMISD::SUBE:    return PerformAddeSubeCombine(N, DCI, Subtarget);

  case ARMISD::BFI:     return PerformBFICombine(N, DCI.DAG);

  case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI, Subtarget);

  case ARMISD::VMOVDRR: return PerformVMOVDRRCombine(N, DCI.DAG);

  case ARMISD::VMOVhr:  return PerformVMOVhrCombine(N, DCI);

  case ARMISD::VMOVrh:  return PerformVMOVrhCombine(N, DCI.DAG);

  case ISD::STORE:      return PerformSTORECombine(N, DCI, Subtarget);

  case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DCI, Subtarget);

  case ISD::INSERT_VECTOR_ELT: return PerformInsertEltCombine(N, DCI);

  case ISD::EXTRACT_VECTOR_ELT:

    return PerformExtractEltCombine(N, DCI, Subtarget);

  case ISD::SIGN_EXTEND_INREG: return PerformSignExtendInregCombine(N, DCI.DAG);

  case ISD::INSERT_SUBVECTOR: return PerformInsertSubvectorCombine(N, DCI);

  case ISD::VECTOR_SHUFFLE: return PerformVECTOR_SHUFFLECombine(N, DCI.DAG);

  case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI, Subtarget);

  case ARMISD::VDUP: return PerformVDUPCombine(N, DCI.DAG, Subtarget);

  case ISD::FP_TO_SINT:

  case ISD::FP_TO_UINT:

    return PerformVCVTCombine(N, DCI.DAG, Subtarget);

  case ISD::FADD:

    return PerformFADDCombine(N, DCI.DAG, Subtarget);

  case ISD::FMUL:

    return PerformVMulVCTPCombine(N, DCI.DAG, Subtarget);

  case ISD::INTRINSIC_WO_CHAIN:

    return PerformIntrinsicCombine(N, DCI);

  case ISD::SHL:

  case ISD::SRA:

  case ISD::SRL:

    return PerformShiftCombine(N, DCI, Subtarget);

  case ISD::SIGN_EXTEND:

  case ISD::ZERO_EXTEND:

  case ISD::ANY_EXTEND:

    return PerformExtendCombine(N, DCI.DAG, Subtarget);

  case ISD::FP_EXTEND:

    return PerformFPExtendCombine(N, DCI.DAG, Subtarget);

  case ISD::SMIN:

  case ISD::UMIN:

  case ISD::SMAX:

  case ISD::UMAX:

    return PerformMinMaxCombine(N, DCI.DAG, Subtarget);

  case ARMISD::CMOV:

    return PerformCMOVCombine(N, DCI.DAG);

  case ARMISD::BRCOND:

    return PerformBRCONDCombine(N, DCI.DAG);

  case ARMISD::CMPZ:

    return PerformCMPZCombine(N, DCI.DAG);

  case ARMISD::CSINC:

  case ARMISD::CSINV:

  case ARMISD::CSNEG:

    return PerformCSETCombine(N, DCI.DAG);

  case ISD::LOAD:

    return PerformLOADCombine(N, DCI, Subtarget);

  case ARMISD::VLD1DUP:

  case ARMISD::VLD2DUP:

  case ARMISD::VLD3DUP:

  case ARMISD::VLD4DUP:

    return PerformVLDCombine(N, DCI);

  case ARMISD::BUILD_VECTOR:

    return PerformARMBUILD_VECTORCombine(N, DCI);

  case ISD::BITCAST:

    return PerformBITCASTCombine(N, DCI, Subtarget);

  case ARMISD::PREDICATE_CAST:

    return PerformPREDICATE_CASTCombine(N, DCI);

  case ARMISD::VECTOR_REG_CAST:

    return PerformVECTOR_REG_CASTCombine(N, DCI.DAG, Subtarget);

  case ARMISD::MVETRUNC:

    return PerformMVETruncCombine(N, DCI);

  case ARMISD::MVESEXT:

  case ARMISD::MVEZEXT:

    return PerformMVEExtCombine(N, DCI);

  case ARMISD::VCMP:

    return PerformVCMPCombine(N, DCI.DAG, Subtarget);

  case ISD::VECREDUCE_ADD:

    return PerformVECREDUCE_ADDCombine(N, DCI.DAG, Subtarget);

  case ARMISD::VADDVs:

  case ARMISD::VADDVu:

  case ARMISD::VADDLVs:

  case ARMISD::VADDLVu:

  case ARMISD::VADDLVAs:

  case ARMISD::VADDLVAu:

  case ARMISD::VMLAVs:

  case ARMISD::VMLAVu:

  case ARMISD::VMLALVs:

  case ARMISD::VMLALVu:

  case ARMISD::VMLALVAs:

  case ARMISD::VMLALVAu:

    return PerformReduceShuffleCombine(N, DCI.DAG);

  case ARMISD::VMOVN:

    return PerformVMOVNCombine(N, DCI);

  case ARMISD::VQMOVNs:

  case ARMISD::VQMOVNu:

    return PerformVQMOVNCombine(N, DCI);

  case ARMISD::VQDMULH:

    return PerformVQDMULHCombine(N, DCI);

  case ARMISD::ASRL:

  case ARMISD::LSRL:

  case ARMISD::LSLL:

    return PerformLongShiftCombine(N, DCI.DAG);

  case ARMISD::SMULWB: {

    unsigned BitWidth = N->getValueType(0).getSizeInBits();

    APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 16);

    if (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI))

      return SDValue();

    break;

  }

  case ARMISD::SMULWT: {

    unsigned BitWidth = N->getValueType(0).getSizeInBits();

    APInt DemandedMask = APInt::getHighBitsSet(BitWidth, 16);

    if (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI))

      return SDValue();

    break;

  }

  case ARMISD::SMLALBB:

  case ARMISD::QADD16b:

  case ARMISD::QSUB16b:

  case ARMISD::UQADD16b:

  case ARMISD::UQSUB16b: {

    unsigned BitWidth = N->getValueType(0).getSizeInBits();

    APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 16);

    if ((SimplifyDemandedBits(N->getOperand(0), DemandedMask, DCI)) ||

        (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI)))

      return SDValue();

    break;

  }

  case ARMISD::SMLALBT: {

    unsigned LowWidth = N->getOperand(0).getValueType().getSizeInBits();

    APInt LowMask = APInt::getLowBitsSet(LowWidth, 16);

    unsigned HighWidth = N->getOperand(1).getValueType().getSizeInBits();

    APInt HighMask = APInt::getHighBitsSet(HighWidth, 16);

    if ((SimplifyDemandedBits(N->getOperand(0), LowMask, DCI)) ||

        (SimplifyDemandedBits(N->getOperand(1), HighMask, DCI)))

      return SDValue();

    break;

  }

  case ARMISD::SMLALTB: {

    unsigned HighWidth = N->getOperand(0).getValueType().getSizeInBits();

    APInt HighMask = APInt::getHighBitsSet(HighWidth, 16);

    unsigned LowWidth = N->getOperand(1).getValueType().getSizeInBits();

    APInt LowMask = APInt::getLowBitsSet(LowWidth, 16);

    if ((SimplifyDemandedBits(N->getOperand(0), HighMask, DCI)) ||

        (SimplifyDemandedBits(N->getOperand(1), LowMask, DCI)))

      return SDValue();

    break;

  }

  case ARMISD::SMLALTT: {

    unsigned BitWidth = N->getValueType(0).getSizeInBits();

    APInt DemandedMask = APInt::getHighBitsSet(BitWidth, 16);

    if ((SimplifyDemandedBits(N->getOperand(0), DemandedMask, DCI)) ||

        (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI)))

      return SDValue();

    break;

  }

  case ARMISD::QADD8b:

  case ARMISD::QSUB8b:

  case ARMISD::UQADD8b:

  case ARMISD::UQSUB8b: {

    unsigned BitWidth = N->getValueType(0).getSizeInBits();

    APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 8);

    if ((SimplifyDemandedBits(N->getOperand(0), DemandedMask, DCI)) ||

        (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI)))

      return SDValue();

    break;

  }

  case ARMISD::VBSP:

    if (N->getOperand(1) == N->getOperand(2))

      return N->getOperand(1);

    return SDValue();

  case ISD::INTRINSIC_VOID:

  case ISD::INTRINSIC_W_CHAIN:

    switch (N->getConstantOperandVal(1)) {

    case Intrinsic::arm_neon_vld1:

    case Intrinsic::arm_neon_vld1x2:

    case Intrinsic::arm_neon_vld1x3:

    case Intrinsic::arm_neon_vld1x4:

    case Intrinsic::arm_neon_vld2:

    case Intrinsic::arm_neon_vld3:

    case Intrinsic::arm_neon_vld4:

    case Intrinsic::arm_neon_vld2lane:

    case Intrinsic::arm_neon_vld3lane:

    case Intrinsic::arm_neon_vld4lane:

    case Intrinsic::arm_neon_vld2dup:

    case Intrinsic::arm_neon_vld3dup:

    case Intrinsic::arm_neon_vld4dup:

    case Intrinsic::arm_neon_vst1:

    case Intrinsic::arm_neon_vst1x2:

    case Intrinsic::arm_neon_vst1x3:

    case Intrinsic::arm_neon_vst1x4:

    case Intrinsic::arm_neon_vst2:

    case Intrinsic::arm_neon_vst3:

    case Intrinsic::arm_neon_vst4:

    case Intrinsic::arm_neon_vst2lane:

    case Intrinsic::arm_neon_vst3lane:

    case Intrinsic::arm_neon_vst4lane:

      return PerformVLDCombine(N, DCI);

    case Intrinsic::arm_mve_vld2q:

    case Intrinsic::arm_mve_vld4q:

    case Intrinsic::arm_mve_vst2q:

    case Intrinsic::arm_mve_vst4q:

      return PerformMVEVLDCombine(N, DCI);

    default: break;

    }

    break;

  }

  return SDValue();

}


bool ARMTargetLowering::isDesirableToTransformToIntegerOp(unsigned Opc,

                                                          EVT VT) const {

  return (VT == MVT::f32) && (Opc == ISD::LOAD || Opc == ISD::STORE);

}


bool ARMTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned,

                                                       Align Alignment,

                                                       MachineMemOperand::Flags,

                                                       unsigned *Fast) const {

  // Depends what it gets converted into if the type is weird.

  if (!VT.isSimple())

    return false;


  // The AllowsUnaligned flag models the SCTLR.A setting in ARM cpus

  bool AllowsUnaligned = Subtarget->allowsUnalignedMem();

  auto Ty = VT.getSimpleVT().SimpleTy;


  if (Ty == MVT::i8 || Ty == MVT::i16 || Ty == MVT::i32) {

    // Unaligned access can use (for example) LRDB, LRDH, LDR

    if (AllowsUnaligned) {

      if (Fast)

        *Fast = Subtarget->hasV7Ops();

      return true;

    }

  }


  if (Ty == MVT::f64 || Ty == MVT::v2f64) {

    // For any little-endian targets with neon, we can support unaligned ld/st

    // of D and Q (e.g. {D0,D1}) registers by using vld1.i8/vst1.i8.

    // A big-endian target may also explicitly support unaligned accesses

    if (Subtarget->hasNEON() && (AllowsUnaligned || Subtarget->isLittle())) {

      if (Fast)

        *Fast = 1;

      return true;

    }

  }


  if (!Subtarget->hasMVEIntegerOps())

    return false;


  // These are for predicates

  if ((Ty == MVT::v16i1 || Ty == MVT::v8i1 || Ty == MVT::v4i1 ||

       Ty == MVT::v2i1)) {

    if (Fast)

      *Fast = 1;

    return true;

  }


  // These are for truncated stores/narrowing loads. They are fine so long as

  // the alignment is at least the size of the item being loaded

  if ((Ty == MVT::v4i8 || Ty == MVT::v8i8 || Ty == MVT::v4i16) &&

      Alignment >= VT.getScalarSizeInBits() / 8) {

    if (Fast)

      *Fast = true;

    return true;

  }


  // In little-endian MVE, the store instructions VSTRB.U8, VSTRH.U16 and

  // VSTRW.U32 all store the vector register in exactly the same format, and

  // differ only in the range of their immediate offset field and the required

  // alignment. So there is always a store that can be used, regardless of

  // actual type.

  //

  // For big endian, that is not the case. But can still emit a (VSTRB.U8;

  // VREV64.8) pair and get the same effect. This will likely be better than

  // aligning the vector through the stack.

  if (Ty == MVT::v16i8 || Ty == MVT::v8i16 || Ty == MVT::v8f16 ||

      Ty == MVT::v4i32 || Ty == MVT::v4f32 || Ty == MVT::v2i64 ||

      Ty == MVT::v2f64) {

    if (Fast)

      *Fast = 1;

    return true;

  }


  return false;

}


EVT ARMTargetLowering::getOptimalMemOpType(

    LLVMContext &Context, const MemOp &Op,

    const AttributeList &FuncAttributes) const {

  // See if we can use NEON instructions for this...

  if ((Op.isMemcpy() || Op.isZeroMemset()) && Subtarget->hasNEON() &&

      !FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat)) {

    unsigned Fast;

    if (Op.size() >= 16 &&

        (Op.isAligned(Align(16)) ||

         (allowsMisalignedMemoryAccesses(MVT::v2f64, 0, Align(1),

                                         MachineMemOperand::MONone, &Fast) &&

          Fast))) {

      return MVT::v2f64;

    } else if (Op.size() >= 8 &&

               (Op.isAligned(Align(8)) ||

                (allowsMisalignedMemoryAccesses(

                     MVT::f64, 0, Align(1), MachineMemOperand::MONone, &Fast) &&

                 Fast))) {

      return MVT::f64;

    }

  }


  // Let the target-independent logic figure it out.

  return MVT::Other;

}


// 64-bit integers are split into their high and low parts and held in two

// different registers, so the trunc is free since the low register can just

// be used.


bool ARMTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {

  if (!SrcTy->isIntegerTy() || !DstTy->isIntegerTy())

    return false;

  unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();

  unsigned DestBits = DstTy->getPrimitiveSizeInBits();

  return (SrcBits == 64 && DestBits == 32);

}


bool ARMTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {

  if (SrcVT.isVector() || DstVT.isVector() || !SrcVT.isInteger() ||

      !DstVT.isInteger())

    return false;

  unsigned SrcBits = SrcVT.getSizeInBits();

  unsigned DestBits = DstVT.getSizeInBits();

  return (SrcBits == 64 && DestBits == 32);

}


bool ARMTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {

  if (Val.getOpcode() != ISD::LOAD)

    return false;


  EVT VT1 = Val.getValueType();

  if (!VT1.isSimple() || !VT1.isInteger() ||

      !VT2.isSimple() || !VT2.isInteger())

    return false;


  switch (VT1.getSimpleVT().SimpleTy) {

  default: break;

  case MVT::i1:

  case MVT::i8:

  case MVT::i16:

    // 8-bit and 16-bit loads implicitly zero-extend to 32-bits.

    return true;

  }


  return false;

}


bool ARMTargetLowering::isFNegFree(EVT VT) const {

  if (!VT.isSimple())

    return false;


  // There are quite a few FP16 instructions (e.g. VNMLA, VNMLS, etc.) that

  // negate values directly (fneg is free). So, we don't want to let the DAG

  // combiner rewrite fneg into xors and some other instructions.  For f16 and

  // FullFP16 argument passing, some bitcast nodes may be introduced,

  // triggering this DAG combine rewrite, so we are avoiding that with this.

  switch (VT.getSimpleVT().SimpleTy) {

  default: break;

  case MVT::f16:

    return Subtarget->hasFullFP16();

  }


  return false;

}


Type *ARMTargetLowering::shouldConvertSplatType(ShuffleVectorInst *SVI) const {

  if (!Subtarget->hasMVEIntegerOps())

    return nullptr;

  Type *SVIType = SVI->getType();

  Type *ScalarType = SVIType->getScalarType();


  if (ScalarType->isFloatTy())

    return Type::getInt32Ty(SVIType->getContext());

  if (ScalarType->isHalfTy())

    return Type::getInt16Ty(SVIType->getContext());

  return nullptr;

}


bool ARMTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {

  EVT VT = ExtVal.getValueType();


  if (!isTypeLegal(VT))

    return false;


  if (auto *Ld = dyn_cast<MaskedLoadSDNode>(ExtVal.getOperand(0))) {

    if (Ld->isExpandingLoad())

      return false;

  }


  if (Subtarget->hasMVEIntegerOps())

    return true;


  // Don't create a loadext if we can fold the extension into a wide/long

  // instruction.

  // If there's more than one user instruction, the loadext is desirable no

  // matter what.  There can be two uses by the same instruction.

  if (ExtVal->use_empty() ||

      !ExtVal->user_begin()->isOnlyUserOf(ExtVal.getNode()))

    return true;


  SDNode *U = *ExtVal->user_begin();

  if ((U->getOpcode() == ISD::ADD || U->getOpcode() == ISD::SUB ||

       U->getOpcode() == ISD::SHL || U->getOpcode() == ARMISD::VSHLIMM))

    return false;


  return true;

}


bool ARMTargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const {

  if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())

    return false;


  if (!isTypeLegal(EVT::getEVT(Ty1)))

    return false;


  assert(Ty1->getPrimitiveSizeInBits() <= 64 && "i128 is probably not a noop");


  // Assuming the caller doesn't have a zeroext or signext return parameter,

  // truncation all the way down to i1 is valid.

  return true;

}


/// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster

/// than a pair of fmul and fadd instructions. fmuladd intrinsics will be

/// expanded to FMAs when this method returns true, otherwise fmuladd is

/// expanded to fmul + fadd.

///

/// ARM supports both fused and unfused multiply-add operations; we already

/// lower a pair of fmul and fadd to the latter so it's not clear that there

/// would be a gain or that the gain would be worthwhile enough to risk

/// correctness bugs.

///

/// For MVE, we set this to true as it helps simplify the need for some

/// patterns (and we don't have the non-fused floating point instruction).

bool ARMTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,

                                                   EVT VT) const {

  if (Subtarget->useSoftFloat())

    return false;


  if (!VT.isSimple())

    return false;


  switch (VT.getSimpleVT().SimpleTy) {

  case MVT::v4f32:

  case MVT::v8f16:

    return Subtarget->hasMVEFloatOps();

  case MVT::f16:

    return Subtarget->useFPVFMx16();

  case MVT::f32:

    return Subtarget->useFPVFMx();

  case MVT::f64:

    return Subtarget->useFPVFMx64();

  default:

    break;

  }


  return false;

}


static bool isLegalT1AddressImmediate(int64_t V, EVT VT) {

  if (V < 0)

    return false;


  unsigned Scale = 1;

  switch (VT.getSimpleVT().SimpleTy) {

  case MVT::i1:

  case MVT::i8:

    // Scale == 1;

    break;

  case MVT::i16:

    // Scale == 2;

    Scale = 2;

    break;

  default:

    // On thumb1 we load most things (i32, i64, floats, etc) with a LDR

    // Scale == 4;

    Scale = 4;

    break;

  }


  if ((V & (Scale - 1)) != 0)

    return false;

  return isUInt<5>(V / Scale);

}


static bool isLegalT2AddressImmediate(int64_t V, EVT VT,

                                      const ARMSubtarget *Subtarget) {

  if (!VT.isInteger() && !VT.isFloatingPoint())

    return false;

  if (VT.isVector() && Subtarget->hasNEON())

    return false;

  if (VT.isVector() && VT.isFloatingPoint() && Subtarget->hasMVEIntegerOps() &&

      !Subtarget->hasMVEFloatOps())

    return false;


  bool IsNeg = false;

  if (V < 0) {

    IsNeg = true;

    V = -V;

  }


  unsigned NumBytes = std::max((unsigned)VT.getSizeInBits() / 8, 1U);


  // MVE: size * imm7

  if (VT.isVector() && Subtarget->hasMVEIntegerOps()) {

    switch (VT.getSimpleVT().getVectorElementType().SimpleTy) {

    case MVT::i32:

    case MVT::f32:

      return isShiftedUInt<7,2>(V);

    case MVT::i16:

    case MVT::f16:

      return isShiftedUInt<7,1>(V);

    case MVT::i8:

      return isUInt<7>(V);

    default:

      return false;

    }

  }


  // half VLDR: 2 * imm8

  if (VT.isFloatingPoint() && NumBytes == 2 && Subtarget->hasFPRegs16())

    return isShiftedUInt<8, 1>(V);

  // VLDR and LDRD: 4 * imm8

  if ((VT.isFloatingPoint() && Subtarget->hasVFP2Base()) || NumBytes == 8)

    return isShiftedUInt<8, 2>(V);


  if (NumBytes == 1 || NumBytes == 2 || NumBytes == 4) {

    // + imm12 or - imm8

    if (IsNeg)

      return isUInt<8>(V);

    return isUInt<12>(V);

  }


  return false;

}


/// isLegalAddressImmediate - Return true if the integer value can be used

/// as the offset of the target addressing mode for load / store of the

/// given type.


static bool isLegalAddressImmediate(int64_t V, EVT VT,

                                    const ARMSubtarget *Subtarget) {

  if (V == 0)

    return true;


  if (!VT.isSimple())

    return false;


  if (Subtarget->isThumb1Only())

    return isLegalT1AddressImmediate(V, VT);

  else if (Subtarget->isThumb2())

    return isLegalT2AddressImmediate(V, VT, Subtarget);


  // ARM mode.

  if (V < 0)

    V = - V;

  switch (VT.getSimpleVT().SimpleTy) {

  default: return false;

  case MVT::i1:

  case MVT::i8:

  case MVT::i32:

    // +- imm12

    return isUInt<12>(V);

  case MVT::i16:

    // +- imm8

    return isUInt<8>(V);

  case MVT::f32:

  case MVT::f64:

    if (!Subtarget->hasVFP2Base()) // FIXME: NEON?

      return false;

    return isShiftedUInt<8, 2>(V);

  }

}


bool ARMTargetLowering::isLegalT2ScaledAddressingMode(const AddrMode &AM,

                                                      EVT VT) const {

  int Scale = AM.Scale;

  if (Scale < 0)

    return false;


  switch (VT.getSimpleVT().SimpleTy) {

  default: return false;

  case MVT::i1:

  case MVT::i8:

  case MVT::i16:

  case MVT::i32:

    if (Scale == 1)

      return true;

    // r + r << imm

    Scale = Scale & ~1;

    return Scale == 2 || Scale == 4 || Scale == 8;

  case MVT::i64:

    // FIXME: What are we trying to model here? ldrd doesn't have an r + r

    // version in Thumb mode.

    // r + r

    if (Scale == 1)

      return true;

    // r * 2 (this can be lowered to r + r).

    if (!AM.HasBaseReg && Scale == 2)

      return true;

    return false;

  case MVT::isVoid:

    // Note, we allow "void" uses (basically, uses that aren't loads or

    // stores), because arm allows folding a scale into many arithmetic

    // operations.  This should be made more precise and revisited later.


    // Allow r << imm, but the imm has to be a multiple of two.

    if (Scale & 1) return false;

    return isPowerOf2_32(Scale);

  }

}


bool ARMTargetLowering::isLegalT1ScaledAddressingMode(const AddrMode &AM,

                                                      EVT VT) const {

  const int Scale = AM.Scale;


  // Negative scales are not supported in Thumb1.

  if (Scale < 0)

    return false;


  // Thumb1 addressing modes do not support register scaling excepting the

  // following cases:

  // 1. Scale == 1 means no scaling.

  // 2. Scale == 2 this can be lowered to r + r if there is no base register.

  return (Scale == 1) || (!AM.HasBaseReg && Scale == 2);

}


/// isLegalAddressingMode - Return true if the addressing mode represented

/// by AM is legal for this target, for a load/store of the specified type.


bool ARMTargetLowering::isLegalAddressingMode(const DataLayout &DL,

                                              const AddrMode &AM, Type *Ty,

                                              unsigned AS, Instruction *I) const {

  EVT VT = getValueType(DL, Ty, true);

  if (!isLegalAddressImmediate(AM.BaseOffs, VT, Subtarget))

    return false;


  // Can never fold addr of global into load/store.

  if (AM.BaseGV)

    return false;


  switch (AM.Scale) {

  case 0:  // no scale reg, must be "r+i" or "r", or "i".

    break;

  default:

    // ARM doesn't support any R+R*scale+imm addr modes.

    if (AM.BaseOffs)

      return false;


    if (!VT.isSimple())

      return false;


    if (Subtarget->isThumb1Only())

      return isLegalT1ScaledAddressingMode(AM, VT);


    if (Subtarget->isThumb2())

      return isLegalT2ScaledAddressingMode(AM, VT);


    int Scale = AM.Scale;

    switch (VT.getSimpleVT().SimpleTy) {

    default: return false;

    case MVT::i1:

    case MVT::i8:

    case MVT::i32:

      if (Scale < 0) Scale = -Scale;

      if (Scale == 1)

        return true;

      // r + r << imm

      return isPowerOf2_32(Scale & ~1);

    case MVT::i16:

    case MVT::i64:

      // r +/- r

      if (Scale == 1 || (AM.HasBaseReg && Scale == -1))

        return true;

      // r * 2 (this can be lowered to r + r).

      if (!AM.HasBaseReg && Scale == 2)

        return true;

      return false;


    case MVT::isVoid:

      // Note, we allow "void" uses (basically, uses that aren't loads or

      // stores), because arm allows folding a scale into many arithmetic

      // operations.  This should be made more precise and revisited later.


      // Allow r << imm, but the imm has to be a multiple of two.

      if (Scale & 1) return false;

      return isPowerOf2_32(Scale);

    }

  }

  return true;

}


/// isLegalICmpImmediate - Return true if the specified immediate is legal

/// icmp immediate, that is the target has icmp instructions which can compare

/// a register against the immediate without having to materialize the

/// immediate into a register.


bool ARMTargetLowering::isLegalICmpImmediate(int64_t Imm) const {

  // Thumb2 and ARM modes can use cmn for negative immediates.

  if (!Subtarget->isThumb())

    return ARM_AM::getSOImmVal((uint32_t)Imm) != -1 ||

           ARM_AM::getSOImmVal(-(uint32_t)Imm) != -1;

  if (Subtarget->isThumb2())

    return ARM_AM::getT2SOImmVal((uint32_t)Imm) != -1 ||

           ARM_AM::getT2SOImmVal(-(uint32_t)Imm) != -1;

  // Thumb1 doesn't have cmn, and only 8-bit immediates.

  return Imm >= 0 && Imm <= 255;

}


/// isLegalAddImmediate - Return true if the specified immediate is a legal add

/// *or sub* immediate, that is the target has add or sub instructions which can

/// add a register with the immediate without having to materialize the

/// immediate into a register.


bool ARMTargetLowering::isLegalAddImmediate(int64_t Imm) const {

  // Same encoding for add/sub, just flip the sign.

  uint64_t AbsImm = AbsoluteValue(Imm);

  if (!Subtarget->isThumb())

    return ARM_AM::getSOImmVal(AbsImm) != -1;

  if (Subtarget->isThumb2())

    return ARM_AM::getT2SOImmVal(AbsImm) != -1;

  // Thumb1 only has 8-bit unsigned immediate.

  return AbsImm <= 255;

}


// Return false to prevent folding

// (mul (add r, c0), c1) -> (add (mul r, c1), c0*c1) in DAGCombine,

// if the folding leads to worse code.


bool ARMTargetLowering::isMulAddWithConstProfitable(SDValue AddNode,

                                                    SDValue ConstNode) const {

  // Let the DAGCombiner decide for vector types and large types.

  const EVT VT = AddNode.getValueType();

  if (VT.isVector() || VT.getScalarSizeInBits() > 32)

    return true;


  // It is worse if c0 is legal add immediate, while c1*c0 is not

  // and has to be composed by at least two instructions.

  const ConstantSDNode *C0Node = cast<ConstantSDNode>(AddNode.getOperand(1));

  const ConstantSDNode *C1Node = cast<ConstantSDNode>(ConstNode);

  const int64_t C0 = C0Node->getSExtValue();

  APInt CA = C0Node->getAPIntValue() * C1Node->getAPIntValue();

  if (!isLegalAddImmediate(C0) || isLegalAddImmediate(CA.getSExtValue()))

    return true;

  if (ConstantMaterializationCost((unsigned)CA.getZExtValue(), Subtarget) > 1)

    return false;


  // Default to true and let the DAGCombiner decide.

  return true;

}


static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT,

                                      bool isSEXTLoad, SDValue &Base,

                                      SDValue &Offset, bool &isInc,

                                      SelectionDAG &DAG) {

  if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)

    return false;


  if (VT == MVT::i16 || ((VT == MVT::i8 || VT == MVT::i1) && isSEXTLoad)) {

    // AddressingMode 3

    Base = Ptr->getOperand(0);

    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {

      int RHSC = (int)RHS->getZExtValue();

      if (RHSC < 0 && RHSC > -256) {

        assert(Ptr->getOpcode() == ISD::ADD);

        isInc = false;

        Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));

        return true;

      }

    }

    isInc = (Ptr->getOpcode() == ISD::ADD);

    Offset = Ptr->getOperand(1);

    return true;

  } else if (VT == MVT::i32 || VT == MVT::i8 || VT == MVT::i1) {

    // AddressingMode 2

    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {

      int RHSC = (int)RHS->getZExtValue();

      if (RHSC < 0 && RHSC > -0x1000) {

        assert(Ptr->getOpcode() == ISD::ADD);

        isInc = false;

        Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));

        Base = Ptr->getOperand(0);

        return true;

      }

    }


    if (Ptr->getOpcode() == ISD::ADD) {

      isInc = true;

      ARM_AM::ShiftOpc ShOpcVal=

        ARM_AM::getShiftOpcForNode(Ptr->getOperand(0).getOpcode());

      if (ShOpcVal != ARM_AM::no_shift) {

        Base = Ptr->getOperand(1);

        Offset = Ptr->getOperand(0);

      } else {

        Base = Ptr->getOperand(0);

        Offset = Ptr->getOperand(1);

      }

      return true;

    }


    isInc = (Ptr->getOpcode() == ISD::ADD);

    Base = Ptr->getOperand(0);

    Offset = Ptr->getOperand(1);

    return true;

  }


  // FIXME: Use VLDM / VSTM to emulate indexed FP load / store.

  return false;

}


static bool getT2IndexedAddressParts(SDNode *Ptr, EVT VT,

                                     bool isSEXTLoad, SDValue &Base,

                                     SDValue &Offset, bool &isInc,

                                     SelectionDAG &DAG) {

  if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)

    return false;


  Base = Ptr->getOperand(0);

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {

    int RHSC = (int)RHS->getZExtValue();

    if (RHSC < 0 && RHSC > -0x100) { // 8 bits.

      assert(Ptr->getOpcode() == ISD::ADD);

      isInc = false;

      Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));

      return true;

    } else if (RHSC > 0 && RHSC < 0x100) { // 8 bit, no zero.

      isInc = Ptr->getOpcode() == ISD::ADD;

      Offset = DAG.getConstant(RHSC, SDLoc(Ptr), RHS->getValueType(0));

      return true;

    }

  }


  return false;

}


static bool getMVEIndexedAddressParts(SDNode *Ptr, EVT VT, Align Alignment,

                                      bool isSEXTLoad, bool IsMasked, bool isLE,

                                      SDValue &Base, SDValue &Offset,

                                      bool &isInc, SelectionDAG &DAG) {

  if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)

    return false;

  if (!isa<ConstantSDNode>(Ptr->getOperand(1)))

    return false;


  // We allow LE non-masked loads to change the type (for example use a vldrb.8

  // as opposed to a vldrw.32). This can allow extra addressing modes or

  // alignments for what is otherwise an equivalent instruction.

  bool CanChangeType = isLE && !IsMasked;


  ConstantSDNode *RHS = cast<ConstantSDNode>(Ptr->getOperand(1));

  int RHSC = (int)RHS->getZExtValue();


  auto IsInRange = [&](int RHSC, int Limit, int Scale) {

    if (RHSC < 0 && RHSC > -Limit * Scale && RHSC % Scale == 0) {

      assert(Ptr->getOpcode() == ISD::ADD);

      isInc = false;

      Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));

      return true;

    } else if (RHSC > 0 && RHSC < Limit * Scale && RHSC % Scale == 0) {

      isInc = Ptr->getOpcode() == ISD::ADD;

      Offset = DAG.getConstant(RHSC, SDLoc(Ptr), RHS->getValueType(0));

      return true;

    }

    return false;

  };


  // Try to find a matching instruction based on s/zext, Alignment, Offset and

  // (in BE/masked) type.

  Base = Ptr->getOperand(0);

  if (VT == MVT::v4i16) {

    if (Alignment >= 2 && IsInRange(RHSC, 0x80, 2))

      return true;

  } else if (VT == MVT::v4i8 || VT == MVT::v8i8) {

    if (IsInRange(RHSC, 0x80, 1))

      return true;

  } else if (Alignment >= 4 &&

             (CanChangeType || VT == MVT::v4i32 || VT == MVT::v4f32) &&

             IsInRange(RHSC, 0x80, 4))

    return true;

  else if (Alignment >= 2 &&

           (CanChangeType || VT == MVT::v8i16 || VT == MVT::v8f16) &&

           IsInRange(RHSC, 0x80, 2))

    return true;

  else if ((CanChangeType || VT == MVT::v16i8) && IsInRange(RHSC, 0x80, 1))

    return true;

  return false;

}


/// getPreIndexedAddressParts - returns true by value, base pointer and

/// offset pointer and addressing mode by reference if the node's address

/// can be legally represented as pre-indexed load / store address.

bool


ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,

                                             SDValue &Offset,

                                             ISD::MemIndexedMode &AM,

                                             SelectionDAG &DAG) const {

  if (Subtarget->isThumb1Only())

    return false;


  EVT VT;

  SDValue Ptr;

  Align Alignment;

  bool isSEXTLoad = false;

  bool IsMasked = false;

  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {

    Ptr = LD->getBasePtr();

    VT = LD->getMemoryVT();

    Alignment = LD->getAlign();

    isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;

  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {

    Ptr = ST->getBasePtr();

    VT = ST->getMemoryVT();

    Alignment = ST->getAlign();

  } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {

    Ptr = LD->getBasePtr();

    VT = LD->getMemoryVT();

    Alignment = LD->getAlign();

    isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;

    IsMasked = true;

  } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(N)) {

    Ptr = ST->getBasePtr();

    VT = ST->getMemoryVT();

    Alignment = ST->getAlign();

    IsMasked = true;

  } else

    return false;


  bool isInc;

  bool isLegal = false;

  if (VT.isVector())

    isLegal = Subtarget->hasMVEIntegerOps() &&

              getMVEIndexedAddressParts(

                  Ptr.getNode(), VT, Alignment, isSEXTLoad, IsMasked,

                  Subtarget->isLittle(), Base, Offset, isInc, DAG);

  else {

    if (Subtarget->isThumb2())

      isLegal = getT2IndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,

                                         Offset, isInc, DAG);

    else

      isLegal = getARMIndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,

                                          Offset, isInc, DAG);

  }

  if (!isLegal)

    return false;


  AM = isInc ? ISD::PRE_INC : ISD::PRE_DEC;

  return true;

}


/// getPostIndexedAddressParts - returns true by value, base pointer and

/// offset pointer and addressing mode by reference if this node can be

/// combined with a load / store to form a post-indexed load / store.


bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,

                                                   SDValue &Base,

                                                   SDValue &Offset,

                                                   ISD::MemIndexedMode &AM,

                                                   SelectionDAG &DAG) const {

  EVT VT;

  SDValue Ptr;

  Align Alignment;

  bool isSEXTLoad = false, isNonExt;

  bool IsMasked = false;

  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {

    VT = LD->getMemoryVT();

    Ptr = LD->getBasePtr();

    Alignment = LD->getAlign();

    isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;

    isNonExt = LD->getExtensionType() == ISD::NON_EXTLOAD;

  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {

    VT = ST->getMemoryVT();

    Ptr = ST->getBasePtr();

    Alignment = ST->getAlign();

    isNonExt = !ST->isTruncatingStore();

  } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {

    VT = LD->getMemoryVT();

    Ptr = LD->getBasePtr();

    Alignment = LD->getAlign();

    isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;

    isNonExt = LD->getExtensionType() == ISD::NON_EXTLOAD;

    IsMasked = true;

  } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(N)) {

    VT = ST->getMemoryVT();

    Ptr = ST->getBasePtr();

    Alignment = ST->getAlign();

    isNonExt = !ST->isTruncatingStore();

    IsMasked = true;

  } else

    return false;


  if (Subtarget->isThumb1Only()) {

    // Thumb-1 can do a limited post-inc load or store as an updating LDM. It

    // must be non-extending/truncating, i32, with an offset of 4.

    assert(Op->getValueType(0) == MVT::i32 && "Non-i32 post-inc op?!");

    if (Op->getOpcode() != ISD::ADD || !isNonExt)

      return false;

    auto *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1));

    if (!RHS || RHS->getZExtValue() != 4)

      return false;

    if (Alignment < Align(4))

      return false;


    Offset = Op->getOperand(1);

    Base = Op->getOperand(0);

    AM = ISD::POST_INC;

    return true;

  }


  bool isInc;

  bool isLegal = false;

  if (VT.isVector())

    isLegal = Subtarget->hasMVEIntegerOps() &&

              getMVEIndexedAddressParts(Op, VT, Alignment, isSEXTLoad, IsMasked,

                                        Subtarget->isLittle(), Base, Offset,

                                        isInc, DAG);

  else {

    if (Subtarget->isThumb2())

      isLegal = getT2IndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,

                                         isInc, DAG);

    else

      isLegal = getARMIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,

                                          isInc, DAG);

  }

  if (!isLegal)

    return false;


  if (Ptr != Base) {

    // Swap base ptr and offset to catch more post-index load / store when

    // it's legal. In Thumb2 mode, offset must be an immediate.

    if (Ptr == Offset && Op->getOpcode() == ISD::ADD &&

        !Subtarget->isThumb2())

      std::swap(Base, Offset);


    // Post-indexed load / store update the base pointer.

    if (Ptr != Base)

      return false;

  }


  AM = isInc ? ISD::POST_INC : ISD::POST_DEC;

  return true;

}


void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,

                                                      KnownBits &Known,

                                                      const APInt &DemandedElts,

                                                      const SelectionDAG &DAG,

                                                      unsigned Depth) const {

  unsigned BitWidth = Known.getBitWidth();

  Known.resetAll();

  switch (Op.getOpcode()) {

  default: break;

  case ARMISD::ADDC:

  case ARMISD::ADDE:

  case ARMISD::SUBC:

  case ARMISD::SUBE:

    // Special cases when we convert a carry to a boolean.

    if (Op.getResNo() == 0) {

      SDValue LHS = Op.getOperand(0);

      SDValue RHS = Op.getOperand(1);

      // (ADDE 0, 0, C) will give us a single bit.

      if (Op->getOpcode() == ARMISD::ADDE && isNullConstant(LHS) &&

          isNullConstant(RHS)) {

        Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1);

        return;

      }

    }

    break;

  case ARMISD::CMOV: {

    // Bits are known zero/one if known on the LHS and RHS.

    Known = DAG.computeKnownBits(Op.getOperand(0), Depth+1);

    if (Known.isUnknown())

      return;


    KnownBits KnownRHS = DAG.computeKnownBits(Op.getOperand(1), Depth+1);

    Known = Known.intersectWith(KnownRHS);

    return;

  }

  case ISD::INTRINSIC_W_CHAIN: {

    Intrinsic::ID IntID =

        static_cast<Intrinsic::ID>(Op->getConstantOperandVal(1));

    switch (IntID) {

    default: return;

    case Intrinsic::arm_ldaex:

    case Intrinsic::arm_ldrex: {

      EVT VT = cast<MemIntrinsicSDNode>(Op)->getMemoryVT();

      unsigned MemBits = VT.getScalarSizeInBits();

      Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);

      return;

    }

    }

  }

  case ARMISD::BFI: {

    // Conservatively, we can recurse down the first operand

    // and just mask out all affected bits.

    Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);


    // The operand to BFI is already a mask suitable for removing the bits it

    // sets.

    const APInt &Mask = Op.getConstantOperandAPInt(2);

    Known.Zero &= Mask;

    Known.One &= Mask;

    return;

  }

  case ARMISD::VGETLANEs:

  case ARMISD::VGETLANEu: {

    const SDValue &SrcSV = Op.getOperand(0);

    EVT VecVT = SrcSV.getValueType();

    assert(VecVT.isVector() && "VGETLANE expected a vector type");

    const unsigned NumSrcElts = VecVT.getVectorNumElements();

    ConstantSDNode *Pos = cast<ConstantSDNode>(Op.getOperand(1).getNode());

    assert(Pos->getAPIntValue().ult(NumSrcElts) &&

           "VGETLANE index out of bounds");

    unsigned Idx = Pos->getZExtValue();

    APInt DemandedElt = APInt::getOneBitSet(NumSrcElts, Idx);

    Known = DAG.computeKnownBits(SrcSV, DemandedElt, Depth + 1);


    EVT VT = Op.getValueType();

    const unsigned DstSz = VT.getScalarSizeInBits();

    const unsigned SrcSz = VecVT.getVectorElementType().getSizeInBits();

    (void)SrcSz;

    assert(SrcSz == Known.getBitWidth());

    assert(DstSz > SrcSz);

    if (Op.getOpcode() == ARMISD::VGETLANEs)

      Known = Known.sext(DstSz);

    else {

      Known = Known.zext(DstSz);

    }

    assert(DstSz == Known.getBitWidth());

    break;

  }

  case ARMISD::VMOVrh: {

    KnownBits KnownOp = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);

    assert(KnownOp.getBitWidth() == 16);

    Known = KnownOp.zext(32);

    break;

  }

  case ARMISD::CSINC:

  case ARMISD::CSINV:

  case ARMISD::CSNEG: {

    KnownBits KnownOp0 = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);

    KnownBits KnownOp1 = DAG.computeKnownBits(Op->getOperand(1), Depth + 1);


    // The result is either:

    // CSINC: KnownOp0 or KnownOp1 + 1

    // CSINV: KnownOp0 or ~KnownOp1

    // CSNEG: KnownOp0 or KnownOp1 * -1

    if (Op.getOpcode() == ARMISD::CSINC)

      KnownOp1 =

          KnownBits::add(KnownOp1, KnownBits::makeConstant(APInt(32, 1)));

    else if (Op.getOpcode() == ARMISD::CSINV)

      std::swap(KnownOp1.Zero, KnownOp1.One);

    else if (Op.getOpcode() == ARMISD::CSNEG)

      KnownOp1 = KnownBits::mul(KnownOp1,

                                KnownBits::makeConstant(APInt::getAllOnes(32)));


    Known = KnownOp0.intersectWith(KnownOp1);

    break;

  }

  case ARMISD::VORRIMM:

  case ARMISD::VBICIMM: {

    unsigned Encoded = Op.getConstantOperandVal(1);

    unsigned DecEltBits = 0;

    uint64_t DecodedVal = ARM_AM::decodeVMOVModImm(Encoded, DecEltBits);


    unsigned EltBits = Op.getScalarValueSizeInBits();

    if (EltBits != DecEltBits) {

      // Be conservative: only update Known when EltBits == DecEltBits.

      // This is believed to always be true for VORRIMM/VBICIMM today, but if

      // that changes in the future, doing nothing here is safer than risking

      // subtle bugs.

      break;

    }


    KnownBits KnownLHS = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);

    bool IsVORR = Op.getOpcode() == ARMISD::VORRIMM;

    APInt Imm(DecEltBits, DecodedVal);


    Known.One = IsVORR ? (KnownLHS.One | Imm) : (KnownLHS.One & ~Imm);

    Known.Zero = IsVORR ? (KnownLHS.Zero & ~Imm) : (KnownLHS.Zero | Imm);

    break;

  }

  }

}


bool ARMTargetLowering::targetShrinkDemandedConstant(

    SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,

    TargetLoweringOpt &TLO) const {

  // Delay optimization, so we don't have to deal with illegal types, or block

  // optimizations.

  if (!TLO.LegalOps)

    return false;


  // Only optimize AND for now.

  if (Op.getOpcode() != ISD::AND)

    return false;


  EVT VT = Op.getValueType();


  // Ignore vectors.

  if (VT.isVector())

    return false;


  assert(VT == MVT::i32 && "Unexpected integer type");


  // Make sure the RHS really is a constant.

  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));

  if (!C)

    return false;


  unsigned Mask = C->getZExtValue();


  unsigned Demanded = DemandedBits.getZExtValue();

  unsigned ShrunkMask = Mask & Demanded;

  unsigned ExpandedMask = Mask | ~Demanded;


  // If the mask is all zeros, let the target-independent code replace the

  // result with zero.

  if (ShrunkMask == 0)

    return false;


  // If the mask is all ones, erase the AND. (Currently, the target-independent

  // code won't do this, so we have to do it explicitly to avoid an infinite

  // loop in obscure cases.)

  if (ExpandedMask == ~0U)

    return TLO.CombineTo(Op, Op.getOperand(0));


  auto IsLegalMask = [ShrunkMask, ExpandedMask](unsigned Mask) -> bool {

    return (ShrunkMask & Mask) == ShrunkMask && (~ExpandedMask & Mask) == 0;

  };

  auto UseMask = [Mask, Op, VT, &TLO](unsigned NewMask) -> bool {

    if (NewMask == Mask)

      return true;

    SDLoc DL(Op);

    SDValue NewC = TLO.DAG.getConstant(NewMask, DL, VT);

    SDValue NewOp = TLO.DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), NewC);

    return TLO.CombineTo(Op, NewOp);

  };


  // Prefer uxtb mask.

  if (IsLegalMask(0xFF))

    return UseMask(0xFF);


  // Prefer uxth mask.

  if (IsLegalMask(0xFFFF))

    return UseMask(0xFFFF);


  // [1, 255] is Thumb1 movs+ands, legal immediate for ARM/Thumb2.

  // FIXME: Prefer a contiguous sequence of bits for other optimizations.

  if (ShrunkMask < 256)

    return UseMask(ShrunkMask);


  // [-256, -2] is Thumb1 movs+bics, legal immediate for ARM/Thumb2.

  // FIXME: Prefer a contiguous sequence of bits for other optimizations.

  if ((int)ExpandedMask <= -2 && (int)ExpandedMask >= -256)

    return UseMask(ExpandedMask);


  // Potential improvements:

  //

  // We could try to recognize lsls+lsrs or lsrs+lsls pairs here.

  // We could try to prefer Thumb1 immediates which can be lowered to a

  // two-instruction sequence.

  // We could try to recognize more legal ARM/Thumb2 immediates here.


  return false;

}


bool ARMTargetLowering::SimplifyDemandedBitsForTargetNode(

    SDValue Op, const APInt &OriginalDemandedBits,

    const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,

    unsigned Depth) const {

  unsigned Opc = Op.getOpcode();


  switch (Opc) {

  case ARMISD::ASRL:

  case ARMISD::LSRL: {

    // If this is result 0 and the other result is unused, see if the demand

    // bits allow us to shrink this long shift into a standard small shift in

    // the opposite direction.

    if (Op.getResNo() == 0 && !Op->hasAnyUseOfValue(1) &&

        isa<ConstantSDNode>(Op->getOperand(2))) {

      unsigned ShAmt = Op->getConstantOperandVal(2);

      if (ShAmt < 32 && OriginalDemandedBits.isSubsetOf(APInt::getAllOnes(32)

                                                        << (32 - ShAmt)))

        return TLO.CombineTo(

            Op, TLO.DAG.getNode(

                    ISD::SHL, SDLoc(Op), MVT::i32, Op.getOperand(1),

                    TLO.DAG.getConstant(32 - ShAmt, SDLoc(Op), MVT::i32)));

    }

    break;

  }

  case ARMISD::VBICIMM: {

    SDValue Op0 = Op.getOperand(0);

    unsigned ModImm = Op.getConstantOperandVal(1);

    unsigned EltBits = 0;

    uint64_t Mask = ARM_AM::decodeVMOVModImm(ModImm, EltBits);

    if ((OriginalDemandedBits & Mask) == 0)

      return TLO.CombineTo(Op, Op0);

  }

  }


  return TargetLowering::SimplifyDemandedBitsForTargetNode(

      Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth);

}


//===----------------------------------------------------------------------===//

//                           ARM Inline Assembly Support

//===----------------------------------------------------------------------===//


const char *ARMTargetLowering::LowerXConstraint(EVT ConstraintVT) const {

  // At this point, we have to lower this constraint to something else, so we

  // lower it to an "r" or "w". However, by doing this we will force the result

  // to be in register, while the X constraint is much more permissive.

  //

  // Although we are correct (we are free to emit anything, without

  // constraints), we might break use cases that would expect us to be more

  // efficient and emit something else.

  if (!Subtarget->hasVFP2Base())

    return "r";

  if (ConstraintVT.isFloatingPoint())

    return "w";

  if (ConstraintVT.isVector() && Subtarget->hasNEON() &&

     (ConstraintVT.getSizeInBits() == 64 ||

      ConstraintVT.getSizeInBits() == 128))

    return "w";


  return "r";

}


/// getConstraintType - Given a constraint letter, return the type of

/// constraint it is for this target.

ARMTargetLowering::ConstraintType


ARMTargetLowering::getConstraintType(StringRef Constraint) const {

  unsigned S = Constraint.size();

  if (S == 1) {

    switch (Constraint[0]) {

    default:  break;

    case 'l': return C_RegisterClass;

    case 'w': return C_RegisterClass;

    case 'h': return C_RegisterClass;

    case 'x': return C_RegisterClass;

    case 't': return C_RegisterClass;

    case 'j': return C_Immediate; // Constant for movw.

    // An address with a single base register. Due to the way we

    // currently handle addresses it is the same as an 'r' memory constraint.

    case 'Q': return C_Memory;

    }

  } else if (S == 2) {

    switch (Constraint[0]) {

    default: break;

    case 'T': return C_RegisterClass;

    // All 'U+' constraints are addresses.

    case 'U': return C_Memory;

    }

  }

  return TargetLowering::getConstraintType(Constraint);

}


/// Examine constraint type and operand type and determine a weight value.

/// This object must already have been set up with the operand type

/// and the current alternative constraint selected.

TargetLowering::ConstraintWeight


ARMTargetLowering::getSingleConstraintMatchWeight(

    AsmOperandInfo &info, const char *constraint) const {

  ConstraintWeight weight = CW_Invalid;

  Value *CallOperandVal = info.CallOperandVal;

    // If we don't have a value, we can't do a match,

    // but allow it at the lowest weight.

  if (!CallOperandVal)

    return CW_Default;

  Type *type = CallOperandVal->getType();

  // Look at the constraint type.

  switch (*constraint) {

  default:

    weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);

    break;

  case 'l':

    if (type->isIntegerTy()) {

      if (Subtarget->isThumb())

        weight = CW_SpecificReg;

      else

        weight = CW_Register;

    }

    break;

  case 'w':

    if (type->isFloatingPointTy())

      weight = CW_Register;

    break;

  }

  return weight;

}


static bool isIncompatibleReg(const MCPhysReg &PR, MVT VT) {

  if (PR == 0 || VT == MVT::Other)

    return false;

  if (ARM::SPRRegClass.contains(PR))

    return VT != MVT::f32 && VT != MVT::f16 && VT != MVT::i32;

  if (ARM::DPRRegClass.contains(PR))

    return VT != MVT::f64 && !VT.is64BitVector();

  return false;

}


using RCPair = std::pair<unsigned, const TargetRegisterClass *>;


RCPair ARMTargetLowering::getRegForInlineAsmConstraint(

    const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {

  switch (Constraint.size()) {

  case 1:

    // GCC ARM Constraint Letters

    switch (Constraint[0]) {

    case 'l': // Low regs or general regs.

      if (Subtarget->isThumb())

        return RCPair(0U, &ARM::tGPRRegClass);

      return RCPair(0U, &ARM::GPRRegClass);

    case 'h': // High regs or no regs.

      if (Subtarget->isThumb())

        return RCPair(0U, &ARM::hGPRRegClass);

      break;

    case 'r':

      if (Subtarget->isThumb1Only())

        return RCPair(0U, &ARM::tGPRRegClass);

      return RCPair(0U, &ARM::GPRRegClass);

    case 'w':

      if (VT == MVT::Other)

        break;

      if (VT == MVT::f32 || VT == MVT::f16 || VT == MVT::bf16)

        return RCPair(0U, &ARM::SPRRegClass);

      if (VT.getSizeInBits() == 64)

        return RCPair(0U, &ARM::DPRRegClass);

      if (VT.getSizeInBits() == 128)

        return RCPair(0U, &ARM::QPRRegClass);

      break;

    case 'x':

      if (VT == MVT::Other)

        break;

      if (VT == MVT::f32 || VT == MVT::f16 || VT == MVT::bf16)

        return RCPair(0U, &ARM::SPR_8RegClass);

      if (VT.getSizeInBits() == 64)

        return RCPair(0U, &ARM::DPR_8RegClass);

      if (VT.getSizeInBits() == 128)

        return RCPair(0U, &ARM::QPR_8RegClass);

      break;

    case 't':

      if (VT == MVT::Other)

        break;

      if (VT == MVT::f32 || VT == MVT::i32 || VT == MVT::f16 || VT == MVT::bf16)

        return RCPair(0U, &ARM::SPRRegClass);

      if (VT.getSizeInBits() == 64)

        return RCPair(0U, &ARM::DPR_VFP2RegClass);

      if (VT.getSizeInBits() == 128)

        return RCPair(0U, &ARM::QPR_VFP2RegClass);

      break;

    }

    break;


  case 2:

    if (Constraint[0] == 'T') {

      switch (Constraint[1]) {

      default:

        break;

      case 'e':

        return RCPair(0U, &ARM::tGPREvenRegClass);

      case 'o':

        return RCPair(0U, &ARM::tGPROddRegClass);

      }

    }

    break;


  default:

    break;

  }


  if (StringRef("{cc}").equals_insensitive(Constraint))

    return std::make_pair(unsigned(ARM::CPSR), &ARM::CCRRegClass);


  auto RCP = TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);

  if (isIncompatibleReg(RCP.first, VT))

    return {0, nullptr};

  return RCP;

}


/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops

/// vector.  If it is invalid, don't add anything to Ops.


void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,

                                                     StringRef Constraint,

                                                     std::vector<SDValue> &Ops,

                                                     SelectionDAG &DAG) const {

  SDValue Result;


  // Currently only support length 1 constraints.

  if (Constraint.size() != 1)

    return;


  char ConstraintLetter = Constraint[0];

  switch (ConstraintLetter) {

  default: break;

  case 'j':

  case 'I': case 'J': case 'K': case 'L':

  case 'M': case 'N': case 'O':

    ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);

    if (!C)

      return;


    int64_t CVal64 = C->getSExtValue();

    int CVal = (int) CVal64;

    // None of these constraints allow values larger than 32 bits.  Check

    // that the value fits in an int.

    if (CVal != CVal64)

      return;


    switch (ConstraintLetter) {

      case 'j':

        // Constant suitable for movw, must be between 0 and

        // 65535.

        if (Subtarget->hasV6T2Ops() || (Subtarget->hasV8MBaselineOps()))

          if (CVal >= 0 && CVal <= 65535)

            break;

        return;

      case 'I':

        if (Subtarget->isThumb1Only()) {

          // This must be a constant between 0 and 255, for ADD

          // immediates.

          if (CVal >= 0 && CVal <= 255)

            break;

        } else if (Subtarget->isThumb2()) {

          // A constant that can be used as an immediate value in a

          // data-processing instruction.

          if (ARM_AM::getT2SOImmVal(CVal) != -1)

            break;

        } else {

          // A constant that can be used as an immediate value in a

          // data-processing instruction.

          if (ARM_AM::getSOImmVal(CVal) != -1)

            break;

        }

        return;


      case 'J':

        if (Subtarget->isThumb1Only()) {

          // This must be a constant between -255 and -1, for negated ADD

          // immediates. This can be used in GCC with an "n" modifier that

          // prints the negated value, for use with SUB instructions. It is

          // not useful otherwise but is implemented for compatibility.

          if (CVal >= -255 && CVal <= -1)

            break;

        } else {

          // This must be a constant between -4095 and 4095. This is suitable

          // for use as the immediate offset field in LDR and STR instructions

          // such as LDR r0,[r1,#offset].

          if (CVal >= -4095 && CVal <= 4095)

            break;

        }

        return;


      case 'K':

        if (Subtarget->isThumb1Only()) {

          // A 32-bit value where only one byte has a nonzero value. Exclude

          // zero to match GCC. This constraint is used by GCC internally for

          // constants that can be loaded with a move/shift combination.

          // It is not useful otherwise but is implemented for compatibility.

          if (CVal != 0 && ARM_AM::isThumbImmShiftedVal(CVal))

            break;

        } else if (Subtarget->isThumb2()) {

          // A constant whose bitwise inverse can be used as an immediate

          // value in a data-processing instruction. This can be used in GCC

          // with a "B" modifier that prints the inverted value, for use with

          // BIC and MVN instructions. It is not useful otherwise but is

          // implemented for compatibility.

          if (ARM_AM::getT2SOImmVal(~CVal) != -1)

            break;

        } else {

          // A constant whose bitwise inverse can be used as an immediate

          // value in a data-processing instruction. This can be used in GCC

          // with a "B" modifier that prints the inverted value, for use with

          // BIC and MVN instructions. It is not useful otherwise but is

          // implemented for compatibility.

          if (ARM_AM::getSOImmVal(~CVal) != -1)

            break;

        }

        return;


      case 'L':

        if (Subtarget->isThumb1Only()) {

          // This must be a constant between -7 and 7,

          // for 3-operand ADD/SUB immediate instructions.

          if (CVal >= -7 && CVal < 7)

            break;

        } else if (Subtarget->isThumb2()) {

          // A constant whose negation can be used as an immediate value in a

          // data-processing instruction. This can be used in GCC with an "n"

          // modifier that prints the negated value, for use with SUB

          // instructions. It is not useful otherwise but is implemented for

          // compatibility.

          if (ARM_AM::getT2SOImmVal(-CVal) != -1)

            break;

        } else {

          // A constant whose negation can be used as an immediate value in a

          // data-processing instruction. This can be used in GCC with an "n"

          // modifier that prints the negated value, for use with SUB

          // instructions. It is not useful otherwise but is implemented for

          // compatibility.

          if (ARM_AM::getSOImmVal(-CVal) != -1)

            break;

        }

        return;


      case 'M':

        if (Subtarget->isThumb1Only()) {

          // This must be a multiple of 4 between 0 and 1020, for

          // ADD sp + immediate.

          if ((CVal >= 0 && CVal <= 1020) && ((CVal & 3) == 0))

            break;

        } else {

          // A power of two or a constant between 0 and 32.  This is used in

          // GCC for the shift amount on shifted register operands, but it is

          // useful in general for any shift amounts.

          if ((CVal >= 0 && CVal <= 32) || ((CVal & (CVal - 1)) == 0))

            break;

        }

        return;


      case 'N':

        if (Subtarget->isThumb1Only()) {

          // This must be a constant between 0 and 31, for shift amounts.

          if (CVal >= 0 && CVal <= 31)

            break;

        }

        return;


      case 'O':

        if (Subtarget->isThumb1Only()) {

          // This must be a multiple of 4 between -508 and 508, for

          // ADD/SUB sp = sp + immediate.

          if ((CVal >= -508 && CVal <= 508) && ((CVal & 3) == 0))

            break;

        }

        return;

    }

    Result = DAG.getSignedTargetConstant(CVal, SDLoc(Op), Op.getValueType());

    break;

  }


  if (Result.getNode()) {

    Ops.push_back(Result);

    return;

  }

  return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);

}


static RTLIB::Libcall getDivRemLibcall(

    const SDNode *N, MVT::SimpleValueType SVT) {

  assert((N->getOpcode() == ISD::SDIVREM || N->getOpcode() == ISD::UDIVREM ||

          N->getOpcode() == ISD::SREM    || N->getOpcode() == ISD::UREM) &&

         "Unhandled Opcode in getDivRemLibcall");

  bool isSigned = N->getOpcode() == ISD::SDIVREM ||

                  N->getOpcode() == ISD::SREM;

  RTLIB::Libcall LC;

  switch (SVT) {

  default: llvm_unreachable("Unexpected request for libcall!");

  case MVT::i8:  LC = isSigned ? RTLIB::SDIVREM_I8  : RTLIB::UDIVREM_I8;  break;

  case MVT::i16: LC = isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;

  case MVT::i32: LC = isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;

  case MVT::i64: LC = isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;

  }

  return LC;

}


static TargetLowering::ArgListTy getDivRemArgList(

    const SDNode *N, LLVMContext *Context, const ARMSubtarget *Subtarget) {

  assert((N->getOpcode() == ISD::SDIVREM || N->getOpcode() == ISD::UDIVREM ||

          N->getOpcode() == ISD::SREM    || N->getOpcode() == ISD::UREM) &&

         "Unhandled Opcode in getDivRemArgList");

  bool isSigned = N->getOpcode() == ISD::SDIVREM ||

                  N->getOpcode() == ISD::SREM;

  TargetLowering::ArgListTy Args;

  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {

    EVT ArgVT = N->getOperand(i).getValueType();

    Type *ArgTy = ArgVT.getTypeForEVT(*Context);

    TargetLowering::ArgListEntry Entry(N->getOperand(i), ArgTy);

    Entry.IsSExt = isSigned;

    Entry.IsZExt = !isSigned;

    Args.push_back(Entry);

  }

  if (Subtarget->isTargetWindows() && Args.size() >= 2)

    std::swap(Args[0], Args[1]);

  return Args;

}


SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {

  assert((Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() ||

          Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() ||

          Subtarget->isTargetFuchsia() || Subtarget->isTargetWindows()) &&

         "Register-based DivRem lowering only");

  unsigned Opcode = Op->getOpcode();

  assert((Opcode == ISD::SDIVREM || Opcode == ISD::UDIVREM) &&

         "Invalid opcode for Div/Rem lowering");

  bool isSigned = (Opcode == ISD::SDIVREM);

  EVT VT = Op->getValueType(0);

  SDLoc dl(Op);


  if (VT == MVT::i64 && isa<ConstantSDNode>(Op.getOperand(1))) {

    SmallVector<SDValue> Result;

    if (expandDIVREMByConstant(Op.getNode(), Result, MVT::i32, DAG)) {

        SDValue Res0 =

            DAG.getNode(ISD::BUILD_PAIR, dl, VT, Result[0], Result[1]);

        SDValue Res1 =

            DAG.getNode(ISD::BUILD_PAIR, dl, VT, Result[2], Result[3]);

        return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(),

                           {Res0, Res1});

    }

  }


  Type *Ty = VT.getTypeForEVT(*DAG.getContext());


  // If the target has hardware divide, use divide + multiply + subtract:

  //     div = a / b

  //     rem = a - b * div

  //     return {div, rem}

  // This should be lowered into UDIV/SDIV + MLS later on.

  bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode()

                                        : Subtarget->hasDivideInARMMode();

  if (hasDivide && Op->getValueType(0).isSimple() &&

      Op->getSimpleValueType(0) == MVT::i32) {

    unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;

    const SDValue Dividend = Op->getOperand(0);

    const SDValue Divisor = Op->getOperand(1);

    SDValue Div = DAG.getNode(DivOpcode, dl, VT, Dividend, Divisor);

    SDValue Mul = DAG.getNode(ISD::MUL, dl, VT, Div, Divisor);

    SDValue Rem = DAG.getNode(ISD::SUB, dl, VT, Dividend, Mul);


    SDValue Values[2] = {Div, Rem};

    return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VT, VT), Values);

  }


  RTLIB::Libcall LC = getDivRemLibcall(Op.getNode(),

                                       VT.getSimpleVT().SimpleTy);

  SDValue InChain = DAG.getEntryNode();


  TargetLowering::ArgListTy Args = getDivRemArgList(Op.getNode(),

                                                    DAG.getContext(),

                                                    Subtarget);


  SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),

                                         getPointerTy(DAG.getDataLayout()));


  Type *RetTy = StructType::get(Ty, Ty);


  if (Subtarget->isTargetWindows())

    InChain = WinDBZCheckDenominator(DAG, Op.getNode(), InChain);


  TargetLowering::CallLoweringInfo CLI(DAG);

  CLI.setDebugLoc(dl).setChain(InChain)

    .setCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))

    .setInRegister().setSExtResult(isSigned).setZExtResult(!isSigned);


  std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);

  return CallInfo.first;

}


// Lowers REM using divmod helpers

// see RTABI section 4.2/4.3

SDValue ARMTargetLowering::LowerREM(SDNode *N, SelectionDAG &DAG) const {

  EVT VT = N->getValueType(0);


  if (VT == MVT::i64 && isa<ConstantSDNode>(N->getOperand(1))) {

    SmallVector<SDValue> Result;

    if (expandDIVREMByConstant(N, Result, MVT::i32, DAG))

        return DAG.getNode(ISD::BUILD_PAIR, SDLoc(N), N->getValueType(0),

                           Result[0], Result[1]);

  }


  // Build return types (div and rem)

  std::vector<Type*> RetTyParams;

  Type *RetTyElement;


  switch (VT.getSimpleVT().SimpleTy) {

  default: llvm_unreachable("Unexpected request for libcall!");

  case MVT::i8:   RetTyElement = Type::getInt8Ty(*DAG.getContext());  break;

  case MVT::i16:  RetTyElement = Type::getInt16Ty(*DAG.getContext()); break;

  case MVT::i32:  RetTyElement = Type::getInt32Ty(*DAG.getContext()); break;

  case MVT::i64:  RetTyElement = Type::getInt64Ty(*DAG.getContext()); break;

  }


  RetTyParams.push_back(RetTyElement);

  RetTyParams.push_back(RetTyElement);

  ArrayRef<Type*> ret = ArrayRef<Type*>(RetTyParams);

  Type *RetTy = StructType::get(*DAG.getContext(), ret);


  RTLIB::Libcall LC = getDivRemLibcall(N, N->getValueType(0).getSimpleVT().

                                                             SimpleTy);

  SDValue InChain = DAG.getEntryNode();

  TargetLowering::ArgListTy Args = getDivRemArgList(N, DAG.getContext(),

                                                    Subtarget);

  bool isSigned = N->getOpcode() == ISD::SREM;

  SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),

                                         getPointerTy(DAG.getDataLayout()));


  if (Subtarget->isTargetWindows())

    InChain = WinDBZCheckDenominator(DAG, N, InChain);


  // Lower call

  CallLoweringInfo CLI(DAG);

  CLI.setChain(InChain)

     .setCallee(CallingConv::ARM_AAPCS, RetTy, Callee, std::move(Args))

     .setSExtResult(isSigned).setZExtResult(!isSigned).setDebugLoc(SDLoc(N));

  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);


  // Return second (rem) result operand (first contains div)

  SDNode *ResNode = CallResult.first.getNode();

  assert(ResNode->getNumOperands() == 2 && "divmod should return two operands");

  return ResNode->getOperand(1);

}


SDValue

ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const {

  assert(Subtarget->isTargetWindows() && "unsupported target platform");

  SDLoc DL(Op);


  // Get the inputs.

  SDValue Chain = Op.getOperand(0);

  SDValue Size  = Op.getOperand(1);


  if (DAG.getMachineFunction().getFunction().hasFnAttribute(

          "no-stack-arg-probe")) {

    MaybeAlign Align =

        cast<ConstantSDNode>(Op.getOperand(2))->getMaybeAlignValue();

    SDValue SP = DAG.getCopyFromReg(Chain, DL, ARM::SP, MVT::i32);

    Chain = SP.getValue(1);

    SP = DAG.getNode(ISD::SUB, DL, MVT::i32, SP, Size);

    if (Align)

      SP = DAG.getNode(ISD::AND, DL, MVT::i32, SP.getValue(0),

                       DAG.getSignedConstant(-Align->value(), DL, MVT::i32));

    Chain = DAG.getCopyToReg(Chain, DL, ARM::SP, SP);

    SDValue Ops[2] = { SP, Chain };

    return DAG.getMergeValues(Ops, DL);

  }


  SDValue Words = DAG.getNode(ISD::SRL, DL, MVT::i32, Size,

                              DAG.getConstant(2, DL, MVT::i32));


  SDValue Glue;

  Chain = DAG.getCopyToReg(Chain, DL, ARM::R4, Words, Glue);

  Glue = Chain.getValue(1);


  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);

  Chain = DAG.getNode(ARMISD::WIN__CHKSTK, DL, NodeTys, Chain, Glue);


  SDValue NewSP = DAG.getCopyFromReg(Chain, DL, ARM::SP, MVT::i32);

  Chain = NewSP.getValue(1);


  SDValue Ops[2] = { NewSP, Chain };

  return DAG.getMergeValues(Ops, DL);

}


SDValue ARMTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {

  bool IsStrict = Op->isStrictFPOpcode();

  SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);

  const unsigned DstSz = Op.getValueType().getSizeInBits();

  const unsigned SrcSz = SrcVal.getValueType().getSizeInBits();

  assert(DstSz > SrcSz && DstSz <= 64 && SrcSz >= 16 &&

         "Unexpected type for custom-lowering FP_EXTEND");


  assert((!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()) &&

         "With both FP DP and 16, any FP conversion is legal!");


  assert(!(DstSz == 32 && Subtarget->hasFP16()) &&

         "With FP16, 16 to 32 conversion is legal!");


  // Converting from 32 -> 64 is valid if we have FP64.

  if (SrcSz == 32 && DstSz == 64 && Subtarget->hasFP64()) {

    // FIXME: Remove this when we have strict fp instruction selection patterns

    if (IsStrict) {

      SDLoc Loc(Op);

      SDValue Result = DAG.getNode(ISD::FP_EXTEND,

                                   Loc, Op.getValueType(), SrcVal);

      return DAG.getMergeValues({Result, Op.getOperand(0)}, Loc);

    }

    return Op;

  }


  // Either we are converting from 16 -> 64, without FP16 and/or

  // FP.double-precision or without Armv8-fp. So we must do it in two

  // steps.

  // Or we are converting from 32 -> 64 without fp.double-precision or 16 -> 32

  // without FP16. So we must do a function call.

  SDLoc Loc(Op);

  RTLIB::Libcall LC;

  MakeLibCallOptions CallOptions;

  SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();

  for (unsigned Sz = SrcSz; Sz <= 32 && Sz < DstSz; Sz *= 2) {

    bool Supported = (Sz == 16 ? Subtarget->hasFP16() : Subtarget->hasFP64());

    MVT SrcVT = (Sz == 16 ? MVT::f16 : MVT::f32);

    MVT DstVT = (Sz == 16 ? MVT::f32 : MVT::f64);

    if (Supported) {

      if (IsStrict) {

        SrcVal = DAG.getNode(ISD::STRICT_FP_EXTEND, Loc,

                             {DstVT, MVT::Other}, {Chain, SrcVal});

        Chain = SrcVal.getValue(1);

      } else {

        SrcVal = DAG.getNode(ISD::FP_EXTEND, Loc, DstVT, SrcVal);

      }

    } else {

      LC = RTLIB::getFPEXT(SrcVT, DstVT);

      assert(LC != RTLIB::UNKNOWN_LIBCALL &&

             "Unexpected type for custom-lowering FP_EXTEND");

      std::tie(SrcVal, Chain) = makeLibCall(DAG, LC, DstVT, SrcVal, CallOptions,

                                            Loc, Chain);

    }

  }


  return IsStrict ? DAG.getMergeValues({SrcVal, Chain}, Loc) : SrcVal;

}


SDValue ARMTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {

  bool IsStrict = Op->isStrictFPOpcode();


  SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);

  EVT SrcVT = SrcVal.getValueType();

  EVT DstVT = Op.getValueType();

  const unsigned DstSz = Op.getValueType().getSizeInBits();

  const unsigned SrcSz = SrcVT.getSizeInBits();

  (void)DstSz;

  assert(DstSz < SrcSz && SrcSz <= 64 && DstSz >= 16 &&

         "Unexpected type for custom-lowering FP_ROUND");


  assert((!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()) &&

         "With both FP DP and 16, any FP conversion is legal!");


  SDLoc Loc(Op);


  // Instruction from 32 -> 16 if hasFP16 is valid

  if (SrcSz == 32 && Subtarget->hasFP16())

    return Op;


  // Lib call from 32 -> 16 / 64 -> [32, 16]

  RTLIB::Libcall LC = RTLIB::getFPROUND(SrcVT, DstVT);

  assert(LC != RTLIB::UNKNOWN_LIBCALL &&

         "Unexpected type for custom-lowering FP_ROUND");

  MakeLibCallOptions CallOptions;

  SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();

  SDValue Result;

  std::tie(Result, Chain) = makeLibCall(DAG, LC, DstVT, SrcVal, CallOptions,

                                        Loc, Chain);

  return IsStrict ? DAG.getMergeValues({Result, Chain}, Loc) : Result;

}


bool


ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {

  // The ARM target isn't yet aware of offsets.

  return false;

}


bool ARM::isBitFieldInvertedMask(unsigned v) {

  if (v == 0xffffffff)

    return false;


  // there can be 1's on either or both "outsides", all the "inside"

  // bits must be 0's

  return isShiftedMask_32(~v);

}


/// isFPImmLegal - Returns true if the target can instruction select the

/// specified FP immediate natively. If false, the legalizer will

/// materialize the FP immediate as a load from a constant pool.


bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,

                                     bool ForCodeSize) const {

  if (!Subtarget->hasVFP3Base())

    return false;

  if (VT == MVT::f16 && Subtarget->hasFullFP16())

    return ARM_AM::getFP16Imm(Imm) != -1;

  if (VT == MVT::f32 && Subtarget->hasFullFP16() &&

      ARM_AM::getFP32FP16Imm(Imm) != -1)

    return true;

  if (VT == MVT::f32)

    return ARM_AM::getFP32Imm(Imm) != -1;

  if (VT == MVT::f64 && Subtarget->hasFP64())

    return ARM_AM::getFP64Imm(Imm) != -1;

  return false;

}


/// getTgtMemIntrinsic - Represent NEON load and store intrinsics as

/// MemIntrinsicNodes.  The associated MachineMemOperands record the alignment

/// specified in the intrinsic calls.


bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,

                                           const CallInst &I,

                                           MachineFunction &MF,

                                           unsigned Intrinsic) const {

  switch (Intrinsic) {

  case Intrinsic::arm_neon_vld1:

  case Intrinsic::arm_neon_vld2:

  case Intrinsic::arm_neon_vld3:

  case Intrinsic::arm_neon_vld4:

  case Intrinsic::arm_neon_vld2lane:

  case Intrinsic::arm_neon_vld3lane:

  case Intrinsic::arm_neon_vld4lane:

  case Intrinsic::arm_neon_vld2dup:

  case Intrinsic::arm_neon_vld3dup:

  case Intrinsic::arm_neon_vld4dup: {

    Info.opc = ISD::INTRINSIC_W_CHAIN;

    // Conservatively set memVT to the entire set of vectors loaded.

    auto &DL = I.getDataLayout();

    uint64_t NumElts = DL.getTypeSizeInBits(I.getType()) / 64;

    Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);

    Info.ptrVal = I.getArgOperand(0);

    Info.offset = 0;

    Value *AlignArg = I.getArgOperand(I.arg_size() - 1);

    Info.align = cast<ConstantInt>(AlignArg)->getMaybeAlignValue();

    // volatile loads with NEON intrinsics not supported

    Info.flags = MachineMemOperand::MOLoad;

    return true;

  }

  case Intrinsic::arm_neon_vld1x2:

  case Intrinsic::arm_neon_vld1x3:

  case Intrinsic::arm_neon_vld1x4: {

    Info.opc = ISD::INTRINSIC_W_CHAIN;

    // Conservatively set memVT to the entire set of vectors loaded.

    auto &DL = I.getDataLayout();

    uint64_t NumElts = DL.getTypeSizeInBits(I.getType()) / 64;

    Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);

    Info.ptrVal = I.getArgOperand(I.arg_size() - 1);

    Info.offset = 0;

    Info.align = I.getParamAlign(I.arg_size() - 1).valueOrOne();

    // volatile loads with NEON intrinsics not supported

    Info.flags = MachineMemOperand::MOLoad;

    return true;

  }

  case Intrinsic::arm_neon_vst1:

  case Intrinsic::arm_neon_vst2:

  case Intrinsic::arm_neon_vst3:

  case Intrinsic::arm_neon_vst4:

  case Intrinsic::arm_neon_vst2lane:

  case Intrinsic::arm_neon_vst3lane:

  case Intrinsic::arm_neon_vst4lane: {

    Info.opc = ISD::INTRINSIC_VOID;

    // Conservatively set memVT to the entire set of vectors stored.

    auto &DL = I.getDataLayout();

    unsigned NumElts = 0;

    for (unsigned ArgI = 1, ArgE = I.arg_size(); ArgI < ArgE; ++ArgI) {

      Type *ArgTy = I.getArgOperand(ArgI)->getType();

      if (!ArgTy->isVectorTy())

        break;

      NumElts += DL.getTypeSizeInBits(ArgTy) / 64;

    }

    Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);

    Info.ptrVal = I.getArgOperand(0);

    Info.offset = 0;

    Value *AlignArg = I.getArgOperand(I.arg_size() - 1);

    Info.align = cast<ConstantInt>(AlignArg)->getMaybeAlignValue();

    // volatile stores with NEON intrinsics not supported

    Info.flags = MachineMemOperand::MOStore;

    return true;

  }

  case Intrinsic::arm_neon_vst1x2:

  case Intrinsic::arm_neon_vst1x3:

  case Intrinsic::arm_neon_vst1x4: {

    Info.opc = ISD::INTRINSIC_VOID;

    // Conservatively set memVT to the entire set of vectors stored.

    auto &DL = I.getDataLayout();

    unsigned NumElts = 0;

    for (unsigned ArgI = 1, ArgE = I.arg_size(); ArgI < ArgE; ++ArgI) {

      Type *ArgTy = I.getArgOperand(ArgI)->getType();

      if (!ArgTy->isVectorTy())

        break;

      NumElts += DL.getTypeSizeInBits(ArgTy) / 64;

    }

    Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);

    Info.ptrVal = I.getArgOperand(0);

    Info.offset = 0;

    Info.align = I.getParamAlign(0).valueOrOne();

    // volatile stores with NEON intrinsics not supported

    Info.flags = MachineMemOperand::MOStore;

    return true;

  }

  case Intrinsic::arm_mve_vld2q:

  case Intrinsic::arm_mve_vld4q: {

    Info.opc = ISD::INTRINSIC_W_CHAIN;

    // Conservatively set memVT to the entire set of vectors loaded.

    Type *VecTy = cast<StructType>(I.getType())->getElementType(1);

    unsigned Factor = Intrinsic == Intrinsic::arm_mve_vld2q ? 2 : 4;

    Info.memVT = EVT::getVectorVT(VecTy->getContext(), MVT::i64, Factor * 2);

    Info.ptrVal = I.getArgOperand(0);

    Info.offset = 0;

    Info.align = Align(VecTy->getScalarSizeInBits() / 8);

    // volatile loads with MVE intrinsics not supported

    Info.flags = MachineMemOperand::MOLoad;

    return true;

  }

  case Intrinsic::arm_mve_vst2q:

  case Intrinsic::arm_mve_vst4q: {

    Info.opc = ISD::INTRINSIC_VOID;

    // Conservatively set memVT to the entire set of vectors stored.

    Type *VecTy = I.getArgOperand(1)->getType();

    unsigned Factor = Intrinsic == Intrinsic::arm_mve_vst2q ? 2 : 4;

    Info.memVT = EVT::getVectorVT(VecTy->getContext(), MVT::i64, Factor * 2);

    Info.ptrVal = I.getArgOperand(0);

    Info.offset = 0;

    Info.align = Align(VecTy->getScalarSizeInBits() / 8);

    // volatile stores with MVE intrinsics not supported

    Info.flags = MachineMemOperand::MOStore;

    return true;

  }

  case Intrinsic::arm_mve_vldr_gather_base:

  case Intrinsic::arm_mve_vldr_gather_base_predicated: {

    Info.opc = ISD::INTRINSIC_W_CHAIN;

    Info.ptrVal = nullptr;

    Info.memVT = MVT::getVT(I.getType());

    Info.align = Align(1);

    Info.flags |= MachineMemOperand::MOLoad;

    return true;

  }

  case Intrinsic::arm_mve_vldr_gather_base_wb:

  case Intrinsic::arm_mve_vldr_gather_base_wb_predicated: {

    Info.opc = ISD::INTRINSIC_W_CHAIN;

    Info.ptrVal = nullptr;

    Info.memVT = MVT::getVT(I.getType()->getContainedType(0));

    Info.align = Align(1);

    Info.flags |= MachineMemOperand::MOLoad;

    return true;

  }

  case Intrinsic::arm_mve_vldr_gather_offset:

  case Intrinsic::arm_mve_vldr_gather_offset_predicated: {

    Info.opc = ISD::INTRINSIC_W_CHAIN;

    Info.ptrVal = nullptr;

    MVT DataVT = MVT::getVT(I.getType());

    unsigned MemSize = cast<ConstantInt>(I.getArgOperand(2))->getZExtValue();

    Info.memVT = MVT::getVectorVT(MVT::getIntegerVT(MemSize),

                                  DataVT.getVectorNumElements());

    Info.align = Align(1);

    Info.flags |= MachineMemOperand::MOLoad;

    return true;

  }

  case Intrinsic::arm_mve_vstr_scatter_base:

  case Intrinsic::arm_mve_vstr_scatter_base_predicated: {

    Info.opc = ISD::INTRINSIC_VOID;

    Info.ptrVal = nullptr;

    Info.memVT = MVT::getVT(I.getArgOperand(2)->getType());

    Info.align = Align(1);

    Info.flags |= MachineMemOperand::MOStore;

    return true;

  }

  case Intrinsic::arm_mve_vstr_scatter_base_wb:

  case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated: {

    Info.opc = ISD::INTRINSIC_W_CHAIN;

    Info.ptrVal = nullptr;

    Info.memVT = MVT::getVT(I.getArgOperand(2)->getType());

    Info.align = Align(1);

    Info.flags |= MachineMemOperand::MOStore;

    return true;

  }

  case Intrinsic::arm_mve_vstr_scatter_offset:

  case Intrinsic::arm_mve_vstr_scatter_offset_predicated: {

    Info.opc = ISD::INTRINSIC_VOID;

    Info.ptrVal = nullptr;

    MVT DataVT = MVT::getVT(I.getArgOperand(2)->getType());

    unsigned MemSize = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();

    Info.memVT = MVT::getVectorVT(MVT::getIntegerVT(MemSize),

                                  DataVT.getVectorNumElements());

    Info.align = Align(1);

    Info.flags |= MachineMemOperand::MOStore;

    return true;

  }

  case Intrinsic::arm_ldaex:

  case Intrinsic::arm_ldrex: {

    auto &DL = I.getDataLayout();

    Type *ValTy = I.getParamElementType(0);

    Info.opc = ISD::INTRINSIC_W_CHAIN;

    Info.memVT = MVT::getVT(ValTy);

    Info.ptrVal = I.getArgOperand(0);

    Info.offset = 0;

    Info.align = DL.getABITypeAlign(ValTy);

    Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;

    return true;

  }

  case Intrinsic::arm_stlex:

  case Intrinsic::arm_strex: {

    auto &DL = I.getDataLayout();

    Type *ValTy = I.getParamElementType(1);

    Info.opc = ISD::INTRINSIC_W_CHAIN;

    Info.memVT = MVT::getVT(ValTy);

    Info.ptrVal = I.getArgOperand(1);

    Info.offset = 0;

    Info.align = DL.getABITypeAlign(ValTy);

    Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;

    return true;

  }

  case Intrinsic::arm_stlexd:

  case Intrinsic::arm_strexd:

    Info.opc = ISD::INTRINSIC_W_CHAIN;

    Info.memVT = MVT::i64;

    Info.ptrVal = I.getArgOperand(2);

    Info.offset = 0;

    Info.align = Align(8);

    Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;

    return true;


  case Intrinsic::arm_ldaexd:

  case Intrinsic::arm_ldrexd:

    Info.opc = ISD::INTRINSIC_W_CHAIN;

    Info.memVT = MVT::i64;

    Info.ptrVal = I.getArgOperand(0);

    Info.offset = 0;

    Info.align = Align(8);

    Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;

    return true;


  default:

    break;

  }


  return false;

}


/// Returns true if it is beneficial to convert a load of a constant

/// to just the constant itself.


bool ARMTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,

                                                          Type *Ty) const {

  assert(Ty->isIntegerTy());


  unsigned Bits = Ty->getPrimitiveSizeInBits();

  if (Bits == 0 || Bits > 32)

    return false;

  return true;

}


bool ARMTargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,

                                                unsigned Index) const {

  if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))

    return false;


  return (Index == 0 || Index == ResVT.getVectorNumElements());

}


Instruction *ARMTargetLowering::makeDMB(IRBuilderBase &Builder,

                                        ARM_MB::MemBOpt Domain) const {

  // First, if the target has no DMB, see what fallback we can use.

  if (!Subtarget->hasDataBarrier()) {

    // Some ARMv6 cpus can support data barriers with an mcr instruction.

    // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get

    // here.

    if (Subtarget->hasV6Ops() && !Subtarget->isThumb()) {

      Value* args[6] = {Builder.getInt32(15), Builder.getInt32(0),

                        Builder.getInt32(0), Builder.getInt32(7),

                        Builder.getInt32(10), Builder.getInt32(5)};

      return Builder.CreateIntrinsic(Intrinsic::arm_mcr, args);

    } else {

      // Instead of using barriers, atomic accesses on these subtargets use

      // libcalls.

      llvm_unreachable("makeDMB on a target so old that it has no barriers");

    }

  } else {

    // Only a full system barrier exists in the M-class architectures.

    Domain = Subtarget->isMClass() ? ARM_MB::SY : Domain;

    Constant *CDomain = Builder.getInt32(Domain);

    return Builder.CreateIntrinsic(Intrinsic::arm_dmb, CDomain);

  }

}


// Based on http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html


Instruction *ARMTargetLowering::emitLeadingFence(IRBuilderBase &Builder,

                                                 Instruction *Inst,

                                                 AtomicOrdering Ord) const {

  switch (Ord) {

  case AtomicOrdering::NotAtomic:

  case AtomicOrdering::Unordered:

    llvm_unreachable("Invalid fence: unordered/non-atomic");

  case AtomicOrdering::Monotonic:

  case AtomicOrdering::Acquire:

    return nullptr; // Nothing to do

  case AtomicOrdering::SequentiallyConsistent:

    if (!Inst->hasAtomicStore())

      return nullptr; // Nothing to do

    [[fallthrough]];

  case AtomicOrdering::Release:

  case AtomicOrdering::AcquireRelease:

    if (Subtarget->preferISHSTBarriers())

      return makeDMB(Builder, ARM_MB::ISHST);

    // FIXME: add a comment with a link to documentation justifying this.

    else

      return makeDMB(Builder, ARM_MB::ISH);

  }

  llvm_unreachable("Unknown fence ordering in emitLeadingFence");

}


Instruction *ARMTargetLowering::emitTrailingFence(IRBuilderBase &Builder,

                                                  Instruction *Inst,

                                                  AtomicOrdering Ord) const {

  switch (Ord) {

  case AtomicOrdering::NotAtomic:

  case AtomicOrdering::Unordered:

    llvm_unreachable("Invalid fence: unordered/not-atomic");

  case AtomicOrdering::Monotonic:

  case AtomicOrdering::Release:

    return nullptr; // Nothing to do

  case AtomicOrdering::Acquire:

  case AtomicOrdering::AcquireRelease:

  case AtomicOrdering::SequentiallyConsistent:

    return makeDMB(Builder, ARM_MB::ISH);

  }

  llvm_unreachable("Unknown fence ordering in emitTrailingFence");

}


// Loads and stores less than 64-bits are already atomic; ones above that

// are doomed anyway, so defer to the default libcall and blame the OS when

// things go wrong. Cortex M doesn't have ldrexd/strexd though, so don't emit

// anything for those.

TargetLoweringBase::AtomicExpansionKind


ARMTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {

  bool has64BitAtomicStore;

  if (Subtarget->isMClass())

    has64BitAtomicStore = false;

  else if (Subtarget->isThumb())

    has64BitAtomicStore = Subtarget->hasV7Ops();

  else

    has64BitAtomicStore = Subtarget->hasV6Ops();


  unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits();

  return Size == 64 && has64BitAtomicStore ? AtomicExpansionKind::Expand

                                           : AtomicExpansionKind::None;

}


// Loads and stores less than 64-bits are already atomic; ones above that

// are doomed anyway, so defer to the default libcall and blame the OS when

// things go wrong. Cortex M doesn't have ldrexd/strexd though, so don't emit

// anything for those.

// FIXME: ldrd and strd are atomic if the CPU has LPAE (e.g. A15 has that

// guarantee, see DDI0406C ARM architecture reference manual,

// sections A8.8.72-74 LDRD)

TargetLowering::AtomicExpansionKind


ARMTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {

  bool has64BitAtomicLoad;

  if (Subtarget->isMClass())

    has64BitAtomicLoad = false;

  else if (Subtarget->isThumb())

    has64BitAtomicLoad = Subtarget->hasV7Ops();

  else

    has64BitAtomicLoad = Subtarget->hasV6Ops();


  unsigned Size = LI->getType()->getPrimitiveSizeInBits();

  return (Size == 64 && has64BitAtomicLoad) ? AtomicExpansionKind::LLOnly

                                            : AtomicExpansionKind::None;

}


// For the real atomic operations, we have ldrex/strex up to 32 bits,

// and up to 64 bits on the non-M profiles

TargetLowering::AtomicExpansionKind


ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {

  if (AI->isFloatingPointOperation())

    return AtomicExpansionKind::CmpXChg;


  unsigned Size = AI->getType()->getPrimitiveSizeInBits();

  bool hasAtomicRMW;

  if (Subtarget->isMClass())

    hasAtomicRMW = Subtarget->hasV8MBaselineOps();

  else if (Subtarget->isThumb())

    hasAtomicRMW = Subtarget->hasV7Ops();

  else

    hasAtomicRMW = Subtarget->hasV6Ops();

  if (Size <= (Subtarget->isMClass() ? 32U : 64U) && hasAtomicRMW) {

    // At -O0, fast-regalloc cannot cope with the live vregs necessary to

    // implement atomicrmw without spilling. If the target address is also on

    // the stack and close enough to the spill slot, this can lead to a

    // situation where the monitor always gets cleared and the atomic operation

    // can never succeed. So at -O0 lower this operation to a CAS loop.

    if (getTargetMachine().getOptLevel() == CodeGenOptLevel::None)

      return AtomicExpansionKind::CmpXChg;

    return AtomicExpansionKind::LLSC;

  }

  return AtomicExpansionKind::None;

}


// Similar to shouldExpandAtomicRMWInIR, ldrex/strex can be used  up to 32

// bits, and up to 64 bits on the non-M profiles.

TargetLowering::AtomicExpansionKind


ARMTargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {

  // At -O0, fast-regalloc cannot cope with the live vregs necessary to

  // implement cmpxchg without spilling. If the address being exchanged is also

  // on the stack and close enough to the spill slot, this can lead to a

  // situation where the monitor always gets cleared and the atomic operation

  // can never succeed. So at -O0 we need a late-expanded pseudo-inst instead.

  unsigned Size = AI->getOperand(1)->getType()->getPrimitiveSizeInBits();

  bool HasAtomicCmpXchg;

  if (Subtarget->isMClass())

    HasAtomicCmpXchg = Subtarget->hasV8MBaselineOps();

  else if (Subtarget->isThumb())

    HasAtomicCmpXchg = Subtarget->hasV7Ops();

  else

    HasAtomicCmpXchg = Subtarget->hasV6Ops();

  if (getTargetMachine().getOptLevel() != CodeGenOptLevel::None &&

      HasAtomicCmpXchg && Size <= (Subtarget->isMClass() ? 32U : 64U))

    return AtomicExpansionKind::LLSC;

  return AtomicExpansionKind::None;

}


bool ARMTargetLowering::shouldInsertFencesForAtomic(

    const Instruction *I) const {

  return InsertFencesForAtomic;

}


bool ARMTargetLowering::useLoadStackGuardNode(const Module &M) const {

  // ROPI/RWPI are not supported currently.

  return !Subtarget->isROPI() && !Subtarget->isRWPI();

}


void ARMTargetLowering::insertSSPDeclarations(Module &M) const {

  // MSVC CRT provides functionalities for stack protection.

  RTLIB::LibcallImpl SecurityCheckCookieLibcall =

      getLibcallImpl(RTLIB::SECURITY_CHECK_COOKIE);


  RTLIB::LibcallImpl SecurityCookieVar =

      getLibcallImpl(RTLIB::STACK_CHECK_GUARD);

  if (SecurityCheckCookieLibcall != RTLIB::Unsupported &&

      SecurityCookieVar != RTLIB::Unsupported) {

    // MSVC CRT has a global variable holding security cookie.

    M.getOrInsertGlobal(getLibcallImplName(SecurityCookieVar),

                        PointerType::getUnqual(M.getContext()));


    // MSVC CRT has a function to validate security cookie.

    FunctionCallee SecurityCheckCookie =

        M.getOrInsertFunction(getLibcallImplName(SecurityCheckCookieLibcall),

                              Type::getVoidTy(M.getContext()),

                              PointerType::getUnqual(M.getContext()));

    if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee()))

      F->addParamAttr(0, Attribute::AttrKind::InReg);

  }


  TargetLowering::insertSSPDeclarations(M);

}


bool ARMTargetLowering::canCombineStoreAndExtract(Type *VectorTy, Value *Idx,

                                                  unsigned &Cost) const {

  // If we do not have NEON, vector types are not natively supported.

  if (!Subtarget->hasNEON())

    return false;


  // Floating point values and vector values map to the same register file.

  // Therefore, although we could do a store extract of a vector type, this is

  // better to leave at float as we have more freedom in the addressing mode for

  // those.

  if (VectorTy->isFPOrFPVectorTy())

    return false;


  // If the index is unknown at compile time, this is very expensive to lower

  // and it is not possible to combine the store with the extract.

  if (!isa<ConstantInt>(Idx))

    return false;


  assert(VectorTy->isVectorTy() && "VectorTy is not a vector type");

  unsigned BitWidth = VectorTy->getPrimitiveSizeInBits().getFixedValue();

  // We can do a store + vector extract on any vector that fits perfectly in a D

  // or Q register.

  if (BitWidth == 64 || BitWidth == 128) {

    Cost = 0;

    return true;

  }

  return false;

}


bool ARMTargetLowering::canCreateUndefOrPoisonForTargetNode(

    SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,

    bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {

  unsigned Opcode = Op.getOpcode();

  switch (Opcode) {

  case ARMISD::VORRIMM:

  case ARMISD::VBICIMM:

    return false;

  }

  return TargetLowering::canCreateUndefOrPoisonForTargetNode(

      Op, DemandedElts, DAG, PoisonOnly, ConsiderFlags, Depth);

}


bool ARMTargetLowering::isCheapToSpeculateCttz(Type *Ty) const {

  return Subtarget->hasV5TOps() && !Subtarget->isThumb1Only();

}


bool ARMTargetLowering::isCheapToSpeculateCtlz(Type *Ty) const {

  return Subtarget->hasV5TOps() && !Subtarget->isThumb1Only();

}


bool ARMTargetLowering::isMaskAndCmp0FoldingBeneficial(

    const Instruction &AndI) const {

  if (!Subtarget->hasV7Ops())

    return false;


  // Sink the `and` instruction only if the mask would fit into a modified

  // immediate operand.

  ConstantInt *Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));

  if (!Mask || Mask->getValue().getBitWidth() > 32u)

    return false;

  auto MaskVal = unsigned(Mask->getValue().getZExtValue());

  return (Subtarget->isThumb2() ? ARM_AM::getT2SOImmVal(MaskVal)

                                : ARM_AM::getSOImmVal(MaskVal)) != -1;

}


TargetLowering::ShiftLegalizationStrategy


ARMTargetLowering::preferredShiftLegalizationStrategy(

    SelectionDAG &DAG, SDNode *N, unsigned ExpansionFactor) const {

  if (Subtarget->hasMinSize() && !Subtarget->isTargetWindows())

    return ShiftLegalizationStrategy::LowerToLibcall;

  return TargetLowering::preferredShiftLegalizationStrategy(DAG, N,

                                                            ExpansionFactor);

}


Value *ARMTargetLowering::emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy,

                                         Value *Addr,

                                         AtomicOrdering Ord) const {

  Module *M = Builder.GetInsertBlock()->getParent()->getParent();

  bool IsAcquire = isAcquireOrStronger(Ord);


  // Since i64 isn't legal and intrinsics don't get type-lowered, the ldrexd

  // intrinsic must return {i32, i32} and we have to recombine them into a

  // single i64 here.

  if (ValueTy->getPrimitiveSizeInBits() == 64) {

    Intrinsic::ID Int =

        IsAcquire ? Intrinsic::arm_ldaexd : Intrinsic::arm_ldrexd;


    Value *LoHi =

        Builder.CreateIntrinsic(Int, Addr, /*FMFSource=*/nullptr, "lohi");


    Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");

    Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");

    if (!Subtarget->isLittle())

      std::swap (Lo, Hi);

    Lo = Builder.CreateZExt(Lo, ValueTy, "lo64");

    Hi = Builder.CreateZExt(Hi, ValueTy, "hi64");

    return Builder.CreateOr(

        Lo, Builder.CreateShl(Hi, ConstantInt::get(ValueTy, 32)), "val64");

  }


  Type *Tys[] = { Addr->getType() };

  Intrinsic::ID Int = IsAcquire ? Intrinsic::arm_ldaex : Intrinsic::arm_ldrex;

  CallInst *CI = Builder.CreateIntrinsic(Int, Tys, Addr);


  CI->addParamAttr(

      0, Attribute::get(M->getContext(), Attribute::ElementType, ValueTy));

  return Builder.CreateTruncOrBitCast(CI, ValueTy);

}


void ARMTargetLowering::emitAtomicCmpXchgNoStoreLLBalance(

    IRBuilderBase &Builder) const {

  if (!Subtarget->hasV7Ops())

    return;

  Builder.CreateIntrinsic(Intrinsic::arm_clrex, {});

}


Value *ARMTargetLowering::emitStoreConditional(IRBuilderBase &Builder,

                                               Value *Val, Value *Addr,

                                               AtomicOrdering Ord) const {

  Module *M = Builder.GetInsertBlock()->getParent()->getParent();

  bool IsRelease = isReleaseOrStronger(Ord);


  // Since the intrinsics must have legal type, the i64 intrinsics take two

  // parameters: "i32, i32". We must marshal Val into the appropriate form

  // before the call.

  if (Val->getType()->getPrimitiveSizeInBits() == 64) {

    Intrinsic::ID Int =

        IsRelease ? Intrinsic::arm_stlexd : Intrinsic::arm_strexd;

    Type *Int32Ty = Type::getInt32Ty(M->getContext());


    Value *Lo = Builder.CreateTrunc(Val, Int32Ty, "lo");

    Value *Hi = Builder.CreateTrunc(Builder.CreateLShr(Val, 32), Int32Ty, "hi");

    if (!Subtarget->isLittle())

      std::swap(Lo, Hi);

    return Builder.CreateIntrinsic(Int, {Lo, Hi, Addr});

  }


  Intrinsic::ID Int = IsRelease ? Intrinsic::arm_stlex : Intrinsic::arm_strex;

  Type *Tys[] = { Addr->getType() };

  Function *Strex = Intrinsic::getOrInsertDeclaration(M, Int, Tys);


  CallInst *CI = Builder.CreateCall(

      Strex, {Builder.CreateZExtOrBitCast(

                  Val, Strex->getFunctionType()->getParamType(0)),

              Addr});

  CI->addParamAttr(1, Attribute::get(M->getContext(), Attribute::ElementType,

                                     Val->getType()));

  return CI;

}


bool ARMTargetLowering::alignLoopsWithOptSize() const {

  return Subtarget->isMClass();

}


/// A helper function for determining the number of interleaved accesses we

/// will generate when lowering accesses of the given type.

unsigned


ARMTargetLowering::getNumInterleavedAccesses(VectorType *VecTy,

                                             const DataLayout &DL) const {

  return (DL.getTypeSizeInBits(VecTy) + 127) / 128;

}


bool ARMTargetLowering::isLegalInterleavedAccessType(

    unsigned Factor, FixedVectorType *VecTy, Align Alignment,

    const DataLayout &DL) const {


  unsigned VecSize = DL.getTypeSizeInBits(VecTy);

  unsigned ElSize = DL.getTypeSizeInBits(VecTy->getElementType());


  if (!Subtarget->hasNEON() && !Subtarget->hasMVEIntegerOps())

    return false;


  // Ensure the vector doesn't have f16 elements. Even though we could do an

  // i16 vldN, we can't hold the f16 vectors and will end up converting via

  // f32.

  if (Subtarget->hasNEON() && VecTy->getElementType()->isHalfTy())

    return false;

  if (Subtarget->hasMVEIntegerOps() && Factor == 3)

    return false;


  // Ensure the number of vector elements is greater than 1.

  if (VecTy->getNumElements() < 2)

    return false;


  // Ensure the element type is legal.

  if (ElSize != 8 && ElSize != 16 && ElSize != 32)

    return false;

  // And the alignment if high enough under MVE.

  if (Subtarget->hasMVEIntegerOps() && Alignment < ElSize / 8)

    return false;


  // Ensure the total vector size is 64 or a multiple of 128. Types larger than

  // 128 will be split into multiple interleaved accesses.

  if (Subtarget->hasNEON() && VecSize == 64)

    return true;

  return VecSize % 128 == 0;

}


unsigned ARMTargetLowering::getMaxSupportedInterleaveFactor() const {

  if (Subtarget->hasNEON())

    return 4;

  if (Subtarget->hasMVEIntegerOps())

    return MVEMaxSupportedInterleaveFactor;

  return TargetLoweringBase::getMaxSupportedInterleaveFactor();

}


/// Lower an interleaved load into a vldN intrinsic.

///

/// E.g. Lower an interleaved load (Factor = 2):

///        %wide.vec = load <8 x i32>, <8 x i32>* %ptr, align 4

///        %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6>  ; Extract even elements

///        %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7>  ; Extract odd elements

///

///      Into:

///        %vld2 = { <4 x i32>, <4 x i32> } call llvm.arm.neon.vld2(%ptr, 4)

///        %vec0 = extractelement { <4 x i32>, <4 x i32> } %vld2, i32 0

///        %vec1 = extractelement { <4 x i32>, <4 x i32> } %vld2, i32 1


bool ARMTargetLowering::lowerInterleavedLoad(

    Instruction *Load, Value *Mask, ArrayRef<ShuffleVectorInst *> Shuffles,

    ArrayRef<unsigned> Indices, unsigned Factor, const APInt &GapMask) const {

  assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&

         "Invalid interleave factor");

  assert(!Shuffles.empty() && "Empty shufflevector input");

  assert(Shuffles.size() == Indices.size() &&

         "Unmatched number of shufflevectors and indices");


  auto *LI = dyn_cast<LoadInst>(Load);

  if (!LI)

    return false;

  assert(!Mask && GapMask.popcount() == Factor && "Unexpected mask on a load");


  auto *VecTy = cast<FixedVectorType>(Shuffles[0]->getType());

  Type *EltTy = VecTy->getElementType();


  const DataLayout &DL = LI->getDataLayout();

  Align Alignment = LI->getAlign();


  // Skip if we do not have NEON and skip illegal vector types. We can

  // "legalize" wide vector types into multiple interleaved accesses as long as

  // the vector types are divisible by 128.

  if (!isLegalInterleavedAccessType(Factor, VecTy, Alignment, DL))

    return false;


  unsigned NumLoads = getNumInterleavedAccesses(VecTy, DL);


  // A pointer vector can not be the return type of the ldN intrinsics. Need to

  // load integer vectors first and then convert to pointer vectors.

  if (EltTy->isPointerTy())

    VecTy = FixedVectorType::get(DL.getIntPtrType(EltTy), VecTy);


  IRBuilder<> Builder(LI);


  // The base address of the load.

  Value *BaseAddr = LI->getPointerOperand();


  if (NumLoads > 1) {

    // If we're going to generate more than one load, reset the sub-vector type

    // to something legal.

    VecTy = FixedVectorType::get(VecTy->getElementType(),

                                 VecTy->getNumElements() / NumLoads);

  }


  assert(isTypeLegal(EVT::getEVT(VecTy)) && "Illegal vldN vector type!");


  auto createLoadIntrinsic = [&](Value *BaseAddr) {

    if (Subtarget->hasNEON()) {

      Type *PtrTy = Builder.getPtrTy(LI->getPointerAddressSpace());

      Type *Tys[] = {VecTy, PtrTy};

      static const Intrinsic::ID LoadInts[3] = {Intrinsic::arm_neon_vld2,

                                                Intrinsic::arm_neon_vld3,

                                                Intrinsic::arm_neon_vld4};


      SmallVector<Value *, 2> Ops;

      Ops.push_back(BaseAddr);

      Ops.push_back(Builder.getInt32(LI->getAlign().value()));


      return Builder.CreateIntrinsic(LoadInts[Factor - 2], Tys, Ops,

                                     /*FMFSource=*/nullptr, "vldN");

    } else {

      assert((Factor == 2 || Factor == 4) &&

             "expected interleave factor of 2 or 4 for MVE");

      Intrinsic::ID LoadInts =

          Factor == 2 ? Intrinsic::arm_mve_vld2q : Intrinsic::arm_mve_vld4q;

      Type *PtrTy = Builder.getPtrTy(LI->getPointerAddressSpace());

      Type *Tys[] = {VecTy, PtrTy};


      SmallVector<Value *, 2> Ops;

      Ops.push_back(BaseAddr);

      return Builder.CreateIntrinsic(LoadInts, Tys, Ops, /*FMFSource=*/nullptr,

                                     "vldN");

    }

  };


  // Holds sub-vectors extracted from the load intrinsic return values. The

  // sub-vectors are associated with the shufflevector instructions they will

  // replace.

  DenseMap<ShuffleVectorInst *, SmallVector<Value *, 4>> SubVecs;


  for (unsigned LoadCount = 0; LoadCount < NumLoads; ++LoadCount) {

    // If we're generating more than one load, compute the base address of

    // subsequent loads as an offset from the previous.

    if (LoadCount > 0)

      BaseAddr = Builder.CreateConstGEP1_32(VecTy->getElementType(), BaseAddr,

                                            VecTy->getNumElements() * Factor);


    CallInst *VldN = createLoadIntrinsic(BaseAddr);


    // Replace uses of each shufflevector with the corresponding vector loaded

    // by ldN.

    for (unsigned i = 0; i < Shuffles.size(); i++) {

      ShuffleVectorInst *SV = Shuffles[i];

      unsigned Index = Indices[i];


      Value *SubVec = Builder.CreateExtractValue(VldN, Index);


      // Convert the integer vector to pointer vector if the element is pointer.

      if (EltTy->isPointerTy())

        SubVec = Builder.CreateIntToPtr(

            SubVec,

            FixedVectorType::get(SV->getType()->getElementType(), VecTy));


      SubVecs[SV].push_back(SubVec);

    }

  }


  // Replace uses of the shufflevector instructions with the sub-vectors

  // returned by the load intrinsic. If a shufflevector instruction is

  // associated with more than one sub-vector, those sub-vectors will be

  // concatenated into a single wide vector.

  for (ShuffleVectorInst *SVI : Shuffles) {

    auto &SubVec = SubVecs[SVI];

    auto *WideVec =

        SubVec.size() > 1 ? concatenateVectors(Builder, SubVec) : SubVec[0];

    SVI->replaceAllUsesWith(WideVec);

  }


  return true;

}


/// Lower an interleaved store into a vstN intrinsic.

///

/// E.g. Lower an interleaved store (Factor = 3):

///        %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,

///                                  <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>

///        store <12 x i32> %i.vec, <12 x i32>* %ptr, align 4

///

///      Into:

///        %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> v1, <0, 1, 2, 3>

///        %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> v1, <4, 5, 6, 7>

///        %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> v1, <8, 9, 10, 11>

///        call void llvm.arm.neon.vst3(%ptr, %sub.v0, %sub.v1, %sub.v2, 4)

///

/// Note that the new shufflevectors will be removed and we'll only generate one

/// vst3 instruction in CodeGen.

///

/// Example for a more general valid mask (Factor 3). Lower:

///        %i.vec = shuffle <32 x i32> %v0, <32 x i32> %v1,

///                 <4, 32, 16, 5, 33, 17, 6, 34, 18, 7, 35, 19>

///        store <12 x i32> %i.vec, <12 x i32>* %ptr

///

///      Into:

///        %sub.v0 = shuffle <32 x i32> %v0, <32 x i32> v1, <4, 5, 6, 7>

///        %sub.v1 = shuffle <32 x i32> %v0, <32 x i32> v1, <32, 33, 34, 35>

///        %sub.v2 = shuffle <32 x i32> %v0, <32 x i32> v1, <16, 17, 18, 19>

///        call void llvm.arm.neon.vst3(%ptr, %sub.v0, %sub.v1, %sub.v2, 4)


bool ARMTargetLowering::lowerInterleavedStore(Instruction *Store,

                                              Value *LaneMask,

                                              ShuffleVectorInst *SVI,

                                              unsigned Factor,

                                              const APInt &GapMask) const {

  assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&

         "Invalid interleave factor");

  auto *SI = dyn_cast<StoreInst>(Store);

  if (!SI)

    return false;

  assert(!LaneMask && GapMask.popcount() == Factor &&

         "Unexpected mask on store");


  auto *VecTy = cast<FixedVectorType>(SVI->getType());

  assert(VecTy->getNumElements() % Factor == 0 && "Invalid interleaved store");


  unsigned LaneLen = VecTy->getNumElements() / Factor;

  Type *EltTy = VecTy->getElementType();

  auto *SubVecTy = FixedVectorType::get(EltTy, LaneLen);


  const DataLayout &DL = SI->getDataLayout();

  Align Alignment = SI->getAlign();


  // Skip if we do not have NEON and skip illegal vector types. We can

  // "legalize" wide vector types into multiple interleaved accesses as long as

  // the vector types are divisible by 128.

  if (!isLegalInterleavedAccessType(Factor, SubVecTy, Alignment, DL))

    return false;


  unsigned NumStores = getNumInterleavedAccesses(SubVecTy, DL);


  Value *Op0 = SVI->getOperand(0);

  Value *Op1 = SVI->getOperand(1);

  IRBuilder<> Builder(SI);


  // StN intrinsics don't support pointer vectors as arguments. Convert pointer

  // vectors to integer vectors.

  if (EltTy->isPointerTy()) {

    Type *IntTy = DL.getIntPtrType(EltTy);


    // Convert to the corresponding integer vector.

    auto *IntVecTy =

        FixedVectorType::get(IntTy, cast<FixedVectorType>(Op0->getType()));

    Op0 = Builder.CreatePtrToInt(Op0, IntVecTy);

    Op1 = Builder.CreatePtrToInt(Op1, IntVecTy);


    SubVecTy = FixedVectorType::get(IntTy, LaneLen);

  }


  // The base address of the store.

  Value *BaseAddr = SI->getPointerOperand();


  if (NumStores > 1) {

    // If we're going to generate more than one store, reset the lane length

    // and sub-vector type to something legal.

    LaneLen /= NumStores;

    SubVecTy = FixedVectorType::get(SubVecTy->getElementType(), LaneLen);

  }


  assert(isTypeLegal(EVT::getEVT(SubVecTy)) && "Illegal vstN vector type!");


  auto Mask = SVI->getShuffleMask();


  auto createStoreIntrinsic = [&](Value *BaseAddr,

                                  SmallVectorImpl<Value *> &Shuffles) {

    if (Subtarget->hasNEON()) {

      static const Intrinsic::ID StoreInts[3] = {Intrinsic::arm_neon_vst2,

                                                 Intrinsic::arm_neon_vst3,

                                                 Intrinsic::arm_neon_vst4};

      Type *PtrTy = Builder.getPtrTy(SI->getPointerAddressSpace());

      Type *Tys[] = {PtrTy, SubVecTy};


      SmallVector<Value *, 6> Ops;

      Ops.push_back(BaseAddr);

      append_range(Ops, Shuffles);

      Ops.push_back(Builder.getInt32(SI->getAlign().value()));

      Builder.CreateIntrinsic(StoreInts[Factor - 2], Tys, Ops);

    } else {

      assert((Factor == 2 || Factor == 4) &&

             "expected interleave factor of 2 or 4 for MVE");

      Intrinsic::ID StoreInts =

          Factor == 2 ? Intrinsic::arm_mve_vst2q : Intrinsic::arm_mve_vst4q;

      Type *PtrTy = Builder.getPtrTy(SI->getPointerAddressSpace());

      Type *Tys[] = {PtrTy, SubVecTy};


      SmallVector<Value *, 6> Ops;

      Ops.push_back(BaseAddr);

      append_range(Ops, Shuffles);

      for (unsigned F = 0; F < Factor; F++) {

        Ops.push_back(Builder.getInt32(F));

        Builder.CreateIntrinsic(StoreInts, Tys, Ops);

        Ops.pop_back();

      }

    }

  };


  for (unsigned StoreCount = 0; StoreCount < NumStores; ++StoreCount) {

    // If we generating more than one store, we compute the base address of

    // subsequent stores as an offset from the previous.

    if (StoreCount > 0)

      BaseAddr = Builder.CreateConstGEP1_32(SubVecTy->getElementType(),

                                            BaseAddr, LaneLen * Factor);


    SmallVector<Value *, 4> Shuffles;


    // Split the shufflevector operands into sub vectors for the new vstN call.

    for (unsigned i = 0; i < Factor; i++) {

      unsigned IdxI = StoreCount * LaneLen * Factor + i;

      if (Mask[IdxI] >= 0) {

        Shuffles.push_back(Builder.CreateShuffleVector(

            Op0, Op1, createSequentialMask(Mask[IdxI], LaneLen, 0)));

      } else {

        unsigned StartMask = 0;

        for (unsigned j = 1; j < LaneLen; j++) {

          unsigned IdxJ = StoreCount * LaneLen * Factor + j;

          if (Mask[IdxJ * Factor + IdxI] >= 0) {

            StartMask = Mask[IdxJ * Factor + IdxI] - IdxJ;

            break;

          }

        }

        // Note: If all elements in a chunk are undefs, StartMask=0!

        // Note: Filling undef gaps with random elements is ok, since

        // those elements were being written anyway (with undefs).

        // In the case of all undefs we're defaulting to using elems from 0

        // Note: StartMask cannot be negative, it's checked in

        // isReInterleaveMask

        Shuffles.push_back(Builder.CreateShuffleVector(

            Op0, Op1, createSequentialMask(StartMask, LaneLen, 0)));

      }

    }


    createStoreIntrinsic(BaseAddr, Shuffles);

  }

  return true;

}


enum HABaseType {

  HA_UNKNOWN = 0,

  HA_FLOAT,

  HA_DOUBLE,

  HA_VECT64,

  HA_VECT128

};


static bool isHomogeneousAggregate(Type *Ty, HABaseType &Base,

                                   uint64_t &Members) {

  if (auto *ST = dyn_cast<StructType>(Ty)) {

    for (unsigned i = 0; i < ST->getNumElements(); ++i) {

      uint64_t SubMembers = 0;

      if (!isHomogeneousAggregate(ST->getElementType(i), Base, SubMembers))

        return false;

      Members += SubMembers;

    }

  } else if (auto *AT = dyn_cast<ArrayType>(Ty)) {

    uint64_t SubMembers = 0;

    if (!isHomogeneousAggregate(AT->getElementType(), Base, SubMembers))

      return false;

    Members += SubMembers * AT->getNumElements();

  } else if (Ty->isFloatTy()) {

    if (Base != HA_UNKNOWN && Base != HA_FLOAT)

      return false;

    Members = 1;

    Base = HA_FLOAT;

  } else if (Ty->isDoubleTy()) {

    if (Base != HA_UNKNOWN && Base != HA_DOUBLE)

      return false;

    Members = 1;

    Base = HA_DOUBLE;

  } else if (auto *VT = dyn_cast<VectorType>(Ty)) {

    Members = 1;

    switch (Base) {

    case HA_FLOAT:

    case HA_DOUBLE:

      return false;

    case HA_VECT64:

      return VT->getPrimitiveSizeInBits().getFixedValue() == 64;

    case HA_VECT128:

      return VT->getPrimitiveSizeInBits().getFixedValue() == 128;

    case HA_UNKNOWN:

      switch (VT->getPrimitiveSizeInBits().getFixedValue()) {

      case 64:

        Base = HA_VECT64;

        return true;

      case 128:

        Base = HA_VECT128;

        return true;

      default:

        return false;

      }

    }

  }


  return (Members > 0 && Members <= 4);

}


/// Return the correct alignment for the current calling convention.


Align ARMTargetLowering::getABIAlignmentForCallingConv(

    Type *ArgTy, const DataLayout &DL) const {

  const Align ABITypeAlign = DL.getABITypeAlign(ArgTy);

  if (!ArgTy->isVectorTy())

    return ABITypeAlign;


  // Avoid over-aligning vector parameters. It would require realigning the

  // stack and waste space for no real benefit.

  MaybeAlign StackAlign = DL.getStackAlignment();

  assert(StackAlign && "data layout string is missing stack alignment");

  return std::min(ABITypeAlign, *StackAlign);

}


/// Return true if a type is an AAPCS-VFP homogeneous aggregate or one of

/// [N x i32] or [N x i64]. This allows front-ends to skip emitting padding when

/// passing according to AAPCS rules.


bool ARMTargetLowering::functionArgumentNeedsConsecutiveRegisters(

    Type *Ty, CallingConv::ID CallConv, bool isVarArg,

    const DataLayout &DL) const {

  if (getEffectiveCallingConv(CallConv, isVarArg) !=

      CallingConv::ARM_AAPCS_VFP)

    return false;


  HABaseType Base = HA_UNKNOWN;

  uint64_t Members = 0;

  bool IsHA = isHomogeneousAggregate(Ty, Base, Members);

  LLVM_DEBUG(dbgs() << "isHA: " << IsHA << " "; Ty->dump());


  bool IsIntArray = Ty->isArrayTy() && Ty->getArrayElementType()->isIntegerTy();

  return IsHA || IsIntArray;

}


Register ARMTargetLowering::getExceptionPointerRegister(

    const Constant *PersonalityFn) const {

  // Platforms which do not use SjLj EH may return values in these registers

  // via the personality function.

  ExceptionHandling EM = getTargetMachine().getExceptionModel();

  return EM == ExceptionHandling::SjLj ? Register() : ARM::R0;

}


Register ARMTargetLowering::getExceptionSelectorRegister(

    const Constant *PersonalityFn) const {

  // Platforms which do not use SjLj EH may return values in these registers

  // via the personality function.

  ExceptionHandling EM = getTargetMachine().getExceptionModel();

  return EM == ExceptionHandling::SjLj ? Register() : ARM::R1;

}


void ARMTargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const {

  // Update IsSplitCSR in ARMFunctionInfo.

  ARMFunctionInfo *AFI = Entry->getParent()->getInfo<ARMFunctionInfo>();

  AFI->setIsSplitCSR(true);

}


void ARMTargetLowering::insertCopiesSplitCSR(

    MachineBasicBlock *Entry,

    const SmallVectorImpl<MachineBasicBlock *> &Exits) const {

  const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();

  const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent());

  if (!IStart)

    return;


  const TargetInstrInfo *TII = Subtarget->getInstrInfo();

  MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();

  MachineBasicBlock::iterator MBBI = Entry->begin();

  for (const MCPhysReg *I = IStart; *I; ++I) {

    const TargetRegisterClass *RC = nullptr;

    if (ARM::GPRRegClass.contains(*I))

      RC = &ARM::GPRRegClass;

    else if (ARM::DPRRegClass.contains(*I))

      RC = &ARM::DPRRegClass;

    else

      llvm_unreachable("Unexpected register class in CSRsViaCopy!");


    Register NewVR = MRI->createVirtualRegister(RC);

    // Create copy from CSR to a virtual register.

    // FIXME: this currently does not emit CFI pseudo-instructions, it works

    // fine for CXX_FAST_TLS since the C++-style TLS access functions should be

    // nounwind. If we want to generalize this later, we may need to emit

    // CFI pseudo-instructions.

    assert(Entry->getParent()->getFunction().hasFnAttribute(

               Attribute::NoUnwind) &&

           "Function should be nounwind in insertCopiesSplitCSR!");

    Entry->addLiveIn(*I);

    BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR)

        .addReg(*I);


    // Insert the copy-back instructions right before the terminator.

    for (auto *Exit : Exits)

      BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(),

              TII->get(TargetOpcode::COPY), *I)

          .addReg(NewVR);

  }

}


void ARMTargetLowering::finalizeLowering(MachineFunction &MF) const {

  MF.getFrameInfo().computeMaxCallFrameSize(MF);

  TargetLoweringBase::finalizeLowering(MF);

}


bool ARMTargetLowering::isComplexDeinterleavingSupported() const {

  return Subtarget->hasMVEIntegerOps();

}


bool ARMTargetLowering::isComplexDeinterleavingOperationSupported(

    ComplexDeinterleavingOperation Operation, Type *Ty) const {

  auto *VTy = dyn_cast<FixedVectorType>(Ty);

  if (!VTy)

    return false;


  auto *ScalarTy = VTy->getScalarType();

  unsigned NumElements = VTy->getNumElements();


  unsigned VTyWidth = VTy->getScalarSizeInBits() * NumElements;

  if (VTyWidth < 128 || !llvm::isPowerOf2_32(VTyWidth))

    return false;


  // Both VCADD and VCMUL/VCMLA support the same types, F16 and F32

  if (ScalarTy->isHalfTy() || ScalarTy->isFloatTy())

    return Subtarget->hasMVEFloatOps();


  if (Operation != ComplexDeinterleavingOperation::CAdd)

    return false;


  return Subtarget->hasMVEIntegerOps() &&

         (ScalarTy->isIntegerTy(8) || ScalarTy->isIntegerTy(16) ||

          ScalarTy->isIntegerTy(32));

}


ArrayRef<MCPhysReg> ARMTargetLowering::getRoundingControlRegisters() const {

  static const MCPhysReg RCRegs[] = {ARM::FPSCR_RM};

  return RCRegs;

}


Value *ARMTargetLowering::createComplexDeinterleavingIR(

    IRBuilderBase &B, ComplexDeinterleavingOperation OperationType,

    ComplexDeinterleavingRotation Rotation, Value *InputA, Value *InputB,

    Value *Accumulator) const {


  FixedVectorType *Ty = cast<FixedVectorType>(InputA->getType());


  unsigned TyWidth = Ty->getScalarSizeInBits() * Ty->getNumElements();


  assert(TyWidth >= 128 && "Width of vector type must be at least 128 bits");


  if (TyWidth > 128) {

    int Stride = Ty->getNumElements() / 2;

    auto SplitSeq = llvm::seq<int>(0, Ty->getNumElements());

    auto SplitSeqVec = llvm::to_vector(SplitSeq);

    ArrayRef<int> LowerSplitMask(&SplitSeqVec[0], Stride);

    ArrayRef<int> UpperSplitMask(&SplitSeqVec[Stride], Stride);


    auto *LowerSplitA = B.CreateShuffleVector(InputA, LowerSplitMask);

    auto *LowerSplitB = B.CreateShuffleVector(InputB, LowerSplitMask);

    auto *UpperSplitA = B.CreateShuffleVector(InputA, UpperSplitMask);

    auto *UpperSplitB = B.CreateShuffleVector(InputB, UpperSplitMask);

    Value *LowerSplitAcc = nullptr;

    Value *UpperSplitAcc = nullptr;


    if (Accumulator) {

      LowerSplitAcc = B.CreateShuffleVector(Accumulator, LowerSplitMask);

      UpperSplitAcc = B.CreateShuffleVector(Accumulator, UpperSplitMask);

    }


    auto *LowerSplitInt = createComplexDeinterleavingIR(

        B, OperationType, Rotation, LowerSplitA, LowerSplitB, LowerSplitAcc);

    auto *UpperSplitInt = createComplexDeinterleavingIR(

        B, OperationType, Rotation, UpperSplitA, UpperSplitB, UpperSplitAcc);


    ArrayRef<int> JoinMask(&SplitSeqVec[0], Ty->getNumElements());

    return B.CreateShuffleVector(LowerSplitInt, UpperSplitInt, JoinMask);

  }


  auto *IntTy = Type::getInt32Ty(B.getContext());


  ConstantInt *ConstRotation = nullptr;

  if (OperationType == ComplexDeinterleavingOperation::CMulPartial) {

    ConstRotation = ConstantInt::get(IntTy, (int)Rotation);


    if (Accumulator)

      return B.CreateIntrinsic(Intrinsic::arm_mve_vcmlaq, Ty,

                               {ConstRotation, Accumulator, InputB, InputA});

    return B.CreateIntrinsic(Intrinsic::arm_mve_vcmulq, Ty,

                             {ConstRotation, InputB, InputA});

  }


  if (OperationType == ComplexDeinterleavingOperation::CAdd) {

    // 1 means the value is not halved.

    auto *ConstHalving = ConstantInt::get(IntTy, 1);


    if (Rotation == ComplexDeinterleavingRotation::Rotation_90)

      ConstRotation = ConstantInt::get(IntTy, 0);

    else if (Rotation == ComplexDeinterleavingRotation::Rotation_270)

      ConstRotation = ConstantInt::get(IntTy, 1);


    if (!ConstRotation)

      return nullptr; // Invalid rotation for arm_mve_vcaddq


    return B.CreateIntrinsic(Intrinsic::arm_mve_vcaddq, Ty,

                             {ConstHalving, ConstRotation, InputA, InputB});

  }


  return nullptr;

}


MRI
unsigned const MachineRegisterInfo * MRI
Definition AArch64AdvSIMDScalarPass.cpp:103

SelectTypeKind::Int
@ Int
Definition AArch64ISelDAGToDAG.cpp:1838

isAddSubSExt
static bool isAddSubSExt(SDValue N, SelectionDAG &DAG)
Definition AArch64ISelLowering.cpp:5565

isVShiftRImm
static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, int64_t &Cnt)
isVShiftRImm - Check if this is a valid build_vector for the immediate operand of a vector shift righ...
Definition AArch64ISelLowering.cpp:16526

isExtendedBUILD_VECTOR
static bool isExtendedBUILD_VECTOR(SDValue N, SelectionDAG &DAG, bool isSigned)
Definition AArch64ISelLowering.cpp:5517

isZeroExtended
static bool isZeroExtended(SDValue N, SelectionDAG &DAG)
Definition AArch64ISelLowering.cpp:5559

SDValue
return SDValue()

GPRArgRegs
static const MCPhysReg GPRArgRegs[]
Definition AArch64ISelLowering.cpp:171

GeneratePerfectShuffle
static SDValue GeneratePerfectShuffle(unsigned ID, SDValue V1, SDValue V2, unsigned PFEntry, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const SDLoc &DL)
GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit the specified operations t...
Definition AArch64ISelLowering.cpp:14239

FlagsVT
constexpr MVT FlagsVT
Value type used for NZCV flags.
Definition AArch64ISelLowering.cpp:169

isNegatedInteger
static bool isNegatedInteger(SDValue Op)
Definition AArch64ISelLowering.cpp:21619

getVShiftImm
static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt)
getVShiftImm - Check if this is a valid build_vector for the immediate operand of a vector shift oper...
Definition AArch64ISelLowering.cpp:16495

LowerPREFETCH
static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG)
Definition AArch64ISelLowering.cpp:4569

isSignExtended
static bool isSignExtended(SDValue N, SelectionDAG &DAG)
Definition AArch64ISelLowering.cpp:5553

isAddSubZExt
static bool isAddSubZExt(SDValue N, SelectionDAG &DAG)
Definition AArch64ISelLowering.cpp:5576

isVShiftLImm
static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt)
isVShiftLImm - Check if this is a valid build_vector for the immediate operand of a vector shift left...
Definition AArch64ISelLowering.cpp:16515

canGuaranteeTCO
static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls)
Return true if the calling convention is one that we can guarantee TCO for.
Definition AArch64ISelLowering.cpp:9025

FMAInstKind::Accumulator
@ Accumulator
Definition AArch64InstrInfo.cpp:8172

RegSize
unsigned RegSize
Definition AArch64MIPeepholeOpt.cpp:165

assert
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")

Wrapper
amdgpu aa AMDGPU Address space based Alias Analysis Wrapper
Definition AMDGPUAliasAnalysis.cpp:31

isConstant
static bool isConstant(const MachineInstr &MI)
Definition AMDGPUInstructionSelector.cpp:2903

F64
constexpr LLT F64
Definition AMDGPULegalizerInfo.cpp:300

S1
constexpr LLT S1
Definition AMDGPULegalizerInfo.cpp:294

APFloat.h
This file declares a class to represent arbitrary precision floating point values and provide a varie...

APInt.h
This file implements a class to represent arbitrary precision integral constant values and operations...

LowerVASTART
static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG)
Definition ARCISelLowering.cpp:756

isStore
static bool isStore(int Opcode)
Definition ARCInstrInfo.cpp:59

ARMAddressingModes.h

isThumb
static bool isThumb(const MCSubtargetInfo &STI)
Definition ARMAsmPrinter.cpp:489

ARMBaseInstrInfo.h

ARMBaseRegisterInfo.h

ARMCallingConv.h

ARMConstantPoolValue.h

LowerUADDSUBO_CARRY
static SDValue LowerUADDSUBO_CARRY(SDValue Op, SelectionDAG &DAG)
Definition ARMISelLowering.cpp:9635

PerformExtractEltToVMOVRRD
static SDValue PerformExtractEltToVMOVRRD(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
Definition ARMISelLowering.cpp:15370

isIncompatibleReg
static bool isIncompatibleReg(const MCPhysReg &PR, MVT VT)
Definition ARMISelLowering.cpp:20136

PerformVQDMULHCombine
static SDValue PerformVQDMULHCombine(SDNode *N, SelectionDAG &DAG)
Definition ARMISelLowering.cpp:13124

LowerBUILD_VECTOR_i1
static SDValue LowerBUILD_VECTOR_i1(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
Definition ARMISelLowering.cpp:7528

LowerShift
static SDValue LowerShift(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
Definition ARMISelLowering.cpp:6425

HABaseType
HABaseType
Definition ARMISelLowering.cpp:21674

HA_DOUBLE
@ HA_DOUBLE
Definition ARMISelLowering.cpp:21677

HA_VECT128
@ HA_VECT128
Definition ARMISelLowering.cpp:21679

HA_VECT64
@ HA_VECT64
Definition ARMISelLowering.cpp:21678

HA_FLOAT
@ HA_FLOAT
Definition ARMISelLowering.cpp:21676

HA_UNKNOWN
@ HA_UNKNOWN
Definition ARMISelLowering.cpp:21675

LowerVECTOR_SHUFFLE
static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
Definition ARMISelLowering.cpp:8564

AddRequiredExtensionForVMULL
static SDValue AddRequiredExtensionForVMULL(SDValue N, SelectionDAG &DAG, const EVT &OrigTy, const EVT &ExtTy, unsigned ExtOpcode)
AddRequiredExtensionForVMULL - Add a sign/zero extension to extend the total value size to 64 bits.
Definition ARMISelLowering.cpp:9259

ConstpoolPromotionMaxSize
static cl::opt< unsigned > ConstpoolPromotionMaxSize("arm-promote-constant-max-size", cl::Hidden, cl::desc("Maximum size of constant to promote into a constant pool"), cl::init(64))

isZeroOrAllOnes
static bool isZeroOrAllOnes(SDValue N, bool AllOnes)
Definition ARMISelLowering.cpp:12307

LowerINSERT_VECTOR_ELT_i1
static SDValue LowerINSERT_VECTOR_ELT_i1(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
Definition ARMISelLowering.cpp:8792

isVTBLMask
static bool isVTBLMask(ArrayRef< int > M, EVT VT)
Definition ARMISelLowering.cpp:7070

PerformSUBCombine
static SDValue PerformSUBCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB.
Definition ARMISelLowering.cpp:13916

EnableConstpoolPromotion
static cl::opt< bool > EnableConstpoolPromotion("arm-promote-constant", cl::Hidden, cl::desc("Enable / disable promotion of unnamed_addr constants into " "constant pools"), cl::init(false))

PerformFAddVSelectCombine
static SDValue PerformFAddVSelectCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
Definition ARMISelLowering.cpp:16795

PerformExtractFpToIntStores
static SDValue PerformExtractFpToIntStores(StoreSDNode *St, SelectionDAG &DAG)
Definition ARMISelLowering.cpp:16623

PerformVDUPCombine
static SDValue PerformVDUPCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
PerformVDUPCombine - Target-specific dag combine xforms for ARMISD::VDUP.
Definition ARMISelLowering.cpp:16349

PerformExtractEltCombine
static SDValue PerformExtractEltCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *ST)
Definition ARMISelLowering.cpp:15430

isPowerOf2Constant
static const APInt * isPowerOf2Constant(SDValue V)
Definition ARMISelLowering.cpp:17935

PerformVCVTCombine
static SDValue PerformVCVTCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
PerformVCVTCombine - VCVT (floating-point to fixed-point, Advanced SIMD) can replace combinations of ...
Definition ARMISelLowering.cpp:16747

PerformVMOVhrCombine
static SDValue PerformVMOVhrCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
Definition ARMISelLowering.cpp:15015

LowerVectorFP_TO_INT
static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG)
Definition ARMISelLowering.cpp:5639

LowerVECTOR_SHUFFLEUsingOneOff
static SDValue LowerVECTOR_SHUFFLEUsingOneOff(SDValue Op, ArrayRef< int > ShuffleMask, SelectionDAG &DAG)
Definition ARMISelLowering.cpp:8513

isValidMVECond
static bool isValidMVECond(unsigned CC, bool IsFloat)
Definition ARMISelLowering.cpp:14502

PerformPREDICATE_CASTCombine
static SDValue PerformPREDICATE_CASTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
Definition ARMISelLowering.cpp:15248

IntCCToARMCC
static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC)
IntCCToARMCC - Convert a DAG integer condition code to an ARM CC.
Definition ARMISelLowering.cpp:1689

PerformSTORECombine
static SDValue PerformSTORECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformSTORECombine - Target-specific dag combine xforms for ISD::STORE.
Definition ARMISelLowering.cpp:16657

ConvertBooleanCarryToCarryFlag
static SDValue ConvertBooleanCarryToCarryFlag(SDValue BoolCarry, SelectionDAG &DAG)
Definition ARMISelLowering.cpp:4776

LowerCONCAT_VECTORS
static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
Definition ARMISelLowering.cpp:8976

isGTorGE
static bool isGTorGE(ISD::CondCode CC)
Definition ARMISelLowering.cpp:5027

CombineVLDDUP
static bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
CombineVLDDUP - For a VDUPLANE node N, check if its source operand is a vldN-lane (N > 1) intrinsic,...
Definition ARMISelLowering.cpp:16231

ParseBFI
static SDValue ParseBFI(SDNode *N, APInt &ToMask, APInt &FromMask)
Definition ARMISelLowering.cpp:14704

isReverseMask
static bool isReverseMask(ArrayRef< int > M, EVT VT)
Definition ARMISelLowering.cpp:7328

isVZIP_v_undef_Mask
static bool isVZIP_v_undef_Mask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of "vector_shuffle v,...
Definition ARMISelLowering.cpp:7273

PerformSELECTCombine
static SDValue PerformSELECTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
Definition ARMISelLowering.cpp:13013

AddCombineTo64bitUMAAL
static SDValue AddCombineTo64bitUMAAL(SDNode *AddeNode, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
Definition ARMISelLowering.cpp:12875

PerformVECTOR_REG_CASTCombine
static SDValue PerformVECTOR_REG_CASTCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
Definition ARMISelLowering.cpp:15281

PerformVMulVCTPCombine
static SDValue PerformVMulVCTPCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
PerformVMulVCTPCombine - VCVT (fixed-point to floating-point, Advanced SIMD) can replace combinations...
Definition ARMISelLowering.cpp:16887

createGPRPairNode2xi32
static SDValue createGPRPairNode2xi32(SelectionDAG &DAG, SDValue V0, SDValue V1)
Definition ARMISelLowering.cpp:10143

bitcastf32Toi32
static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG)
Definition ARMISelLowering.cpp:5382

findPointerConstIncrement
static bool findPointerConstIncrement(SDNode *N, SDValue *Ptr, SDValue *CInc)
Definition ARMISelLowering.cpp:15987

isVTRNMask
static bool isVTRNMask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
Definition ARMISelLowering.cpp:7105

LowerEXTRACT_SUBVECTOR
static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
Definition ARMISelLowering.cpp:9001

CanInvertMVEVCMP
static bool CanInvertMVEVCMP(SDValue N)
Definition ARMISelLowering.cpp:14528

PerformLongShiftCombine
static SDValue PerformLongShiftCombine(SDNode *N, SelectionDAG &DAG)
Definition ARMISelLowering.cpp:17315

AddCombineToVPADD
static SDValue AddCombineToVPADD(SDNode *N, SDValue N0, SDValue N1, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
Definition ARMISelLowering.cpp:12447

PerformShiftCombine
static SDValue PerformShiftCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *ST)
PerformShiftCombine - Checks for immediate versions of vector shifts and lowers them.
Definition ARMISelLowering.cpp:17576

FPCCToARMCC
static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode, ARMCC::CondCodes &CondCode2)
FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
Definition ARMISelLowering.cpp:1706

ExpandREAD_REGISTER
static void ExpandREAD_REGISTER(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG)
Definition ARMISelLowering.cpp:5941

getVectorTyFromPredicateVector
static EVT getVectorTyFromPredicateVector(EVT VT)
Definition ARMISelLowering.cpp:8325

PerformFADDVCMLACombine
static SDValue PerformFADDVCMLACombine(SDNode *N, SelectionDAG &DAG)
Definition ARMISelLowering.cpp:16838

handleCMSEValue
static SDValue handleCMSEValue(const SDValue &Value, const ISD::InputArg &Arg, SelectionDAG &DAG, const SDLoc &DL)
Definition ARMISelLowering.cpp:163

PerformARMBUILD_VECTORCombine
static SDValue PerformARMBUILD_VECTORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
Target-specific dag combine xforms for ARMISD::BUILD_VECTOR.
Definition ARMISelLowering.cpp:15157

isSRL16
static bool isSRL16(const SDValue &Op)
Definition ARMISelLowering.cpp:1654

PerformVMOVrhCombine
static SDValue PerformVMOVrhCombine(SDNode *N, SelectionDAG &DAG)
Definition ARMISelLowering.cpp:15077

PerformLOADCombine
static SDValue PerformLOADCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
Definition ARMISelLowering.cpp:16387

IsCMPZCSINC
static SDValue IsCMPZCSINC(SDNode *Cmp, ARMCC::CondCodes &CC)
Definition ARMISelLowering.cpp:14839

getPointerConstIncrement
static unsigned getPointerConstIncrement(unsigned Opcode, SDValue Ptr, SDValue Inc, const SelectionDAG &DAG)
Definition ARMISelLowering.cpp:15965

combineSelectAndUseCommutative
static SDValue combineSelectAndUseCommutative(SDNode *N, bool AllOnes, TargetLowering::DAGCombinerInfo &DCI)
Definition ARMISelLowering.cpp:12422

LowerATOMIC_FENCE
static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
Definition ARMISelLowering.cpp:4031

genTPEntry
static Register genTPEntry(MachineBasicBlock *TpEntry, MachineBasicBlock *TpLoopBody, MachineBasicBlock *TpExit, Register OpSizeReg, const TargetInstrInfo *TII, DebugLoc Dl, MachineRegisterInfo &MRI)
Adds logic in loop entry MBB to calculate loop iteration count and adds t2WhileLoopSetup and t2WhileL...
Definition ARMISelLowering.cpp:11663

createGPRPairNodei64
static SDValue createGPRPairNodei64(SelectionDAG &DAG, SDValue V)
Definition ARMISelLowering.cpp:10155

isLTorLE
static bool isLTorLE(ISD::CondCode CC)
Definition ARMISelLowering.cpp:5031

PerformVCMPCombine
static SDValue PerformVCMPCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
Definition ARMISelLowering.cpp:15309

PerformMVEVMULLCombine
static SDValue PerformMVEVMULLCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
Definition ARMISelLowering.cpp:14003

LowerSDIV_v4i16
static SDValue LowerSDIV_v4i16(SDValue N0, SDValue N1, const SDLoc &dl, SelectionDAG &DAG)
Definition ARMISelLowering.cpp:9483

performNegCMovCombine
static SDValue performNegCMovCombine(SDNode *N, SelectionDAG &DAG)
Definition ARMISelLowering.cpp:13889

getExtensionTo64Bits
static EVT getExtensionTo64Bits(const EVT &OrigVT)
Definition ARMISelLowering.cpp:9239

PerformBITCASTCombine
static SDValue PerformBITCASTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *ST)
Definition ARMISelLowering.cpp:18415

AddCombineTo64bitMLAL
static SDValue AddCombineTo64bitMLAL(SDNode *AddeSubeNode, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
Definition ARMISelLowering.cpp:12709

LowerWRITE_REGISTER
static SDValue LowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG)
Definition ARMISelLowering.cpp:3222

checkAndUpdateCPSRKill
static bool checkAndUpdateCPSRKill(MachineBasicBlock::iterator SelectItr, MachineBasicBlock *BB, const TargetRegisterInfo *TRI)
Definition ARMISelLowering.cpp:11634

PerformCMPZCombine
static SDValue PerformCMPZCombine(SDNode *N, SelectionDAG &DAG)
Definition ARMISelLowering.cpp:14872

hasNormalLoadOperand
static bool hasNormalLoadOperand(SDNode *N)
hasNormalLoadOperand - Check if any of the operands of a BUILD_VECTOR node are normal,...
Definition ARMISelLowering.cpp:15112

PerformInsertEltCombine
static SDValue PerformInsertEltCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
PerformInsertEltCombine - Target-specific dag combine xforms for ISD::INSERT_VECTOR_ELT.
Definition ARMISelLowering.cpp:15341

PerformVDUPLANECombine
static SDValue PerformVDUPLANECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformVDUPLANECombine - Target-specific dag combine xforms for ARMISD::VDUPLANE.
Definition ARMISelLowering.cpp:16306

LowerBuildVectorOfFPTrunc
static SDValue LowerBuildVectorOfFPTrunc(SDValue BV, SelectionDAG &DAG, const ARMSubtarget *ST)
Definition ARMISelLowering.cpp:7416

ConstpoolPromotionMaxTotal
static cl::opt< unsigned > ConstpoolPromotionMaxTotal("arm-promote-constant-max-total", cl::Hidden, cl::desc("Maximum size of ALL constants to promote into a constant pool"), cl::init(128))

LowerTruncatei1
static SDValue LowerTruncatei1(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
Definition ARMISelLowering.cpp:9055

getDivRemLibcall
static RTLIB::Libcall getDivRemLibcall(const SDNode *N, MVT::SimpleValueType SVT)
Definition ARMISelLowering.cpp:20393

SkipLoadExtensionForVMULL
static SDValue SkipLoadExtensionForVMULL(LoadSDNode *LD, SelectionDAG &DAG)
SkipLoadExtensionForVMULL - return a load of the original vector size that does not do any sign/zero ...
Definition ARMISelLowering.cpp:9281

AddCombineVUZPToVPADDL
static SDValue AddCombineVUZPToVPADDL(SDNode *N, SDValue N0, SDValue N1, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
Definition ARMISelLowering.cpp:12475

PerformADDCombineWithOperands
static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformADDCombineWithOperands - Try DAG combinations for an ADD with operands N0 and N1.
Definition ARMISelLowering.cpp:13350

PromoteMVEPredVector
static SDValue PromoteMVEPredVector(SDLoc dl, SDValue Pred, EVT VT, SelectionDAG &DAG)
Definition ARMISelLowering.cpp:8340

isVZIPMask
static bool isVZIPMask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
Definition ARMISelLowering.cpp:7240

PerformORCombineToSMULWBT
static SDValue PerformORCombineToSMULWBT(SDNode *OR, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
Definition ARMISelLowering.cpp:14316

isVTRN_v_undef_Mask
static bool isVTRN_v_undef_Mask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of "vector_shuffle v,...
Definition ARMISelLowering.cpp:7137

LowerUDIV
static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
Definition ARMISelLowering.cpp:9558

FindBFIToCombineWith
static SDValue FindBFIToCombineWith(SDNode *N)
Definition ARMISelLowering.cpp:14733

LowerADDSUBSAT
static SDValue LowerADDSUBSAT(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
Definition ARMISelLowering.cpp:4837

ConvertCarryFlagToBooleanCarry
static SDValue ConvertCarryFlagToBooleanCarry(SDValue Flags, EVT VT, SelectionDAG &DAG)
Definition ARMISelLowering.cpp:4789

ShuffleOpCodes
ShuffleOpCodes
Definition ARMISelLowering.cpp:8138

OP_VEXT3
@ OP_VEXT3
Definition ARMISelLowering.cpp:8147

OP_VTRNR
@ OP_VTRNR
Definition ARMISelLowering.cpp:8153

OP_VDUP1
@ OP_VDUP1
Definition ARMISelLowering.cpp:8142

OP_VZIPR
@ OP_VZIPR
Definition ARMISelLowering.cpp:8151

OP_VUZPR
@ OP_VUZPR
Definition ARMISelLowering.cpp:8149

OP_VREV
@ OP_VREV
Definition ARMISelLowering.cpp:8140

OP_VZIPL
@ OP_VZIPL
Definition ARMISelLowering.cpp:8150

OP_VTRNL
@ OP_VTRNL
Definition ARMISelLowering.cpp:8152

OP_COPY
@ OP_COPY
Definition ARMISelLowering.cpp:8139

OP_VEXT1
@ OP_VEXT1
Definition ARMISelLowering.cpp:8145

OP_VDUP0
@ OP_VDUP0
Definition ARMISelLowering.cpp:8141

OP_VEXT2
@ OP_VEXT2
Definition ARMISelLowering.cpp:8146

OP_VUZPL
@ OP_VUZPL
Definition ARMISelLowering.cpp:8148

OP_VDUP3
@ OP_VDUP3
Definition ARMISelLowering.cpp:8144

OP_VDUP2
@ OP_VDUP2
Definition ARMISelLowering.cpp:8143

checkVSELConstraints
static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode, bool &swpCmpOps, bool &swpVselOps)
Definition ARMISelLowering.cpp:4953

ReplaceLongIntrinsic
static void ReplaceLongIntrinsic(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG)
Definition ARMISelLowering.cpp:10520

isS16
static bool isS16(const SDValue &Op, SelectionDAG &DAG)
Definition ARMISelLowering.cpp:1682

isSRA16
static bool isSRA16(const SDValue &Op)
Definition ARMISelLowering.cpp:1662

AddCombineBUILD_VECTORToVPADDL
static SDValue AddCombineBUILD_VECTORToVPADDL(SDNode *N, SDValue N0, SDValue N1, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
Definition ARMISelLowering.cpp:12528

LowerVECTOR_SHUFFLEUsingMovs
static SDValue LowerVECTOR_SHUFFLEUsingMovs(SDValue Op, ArrayRef< int > ShuffleMask, SelectionDAG &DAG)
Definition ARMISelLowering.cpp:8427

LowerInterruptReturn
static SDValue LowerInterruptReturn(SmallVectorImpl< SDValue > &RetOps, const SDLoc &DL, SelectionDAG &DAG)
Definition ARMISelLowering.cpp:2929

LowerEXTRACT_VECTOR_ELT_i1
static SDValue LowerEXTRACT_VECTOR_ELT_i1(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
Definition ARMISelLowering.cpp:8855

LowerSDIV_v4i8
static SDValue LowerSDIV_v4i8(SDValue X, SDValue Y, const SDLoc &dl, SelectionDAG &DAG)
Definition ARMISelLowering.cpp:9452

expandf64Toi32
static void expandf64Toi32(SDValue Op, SelectionDAG &DAG, SDValue &RetVal1, SDValue &RetVal2)
Definition ARMISelLowering.cpp:5394

LowerCONCAT_VECTORS_i1
static SDValue LowerCONCAT_VECTORS_i1(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
Definition ARMISelLowering.cpp:8894

LowerCTTZ
static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
Definition ARMISelLowering.cpp:6283

PerformVLDCombine
static SDValue PerformVLDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
Definition ARMISelLowering.cpp:16101

isSHL16
static bool isSHL16(const SDValue &Op)
Definition ARMISelLowering.cpp:1670

isVEXTMask
static bool isVEXTMask(ArrayRef< int > M, EVT VT, bool &ReverseVEXT, unsigned &Imm)
Definition ARMISelLowering.cpp:7034

PerformMVEVLDCombine
static SDValue PerformMVEVLDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
Definition ARMISelLowering.cpp:16109

ArmMaxBaseUpdatesToCheck
cl::opt< unsigned > ArmMaxBaseUpdatesToCheck("arm-max-base-updates-to-check", cl::Hidden, cl::desc("Maximum number of base-updates to check generating postindex."), cl::init(64))

isTruncMask
static bool isTruncMask(ArrayRef< int > M, EVT VT, bool Top, bool SingleSource)
Definition ARMISelLowering.cpp:7342

PerformADDCombine
static SDValue PerformADDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
Definition ARMISelLowering.cpp:13838

getLdOpcode
static unsigned getLdOpcode(unsigned LdSize, bool IsThumb1, bool IsThumb2)
Return the load opcode for a given load size.
Definition ARMISelLowering.cpp:11159

isLegalT2AddressImmediate
static bool isLegalT2AddressImmediate(int64_t V, EVT VT, const ARMSubtarget *Subtarget)
Definition ARMISelLowering.cpp:19236

isLegalMVEShuffleOp
static bool isLegalMVEShuffleOp(unsigned PFEntry)
Definition ARMISelLowering.cpp:8156

PerformSignExtendInregCombine
static SDValue PerformSignExtendInregCombine(SDNode *N, SelectionDAG &DAG)
Definition ARMISelLowering.cpp:15491

PerformShuffleVMOVNCombine
static SDValue PerformShuffleVMOVNCombine(ShuffleVectorSDNode *N, SelectionDAG &DAG)
Definition ARMISelLowering.cpp:15547

isVUZPMask
static bool isVUZPMask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
Definition ARMISelLowering.cpp:7169

PerformVECTOR_SHUFFLECombine
static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG)
PerformVECTOR_SHUFFLECombine - Target-specific dag combine xforms for ISD::VECTOR_SHUFFLE.
Definition ARMISelLowering.cpp:15572

SkipExtensionForVMULL
static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG)
SkipExtensionForVMULL - For a node that is a SIGN_EXTEND, ZERO_EXTEND, ANY_EXTEND,...
Definition ARMISelLowering.cpp:9305

isVMOVNTruncMask
static bool isVMOVNTruncMask(ArrayRef< int > M, EVT ToVT, bool rev)
Definition ARMISelLowering.cpp:7388

PerformVQMOVNCombine
static SDValue PerformVQMOVNCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
Definition ARMISelLowering.cpp:17278

OtherSucc
static MachineBasicBlock * OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ)
Definition ARMISelLowering.cpp:11150

LowerVecReduceMinMax
static SDValue LowerVecReduceMinMax(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
Definition ARMISelLowering.cpp:10036

PerformFPExtendCombine
static SDValue PerformFPExtendCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
Definition ARMISelLowering.cpp:17777

PerformAddcSubcCombine
static SDValue PerformAddcSubcCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
Definition ARMISelLowering.cpp:12952

PerformVSELECTCombine
static SDValue PerformVSELECTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
Definition ARMISelLowering.cpp:13232

getDivRemArgList
static TargetLowering::ArgListTy getDivRemArgList(const SDNode *N, LLVMContext *Context, const ARMSubtarget *Subtarget)
Definition ARMISelLowering.cpp:20411

PerformVECREDUCE_ADDCombine
static SDValue PerformVECREDUCE_ADDCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
Definition ARMISelLowering.cpp:16945

getZeroVector
static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, const SDLoc &dl)
getZeroVector - Returns a vector of specified type with all zero elements.
Definition ARMISelLowering.cpp:6077

LowerAtomicLoadStore
static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG)
Definition ARMISelLowering.cpp:10110

PerformSplittingToNarrowingStores
static SDValue PerformSplittingToNarrowingStores(StoreSDNode *St, SelectionDAG &DAG)
Definition ARMISelLowering.cpp:16488

getT2IndexedAddressParts
static bool getT2IndexedAddressParts(SDNode *Ptr, EVT VT, bool isSEXTLoad, SDValue &Base, SDValue &Offset, bool &isInc, SelectionDAG &DAG)
Definition ARMISelLowering.cpp:19556

getVCMPCondCode
static ARMCC::CondCodes getVCMPCondCode(SDValue N)
Definition ARMISelLowering.cpp:14519

ARMInterworking
static cl::opt< bool > ARMInterworking("arm-interworking", cl::Hidden, cl::desc("Enable / disable ARM interworking (for debugging only)"), cl::init(true))

ReplaceREADCYCLECOUNTER
static void ReplaceREADCYCLECOUNTER(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
Definition ARMISelLowering.cpp:10120

PerformORCombineToBFI
static SDValue PerformORCombineToBFI(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
Definition ARMISelLowering.cpp:14376

isConditionalZeroOrAllOnes
static bool isConditionalZeroOrAllOnes(SDNode *N, bool AllOnes, SDValue &CC, bool &Invert, SDValue &OtherOp, SelectionDAG &DAG)
Definition ARMISelLowering.cpp:12323

LowerEXTRACT_VECTOR_ELT
static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
Definition ARMISelLowering.cpp:8873

PerformVSetCCToVCTPCombine
static SDValue PerformVSetCCToVCTPCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
Definition ARMISelLowering.cpp:13272

LowerBUILD_VECTORToVIDUP
static SDValue LowerBUILD_VECTORToVIDUP(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
Definition ARMISelLowering.cpp:7590

isZeroVector
static bool isZeroVector(SDValue N)
Definition ARMISelLowering.cpp:9930

PerformAddeSubeCombine
static SDValue PerformAddeSubeCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
Definition ARMISelLowering.cpp:12985

ReplaceCMP_SWAP_64Results
static void ReplaceCMP_SWAP_64Results(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG)
Definition ARMISelLowering.cpp:10164

isLowerSaturate
static bool isLowerSaturate(const SDValue LHS, const SDValue RHS, const SDValue TrueVal, const SDValue FalseVal, const ISD::CondCode CC, const SDValue K)
Definition ARMISelLowering.cpp:5041

LowerPredicateLoad
static SDValue LowerPredicateLoad(SDValue Op, SelectionDAG &DAG)
Definition ARMISelLowering.cpp:9801

emitPostSt
static void emitPostSt(MachineBasicBlock *BB, MachineBasicBlock::iterator Pos, const TargetInstrInfo *TII, const DebugLoc &dl, unsigned StSize, unsigned Data, unsigned AddrIn, unsigned AddrOut, bool IsThumb1, bool IsThumb2)
Emit a post-increment store operation with given size.
Definition ARMISelLowering.cpp:11238

isVMOVNMask
static bool isVMOVNMask(ArrayRef< int > M, EVT VT, bool Top, bool SingleSource)
Definition ARMISelLowering.cpp:7364

CombineBaseUpdate
static SDValue CombineBaseUpdate(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
CombineBaseUpdate - Target-specific DAG combine function for VLDDUP, NEON load/store intrinsics,...
Definition ARMISelLowering.cpp:16016

LowerSaturatingConditional
static SDValue LowerSaturatingConditional(SDValue Op, SelectionDAG &DAG)
Definition ARMISelLowering.cpp:5067

PerformSubCSINCCombine
static SDValue PerformSubCSINCCombine(SDNode *N, SelectionDAG &DAG)
Definition ARMISelLowering.cpp:13861

PerformVMOVRRDCombine
static SDValue PerformVMOVRRDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformVMOVRRDCombine - Target-specific dag combine xforms for ARMISD::VMOVRRD.
Definition ARMISelLowering.cpp:14907

LowerFP_TO_INT_SAT
static SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
Definition ARMISelLowering.cpp:5708

PerformCSETCombine
static SDValue PerformCSETCombine(SDNode *N, SelectionDAG &DAG)
Definition ARMISelLowering.cpp:14885

PerformVMOVNCombine
static SDValue PerformVMOVNCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
Definition ARMISelLowering.cpp:17238

PerformInsertSubvectorCombine
static SDValue PerformInsertSubvectorCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
Definition ARMISelLowering.cpp:15506

LowerVectorExtend
static SDValue LowerVectorExtend(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
Definition ARMISelLowering.cpp:9131

WinDBZCheckDenominator
static SDValue WinDBZCheckDenominator(SelectionDAG &DAG, SDNode *N, SDValue InChain)
Definition ARMISelLowering.cpp:9769

LowerVECTOR_SHUFFLEv8i8
static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op, ArrayRef< int > ShuffleMask, SelectionDAG &DAG)
Definition ARMISelLowering.cpp:8286

PerformVMULCombine
static SDValue PerformVMULCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformVMULCombine Distribute (A + B) * C to (A * C) + (B * C) to take advantage of the special multi...
Definition ARMISelLowering.cpp:13972

LowerMUL
static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG)
Definition ARMISelLowering.cpp:9377

PerformBFICombine
static SDValue PerformBFICombine(SDNode *N, SelectionDAG &DAG)
Definition ARMISelLowering.cpp:14764

PerformORCombine
static SDValue PerformORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformORCombine - Target-specific dag combine xforms for ISD::OR.
Definition ARMISelLowering.cpp:14559

LowerMLOAD
static SDValue LowerMLOAD(SDValue Op, SelectionDAG &DAG)
Definition ARMISelLowering.cpp:9936

PerformTruncatingStoreCombine
static SDValue PerformTruncatingStoreCombine(StoreSDNode *St, SelectionDAG &DAG)
Definition ARMISelLowering.cpp:16403

SelectPairHalf
static unsigned SelectPairHalf(unsigned Elements, ArrayRef< int > Mask, unsigned Index)
Definition ARMISelLowering.cpp:7077

emitPostLd
static void emitPostLd(MachineBasicBlock *BB, MachineBasicBlock::iterator Pos, const TargetInstrInfo *TII, const DebugLoc &dl, unsigned LdSize, unsigned Data, unsigned AddrIn, unsigned AddrOut, bool IsThumb1, bool IsThumb2)
Emit a post-increment load operation with given size.
Definition ARMISelLowering.cpp:11197

TryDistrubutionADDVecReduce
static SDValue TryDistrubutionADDVecReduce(SDNode *N, SelectionDAG &DAG)
Definition ARMISelLowering.cpp:13371

isValidBaseUpdate
static bool isValidBaseUpdate(SDNode *N, SDNode *User)
Definition ARMISelLowering.cpp:15643

IsSingleInstrConstant
static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG, const ARMSubtarget *ST, const SDLoc &dl)
Definition ARMISelLowering.cpp:7511

IsQRMVEInstruction
static bool IsQRMVEInstruction(const SDNode *N, const SDNode *Op)
Definition ARMISelLowering.cpp:7625

PerformMinMaxToSatCombine
static SDValue PerformMinMaxToSatCombine(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
Definition ARMISelLowering.cpp:17788

PerformXORCombine
static SDValue PerformXORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
Definition ARMISelLowering.cpp:14659

getMVEIndexedAddressParts
static bool getMVEIndexedAddressParts(SDNode *Ptr, EVT VT, Align Alignment, bool isSEXTLoad, bool IsMasked, bool isLE, SDValue &Base, SDValue &Offset, bool &isInc, SelectionDAG &DAG)
Definition ARMISelLowering.cpp:19581

RCPair
std::pair< unsigned, const TargetRegisterClass * > RCPair
Definition ARMISelLowering.cpp:20146

combineSelectAndUse
static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp, TargetLowering::DAGCombinerInfo &DCI, bool AllOnes=false)
Definition ARMISelLowering.cpp:12396

PerformExtendCombine
static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
PerformExtendCombine - Target-specific DAG combining for ISD::SIGN_EXTEND, ISD::ZERO_EXTEND,...
Definition ARMISelLowering.cpp:17734

LowerSDIV
static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
Definition ARMISelLowering.cpp:9522

MVEMaxSupportedInterleaveFactor
cl::opt< unsigned > MVEMaxSupportedInterleaveFactor("mve-max-interleave-factor", cl::Hidden, cl::desc("Maximum interleave factor for MVE VLDn to generate."), cl::init(2))

isVMOVModifiedImm
static SDValue isVMOVModifiedImm(uint64_t SplatBits, uint64_t SplatUndef, unsigned SplatBitSize, SelectionDAG &DAG, const SDLoc &dl, EVT &VT, EVT VectorVT, VMOVModImmType type)
isVMOVModifiedImm - Check if the specified splat value corresponds to a valid vector constant for a N...
Definition ARMISelLowering.cpp:6755

LowerBuildVectorOfFPExt
static SDValue LowerBuildVectorOfFPExt(SDValue BV, SelectionDAG &DAG, const ARMSubtarget *ST)
Definition ARMISelLowering.cpp:7469

CombineVMOVDRRCandidateWithVecOp
static SDValue CombineVMOVDRRCandidateWithVecOp(const SDNode *BC, SelectionDAG &DAG)
BC is a bitcast that is about to be turned into a VMOVDRR.
Definition ARMISelLowering.cpp:5966

promoteToConstantPool
static SDValue promoteToConstantPool(const ARMTargetLowering *TLI, const GlobalValue *GV, SelectionDAG &DAG, EVT PtrVT, const SDLoc &dl)
Definition ARMISelLowering.cpp:3582

isNEONTwoResultShuffleMask
static unsigned isNEONTwoResultShuffleMask(ArrayRef< int > ShuffleMask, EVT VT, unsigned &WhichResult, bool &isV_UNDEF)
Check if ShuffleMask is a NEON two-result shuffle (VZIP, VUZP, VTRN), and return the corresponding AR...
Definition ARMISelLowering.cpp:7305

BitsProperlyConcatenate
static bool BitsProperlyConcatenate(const APInt &A, const APInt &B)
Definition ARMISelLowering.cpp:14727

getARMIndexedAddressParts
static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT, bool isSEXTLoad, SDValue &Base, SDValue &Offset, bool &isInc, SelectionDAG &DAG)
Definition ARMISelLowering.cpp:19497

LowerVecReduce
static SDValue LowerVecReduce(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
Definition ARMISelLowering.cpp:9963

LowerVectorINT_TO_FP
static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG)
Definition ARMISelLowering.cpp:5745

TryCombineBaseUpdate
static bool TryCombineBaseUpdate(struct BaseUpdateTarget &Target, struct BaseUpdateUser &User, bool SimpleConstIncOnly, TargetLowering::DAGCombinerInfo &DCI)
Definition ARMISelLowering.cpp:15659

allUsersAreInFunction
static bool allUsersAreInFunction(const Value *V, const Function *F)
Return true if all users of V are within function F, looking through ConstantExprs.
Definition ARMISelLowering.cpp:3566

isSingletonVEXTMask
static bool isSingletonVEXTMask(ArrayRef< int > M, EVT VT, unsigned &Imm)
Definition ARMISelLowering.cpp:7006

PerformVMOVDRRCombine
static SDValue PerformVMOVDRRCombine(SDNode *N, SelectionDAG &DAG)
PerformVMOVDRRCombine - Target-specific dag combine xforms for ARMISD::VMOVDRR.
Definition ARMISelLowering.cpp:14999

isLowerSaturatingConditional
static bool isLowerSaturatingConditional(const SDValue &Op, SDValue &V, SDValue &SatK)
Definition ARMISelLowering.cpp:5133

isLegalAddressImmediate
static bool isLegalAddressImmediate(int64_t V, EVT VT, const ARMSubtarget *Subtarget)
isLegalAddressImmediate - Return true if the integer value can be used as the offset of the target ad...
Definition ARMISelLowering.cpp:19290

LowerVSETCC
static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
Definition ARMISelLowering.cpp:6545

isLegalT1AddressImmediate
static bool isLegalT1AddressImmediate(int64_t V, EVT VT)
Definition ARMISelLowering.cpp:19210

CombineANDShift
static SDValue CombineANDShift(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
Definition ARMISelLowering.cpp:14154

LowerSETCCCARRY
static SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG)
Definition ARMISelLowering.cpp:6725

PerformSHLSimplify
static SDValue PerformSHLSimplify(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *ST)
Definition ARMISelLowering.cpp:13730

PerformADDECombine
static SDValue PerformADDECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformADDECombine - Target-specific dag combine transform from ARMISD::ADDC, ARMISD::ADDE,...
Definition ARMISelLowering.cpp:13333

PerformReduceShuffleCombine
static SDValue PerformReduceShuffleCombine(SDNode *N, SelectionDAG &DAG)
Definition ARMISelLowering.cpp:17205

PerformUMLALCombine
static SDValue PerformUMLALCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
Definition ARMISelLowering.cpp:12930

LowerSTORE
static SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
Definition ARMISelLowering.cpp:9897

LowerTruncate
static SDValue LowerTruncate(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
Definition ARMISelLowering.cpp:9071

PerformHWLoopCombine
static SDValue PerformHWLoopCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *ST)
Definition ARMISelLowering.cpp:18075

PerformSplittingMVETruncToNarrowingStores
static SDValue PerformSplittingMVETruncToNarrowingStores(StoreSDNode *St, SelectionDAG &DAG)
Definition ARMISelLowering.cpp:16581

isVUZP_v_undef_Mask
static bool isVUZP_v_undef_Mask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of "vector_shuffle v,...
Definition ARMISelLowering.cpp:7199

isHomogeneousAggregate
static bool isHomogeneousAggregate(Type *Ty, HABaseType &Base, uint64_t &Members)
Definition ARMISelLowering.cpp:21682

PerformMULCombine
static SDValue PerformMULCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
Definition ARMISelLowering.cpp:14068

PerformFADDCombine
static SDValue PerformFADDCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
Definition ARMISelLowering.cpp:16869

LowerReverse_VECTOR_SHUFFLE
static SDValue LowerReverse_VECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG)
Definition ARMISelLowering.cpp:8306

PerformANDCombine
static SDValue PerformANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
Definition ARMISelLowering.cpp:14266

PerformADDVecReduce
static SDValue PerformADDVecReduce(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
Definition ARMISelLowering.cpp:13516

LowerPredicateStore
static SDValue LowerPredicateStore(SDValue Op, SelectionDAG &DAG)
Definition ARMISelLowering.cpp:9859

SearchLoopIntrinsic
static SDValue SearchLoopIntrinsic(SDValue N, ISD::CondCode &CC, int &Imm, bool &Negate)
Definition ARMISelLowering.cpp:18038

canChangeToInt
static bool canChangeToInt(SDValue Op, bool &SeenZero, const ARMSubtarget *Subtarget)
canChangeToInt - Given the fp compare operand, return true if it is suitable to morph to an integer c...
Definition ARMISelLowering.cpp:5361

getStOpcode
static unsigned getStOpcode(unsigned StSize, bool IsThumb1, bool IsThumb2)
Return the store opcode for a given store size.
Definition ARMISelLowering.cpp:11178

IsVUZPShuffleNode
static bool IsVUZPShuffleNode(SDNode *N)
Definition ARMISelLowering.cpp:12435

Expand64BitShift
static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
Definition ARMISelLowering.cpp:6468

AddCombineTo64BitSMLAL16
static SDValue AddCombineTo64BitSMLAL16(SDNode *AddcNode, SDNode *AddeNode, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
Definition ARMISelLowering.cpp:12632

attachMEMCPYScratchRegs
static void attachMEMCPYScratchRegs(const ARMSubtarget *Subtarget, MachineInstr &MI, const SDNode *Node)
Attaches vregs to MEMCPY that it will use as scratch registers when it is expanded into LDM/STM.
Definition ARMISelLowering.cpp:12180

isFloatingPointZero
static bool isFloatingPointZero(SDValue Op)
isFloatingPointZero - Return true if this is +0.0.
Definition ARMISelLowering.cpp:4505

findMUL_LOHI
static SDValue findMUL_LOHI(SDValue V)
Definition ARMISelLowering.cpp:12625

LowerVECTOR_SHUFFLE_i1
static SDValue LowerVECTOR_SHUFFLE_i1(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
Definition ARMISelLowering.cpp:8375

PerformORCombine_i1
static SDValue PerformORCombine_i1(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
Definition ARMISelLowering.cpp:14533

PerformSplittingMVEEXTToWideningLoad
static SDValue PerformSplittingMVEEXTToWideningLoad(SDNode *N, SelectionDAG &DAG)
Definition ARMISelLowering.cpp:18557

PerformSplittingToWideningLoad
static SDValue PerformSplittingToWideningLoad(SDNode *N, SelectionDAG &DAG)
Definition ARMISelLowering.cpp:17653

genTPLoopBody
static void genTPLoopBody(MachineBasicBlock *TpLoopBody, MachineBasicBlock *TpEntry, MachineBasicBlock *TpExit, const TargetInstrInfo *TII, DebugLoc Dl, MachineRegisterInfo &MRI, Register OpSrcReg, Register OpDestReg, Register ElementCountReg, Register TotalIterationsReg, bool IsMemcpy)
Adds logic in the loopBody MBB to generate MVE_VCTP, t2DoLoopDec and t2DoLoopEnd.
Definition ARMISelLowering.cpp:11701

PerformBUILD_VECTORCombine
static SDValue PerformBUILD_VECTORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformBUILD_VECTORCombine - Target-specific dag combine xforms for ISD::BUILD_VECTOR.
Definition ARMISelLowering.cpp:15124

LowerVecReduceF
static SDValue LowerVecReduceF(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
Definition ARMISelLowering.cpp:10029

PerformMinMaxCombine
static SDValue PerformMinMaxCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
PerformMinMaxCombine - Target-specific DAG combining for creating truncating saturates.
Definition ARMISelLowering.cpp:17829

ARMISelLowering.h

ARMMachineFunctionInfo.h

ARMPerfectShuffle.h

ARMRegisterInfo.h

MBB
MachineBasicBlock & MBB
Definition ARMSLSHardening.cpp:71

DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition ARMSLSHardening.cpp:73

MBBI
MachineBasicBlock MachineBasicBlock::iterator MBBI
Definition ARMSLSHardening.cpp:72

ARMSelectionDAGInfo.h

ARMSubtarget.h

ARMTargetTransformInfo.h
This file a TargetTransformInfoImplBase conforming object specific to the ARM target machine.

false
Function Alias Analysis false
Definition AliasAnalysis.cpp:734

Results
Function Alias Analysis Results
Definition AliasAnalysis.cpp:734

ArrayRef.h

AtomicOrdering.h
Atomic ordering constants.

Attributes.h
This file contains the simple types necessary to represent the attributes associated with functions a...

BitVector.h
This file implements the BitVector class.

BranchProbability.h

A
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")

E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")

B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")

CallingConvLower.h

CallingConv.h

Casting.h

CodeGen.h

isBigEndian
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
Definition CombinerHelper.cpp:129

CommandLine.h

Compiler.h

ComplexDeinterleavingPass.h

Constants.h
This file contains the declarations for the subclasses of Constant, which represent the different fla...

Domain
Domain
Definition CorrelatedValuePropagation.cpp:756

createLoadIntrinsic
static void createLoadIntrinsic(IntrinsicInst *II, LoadInst *LI, Value *Offset, dxil::ResourceTypeInfo &RTI)
Definition DXILResourceAccess.cpp:309

createStoreIntrinsic
static void createStoreIntrinsic(IntrinsicInst *II, StoreInst *SI, Value *Offset, dxil::ResourceTypeInfo &RTI)
Definition DXILResourceAccess.cpp:118

DataLayout.h

DebugLoc.h

DenseMap.h
This file defines the DenseMap class.

DerivedTypes.h

isSigned
static bool isSigned(unsigned int Opcode)
Definition ExpandLargeDivRem.cpp:52

Check
#define Check(C,...)
Definition GenericConvergenceVerifierImpl.h:34

GlobalAlias.h

GlobalValue.h

GlobalVariable.h

op
#define op(i)

im
#define im(i)

TII
const HexagonInstrInfo * TII
Definition HexagonCopyToCombine.cpp:118

IRBuilder.h

MI
IRTranslator LLVM IR MI
Definition IRTranslator.cpp:110

Constant.h

Function.h

Instruction.h

IntrinsicInst.h

Module.h
Module.h This file contains the declarations for the Module class.

Type.h

User.h

Value.h

ISDOpcodes.h

InlineAsm.h

InlinePriorityMode::Size
@ Size
Definition InlineOrder.cpp:25

ShuffleOps
std::pair< Value *, Value * > ShuffleOps
We are building a shuffle to create V, which is a sequence of insertelement, extractelement pairs.
Definition InstCombineVectorOps.cpp:803

Instructions.h

LowerCTPOP
static Value * LowerCTPOP(LLVMContext &Context, Value *V, Instruction *IP)
Emit the code to lower ctpop of V before the specified instruction IP.
Definition IntrinsicLowering.cpp:148

Intrinsics.h

TemplateParamKind::Type
@ Type
Definition ItaniumDemangle.h:1243

Ops
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
Definition ItaniumDemangle.h:3370

KnownBits.h

RegName
#define RegName(no)

Options
static LVOptions Options
Definition LVOptions.cpp:25

info
lazy value info
Definition LazyValueInfo.cpp:59

MCInstrDesc.h

MCInstrItineraries.h

MCSchedule.h

ARMBaseInfo.h

F
#define F(x, y, z)
Definition MD5.cpp:54

I
#define I(x, y, z)
Definition MD5.cpp:57

G
#define G(x, y, z)
Definition MD5.cpp:55

MachineBasicBlock.h

MachineConstantPool.h
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...

MachineFrameInfo.h

MachineFunction.h

MachineInstrBuilder.h

getDebugLoc
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first DebugLoc that has line number information, given a range of instructions.
Definition MachineInstrBundle.cpp:90

MachineInstr.h

MachineJumpTableInfo.h

MachineMemOperand.h

MachineOperand.h

MachineRegisterInfo.h

Reg
Register Reg
Definition MachineSink.cpp:2117

TRI
Register const TargetRegisterInfo * TRI
Definition MachineSink.cpp:2118

MachineValueType.h

MathExtras.h

Register
Promote Memory to Register
Definition Mem2Reg.cpp:110

Context
@ Context
Definition MemProfContextDisambiguation.cpp:133

T
#define T
Definition Mips16ISelLowering.cpp:315

args
nvptx lower args
Definition NVPTXLowerArgs.cpp:185

High
uint64_t High
Definition NVVMIntrRange.cpp:46

II
uint64_t IntrinsicInst * II
Definition NVVMIntrRange.cpp:46

Operation
PowerPC Reduce CR logical Operation
Definition PPCReduceCRLogicals.cpp:735

Merge
R600 Clause Merge
Definition R600ClauseMergePass.cpp:70

Cond
const SmallVectorImpl< MachineOperand > & Cond
Definition RISCVRedundantCopyElimination.cpp:71

Opc
auto Opc
Definition RISCVRedundantCopyElimination.cpp:75

Mode
static cl::opt< RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Development, "development", "for training")))

RuntimeLibcallUtil.h

Copies
SI Lower i1 Copies
Definition SILowerI1Copies.cpp:921

STLExtras.h
This file contains some templates that are useful if you are working with the STL at all.

contains
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:480

SelectionDAGAddressAnalysis.h

SelectionDAGNodes.h

MaxSteps
static cl::opt< unsigned > MaxSteps("has-predecessor-max-steps", cl::Hidden, cl::init(8192), cl::desc("DAG combiner limit number of steps when searching DAG " "for predecessor nodes"))

SelectionDAG.h

SmallPtrSet.h
This file defines the SmallPtrSet class.

SmallVector.h
This file defines the SmallVector class.

Statistic.h
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...

STATISTIC
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171

StringExtras.h
This file contains some functions that are useful when dealing with strings.

StringRef.h

StringSwitch.h
This file implements the StringSwitch template, which mimics a switch() statement whose cases are str...

Debug.h

LLVM_DEBUG
#define LLVM_DEBUG(...)
Definition Debug.h:114

Y
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")

X
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")

getType
static SymbolRef::Type getType(const Symbol *Sym)
Definition TapiFile.cpp:39

TargetInstrInfo.h

TargetLowering.h
This file describes how to lower LLVM code to machine code.

TargetOpcodes.h

TargetOptions.h

TargetRegisterInfo.h

TargetSubtargetInfo.h

Triple.h

Twine.h

ARMBaseInfo.h

UndefPoisonKind::PoisonOnly
@ PoisonOnly
Definition ValueTracking.cpp:7535

ValueTypes.h

VectorUtils.h

getSwappedCondition
static X86::CondCode getSwappedCondition(X86::CondCode CC)
Assuming the flags are set by MI(a,b), return the condition code if we modify the instructions such t...
Definition X86InstrInfo.cpp:3354

Concat
static constexpr int Concat[]
Definition X86InterleavedAccess.cpp:232

RHS
Value * RHS
Definition X86PartialReduction.cpp:81

LHS
Value * LHS
Definition X86PartialReduction.cpp:80

Mul
BinaryOperator * Mul
Definition X86PartialReduction.cpp:75

isIntrinsic
static bool isIntrinsic(const CallBase &Call, Intrinsic::ID ID)
Definition X86WinEHState.cpp:525

Input
The Input class is used to parse a yaml document into in-memory structs and vectors.
Definition YAMLTraits.h:1313

Node
Definition ItaniumDemangle.h:166

VectorType
Definition ItaniumDemangle.h:1189

llvm::APFloatBase::rmTowardZero
static constexpr roundingMode rmTowardZero
Definition APFloat.h:348

llvm::APFloatBase::opOK
@ opOK
Definition APFloat.h:361

llvm::APFloat
Definition APFloat.h:940

llvm::APFloat::getExactInverse
LLVM_ABI bool getExactInverse(APFloat *Inv) const
If this value is normal and has an exact, normal, multiplicative inverse, store it in inv and return ...
Definition APFloat.cpp:5995

llvm::APFloat::bitcastToAPInt
APInt bitcastToAPInt() const
Definition APFloat.h:1335

llvm::APFloat::convertToInteger
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition APFloat.h:1314

llvm::APInt
Class for arbitrary precision integers.
Definition APInt.h:78

llvm::APInt::getAllOnes
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235

llvm::APInt::isMinSignedValue
bool isMinSignedValue() const
Determine if this is the smallest signed value.
Definition APInt.h:424

llvm::APInt::getZExtValue
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1541

llvm::APInt::popcount
unsigned popcount() const
Count the number of bits set.
Definition APInt.h:1671

llvm::APInt::zextOrTrunc
LLVM_ABI APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition APInt.cpp:1033

llvm::APInt::getActiveBits
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition APInt.h:1513

llvm::APInt::trunc
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:936

llvm::APInt::setBit
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition APInt.h:1331

llvm::APInt::sgt
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition APInt.h:1202

llvm::APInt::isAllOnes
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:372

llvm::APInt::getBitWidth
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1489

llvm::APInt::ult
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition APInt.h:1112

llvm::APInt::countr_zero
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1640

llvm::APInt::countl_zero
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition APInt.h:1599

llvm::APInt::getSplat
static LLVM_ABI APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition APInt.cpp:651

llvm::APInt::logBase2
unsigned logBase2() const
Definition APInt.h:1762

llvm::APInt::getLimitedValue
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
Definition APInt.h:476

llvm::APInt::isSubsetOf
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1258

llvm::APInt::isPowerOf2
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition APInt.h:441

llvm::APInt::getLowBitsSet
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:307

llvm::APInt::getHighBitsSet
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition APInt.h:297

llvm::APInt::getOneBitSet
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition APInt.h:240

llvm::APInt::getSExtValue
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1563

llvm::APInt::lshrInPlace
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition APInt.h:859

llvm::APInt::lshr
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:852

llvm::APInt::countr_one
unsigned countr_one() const
Count the number of trailing one bits.
Definition APInt.h:1657

llvm::APInt::uge
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition APInt.h:1222

llvm::APSInt
An arbitrary precision integer that knows its signedness.
Definition APSInt.h:24

llvm::ARMBaseInstrInfo
Definition ARMBaseInstrInfo.h:42

llvm::ARMBaseInstrInfo::getRegisterInfo
const ARMBaseRegisterInfo & getRegisterInfo() const
Definition ARMBaseInstrInfo.h:129

llvm::ARMBaseRegisterInfo
Definition ARMBaseRegisterInfo.h:52

llvm::ARMBaseRegisterInfo::getSjLjDispatchPreservedMask
const uint32_t * getSjLjDispatchPreservedMask(const MachineFunction &MF) const
Definition ARMBaseRegisterInfo.cpp:192

llvm::ARMBaseRegisterInfo::getCalleeSavedRegs
const MCPhysReg * getCalleeSavedRegs(const MachineFunction *MF) const override
Code Generation virtual methods...
Definition ARMBaseRegisterInfo.cpp:63

llvm::ARMBaseRegisterInfo::getFrameRegister
Register getFrameRegister(const MachineFunction &MF) const override
Definition ARMBaseRegisterInfo.cpp:514

llvm::ARMBaseRegisterInfo::getCallPreservedMask
const uint32_t * getCallPreservedMask(const MachineFunction &MF, CallingConv::ID) const override
Definition ARMBaseRegisterInfo.cpp:157

llvm::ARMBaseRegisterInfo::getTLSCallPreservedMask
const uint32_t * getTLSCallPreservedMask(const MachineFunction &MF) const
Definition ARMBaseRegisterInfo.cpp:185

llvm::ARMBaseRegisterInfo::getThisReturnPreservedMask
const uint32_t * getThisReturnPreservedMask(const MachineFunction &MF, CallingConv::ID) const
getThisReturnPreservedMask - Returns a call preserved mask specific to the case that 'returned' is on...
Definition ARMBaseRegisterInfo.cpp:201

llvm::ARMBaseTargetMachine
Definition ARMTargetMachine.h:29

llvm::ARMConstantPoolConstant::Create
static ARMConstantPoolConstant * Create(const Constant *C, unsigned ID)
Definition ARMConstantPoolValue.cpp:148

llvm::ARMConstantPoolMBB::Create
static ARMConstantPoolMBB * Create(LLVMContext &C, const MachineBasicBlock *mbb, unsigned ID, unsigned char PCAdj)
Definition ARMConstantPoolValue.cpp:272

llvm::ARMConstantPoolSymbol::Create
static ARMConstantPoolSymbol * Create(LLVMContext &C, StringRef s, unsigned ID, unsigned char PCAdj)
Definition ARMConstantPoolValue.cpp:233

llvm::ARMConstantPoolValue
ARMConstantPoolValue - ARM specific constantpool value.
Definition ARMConstantPoolValue.h:61

llvm::ARMFunctionInfo
ARMFunctionInfo - This class is derived from MachineFunctionInfo and contains private ARM-specific in...
Definition ARMMachineFunctionInfo.h:33

llvm::ARMFunctionInfo::getVarArgsFrameIndex
int getVarArgsFrameIndex() const
Definition ARMMachineFunctionInfo.h:240

llvm::ARMFunctionInfo::getPromotedConstpoolIncrease
int getPromotedConstpoolIncrease() const
Definition ARMMachineFunctionInfo.h:276

llvm::ARMFunctionInfo::getGlobalsPromotedToConstantPool
SmallPtrSet< const GlobalVariable *, 2 > & getGlobalsPromotedToConstantPool()
Definition ARMMachineFunctionInfo.h:273

llvm::ARMFunctionInfo::setArgumentStackToRestore
void setArgumentStackToRestore(unsigned v)
Definition ARMMachineFunctionInfo.h:226

llvm::ARMFunctionInfo::branchTargetEnforcement
bool branchTargetEnforcement() const
Definition ARMMachineFunctionInfo.h:301

llvm::ARMFunctionInfo::createPICLabelUId
unsigned createPICLabelUId()
Definition ARMMachineFunctionInfo.h:236

llvm::ARMFunctionInfo::setPreservesR0
void setPreservesR0()
Definition ARMMachineFunctionInfo.h:286

llvm::ARMFunctionInfo::setPromotedConstpoolIncrease
void setPromotedConstpoolIncrease(int Sz)
Definition ARMMachineFunctionInfo.h:279

llvm::ARMFunctionInfo::isThumb1OnlyFunction
bool isThumb1OnlyFunction() const
Definition ARMMachineFunctionInfo.h:169

llvm::ARMFunctionInfo::setArgRegsSaveSize
void setArgRegsSaveSize(unsigned s)
Definition ARMMachineFunctionInfo.h:176

llvm::ARMFunctionInfo::isCmseNSEntryFunction
bool isCmseNSEntryFunction() const
Definition ARMMachineFunctionInfo.h:172

llvm::ARMFunctionInfo::setReturnRegsCount
void setReturnRegsCount(unsigned s)
Definition ARMMachineFunctionInfo.h:179

llvm::ARMFunctionInfo::setVarArgsFrameIndex
void setVarArgsFrameIndex(int Index)
Definition ARMMachineFunctionInfo.h:241

llvm::ARMFunctionInfo::getArgRegsSaveSize
unsigned getArgRegsSaveSize() const
Definition ARMMachineFunctionInfo.h:175

llvm::ARMFunctionInfo::markGlobalAsPromotedToConstantPool
void markGlobalAsPromotedToConstantPool(const GlobalVariable *GV)
Indicate to the backend that GV has had its storage changed to inside a constant pool.
Definition ARMMachineFunctionInfo.h:270

llvm::ARMFunctionInfo::setIsSplitCSR
void setIsSplitCSR(bool s)
Definition ARMMachineFunctionInfo.h:247

llvm::ARMFunctionInfo::setArgumentStackSize
void setArgumentStackSize(unsigned size)
Definition ARMMachineFunctionInfo.h:223

llvm::ARMFunctionInfo::getArgumentStackSize
unsigned getArgumentStackSize() const
Definition ARMMachineFunctionInfo.h:222

llvm::ARMSubtarget
Definition ARMSubtarget.h:48

llvm::ARMSubtarget::getTargetTriple
const Triple & getTargetTriple() const
Definition ARMSubtarget.h:336

llvm::ARMSubtarget::getInstrInfo
const ARMBaseInstrInfo * getInstrInfo() const override
Definition ARMSubtarget.h:241

llvm::ARMSubtarget::isThumb1Only
bool isThumb1Only() const
Definition ARMSubtarget.h:382

llvm::ARMSubtarget::useFPVFMx
bool useFPVFMx() const
Definition ARMSubtarget.h:321

llvm::ARMSubtarget::isThumb2
bool isThumb2() const
Definition ARMSubtarget.h:383

llvm::ARMSubtarget::isTargetWindows
bool isTargetWindows() const
Definition ARMSubtarget.h:349

llvm::ARMSubtarget::hasBaseDSP
bool hasBaseDSP() const
Definition ARMSubtarget.h:326

llvm::ARMSubtarget::getTargetLowering
const ARMTargetLowering * getTargetLowering() const override
Definition ARMSubtarget.h:245

llvm::ARMSubtarget::getRegisterInfo
const ARMBaseRegisterInfo * getRegisterInfo() const override
Definition ARMSubtarget.h:253

llvm::ARMSubtarget::hasVFP2Base
bool hasVFP2Base() const
Definition ARMSubtarget.h:310

llvm::ARMSubtarget::isROPI
bool isROPI() const
Definition ARMSubtarget.cpp:311

llvm::ARMSubtarget::useFPVFMx64
bool useFPVFMx64() const
Definition ARMSubtarget.h:325

llvm::ARMSubtarget::isRWPI
bool isRWPI() const
Definition ARMSubtarget.cpp:315

llvm::ARMSubtarget::isLittle
bool isLittle() const
Definition ARMSubtarget.h:414

llvm::ARMSubtarget::useFPVFMx16
bool useFPVFMx16() const
Definition ARMSubtarget.h:324

llvm::ARMSubtarget::isMClass
bool isMClass() const
Definition ARMSubtarget.h:384

llvm::ARMSubtarget::useMulOps
bool useMulOps() const
Definition ARMSubtarget.h:319

llvm::ARMSubtarget::getDualLoadStoreAlignment
Align getDualLoadStoreAlignment() const
Definition ARMSubtarget.h:450

llvm::ARMTargetLowering
Definition ARMISelLowering.h:87

llvm::ARMTargetLowering::shouldFoldSelectWithIdentityConstant
bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT, unsigned SelectOpcode, SDValue X, SDValue Y) const override
Return true if pulling a binary operation into a select with an identity constant is profitable.
Definition ARMISelLowering.cpp:13694

llvm::ARMTargetLowering::isReadOnly
bool isReadOnly(const GlobalValue *GV) const
Definition ARMISelLowering.cpp:3678

llvm::ARMTargetLowering::getMaxSupportedInterleaveFactor
unsigned getMaxSupportedInterleaveFactor() const override
Get the maximum supported factor for interleaved memory accesses.
Definition ARMISelLowering.cpp:21371

llvm::ARMTargetLowering::shouldExpandAtomicLoadInIR
TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const override
Returns how the given (atomic) load should be expanded by the IR-level AtomicExpand pass.
Definition ARMISelLowering.cpp:21072

llvm::ARMTargetLowering::getNumInterleavedAccesses
unsigned getNumInterleavedAccesses(VectorType *VecTy, const DataLayout &DL) const
Returns the number of interleaved accesses that will be generated when lowering accesses of the given...
Definition ARMISelLowering.cpp:21330

llvm::ARMTargetLowering::shouldInsertFencesForAtomic
bool shouldInsertFencesForAtomic(const Instruction *I) const override
Whether AtomicExpandPass should automatically insert fences and reduce ordering for this atomic.
Definition ARMISelLowering.cpp:21137

llvm::ARMTargetLowering::getABIAlignmentForCallingConv
Align getABIAlignmentForCallingConv(Type *ArgTy, const DataLayout &DL) const override
Return the correct alignment for the current calling convention.
Definition ARMISelLowering.cpp:21734

llvm::ARMTargetLowering::isDesirableToCommuteWithShift
bool isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const override
Return true if it is profitable to move this shift by a constant amount through its operand,...
Definition ARMISelLowering.cpp:13605

llvm::ARMTargetLowering::getExceptionPointerRegister
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
Definition ARMISelLowering.cpp:21766

llvm::ARMTargetLowering::getSingleConstraintMatchWeight
ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const override
Examine constraint string and operand type and determine a weight value.
Definition ARMISelLowering.cpp:20106

llvm::ARMTargetLowering::isLegalAddressingMode
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
isLegalAddressingMode - Return true if the addressing mode represented by AM is legal for this target...
Definition ARMISelLowering.cpp:19379

llvm::ARMTargetLowering::getSubtarget
const ARMSubtarget * getSubtarget() const
Definition ARMISelLowering.h:278

llvm::ARMTargetLowering::isLegalT2ScaledAddressingMode
bool isLegalT2ScaledAddressingMode(const AddrMode &AM, EVT VT) const
Definition ARMISelLowering.cpp:19324

llvm::ARMTargetLowering::isLegalT1ScaledAddressingMode
bool isLegalT1ScaledAddressingMode(const AddrMode &AM, EVT VT) const
Returns true if the addressing mode representing by AM is legal for the Thumb1 target,...
Definition ARMISelLowering.cpp:19362

llvm::ARMTargetLowering::getPreIndexedAddressParts
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
getPreIndexedAddressParts - returns true by value, base pointer and offset pointer and addressing mod...
Definition ARMISelLowering.cpp:19638

llvm::ARMTargetLowering::EmitKCFICheck
MachineInstr * EmitKCFICheck(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator &MBBI, const TargetInstrInfo *TII) const override
Definition ARMISelLowering.cpp:11809

llvm::ARMTargetLowering::shouldAlignPointerArgs
bool shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize, Align &PrefAlign) const override
Return true if the pointer arguments to CI should be aligned by aligning the object whose address is ...
Definition ARMISelLowering.cpp:1601

llvm::ARMTargetLowering::ReplaceNodeResults
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
ReplaceNodeResults - Replace the results of node with an illegal result type with new values built ou...
Definition ARMISelLowering.cpp:10549

llvm::ARMTargetLowering::emitAtomicCmpXchgNoStoreLLBalance
void emitAtomicCmpXchgNoStoreLLBalance(IRBuilderBase &Builder) const override
Definition ARMISelLowering.cpp:21281

llvm::ARMTargetLowering::isMulAddWithConstProfitable
bool isMulAddWithConstProfitable(SDValue AddNode, SDValue ConstNode) const override
Return true if it may be profitable to transform (mul (add x, c1), c2) -> (add (mul x,...
Definition ARMISelLowering.cpp:19475

llvm::ARMTargetLowering::isLegalAddImmediate
bool isLegalAddImmediate(int64_t Imm) const override
isLegalAddImmediate - Return true if the specified immediate is legal add immediate,...
Definition ARMISelLowering.cpp:19461

llvm::ARMTargetLowering::getOptimalMemOpType
EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op, const AttributeList &FuncAttributes) const override
Returns the target specific optimal type for load and store operations as a result of memset,...
Definition ARMISelLowering.cpp:19031

llvm::ARMTargetLowering::emitTrailingFence
Instruction * emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
Definition ARMISelLowering.cpp:21027

llvm::ARMTargetLowering::isFNegFree
bool isFNegFree(EVT VT) const override
Return true if an fneg operation is free to the point where it is never worthwhile to replace it with...
Definition ARMISelLowering.cpp:19098

llvm::ARMTargetLowering::finalizeLowering
void finalizeLowering(MachineFunction &MF) const override
Execute target specific actions to finalize target lowering.
Definition ARMISelLowering.cpp:21829

llvm::ARMTargetLowering::PerformMVETruncCombine
SDValue PerformMVETruncCombine(SDNode *N, DAGCombinerInfo &DCI) const
Definition ARMISelLowering.cpp:18455

llvm::ARMTargetLowering::isFPImmLegal
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize=false) const override
isFPImmLegal - Returns true if the target can instruction select the specified FP immediate natively.
Definition ARMISelLowering.cpp:20708

llvm::ARMTargetLowering::getConstraintType
ConstraintType getConstraintType(StringRef Constraint) const override
getConstraintType - Given a constraint letter, return the type of constraint it is for this target.
Definition ARMISelLowering.cpp:20076

llvm::ARMTargetLowering::preferIncOfAddToSubOfNot
bool preferIncOfAddToSubOfNot(EVT VT) const override
These two forms are equivalent: sub y, (xor x, -1) add (add x, 1), y The variant with two add's is IR...
Definition ARMISelLowering.cpp:13701

llvm::ARMTargetLowering::computeKnownBitsForTargetNode
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
Definition ARMISelLowering.cpp:19787

llvm::ARMTargetLowering::shouldExpandAtomicStoreInIR
TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicStoreInIR(StoreInst *SI) const override
Returns how the given (atomic) store should be expanded by the IR-level AtomicExpand pass into.
Definition ARMISelLowering.cpp:21050

llvm::ARMTargetLowering::insertSSPDeclarations
void insertSSPDeclarations(Module &M) const override
Inserts necessary declarations for SSP (stack protection) purpose.
Definition ARMISelLowering.cpp:21147

llvm::ARMTargetLowering::PerformIntrinsicCombine
SDValue PerformIntrinsicCombine(SDNode *N, DAGCombinerInfo &DCI) const
PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics.
Definition ARMISelLowering.cpp:17344

llvm::ARMTargetLowering::shouldFoldConstantShiftPairToMask
bool shouldFoldConstantShiftPairToMask(const SDNode *N) const override
Return true if it is profitable to fold a pair of shifts into a mask.
Definition ARMISelLowering.cpp:13676

llvm::ARMTargetLowering::isDesirableToCommuteXorWithShift
bool isDesirableToCommuteXorWithShift(const SDNode *N) const override
Return true if it is profitable to combine an XOR of a logical shift to create a logical shift of NOT...
Definition ARMISelLowering.cpp:13652

llvm::ARMTargetLowering::PerformCMOVCombine
SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const
PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV.
Definition ARMISelLowering.cpp:18235

llvm::ARMTargetLowering::createComplexDeinterleavingIR
Value * createComplexDeinterleavingIR(IRBuilderBase &B, ComplexDeinterleavingOperation OperationType, ComplexDeinterleavingRotation Rotation, Value *InputA, Value *InputB, Value *Accumulator=nullptr) const override
Create the IR node for the given complex deinterleaving operation.
Definition ARMISelLowering.cpp:21868

llvm::ARMTargetLowering::isComplexDeinterleavingSupported
bool isComplexDeinterleavingSupported() const override
Does this target support complex deinterleaving.
Definition ARMISelLowering.cpp:21834

llvm::ARMTargetLowering::PerformMVEExtCombine
SDValue PerformMVEExtCombine(SDNode *N, DAGCombinerInfo &DCI) const
Definition ARMISelLowering.cpp:18624

llvm::ARMTargetLowering::SimplifyDemandedBitsForTargetNode
bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &OriginalDemandedBits, const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const override
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
Definition ARMISelLowering.cpp:20011

llvm::ARMTargetLowering::getSetCCResultType
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
getSetCCResultType - Return the value type to use for ISD::SETCC.
Definition ARMISelLowering.cpp:1559

llvm::ARMTargetLowering::EmitInstrWithCustomInserter
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
Definition ARMISelLowering.cpp:11869

llvm::ARMTargetLowering::emitStoreConditional
Value * emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr, AtomicOrdering Ord) const override
Perform a store-conditional operation to Addr.
Definition ARMISelLowering.cpp:21288

llvm::ARMTargetLowering::LowerOperation
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
Definition ARMISelLowering.cpp:10371

llvm::ARMTargetLowering::CCAssignFnForReturn
CCAssignFn * CCAssignFnForReturn(CallingConv::ID CC, bool isVarArg) const
Definition ARMISelLowering.cpp:1789

llvm::ARMTargetLowering::createFastISel
FastISel * createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo) const override
createFastISel - This method returns a target specific FastISel object, or null if the target does no...
Definition ARMISelLowering.cpp:1615

llvm::ARMTargetLowering::AdjustInstrPostInstrSelection
void AdjustInstrPostInstrSelection(MachineInstr &MI, SDNode *Node) const override
This method should be implemented by targets that mark instructions with the 'hasPostISelHook' flag.
Definition ARMISelLowering.cpp:12204

llvm::ARMTargetLowering::shouldExpandAtomicRMWInIR
TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
Definition ARMISelLowering.cpp:21089

llvm::ARMTargetLowering::isExtractSubvectorCheap
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const override
Return true if EXTRACT_SUBVECTOR is cheap for this result type with this index.
Definition ARMISelLowering.cpp:20968

llvm::ARMTargetLowering::getTgtMemIntrinsic
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
getTgtMemIntrinsic - Represent NEON load and store intrinsics as MemIntrinsicNodes.
Definition ARMISelLowering.cpp:20727

llvm::ARMTargetLowering::isTruncateFree
bool isTruncateFree(Type *SrcTy, Type *DstTy) const override
Return true if it's free to truncate a value of type FromTy to type ToTy.
Definition ARMISelLowering.cpp:19060

llvm::ARMTargetLowering::isShuffleMaskLegal
bool isShuffleMaskLegal(ArrayRef< int > M, EVT VT) const override
isShuffleMaskLegal - Targets can use this to indicate that they only support some VECTOR_SHUFFLE oper...
Definition ARMISelLowering.cpp:8174

llvm::ARMTargetLowering::shouldConvertConstantLoadToIntImm
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Returns true if it is beneficial to convert a load of a constant to just the constant itself.
Definition ARMISelLowering.cpp:20958

llvm::ARMTargetLowering::lowerInterleavedStore
bool lowerInterleavedStore(Instruction *Store, Value *Mask, ShuffleVectorInst *SVI, unsigned Factor, const APInt &GapMask) const override
Lower an interleaved store into a vstN intrinsic.
Definition ARMISelLowering.cpp:21538

llvm::ARMTargetLowering::canCreateUndefOrPoisonForTargetNode
bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const override
Return true if Op can create undef or poison from non-undef & non-poison operands.
Definition ARMISelLowering.cpp:21201

llvm::ARMTargetLowering::getRegClassFor
const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const override
getRegClassFor - Return the register class that should be used for the specified value type.
Definition ARMISelLowering.cpp:1577

llvm::ARMTargetLowering::useLoadStackGuardNode
bool useLoadStackGuardNode(const Module &M) const override
If this function returns true, SelectionDAGBuilder emits a LOAD_STACK_GUARD node when it is lowering ...
Definition ARMISelLowering.cpp:21142

llvm::ARMTargetLowering::lowerInterleavedLoad
bool lowerInterleavedLoad(Instruction *Load, Value *Mask, ArrayRef< ShuffleVectorInst * > Shuffles, ArrayRef< unsigned > Indices, unsigned Factor, const APInt &GapMask) const override
Lower an interleaved load into a vldN intrinsic.
Definition ARMISelLowering.cpp:21390

llvm::ARMTargetLowering::findRepresentativeClass
std::pair< const TargetRegisterClass *, uint8_t > findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const override
Return the largest legal super-reg register class of the register class for the specified type and it...
Definition ARMISelLowering.cpp:1522

llvm::ARMTargetLowering::preferSelectsOverBooleanArithmetic
bool preferSelectsOverBooleanArithmetic(EVT VT) const override
Should we prefer selects to doing arithmetic on boolean types.
Definition ARMISelLowering.cpp:1507

llvm::ARMTargetLowering::isZExtFree
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
Definition ARMISelLowering.cpp:19077

llvm::ARMTargetLowering::isCheapToSpeculateCttz
bool isCheapToSpeculateCttz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic cttz.
Definition ARMISelLowering.cpp:21214

llvm::ARMTargetLowering::PerformDAGCombine
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
Definition ARMISelLowering.cpp:18728

llvm::ARMTargetLowering::isCheapToSpeculateCtlz
bool isCheapToSpeculateCtlz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic ctlz.
Definition ARMISelLowering.cpp:21218

llvm::ARMTargetLowering::targetShrinkDemandedConstant
bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const override
Definition ARMISelLowering.cpp:19929

llvm::ARMTargetLowering::ARMTargetLowering
ARMTargetLowering(const TargetMachine &TM, const ARMSubtarget &STI)
Definition ARMISelLowering.cpp:509

llvm::ARMTargetLowering::isComplexDeinterleavingOperationSupported
bool isComplexDeinterleavingOperationSupported(ComplexDeinterleavingOperation Operation, Type *Ty) const override
Does this target support complex deinterleaving with the given operation and type.
Definition ARMISelLowering.cpp:21838

llvm::ARMTargetLowering::supportKCFIBundles
bool supportKCFIBundles() const override
Return true if the target supports kcfi operand bundles.
Definition ARMISelLowering.cpp:11803

llvm::ARMTargetLowering::PerformBRCONDCombine
SDValue PerformBRCONDCombine(SDNode *N, SelectionDAG &DAG) const
PerformBRCONDCombine - Target-specific DAG combining for ARMISD::BRCOND.
Definition ARMISelLowering.cpp:18203

llvm::ARMTargetLowering::getExceptionSelectorRegister
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
Definition ARMISelLowering.cpp:21774

llvm::ARMTargetLowering::shouldConvertSplatType
Type * shouldConvertSplatType(ShuffleVectorInst *SVI) const override
Given a shuffle vector SVI representing a vector splat, return a new scalar type of size equal to SVI...
Definition ARMISelLowering.cpp:19116

llvm::ARMTargetLowering::emitLoadLinked
Value * emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Value *Addr, AtomicOrdering Ord) const override
Perform a load-linked operation on Addr, returning a "Value *" with the corresponding pointee type.
Definition ARMISelLowering.cpp:21246

llvm::ARMTargetLowering::makeDMB
Instruction * makeDMB(IRBuilderBase &Builder, ARM_MB::MemBOpt Domain) const
Definition ARMISelLowering.cpp:20976

llvm::ARMTargetLowering::isLegalICmpImmediate
bool isLegalICmpImmediate(int64_t Imm) const override
isLegalICmpImmediate - Return true if the specified immediate is legal icmp immediate,...
Definition ARMISelLowering.cpp:19445

llvm::ARMTargetLowering::LowerXConstraint
const char * LowerXConstraint(EVT ConstraintVT) const override
Try to replace an X constraint, which matches anything, with another that has more specific requireme...
Definition ARMISelLowering.cpp:20053

llvm::ARMTargetLowering::getJumpTableEncoding
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
Definition ARMISelLowering.cpp:3283

llvm::ARMTargetLowering::isDesirableToTransformToIntegerOp
bool isDesirableToTransformToIntegerOp(unsigned Opc, EVT VT) const override
Return true if it is profitable for dag combiner to transform a floating point op of specified opcode...
Definition ARMISelLowering.cpp:18954

llvm::ARMTargetLowering::shouldExpandAtomicCmpXchgInIR
TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
Definition ARMISelLowering.cpp:21117

llvm::ARMTargetLowering::CCAssignFnForCall
CCAssignFn * CCAssignFnForCall(CallingConv::ID CC, bool isVarArg) const
Definition ARMISelLowering.cpp:1784

llvm::ARMTargetLowering::allowsMisalignedMemoryAccesses
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags, unsigned *Fast) const override
allowsMisalignedMemoryAccesses - Returns true if the target allows unaligned memory accesses of the s...
Definition ARMISelLowering.cpp:18959

llvm::ARMTargetLowering::isLegalInterleavedAccessType
bool isLegalInterleavedAccessType(unsigned Factor, FixedVectorType *VecTy, Align Alignment, const DataLayout &DL) const
Returns true if VecTy is a legal interleaved access type.
Definition ARMISelLowering.cpp:21335

llvm::ARMTargetLowering::isVectorLoadExtDesirable
bool isVectorLoadExtDesirable(SDValue ExtVal) const override
Return true if folding a vector load into ExtVal (a sign, zero, or any extend node) is profitable.
Definition ARMISelLowering.cpp:19129

llvm::ARMTargetLowering::canCombineStoreAndExtract
bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx, unsigned &Cost) const override
Return true if the target can combine store(extractelement VectorTy,Idx).
Definition ARMISelLowering.cpp:21172

llvm::ARMTargetLowering::useSoftFloat
bool useSoftFloat() const override
Definition ARMISelLowering.cpp:1503

llvm::ARMTargetLowering::alignLoopsWithOptSize
bool alignLoopsWithOptSize() const override
Should loops be aligned even when the function is marked OptSize (but not MinSize).
Definition ARMISelLowering.cpp:21323

llvm::ARMTargetLowering::PerformCMOVToBFICombine
SDValue PerformCMOVToBFICombine(SDNode *N, SelectionDAG &DAG) const
Definition ARMISelLowering.cpp:17943

llvm::ARMTargetLowering::allowTruncateForTailCall
bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override
Return true if a truncation from FromTy to ToTy is permitted when deciding whether a call is in tail ...
Definition ARMISelLowering.cpp:19159

llvm::ARMTargetLowering::LowerAsmOperandForConstraint
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
LowerAsmOperandForConstraint - Lower the specified operand into the Ops vector.
Definition ARMISelLowering.cpp:20227

llvm::ARMTargetLowering::getRegForInlineAsmConstraint
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
Definition ARMISelLowering.cpp:20148

llvm::ARMTargetLowering::shouldConvertFpToSat
bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override
Should we generate fp_to_si_sat and fp_to_ui_sat from type FPVT to type VT from min(max(fptoi)) satur...
Definition ARMISelLowering.cpp:13710

llvm::ARMTargetLowering::functionArgumentNeedsConsecutiveRegisters
bool functionArgumentNeedsConsecutiveRegisters(Type *Ty, CallingConv::ID CallConv, bool isVarArg, const DataLayout &DL) const override
Returns true if an argument of type Ty needs to be passed in a contiguous block of registers in calli...
Definition ARMISelLowering.cpp:21750

llvm::ARMTargetLowering::isOffsetFoldingLegal
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
Definition ARMISelLowering.cpp:20691

llvm::ARMTargetLowering::getTM
const ARMBaseTargetMachine & getTM() const
Definition ARMISelLowering.cpp:505

llvm::ARMTargetLowering::isMaskAndCmp0FoldingBeneficial
bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override
Return if the target supports combining a chain like:
Definition ARMISelLowering.cpp:21222

llvm::ARMTargetLowering::preferredShiftLegalizationStrategy
ShiftLegalizationStrategy preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N, unsigned ExpansionFactor) const override
Definition ARMISelLowering.cpp:21238

llvm::ARMTargetLowering::getPostIndexedAddressParts
bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
getPostIndexedAddressParts - returns true by value, base pointer and offset pointer and addressing mo...
Definition ARMISelLowering.cpp:19698

llvm::ARMTargetLowering::emitLeadingFence
Instruction * emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
Inserts in the IR a target-specific intrinsic specifying a fence.
Definition ARMISelLowering.cpp:21002

llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40

llvm::ArrayRef::size
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142

llvm::ArrayRef::empty
bool empty() const
empty - Check if the array is empty.
Definition ArrayRef.h:137

llvm::AtomicCmpXchgInst
An instruction that atomically checks whether a specified value is in a memory location,...
Definition Instructions.h:507

llvm::AtomicRMWInst
an instruction that atomically reads a memory location, combines it with another value,...
Definition Instructions.h:710

llvm::AtomicRMWInst::isFloatingPointOperation
bool isFloatingPointOperation() const
Definition Instructions.h:899

llvm::Attribute::get
static LLVM_ABI Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
Definition Attributes.cpp:95

llvm::BaseIndexOffset::match
static LLVM_ABI BaseIndexOffset match(const SDNode *N, const SelectionDAG &DAG)
Parses tree in N for base, index, offset addresses.
Definition SelectionDAGAddressAnalysis.cpp:301

llvm::BasicBlock
LLVM Basic Block Representation.
Definition BasicBlock.h:62

llvm::BitVector
Definition BitVector.h:101

llvm::BlockAddress
The address of a basic block.
Definition Constants.h:899

llvm::BranchProbability::getZero
static BranchProbability getZero()
Definition BranchProbability.h:51

llvm::BuildVectorSDNode
A "pseudo-class" with methods for operating on BUILD_VECTORs.
Definition SelectionDAGNodes.h:2199

llvm::BuildVectorSDNode::isConstantSplat
LLVM_ABI bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits=0, bool isBigEndian=false) const
Check if this is a constant splat, and if so, find the smallest element size that splats the vector.
Definition SelectionDAG.cpp:13617

llvm::BuildVectorSDNode::getConstantFPSplatPow2ToLog2Int
LLVM_ABI int32_t getConstantFPSplatPow2ToLog2Int(BitVector *UndefElements, uint32_t BitWidth) const
If this is a constant FP splat and the splatted constant FP is an exact power or 2,...
Definition SelectionDAG.cpp:13810

llvm::CCState
CCState - This class holds information needed while lowering arguments and return values.
Definition CallingConvLower.h:171

llvm::CCState::getInRegsParamInfo
void getInRegsParamInfo(unsigned InRegsParamRecordIndex, unsigned &BeginReg, unsigned &EndReg) const
Definition CallingConvLower.h:449

llvm::CCState::getFirstUnallocated
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
Definition CallingConvLower.h:318

llvm::CCState::resultsCompatible
static LLVM_ABI bool resultsCompatible(CallingConv::ID CalleeCC, CallingConv::ID CallerCC, MachineFunction &MF, LLVMContext &C, const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn CalleeFn, CCAssignFn CallerFn)
Returns true if the results of the two calling conventions are compatible.
Definition CallingConvLower.cpp:264

llvm::CCState::AllocateReg
MCRegister AllocateReg(MCPhysReg Reg)
AllocateReg - Attempt to allocate one register.
Definition CallingConvLower.h:333

llvm::CCState::CheckReturn
LLVM_ABI bool CheckReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
CheckReturn - Analyze the return values of a function, returning true if the return can be performed ...
Definition CallingConvLower.cpp:99

llvm::CCState::AnalyzeReturn
LLVM_ABI void AnalyzeReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeReturn - Analyze the returned values of a return, incorporating info about the result values i...
Definition CallingConvLower.cpp:113

llvm::CCState::nextInRegsParam
bool nextInRegsParam()
Definition CallingConvLower.h:467

llvm::CCState::rewindByValRegsInfo
void rewindByValRegsInfo()
Definition CallingConvLower.h:481

llvm::CCState::getInRegsParamsProcessed
unsigned getInRegsParamsProcessed() const
Definition CallingConvLower.h:445

llvm::CCState::getStackSize
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
Definition CallingConvLower.h:246

llvm::CCState::addInRegsParamInfo
void addInRegsParamInfo(unsigned RegBegin, unsigned RegEnd)
Definition CallingConvLower.h:460

llvm::CCState::AnalyzeFormalArguments
LLVM_ABI void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
Definition CallingConvLower.cpp:85

llvm::CCState::getInRegsParamsCount
unsigned getInRegsParamsCount() const
Definition CallingConvLower.h:442

llvm::CCValAssign
CCValAssign - Represent assignment of one arg/retval to a location.
Definition CallingConvLower.h:34

llvm::CCValAssign::isRegLoc
bool isRegLoc() const
Definition CallingConvLower.h:123

llvm::CCValAssign::getLocReg
Register getLocReg() const
Definition CallingConvLower.h:129

llvm::CCValAssign::getLocInfo
LocInfo getLocInfo() const
Definition CallingConvLower.h:135

llvm::CCValAssign::BCvt
@ BCvt
Definition CallingConvLower.h:47

llvm::CCValAssign::SExt
@ SExt
Definition CallingConvLower.h:38

llvm::CCValAssign::ZExt
@ ZExt
Definition CallingConvLower.h:39

llvm::CCValAssign::Full
@ Full
Definition CallingConvLower.h:37

llvm::CCValAssign::AExt
@ AExt
Definition CallingConvLower.h:40

llvm::CCValAssign::needsCustom
bool needsCustom() const
Definition CallingConvLower.h:127

llvm::CCValAssign::getValVT
MVT getValVT() const
Definition CallingConvLower.h:121

llvm::CCValAssign::isMemLoc
bool isMemLoc() const
Definition CallingConvLower.h:124

llvm::CCValAssign::getLocMemOffset
int64_t getLocMemOffset() const
Definition CallingConvLower.h:130

llvm::CCValAssign::getValNo
unsigned getValNo() const
Definition CallingConvLower.h:120

llvm::CCValAssign::getLocVT
MVT getLocVT() const
Definition CallingConvLower.h:133

llvm::CallBase::isMustTailCall
LLVM_ABI bool isMustTailCall() const
Tests if this call site must be tail call optimized.
Definition Instructions.cpp:344

llvm::CallBase::isIndirectCall
LLVM_ABI bool isIndirectCall() const
Return true if the callsite is an indirect call.
Definition Instructions.cpp:335

llvm::CallBase::getAttributes
AttributeList getAttributes() const
Return the attributes for this call.
Definition InstrTypes.h:1422

llvm::CallBase::addParamAttr
void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind)
Adds the attribute to the indicated argument.
Definition InstrTypes.h:1504

llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition Instructions.h:1511

llvm::CallInst::isTailCall
bool isTailCall() const
Definition Instructions.h:1622

llvm::ConstantDataArray::get
static Constant * get(LLVMContext &Context, ArrayRef< ElementTy > Elts)
get() constructor - Return a constant with array type with an element count and element type matching...
Definition Constants.h:715

llvm::ConstantFPSDNode
Definition SelectionDAGNodes.h:1806

llvm::ConstantFPSDNode::getValueAPF
const APFloat & getValueAPF() const
Definition SelectionDAGNodes.h:1819

llvm::ConstantFP
ConstantFP - Floating Point Values [float, double].
Definition Constants.h:277

llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition Constants.h:87

llvm::ConstantInt::getZExtValue
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:163

llvm::ConstantPoolSDNode
Definition SelectionDAGNodes.h:2094

llvm::ConstantSDNode
Definition SelectionDAGNodes.h:1751

llvm::ConstantSDNode::getZExtValue
uint64_t getZExtValue() const
Definition SelectionDAGNodes.h:1768

llvm::ConstantSDNode::getAPIntValue
const APInt & getAPIntValue() const
Definition SelectionDAGNodes.h:1767

llvm::ConstantSDNode::getSExtValue
int64_t getSExtValue() const
Definition SelectionDAGNodes.h:1769

llvm::ConstantSDNode::isZero
bool isZero() const
Definition SelectionDAGNodes.h:1777

llvm::Constant
This is an important base class in LLVM.
Definition Constant.h:43

llvm::DWARFExpression::Operation::getNumOperands
uint64_t getNumOperands() const
Definition DWARFExpression.h:93

llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:63

llvm::DataLayout::isLittleEndian
bool isLittleEndian() const
Layout endianness...
Definition DataLayout.h:207

llvm::DataLayout::isBigEndian
bool isBigEndian() const
Definition DataLayout.h:208

llvm::DataLayout::getStackAlignment
MaybeAlign getStackAlignment() const
Returns the natural stack alignment, or MaybeAlign() if one wasn't specified.
Definition DataLayout.h:237

llvm::DataLayout::getTypeAllocSize
LLVM_ABI TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition DataLayout.cpp:858

llvm::DataLayout::getPreferredAlign
LLVM_ABI Align getPreferredAlign(const GlobalVariable *GV) const
Returns the preferred alignment of the specified global.
Definition DataLayout.cpp:1042

llvm::DataLayout::getPrivateGlobalPrefix
StringRef getPrivateGlobalPrefix() const
Definition DataLayout.h:295

llvm::DataLayout::getPrefTypeAlign
LLVM_ABI Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition DataLayout.cpp:900

llvm::DebugLoc
A debug info location.
Definition DebugLoc.h:124

llvm::DemandedBits
Definition DemandedBits.h:41

llvm::DenseMapBase::find
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178

llvm::DenseMapBase::size
unsigned size() const
Definition DenseMap.h:110

llvm::DenseMapBase::empty
bool empty() const
Definition DenseMap.h:109

llvm::DenseMapBase::begin
iterator begin()
Definition DenseMap.h:78

llvm::DenseMapBase::end
iterator end()
Definition DenseMap.h:81

llvm::DenseMap
Definition DenseMap.h:739

llvm::FastISel
This is a fast-path instruction selection class that generates poor code and doesn't support illegal ...
Definition FastISel.h:66

llvm::FixedVectorType
Class to represent fixed width SIMD vectors.
Definition DerivedTypes.h:594

llvm::FixedVectorType::getNumElements
unsigned getNumElements() const
Definition DerivedTypes.h:637

llvm::FixedVectorType::get
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition Type.cpp:802

llvm::FrameIndexSDNode::getIndex
int getIndex() const
Definition SelectionDAGNodes.h:2019

llvm::FunctionCallee
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Definition DerivedTypes.h:170

llvm::FunctionCallee::getCallee
Value * getCallee()
Definition DerivedTypes.h:189

llvm::FunctionLoweringInfo
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
Definition FunctionLoweringInfo.h:56

llvm::FunctionType::getParamType
Type * getParamType(unsigned i) const
Parameter type accessors.
Definition DerivedTypes.h:137

llvm::Function
Definition Function.h:64

llvm::Function::getFunctionType
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition Function.h:209

llvm::Function::getCallingConv
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition Function.h:270

llvm::Function::arg_begin
arg_iterator arg_begin()
Definition Function.h:866

llvm::Function::getContext
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:359

llvm::Function::hasStructRetAttr
bool hasStructRetAttr() const
Determine if the function returns a structure through first or second pointer argument.
Definition Function.h:687

llvm::Function::const_arg_iterator
const Argument * const_arg_iterator
Definition Function.h:73

llvm::Function::isVarArg
bool isVarArg() const
isVarArg - Return true if this function takes a variable number of arguments.
Definition Function.h:227

llvm::Function::hasFnAttribute
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:730

llvm::GlobalAddressSDNode
Definition SelectionDAGNodes.h:1980

llvm::GlobalAddressSDNode::getGlobal
const GlobalValue * getGlobal() const
Definition SelectionDAGNodes.h:1994

llvm::GlobalAlias
Definition GlobalAlias.h:29

llvm::GlobalValue
Definition GlobalValue.h:49

llvm::GlobalValue::isDSOLocal
bool isDSOLocal() const
Definition GlobalValue.h:307

llvm::GlobalValue::hasExternalWeakLinkage
bool hasExternalWeakLinkage() const
Definition GlobalValue.h:531

llvm::GlobalValue::hasDLLImportStorageClass
bool hasDLLImportStorageClass() const
Definition GlobalValue.h:280

llvm::GlobalValue::getParent
Module * getParent()
Get the module that this global value is contained inside of...
Definition GlobalValue.h:663

llvm::GlobalValue::isStrongDefinitionForLinker
bool isStrongDefinitionForLinker() const
Returns true if this global's definition will be the one chosen by the linker.
Definition GlobalValue.h:638

llvm::GlobalValue::InternalLinkage
@ InternalLinkage
Rename collisions when linking (static functions).
Definition GlobalValue.h:60

llvm::IRBuilderBase
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114

llvm::IRBuilder
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2788

llvm::Init
Definition Record.h:286

llvm::Instruction
Definition Instruction.h:69

llvm::Instruction::hasAtomicStore
LLVM_ABI bool hasAtomicStore() const LLVM_READONLY
Return true if this atomic instruction stores to memory.
Definition Instruction.cpp:1094

llvm::LLT
Definition LowLevelType.h:40

llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68

llvm::LLVMContext::diagnose
LLVM_ABI void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
Definition LLVMContext.cpp:247

llvm::LSBaseSDNode::isUnindexed
bool isUnindexed() const
Return true if this is NOT a pre/post inc/dec load/store.
Definition SelectionDAGNodes.h:2549

llvm::LSBaseSDNode::isIndexed
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
Definition SelectionDAGNodes.h:2546

llvm::LoadInst
An instruction for reading from memory.
Definition Instructions.h:181

llvm::LoadSDNode
This class is used to represent ISD::LOAD nodes.
Definition SelectionDAGNodes.h:2558

llvm::LoadSDNode::getBasePtr
const SDValue & getBasePtr() const
Definition SelectionDAGNodes.h:2577

llvm::MCInstrDesc
Describe properties that are true of each instruction in the target description file.
Definition MCInstrDesc.h:210

llvm::MVT
Machine Value Type.
Definition MachineValueType.h:36

llvm::MVT::getFloatingPointVT
static MVT getFloatingPointVT(unsigned BitWidth)
Definition MachineValueType.h:438

llvm::MVT::SimpleValueType
SimpleValueType
Definition MachineValueType.h:38

llvm::MVT::integer_fixedlen_vector_valuetypes
static auto integer_fixedlen_vector_valuetypes()
Definition MachineValueType.h:561

llvm::MVT::SimpleTy
SimpleValueType SimpleTy
Definition MachineValueType.h:56

llvm::MVT::getScalarSizeInBits
uint64_t getScalarSizeInBits() const
Definition MachineValueType.h:353

llvm::MVT::getVectorNumElements
unsigned getVectorNumElements() const
Definition MachineValueType.h:301

llvm::MVT::isInteger
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition MachineValueType.h:91

llvm::MVT::getVT
static LLVM_ABI MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
Definition ValueTypes.cpp:249

llvm::MVT::integer_valuetypes
static auto integer_valuetypes()
Definition MachineValueType.h:532

llvm::MVT::getSizeInBits
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
Definition MachineValueType.h:315

llvm::MVT::fixedlen_vector_valuetypes
static auto fixedlen_vector_valuetypes()
Definition MachineValueType.h:549

llvm::MVT::getFixedSizeInBits
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition MachineValueType.h:349

llvm::MVT::isScalarInteger
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
Definition MachineValueType.h:101

llvm::MVT::getVectorVT
static MVT getVectorVT(MVT VT, unsigned NumElements)
Definition MachineValueType.h:458

llvm::MVT::getVectorElementType
MVT getVectorElementType() const
Definition MachineValueType.h:270

llvm::MVT::isFloatingPoint
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition MachineValueType.h:81

llvm::MVT::getIntegerVT
static MVT getIntegerVT(unsigned BitWidth)
Definition MachineValueType.h:448

llvm::MVT::fp_valuetypes
static auto fp_valuetypes()
Definition MachineValueType.h:538

llvm::MVT::is64BitVector
bool is64BitVector() const
Return true if this is a 64-bit vector type.
Definition MachineValueType.h:152

llvm::MachineBasicBlock
Definition MachineBasicBlock.h:122

llvm::MachineBasicBlock::transferSuccessorsAndUpdatePHIs
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
Definition MachineBasicBlock.cpp:955

llvm::MachineBasicBlock::isEHPad
bool isEHPad() const
Returns true if the block is a landing pad.
Definition MachineBasicBlock.h:664

llvm::MachineBasicBlock::getFallThrough
LLVM_ABI MachineBasicBlock * getFallThrough(bool JumpToFallThrough=true)
Return the fallthrough block if the block can implicitly transfer control to the block after it by fa...
Definition MachineBasicBlock.cpp:995

llvm::MachineBasicBlock::setCallFrameSize
void setCallFrameSize(unsigned N)
Set the call frame size on entry to this basic block.
Definition MachineBasicBlock.h:1273

llvm::MachineBasicBlock::getBasicBlock
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
Definition MachineBasicBlock.h:253

llvm::MachineBasicBlock::canFallThrough
LLVM_ABI bool canFallThrough()
Return true if the block can implicitly transfer control to the block after it by falling off the end...
Definition MachineBasicBlock.cpp:1039

llvm::MachineBasicBlock::addSuccessor
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
Definition MachineBasicBlock.cpp:816

llvm::MachineBasicBlock::begin
iterator begin()
Definition MachineBasicBlock.h:377

llvm::MachineBasicBlock::instr_iterator
Instructions::iterator instr_iterator
Definition MachineBasicBlock.h:336

llvm::MachineBasicBlock::reverse_iterator
MachineInstrBundleIterator< MachineInstr, true > reverse_iterator
Definition MachineBasicBlock.h:343

llvm::MachineBasicBlock::splitAt
LLVM_ABI MachineBasicBlock * splitAt(MachineInstr &SplitInst, bool UpdateLiveIns=true, LiveIntervals *LIS=nullptr)
Split a basic block into 2 pieces at SplitPoint.
Definition MachineBasicBlock.cpp:1043

llvm::MachineBasicBlock::end
iterator end()
Definition MachineBasicBlock.h:379

llvm::MachineBasicBlock::addLiveIn
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
Definition MachineBasicBlock.h:478

llvm::MachineBasicBlock::getParent
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
Definition MachineBasicBlock.h:323

llvm::MachineBasicBlock::erase
LLVM_ABI instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
Definition MachineBasicBlock.cpp:1479

llvm::MachineBasicBlock::successors
iterator_range< succ_iterator > successors()
Definition MachineBasicBlock.h:466

llvm::MachineBasicBlock::predecessors
iterator_range< pred_iterator > predecessors()
Definition MachineBasicBlock.h:460

llvm::MachineBasicBlock::splice
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
Definition MachineBasicBlock.h:1156

llvm::MachineBasicBlock::iterator
MachineInstrBundleIterator< MachineInstr > iterator
Definition MachineBasicBlock.h:341

llvm::MachineBasicBlock::moveAfter
LLVM_ABI void moveAfter(MachineBasicBlock *NewBefore)
Definition MachineBasicBlock.cpp:698

llvm::MachineBasicBlock::setIsEHPad
void setIsEHPad(bool V=true)
Indicates the block is a landing pad.
Definition MachineBasicBlock.h:668

llvm::MachineConstantPool
The MachineConstantPool class keeps track of constants referenced by a function which must be spilled...
Definition MachineConstantPool.h:117

llvm::MachineConstantPool::getConstantPoolIndex
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
Definition MachineFunction.cpp:1566

llvm::MachineFrameInfo::CreateFixedObject
LLVM_ABI int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
Definition MachineFrameInfo.cpp:83

llvm::MachineFrameInfo::computeMaxCallFrameSize
LLVM_ABI void computeMaxCallFrameSize(MachineFunction &MF, std::vector< MachineBasicBlock::iterator > *FrameSDOps=nullptr)
Computes the maximum size of a callframe.
Definition MachineFrameInfo.cpp:187

llvm::MachineFrameInfo::setAdjustsStack
void setAdjustsStack(bool V)
Definition MachineFrameInfo.h:634

llvm::MachineFrameInfo::setFrameAddressIsTaken
void setFrameAddressIsTaken(bool T)
Definition MachineFrameInfo.h:379

llvm::MachineFrameInfo::setHasTailCall
void setHasTailCall(bool V=true)
Definition MachineFrameInfo.h:663

llvm::MachineFrameInfo::setReturnAddressIsTaken
void setReturnAddressIsTaken(bool s)
Definition MachineFrameInfo.h:385

llvm::MachineFrameInfo::getObjectSize
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
Definition MachineFrameInfo.h:477

llvm::MachineFrameInfo::hasVAStart
bool hasVAStart() const
Returns true if the function calls the llvm.va_start intrinsic.
Definition MachineFrameInfo.h:654

llvm::MachineFrameInfo::getObjectOffset
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
Definition MachineFrameInfo.h:544

llvm::MachineFrameInfo::isFixedObjectIndex
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
Definition MachineFrameInfo.h:716

llvm::MachineFrameInfo::getFunctionContextIndex
int getFunctionContextIndex() const
Return the index for the function context object.
Definition MachineFrameInfo.h:371

llvm::MachineFunctionProperties
Properties which a MachineFunction may have at a given point in time.
Definition MachineFunction.h:137

llvm::MachineFunction
Definition MachineFunction.h:286

llvm::MachineFunction::getFunctionNumber
unsigned getFunctionNumber() const
getFunctionNumber - Return a unique ID for the current function.
Definition MachineFunction.h:742

llvm::MachineFunction::getMachineMemOperand
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
Definition MachineFunction.cpp:536

llvm::MachineFunction::getFrameInfo
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
Definition MachineFunction.h:778

llvm::MachineFunction::push_back
void push_back(MachineBasicBlock *MBB)
Definition MachineFunction.h:1001

llvm::MachineFunction::getRegInfo
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Definition MachineFunction.h:772

llvm::MachineFunction::getDataLayout
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Definition MachineFunction.cpp:309

llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition MachineFunction.h:733

llvm::MachineFunction::iterator
BasicBlockListType::iterator iterator
Definition MachineFunction.h:966

llvm::MachineFunction::getInfo
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Definition MachineFunction.h:860

llvm::MachineFunction::getConstantPool
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
Definition MachineFunction.h:794

llvm::MachineFunction::getProperties
const MachineFunctionProperties & getProperties() const
Get the function properties.
Definition MachineFunction.h:853

llvm::MachineFunction::addLiveIn
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
Definition MachineFunction.cpp:780

llvm::MachineFunction::CreateMachineBasicBlock
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineInstr - Allocate a new MachineInstr.
Definition MachineFunction.cpp:499

llvm::MachineFunction::insert
void insert(iterator MBBI, MachineBasicBlock *MBB)
Definition MachineFunction.h:1003

llvm::MachineFunction::getTarget
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Definition MachineFunction.h:758

llvm::MachineInstrBuilder
Definition MachineInstrBuilder.h:98

llvm::MachineInstrBuilder::addExternalSymbol
const MachineInstrBuilder & addExternalSymbol(const char *FnName, unsigned TargetFlags=0) const
Definition MachineInstrBuilder.h:213

llvm::MachineInstrBuilder::addImm
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Definition MachineInstrBuilder.h:160

llvm::MachineInstrBuilder::add
const MachineInstrBuilder & add(const MachineOperand &MO) const
Definition MachineInstrBuilder.h:253

llvm::MachineInstrBuilder::addFrameIndex
const MachineInstrBuilder & addFrameIndex(int Idx) const
Definition MachineInstrBuilder.h:181

llvm::MachineInstrBuilder::addConstantPoolIndex
const MachineInstrBuilder & addConstantPoolIndex(unsigned Idx, int Offset=0, unsigned TargetFlags=0) const
Definition MachineInstrBuilder.h:187

llvm::MachineInstrBuilder::addRegMask
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
Definition MachineInstrBuilder.h:226

llvm::MachineInstrBuilder::addReg
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Definition MachineInstrBuilder.h:126

llvm::MachineInstrBuilder::addJumpTableIndex
const MachineInstrBuilder & addJumpTableIndex(unsigned Idx, unsigned TargetFlags=0) const
Definition MachineInstrBuilder.h:200

llvm::MachineInstrBuilder::addMBB
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
Definition MachineInstrBuilder.h:175

llvm::MachineInstrBuilder::cloneMemRefs
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
Definition MachineInstrBuilder.h:242

llvm::MachineInstrBuilder::addUse
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
Definition MachineInstrBuilder.h:152

llvm::MachineInstrBuilder::setMIFlags
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
Definition MachineInstrBuilder.h:301

llvm::MachineInstrBuilder::addMemOperand
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
Definition MachineInstrBuilder.h:231

llvm::MachineInstrBuilder::getInstr
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
Definition MachineInstrBuilder.h:118

llvm::MachineInstrBuilder::addDef
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Definition MachineInstrBuilder.h:145

llvm::MachineInstr
Representation of each machine instruction.
Definition MachineInstr.h:72

llvm::MachineInstr::readsRegister
bool readsRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr reads the specified register.
Definition MachineInstr.h:1486

llvm::MachineInstr::definesRegister
bool definesRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr fully defines the specified register.
Definition MachineInstr.h:1515

llvm::MachineInstr::FrameSetup
@ FrameSetup
Definition MachineInstr.h:88

llvm::MachineInstr::mop_iterator
MachineOperand * mop_iterator
iterator/begin/end - Iterate over all operands of a machine instruction.
Definition MachineInstr.h:680

llvm::MachineInstr::getOperand
const MachineOperand & getOperand(unsigned i) const
Definition MachineInstr.h:594

llvm::MachineJumpTableInfo::createJumpTableIndex
LLVM_ABI unsigned createJumpTableIndex(const std::vector< MachineBasicBlock * > &DestBBs)
createJumpTableIndex - Create a new jump table.
Definition MachineFunction.cpp:1377

llvm::MachineJumpTableInfo::EK_Inline
@ EK_Inline
EK_Inline - Jump table entries are emitted inline at their point of use.
Definition MachineJumpTableInfo.h:84

llvm::MachineJumpTableInfo::EK_BlockAddress
@ EK_BlockAddress
EK_BlockAddress - Each entry is a plain address of block, e.g.: .word LBB123.
Definition MachineJumpTableInfo.h:55

llvm::MachineMemOperand
A description of a memory reference used in the backend.
Definition MachineMemOperand.h:130

llvm::MachineMemOperand::Flags
Flags
Flags values. These may be or'd together.
Definition MachineMemOperand.h:133

llvm::MachineMemOperand::MOVolatile
@ MOVolatile
The memory access is volatile.
Definition MachineMemOperand.h:141

llvm::MachineMemOperand::MODereferenceable
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
Definition MachineMemOperand.h:145

llvm::MachineMemOperand::MOLoad
@ MOLoad
The memory access reads data.
Definition MachineMemOperand.h:137

llvm::MachineMemOperand::MONonTemporal
@ MONonTemporal
The memory access is non-temporal.
Definition MachineMemOperand.h:143

llvm::MachineMemOperand::MONone
@ MONone
Definition MachineMemOperand.h:135

llvm::MachineMemOperand::MOInvariant
@ MOInvariant
The memory access always returns the same value (or traps).
Definition MachineMemOperand.h:147

llvm::MachineMemOperand::MOStore
@ MOStore
The memory access writes data.
Definition MachineMemOperand.h:139

llvm::MachineMemOperand::getFlags
Flags getFlags() const
Return the raw flags of the source value,.
Definition MachineMemOperand.h:227

llvm::MachineOperand
MachineOperand class - Representation of each machine instruction operand.
Definition MachineOperand.h:48

llvm::MachineOperand::setIsRenamable
LLVM_ABI void setIsRenamable(bool Val=true)
Definition MachineOperand.cpp:141

llvm::MachineOperand::isReg
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Definition MachineOperand.h:328

llvm::MachineOperand::setReg
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
Definition MachineOperand.cpp:60

llvm::MachineOperand::isDef
bool isDef() const
Definition MachineOperand.h:383

llvm::MachineOperand::isDead
bool isDead() const
Definition MachineOperand.h:393

llvm::MachineOperand::CreateImm
static MachineOperand CreateImm(int64_t Val)
Definition MachineOperand.h:824

llvm::MachineOperand::getReg
Register getReg() const
getReg - Returns the register number.
Definition MachineOperand.h:368

llvm::MachineOperand::setIsDef
LLVM_ABI void setIsDef(bool Val=true)
Change a def to a use, or a use to a def.
Definition MachineOperand.cpp:106

llvm::MachineOperand::CreateReg
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
Definition MachineOperand.h:842

llvm::MachineRegisterInfo
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Definition MachineRegisterInfo.h:53

llvm::MaskedLoadSDNode
This class is used to represent an MLOAD node.
Definition SelectionDAGNodes.h:2865

llvm::MaskedStoreSDNode
This class is used to represent an MSTORE node.
Definition SelectionDAGNodes.h:2894

llvm::MemIntrinsicSDNode
This SDNode is used for target intrinsics that touch memory and need an associated MachineMemOperand.
Definition SelectionDAGNodes.h:1667

llvm::MemSDNode
This is an abstract virtual class for memory operations.
Definition SelectionDAGNodes.h:1408

llvm::MemSDNode::getBaseAlign
Align getBaseAlign() const
Returns alignment and volatility of the memory access.
Definition SelectionDAGNodes.h:1425

llvm::MemSDNode::getAlign
Align getAlign() const
Definition SelectionDAGNodes.h:1426

llvm::MemSDNode::isVolatile
bool isVolatile() const
Definition SelectionDAGNodes.h:1447

llvm::MemSDNode::getAAInfo
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
Definition SelectionDAGNodes.h:1456

llvm::MemSDNode::isSimple
bool isSimple() const
Returns true if the memory operation is neither atomic or volatile.
Definition SelectionDAGNodes.h:1485

llvm::MemSDNode::getMemOperand
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
Definition SelectionDAGNodes.h:1492

llvm::MemSDNode::getPointerInfo
const MachinePointerInfo & getPointerInfo() const
Definition SelectionDAGNodes.h:1494

llvm::MemSDNode::getChain
const SDValue & getChain() const
Definition SelectionDAGNodes.h:1519

llvm::MemSDNode::getMemoryVT
EVT getMemoryVT() const
Return the type of the in-memory value.
Definition SelectionDAGNodes.h:1488

llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67

llvm::PointerType::getUnqual
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
Definition DerivedTypes.h:722

llvm::Register
Wrapper class representing virtual and physical registers.
Definition Register.h:20

llvm::SDLoc
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Definition SelectionDAGNodes.h:1236

llvm::SDLoc::getDebugLoc
const DebugLoc & getDebugLoc() const
Definition SelectionDAGNodes.h:1252

llvm::SDNode
Represents one node in the SelectionDAG.
Definition SelectionDAGNodes.h:512

llvm::SDNode::getOpcode
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
Definition SelectionDAGNodes.h:703

llvm::SDNode::hasOneUse
bool hasOneUse() const
Return true if there is exactly one use of this node.
Definition SelectionDAGNodes.h:775

llvm::SDNode::isOnlyUserOf
LLVM_ABI bool isOnlyUserOf(const SDNode *N) const
Return true if this node is the only use of N.
Definition SelectionDAG.cpp:13052

llvm::SDNode::uses
iterator_range< use_iterator > uses()
Definition SelectionDAGNodes.h:895

llvm::SDNode::getFlags
SDNodeFlags getFlags() const
Definition SelectionDAGNodes.h:1096

llvm::SDNode::hasPredecessorHelper
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode * > &Visited, SmallVectorImpl< const SDNode * > &Worklist, unsigned int MaxSteps=0, bool TopologicalPrune=false)
Returns true if N is a predecessor of any node in Worklist.
Definition SelectionDAGNodes.h:964

llvm::SDNode::getAsZExtVal
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
Definition SelectionDAGNodes.h:1794

llvm::SDNode::use_empty
bool use_empty() const
Return true if there are no uses of this node.
Definition SelectionDAGNodes.h:772

llvm::SDNode::getNumValues
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
Definition SelectionDAGNodes.h:1112

llvm::SDNode::getNumOperands
unsigned getNumOperands() const
Return the number of values used by this operation.
Definition SelectionDAGNodes.h:1024

llvm::SDNode::getOperand
const SDValue & getOperand(unsigned Num) const
Definition SelectionDAGNodes.h:1045

llvm::SDNode::getConstantOperandVal
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
Definition SelectionDAGNodes.h:1790

llvm::SDNode::getConstantOperandAPInt
const APInt & getConstantOperandAPInt(unsigned Num) const
Helper method returns the APInt of a ConstantSDNode operand.
Definition SelectionDAGNodes.h:1798

llvm::SDNode::isPredecessorOf
bool isPredecessorOf(const SDNode *N) const
Return true if this node is a predecessor of N.
Definition SelectionDAGNodes.h:945

llvm::SDNode::hasAnyUseOfValue
LLVM_ABI bool hasAnyUseOfValue(unsigned Value) const
Return true if there are any use of the indicated value.
Definition SelectionDAG.cpp:13041

llvm::SDNode::getValueType
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
Definition SelectionDAGNodes.h:1115

llvm::SDNode::setCFIType
void setCFIType(uint32_t Type)
Definition SelectionDAGNodes.h:1108

llvm::SDNode::isUndef
bool isUndef() const
Returns true if the node type is UNDEF or POISON.
Definition SelectionDAGNodes.h:710

llvm::SDNode::users
iterator_range< user_iterator > users()
Definition SelectionDAGNodes.h:907

llvm::SDNode::setFlags
void setFlags(SDNodeFlags NewFlags)
Definition SelectionDAGNodes.h:1097

llvm::SDNode::user_begin
user_iterator user_begin() const
Provide iteration support to walk over all users of an SDNode.
Definition SelectionDAGNodes.h:903

llvm::SDUse
Represents a use of a SDNode.
Definition SelectionDAGNodes.h:286

llvm::SDValue
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
Definition SelectionDAGNodes.h:147

llvm::SDValue::isUndef
bool isUndef() const
Definition SelectionDAGNodes.h:1303

llvm::SDValue::getNode
SDNode * getNode() const
get the SDNode which holds the desired result
Definition SelectionDAGNodes.h:161

llvm::SDValue::hasOneUse
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
Definition SelectionDAGNodes.h:1313

llvm::SDValue::getValue
SDValue getValue(unsigned R) const
Definition SelectionDAGNodes.h:181

llvm::SDValue::dump
void dump() const
Definition SelectionDAGNodes.h:1321

llvm::SDValue::getValueType
EVT getValueType() const
Return the ValueType of the referenced return value.
Definition SelectionDAGNodes.h:1271

llvm::SDValue::getOperand
const SDValue & getOperand(unsigned i) const
Definition SelectionDAGNodes.h:1279

llvm::SDValue::getConstantOperandAPInt
const APInt & getConstantOperandAPInt(unsigned i) const
Definition SelectionDAGNodes.h:1287

llvm::SDValue::getScalarValueSizeInBits
uint64_t getScalarValueSizeInBits() const
Definition SelectionDAGNodes.h:205

llvm::SDValue::getResNo
unsigned getResNo() const
get the index which selects a specific result in the SDNode
Definition SelectionDAGNodes.h:158

llvm::SDValue::getConstantOperandVal
uint64_t getConstantOperandVal(unsigned i) const
Definition SelectionDAGNodes.h:1283

llvm::SDValue::getOpcode
unsigned getOpcode() const
Definition SelectionDAGNodes.h:1267

llvm::SDValue::getNumOperands
unsigned getNumOperands() const
Definition SelectionDAGNodes.h:1275

llvm::SelectionDAG
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition SelectionDAG.h:229

llvm::SelectionDAG::getExtLoad
LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Definition SelectionDAG.cpp:9767

llvm::SelectionDAG::getTargetGlobalAddress
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition SelectionDAG.h:758

llvm::SelectionDAG::getStackArgumentTokenFactor
LLVM_ABI SDValue getStackArgumentTokenFactor(SDValue Chain)
Compute a TokenFactor to force all the incoming stack arguments to be loaded from the stack.
Definition SelectionDAG.cpp:8390

llvm::SelectionDAG::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
Definition SelectionDAG.h:500

llvm::SelectionDAG::getCopyToReg
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
Definition SelectionDAG.h:813

llvm::SelectionDAG::getMergeValues
LLVM_ABI SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
Definition SelectionDAG.cpp:9510

llvm::SelectionDAG::getVTList
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
Definition SelectionDAG.cpp:11177

llvm::SelectionDAG::getSplatValue
LLVM_ABI SDValue getSplatValue(SDValue V, bool LegalTypes=false)
If V is a splat vector, return its scalar source operand by extracting that element from the source v...
Definition SelectionDAG.cpp:3089

llvm::SelectionDAG::getAllOnesConstant
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
Definition SelectionDAG.cpp:1794

llvm::SelectionDAG::getMachineNode
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
Definition SelectionDAG.cpp:11618

llvm::SelectionDAG::getNodeIfExists
LLVM_ABI SDNode * getNodeIfExists(unsigned Opcode, SDVTList VTList, ArrayRef< SDValue > Ops, const SDNodeFlags Flags, bool AllowCommute=false)
Get the specified node if it's already available, or else return NULL.
Definition SelectionDAG.cpp:11765

llvm::SelectionDAG::getSetCC
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
Definition SelectionDAG.h:1324

llvm::SelectionDAG::UnrollVectorOp
LLVM_ABI SDValue UnrollVectorOp(SDNode *N, unsigned ResNE=0)
Utility function used by legalize and lowering to "unroll" a vector operation by splitting out the sc...
Definition SelectionDAG.cpp:13262

llvm::SelectionDAG::haveNoCommonBitsSet
LLVM_ABI bool haveNoCommonBitsSet(SDValue A, SDValue B) const
Return true if A and B have no common bits set.
Definition SelectionDAG.cpp:6190

llvm::SelectionDAG::getRegister
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
Definition SelectionDAG.cpp:2322

llvm::SelectionDAG::getLoad
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
Definition SelectionDAG.cpp:9750

llvm::SelectionDAG::getMemIntrinsicNode
LLVM_ABI SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=LocationSize::precise(0), const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
Definition SelectionDAG.cpp:9521

llvm::SelectionDAG::addNoMergeSiteInfo
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
Definition SelectionDAG.h:2562

llvm::SelectionDAG::SplitVectorOperand
std::pair< SDValue, SDValue > SplitVectorOperand(const SDNode *N, unsigned OpNo)
Split the node's operand with EXTRACT_SUBVECTOR and return the low/high part.
Definition SelectionDAG.h:2473

llvm::SelectionDAG::getNOT
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
Definition SelectionDAG.cpp:1616

llvm::SelectionDAG::getTargetLoweringInfo
const TargetLowering & getTargetLoweringInfo() const
Definition SelectionDAG.h:504

llvm::SelectionDAG::getTargetJumpTable
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
Definition SelectionDAG.h:768

llvm::SelectionDAG::getUNDEF
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
Definition SelectionDAG.h:1179

llvm::SelectionDAG::getCALLSEQ_END
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
Definition SelectionDAG.h:1156

llvm::SelectionDAG::getBuildVector
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition SelectionDAG.h:868

llvm::SelectionDAG::getBitcast
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
Definition SelectionDAG.cpp:2427

llvm::SelectionDAG::getCopyFromReg
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
Definition SelectionDAG.h:839

llvm::SelectionDAG::getSelect
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
Definition SelectionDAG.h:1353

llvm::SelectionDAG::getNegative
LLVM_ABI SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
Definition SelectionDAG.cpp:1611

llvm::SelectionDAG::setNodeMemRefs
LLVM_ABI void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
Definition SelectionDAG.cpp:11386

llvm::SelectionDAG::getZeroExtendInReg
LLVM_ABI SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
Definition SelectionDAG.cpp:1562

llvm::SelectionDAG::getDataLayout
const DataLayout & getDataLayout() const
Definition SelectionDAG.h:498

llvm::SelectionDAG::getConstant
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
Definition SelectionDAG.cpp:1660

llvm::SelectionDAG::getSignedTargetConstant
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition SelectionDAG.h:719

llvm::SelectionDAG::getTruncStore
LLVM_ABI SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Definition SelectionDAG.cpp:9876

llvm::SelectionDAG::ReplaceAllUsesWith
LLVM_ABI void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
Definition SelectionDAG.cpp:12135

llvm::SelectionDAG::SplitVector
LLVM_ABI std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
Definition SelectionDAG.cpp:13546

llvm::SelectionDAG::getStore
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
Definition SelectionDAG.cpp:9800

llvm::SelectionDAG::getSignedConstant
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Definition SelectionDAG.cpp:1788

llvm::SelectionDAG::InferPtrAlign
LLVM_ABI MaybeAlign InferPtrAlign(SDValue Ptr) const
Infer alignment of a load / store address.
Definition SelectionDAG.cpp:13450

llvm::SelectionDAG::getCALLSEQ_START
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
Definition SelectionDAG.h:1144

llvm::SelectionDAG::getTargetExtractSubreg
LLVM_ABI SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
Definition SelectionDAG.cpp:11736

llvm::SelectionDAG::getSelectCC
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
Definition SelectionDAG.h:1363

llvm::SelectionDAG::getSExtOrTrunc
LLVM_ABI SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
Definition SelectionDAG.cpp:1496

llvm::SelectionDAG::isKnownNeverZero
LLVM_ABI bool isKnownNeverZero(SDValue Op, unsigned Depth=0) const
Test whether the given SDValue is known to contain non-zero value(s).
Definition SelectionDAG.cpp:5953

llvm::SelectionDAG::getExternalSymbol
LLVM_ABI SDValue getExternalSymbol(const char *Sym, EVT VT)
Definition SelectionDAG.cpp:2046

llvm::SelectionDAG::getTarget
const TargetMachine & getTarget() const
Definition SelectionDAG.h:499

llvm::SelectionDAG::getIntPtrConstant
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
Definition SelectionDAG.cpp:1800

llvm::SelectionDAG::getValueType
LLVM_ABI SDValue getValueType(EVT)
Definition SelectionDAG.cpp:2032

llvm::SelectionDAG::getNode
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
Definition SelectionDAG.cpp:10808

llvm::SelectionDAG::getTargetConstant
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition SelectionDAG.h:707

llvm::SelectionDAG::ComputeNumSignBits
LLVM_ABI unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
Definition SelectionDAG.cpp:4596

llvm::SelectionDAG::getVectorIdxConstant
LLVM_ABI SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
Definition SelectionDAG.cpp:1818

llvm::SelectionDAG::ReplaceAllUsesOfValueWith
LLVM_ABI void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
Definition SelectionDAG.cpp:12296

llvm::SelectionDAG::getMachineFunction
MachineFunction & getMachineFunction() const
Definition SelectionDAG.h:493

llvm::SelectionDAG::getFrameIndex
LLVM_ABI SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
Definition SelectionDAG.cpp:1919

llvm::SelectionDAG::computeKnownBits
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
Definition SelectionDAG.cpp:3222

llvm::SelectionDAG::getRegisterMask
LLVM_ABI SDValue getRegisterMask(const uint32_t *RegMask)
Definition SelectionDAG.cpp:2338

llvm::SelectionDAG::getZExtOrTrunc
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
Definition SelectionDAG.cpp:1502

llvm::SelectionDAG::getCondCode
LLVM_ABI SDValue getCondCode(ISD::CondCode Cond)
Definition SelectionDAG.cpp:2073

llvm::SelectionDAG::addCallSiteInfo
void addCallSiteInfo(const SDNode *Node, CallSiteInfo &&CallInfo)
Set CallSiteInfo to be associated with Node.
Definition SelectionDAG.h:2513

llvm::SelectionDAG::MaskedValueIsZero
LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
Definition SelectionDAG.cpp:2780

llvm::SelectionDAG::getObjectPtrOffset
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
Definition SelectionDAG.h:1129

llvm::SelectionDAG::getContext
LLVMContext * getContext() const
Definition SelectionDAG.h:511

llvm::SelectionDAG::getTargetExternalSymbol
LLVM_ABI SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
Definition SelectionDAG.cpp:2063

llvm::SelectionDAG::CreateStackTemporary
LLVM_ABI SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
Definition SelectionDAG.cpp:2551

llvm::SelectionDAG::getTargetConstantPool
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
Definition SelectionDAG.h:777

llvm::SelectionDAG::getEntryNode
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition SelectionDAG.h:581

llvm::SelectionDAG::getMaskedLoad
LLVM_ABI SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, ISD::LoadExtType, bool IsExpanding=false)
Definition SelectionDAG.cpp:10398

llvm::SelectionDAG::getDenormalMode
DenormalMode getDenormalMode(EVT VT) const
Return the current function's default denormal handling kind for the given floating point type.
Definition SelectionDAG.h:2577

llvm::SelectionDAG::SplitScalar
LLVM_ABI std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
Definition SelectionDAG.cpp:13486

llvm::SelectionDAG::getVectorShuffle
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
Definition SelectionDAG.cpp:2141

llvm::SelectionDAG::getLogicalNOT
LLVM_ABI SDValue getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a logical NOT operation as (XOR Val, BooleanOne).
Definition SelectionDAG.cpp:1620

llvm::ShuffleVectorInst
This instruction constructs a fixed permutation of two input vectors.
Definition Instructions.h:1935

llvm::ShuffleVectorInst::getType
VectorType * getType() const
Overload to return most specific vector type.
Definition Instructions.h:1976

llvm::ShuffleVectorInst::getShuffleMask
static LLVM_ABI void getShuffleMask(const Constant *Mask, SmallVectorImpl< int > &Result)
Convert the input shuffle mask operand to a vector of integers.
Definition Instructions.cpp:1861

llvm::ShuffleVectorInst::isIdentityMask
static LLVM_ABI bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
Definition Instructions.cpp:1951

llvm::ShuffleVectorSDNode
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
Definition SelectionDAGNodes.h:1691

llvm::ShuffleVectorSDNode::getMaskElt
int getMaskElt(unsigned Idx) const
Definition SelectionDAGNodes.h:1709

llvm::ShuffleVectorSDNode::getSplatIndex
int getSplatIndex() const
Definition SelectionDAGNodes.h:1716

llvm::ShuffleVectorSDNode::getMask
ArrayRef< int > getMask() const
Definition SelectionDAGNodes.h:1704

llvm::ShuffleVectorSDNode::isSplat
bool isSplat() const
Definition SelectionDAGNodes.h:1714

llvm::ShuffleVectorSDNode::isSplatMask
static LLVM_ABI bool isSplatMask(ArrayRef< int > Mask)
Definition SelectionDAG.cpp:13955

llvm::SmallPtrSetImpl::insert_range
void insert_range(Range &&R)
Definition SmallPtrSet.h:474

llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition SmallPtrSet.h:389

llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition SmallPtrSet.h:527

llvm::SmallSet::empty
bool empty() const
Definition SmallSet.h:168

llvm::SmallSet::erase
bool erase(const T &V)
Definition SmallSet.h:199

llvm::SmallSet::insert
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:183

llvm::SmallVectorImpl
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition SmallVector.h:574

llvm::SmallVectorImpl::pop_back_val
T pop_back_val()
Definition SmallVector.h:674

llvm::SmallVectorImpl::insert
iterator insert(iterator I, T &&Elt)
Definition SmallVector.h:812

llvm::SmallVectorImpl::resize
void resize(size_type N)
Definition SmallVector.h:639

llvm::SmallVectorTemplateBase::push_back
void push_back(const T &Elt)
Definition SmallVector.h:417

llvm::SmallVectorTemplateCommon::end
iterator end()
Definition SmallVector.h:273

llvm::SmallVectorTemplateCommon::size
size_t size() const
Definition SmallVector.h:80

llvm::SmallVectorTemplateCommon::begin
iterator begin()
Definition SmallVector.h:271

llvm::SmallVectorTemplateCommon::empty
bool empty() const
Definition SmallVector.h:83

llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition SmallVector.h:1203

llvm::StoreInst
An instruction for storing to memory.
Definition Instructions.h:297

llvm::StoreSDNode
This class is used to represent ISD::STORE nodes.
Definition SelectionDAGNodes.h:2586

llvm::StoreSDNode::getBasePtr
const SDValue & getBasePtr() const
Definition SelectionDAGNodes.h:2605

llvm::StoreSDNode::getValue
const SDValue & getValue() const
Definition SelectionDAGNodes.h:2604

llvm::StoreSDNode::isTruncatingStore
bool isTruncatingStore() const
Return true if the op does a truncation before store.
Definition SelectionDAGNodes.h:2602

llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55

llvm::StringRef::bytes_end
const unsigned char * bytes_end() const
Definition StringRef.h:127

llvm::StringRef::size
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:146

llvm::StringRef::bytes_begin
const unsigned char * bytes_begin() const
Definition StringRef.h:124

llvm::StructType::get
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
Definition Type.cpp:413

llvm::TargetInstrInfo
TargetInstrInfo - Interface to description of machine instruction set.
Definition TargetInstrInfo.h:114

llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition TargetLibraryInfo.h:266

llvm::TargetLoweringBase::ArgListEntry
Definition TargetLowering.h:301

llvm::TargetLoweringBase::isOperationExpand
bool isOperationExpand(unsigned Op, EVT VT) const
Return true if the specified operation is illegal on this target or unlikely to be made legal with cu...
Definition TargetLowering.h:1476

llvm::TargetLoweringBase::setBooleanVectorContents
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
Definition TargetLowering.h:2582

llvm::TargetLoweringBase::setOperationAction
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
Definition TargetLowering.h:2645

llvm::TargetLoweringBase::finalizeLowering
virtual void finalizeLowering(MachineFunction &MF) const
Execute target specific actions to finalize target lowering.
Definition TargetLoweringBase.cpp:2436

llvm::TargetLoweringBase::setMaxDivRemBitWidthSupported
void setMaxDivRemBitWidthSupported(unsigned SizeInBits)
Set the size in bits of the maximum div/rem the backend supports.
Definition TargetLowering.h:2874

llvm::TargetLoweringBase::PredictableSelectIsExpensive
bool PredictableSelectIsExpensive
Tells the code generator that select is more expensive than a branch if the branch is usually predict...
Definition TargetLowering.h:3950

llvm::TargetLoweringBase::getValueType
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
Definition TargetLowering.h:1733

llvm::TargetLoweringBase::Custom
@ Custom
Definition TargetLowering.h:207

llvm::TargetLoweringBase::Expand
@ Expand
Definition TargetLowering.h:205

llvm::TargetLoweringBase::Promote
@ Promote
Definition TargetLowering.h:204

llvm::TargetLoweringBase::Legal
@ Legal
Definition TargetLowering.h:203

llvm::TargetLoweringBase::LibCall
@ LibCall
Definition TargetLowering.h:206

llvm::TargetLoweringBase::getLibcallCallingConv
CallingConv::ID getLibcallCallingConv(RTLIB::Libcall Call) const
Get the CallingConv that should be used for the specified libcall.
Definition TargetLowering.h:3652

llvm::TargetLoweringBase::MaxStoresPerMemcpyOptSize
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
Definition TargetLowering.h:3911

llvm::TargetLoweringBase::getRegClassFor
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
Definition TargetLowering.h:1062

llvm::TargetLoweringBase::ShiftLegalizationStrategy
ShiftLegalizationStrategy
Return the preferred strategy to legalize tihs SHIFT instruction, with ExpansionFactor being the recu...
Definition TargetLowering.h:1097

llvm::TargetLoweringBase::ShiftLegalizationStrategy::LowerToLibcall
@ LowerToLibcall
Definition TargetLowering.h:1100

llvm::TargetLoweringBase::setMinStackArgumentAlignment
void setMinStackArgumentAlignment(Align Alignment)
Set the minimum stack alignment of an argument.
Definition TargetLowering.h:2860

llvm::TargetLoweringBase::getTargetMachine
const TargetMachine & getTargetMachine() const
Definition TargetLowering.h:374

llvm::TargetLoweringBase::TypeSoftPromoteHalf
@ TypeSoftPromoteHalf
Definition TargetLowering.h:222

llvm::TargetLoweringBase::setIndexedMaskedLoadAction
void setIndexedMaskedLoadAction(unsigned IdxMode, MVT VT, LegalizeAction Action)
Indicate that the specified indexed masked load does or does not work with the specified type and ind...
Definition TargetLowering.h:2752

llvm::TargetLoweringBase::setIndexedLoadAction
void setIndexedLoadAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
Definition TargetLowering.h:2718

llvm::TargetLoweringBase::setPrefLoopAlignment
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
Definition TargetLowering.h:2854

llvm::TargetLoweringBase::setMaxAtomicSizeInBitsSupported
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
Definition TargetLowering.h:2868

llvm::TargetLoweringBase::getSchedulingPreference
Sched::Preference getSchedulingPreference() const
Return target scheduling preference.
Definition TargetLowering.h:1049

llvm::TargetLoweringBase::setMinFunctionAlignment
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
Definition TargetLowering.h:2841

llvm::TargetLoweringBase::MaxStoresPerMemsetOptSize
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
Definition TargetLowering.h:3896

llvm::TargetLoweringBase::setBooleanContents
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
Definition TargetLowering.h:2568

llvm::TargetLoweringBase::MaxStoresPerMemmove
unsigned MaxStoresPerMemmove
Specify maximum number of store instructions per memmove call.
Definition TargetLowering.h:3944

llvm::TargetLoweringBase::computeRegisterProperties
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
Definition TargetLoweringBase.cpp:1424

llvm::TargetLoweringBase::MaxStoresPerMemmoveOptSize
unsigned MaxStoresPerMemmoveOptSize
Likewise for functions with the OptSize attribute.
Definition TargetLowering.h:3946

llvm::TargetLoweringBase::addRegisterClass
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
Definition TargetLowering.h:2628

llvm::TargetLoweringBase::isTypeLegal
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
Definition TargetLowering.h:1113

llvm::TargetLoweringBase::setIndexedStoreAction
void setIndexedStoreAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
Definition TargetLowering.h:2735

llvm::TargetLoweringBase::getPointerTy
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
Definition TargetLowering.h:381

llvm::TargetLoweringBase::setPrefFunctionAlignment
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
Definition TargetLowering.h:2847

llvm::TargetLoweringBase::getMaxSupportedInterleaveFactor
virtual unsigned getMaxSupportedInterleaveFactor() const
Get the maximum supported factor for interleaved memory accesses.
Definition TargetLowering.h:3234

llvm::TargetLoweringBase::setIndexedMaskedStoreAction
void setIndexedMaskedStoreAction(unsigned IdxMode, MVT VT, LegalizeAction Action)
Indicate that the specified indexed masked store does or does not work with the specified type and in...
Definition TargetLowering.h:2762

llvm::TargetLoweringBase::MaxStoresPerMemset
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
Definition TargetLowering.h:3894

llvm::TargetLoweringBase::setTruncStoreAction
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
Definition TargetLowering.h:2708

llvm::TargetLoweringBase::ZeroOrOneBooleanContent
@ ZeroOrOneBooleanContent
Definition TargetLowering.h:239

llvm::TargetLoweringBase::ZeroOrNegativeOneBooleanContent
@ ZeroOrNegativeOneBooleanContent
Definition TargetLowering.h:240

llvm::TargetLoweringBase::preferredShiftLegalizationStrategy
virtual ShiftLegalizationStrategy preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N, unsigned ExpansionFactor) const
Definition TargetLowering.h:1103

llvm::TargetLoweringBase::isOperationLegalOrCustom
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
Definition TargetLowering.h:1359

llvm::TargetLoweringBase::setStackPointerRegisterToSaveRestore
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
Definition TargetLowering.h:2603

llvm::TargetLoweringBase::AddPromotedToType
void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
If Opc/OrigVT is specified as being promoted, the promotion code defaults to trying a larger integer/...
Definition TargetLowering.h:2812

llvm::TargetLoweringBase::AtomicExpansionKind
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
Definition TargetLowering.h:256

llvm::TargetLoweringBase::AtomicExpansionKind::LLSC
@ LLSC
Definition TargetLowering.h:260

llvm::TargetLoweringBase::AtomicExpansionKind::CmpXChg
@ CmpXChg
Definition TargetLowering.h:264

llvm::TargetLoweringBase::AtomicExpansionKind::LLOnly
@ LLOnly
Definition TargetLowering.h:262

llvm::TargetLoweringBase::AtomicExpansionKind::None
@ None
Definition TargetLowering.h:257

llvm::TargetLoweringBase::AtomicExpansionKind::Expand
@ Expand
Definition TargetLowering.h:270

llvm::TargetLoweringBase::findRepresentativeClass
virtual std::pair< const TargetRegisterClass *, uint8_t > findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const
Return the largest legal super-reg register class of the register class for the specified type and it...
Definition TargetLoweringBase.cpp:1397

llvm::TargetLoweringBase::getLibcallImpl
RTLIB::LibcallImpl getLibcallImpl(RTLIB::Libcall Call) const
Get the libcall impl routine name for the specified libcall.
Definition TargetLowering.h:3616

llvm::TargetLoweringBase::getLibcallImplName
static StringRef getLibcallImplName(RTLIB::LibcallImpl Call)
Get the libcall routine name for the specified libcall implementation.
Definition TargetLowering.h:3627

llvm::TargetLoweringBase::setTargetDAGCombine
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
Definition TargetLowering.h:2833

llvm::TargetLoweringBase::setLoadExtAction
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
Definition TargetLowering.h:2662

llvm::TargetLoweringBase::setLibcallImpl
void setLibcallImpl(RTLIB::Libcall Call, RTLIB::LibcallImpl Impl)
Definition TargetLowering.h:3611

llvm::TargetLoweringBase::getTypeAction
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
Definition TargetLowering.h:1160

llvm::TargetLoweringBase::getLibcallName
const char * getLibcallName(RTLIB::Libcall Call) const
Get the libcall routine name for the specified libcall.
Definition TargetLowering.h:3622

llvm::TargetLoweringBase::ArgListTy
std::vector< ArgListEntry > ArgListTy
Definition TargetLowering.h:341

llvm::TargetLoweringBase::MaxStoresPerMemcpy
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
Definition TargetLowering.h:3909

llvm::TargetLoweringBase::setSchedulingPreference
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
Definition TargetLowering.h:2587

llvm::TargetLoweringBase::insertSSPDeclarations
virtual void insertSSPDeclarations(Module &M) const
Inserts necessary declarations for SSP (stack protection) purpose.
Definition TargetLoweringBase.cpp:2136

llvm::TargetLowering
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Definition TargetLowering.h:3973

llvm::TargetLowering::ConstraintType
ConstraintType
Definition TargetLowering.h:5155

llvm::TargetLowering::C_RegisterClass
@ C_RegisterClass
Definition TargetLowering.h:5157

llvm::TargetLowering::C_Memory
@ C_Memory
Definition TargetLowering.h:5158

llvm::TargetLowering::C_Immediate
@ C_Immediate
Definition TargetLowering.h:5160

llvm::TargetLowering::SimplifyDemandedVectorElts
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Vector Op.
Definition TargetLowering.cpp:3171

llvm::TargetLowering::softenSetCCOperands
void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, const SDLoc &DL, const SDValue OldLHS, const SDValue OldRHS) const
Soften the operands of a comparison.
Definition TargetLowering.cpp:312

llvm::TargetLowering::makeLibCall
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
Definition TargetLowering.cpp:155

llvm::TargetLowering::getConstraintType
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
Definition TargetLowering.cpp:5721

llvm::TargetLowering::parametersInCSRMatch
bool parametersInCSRMatch(const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask, const SmallVectorImpl< CCValAssign > &ArgLocs, const SmallVectorImpl< SDValue > &OutVals) const
Check whether parameters to a call that are passed in callee saved registers are the same as from the...
Definition TargetLowering.cpp:91

llvm::TargetLowering::LowerToTLSEmulatedModel
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
Definition TargetLowering.cpp:10716

llvm::TargetLowering::ConstraintWeight
ConstraintWeight
Definition TargetLowering.h:5165

llvm::TargetLowering::CW_Invalid
@ CW_Invalid
Definition TargetLowering.h:5167

llvm::TargetLowering::CW_SpecificReg
@ CW_SpecificReg
Definition TargetLowering.h:5174

llvm::TargetLowering::CW_Register
@ CW_Register
Definition TargetLowering.h:5175

llvm::TargetLowering::CW_Default
@ CW_Default
Definition TargetLowering.h:5178

llvm::TargetLowering::LowerCallTo
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
Definition SelectionDAGBuilder.cpp:11134

llvm::TargetLowering::expandDIVREMByConstant
bool expandDIVREMByConstant(SDNode *N, SmallVectorImpl< SDValue > &Result, EVT HiLoVT, SelectionDAG &DAG, SDValue LL=SDValue(), SDValue LH=SDValue()) const
Attempt to expand an n-bit div/rem/divrem by constant using a n/2-bit urem by constant and other arit...
Definition TargetLowering.cpp:7999

llvm::TargetLowering::isPositionIndependent
bool isPositionIndependent() const
Definition TargetLowering.cpp:55

llvm::TargetLowering::getSingleConstraintMatchWeight
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
Definition TargetLowering.cpp:6165

llvm::TargetLowering::buildLegalVectorShuffle
SDValue buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0, SDValue N1, MutableArrayRef< int > Mask, SelectionDAG &DAG) const
Tries to build a legal vector shuffle using the provided parameters or equivalent variations.
Definition TargetLowering.cpp:3978

llvm::TargetLowering::getRegForInlineAsmConstraint
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
Definition TargetLowering.cpp:5865

llvm::TargetLowering::SimplifyDemandedBits
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
Definition TargetLowering.cpp:1162

llvm::TargetLowering::SimplifyDemandedBitsForTargetNode
virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
Definition TargetLowering.cpp:3951

llvm::TargetLowering::TargetLowering
TargetLowering(const TargetLowering &)=delete

llvm::TargetLowering::isConstTrueVal
bool isConstTrueVal(SDValue N) const
Return if the N is a constant or constant vector equal to the true value from getBooleanContents().
Definition TargetLowering.cpp:4063

llvm::TargetLowering::getRoundingControlRegisters
virtual ArrayRef< MCPhysReg > getRoundingControlRegisters() const
Returns a 0 terminated array of rounding control registers that can be attached into strict FP call.
Definition TargetLowering.h:5086

llvm::TargetLowering::LowerAsmOperandForConstraint
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
Definition TargetLowering.cpp:5783

llvm::TargetLowering::canCreateUndefOrPoisonForTargetNode
virtual bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const
Return true if Op can create undef or poison from non-undef & non-poison operands.
Definition TargetLowering.cpp:4019

llvm::TargetMachine
Primary interface to the complete machine description for the target machine.
Definition TargetMachine.h:83

llvm::TargetMachine::getTLSModel
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
Definition TargetMachine.cpp:262

llvm::TargetMachine::getExceptionModel
ExceptionHandling getExceptionModel() const
Return the ExceptionHandling to use, considering TargetOptions and the Triple's default.
Definition TargetMachine.h:248

llvm::TargetMachine::getTargetTriple
const Triple & getTargetTriple() const
Definition TargetMachine.h:132

llvm::TargetMachine::useEmulatedTLS
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
Definition TargetMachine.cpp:259

llvm::TargetMachine::getSubtargetImpl
virtual const TargetSubtargetInfo * getSubtargetImpl(const Function &) const
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
Definition TargetMachine.h:139

llvm::TargetMachine::Options
TargetOptions Options
Definition TargetMachine.h:124

llvm::TargetOptions::EnableFastISel
unsigned EnableFastISel
EnableFastISel - This flag enables fast-path instruction selection which trades away generated code q...
Definition TargetOptions.h:220

llvm::TargetOptions::GuaranteedTailCallOpt
unsigned GuaranteedTailCallOpt
GuaranteedTailCallOpt - This flag is enabled when -tailcallopt is specified on the commandline.
Definition TargetOptions.h:209

llvm::TargetOptions::EmitCallGraphSection
unsigned EmitCallGraphSection
Emit section containing call graph metadata.
Definition TargetOptions.h:311

llvm::TargetRegisterClass
Definition TargetRegisterInfo.h:45

llvm::TargetRegisterInfo
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Definition TargetRegisterInfo.h:242

llvm::TargetSubtargetInfo::getRegisterInfo
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.

llvm::Target
Target - Wrapper for Target specific information.
Definition TargetRegistry.h:146

llvm::Triple
Triple - Helper class for working with autoconf configuration names.
Definition Triple.h:47

llvm::Triple::getObjectFormat
ObjectFormatType getObjectFormat() const
Get the object format for this triple.
Definition Triple.h:439

llvm::Triple::MachO
@ MachO
Definition Triple.h:326

llvm::Triple::ELF
@ ELF
Definition Triple.h:324

llvm::Triple::COFF
@ COFF
Definition Triple.h:322

llvm::TypeSize::getFixed
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343

llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45

llvm::Type::getInt64Ty
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
Definition Type.cpp:297

llvm::Type::isVectorTy
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:273

llvm::Type::getInt32Ty
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
Definition Type.cpp:296

llvm::Type::isPointerTy
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:267

llvm::Type::isFloatTy
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition Type.h:153

llvm::Type::getVoidTy
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
Definition Type.cpp:280

llvm::Type::getInt8Ty
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
Definition Type.cpp:294

llvm::Type::getScalarType
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:352

llvm::Type::getPrimitiveSizeInBits
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:197

llvm::Type::getInt16Ty
static LLVM_ABI IntegerType * getInt16Ty(LLVMContext &C)
Definition Type.cpp:295

llvm::Type::isHalfTy
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
Definition Type.h:142

llvm::Type::getContext
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition Type.h:128

llvm::Type::getScalarSizeInBits
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:230

llvm::Type::isFloatingPointTy
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition Type.h:184

llvm::Type::isIntegerTy
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240

llvm::Type::isFPOrFPVectorTy
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
Definition Type.h:225

llvm::Use
A Use represents the edge between a Value definition and its users.
Definition Use.h:35

llvm::Use::getOperandNo
LLVM_ABI unsigned getOperandNo() const
Return the operand # of this use in its User.
Definition Use.cpp:35

llvm::Use::getUser
User * getUser() const
Returns the User that contains this Use.
Definition Use.h:61

llvm::User
Definition User.h:44

llvm::User::getOperand
Value * getOperand(unsigned i) const
Definition User.h:232

llvm::User::getNumOperands
unsigned getNumOperands() const
Definition User.h:254

llvm::Value
LLVM Value Representation.
Definition Value.h:75

llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256

llvm::Value::hasOneUse
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439

llvm::VectorType
Base class of all SIMD vector types.
Definition DerivedTypes.h:430

llvm::VectorType::getElementType
Type * getElementType() const
Definition DerivedTypes.h:463

llvm::cl::opt
Definition CommandLine.h:1455

llvm::detail::DenseSetImpl::insert
std::pair< iterator, bool > insert(const ValueT &V)
Definition DenseSet.h:202

llvm::detail::DenseSetImpl::contains
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
Definition DenseSet.h:175

llvm::details::FixedOrScalableQuantity::getFixedValue
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200

llvm::ilist_detail::node_parent_access::getParent
const ParentTy * getParent() const
Definition ilist_node.h:34

llvm::ilist_node_impl::getIterator
self_iterator getIterator()
Definition ilist_node.h:123

llvm::iterator_range::end
IteratorT end() const
Definition iterator_range.h:58

uint32_t

uint64_t

uint8_t

unsigned

ErrorHandling.h

llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition ErrorHandling.h:164

TargetMachine.h

false
Definition MachinePipeliner.cpp:244

llvm::AArch64CC::AL
@ AL
Definition AArch64BaseInfo.h:269

llvm::AMDGPU::HSAMD::Kernel::Arg::Key::Align
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
Definition AMDGPUMetadata.h:183

llvm::AMDGPU::HSAMD::Kernel::Key::Args
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
Definition AMDGPUMetadata.h:396

llvm::ARMCC::getOppositeCondition
static CondCodes getOppositeCondition(CondCodes CC)
Definition ARMBaseInfo.h:48

llvm::ARMCC::CondCodes
CondCodes
Definition ARMBaseInfo.h:30

llvm::ARMCC::HS
@ HS
Definition ARMBaseInfo.h:33

llvm::ARMCC::EQ
@ EQ
Definition ARMBaseInfo.h:31

llvm::ARMCC::LE
@ LE
Definition ARMBaseInfo.h:44

llvm::ARMCC::LS
@ LS
Definition ARMBaseInfo.h:40

llvm::ARMCC::LT
@ LT
Definition ARMBaseInfo.h:42

llvm::ARMCC::HI
@ HI
Definition ARMBaseInfo.h:39

llvm::ARMCC::GE
@ GE
Definition ARMBaseInfo.h:41

llvm::ARMCC::LO
@ LO
Definition ARMBaseInfo.h:34

llvm::ARMCC::PL
@ PL
Definition ARMBaseInfo.h:36

llvm::ARMCC::AL
@ AL
Definition ARMBaseInfo.h:45

llvm::ARMCC::VS
@ VS
Definition ARMBaseInfo.h:37

llvm::ARMCC::GT
@ GT
Definition ARMBaseInfo.h:43

llvm::ARMCC::VC
@ VC
Definition ARMBaseInfo.h:38

llvm::ARMCC::NE
@ NE
Definition ARMBaseInfo.h:32

llvm::ARMCC::MI
@ MI
Definition ARMBaseInfo.h:35

llvm::ARMCP::CPLSDA
@ CPLSDA
Definition ARMConstantPoolValue.h:41

llvm::ARMCP::CPBlockAddress
@ CPBlockAddress
Definition ARMConstantPoolValue.h:40

llvm::ARMCP::CPValue
@ CPValue
Definition ARMConstantPoolValue.h:38

llvm::ARMCP::SECREL
@ SECREL
Thread Pointer Offset.
Definition ARMConstantPoolValue.h:52

llvm::ARMCP::SBREL
@ SBREL
Section Relative (Windows TLS)
Definition ARMConstantPoolValue.h:53

llvm::ARMCP::TLSGD
@ TLSGD
None.
Definition ARMConstantPoolValue.h:48

llvm::ARMCP::GOTTPOFF
@ GOTTPOFF
Global Offset Table, PC Relative.
Definition ARMConstantPoolValue.h:50

llvm::ARMCP::TPOFF
@ TPOFF
Global Offset Table, Thread Pointer Offset.
Definition ARMConstantPoolValue.h:51

llvm::ARMII::TOF
TOF
Target Operand Flag enum.
Definition ARMBaseInfo.h:242

llvm::ARMII::MO_NONLAZY
@ MO_NONLAZY
MO_NONLAZY - This is an independent flag, on a symbol operand "FOO" it represents a symbol which,...
Definition ARMBaseInfo.h:288

llvm::ARMII::MO_SBREL
@ MO_SBREL
MO_SBREL - On a symbol operand, this represents a static base relative relocation.
Definition ARMBaseInfo.h:270

llvm::ARMII::MO_DLLIMPORT
@ MO_DLLIMPORT
MO_DLLIMPORT - On a symbol operand, this represents that the reference to the symbol is for an import...
Definition ARMBaseInfo.h:275

llvm::ARMII::MO_NO_FLAG
@ MO_NO_FLAG
Definition ARMBaseInfo.h:246

llvm::ARMII::MO_GOT
@ MO_GOT
MO_GOT - On a symbol operand, this represents a GOT relative relocation.
Definition ARMBaseInfo.h:266

llvm::ARMII::MO_COFFSTUB
@ MO_COFFSTUB
MO_COFFSTUB - On a symbol operand "FOO", this indicates that the reference is actually to the "....
Definition ARMBaseInfo.h:263

llvm::ARMISD::VLD1x3_UPD
@ VLD1x3_UPD
Definition ARMSelectionDAGInfo.h:60

llvm::ARMISD::VST3LN_UPD
@ VST3LN_UPD
Definition ARMSelectionDAGInfo.h:67

llvm::ARMISD::VST4LN_UPD
@ VST4LN_UPD
Definition ARMSelectionDAGInfo.h:68

llvm::ARMISD::VLD3_UPD
@ VLD3_UPD
Definition ARMSelectionDAGInfo.h:50

llvm::ARMISD::VST1x4_UPD
@ VST1x4_UPD
Definition ARMSelectionDAGInfo.h:71

llvm::ARMISD::VLD1x4_UPD
@ VLD1x4_UPD
Definition ARMSelectionDAGInfo.h:61

llvm::ARMISD::VLD2_UPD
@ VLD2_UPD
Definition ARMSelectionDAGInfo.h:49

llvm::ARMISD::VLD1x2_UPD
@ VLD1x2_UPD
Definition ARMSelectionDAGInfo.h:59

llvm::ARMISD::VST3_UPD
@ VST3_UPD
Definition ARMSelectionDAGInfo.h:65

llvm::ARMISD::VST2LN_UPD
@ VST2LN_UPD
Definition ARMSelectionDAGInfo.h:66

llvm::ARMISD::VLD3DUP_UPD
@ VLD3DUP_UPD
Definition ARMSelectionDAGInfo.h:57

llvm::ARMISD::VST1_UPD
@ VST1_UPD
Definition ARMSelectionDAGInfo.h:64

llvm::ARMISD::VLD4DUP
@ VLD4DUP
Definition ARMSelectionDAGInfo.h:45

llvm::ARMISD::VLD1DUP
@ VLD1DUP
Definition ARMSelectionDAGInfo.h:42

llvm::ARMISD::VLD1DUP_UPD
@ VLD1DUP_UPD
Definition ARMSelectionDAGInfo.h:55

llvm::ARMISD::VLD2DUP
@ VLD2DUP
Definition ARMSelectionDAGInfo.h:43

llvm::ARMISD::VLD4DUP_UPD
@ VLD4DUP_UPD
Definition ARMSelectionDAGInfo.h:58

llvm::ARMISD::MVESEXT
@ MVESEXT
Definition ARMSelectionDAGInfo.h:29

llvm::ARMISD::BUILD_VECTOR
@ BUILD_VECTOR
Definition ARMSelectionDAGInfo.h:38

llvm::ARMISD::VLD4_UPD
@ VLD4_UPD
Definition ARMSelectionDAGInfo.h:51

llvm::ARMISD::MVEZEXT
@ MVEZEXT
Definition ARMSelectionDAGInfo.h:30

llvm::ARMISD::VLD3LN_UPD
@ VLD3LN_UPD
Definition ARMSelectionDAGInfo.h:53

llvm::ARMISD::VST1x3_UPD
@ VST1x3_UPD
Definition ARMSelectionDAGInfo.h:70

llvm::ARMISD::VLD1_UPD
@ VLD1_UPD
Definition ARMSelectionDAGInfo.h:48

llvm::ARMISD::VLD3DUP
@ VLD3DUP
Definition ARMSelectionDAGInfo.h:44

llvm::ARMISD::VST1x2_UPD
@ VST1x2_UPD
Definition ARMSelectionDAGInfo.h:69

llvm::ARMISD::VLD2DUP_UPD
@ VLD2DUP_UPD
Definition ARMSelectionDAGInfo.h:56

llvm::ARMISD::VLD2LN_UPD
@ VLD2LN_UPD
Definition ARMSelectionDAGInfo.h:52

llvm::ARMISD::VLD4LN_UPD
@ VLD4LN_UPD
Definition ARMSelectionDAGInfo.h:54

llvm::ARMISD::MVETRUNC
@ MVETRUNC
Definition ARMSelectionDAGInfo.h:31

llvm::ARMVCC::Then
@ Then
Definition ARMBaseInfo.h:91

llvm::ARMVCC::None
@ None
Definition ARMBaseInfo.h:90

llvm::ARM_AM::getShiftOpcForNode
static ShiftOpc getShiftOpcForNode(unsigned Opcode)
Definition ARMSelectionDAGInfo.h:78

llvm::ARM_AM::getSOImmVal
int getSOImmVal(unsigned Arg)
getSOImmVal - Given a 32-bit immediate, if it is something that can fit into an shifter_operand immed...
Definition ARMAddressingModes.h:149

llvm::ARM_AM::getFP32Imm
int getFP32Imm(const APInt &Imm)
getFP32Imm - Return an 8-bit floating-point version of the 32-bit floating-point value.
Definition ARMAddressingModes.h:692

llvm::ARM_AM::decodeVMOVModImm
uint64_t decodeVMOVModImm(unsigned ModImm, unsigned &EltBits)
decodeVMOVModImm - Decode a NEON/MVE modified immediate value into the element value and the element ...
Definition ARMAddressingModes.h:544

llvm::ARM_AM::getAM2Offset
unsigned getAM2Offset(unsigned AM2Opc)
Definition ARMAddressingModes.h:406

llvm::ARM_AM::isThumbImmShiftedVal
bool isThumbImmShiftedVal(unsigned V)
isThumbImmShiftedVal - Return true if the specified value can be obtained by left shifting a 8-bit im...
Definition ARMAddressingModes.h:221

llvm::ARM_AM::getT2SOImmVal
int getT2SOImmVal(unsigned Arg)
getT2SOImmVal - Given a 32-bit immediate, if it is something that can fit into a Thumb-2 shifter_oper...
Definition ARMAddressingModes.h:307

llvm::ARM_AM::createVMOVModImm
unsigned createVMOVModImm(unsigned OpCmode, unsigned Val)
Definition ARMAddressingModes.h:533

llvm::ARM_AM::getFP64Imm
int getFP64Imm(const APInt &Imm)
getFP64Imm - Return an 8-bit floating-point version of the 64-bit floating-point value.
Definition ARMAddressingModes.h:720

llvm::ARM_AM::sub
@ sub
Definition ARMAddressingModes.h:38

llvm::ARM_AM::getFP16Imm
int getFP16Imm(const APInt &Imm)
getFP16Imm - Return an 8-bit floating-point version of the 16-bit floating-point value.
Definition ARMAddressingModes.h:654

llvm::ARM_AM::getSORegOpc
unsigned getSORegOpc(ShiftOpc ShOp, unsigned Imm)
Definition ARMAddressingModes.h:98

llvm::ARM_AM::ShiftOpc
ShiftOpc
Definition ARMAddressingModes.h:27

llvm::ARM_AM::no_shift
@ no_shift
Definition ARMAddressingModes.h:28

llvm::ARM_AM::lsl
@ lsl
Definition ARMAddressingModes.h:30

llvm::ARM_AM::getFP32FP16Imm
int getFP32FP16Imm(const APInt &Imm)
If this is a FP16Imm encoded as a fp32 value, return the 8-bit encoding for it.
Definition ARMAddressingModes.h:679

llvm::ARM_AM::getAM2Op
AddrOpc getAM2Op(unsigned AM2Opc)
Definition ARMAddressingModes.h:409

llvm::ARM_MB::MemBOpt
MemBOpt
Definition ARMBaseInfo.h:58

llvm::ARM_MB::SY
@ SY
Definition ARMBaseInfo.h:74

llvm::ARM_MB::ISH
@ ISH
Definition ARMBaseInfo.h:70

llvm::ARM_MB::LD
@ LD
Definition ARMBaseInfo.h:72

llvm::ARM_MB::ISHST
@ ISHST
Definition ARMBaseInfo.h:69

llvm::ARM_MB::ST
@ ST
Definition ARMBaseInfo.h:73

llvm::ARM::isBitFieldInvertedMask
bool isBitFieldInvertedMask(unsigned v)
Definition ARMISelLowering.cpp:20696

llvm::ARM::FPStatusBits
const unsigned FPStatusBits
Definition ARMISelLowering.h:70

llvm::ARM::PredBlockMask::TT
@ TT
Definition ARMBaseInfo.h:107

llvm::ARM::FPReservedBits
const unsigned FPReservedBits
Definition ARMISelLowering.h:74

llvm::ARM::rmMask
@ rmMask
Definition ARMISelLowering.h:62

llvm::ARM::RoundingBitsPos
const unsigned RoundingBitsPos
Definition ARMISelLowering.h:66

llvm::ARM::createFastISel
FastISel * createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo)
Definition ARMFastISel.cpp:3153

llvm::ARM::ProfileKind::M
@ M
Definition ARMTargetParser.h:171

llvm::BitmaskEnumDetail::Mask
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition BitmaskEnum.h:126

llvm::COFF::Entry
@ Entry
Definition COFF.h:862

llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24

llvm::CallingConv::Swift
@ Swift
Calling convention for Swift.
Definition CallingConv.h:69

llvm::CallingConv::ARM_APCS
@ ARM_APCS
ARM Procedure Calling Standard (obsolete, but still used on some targets).
Definition CallingConv.h:107

llvm::CallingConv::CFGuard_Check
@ CFGuard_Check
Special calling convention on Windows for calling the Control Guard Check ICall funtion.
Definition CallingConv.h:82

llvm::CallingConv::PreserveMost
@ PreserveMost
Used for runtime calls that preserves most registers.
Definition CallingConv.h:63

llvm::CallingConv::ARM_AAPCS
@ ARM_AAPCS
ARM Architecture Procedure Calling Standard calling convention (aka EABI).
Definition CallingConv.h:111

llvm::CallingConv::CXX_FAST_TLS
@ CXX_FAST_TLS
Used for access functions.
Definition CallingConv.h:72

llvm::CallingConv::GHC
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition CallingConv.h:50

llvm::CallingConv::PreserveAll
@ PreserveAll
Used for runtime calls that preserves (almost) all registers.
Definition CallingConv.h:66

llvm::CallingConv::Fast
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41

llvm::CallingConv::Tail
@ Tail
Attemps to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition CallingConv.h:76

llvm::CallingConv::SwiftTail
@ SwiftTail
This follows the Swift calling convention in how arguments are passed but guarantees tail calls will ...
Definition CallingConv.h:87

llvm::CallingConv::ARM_AAPCS_VFP
@ ARM_AAPCS_VFP
Same as ARM_AAPCS, but uses hard floating point ABI.
Definition CallingConv.h:114

llvm::CallingConv::C
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34

llvm::CodeModel::Medium
@ Medium
Definition CodeGen.h:31

llvm::CodeModel::Large
@ Large
Definition CodeGen.h:31

llvm::CodeModel::Tiny
@ Tiny
Definition CodeGen.h:31

llvm::CodeModel::Small
@ Small
Definition CodeGen.h:31

llvm::CodeModel::Kernel
@ Kernel
Definition CodeGen.h:31

llvm::FloatABI::Hard
@ Hard
Definition CodeGen.h:69

llvm::GraphProgram::Name
Name
Definition GraphWriter.h:51

llvm::HexagonISD::CP
@ CP
Definition HexagonISelLowering.h:54

llvm::HexagonISD::JT
@ JT
Definition HexagonISelLowering.h:53

llvm::IRSimilarity::Legal
@ Legal
Definition IRSimilarityIdentifier.h:77

llvm::ISD::isNON_EXTLoad
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
Definition SelectionDAGNodes.h:3323

llvm::ISD::NodeType
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition ISDOpcodes.h:41

llvm::ISD::SETCC
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:807

llvm::ISD::MERGE_VALUES
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition ISDOpcodes.h:256

llvm::ISD::CTLZ_ZERO_UNDEF
@ CTLZ_ZERO_UNDEF
Definition ISDOpcodes.h:780

llvm::ISD::STRICT_FSETCC
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition ISDOpcodes.h:504

llvm::ISD::STRICT_FSIN
@ STRICT_FSIN
Definition ISDOpcodes.h:435

llvm::ISD::EH_SJLJ_LONGJMP
@ EH_SJLJ_LONGJMP
OUTCHAIN = EH_SJLJ_LONGJMP(INCHAIN, buffer) This corresponds to the eh.sjlj.longjmp intrinsic.
Definition ISDOpcodes.h:163

llvm::ISD::SREM
@ SREM
Definition ISDOpcodes.h:264

llvm::ISD::FGETSIGN
@ FGETSIGN
INT = FGETSIGN(FP) - Return the sign bit of the specified floating point value as an integer 0/1 valu...
Definition ISDOpcodes.h:531

llvm::ISD::SMUL_LOHI
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:270

llvm::ISD::UDIV
@ UDIV
Definition ISDOpcodes.h:263

llvm::ISD::INSERT_SUBVECTOR
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:593

llvm::ISD::UINT_TO_FP
@ UINT_TO_FP
Definition ISDOpcodes.h:869

llvm::ISD::UMIN
@ UMIN
Definition ISDOpcodes.h:726

llvm::ISD::BSWAP
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:771

llvm::ISD::ROTR
@ ROTR
Definition ISDOpcodes.h:766

llvm::ISD::ConstantFP
@ ConstantFP
Definition ISDOpcodes.h:87

llvm::ISD::STRICT_FATAN2
@ STRICT_FATAN2
Definition ISDOpcodes.h:441

llvm::ISD::UADDO
@ UADDO
Definition ISDOpcodes.h:344

llvm::ISD::SDIV
@ SDIV
Definition ISDOpcodes.h:262

llvm::ISD::STRICT_FCEIL
@ STRICT_FCEIL
Definition ISDOpcodes.h:454

llvm::ISD::STRICT_FTANH
@ STRICT_FTANH
Definition ISDOpcodes.h:444

llvm::ISD::ADD
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:259

llvm::ISD::STRICT_FMA
@ STRICT_FMA
Definition ISDOpcodes.h:425

llvm::ISD::ANY_EXTEND
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:841

llvm::ISD::FSUB
@ FSUB
Definition ISDOpcodes.h:411

llvm::ISD::FMA
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:511

llvm::ISD::INTRINSIC_VOID
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:215

llvm::ISD::RETURNADDR
@ RETURNADDR
Definition ISDOpcodes.h:111

llvm::ISD::EH_SJLJ_SETUP_DISPATCH
@ EH_SJLJ_SETUP_DISPATCH
OUTCHAIN = EH_SJLJ_SETUP_DISPATCH(INCHAIN) The target initializes the dispatch table here.
Definition ISDOpcodes.h:167

llvm::ISD::GlobalAddress
@ GlobalAddress
Definition ISDOpcodes.h:88

llvm::ISD::SINT_TO_FP
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:868

llvm::ISD::CONCAT_VECTORS
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:577

llvm::ISD::FADD
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:410

llvm::ISD::ABS
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:744

llvm::ISD::UDIVREM
@ UDIVREM
Definition ISDOpcodes.h:276

llvm::ISD::SDIVREM
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition ISDOpcodes.h:275

llvm::ISD::SRL
@ SRL
Definition ISDOpcodes.h:764

llvm::ISD::STRICT_FSETCCS
@ STRICT_FSETCCS
Definition ISDOpcodes.h:505

llvm::ISD::STRICT_FLOG2
@ STRICT_FLOG2
Definition ISDOpcodes.h:449

llvm::ISD::STRICT_FDIV
@ STRICT_FDIV
Definition ISDOpcodes.h:423

llvm::ISD::BUILD_PAIR
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:249

llvm::ISD::STRICT_FSQRT
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition ISDOpcodes.h:431

llvm::ISD::BUILTIN_OP_END
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
Definition ISDOpcodes.h:1584

llvm::ISD::UCMP
@ UCMP
Definition ISDOpcodes.h:733

llvm::ISD::GlobalTLSAddress
@ GlobalTLSAddress
Definition ISDOpcodes.h:89

llvm::ISD::SRA
@ SRA
Definition ISDOpcodes.h:763

llvm::ISD::FrameIndex
@ FrameIndex
Definition ISDOpcodes.h:90

llvm::ISD::STRICT_FMUL
@ STRICT_FMUL
Definition ISDOpcodes.h:422

llvm::ISD::USUBO
@ USUBO
Definition ISDOpcodes.h:348

llvm::ISD::AVGFLOORU
@ AVGFLOORU
Definition ISDOpcodes.h:708

llvm::ISD::SIGN_EXTEND
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:832

llvm::ISD::STRICT_FASIN
@ STRICT_FASIN
Definition ISDOpcodes.h:438

llvm::ISD::AVGCEILS
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition ISDOpcodes.h:712

llvm::ISD::SCALAR_TO_VECTOR
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:662

llvm::ISD::UADDSAT
@ UADDSAT
Definition ISDOpcodes.h:361

llvm::ISD::STRICT_FATAN
@ STRICT_FATAN
Definition ISDOpcodes.h:440

llvm::ISD::CTTZ_ZERO_UNDEF
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition ISDOpcodes.h:779

llvm::ISD::WRITE_REGISTER
@ WRITE_REGISTER
Definition ISDOpcodes.h:135

llvm::ISD::STRICT_FPOW
@ STRICT_FPOW
Definition ISDOpcodes.h:432

llvm::ISD::SETCCCARRY
@ SETCCCARRY
Like SetCC, ops #0 and #1 are the LHS and RHS operands to compare, but op #2 is a boolean indicating ...
Definition ISDOpcodes.h:815

llvm::ISD::STRICT_LROUND
@ STRICT_LROUND
Definition ISDOpcodes.h:459

llvm::ISD::CTTZ
@ CTTZ
Definition ISDOpcodes.h:772

llvm::ISD::SSUBO
@ SSUBO
Same for subtraction.
Definition ISDOpcodes.h:347

llvm::ISD::FP_TO_UINT
@ FP_TO_UINT
Definition ISDOpcodes.h:915

llvm::ISD::OR
@ OR
Definition ISDOpcodes.h:737

llvm::ISD::SSUBSAT
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:369

llvm::ISD::UMULO
@ UMULO
Definition ISDOpcodes.h:352

llvm::ISD::SRA_PARTS
@ SRA_PARTS
Definition ISDOpcodes.h:822

llvm::ISD::SELECT
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:784

llvm::ISD::UMUL_LOHI
@ UMUL_LOHI
Definition ISDOpcodes.h:271

llvm::ISD::STRICT_FPOWI
@ STRICT_FPOWI
Definition ISDOpcodes.h:433

llvm::ISD::UNDEF
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:228

llvm::ISD::RegisterMask
@ RegisterMask
Definition ISDOpcodes.h:85

llvm::ISD::EXTRACT_ELEMENT
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition ISDOpcodes.h:242

llvm::ISD::BasicBlock
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81

llvm::ISD::AVGCEILU
@ AVGCEILU
Definition ISDOpcodes.h:713

llvm::ISD::CopyFromReg
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition ISDOpcodes.h:225

llvm::ISD::SADDO
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition ISDOpcodes.h:343

llvm::ISD::STRICT_FTRUNC
@ STRICT_FTRUNC
Definition ISDOpcodes.h:458

llvm::ISD::USUBSAT
@ USUBSAT
Definition ISDOpcodes.h:370

llvm::ISD::GET_ROUNDING
@ GET_ROUNDING
Returns current rounding mode: -1 Undefined 0 Round to 0 1 Round to nearest, ties to even 2 Round to ...
Definition ISDOpcodes.h:958

llvm::ISD::MULHU
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:701

llvm::ISD::SHL
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:762

llvm::ISD::VECTOR_SHUFFLE
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:642

llvm::ISD::EXTRACT_SUBVECTOR
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:607

llvm::ISD::READ_REGISTER
@ READ_REGISTER
READ_REGISTER, WRITE_REGISTER - This node represents llvm.register on the DAG, which implements the n...
Definition ISDOpcodes.h:134

llvm::ISD::XOR
@ XOR
Definition ISDOpcodes.h:738

llvm::ISD::EXTRACT_VECTOR_ELT
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:569

llvm::ISD::CopyToReg
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition ISDOpcodes.h:219

llvm::ISD::ZERO_EXTEND
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:838

llvm::ISD::FP_TO_UINT_SAT
@ FP_TO_UINT_SAT
Definition ISDOpcodes.h:934

llvm::ISD::CTPOP
@ CTPOP
Definition ISDOpcodes.h:774

llvm::ISD::SELECT_CC
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:799

llvm::ISD::FMUL
@ FMUL
Definition ISDOpcodes.h:412

llvm::ISD::STRICT_FSINH
@ STRICT_FSINH
Definition ISDOpcodes.h:442

llvm::ISD::SRL_PARTS
@ SRL_PARTS
Definition ISDOpcodes.h:823

llvm::ISD::SUB
@ SUB
Definition ISDOpcodes.h:260

llvm::ISD::MULHS
@ MULHS
Definition ISDOpcodes.h:702

llvm::ISD::SMULO
@ SMULO
Same for multiplication.
Definition ISDOpcodes.h:351

llvm::ISD::STRICT_LRINT
@ STRICT_LRINT
Definition ISDOpcodes.h:461

llvm::ISD::ConstantPool
@ ConstantPool
Definition ISDOpcodes.h:92

llvm::ISD::SIGN_EXTEND_INREG
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:876

llvm::ISD::SMIN
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:724

llvm::ISD::MCSymbol
@ MCSymbol
Definition ISDOpcodes.h:188

llvm::ISD::Constant
@ Constant
Definition ISDOpcodes.h:86

llvm::ISD::STRICT_FREM
@ STRICT_FREM
Definition ISDOpcodes.h:424

llvm::ISD::STRICT_FROUND
@ STRICT_FROUND
Definition ISDOpcodes.h:456

llvm::ISD::VSELECT
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:793

llvm::ISD::UADDO_CARRY
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:323

llvm::ISD::STRICT_FFLOOR
@ STRICT_FFLOOR
Definition ISDOpcodes.h:455

llvm::ISD::STRICT_FROUNDEVEN
@ STRICT_FROUNDEVEN
Definition ISDOpcodes.h:457

llvm::ISD::STRICT_FEXP
@ STRICT_FEXP
Definition ISDOpcodes.h:445

llvm::ISD::FDIV
@ FDIV
Definition ISDOpcodes.h:413

llvm::ISD::FRAMEADDR
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition ISDOpcodes.h:110

llvm::ISD::FREM
@ FREM
Definition ISDOpcodes.h:414

llvm::ISD::STRICT_FP_TO_UINT
@ STRICT_FP_TO_UINT
Definition ISDOpcodes.h:471

llvm::ISD::STRICT_FP_ROUND
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition ISDOpcodes.h:493

llvm::ISD::STRICT_FP_TO_SINT
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:470

llvm::ISD::FP_TO_SINT
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:914

llvm::ISD::STRICT_FP_EXTEND
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:498

llvm::ISD::AND
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:736

llvm::ISD::INTRINSIC_WO_CHAIN
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:200

llvm::ISD::USUBO_CARRY
@ USUBO_CARRY
Definition ISDOpcodes.h:324

llvm::ISD::STRICT_FCOSH
@ STRICT_FCOSH
Definition ISDOpcodes.h:443

llvm::ISD::SCMP
@ SCMP
[US]CMP - 3-way comparison of signed or unsigned integers.
Definition ISDOpcodes.h:732

llvm::ISD::AVGFLOORS
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
Definition ISDOpcodes.h:707

llvm::ISD::STRICT_FADD
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition ISDOpcodes.h:420

llvm::ISD::UREM
@ UREM
Definition ISDOpcodes.h:265

llvm::ISD::STRICT_FLOG10
@ STRICT_FLOG10
Definition ISDOpcodes.h:448

llvm::ISD::INSERT_VECTOR_ELT
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:558

llvm::ISD::TokenFactor
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53

llvm::ISD::STRICT_LLRINT
@ STRICT_LLRINT
Definition ISDOpcodes.h:462

llvm::ISD::STRICT_FEXP2
@ STRICT_FEXP2
Definition ISDOpcodes.h:446

llvm::ISD::STRICT_FSUB
@ STRICT_FSUB
Definition ISDOpcodes.h:421

llvm::ISD::STRICT_FLOG
@ STRICT_FLOG
Definition ISDOpcodes.h:447

llvm::ISD::STRICT_FTAN
@ STRICT_FTAN
Definition ISDOpcodes.h:437

llvm::ISD::MUL
@ MUL
Definition ISDOpcodes.h:261

llvm::ISD::FP_ROUND
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:947

llvm::ISD::CTLZ
@ CTLZ
Definition ISDOpcodes.h:773

llvm::ISD::SPONENTRY
@ SPONENTRY
SPONENTRY - Represents the llvm.sponentry intrinsic.
Definition ISDOpcodes.h:122

llvm::ISD::STRICT_FLDEXP
@ STRICT_FLDEXP
Definition ISDOpcodes.h:434

llvm::ISD::STRICT_LLROUND
@ STRICT_LLROUND
Definition ISDOpcodes.h:460

llvm::ISD::STRICT_FCOS
@ STRICT_FCOS
Definition ISDOpcodes.h:436

llvm::ISD::STRICT_FNEARBYINT
@ STRICT_FNEARBYINT
Definition ISDOpcodes.h:451

llvm::ISD::FP_TO_SINT_SAT
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition ISDOpcodes.h:933

llvm::ISD::EH_SJLJ_SETJMP
@ EH_SJLJ_SETJMP
RESULT, OUTCHAIN = EH_SJLJ_SETJMP(INCHAIN, buffer) This corresponds to the eh.sjlj....
Definition ISDOpcodes.h:157

llvm::ISD::TRUNCATE
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:844

llvm::ISD::ROTL
@ ROTL
Definition ISDOpcodes.h:765

llvm::ISD::BlockAddress
@ BlockAddress
Definition ISDOpcodes.h:94

llvm::ISD::SHL_PARTS
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition ISDOpcodes.h:821

llvm::ISD::BITREVERSE
@ BITREVERSE
Definition ISDOpcodes.h:775

llvm::ISD::FCOPYSIGN
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition ISDOpcodes.h:527

llvm::ISD::SADDSAT
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:360

llvm::ISD::AssertZext
@ AssertZext
Definition ISDOpcodes.h:63

llvm::ISD::SMAX
@ SMAX
Definition ISDOpcodes.h:725

llvm::ISD::STRICT_FRINT
@ STRICT_FRINT
Definition ISDOpcodes.h:450

llvm::ISD::UMAX
@ UMAX
Definition ISDOpcodes.h:727

llvm::ISD::ABDS
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition ISDOpcodes.h:719

llvm::ISD::INTRINSIC_W_CHAIN
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:208

llvm::ISD::STRICT_FACOS
@ STRICT_FACOS
Definition ISDOpcodes.h:439

llvm::ISD::ABDU
@ ABDU
Definition ISDOpcodes.h:720

llvm::ISD::BUILD_VECTOR
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:549

llvm::ISD::isNormalStore
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
Definition SelectionDAGNodes.h:3354

llvm::ISD::isZEXTLoad
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
Definition SelectionDAGNodes.h:3341

llvm::ISD::getSetCCInverse
LLVM_ABI CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
Definition SelectionDAG.cpp:627

llvm::ISD::isEXTLoad
bool isEXTLoad(const SDNode *N)
Returns true if the specified node is a EXTLOAD.
Definition SelectionDAGNodes.h:3329

llvm::ISD::getSetCCSwappedOperands
LLVM_ABI CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
Definition SelectionDAG.cpp:604

llvm::ISD::isBuildVectorAllZeros
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
Definition SelectionDAG.cpp:270

llvm::ISD::isSignedIntSetCC
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
Definition ISDOpcodes.h:1734

llvm::ISD::isConstantSplatVector
LLVM_ABI bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
Definition SelectionDAG.cpp:150

llvm::ISD::MemIndexedMode
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition ISDOpcodes.h:1650

llvm::ISD::POST_DEC
@ POST_DEC
Definition ISDOpcodes.h:1650

llvm::ISD::PRE_DEC
@ PRE_DEC
Definition ISDOpcodes.h:1650

llvm::ISD::POST_INC
@ POST_INC
Definition ISDOpcodes.h:1650

llvm::ISD::PRE_INC
@ PRE_INC
Definition ISDOpcodes.h:1650

llvm::ISD::UNINDEXED
@ UNINDEXED
Definition ISDOpcodes.h:1650

llvm::ISD::isSEXTLoad
bool isSEXTLoad(const SDNode *N)
Returns true if the specified node is a SEXTLOAD.
Definition SelectionDAGNodes.h:3335

llvm::ISD::CondCode
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition ISDOpcodes.h:1701

llvm::ISD::SETOEQ
@ SETOEQ
Definition ISDOpcodes.h:1704

llvm::ISD::SETUNE
@ SETUNE
Definition ISDOpcodes.h:1717

llvm::ISD::SETUEQ
@ SETUEQ
Definition ISDOpcodes.h:1712

llvm::ISD::SETOLE
@ SETOLE
Definition ISDOpcodes.h:1708

llvm::ISD::SETOLT
@ SETOLT
Definition ISDOpcodes.h:1707

llvm::ISD::SETNE
@ SETNE
Definition ISDOpcodes.h:1726

llvm::ISD::SETUGT
@ SETUGT
Definition ISDOpcodes.h:1713

llvm::ISD::SETOGT
@ SETOGT
Definition ISDOpcodes.h:1705

llvm::ISD::SETULT
@ SETULT
Definition ISDOpcodes.h:1715

llvm::ISD::SETUO
@ SETUO
Definition ISDOpcodes.h:1711

llvm::ISD::SETONE
@ SETONE
Definition ISDOpcodes.h:1709

llvm::ISD::SETGT
@ SETGT
Definition ISDOpcodes.h:1722

llvm::ISD::SETLT
@ SETLT
Definition ISDOpcodes.h:1724

llvm::ISD::SETO
@ SETO
Definition ISDOpcodes.h:1710

llvm::ISD::SETGE
@ SETGE
Definition ISDOpcodes.h:1723

llvm::ISD::SETUGE
@ SETUGE
Definition ISDOpcodes.h:1714

llvm::ISD::SETLE
@ SETLE
Definition ISDOpcodes.h:1725

llvm::ISD::SETULE
@ SETULE
Definition ISDOpcodes.h:1716

llvm::ISD::SETOGE
@ SETOGE
Definition ISDOpcodes.h:1706

llvm::ISD::SETEQ
@ SETEQ
Definition ISDOpcodes.h:1721

llvm::ISD::LoadExtType
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition ISDOpcodes.h:1681

llvm::ISD::NON_EXTLOAD
@ NON_EXTLOAD
Definition ISDOpcodes.h:1681

llvm::ISD::SEXTLOAD
@ SEXTLOAD
Definition ISDOpcodes.h:1681

llvm::ISD::ZEXTLOAD
@ ZEXTLOAD
Definition ISDOpcodes.h:1681

llvm::ISD::EXTLOAD
@ EXTLOAD
Definition ISDOpcodes.h:1681

llvm::ISD::LAST_INDEXED_MODE
static const int LAST_INDEXED_MODE
Definition ISDOpcodes.h:1652

llvm::ISD::isNormalLoad
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
Definition SelectionDAGNodes.h:3316

llvm::Intrinsic
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
Definition GenericSSAContext.h:27

llvm::Intrinsic::getOrInsertDeclaration
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
Definition Intrinsics.cpp:730

llvm::Intrinsic::ID
unsigned ID
Definition GenericSSAContext.h:28

llvm::LegalizeActions::LegalizeAction
LegalizeAction
Definition LegalizerInfo.h:45

llvm::M68k::MemAddrModeKind::j
@ j
Definition M68kBaseInfo.h:52

llvm::M68k::MemAddrModeKind::U
@ U
Definition M68kBaseInfo.h:61

llvm::M68k::MemAddrModeKind::V
@ V
Definition M68kBaseInfo.h:63

llvm::MCID
Definition MCInstrDesc.h:155

llvm::MCOI::TIED_TO
@ TIED_TO
Definition MCInstrDesc.h:37

llvm::MipsISD::Ret
@ Ret
Definition MipsISelLowering.h:117

llvm::MipsISD::ThreadPointer
@ ThreadPointer
Definition MipsISelLowering.h:89

llvm::MipsISD::Ins
@ Ins
Definition MipsISelLowering.h:158

llvm::NVPTX::Const
@ Const
Definition NVPTX.h:184

llvm::RISCVFenceField::R
@ R
Definition RISCVBaseInfo.h:462

llvm::RTLIB::getSINTTOFP
LLVM_ABI Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Definition TargetLoweringBase.cpp:305

llvm::RTLIB::getUINTTOFP
LLVM_ABI Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Definition TargetLoweringBase.cpp:353

llvm::RTLIB::getFPTOUINT
LLVM_ABI Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Definition TargetLoweringBase.cpp:256

llvm::RTLIB::getFPTOSINT
LLVM_ABI Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Definition TargetLoweringBase.cpp:207

llvm::RTLIB::getFPEXT
LLVM_ABI Libcall getFPEXT(EVT OpVT, EVT RetVT)
getFPEXT - Return the FPEXT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Definition TargetLoweringBase.cpp:125

llvm::RTLIB::getFPROUND
LLVM_ABI Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Definition TargetLoweringBase.cpp:160

llvm::RegState::Implicit
@ Implicit
Not emitted register (e.g. carry, or temporary result).
Definition MachineInstrBuilder.h:49

llvm::RegState::Dead
@ Dead
Unused definition.
Definition MachineInstrBuilder.h:53

llvm::RegState::Define
@ Define
Register definition.
Definition MachineInstrBuilder.h:47

llvm::RegState::ImplicitDefine
@ ImplicitDefine
Definition MachineInstrBuilder.h:66

llvm::RegState::Kill
@ Kill
The last use of a register.
Definition MachineInstrBuilder.h:51

llvm::SI
Definition SIInstrInfo.h:1803

llvm::SPII::Store
@ Store
Definition SparcInstrInfo.h:33

llvm::SPII::Load
@ Load
Definition SparcInstrInfo.h:32

llvm::Sched::Preference
Preference
Definition TargetLowering.h:103

llvm::Sched::RegPressure
@ RegPressure
Definition TargetLowering.h:106

llvm::Sched::Hybrid
@ Hybrid
Definition TargetLowering.h:107

llvm::Sched::Source
@ Source
Definition TargetLowering.h:105

llvm::Sched::ILP
@ ILP
Definition TargetLowering.h:108

llvm::SyncScope::SingleThread
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition LLVMContext.h:55

llvm::SyncScope::ID
uint8_t ID
Definition LLVMContext.h:47

llvm::TLSModel::Model
Model
Definition CodeGen.h:45

llvm::TLSModel::LocalDynamic
@ LocalDynamic
Definition CodeGen.h:47

llvm::TLSModel::InitialExec
@ InitialExec
Definition CodeGen.h:48

llvm::TLSModel::GeneralDynamic
@ GeneralDynamic
Definition CodeGen.h:46

llvm::TLSModel::LocalExec
@ LocalExec
Definition CodeGen.h:49

llvm::X86::FirstMacroFusionInstKind::Cmp
@ Cmp
Definition X86BaseInfo.h:109

llvm::cl::Hidden
@ Hidden
Definition CommandLine.h:139

llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition CommandLine.h:445

llvm::codeview::EncodedFramePtrReg::StackPtr
@ StackPtr
Definition CodeView.h:525

llvm::codeview::FrameCookieKind::Copy
@ Copy
Definition CodeView.h:494

llvm::codeview::PublicSymFlags::Function
@ Function
Definition CodeView.h:408

llvm::dwarf_linker::DebugSectionKind::DebugLoc
@ DebugLoc
Definition DWARFLinkerBase.h:34

llvm::dwarf::Index
Index
Definition Dwarf.h:903

llvm::lltok::APFloat
@ APFloat
Definition LLToken.h:519

llvm::logicalview::LVAttributeKind::Argument
@ Argument
Definition LVOptions.h:95

llvm::ms_demangle::QualifierMangleMode::Result
@ Result
Definition MicrosoftDemangle.h:132

llvm::pdb::PDB_SymType::Callee
@ Callee
Definition PDBTypes.h:282

llvm::pdb::PDB_LocType::TLS
@ TLS
Definition PDBTypes.h:295

llvm::pdb::PDB_LocType::Slot
@ Slot
Definition PDBTypes.h:300

llvm::sampleprof::Base
@ Base
Definition Discriminator.h:58

llvm::sframe::BaseReg::SP
@ SP
Definition SFrame.h:79

llvm::sframe::Flags
Flags
Definition SFrame.h:39

llvm::tgtok::TrueVal
@ TrueVal
Definition TGLexer.h:57

llvm::tgtok::FalseVal
@ FalseVal
Definition TGLexer.h:58

llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition AddressRanges.h:18

llvm::drop_begin
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316

llvm::dump
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
Definition SparseBitVector.h:874

llvm::RetFastCC_ARM_APCS
bool RetFastCC_ARM_APCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)

llvm::ThreadPriority::Low
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
Definition Threading.h:280

llvm::Offset
@ Offset
Definition DWP.cpp:532

llvm::Length
@ Length
Definition DWP.cpp:532

llvm::Value
FunctionAddr VTableAddr Value
Definition InstrProf.h:137

llvm::stable_sort
void stable_sort(R &&Range)
Definition STLExtras.h:2058

llvm::find
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1751

llvm::all_of
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1725

llvm::HasLowerConstantMaterializationCost
bool HasLowerConstantMaterializationCost(unsigned Val1, unsigned Val2, const ARMSubtarget *Subtarget, bool ForCodesize=false)
Returns true if Val1 has a lower Constant Materialization Cost than Val2.
Definition ARMBaseInstrInfo.cpp:5512

llvm::Read
@ Read
Definition CodeGenData.h:108

llvm::BuildMI
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
Definition MachineInstrBuilder.h:369

llvm::Cost
InstructionCost Cost
Definition FunctionSpecialization.h:103

llvm::isNullConstant
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
Definition SelectionDAG.cpp:12728

llvm::Depth
@ Depth
Definition SIMachineScheduler.h:36

llvm::dyn_cast
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643

llvm::isStrongerThanMonotonic
bool isStrongerThanMonotonic(AtomicOrdering AO)
Definition AtomicOrdering.h:125

llvm::countr_one
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition bit.h:293

llvm::Int32Ty
FunctionAddr VTableAddr uintptr_t uintptr_t Int32Ty
Definition InstrProf.h:296

llvm::CCAssignFn
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change.
Definition CallingConvLower.h:157

llvm::CC_ARM_AAPCS
bool CC_ARM_AAPCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)

llvm::isMask_32
constexpr bool isMask_32(uint32_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition MathExtras.h:255

llvm::ExceptionHandling
ExceptionHandling
Definition CodeGen.h:53

llvm::ExceptionHandling::SjLj
@ SjLj
setjmp/longjmp based exceptions
Definition CodeGen.h:56

llvm::RetCC_ARM_AAPCS_VFP
bool RetCC_ARM_AAPCS_VFP(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)

llvm::RetCC_ARM_APCS
bool RetCC_ARM_APCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)

llvm::bit_width
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
Definition bit.h:303

llvm::append_range
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2136

llvm::isUIntN
constexpr bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition MathExtras.h:243

llvm::RetCC_ARM_AAPCS
bool RetCC_ARM_AAPCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)

llvm::concatenateVectors
LLVM_ABI Value * concatenateVectors(IRBuilderBase &Builder, ArrayRef< Value * > Vecs)
Concatenate a list of vectors.
Definition VectorUtils.cpp:1228

llvm::isPowerOf2_64
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284

llvm::shuffle
void shuffle(Iterator first, Iterator last, RNG &&g)
Definition STLExtras.h:1516

llvm::CC_ARM_APCS_GHC
bool CC_ARM_APCS_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)

llvm::predOps
static std::array< MachineOperand, 2 > predOps(ARMCC::CondCodes Pred, unsigned PredReg=0)
Get the operands corresponding to the given Pred value.
Definition ARMBaseInstrInfo.h:542

llvm::operator==
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
Definition AddressRanges.h:151

llvm::isShiftedMask_32
constexpr bool isShiftedMask_32(uint32_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (32 bit ver...
Definition MathExtras.h:267

llvm::isConstOrConstSplatFP
LLVM_ABI ConstantFPSDNode * isConstOrConstSplatFP(SDValue N, bool AllowUndefs=false)
Returns the SDNode if it is a constant splat BuildVector or constant float.
Definition SelectionDAG.cpp:12917

llvm::countr_zero
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:202

llvm::isReleaseOrStronger
bool isReleaseOrStronger(AtomicOrdering AO)
Definition AtomicOrdering.h:133

llvm::dyn_cast_or_null
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753

llvm::has_single_bit
constexpr bool has_single_bit(T Value) noexcept
Definition bit.h:147

llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1732

llvm::Log2_32
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331

llvm::countl_zero
int countl_zero(T Val)
Count number of 0's from the most significant bit to the least stopping at the first 1.
Definition bit.h:236

llvm::isBitwiseNot
LLVM_ABI bool isBitwiseNot(SDValue V, bool AllowUndefs=false)
Returns true if V is a bitwise not operation.
Definition SelectionDAG.cpp:12861

llvm::HexPrintStyle::Upper
@ Upper
Definition NativeFormatting.h:23

llvm::HexPrintStyle::Lower
@ Lower
Definition NativeFormatting.h:23

llvm::getImm
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
Definition SPIRVUtils.cpp:998

llvm::isPowerOf2_32
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279

llvm::FastCC_ARM_APCS
bool FastCC_ARM_APCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)

llvm::get
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
Definition PointerIntPair.h:268

llvm::ComplexDeinterleavingOperation
ComplexDeinterleavingOperation
Definition ComplexDeinterleavingPass.h:35

llvm::ComplexDeinterleavingOperation::CAdd
@ CAdd
Definition ComplexDeinterleavingPass.h:36

llvm::ComplexDeinterleavingOperation::CMulPartial
@ CMulPartial
Definition ComplexDeinterleavingPass.h:37

llvm::dbgs
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207

llvm::CC_ARM_Win32_CFGuard_Check
bool CC_ARM_Win32_CFGuard_Check(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)

llvm::report_fatal_error
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:167

llvm::Count
FunctionAddr VTableAddr Count
Definition InstrProf.h:139

llvm::isUInt
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189

llvm::to_vector
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
Definition SmallVector.h:1305

llvm::CodeGenOptLevel::None
@ None
-O0
Definition CodeGen.h:83

llvm::SmallVector
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
Definition SmallVector.h:1129

llvm::isa
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547

llvm::errs
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
Definition raw_ostream.cpp:897

llvm::VMOVModImmType
VMOVModImmType
Definition ARMISelLowering.h:724

llvm::OtherModImm
@ OtherModImm
Definition ARMISelLowering.h:728

llvm::VMVNModImm
@ VMVNModImm
Definition ARMISelLowering.h:726

llvm::MVEVMVNModImm
@ MVEVMVNModImm
Definition ARMISelLowering.h:727

llvm::VMOVModImm
@ VMOVModImm
Definition ARMISelLowering.h:725

llvm::PackElem::Hi
@ Hi
Definition VECustomDAG.h:132

llvm::PackElem::Lo
@ Lo
Definition VECustomDAG.h:131

llvm::AtomicOrdering
AtomicOrdering
Atomic ordering for LLVM's memory model.
Definition AtomicOrdering.h:56

llvm::AtomicOrdering::Monotonic
@ Monotonic
Definition AtomicOrdering.h:59

llvm::AtomicOrdering::Unordered
@ Unordered
Definition AtomicOrdering.h:58

llvm::AtomicOrdering::NotAtomic
@ NotAtomic
Definition AtomicOrdering.h:57

llvm::AtomicOrdering::AcquireRelease
@ AcquireRelease
Definition AtomicOrdering.h:63

llvm::AtomicOrdering::Acquire
@ Acquire
Definition AtomicOrdering.h:61

llvm::AtomicOrdering::Release
@ Release
Definition AtomicOrdering.h:62

llvm::AtomicOrdering::SequentiallyConsistent
@ SequentiallyConsistent
Definition AtomicOrdering.h:64

llvm::ComplexDeinterleavingRotation
ComplexDeinterleavingRotation
Definition ComplexDeinterleavingPass.h:50

llvm::ComplexDeinterleavingRotation::Rotation_270
@ Rotation_270
Definition ComplexDeinterleavingPass.h:54

llvm::ComplexDeinterleavingRotation::Rotation_90
@ Rotation_90
Definition ComplexDeinterleavingPass.h:52

llvm::Data
FunctionAddr VTableAddr uintptr_t uintptr_t Data
Definition InstrProf.h:189

llvm::CombineLevel
CombineLevel
Definition DAGCombine.h:15

llvm::BeforeLegalizeTypes
@ BeforeLegalizeTypes
Definition DAGCombine.h:16

llvm::LEB128Sign::Signed
@ Signed
Definition LEB128.h:232

llvm::LEB128Sign::Unsigned
@ Unsigned
Definition LEB128.h:232

llvm::ConstantMaterializationCost
unsigned ConstantMaterializationCost(unsigned Val, const ARMSubtarget *Subtarget, bool ForCodesize=false)
Returns the number of instructions required to materialize the given constant in a register,...
Definition ARMBaseInstrInfo.cpp:5479

llvm::RecurKind::Mul
@ Mul
Product of integers.
Definition IVDescriptors.h:40

llvm::RecurKind::And
@ And
Bitwise or logical AND of integers.
Definition IVDescriptors.h:42

llvm::RecurKind::Sub
@ Sub
Subtraction of integers.
Definition IVDescriptors.h:38

llvm::RecurKind::Add
@ Add
Sum of integers.
Definition IVDescriptors.h:37

llvm::RecurKind::FAdd
@ FAdd
Sum of floats.
Definition IVDescriptors.h:48

llvm::MCPhysReg
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21

llvm::alignTo
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144

llvm::Op
DWARFExpression::Operation Op
Definition DWARFExpressionPrinter.cpp:22

llvm::ArrayRef
ArrayRef(const T &OneElt) -> ArrayRef< T >

llvm::isConstOrConstSplat
LLVM_ABI ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
Definition SelectionDAG.cpp:12871

llvm::AbsoluteValue
constexpr U AbsoluteValue(T X)
Return the absolute value of a signed integer, converted to the corresponding unsigned integer type.
Definition MathExtras.h:592

llvm::isAcquireOrStronger
bool isAcquireOrStronger(AtomicOrdering AO)
Definition AtomicOrdering.h:129

llvm::BitWidth
constexpr unsigned BitWidth
Definition BitmaskEnum.h:219

llvm::t1CondCodeOp
static MachineOperand t1CondCodeOp(bool isDead=false)
Get the operand corresponding to the conditional code result for Thumb1.
Definition ARMBaseInstrInfo.h:557

llvm::count_if
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition STLExtras.h:1961

llvm::cast
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559

llvm::find_if
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1758

llvm::isOneConstant
LLVM_ABI bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
Definition SelectionDAG.cpp:12747

llvm::isIntN
constexpr bool isIntN(unsigned N, int64_t x)
Checks if an signed integer fits into the given (dynamic) bit width.
Definition MathExtras.h:248

llvm::commonAlignment
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:201

llvm::condCodeOp
static MachineOperand condCodeOp(unsigned CCReg=0)
Get the operand corresponding to the conditional code result.
Definition ARMBaseInstrInfo.h:550

llvm::isVREVMask
bool isVREVMask(ArrayRef< int > M, EVT VT, unsigned BlockSize)
isVREVMask - Check if a vector shuffle corresponds to a VREV instruction with the specified blocksize...
Definition ARMTargetTransformInfo.h:361

llvm::seq
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition Sequence.h:305

llvm::gettBLXrOpcode
unsigned gettBLXrOpcode(const MachineFunction &MF)
Definition ARMBaseInstrInfo.cpp:6531

llvm::CC_ARM_APCS
bool CC_ARM_APCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)

llvm::AllocTokenMode::Increment
@ Increment
Incrementally increasing token ID.
Definition AllocToken.h:26

llvm::CC_ARM_AAPCS_VFP
bool CC_ARM_AAPCS_VFP(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)

llvm::createSequentialMask
LLVM_ABI llvm::SmallVector< int, 16 > createSequentialMask(unsigned Start, unsigned NumInts, unsigned NumUndefs)
Create a sequential shuffle mask.
Definition VectorUtils.cpp:1173

llvm::isShiftedUInt
constexpr bool isShiftedUInt(uint64_t x)
Checks if a unsigned integer is an N bit number shifted left by S.
Definition MathExtras.h:198

llvm::fltNanEncoding::AllOnes
@ AllOnes
Definition APFloat.cpp:92

llvm::convertAddSubFlagsOpcode
unsigned convertAddSubFlagsOpcode(unsigned OldOpc)
Map pseudo instructions that imply an 'S' bit onto real opcodes.
Definition ARMBaseInstrInfo.cpp:2307

llvm::isAllOnesConstant
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
Definition SelectionDAG.cpp:12742

llvm::PerfectShuffleTable
static const unsigned PerfectShuffleTable[6561+1]
Definition AArch64PerfectShuffle.h:28

std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:869

raw_ostream.h

N
#define N

BaseUpdateTarget
Load/store instruction that can be merged with a base address update.
Definition ARMISelLowering.cpp:15627

BaseUpdateTarget::isStore
bool isStore
Definition ARMISelLowering.cpp:15630

BaseUpdateTarget::isIntrinsic
bool isIntrinsic
Definition ARMISelLowering.cpp:15629

BaseUpdateTarget::AddrOpIdx
unsigned AddrOpIdx
Definition ARMISelLowering.cpp:15631

BaseUpdateTarget::N
SDNode * N
Definition ARMISelLowering.cpp:15628

BaseUpdateUser
Definition ARMISelLowering.cpp:15634

BaseUpdateUser::N
SDNode * N
Instruction that updates a pointer.
Definition ARMISelLowering.cpp:15636

BaseUpdateUser::ConstInc
unsigned ConstInc
Pointer increment value if it is a constant, or 0 otherwise.
Definition ARMISelLowering.cpp:15640

BaseUpdateUser::Inc
SDValue Inc
Pointer increment operand.
Definition ARMISelLowering.cpp:15638

llvm::AAMDNodes
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
Definition Metadata.h:761

llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39

llvm::Align::value
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:77

llvm::DenormalMode::getIEEE
static constexpr DenormalMode getIEEE()
Definition FloatingPointMode.h:115

llvm::EVT
Extended Value Type.
Definition ValueTypes.h:35

llvm::EVT::changeVectorElementTypeToInteger
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition ValueTypes.h:94

llvm::EVT::isSimple
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:137

llvm::EVT::getVectorVT
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:74

llvm::EVT::bitsGT
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition ValueTypes.h:284

llvm::EVT::bitsLT
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition ValueTypes.h:300

llvm::EVT::isFloatingPoint
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:147

llvm::EVT::getVectorElementCount
ElementCount getVectorElementCount() const
Definition ValueTypes.h:350

llvm::EVT::getDoubleNumVectorElementsVT
EVT getDoubleNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:463

llvm::EVT::getSizeInBits
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:373

llvm::EVT::getVectorMinNumElements
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition ValueTypes.h:359

llvm::EVT::getScalarSizeInBits
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:385

llvm::EVT::isPow2VectorType
bool isPow2VectorType() const
Returns true if the given vector is a power of 2.
Definition ValueTypes.h:470

llvm::EVT::getEVT
static LLVM_ABI EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
Definition ValueTypes.cpp:301

llvm::EVT::getSimpleVT
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:316

llvm::EVT::is128BitVector
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition ValueTypes.h:207

llvm::EVT::getIntegerVT
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:65

llvm::EVT::getFixedSizeInBits
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:381

llvm::EVT::isFixedLengthVector
bool isFixedLengthVector() const
Definition ValueTypes.h:181

llvm::EVT::getFloatingPointVT
static EVT getFloatingPointVT(unsigned BitWidth)
Returns the EVT that represents a floating-point type with the given number of bits.
Definition ValueTypes.h:59

llvm::EVT::isVector
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:168

llvm::EVT::getScalarType
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:323

llvm::EVT::getTypeForEVT
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition ValueTypes.cpp:218

llvm::EVT::getVectorElementType
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:328

llvm::EVT::isScalarInteger
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:157

llvm::EVT::changeVectorElementType
EVT changeVectorElementType(EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition ValueTypes.h:102

llvm::EVT::getVectorNumElements
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:336

llvm::EVT::bitsLE
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition ValueTypes.h:308

llvm::EVT::getHalfNumVectorElementsVT
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:453

llvm::EVT::isInteger
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:152

llvm::EVT::is64BitVector
bool is64BitVector() const
Return true if this is a 64-bit vector type.
Definition ValueTypes.h:202

llvm::ISD::ArgFlagsTy
Definition TargetCallingConv.h:27

llvm::ISD::ArgFlagsTy::isSExt
bool isSExt() const
Definition TargetCallingConv.h:79

llvm::ISD::InputArg
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
Definition TargetCallingConv.h:204

llvm::ISD::InputArg::ArgVT
EVT ArgVT
Usually the non-legalized type of the argument, which is the EVT corresponding to the OrigTy IR type.
Definition TargetCallingConv.h:211

llvm::ISD::InputArg::Flags
ArgFlagsTy Flags
Definition TargetCallingConv.h:205

llvm::KnownBits
Definition KnownBits.h:24

llvm::KnownBits::makeConstant
static KnownBits makeConstant(const APInt &C)
Create known bits from a known constant.
Definition KnownBits.h:301

llvm::KnownBits::isUnknown
bool isUnknown() const
Returns true if we don't know any bits.
Definition KnownBits.h:66

llvm::KnownBits::getBitWidth
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44

llvm::KnownBits::zext
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
Definition KnownBits.h:172

llvm::KnownBits::resetAll
void resetAll()
Resets the known state of all bits.
Definition KnownBits.h:74

llvm::KnownBits::intersectWith
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
Definition KnownBits.h:311

llvm::KnownBits::sext
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
Definition KnownBits.h:180

llvm::KnownBits::add
static KnownBits add(const KnownBits &LHS, const KnownBits &RHS, bool NSW=false, bool NUW=false)
Compute knownbits resulting from addition of LHS and RHS.
Definition KnownBits.h:347

llvm::KnownBits::One
APInt One
Definition KnownBits.h:26

llvm::KnownBits::mul
static LLVM_ABI KnownBits mul(const KnownBits &LHS, const KnownBits &RHS, bool NoUndefSelfMultiply=false)
Compute known bits resulting from multiplying LHS and RHS.
Definition KnownBits.cpp:800

llvm::KnownBits::Zero
APInt Zero
Definition KnownBits.h:25

llvm::KnownBits::getSignedMinValue
APInt getSignedMinValue() const
Return the minimal signed value possible given these KnownBits.
Definition KnownBits.h:135

llvm::MIPatternMatch::And
Matching combinators.
Definition MIPatternMatch.h:314

llvm::MachineFunction::CallSiteInfo::ArgRegPairs
SmallVector< ArgRegPair, 1 > ArgRegPairs
Vector of call argument and its forwarding register.
Definition MachineFunction.h:517

llvm::MachinePointerInfo
This class contains a discriminated union of information about pointers in memory operands,...
Definition MachineMemOperand.h:42

llvm::MachinePointerInfo::getJumpTable
static LLVM_ABI MachinePointerInfo getJumpTable(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a jump table entry.
Definition MachineOperand.cpp:1084

llvm::MachinePointerInfo::getStack
static LLVM_ABI MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
Definition MachineOperand.cpp:1092

llvm::MachinePointerInfo::getConstantPool
static LLVM_ABI MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
Definition MachineOperand.cpp:1073

llvm::MachinePointerInfo::getWithOffset
MachinePointerInfo getWithOffset(int64_t O) const
Definition MachineMemOperand.h:82

llvm::MachinePointerInfo::getGOT
static LLVM_ABI MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
Definition MachineOperand.cpp:1088

llvm::MachinePointerInfo::getFixedStack
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
Definition MachineOperand.cpp:1079

llvm::MaybeAlign
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition Alignment.h:106

llvm::MemOp
Definition TargetLowering.h:118

llvm::SDNodeFlags
These are IR-level optimization flags that may be propagated to SDNodes.
Definition SelectionDAGNodes.h:384

llvm::SDNodeFlags::hasNoSignedZeros
bool hasNoSignedZeros() const
Definition SelectionDAGNodes.h:481

llvm::SDVTList
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
Definition SelectionDAGNodes.h:80

llvm::TargetLoweringBase::AddrMode
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
Definition TargetLowering.h:2918

llvm::TargetLoweringBase::AddrMode::BaseOffs
int64_t BaseOffs
Definition TargetLowering.h:2920

llvm::TargetLoweringBase::AddrMode::BaseGV
GlobalValue * BaseGV
Definition TargetLowering.h:2919

llvm::TargetLoweringBase::AddrMode::HasBaseReg
bool HasBaseReg
Definition TargetLowering.h:2921

llvm::TargetLoweringBase::AddrMode::Scale
int64_t Scale
Definition TargetLowering.h:2922

llvm::TargetLoweringBase::IntrinsicInfo
Definition TargetLowering.h:1219

llvm::TargetLowering::AsmOperandInfo
This contains information for each constraint that we are lowering.
Definition TargetLowering.h:5182

llvm::TargetLowering::CallLoweringInfo
This structure contains all information that is necessary for lowering calls.
Definition TargetLowering.h:4731

llvm::TargetLowering::CallLoweringInfo::IsTailCall
bool IsTailCall
Definition TargetLowering.h:4750

llvm::TargetLowering::CallLoweringInfo::setInRegister
CallLoweringInfo & setInRegister(bool Value=true)
Definition TargetLowering.h:4851

llvm::TargetLowering::CallLoweringInfo::Callee
SDValue Callee
Definition TargetLowering.h:4757

llvm::TargetLowering::CallLoweringInfo::setLibCallee
CallLoweringInfo & setLibCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList)
Definition TargetLowering.h:4788

llvm::TargetLowering::CallLoweringInfo::DL
SDLoc DL
Definition TargetLowering.h:4760

llvm::TargetLowering::CallLoweringInfo::IsVarArg
bool IsVarArg
Definition TargetLowering.h:4739

llvm::TargetLowering::CallLoweringInfo::Ins
SmallVector< ISD::InputArg, 32 > Ins
Definition TargetLowering.h:4764

llvm::TargetLowering::CallLoweringInfo::CFIType
const ConstantInt * CFIType
Definition TargetLowering.h:4766

llvm::TargetLowering::CallLoweringInfo::setZExtResult
CallLoweringInfo & setZExtResult(bool Value=true)
Definition TargetLowering.h:4886

llvm::TargetLowering::CallLoweringInfo::setDebugLoc
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
Definition TargetLowering.h:4777

llvm::TargetLowering::CallLoweringInfo::Chain
SDValue Chain
Definition TargetLowering.h:4732

llvm::TargetLowering::CallLoweringInfo::NoMerge
bool NoMerge
Definition TargetLowering.h:4746

llvm::TargetLowering::CallLoweringInfo::setSExtResult
CallLoweringInfo & setSExtResult(bool Value=true)
Definition TargetLowering.h:4881

llvm::TargetLowering::CallLoweringInfo::CB
const CallBase * CB
Definition TargetLowering.h:4761

llvm::TargetLowering::CallLoweringInfo::Outs
SmallVector< ISD::OutputArg, 32 > Outs
Definition TargetLowering.h:4762

llvm::TargetLowering::CallLoweringInfo::OutVals
SmallVector< SDValue, 32 > OutVals
Definition TargetLowering.h:4763

llvm::TargetLowering::CallLoweringInfo::CallConv
CallingConv::ID CallConv
Definition TargetLowering.h:4756

llvm::TargetLowering::CallLoweringInfo::DAG
SelectionDAG & DAG
Definition TargetLowering.h:4759

llvm::TargetLowering::CallLoweringInfo::setChain
CallLoweringInfo & setChain(SDValue InChain)
Definition TargetLowering.h:4782

llvm::TargetLowering::CallLoweringInfo::setCallee
CallLoweringInfo & setCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList, AttributeSet ResultAttrs={})
Definition TargetLowering.h:4809

llvm::TargetLowering::CallLoweringInfo::DoesNotReturn
bool DoesNotReturn
Definition TargetLowering.h:4741

llvm::TargetLowering::DAGCombinerInfo
Definition TargetLowering.h:4435

llvm::TargetLowering::DAGCombinerInfo::isAfterLegalizeDAG
bool isAfterLegalizeDAG() const
Definition TargetLowering.h:4448

llvm::TargetLowering::DAGCombinerInfo::AddToWorklist
LLVM_ABI void AddToWorklist(SDNode *N)
Definition DAGCombiner.cpp:935

llvm::TargetLowering::DAGCombinerInfo::isCalledByLegalizer
bool isCalledByLegalizer() const
Definition TargetLowering.h:4450

llvm::TargetLowering::DAGCombinerInfo::isBeforeLegalize
bool isBeforeLegalize() const
Definition TargetLowering.h:4446

llvm::TargetLowering::DAGCombinerInfo::DAG
SelectionDAG & DAG
Definition TargetLowering.h:4441

llvm::TargetLowering::DAGCombinerInfo::CombineTo
LLVM_ABI SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
Definition DAGCombiner.cpp:940

llvm::TargetLowering::MakeLibCallOptions
This structure is used to pass arguments to makeLibCall function.
Definition TargetLowering.h:4927

llvm::TargetLowering::TargetLoweringOpt
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...
Definition TargetLowering.h:4141

llvm::TargetLowering::TargetLoweringOpt::DAG
SelectionDAG & DAG
Definition TargetLowering.h:4142

llvm::TargetLowering::TargetLoweringOpt::CombineTo
bool CombineTo(SDValue O, SDValue N)
Definition TargetLowering.h:4155

llvm::TargetLowering::TargetLoweringOpt::LegalOps
bool LegalOps
Definition TargetLowering.h:4144

llvm::cl::desc
Definition CommandLine.h:411