1 //===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the AArch64 implementation of the TargetInstrInfo class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "AArch64InstrInfo.h"
15 #include "AArch64Subtarget.h"
17 #include "Utils/AArch64BaseInfo.h"
18 #include "llvm/ADT/ArrayRef.h"
19 #include "llvm/ADT/STLExtras.h"
20 #include "llvm/ADT/SmallVector.h"
30 #include "llvm/CodeGen/StackMaps.h"
34 #include "llvm/IR/DebugLoc.h"
35 #include "llvm/IR/GlobalValue.h"
36 #include "llvm/MC/MCAsmInfo.h"
37 #include "llvm/MC/MCInst.h"
38 #include "llvm/MC/MCInstBuilder.h"
39 #include "llvm/MC/MCInstrDesc.h"
40 #include "llvm/Support/Casting.h"
41 #include "llvm/Support/CodeGen.h"
43 #include "llvm/Support/Compiler.h"
48 #include <cassert>
49 #include <cstdint>
50 #include <iterator>
51 #include <utility>
52 
53 using namespace llvm;
54 
55 #define GET_INSTRINFO_CTOR_DTOR
56 #include "AArch64GenInstrInfo.inc"
57 
58 static cl::opt<unsigned> TBZDisplacementBits(
59  "aarch64-tbz-offset-bits", cl::Hidden, cl::init(14),
60  cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"));
61 
62 static cl::opt<unsigned> CBZDisplacementBits(
63  "aarch64-cbz-offset-bits", cl::Hidden, cl::init(19),
64  cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"));
65 
66 static cl::opt<unsigned>
67  BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19),
68  cl::desc("Restrict range of Bcc instructions (DEBUG)"));
69 
70 AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
71  : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP,
72  AArch64::CATCHRET),
73  RI(STI.getTargetTriple()), Subtarget(STI) {}
74 
75 /// GetInstSize - Return the number of bytes of code the specified
76 /// instruction may be. This returns the maximum number of bytes.
77 unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
78  const MachineBasicBlock &MBB = *MI.getParent();
79  const MachineFunction *MF = MBB.getParent();
80  const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
81 
82  {
83  auto Op = MI.getOpcode();
84  if (Op == AArch64::INLINEASM || Op == AArch64::INLINEASM_BR)
85  return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
86  }
87 
88  // Meta-instructions emit no code.
89  if (MI.isMetaInstruction())
90  return 0;
91 
92  // FIXME: We currently only handle pseudoinstructions that don't get expanded
93  // before the assembly printer.
94  unsigned NumBytes = 0;
95  const MCInstrDesc &Desc = MI.getDesc();
96  switch (Desc.getOpcode()) {
97  default:
98  // Anything not explicitly designated otherwise is a normal 4-byte insn.
99  NumBytes = 4;
100  break;
101  case TargetOpcode::STACKMAP:
102  // The upper bound for a stackmap intrinsic is the full length of its shadow
103  NumBytes = StackMapOpers(&MI).getNumPatchBytes();
104  assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
105  break;
106  case TargetOpcode::PATCHPOINT:
107  // The size of the patchpoint intrinsic is the number of bytes requested
108  NumBytes = PatchPointOpers(&MI).getNumPatchBytes();
109  assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
110  break;
111  case TargetOpcode::STATEPOINT:
112  NumBytes = StatepointOpers(&MI).getNumPatchBytes();
113  assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
114  // No patch bytes means a normal call inst is emitted
115  if (NumBytes == 0)
116  NumBytes = 4;
117  break;
118  case AArch64::TLSDESC_CALLSEQ:
119  // This gets lowered to an instruction sequence which takes 16 bytes
120  NumBytes = 16;
121  break;
122  case AArch64::SpeculationBarrierISBDSBEndBB:
123  // This gets lowered to 2 4-byte instructions.
124  NumBytes = 8;
125  break;
126  case AArch64::SpeculationBarrierSBEndBB:
127  // This gets lowered to 1 4-byte instruction.
128  NumBytes = 4;
129  break;
130  case AArch64::JumpTableDest32:
131  case AArch64::JumpTableDest16:
132  case AArch64::JumpTableDest8:
133  NumBytes = 12;
134  break;
135  case AArch64::SPACE:
136  NumBytes = MI.getOperand(1).getImm();
137  break;
138  case AArch64::StoreSwiftAsyncContext:
139  NumBytes = 20;
140  break;
141  case TargetOpcode::BUNDLE:
142  NumBytes = getInstBundleLength(MI);
143  break;
144  }
145 
146  return NumBytes;
147 }
148 
149 unsigned AArch64InstrInfo::getInstBundleLength(const MachineInstr &MI) const {
150  unsigned Size = 0;
151  MachineBasicBlock::const_instr_iterator I = MI.getIterator();
152  MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
153  while (++I != E && I->isInsideBundle()) {
154  assert(!I->isBundle() && "No nested bundle!");
155  Size += getInstSizeInBytes(*I);
156  }
157  return Size;
158 }
159 
160 static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
161  SmallVectorImpl<MachineOperand> &Cond) {
162  // Block ends with fall-through condbranch.
163  switch (LastInst->getOpcode()) {
164  default:
165  llvm_unreachable("Unknown branch instruction?");
166  case AArch64::Bcc:
167  Target = LastInst->getOperand(1).getMBB();
168  Cond.push_back(LastInst->getOperand(0));
169  break;
170  case AArch64::CBZW:
171  case AArch64::CBZX:
172  case AArch64::CBNZW:
173  case AArch64::CBNZX:
174  Target = LastInst->getOperand(1).getMBB();
175  Cond.push_back(MachineOperand::CreateImm(-1));
176  Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
177  Cond.push_back(LastInst->getOperand(0));
178  break;
179  case AArch64::TBZW:
180  case AArch64::TBZX:
181  case AArch64::TBNZW:
182  case AArch64::TBNZX:
183  Target = LastInst->getOperand(2).getMBB();
184  Cond.push_back(MachineOperand::CreateImm(-1));
185  Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
186  Cond.push_back(LastInst->getOperand(0));
187  Cond.push_back(LastInst->getOperand(1));
188  }
189 }
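// Illustrative note on the Cond encoding produced above (derived from the
// cases in parseCondBranch, not a separate contract):
//   Bcc:          Cond = { <AArch64CC condition code> }
//   CB(N)Z[W|X]:  Cond = { -1, <opcode>, <register> }
//   TB(N)Z[W|X]:  Cond = { -1, <opcode>, <register>, <bit number> }
// The leading -1 is what lets reverseBranchCondition() and
// instantiateCondBranch() tell the folded compare-and-branch forms apart
// from a plain Bcc.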
190 
191 static unsigned getBranchDisplacementBits(unsigned Opc) {
192  switch (Opc) {
193  default:
194  llvm_unreachable("unexpected opcode!");
195  case AArch64::B:
196  return 64;
197  case AArch64::TBNZW:
198  case AArch64::TBZW:
199  case AArch64::TBNZX:
200  case AArch64::TBZX:
201  return TBZDisplacementBits;
202  case AArch64::CBNZW:
203  case AArch64::CBZW:
204  case AArch64::CBNZX:
205  case AArch64::CBZX:
206  return CBZDisplacementBits;
207  case AArch64::Bcc:
208  return BCCDisplacementBits;
209  }
210 }
211 
212 bool AArch64InstrInfo::isBranchOffsetInRange(unsigned BranchOp,
213  int64_t BrOffset) const {
214  unsigned Bits = getBranchDisplacementBits(BranchOp);
215  assert(Bits >= 3 && "max branch displacement must be enough to jump"
216  " over conditional branch expansion");
217  return isIntN(Bits, BrOffset / 4);
218 }
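// For example: with the default settings above, TB[N]Z has a 14-bit signed
// word displacement (about +/-32 KiB of byte offset), CB[N]Z and Bcc have
// 19 bits (about +/-1 MiB), and an unconditional B is treated as effectively
// unlimited (64 bits) for relaxation purposes.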
219 
220 MachineBasicBlock *AArch64InstrInfo::getBranchDestBlock(
221  const MachineInstr &MI) const {
222  switch (MI.getOpcode()) {
223  default:
224  llvm_unreachable("unexpected opcode!");
225  case AArch64::B:
226  return MI.getOperand(0).getMBB();
227  case AArch64::TBZW:
228  case AArch64::TBNZW:
229  case AArch64::TBZX:
230  case AArch64::TBNZX:
231  return MI.getOperand(2).getMBB();
232  case AArch64::CBZW:
233  case AArch64::CBNZW:
234  case AArch64::CBZX:
235  case AArch64::CBNZX:
236  case AArch64::Bcc:
237  return MI.getOperand(1).getMBB();
238  }
239 }
240 
241 // Branch analysis.
242 bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
243  MachineBasicBlock *&TBB,
244  MachineBasicBlock *&FBB,
245  SmallVectorImpl<MachineOperand> &Cond,
246  bool AllowModify) const {
247  // If the block has no terminators, it just falls into the block after it.
248  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
249  if (I == MBB.end())
250  return false;
251 
252  // Skip over SpeculationBarrierEndBB terminators
253  if (I->getOpcode() == AArch64::SpeculationBarrierISBDSBEndBB ||
254  I->getOpcode() == AArch64::SpeculationBarrierSBEndBB) {
255  --I;
256  }
257 
258  if (!isUnpredicatedTerminator(*I))
259  return false;
260 
261  // Get the last instruction in the block.
262  MachineInstr *LastInst = &*I;
263 
264  // If there is only one terminator instruction, process it.
265  unsigned LastOpc = LastInst->getOpcode();
266  if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
267  if (isUncondBranchOpcode(LastOpc)) {
268  TBB = LastInst->getOperand(0).getMBB();
269  return false;
270  }
271  if (isCondBranchOpcode(LastOpc)) {
272  // Block ends with fall-through condbranch.
273  parseCondBranch(LastInst, TBB, Cond);
274  return false;
275  }
276  return true; // Can't handle indirect branch.
277  }
278 
279  // Get the instruction before it if it is a terminator.
280  MachineInstr *SecondLastInst = &*I;
281  unsigned SecondLastOpc = SecondLastInst->getOpcode();
282 
283  // If AllowModify is true and the block ends with two or more unconditional
284  // branches, delete all but the first unconditional branch.
285  if (AllowModify && isUncondBranchOpcode(LastOpc)) {
286  while (isUncondBranchOpcode(SecondLastOpc)) {
287  LastInst->eraseFromParent();
288  LastInst = SecondLastInst;
289  LastOpc = LastInst->getOpcode();
290  if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
291  // Return now; the only terminator is an unconditional branch.
292  TBB = LastInst->getOperand(0).getMBB();
293  return false;
294  } else {
295  SecondLastInst = &*I;
296  SecondLastOpc = SecondLastInst->getOpcode();
297  }
298  }
299  }
300 
301  // If we're allowed to modify and the block ends in an unconditional branch
302  // which could simply fallthrough, remove the branch. (Note: This case only
303  // matters when we can't understand the whole sequence, otherwise it's also
304  // handled by BranchFolding.cpp.)
305  if (AllowModify && isUncondBranchOpcode(LastOpc) &&
306  MBB.isLayoutSuccessor(LastInst->getOperand(0).getMBB())) {
307  LastInst->eraseFromParent();
308  LastInst = SecondLastInst;
309  LastOpc = LastInst->getOpcode();
310  if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
311  assert(!isUncondBranchOpcode(LastOpc) &&
312  "unreachable unconditional branches removed above");
313 
314  if (isCondBranchOpcode(LastOpc)) {
315  // Block ends with fall-through condbranch.
316  parseCondBranch(LastInst, TBB, Cond);
317  return false;
318  }
319  return true; // Can't handle indirect branch.
320  } else {
321  SecondLastInst = &*I;
322  SecondLastOpc = SecondLastInst->getOpcode();
323  }
324  }
325 
326  // If there are three terminators, we don't know what sort of block this is.
327  if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I))
328  return true;
329 
330  // If the block ends with a B and a Bcc, handle it.
331  if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
332  parseCondBranch(SecondLastInst, TBB, Cond);
333  FBB = LastInst->getOperand(0).getMBB();
334  return false;
335  }
336 
337  // If the block ends with two unconditional branches, handle it. The second
338  // one is not executed, so remove it.
339  if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
340  TBB = SecondLastInst->getOperand(0).getMBB();
341  I = LastInst;
342  if (AllowModify)
343  I->eraseFromParent();
344  return false;
345  }
346 
347  // ...likewise if it ends with an indirect branch followed by an unconditional
348  // branch.
349  if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
350  I = LastInst;
351  if (AllowModify)
352  I->eraseFromParent();
353  return true;
354  }
355 
356  // Otherwise, can't handle this.
357  return true;
358 }
359 
360 bool AArch64InstrInfo::analyzeBranchPredicate(MachineBasicBlock &MBB,
361  MachineBranchPredicate &MBP,
362  bool AllowModify) const {
363  // For the moment, handle only a block which ends with a cb(n)zx followed by
364  // a fallthrough. Why this? Because it is a common form.
365  // TODO: Should we handle b.cc?
366 
367  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
368  if (I == MBB.end())
369  return true;
370 
371  // Skip over SpeculationBarrierEndBB terminators
372  if (I->getOpcode() == AArch64::SpeculationBarrierISBDSBEndBB ||
373  I->getOpcode() == AArch64::SpeculationBarrierSBEndBB) {
374  --I;
375  }
376 
377  if (!isUnpredicatedTerminator(*I))
378  return true;
379 
380  // Get the last instruction in the block.
381  MachineInstr *LastInst = &*I;
382  unsigned LastOpc = LastInst->getOpcode();
383  if (!isCondBranchOpcode(LastOpc))
384  return true;
385 
386  switch (LastOpc) {
387  default:
388  return true;
389  case AArch64::CBZW:
390  case AArch64::CBZX:
391  case AArch64::CBNZW:
392  case AArch64::CBNZX:
393  break;
394  };
395 
396  MBP.TrueDest = LastInst->getOperand(1).getMBB();
397  assert(MBP.TrueDest && "expected!");
398  MBP.FalseDest = MBB.getNextNode();
399 
400  MBP.ConditionDef = nullptr;
401  MBP.SingleUseCondition = false;
402 
403  MBP.LHS = LastInst->getOperand(0);
404  MBP.RHS = MachineOperand::CreateImm(0);
405  MBP.Predicate = LastOpc == AArch64::CBNZX ? MachineBranchPredicate::PRED_NE
406  : MachineBranchPredicate::PRED_EQ;
407  return false;
408 }
409 
410 bool AArch64InstrInfo::reverseBranchCondition(
411  SmallVectorImpl<MachineOperand> &Cond) const {
412  if (Cond[0].getImm() != -1) {
413  // Regular Bcc
414  AArch64CC::CondCode CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
415  Cond[0].setImm(AArch64CC::getInvertedCondCode(CC));
416  } else {
417  // Folded compare-and-branch
418  switch (Cond[1].getImm()) {
419  default:
420  llvm_unreachable("Unknown conditional branch!");
421  case AArch64::CBZW:
422  Cond[1].setImm(AArch64::CBNZW);
423  break;
424  case AArch64::CBNZW:
425  Cond[1].setImm(AArch64::CBZW);
426  break;
427  case AArch64::CBZX:
428  Cond[1].setImm(AArch64::CBNZX);
429  break;
430  case AArch64::CBNZX:
431  Cond[1].setImm(AArch64::CBZX);
432  break;
433  case AArch64::TBZW:
434  Cond[1].setImm(AArch64::TBNZW);
435  break;
436  case AArch64::TBNZW:
437  Cond[1].setImm(AArch64::TBZW);
438  break;
439  case AArch64::TBZX:
440  Cond[1].setImm(AArch64::TBNZX);
441  break;
442  case AArch64::TBNZX:
443  Cond[1].setImm(AArch64::TBZX);
444  break;
445  }
446  }
447 
448  return false;
449 }
450 
451 unsigned AArch64InstrInfo::removeBranch(MachineBasicBlock &MBB,
452  int *BytesRemoved) const {
453  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
454  if (I == MBB.end())
455  return 0;
456 
457  if (!isUncondBranchOpcode(I->getOpcode()) &&
458  !isCondBranchOpcode(I->getOpcode()))
459  return 0;
460 
461  // Remove the branch.
462  I->eraseFromParent();
463 
464  I = MBB.end();
465 
466  if (I == MBB.begin()) {
467  if (BytesRemoved)
468  *BytesRemoved = 4;
469  return 1;
470  }
471  --I;
472  if (!isCondBranchOpcode(I->getOpcode())) {
473  if (BytesRemoved)
474  *BytesRemoved = 4;
475  return 1;
476  }
477 
478  // Remove the branch.
479  I->eraseFromParent();
480  if (BytesRemoved)
481  *BytesRemoved = 8;
482 
483  return 2;
484 }
485 
486 void AArch64InstrInfo::instantiateCondBranch(
487  MachineBasicBlock &MBB, const DebugLoc &DL, MachineBasicBlock *TBB,
488  ArrayRef<MachineOperand> Cond) const {
489  if (Cond[0].getImm() != -1) {
490  // Regular Bcc
491  BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB);
492  } else {
493  // Folded compare-and-branch
494  // Note that we use addOperand instead of addReg to keep the flags.
495  const MachineInstrBuilder MIB =
496  BuildMI(&MBB, DL, get(Cond[1].getImm())).add(Cond[2]);
497  if (Cond.size() > 3)
498  MIB.addImm(Cond[3].getImm());
499  MIB.addMBB(TBB);
500  }
501 }
502 
503 unsigned AArch64InstrInfo::insertBranch(MachineBasicBlock &MBB,
504  MachineBasicBlock *TBB, MachineBasicBlock *FBB,
505  ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
506  // Shouldn't be a fall through.
507  assert(TBB && "insertBranch must not be told to insert a fallthrough");
508 
509  if (!FBB) {
510  if (Cond.empty()) // Unconditional branch?
511  BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB);
512  else
513  instantiateCondBranch(MBB, DL, TBB, Cond);
514 
515  if (BytesAdded)
516  *BytesAdded = 4;
517 
518  return 1;
519  }
520 
521  // Two-way conditional branch.
522  instantiateCondBranch(MBB, DL, TBB, Cond);
523  BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB);
524 
525  if (BytesAdded)
526  *BytesAdded = 8;
527 
528  return 2;
529 }
530 
531 // Find the original register that VReg is copied from.
532 static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
533  while (Register::isVirtualRegister(VReg)) {
534  const MachineInstr *DefMI = MRI.getVRegDef(VReg);
535  if (!DefMI->isFullCopy())
536  return VReg;
537  VReg = DefMI->getOperand(1).getReg();
538  }
539  return VReg;
540 }
541 
542 // Determine if VReg is defined by an instruction that can be folded into a
543 // csel instruction. If so, return the folded opcode, and the replacement
544 // register.
545 static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
546  unsigned *NewVReg = nullptr) {
547  VReg = removeCopies(MRI, VReg);
548  if (!Register::isVirtualRegister(VReg))
549  return 0;
550 
551  bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg));
552  const MachineInstr *DefMI = MRI.getVRegDef(VReg);
553  unsigned Opc = 0;
554  unsigned SrcOpNum = 0;
555  switch (DefMI->getOpcode()) {
556  case AArch64::ADDSXri:
557  case AArch64::ADDSWri:
558  // if NZCV is used, do not fold.
559  if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
560  return 0;
561  // fall-through to ADDXri and ADDWri.
562  LLVM_FALLTHROUGH;
563  case AArch64::ADDXri:
564  case AArch64::ADDWri:
565  // add x, 1 -> csinc.
566  if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 ||
567  DefMI->getOperand(3).getImm() != 0)
568  return 0;
569  SrcOpNum = 1;
570  Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
571  break;
572 
573  case AArch64::ORNXrr:
574  case AArch64::ORNWrr: {
575  // not x -> csinv, represented as orn dst, xzr, src.
576  unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
577  if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
578  return 0;
579  SrcOpNum = 2;
580  Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr;
581  break;
582  }
583 
584  case AArch64::SUBSXrr:
585  case AArch64::SUBSWrr:
586  // if NZCV is used, do not fold.
587  if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
588  return 0;
589  // fall-through to SUBXrr and SUBWrr.
590  LLVM_FALLTHROUGH;
591  case AArch64::SUBXrr:
592  case AArch64::SUBWrr: {
593  // neg x -> csneg, represented as sub dst, xzr, src.
594  unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
595  if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
596  return 0;
597  SrcOpNum = 2;
598  Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr;
599  break;
600  }
601  default:
602  return 0;
603  }
604  assert(Opc && SrcOpNum && "Missing parameters");
605 
606  if (NewVReg)
607  *NewVReg = DefMI->getOperand(SrcOpNum).getReg();
608  return Opc;
609 }
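// For example (an informal sketch of the fold this function enables):
//   %x = ADDWri %a, 1, 0            ; a + 1
//   %d = CSELWr %t, %x, <cc>        ; cc ? t : a + 1
// can become
//   %d = CSINCWr %t, %a, <cc>       ; csinc: cc ? t : a + 1
// and, similarly, ORN/SUB of the zero register map to CSINV/CSNEG.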
610 
611 bool AArch64InstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
612  ArrayRef<MachineOperand> Cond,
613  Register DstReg, Register TrueReg,
614  Register FalseReg, int &CondCycles,
615  int &TrueCycles,
616  int &FalseCycles) const {
617  // Check register classes.
618  const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
619  const TargetRegisterClass *RC =
620  RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
621  if (!RC)
622  return false;
623 
624  // Also need to check the dest regclass, in case we're trying to optimize
625  // something like:
626  // %1(gpr) = PHI %2(fpr), bb1, %(fpr), bb2
627  if (!RI.getCommonSubClass(RC, MRI.getRegClass(DstReg)))
628  return false;
629 
630  // Expanding cbz/tbz requires an extra cycle of latency on the condition.
631  unsigned ExtraCondLat = Cond.size() != 1;
632 
633  // GPRs are handled by csel.
634  // FIXME: Fold in x+1, -x, and ~x when applicable.
635  if (AArch64::GPR64allRegClass.hasSubClassEq(RC) ||
636  AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
637  // Single-cycle csel, csinc, csinv, and csneg.
638  CondCycles = 1 + ExtraCondLat;
639  TrueCycles = FalseCycles = 1;
640  if (canFoldIntoCSel(MRI, TrueReg))
641  TrueCycles = 0;
642  else if (canFoldIntoCSel(MRI, FalseReg))
643  FalseCycles = 0;
644  return true;
645  }
646 
647  // Scalar floating point is handled by fcsel.
648  // FIXME: Form fabs, fmin, and fmax when applicable.
649  if (AArch64::FPR64RegClass.hasSubClassEq(RC) ||
650  AArch64::FPR32RegClass.hasSubClassEq(RC)) {
651  CondCycles = 5 + ExtraCondLat;
652  TrueCycles = FalseCycles = 2;
653  return true;
654  }
655 
656  // Can't do vectors.
657  return false;
658 }
659 
660 void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
661  MachineBasicBlock::iterator I,
662  const DebugLoc &DL, Register DstReg,
663  ArrayRef<MachineOperand> Cond,
664  Register TrueReg, Register FalseReg) const {
665  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
666 
667  // Parse the condition code, see parseCondBranch() above.
668  AArch64CC::CondCode CC;
669  switch (Cond.size()) {
670  default:
671  llvm_unreachable("Unknown condition opcode in Cond");
672  case 1: // b.cc
673  CC = AArch64CC::CondCode(Cond[0].getImm());
674  break;
675  case 3: { // cbz/cbnz
676  // We must insert a compare against 0.
677  bool Is64Bit;
678  switch (Cond[1].getImm()) {
679  default:
680  llvm_unreachable("Unknown branch opcode in Cond");
681  case AArch64::CBZW:
682  Is64Bit = false;
683  CC = AArch64CC::EQ;
684  break;
685  case AArch64::CBZX:
686  Is64Bit = true;
687  CC = AArch64CC::EQ;
688  break;
689  case AArch64::CBNZW:
690  Is64Bit = false;
691  CC = AArch64CC::NE;
692  break;
693  case AArch64::CBNZX:
694  Is64Bit = true;
695  CC = AArch64CC::NE;
696  break;
697  }
698  Register SrcReg = Cond[2].getReg();
699  if (Is64Bit) {
700  // cmp reg, #0 is actually subs xzr, reg, #0.
701  MRI.constrainRegClass(SrcReg, &AArch64::GPR64spRegClass);
702  BuildMI(MBB, I, DL, get(AArch64::SUBSXri), AArch64::XZR)
703  .addReg(SrcReg)
704  .addImm(0)
705  .addImm(0);
706  } else {
707  MRI.constrainRegClass(SrcReg, &AArch64::GPR32spRegClass);
708  BuildMI(MBB, I, DL, get(AArch64::SUBSWri), AArch64::WZR)
709  .addReg(SrcReg)
710  .addImm(0)
711  .addImm(0);
712  }
713  break;
714  }
715  case 4: { // tbz/tbnz
716  // We must insert a tst instruction.
717  switch (Cond[1].getImm()) {
718  default:
719  llvm_unreachable("Unknown branch opcode in Cond");
720  case AArch64::TBZW:
721  case AArch64::TBZX:
722  CC = AArch64CC::EQ;
723  break;
724  case AArch64::TBNZW:
725  case AArch64::TBNZX:
726  CC = AArch64CC::NE;
727  break;
728  }
729  // cmp reg, #foo is actually ands xzr, reg, #1<<foo.
730  if (Cond[1].getImm() == AArch64::TBZW || Cond[1].getImm() == AArch64::TBNZW)
731  BuildMI(MBB, I, DL, get(AArch64::ANDSWri), AArch64::WZR)
732  .addReg(Cond[2].getReg())
733  .addImm(
734  AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 32));
735  else
736  BuildMI(MBB, I, DL, get(AArch64::ANDSXri), AArch64::XZR)
737  .addReg(Cond[2].getReg())
738  .addImm(
739  AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 64));
740  break;
741  }
742  }
743 
744  unsigned Opc = 0;
745  const TargetRegisterClass *RC = nullptr;
746  bool TryFold = false;
747  if (MRI.constrainRegClass(DstReg, &AArch64::GPR64RegClass)) {
748  RC = &AArch64::GPR64RegClass;
749  Opc = AArch64::CSELXr;
750  TryFold = true;
751  } else if (MRI.constrainRegClass(DstReg, &AArch64::GPR32RegClass)) {
752  RC = &AArch64::GPR32RegClass;
753  Opc = AArch64::CSELWr;
754  TryFold = true;
755  } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR64RegClass)) {
756  RC = &AArch64::FPR64RegClass;
757  Opc = AArch64::FCSELDrrr;
758  } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR32RegClass)) {
759  RC = &AArch64::FPR32RegClass;
760  Opc = AArch64::FCSELSrrr;
761  }
762  assert(RC && "Unsupported regclass");
763 
764  // Try folding simple instructions into the csel.
765  if (TryFold) {
766  unsigned NewVReg = 0;
767  unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewVReg);
768  if (FoldedOpc) {
769  // The folded opcodes csinc, csinv and csneg apply the operation to
770  // FalseReg, so we need to invert the condition.
771  CC = AArch64CC::getInvertedCondCode(CC);
772  TrueReg = FalseReg;
773  } else
774  FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewVReg);
775 
776  // Fold the operation. Leave any dead instructions for DCE to clean up.
777  if (FoldedOpc) {
778  FalseReg = NewVReg;
779  Opc = FoldedOpc;
780  // This extends the live range of NewVReg.
781  MRI.clearKillFlags(NewVReg);
782  }
783  }
784 
785  // Pull all virtual registers into the appropriate class.
786  MRI.constrainRegClass(TrueReg, RC);
787  MRI.constrainRegClass(FalseReg, RC);
788 
789  // Insert the csel.
790  BuildMI(MBB, I, DL, get(Opc), DstReg)
791  .addReg(TrueReg)
792  .addReg(FalseReg)
793  .addImm(CC);
794 }
795 
796 /// Returns true if a MOVi32imm or MOVi64imm can be expanded to an ORRxx.
797 static bool canBeExpandedToORR(const MachineInstr &MI, unsigned BitSize) {
798  uint64_t Imm = MI.getOperand(1).getImm();
799  uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
800  uint64_t Encoding;
801  return AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding);
802 }
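// For example: MOVi32imm 0x00ff00ff passes processLogicalImmediate, so it can
// be materialized as a single "orr wN, wzr, #0x00ff00ff" instead of a
// movz/movk pair, which is why it is treated as cheap as a move below.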
803 
804 // FIXME: this implementation should be micro-architecture dependent, so a
805 // micro-architecture target hook should be introduced here in future.
806 bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
807  if (!Subtarget.hasCustomCheapAsMoveHandling())
808  return MI.isAsCheapAsAMove();
809 
810  const unsigned Opcode = MI.getOpcode();
811 
812  // Firstly, check cases gated by features.
813 
814  if (Subtarget.hasZeroCycleZeroingFP()) {
815  if (Opcode == AArch64::FMOVH0 ||
816  Opcode == AArch64::FMOVS0 ||
817  Opcode == AArch64::FMOVD0)
818  return true;
819  }
820 
821  if (Subtarget.hasZeroCycleZeroingGP()) {
822  if (Opcode == TargetOpcode::COPY &&
823  (MI.getOperand(1).getReg() == AArch64::WZR ||
824  MI.getOperand(1).getReg() == AArch64::XZR))
825  return true;
826  }
827 
828  // Secondly, check cases specific to sub-targets.
829 
830  if (Subtarget.hasExynosCheapAsMoveHandling()) {
831  if (isExynosCheapAsMove(MI))
832  return true;
833 
834  return MI.isAsCheapAsAMove();
835  }
836 
837  // Finally, check generic cases.
838 
839  switch (Opcode) {
840  default:
841  return false;
842 
843  // add/sub on register without shift
844  case AArch64::ADDWri:
845  case AArch64::ADDXri:
846  case AArch64::SUBWri:
847  case AArch64::SUBXri:
848  return (MI.getOperand(3).getImm() == 0);
849 
850  // logical ops on immediate
851  case AArch64::ANDWri:
852  case AArch64::ANDXri:
853  case AArch64::EORWri:
854  case AArch64::EORXri:
855  case AArch64::ORRWri:
856  case AArch64::ORRXri:
857  return true;
858 
859  // logical ops on register without shift
860  case AArch64::ANDWrr:
861  case AArch64::ANDXrr:
862  case AArch64::BICWrr:
863  case AArch64::BICXrr:
864  case AArch64::EONWrr:
865  case AArch64::EONXrr:
866  case AArch64::EORWrr:
867  case AArch64::EORXrr:
868  case AArch64::ORNWrr:
869  case AArch64::ORNXrr:
870  case AArch64::ORRWrr:
871  case AArch64::ORRXrr:
872  return true;
873 
874  // If MOVi32imm or MOVi64imm can be expanded into ORRWri or
875  // ORRXri, it is as cheap as MOV
876  case AArch64::MOVi32imm:
877  return canBeExpandedToORR(MI, 32);
878  case AArch64::MOVi64imm:
879  return canBeExpandedToORR(MI, 64);
880  }
881 
882  llvm_unreachable("Unknown opcode to check as cheap as a move!");
883 }
884 
885 bool AArch64InstrInfo::isFalkorShiftExtFast(const MachineInstr &MI) {
886  switch (MI.getOpcode()) {
887  default:
888  return false;
889 
890  case AArch64::ADDWrs:
891  case AArch64::ADDXrs:
892  case AArch64::ADDSWrs:
893  case AArch64::ADDSXrs: {
894  unsigned Imm = MI.getOperand(3).getImm();
895  unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
896  if (ShiftVal == 0)
897  return true;
898  return AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL && ShiftVal <= 5;
899  }
900 
901  case AArch64::ADDWrx:
902  case AArch64::ADDXrx:
903  case AArch64::ADDXrx64:
904  case AArch64::ADDSWrx:
905  case AArch64::ADDSXrx:
906  case AArch64::ADDSXrx64: {
907  unsigned Imm = MI.getOperand(3).getImm();
908  switch (AArch64_AM::getArithExtendType(Imm)) {
909  default:
910  return false;
911  case AArch64_AM::UXTB:
912  case AArch64_AM::UXTH:
913  case AArch64_AM::UXTW:
914  case AArch64_AM::UXTX:
915  return AArch64_AM::getArithShiftValue(Imm) <= 4;
916  }
917  }
918 
919  case AArch64::SUBWrs:
920  case AArch64::SUBSWrs: {
921  unsigned Imm = MI.getOperand(3).getImm();
922  unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
923  return ShiftVal == 0 ||
924  (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 31);
925  }
926 
927  case AArch64::SUBXrs:
928  case AArch64::SUBSXrs: {
929  unsigned Imm = MI.getOperand(3).getImm();
930  unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
931  return ShiftVal == 0 ||
932  (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 63);
933  }
934 
935  case AArch64::SUBWrx:
936  case AArch64::SUBXrx:
937  case AArch64::SUBXrx64:
938  case AArch64::SUBSWrx:
939  case AArch64::SUBSXrx:
940  case AArch64::SUBSXrx64: {
941  unsigned Imm = MI.getOperand(3).getImm();
942  switch (AArch64_AM::getArithExtendType(Imm)) {
943  default:
944  return false;
945  case AArch64_AM::UXTB:
946  case AArch64_AM::UXTH:
947  case AArch64_AM::UXTW:
948  case AArch64_AM::UXTX:
949  return AArch64_AM::getArithShiftValue(Imm) == 0;
950  }
951  }
952 
953  case AArch64::LDRBBroW:
954  case AArch64::LDRBBroX:
955  case AArch64::LDRBroW:
956  case AArch64::LDRBroX:
957  case AArch64::LDRDroW:
958  case AArch64::LDRDroX:
959  case AArch64::LDRHHroW:
960  case AArch64::LDRHHroX:
961  case AArch64::LDRHroW:
962  case AArch64::LDRHroX:
963  case AArch64::LDRQroW:
964  case AArch64::LDRQroX:
965  case AArch64::LDRSBWroW:
966  case AArch64::LDRSBWroX:
967  case AArch64::LDRSBXroW:
968  case AArch64::LDRSBXroX:
969  case AArch64::LDRSHWroW:
970  case AArch64::LDRSHWroX:
971  case AArch64::LDRSHXroW:
972  case AArch64::LDRSHXroX:
973  case AArch64::LDRSWroW:
974  case AArch64::LDRSWroX:
975  case AArch64::LDRSroW:
976  case AArch64::LDRSroX:
977  case AArch64::LDRWroW:
978  case AArch64::LDRWroX:
979  case AArch64::LDRXroW:
980  case AArch64::LDRXroX:
981  case AArch64::PRFMroW:
982  case AArch64::PRFMroX:
983  case AArch64::STRBBroW:
984  case AArch64::STRBBroX:
985  case AArch64::STRBroW:
986  case AArch64::STRBroX:
987  case AArch64::STRDroW:
988  case AArch64::STRDroX:
989  case AArch64::STRHHroW:
990  case AArch64::STRHHroX:
991  case AArch64::STRHroW:
992  case AArch64::STRHroX:
993  case AArch64::STRQroW:
994  case AArch64::STRQroX:
995  case AArch64::STRSroW:
996  case AArch64::STRSroX:
997  case AArch64::STRWroW:
998  case AArch64::STRWroX:
999  case AArch64::STRXroW:
1000  case AArch64::STRXroX: {
1001  unsigned IsSigned = MI.getOperand(3).getImm();
1002  return !IsSigned;
1003  }
1004  }
1005 }
1006 
1007 bool AArch64InstrInfo::isSEHInstruction(const MachineInstr &MI) {
1008  unsigned Opc = MI.getOpcode();
1009  switch (Opc) {
1010  default:
1011  return false;
1012  case AArch64::SEH_StackAlloc:
1013  case AArch64::SEH_SaveFPLR:
1014  case AArch64::SEH_SaveFPLR_X:
1015  case AArch64::SEH_SaveReg:
1016  case AArch64::SEH_SaveReg_X:
1017  case AArch64::SEH_SaveRegP:
1018  case AArch64::SEH_SaveRegP_X:
1019  case AArch64::SEH_SaveFReg:
1020  case AArch64::SEH_SaveFReg_X:
1021  case AArch64::SEH_SaveFRegP:
1022  case AArch64::SEH_SaveFRegP_X:
1023  case AArch64::SEH_SetFP:
1024  case AArch64::SEH_AddFP:
1025  case AArch64::SEH_Nop:
1026  case AArch64::SEH_PrologEnd:
1027  case AArch64::SEH_EpilogStart:
1028  case AArch64::SEH_EpilogEnd:
1029  return true;
1030  }
1031 }
1032 
1033 bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
1034  Register &SrcReg, Register &DstReg,
1035  unsigned &SubIdx) const {
1036  switch (MI.getOpcode()) {
1037  default:
1038  return false;
1039  case AArch64::SBFMXri: // aka sxtw
1040  case AArch64::UBFMXri: // aka uxtw
1041  // Check for the 32 -> 64 bit extension case, these instructions can do
1042  // much more.
1043  if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != 31)
1044  return false;
1045  // This is a signed or unsigned 32 -> 64 bit extension.
1046  SrcReg = MI.getOperand(1).getReg();
1047  DstReg = MI.getOperand(0).getReg();
1048  SubIdx = AArch64::sub_32;
1049  return true;
1050  }
1051 }
1052 
1053 bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
1054  const MachineInstr &MIa, const MachineInstr &MIb) const {
1055  const TargetRegisterInfo *TRI = &getRegisterInfo();
1056  const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
1057  int64_t OffsetA = 0, OffsetB = 0;
1058  unsigned WidthA = 0, WidthB = 0;
1059  bool OffsetAIsScalable = false, OffsetBIsScalable = false;
1060 
1061  assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
1062  assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");
1063 
1064  if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() ||
1065  MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
1066  return false;
1067 
1068  // Retrieve the base, the offset from the base, and the width. The width is
1069  // the size of memory that is being loaded/stored (e.g. 1, 2, 4, 8). If the
1070  // bases are identical, and the offset of the lower memory access plus its
1071  // width doesn't overlap the offset of the higher memory access,
1072  // then the memory accesses are different.
1073  // If OffsetAIsScalable and OffsetBIsScalable are both true, they
1074  // are assumed to have the same scale (vscale).
1075  if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, OffsetAIsScalable,
1076  WidthA, TRI) &&
1077  getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, OffsetBIsScalable,
1078  WidthB, TRI)) {
1079  if (BaseOpA->isIdenticalTo(*BaseOpB) &&
1080  OffsetAIsScalable == OffsetBIsScalable) {
1081  int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
1082  int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
1083  int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
1084  if (LowOffset + LowWidth <= HighOffset)
1085  return true;
1086  }
1087  }
1088  return false;
1089 }
1090 
1091 bool AArch64InstrInfo::isSchedulingBoundary(const MachineInstr &MI,
1092  const MachineBasicBlock *MBB,
1093  const MachineFunction &MF) const {
1094  if (TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF))
1095  return true;
1096  switch (MI.getOpcode()) {
1097  case AArch64::HINT:
1098  // CSDB hints are scheduling barriers.
1099  if (MI.getOperand(0).getImm() == 0x14)
1100  return true;
1101  break;
1102  case AArch64::DSB:
1103  case AArch64::ISB:
1104  // DSB and ISB also are scheduling barriers.
1105  return true;
1106  default:;
1107  }
1108  return isSEHInstruction(MI);
1109 }
1110 
1111 /// analyzeCompare - For a comparison instruction, return the source registers
1112 /// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
1113 /// Return true if the comparison instruction can be analyzed.
1114 bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
1115  Register &SrcReg2, int64_t &CmpMask,
1116  int64_t &CmpValue) const {
1117  // The first operand can be a frame index where we'd normally expect a
1118  // register.
1119  assert(MI.getNumOperands() >= 2 && "All AArch64 cmps should have 2 operands");
1120  if (!MI.getOperand(1).isReg())
1121  return false;
1122 
1123  switch (MI.getOpcode()) {
1124  default:
1125  break;
1126  case AArch64::PTEST_PP:
1127  SrcReg = MI.getOperand(0).getReg();
1128  SrcReg2 = MI.getOperand(1).getReg();
1129  // Not sure about the mask and value for now...
1130  CmpMask = ~0;
1131  CmpValue = 0;
1132  return true;
1133  case AArch64::SUBSWrr:
1134  case AArch64::SUBSWrs:
1135  case AArch64::SUBSWrx:
1136  case AArch64::SUBSXrr:
1137  case AArch64::SUBSXrs:
1138  case AArch64::SUBSXrx:
1139  case AArch64::ADDSWrr:
1140  case AArch64::ADDSWrs:
1141  case AArch64::ADDSWrx:
1142  case AArch64::ADDSXrr:
1143  case AArch64::ADDSXrs:
1144  case AArch64::ADDSXrx:
1145  // Replace SUBSWrr with SUBWrr if NZCV is not used.
1146  SrcReg = MI.getOperand(1).getReg();
1147  SrcReg2 = MI.getOperand(2).getReg();
1148  CmpMask = ~0;
1149  CmpValue = 0;
1150  return true;
1151  case AArch64::SUBSWri:
1152  case AArch64::ADDSWri:
1153  case AArch64::SUBSXri:
1154  case AArch64::ADDSXri:
1155  SrcReg = MI.getOperand(1).getReg();
1156  SrcReg2 = 0;
1157  CmpMask = ~0;
1158  CmpValue = MI.getOperand(2).getImm();
1159  return true;
1160  case AArch64::ANDSWri:
1161  case AArch64::ANDSXri:
1162  // ANDS does not use the same encoding scheme as the other xxxS
1163  // instructions.
1164  SrcReg = MI.getOperand(1).getReg();
1165  SrcReg2 = 0;
1166  CmpMask = ~0;
1167  CmpValue = AArch64_AM::decodeLogicalImmediate(
1168  MI.getOperand(2).getImm(),
1169  MI.getOpcode() == AArch64::ANDSWri ? 32 : 64);
1170  return true;
1171  }
1172 
1173  return false;
1174 }
1175 
1176 static bool UpdateOperandRegClass(MachineInstr &Instr) {
1177  MachineBasicBlock *MBB = Instr.getParent();
1178  assert(MBB && "Can't get MachineBasicBlock here");
1179  MachineFunction *MF = MBB->getParent();
1180  assert(MF && "Can't get MachineFunction here");
1181  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
1182  const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
1183  MachineRegisterInfo *MRI = &MF->getRegInfo();
1184 
1185  for (unsigned OpIdx = 0, EndIdx = Instr.getNumOperands(); OpIdx < EndIdx;
1186  ++OpIdx) {
1187  MachineOperand &MO = Instr.getOperand(OpIdx);
1188  const TargetRegisterClass *OpRegCstraints =
1189  Instr.getRegClassConstraint(OpIdx, TII, TRI);
1190 
1191  // If there's no constraint, there's nothing to do.
1192  if (!OpRegCstraints)
1193  continue;
1194  // If the operand is a frame index, there's nothing to do here.
1195  // A frame index operand will resolve correctly during PEI.
1196  if (MO.isFI())
1197  continue;
1198 
1199  assert(MO.isReg() &&
1200  "Operand has register constraints without being a register!");
1201 
1202  Register Reg = MO.getReg();
1203  if (Register::isPhysicalRegister(Reg)) {
1204  if (!OpRegCstraints->contains(Reg))
1205  return false;
1206  } else if (!OpRegCstraints->hasSubClassEq(MRI->getRegClass(Reg)) &&
1207  !MRI->constrainRegClass(Reg, OpRegCstraints))
1208  return false;
1209  }
1210 
1211  return true;
1212 }
1213 
1214 /// Return the opcode that does not set flags when possible - otherwise
1215 /// return the original opcode. The caller is responsible to do the actual
1216 /// substitution and legality checking.
1217 static unsigned convertToNonFlagSettingOpc(const MachineInstr &MI) {
1218  // Don't convert all compare instructions, because for some the zero register
1219  // encoding becomes the sp register.
1220  bool MIDefinesZeroReg = false;
1221  if (MI.definesRegister(AArch64::WZR) || MI.definesRegister(AArch64::XZR))
1222  MIDefinesZeroReg = true;
1223 
1224  switch (MI.getOpcode()) {
1225  default:
1226  return MI.getOpcode();
1227  case AArch64::ADDSWrr:
1228  return AArch64::ADDWrr;
1229  case AArch64::ADDSWri:
1230  return MIDefinesZeroReg ? AArch64::ADDSWri : AArch64::ADDWri;
1231  case AArch64::ADDSWrs:
1232  return MIDefinesZeroReg ? AArch64::ADDSWrs : AArch64::ADDWrs;
1233  case AArch64::ADDSWrx:
1234  return AArch64::ADDWrx;
1235  case AArch64::ADDSXrr:
1236  return AArch64::ADDXrr;
1237  case AArch64::ADDSXri:
1238  return MIDefinesZeroReg ? AArch64::ADDSXri : AArch64::ADDXri;
1239  case AArch64::ADDSXrs:
1240  return MIDefinesZeroReg ? AArch64::ADDSXrs : AArch64::ADDXrs;
1241  case AArch64::ADDSXrx:
1242  return AArch64::ADDXrx;
1243  case AArch64::SUBSWrr:
1244  return AArch64::SUBWrr;
1245  case AArch64::SUBSWri:
1246  return MIDefinesZeroReg ? AArch64::SUBSWri : AArch64::SUBWri;
1247  case AArch64::SUBSWrs:
1248  return MIDefinesZeroReg ? AArch64::SUBSWrs : AArch64::SUBWrs;
1249  case AArch64::SUBSWrx:
1250  return AArch64::SUBWrx;
1251  case AArch64::SUBSXrr:
1252  return AArch64::SUBXrr;
1253  case AArch64::SUBSXri:
1254  return MIDefinesZeroReg ? AArch64::SUBSXri : AArch64::SUBXri;
1255  case AArch64::SUBSXrs:
1256  return MIDefinesZeroReg ? AArch64::SUBSXrs : AArch64::SUBXrs;
1257  case AArch64::SUBSXrx:
1258  return AArch64::SUBXrx;
1259  }
1260 }
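// For example: "SUBSWri %w8, %w9, 1, 0" whose NZCV def is dead becomes
// "SUBWri %w8, %w9, 1, 0"; the S form is kept only where dropping it would
// turn a WZR/XZR destination encoding into SP (see MIDefinesZeroReg above).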
1261 
1262 enum AccessKind { AK_Write = 0x01, AK_Read = 0x10, AK_All = 0x11 };
1263 
1264 /// True when condition flags are accessed (either by writing or reading)
1265 /// on the instruction trace starting at From and ending at To.
1266 ///
1267 /// Note: If From and To are from different blocks it's assumed CC are accessed
1268 /// on the path.
1269 static bool areCFlagsAccessedBetweenInstrs(
1270  MachineBasicBlock::iterator From, MachineBasicBlock::iterator To,
1271  const TargetRegisterInfo *TRI, const AccessKind AccessToCheck = AK_All) {
1272  // Early exit if To is at the beginning of the BB.
1273  if (To == To->getParent()->begin())
1274  return true;
1275 
1276  // Check whether the instructions are in the same basic block
1277  // If not, assume the condition flags might get modified somewhere.
1278  if (To->getParent() != From->getParent())
1279  return true;
1280 
1281  // From must be above To.
1282  assert(std::any_of(
1283  ++To.getReverse(), To->getParent()->rend(),
1284  [From](MachineInstr &MI) { return MI.getIterator() == From; }));
1285 
1286  // We iterate backward starting at \p To until we hit \p From.
1287  for (const MachineInstr &Instr :
1288  instructionsWithoutDebug(++To.getReverse(), From.getReverse())) {
1289  if (((AccessToCheck & AK_Write) &&
1290  Instr.modifiesRegister(AArch64::NZCV, TRI)) ||
1291  ((AccessToCheck & AK_Read) && Instr.readsRegister(AArch64::NZCV, TRI)))
1292  return true;
1293  }
1294  return false;
1295 }
1296 
1297 /// optimizePTestInstr - Attempt to remove a ptest of a predicate-generating
1298 /// operation which could set the flags in an identical manner
1299 bool AArch64InstrInfo::optimizePTestInstr(
1300  MachineInstr *PTest, unsigned MaskReg, unsigned PredReg,
1301  const MachineRegisterInfo *MRI) const {
1302  auto *Mask = MRI->getUniqueVRegDef(MaskReg);
1303  auto *Pred = MRI->getUniqueVRegDef(PredReg);
1304  auto NewOp = Pred->getOpcode();
1305  bool OpChanged = false;
1306 
1307  unsigned MaskOpcode = Mask->getOpcode();
1308  unsigned PredOpcode = Pred->getOpcode();
1309  bool PredIsPTestLike = isPTestLikeOpcode(PredOpcode);
1310  bool PredIsWhileLike = isWhileOpcode(PredOpcode);
1311 
1312  if (isPTrueOpcode(MaskOpcode) && (PredIsPTestLike || PredIsWhileLike)) {
1313  // For PTEST(PTRUE, OTHER_INST), PTEST is redundant when PTRUE doesn't
1314  // deactivate any lanes OTHER_INST might set.
1315  uint64_t MaskElementSize = getElementSizeForOpcode(MaskOpcode);
1316  uint64_t PredElementSize = getElementSizeForOpcode(PredOpcode);
1317 
1318  // Must be an all active predicate of matching element size.
1319  if ((PredElementSize != MaskElementSize) ||
1320  (Mask->getOperand(1).getImm() != 31))
1321  return false;
1322 
1323  // Fallthough to simply remove the PTEST.
1324  } else if ((Mask == Pred) && (PredIsPTestLike || PredIsWhileLike)) {
1325  // For PTEST(PG, PG), PTEST is redundant when PG is the result of an
1326  // instruction that sets the flags as PTEST would.
1327 
1328  // Fallthough to simply remove the PTEST.
1329  } else if (PredIsPTestLike) {
1330  // For PTEST(PG_1, PTEST_LIKE(PG2, ...)), PTEST is redundant when both
1331  // instructions use the same predicate.
1332  auto PTestLikeMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
1333  if (Mask != PTestLikeMask)
1334  return false;
1335 
1336  // Fallthough to simply remove the PTEST.
1337  } else {
1338  switch (Pred->getOpcode()) {
1339  case AArch64::BRKB_PPzP:
1340  case AArch64::BRKPB_PPzPP: {
1341  // Op 0 is chain, 1 is the mask, 2 the previous predicate to
1342  // propagate, 3 the new predicate.
1343 
1344  // Check to see if our mask is the same as the brkpb's. If
1345  // not the resulting flag bits may be different and we
1346  // can't remove the ptest.
1347  auto *PredMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
1348  if (Mask != PredMask)
1349  return false;
1350 
1351  // Switch to the new opcode
1352  NewOp = Pred->getOpcode() == AArch64::BRKB_PPzP ? AArch64::BRKBS_PPzP
1353  : AArch64::BRKPBS_PPzPP;
1354  OpChanged = true;
1355  break;
1356  }
1357  case AArch64::BRKN_PPzP: {
1358  auto *PredMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
1359  if (Mask != PredMask)
1360  return false;
1361 
1362  NewOp = AArch64::BRKNS_PPzP;
1363  OpChanged = true;
1364  break;
1365  }
1366  case AArch64::RDFFR_PPz: {
1367  // rdffr p1.b, PredMask=p0/z <--- Definition of Pred
1368  // ptest Mask=p0, Pred=p1.b <--- If equal masks, remove this and use
1369  // `rdffrs p1.b, p0/z` above.
1370  auto *PredMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
1371  if (Mask != PredMask)
1372  return false;
1373 
1374  NewOp = AArch64::RDFFRS_PPz;
1375  OpChanged = true;
1376  break;
1377  }
1378  default:
1379  // Bail out if we don't recognize the input
1380  return false;
1381  }
1382  }
1383 
1384  auto *TRI = MRI->getTargetRegisterInfo();
1385 
1386  // If another instruction between Pred and PTest accesses flags, don't remove
1387  // the ptest or update the earlier instruction to modify them.
1388  if (areCFlagsAccessedBetweenInstrs(Pred, PTest, TRI))
1389  return false;
1390 
1391  // If we pass all the checks, it's safe to remove the PTEST and use the flags
1392  // as they are prior to PTEST. Sometimes this requires the tested PTEST
1393  // operand to be replaced with an equivalent instruction that also sets the
1394  // flags.
1395  Pred->setDesc(get(NewOp));
1396  PTest->eraseFromParent();
1397  if (OpChanged) {
1398  bool succeeded = UpdateOperandRegClass(*Pred);
1399  (void)succeeded;
1400  assert(succeeded && "Operands have incompatible register classes!");
1401  Pred->addRegisterDefined(AArch64::NZCV, TRI);
1402  }
1403 
1404  // Ensure that the flags def is live.
1405  if (Pred->registerDefIsDead(AArch64::NZCV, TRI)) {
1406  unsigned i = 0, e = Pred->getNumOperands();
1407  for (; i != e; ++i) {
1408  MachineOperand &MO = Pred->getOperand(i);
1409  if (MO.isReg() && MO.isDef() && MO.getReg() == AArch64::NZCV) {
1410  MO.setIsDead(false);
1411  break;
1412  }
1413  }
1414  }
1415  return true;
1416 }
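// For example (informal): in a sequence such as
//   ptrue   p0.b                    ; all-active mask (pattern 31)
//   whilelo p1.b, w0, w1            ; while-like op, already sets NZCV
//   ptest   p0, p1.b
// the ptest is redundant and is erased, letting later branches use the flags
// produced by the whilelo directly.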
1417 
1418 /// Try to optimize a compare instruction. A compare instruction is an
1419 /// instruction which produces AArch64::NZCV. It can be truly a compare
1420 /// instruction
1421 /// when there are no uses of its destination register.
1422 ///
1423 /// The following steps are tried in order:
1424 /// 1. Convert CmpInstr into an unconditional version.
1425 /// 2. Remove CmpInstr if above there is an instruction producing a needed
1426 /// condition code or an instruction which can be converted into such an
1427 /// instruction.
1428 /// Only comparison with zero is supported.
1429 bool AArch64InstrInfo::optimizeCompareInstr(
1430  MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask,
1431  int64_t CmpValue, const MachineRegisterInfo *MRI) const {
1432  assert(CmpInstr.getParent());
1433  assert(MRI);
1434 
1435  // Replace SUBSWrr with SUBWrr if NZCV is not used.
1436  int DeadNZCVIdx = CmpInstr.findRegisterDefOperandIdx(AArch64::NZCV, true);
1437  if (DeadNZCVIdx != -1) {
1438  if (CmpInstr.definesRegister(AArch64::WZR) ||
1439  CmpInstr.definesRegister(AArch64::XZR)) {
1440  CmpInstr.eraseFromParent();
1441  return true;
1442  }
1443  unsigned Opc = CmpInstr.getOpcode();
1444  unsigned NewOpc = convertToNonFlagSettingOpc(CmpInstr);
1445  if (NewOpc == Opc)
1446  return false;
1447  const MCInstrDesc &MCID = get(NewOpc);
1448  CmpInstr.setDesc(MCID);
1449  CmpInstr.RemoveOperand(DeadNZCVIdx);
1450  bool succeeded = UpdateOperandRegClass(CmpInstr);
1451  (void)succeeded;
1452  assert(succeeded && "Some operands reg class are incompatible!");
1453  return true;
1454  }
1455 
1456  if (CmpInstr.getOpcode() == AArch64::PTEST_PP)
1457  return optimizePTestInstr(&CmpInstr, SrcReg, SrcReg2, MRI);
1458 
1459  if (SrcReg2 != 0)
1460  return false;
1461 
1462  // CmpInstr is a Compare instruction if destination register is not used.
1463  if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
1464  return false;
1465 
1466  if (CmpValue == 0 && substituteCmpToZero(CmpInstr, SrcReg, *MRI))
1467  return true;
1468  return (CmpValue == 0 || CmpValue == 1) &&
1469  removeCmpToZeroOrOne(CmpInstr, SrcReg, CmpValue, *MRI);
1470 }
1471 
1472 /// Get opcode of S version of Instr.
1473 /// If Instr is S version its opcode is returned.
1474 /// AArch64::INSTRUCTION_LIST_END is returned if Instr does not have an S version
1475 /// or we are not interested in it.
1476 static unsigned sForm(MachineInstr &Instr) {
1477  switch (Instr.getOpcode()) {
1478  default:
1479  return AArch64::INSTRUCTION_LIST_END;
1480 
1481  case AArch64::ADDSWrr:
1482  case AArch64::ADDSWri:
1483  case AArch64::ADDSXrr:
1484  case AArch64::ADDSXri:
1485  case AArch64::SUBSWrr:
1486  case AArch64::SUBSWri:
1487  case AArch64::SUBSXrr:
1488  case AArch64::SUBSXri:
1489  return Instr.getOpcode();
1490 
1491  case AArch64::ADDWrr:
1492  return AArch64::ADDSWrr;
1493  case AArch64::ADDWri:
1494  return AArch64::ADDSWri;
1495  case AArch64::ADDXrr:
1496  return AArch64::ADDSXrr;
1497  case AArch64::ADDXri:
1498  return AArch64::ADDSXri;
1499  case AArch64::ADCWr:
1500  return AArch64::ADCSWr;
1501  case AArch64::ADCXr:
1502  return AArch64::ADCSXr;
1503  case AArch64::SUBWrr:
1504  return AArch64::SUBSWrr;
1505  case AArch64::SUBWri:
1506  return AArch64::SUBSWri;
1507  case AArch64::SUBXrr:
1508  return AArch64::SUBSXrr;
1509  case AArch64::SUBXri:
1510  return AArch64::SUBSXri;
1511  case AArch64::SBCWr:
1512  return AArch64::SBCSWr;
1513  case AArch64::SBCXr:
1514  return AArch64::SBCSXr;
1515  case AArch64::ANDWri:
1516  return AArch64::ANDSWri;
1517  case AArch64::ANDXri:
1518  return AArch64::ANDSXri;
1519  }
1520 }
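// For example: sForm(ADDWrr) == ADDSWrr, i.e. "add w0, w1, w2" has the
// flag-setting twin "adds w0, w1, w2"; opcodes without such a twin map to
// AArch64::INSTRUCTION_LIST_END.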
1521 
1522 /// Check if AArch64::NZCV should be alive in successors of MBB.
1523 static bool areCFlagsAliveInSuccessors(const MachineBasicBlock *MBB) {
1524  for (auto *BB : MBB->successors())
1525  if (BB->isLiveIn(AArch64::NZCV))
1526  return true;
1527  return false;
1528 }
1529 
1530 /// \returns The condition code operand index for \p Instr if it is a branch
1531 /// or select and -1 otherwise.
1532 static int
1533 findCondCodeUseOperandIdxForBranchOrSelect(const MachineInstr &Instr) {
1534  switch (Instr.getOpcode()) {
1535  default:
1536  return -1;
1537 
1538  case AArch64::Bcc: {
1539  int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
1540  assert(Idx >= 2);
1541  return Idx - 2;
1542  }
1543 
1544  case AArch64::CSINVWr:
1545  case AArch64::CSINVXr:
1546  case AArch64::CSINCWr:
1547  case AArch64::CSINCXr:
1548  case AArch64::CSELWr:
1549  case AArch64::CSELXr:
1550  case AArch64::CSNEGWr:
1551  case AArch64::CSNEGXr:
1552  case AArch64::FCSELSrrr:
1553  case AArch64::FCSELDrrr: {
1554  int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
1555  assert(Idx >= 1);
1556  return Idx - 1;
1557  }
1558  }
1559 }
1560 
1561 namespace {
1562 
1563 struct UsedNZCV {
1564  bool N = false;
1565  bool Z = false;
1566  bool C = false;
1567  bool V = false;
1568 
1569  UsedNZCV() = default;
1570 
1571  UsedNZCV &operator|=(const UsedNZCV &UsedFlags) {
1572  this->N |= UsedFlags.N;
1573  this->Z |= UsedFlags.Z;
1574  this->C |= UsedFlags.C;
1575  this->V |= UsedFlags.V;
1576  return *this;
1577  }
1578 };
1579 
1580 } // end anonymous namespace
1581 
1582 /// Find a condition code used by the instruction.
1583 /// Returns AArch64CC::Invalid if either the instruction does not use condition
1584 /// codes or we don't optimize CmpInstr in the presence of such instructions.
1585 static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr) {
1586  int CCIdx = findCondCodeUseOperandIdxForBranchOrSelect(Instr);
1587  return CCIdx >= 0 ? static_cast<AArch64CC::CondCode>(
1588  Instr.getOperand(CCIdx).getImm())
1589  : AArch64CC::Invalid;
1590 }
1591 
1592 static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC) {
1593  assert(CC != AArch64CC::Invalid);
1594  UsedNZCV UsedFlags;
1595  switch (CC) {
1596  default:
1597  break;
1598 
1599  case AArch64CC::EQ: // Z set
1600  case AArch64CC::NE: // Z clear
1601  UsedFlags.Z = true;
1602  break;
1603 
1604  case AArch64CC::HI: // Z clear and C set
1605  case AArch64CC::LS: // Z set or C clear
1606  UsedFlags.Z = true;
1607  LLVM_FALLTHROUGH;
1608  case AArch64CC::HS: // C set
1609  case AArch64CC::LO: // C clear
1610  UsedFlags.C = true;
1611  break;
1612 
1613  case AArch64CC::MI: // N set
1614  case AArch64CC::PL: // N clear
1615  UsedFlags.N = true;
1616  break;
1617 
1618  case AArch64CC::VS: // V set
1619  case AArch64CC::VC: // V clear
1620  UsedFlags.V = true;
1621  break;
1622 
1623  case AArch64CC::GT: // Z clear, N and V the same
1624  case AArch64CC::LE: // Z set, N and V differ
1625  UsedFlags.Z = true;
1626  LLVM_FALLTHROUGH;
1627  case AArch64CC::GE: // N and V the same
1628  case AArch64CC::LT: // N and V differ
1629  UsedFlags.N = true;
1630  UsedFlags.V = true;
1631  break;
1632  }
1633  return UsedFlags;
1634 }
1635 
1636 /// \returns Condition flags used after \p CmpInstr in its MachineBB if they
1637 /// do not contain the C or V flags and NZCV flags are not alive in successors
1638 /// of the common \p CmpInstr and \p MI parent. \returns None otherwise.
1639 ///
1640 /// Collect instructions using that flags in \p CCUseInstrs if provided.
1641 static Optional<UsedNZCV>
1642 examineCFlagsUse(MachineInstr &MI, MachineInstr &CmpInstr,
1643  const TargetRegisterInfo &TRI,
1644  SmallVectorImpl<MachineInstr *> *CCUseInstrs = nullptr) {
1645  MachineBasicBlock *CmpParent = CmpInstr.getParent();
1646  if (MI.getParent() != CmpParent)
1647  return None;
1648 
1649  if (areCFlagsAliveInSuccessors(CmpParent))
1650  return None;
1651 
1652  UsedNZCV NZCVUsedAfterCmp;
1653  for (MachineInstr &Instr : instructionsWithoutDebug(
1654  std::next(CmpInstr.getIterator()), CmpParent->instr_end())) {
1655  if (Instr.readsRegister(AArch64::NZCV, &TRI)) {
1656  AArch64CC::CondCode CC = findCondCodeUsedByInstr(Instr);
1657  if (CC == AArch64CC::Invalid) // Unsupported conditional instruction
1658  return None;
1659  NZCVUsedAfterCmp |= getUsedNZCV(CC);
1660  if (CCUseInstrs)
1661  CCUseInstrs->push_back(&Instr);
1662  }
1663  if (Instr.modifiesRegister(AArch64::NZCV, &TRI))
1664  break;
1665  }
1666  if (NZCVUsedAfterCmp.C || NZCVUsedAfterCmp.V)
1667  return None;
1668  return NZCVUsedAfterCmp;
1669 }
1670 
1671 static bool isADDSRegImm(unsigned Opcode) {
1672  return Opcode == AArch64::ADDSWri || Opcode == AArch64::ADDSXri;
1673 }
1674 
1675 static bool isSUBSRegImm(unsigned Opcode) {
1676  return Opcode == AArch64::SUBSWri || Opcode == AArch64::SUBSXri;
1677 }
1678 
1679 /// Check if CmpInstr can be substituted by MI.
1680 ///
1681 /// CmpInstr can be substituted:
1682 /// - CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
1683 /// - and, MI and CmpInstr are from the same MachineBB
1684 /// - and, condition flags are not alive in successors of the CmpInstr parent
1685 /// - and, if MI opcode is the S form there must be no defs of flags between
1686 /// MI and CmpInstr
1687 /// or if MI opcode is not the S form there must be neither defs of flags
1688 /// nor uses of flags between MI and CmpInstr.
1689 /// - and C/V flags are not used after CmpInstr
1690 static bool canInstrSubstituteCmpInstr(MachineInstr &MI, MachineInstr &CmpInstr,
1691  const TargetRegisterInfo &TRI) {
1692  assert(sForm(MI) != AArch64::INSTRUCTION_LIST_END);
1693 
1694  const unsigned CmpOpcode = CmpInstr.getOpcode();
1695  if (!isADDSRegImm(CmpOpcode) && !isSUBSRegImm(CmpOpcode))
1696  return false;
1697 
1698  if (!examineCFlagsUse(MI, CmpInstr, TRI))
1699  return false;
1700 
1701  AccessKind AccessToCheck = AK_Write;
1702  if (sForm(MI) != MI.getOpcode())
1703  AccessToCheck = AK_All;
1704  return !areCFlagsAccessedBetweenInstrs(&MI, &CmpInstr, &TRI, AccessToCheck);
1705 }
1706 
1707 /// Substitute an instruction comparing to zero with another instruction
1708 /// which produces needed condition flags.
1709 ///
1710 /// Return true on success.
1711 bool AArch64InstrInfo::substituteCmpToZero(
1712  MachineInstr &CmpInstr, unsigned SrcReg,
1713  const MachineRegisterInfo &MRI) const {
1714  // Get the unique definition of SrcReg.
1715  MachineInstr *MI = MRI.getUniqueVRegDef(SrcReg);
1716  if (!MI)
1717  return false;
1718 
1719  const TargetRegisterInfo &TRI = getRegisterInfo();
1720 
1721  unsigned NewOpc = sForm(*MI);
1722  if (NewOpc == AArch64::INSTRUCTION_LIST_END)
1723  return false;
1724 
1725  if (!canInstrSubstituteCmpInstr(*MI, CmpInstr, TRI))
1726  return false;
1727 
1728  // Update the instruction to set NZCV.
1729  MI->setDesc(get(NewOpc));
1730  CmpInstr.eraseFromParent();
1731  bool succeeded = UpdateOperandRegClass(*MI);
1732  (void)succeeded;
1733  assert(succeeded && "Some operands reg class are incompatible!");
1734  MI->addRegisterDefined(AArch64::NZCV, &TRI);
1735  return true;
1736 }
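// For example (informal):
//   add  w8, w0, w1
//   cmp  w8, #0                     ; subs wzr, w8, #0
//   b.eq ...
// becomes
//   adds w8, w0, w1
//   b.eq ...
// because the adds already produces the Z flag the branch needs.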
1737 
1738 /// \returns True if \p CmpInstr can be removed.
1739 ///
1740 /// \p IsInvertCC is true if, after removing \p CmpInstr, condition
1741 /// codes used in \p CCUseInstrs must be inverted.
1742 static bool canCmpInstrBeRemoved(MachineInstr &MI, MachineInstr &CmpInstr,
1743  int CmpValue, const TargetRegisterInfo &TRI,
1744  SmallVectorImpl<MachineInstr *> &CCUseInstrs,
1745  bool &IsInvertCC) {
1746  assert((CmpValue == 0 || CmpValue == 1) &&
1747  "Only comparisons to 0 or 1 considered for removal!");
1748 
1749  // MI is 'CSINCWr %vreg, wzr, wzr, <cc>' or 'CSINCXr %vreg, xzr, xzr, <cc>'
1750  unsigned MIOpc = MI.getOpcode();
1751  if (MIOpc == AArch64::CSINCWr) {
1752  if (MI.getOperand(1).getReg() != AArch64::WZR ||
1753  MI.getOperand(2).getReg() != AArch64::WZR)
1754  return false;
1755  } else if (MIOpc == AArch64::CSINCXr) {
1756  if (MI.getOperand(1).getReg() != AArch64::XZR ||
1757  MI.getOperand(2).getReg() != AArch64::XZR)
1758  return false;
1759  } else {
1760  return false;
1761  }
1762  AArch64CC::CondCode MICC = findCondCodeUsedByInstr(MI);
1763  if (MICC == AArch64CC::Invalid)
1764  return false;
1765 
1766  // NZCV needs to be defined
1767  if (MI.findRegisterDefOperandIdx(AArch64::NZCV, true) != -1)
1768  return false;
1769 
1770  // CmpInstr is 'ADDS %vreg, 0' or 'SUBS %vreg, 0' or 'SUBS %vreg, 1'
1771  const unsigned CmpOpcode = CmpInstr.getOpcode();
1772  bool IsSubsRegImm = isSUBSRegImm(CmpOpcode);
1773  if (CmpValue && !IsSubsRegImm)
1774  return false;
1775  if (!CmpValue && !IsSubsRegImm && !isADDSRegImm(CmpOpcode))
1776  return false;
1777 
1778  // MI conditions allowed: eq, ne, mi, pl
1779  UsedNZCV MIUsedNZCV = getUsedNZCV(MICC);
1780  if (MIUsedNZCV.C || MIUsedNZCV.V)
1781  return false;
1782 
1783  Optional<UsedNZCV> NZCVUsedAfterCmp =
1784  examineCFlagsUse(MI, CmpInstr, TRI, &CCUseInstrs);
1785  // Condition flags are not used in CmpInstr basic block successors and only
1786  // Z or N flags are allowed to be used after CmpInstr within its basic block
1787  if (!NZCVUsedAfterCmp)
1788  return false;
1789  // Z or N flag used after CmpInstr must correspond to the flag used in MI
1790  if ((MIUsedNZCV.Z && NZCVUsedAfterCmp->N) ||
1791  (MIUsedNZCV.N && NZCVUsedAfterCmp->Z))
1792  return false;
1793  // If CmpInstr is comparison to zero MI conditions are limited to eq, ne
1794  if (MIUsedNZCV.N && !CmpValue)
1795  return false;
1796 
1797  // There must be no defs of flags between MI and CmpInstr
1798  if (areCFlagsAccessedBetweenInstrs(&MI, &CmpInstr, &TRI, AK_Write))
1799  return false;
1800 
1801  // Condition code is inverted in the following cases:
1802  // 1. MI condition is ne; CmpInstr is 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
1803  // 2. MI condition is eq, pl; CmpInstr is 'SUBS %vreg, 1'
1804  IsInvertCC = (CmpValue && (MICC == AArch64CC::EQ || MICC == AArch64CC::PL)) ||
1805  (!CmpValue && MICC == AArch64CC::NE);
1806  return true;
1807 }
1808 
1809 /// Remove comparison in csinc-cmp sequence
1810 ///
1811 /// Examples:
1812 /// 1. \code
1813 /// csinc w9, wzr, wzr, ne
1814 /// cmp w9, #0
1815 /// b.eq
1816 /// \endcode
1817 /// to
1818 /// \code
1819 /// csinc w9, wzr, wzr, ne
1820 /// b.ne
1821 /// \endcode
1822 ///
1823 /// 2. \code
1824 /// csinc x2, xzr, xzr, mi
1825 /// cmp x2, #1
1826 /// b.pl
1827 /// \endcode
1828 /// to
1829 /// \code
1830 /// csinc x2, xzr, xzr, mi
1831 /// b.pl
1832 /// \endcode
1833 ///
1834 /// \param CmpInstr comparison instruction
1835 /// \return True when comparison removed
1836 bool AArch64InstrInfo::removeCmpToZeroOrOne(
1837  MachineInstr &CmpInstr, unsigned SrcReg, int CmpValue,
1838  const MachineRegisterInfo &MRI) const {
1839  MachineInstr *MI = MRI.getUniqueVRegDef(SrcReg);
1840  if (!MI)
1841  return false;
1842  const TargetRegisterInfo &TRI = getRegisterInfo();
1843  SmallVector<MachineInstr *, 4> CCUseInstrs;
1844  bool IsInvertCC = false;
1845  if (!canCmpInstrBeRemoved(*MI, CmpInstr, CmpValue, TRI, CCUseInstrs,
1846  IsInvertCC))
1847  return false;
1848  // Make transformation
1849  CmpInstr.eraseFromParent();
1850  if (IsInvertCC) {
1851  // Invert condition codes in CmpInstr CC users
1852  for (MachineInstr *CCUseInstr : CCUseInstrs) {
1853  int Idx = findCondCodeUseOperandIdxForBranchOrSelect(*CCUseInstr);
1854  assert(Idx >= 0 && "Unexpected instruction using CC.");
1855  MachineOperand &CCOperand = CCUseInstr->getOperand(Idx);
1856  AArch64CC::CondCode CCUse = AArch64CC::getInvertedCondCode(
1857  static_cast<AArch64CC::CondCode>(CCOperand.getImm()));
1858  CCOperand.setImm(CCUse);
1859  }
1860  }
1861  return true;
1862 }
1863 
1864 bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
1865  if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD &&
1866  MI.getOpcode() != AArch64::CATCHRET)
1867  return false;
1868 
1869  MachineBasicBlock &MBB = *MI.getParent();
1870  auto &Subtarget = MBB.getParent()->getSubtarget<AArch64Subtarget>();
1871  auto TRI = Subtarget.getRegisterInfo();
1872  DebugLoc DL = MI.getDebugLoc();
1873 
1874  if (MI.getOpcode() == AArch64::CATCHRET) {
1875  // Skip to the first instruction before the epilog.
1876  const TargetInstrInfo *TII =
1878  MachineBasicBlock *TargetMBB = MI.getOperand(0).getMBB();
1880  MachineBasicBlock::iterator FirstEpilogSEH = std::prev(MBBI);
1881  while (FirstEpilogSEH->getFlag(MachineInstr::FrameDestroy) &&
1882  FirstEpilogSEH != MBB.begin())
1883  FirstEpilogSEH = std::prev(FirstEpilogSEH);
1884  if (FirstEpilogSEH != MBB.begin())
1885  FirstEpilogSEH = std::next(FirstEpilogSEH);
1886  BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADRP))
1887  .addReg(AArch64::X0, RegState::Define)
1888  .addMBB(TargetMBB);
1889  BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADDXri))
1890  .addReg(AArch64::X0, RegState::Define)
1891  .addReg(AArch64::X0)
1892  .addMBB(TargetMBB)
1893  .addImm(0);
1894  return true;
1895  }
1896 
1897  Register Reg = MI.getOperand(0).getReg();
1899  if (M.getStackProtectorGuard() == "sysreg") {
1900  const AArch64SysReg::SysReg *SrcReg =
1901  AArch64SysReg::lookupSysRegByName(M.getStackProtectorGuardReg());
1902  if (!SrcReg)
1903  report_fatal_error("Unknown SysReg for Stack Protector Guard Register");
1904 
1905  // mrs xN, sysreg
1908  .addImm(SrcReg->Encoding);
1909  int Offset = M.getStackProtectorGuardOffset();
1910  if (Offset >= 0 && Offset <= 32760 && Offset % 8 == 0) {
1911  // ldr xN, [xN, #offset]
1912  BuildMI(MBB, MI, DL, get(AArch64::LDRXui))
1913  .addDef(Reg)
1915  .addImm(Offset / 8);
1916  } else if (Offset >= -256 && Offset <= 255) {
1917  // ldur xN, [xN, #offset]
1918  BuildMI(MBB, MI, DL, get(AArch64::LDURXi))
1919  .addDef(Reg)
1921  .addImm(Offset);
1922  } else if (Offset >= -4095 && Offset <= 4095) {
1923  if (Offset > 0) {
1924  // add xN, xN, #offset
1925  BuildMI(MBB, MI, DL, get(AArch64::ADDXri))
1926  .addDef(Reg)
1928  .addImm(Offset)
1929  .addImm(0);
1930  } else {
1931  // sub xN, xN, #offset
1932  BuildMI(MBB, MI, DL, get(AArch64::SUBXri))
1933  .addDef(Reg)
1935  .addImm(-Offset)
1936  .addImm(0);
1937  }
1938  // ldr xN, [xN]
1939  BuildMI(MBB, MI, DL, get(AArch64::LDRXui))
1940  .addDef(Reg)
1942  .addImm(0);
1943  } else {
1944  // Cases whose offset is larger than +/- 4095 and not a multiple of 8, or
1945  // larger than 32760.
1946  // It might be nice to use AArch64::MOVi32imm here, which would get
1947  // expanded in PreSched2 after PostRA, but our lone scratch Reg already
1948  // contains the MRS result. findScratchNonCalleeSaveRegister() in
1949  // AArch64FrameLowering might help us find such a scratch register
1950  // though. If we failed to find a scratch register, we could emit a
1951  // stream of add instructions to build up the immediate. Or, we could try
1952  // to insert a AArch64::MOVi32imm before register allocation so that we
1953  // didn't need to scavenge for a scratch register.
1954  report_fatal_error("Unable to encode Stack Protector Guard Offset");
1955  }
1956  MBB.erase(MI);
1957  return true;
1958  }
1959 
1960  const GlobalValue *GV =
1961  cast<GlobalValue>((*MI.memoperands_begin())->getValue());
1962  const TargetMachine &TM = MBB.getParent()->getTarget();
1963  unsigned OpFlags = Subtarget.ClassifyGlobalReference(GV, TM);
1964  const unsigned char MO_NC = AArch64II::MO_NC;
1965 
1966  if ((OpFlags & AArch64II::MO_GOT) != 0) {
1968  .addGlobalAddress(GV, 0, OpFlags);
1969  if (Subtarget.isTargetILP32()) {
1970  unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
1971  BuildMI(MBB, MI, DL, get(AArch64::LDRWui))
1972  .addDef(Reg32, RegState::Dead)
1974  .addImm(0)
1975  .addMemOperand(*MI.memoperands_begin())
1977  } else {
1978  BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
1980  .addImm(0)
1981  .addMemOperand(*MI.memoperands_begin());
1982  }
1983  } else if (TM.getCodeModel() == CodeModel::Large) {
1984  assert(!Subtarget.isTargetILP32() && "how can large exist in ILP32?");
1985  BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
1987  .addImm(0);
1988  BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
1991  .addImm(16);
1992  BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
1995  .addImm(32);
1996  BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
1999  .addImm(48);
2000  BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
2002  .addImm(0)
2003  .addMemOperand(*MI.memoperands_begin());
2004  } else if (TM.getCodeModel() == CodeModel::Tiny) {
2006  .addGlobalAddress(GV, 0, OpFlags);
2007  } else {
2009  .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
2010  unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC;
2011  if (Subtarget.isTargetILP32()) {
2012  unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
2013  BuildMI(MBB, MI, DL, get(AArch64::LDRWui))
2014  .addDef(Reg32, RegState::Dead)
2016  .addGlobalAddress(GV, 0, LoFlags)
2017  .addMemOperand(*MI.memoperands_begin())
2019  } else {
2020  BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
2022  .addGlobalAddress(GV, 0, LoFlags)
2023  .addMemOperand(*MI.memoperands_begin());
2024  }
2025  }
2026 
2027  MBB.erase(MI);
2028 
2029  return true;
2030 }
2031 
2032 // Return true if this instruction simply sets its single destination register
2033 // to zero. This is equivalent to a register rename of the zero-register.
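// Illustrative forms recognized here (a sketch based on the cases handled
// below, not an exhaustive list):
//   movz w0, #0              ; MOVZWi/MOVZXi with a zero immediate and shift
//   and  w0, wzr, #0x1       ; ANDWri/ANDXri whose first source is the zero register
//   COPY from WZR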
2034 bool AArch64InstrInfo::isGPRZero(const MachineInstr &MI) {
2035  switch (MI.getOpcode()) {
2036  default:
2037  break;
2038  case AArch64::MOVZWi:
2039  case AArch64::MOVZXi: // movz Rd, #0 (LSL #0)
2040  if (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) {
2041  assert(MI.getDesc().getNumOperands() == 3 &&
2042  MI.getOperand(2).getImm() == 0 && "invalid MOVZi operands");
2043  return true;
2044  }
2045  break;
2046  case AArch64::ANDWri: // and Rd, Rzr, #imm
2047  return MI.getOperand(1).getReg() == AArch64::WZR;
2048  case AArch64::ANDXri:
2049  return MI.getOperand(1).getReg() == AArch64::XZR;
2050  case TargetOpcode::COPY:
2051  return MI.getOperand(1).getReg() == AArch64::WZR;
2052  }
2053  return false;
2054 }
2055 
2056 // Return true if this instruction simply renames a general register without
2057 // modifying bits.
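// Illustrative forms recognized here (a sketch based on the cases handled
// below, not an exhaustive list):
//   COPY of a GPR32/GPR64 destination register
//   orr x0, xzr, x1          ; ORRXrs with XZR as the first source and no shift
//   add x0, x1, #0           ; ADDXri with a zero immediate and no shift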
2058 bool AArch64InstrInfo::isGPRCopy(const MachineInstr &MI) {
2059  switch (MI.getOpcode()) {
2060  default:
2061  break;
2062  case TargetOpcode::COPY: {
2063  // GPR32 copies will be lowered to ORRXrs
2064  Register DstReg = MI.getOperand(0).getReg();
2065  return (AArch64::GPR32RegClass.contains(DstReg) ||
2066  AArch64::GPR64RegClass.contains(DstReg));
2067  }
2068  case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0)
2069  if (MI.getOperand(1).getReg() == AArch64::XZR) {
2070  assert(MI.getDesc().getNumOperands() == 4 &&
2071  MI.getOperand(3).getImm() == 0 && "invalid ORRrs operands");
2072  return true;
2073  }
2074  break;
2075  case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
2076  if (MI.getOperand(2).getImm() == 0) {
2077  assert(MI.getDesc().getNumOperands() == 4 &&
2078  MI.getOperand(3).getImm() == 0 && "invalid ADDXri operands");
2079  return true;
2080  }
2081  break;
2082  }
2083  return false;
2084 }
2085 
2086 // Return true if this instruction simply renames an FP register without
2087 // modifying bits.
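// Illustrative forms recognized here (a sketch based on the cases handled
// below):
//   COPY of an FPR128 destination register
//   orr v0.16b, v1.16b, v1.16b   ; ORRv16i8 with identical source registers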
2088 bool AArch64InstrInfo::isFPRCopy(const MachineInstr &MI) {
2089  switch (MI.getOpcode()) {
2090  default:
2091  break;
2092  case TargetOpcode::COPY: {
2093  Register DstReg = MI.getOperand(0).getReg();
2094  return AArch64::FPR128RegClass.contains(DstReg);
2095  }
2096  case AArch64::ORRv16i8:
2097  if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
2098  assert(MI.getDesc().getNumOperands() == 3 && MI.getOperand(0).isReg() &&
2099  "invalid ORRv16i8 operands");
2100  return true;
2101  }
2102  break;
2103  }
2104  return false;
2105 }
2106 
2107 unsigned AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
2108  int &FrameIndex) const {
2109  switch (MI.getOpcode()) {
2110  default:
2111  break;
2112  case AArch64::LDRWui:
2113  case AArch64::LDRXui:
2114  case AArch64::LDRBui:
2115  case AArch64::LDRHui:
2116  case AArch64::LDRSui:
2117  case AArch64::LDRDui:
2118  case AArch64::LDRQui:
2119  if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
2120  MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
2121  FrameIndex = MI.getOperand(1).getIndex();
2122  return MI.getOperand(0).getReg();
2123  }
2124  break;
2125  }
2126 
2127  return 0;
2128 }
2129 
2130 unsigned AArch64InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
2131  int &FrameIndex) const {
2132  switch (MI.getOpcode()) {
2133  default:
2134  break;
2135  case AArch64::STRWui:
2136  case AArch64::STRXui:
2137  case AArch64::STRBui:
2138  case AArch64::STRHui:
2139  case AArch64::STRSui:
2140  case AArch64::STRDui:
2141  case AArch64::STRQui:
2142  case AArch64::LDR_PXI:
2143  case AArch64::STR_PXI:
2144  if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
2145  MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
2146  FrameIndex = MI.getOperand(1).getIndex();
2147  return MI.getOperand(0).getReg();
2148  }
2149  break;
2150  }
2151  return 0;
2152 }
2153 
2154 /// Check all MachineMemOperands for a hint to suppress pairing.
2155 bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr &MI) {
2156  return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
2157  return MMO->getFlags() & MOSuppressPair;
2158  });
2159 }
2160 
2161 /// Set a flag on the first MachineMemOperand to suppress pairing.
2162 void AArch64InstrInfo::suppressLdStPair(MachineInstr &MI) {
2163  if (MI.memoperands_empty())
2164  return;
2165  (*MI.memoperands_begin())->setFlags(MOSuppressPair);
2166 }
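// Usage sketch (hypothetical caller, for illustration only): a pass that wants
// to keep a memory access out of pair formation could write
//   AArch64InstrInfo::suppressLdStPair(MI);
//   assert(AArch64InstrInfo::isLdStPairSuppressed(MI));
// after which isCandidateToMergeOrPair() below rejects the instruction.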
2167 
2168 /// Check all MachineMemOperands for a hint that the load/store is strided.
2169 bool AArch64InstrInfo::isStridedAccess(const MachineInstr &MI) {
2170  return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
2171  return MMO->getFlags() & MOStridedAccess;
2172  });
2173 }
2174 
2175 bool AArch64InstrInfo::hasUnscaledLdStOffset(unsigned Opc) {
2176  switch (Opc) {
2177  default:
2178  return false;
2179  case AArch64::STURSi:
2180  case AArch64::STRSpre:
2181  case AArch64::STURDi:
2182  case AArch64::STRDpre:
2183  case AArch64::STURQi:
2184  case AArch64::STRQpre:
2185  case AArch64::STURBBi:
2186  case AArch64::STURHHi:
2187  case AArch64::STURWi:
2188  case AArch64::STRWpre:
2189  case AArch64::STURXi:
2190  case AArch64::STRXpre:
2191  case AArch64::LDURSi:
2192  case AArch64::LDRSpre:
2193  case AArch64::LDURDi:
2194  case AArch64::LDRDpre:
2195  case AArch64::LDURQi:
2196  case AArch64::LDRQpre:
2197  case AArch64::LDURWi:
2198  case AArch64::LDRWpre:
2199  case AArch64::LDURXi:
2200  case AArch64::LDRXpre:
2201  case AArch64::LDURSWi:
2202  case AArch64::LDURHHi:
2203  case AArch64::LDURBBi:
2204  case AArch64::LDURSBWi:
2205  case AArch64::LDURSHWi:
2206  return true;
2207  }
2208 }
2209 
2211  switch (Opc) {
2212  default: return {};
2213  case AArch64::PRFMui: return AArch64::PRFUMi;
2214  case AArch64::LDRXui: return AArch64::LDURXi;
2215  case AArch64::LDRWui: return AArch64::LDURWi;
2216  case AArch64::LDRBui: return AArch64::LDURBi;
2217  case AArch64::LDRHui: return AArch64::LDURHi;
2218  case AArch64::LDRSui: return AArch64::LDURSi;
2219  case AArch64::LDRDui: return AArch64::LDURDi;
2220  case AArch64::LDRQui: return AArch64::LDURQi;
2221  case AArch64::LDRBBui: return AArch64::LDURBBi;
2222  case AArch64::LDRHHui: return AArch64::LDURHHi;
2223  case AArch64::LDRSBXui: return AArch64::LDURSBXi;
2224  case AArch64::LDRSBWui: return AArch64::LDURSBWi;
2225  case AArch64::LDRSHXui: return AArch64::LDURSHXi;
2226  case AArch64::LDRSHWui: return AArch64::LDURSHWi;
2227  case AArch64::LDRSWui: return AArch64::LDURSWi;
2228  case AArch64::STRXui: return AArch64::STURXi;
2229  case AArch64::STRWui: return AArch64::STURWi;
2230  case AArch64::STRBui: return AArch64::STURBi;
2231  case AArch64::STRHui: return AArch64::STURHi;
2232  case AArch64::STRSui: return AArch64::STURSi;
2233  case AArch64::STRDui: return AArch64::STURDi;
2234  case AArch64::STRQui: return AArch64::STURQi;
2235  case AArch64::STRBBui: return AArch64::STURBBi;
2236  case AArch64::STRHHui: return AArch64::STURHHi;
2237  }
2238 }
2239 
2241  switch (Opc) {
2242  default:
2243  return 2;
2244  case AArch64::LDPXi:
2245  case AArch64::LDPDi:
2246  case AArch64::STPXi:
2247  case AArch64::STPDi:
2248  case AArch64::LDNPXi:
2249  case AArch64::LDNPDi:
2250  case AArch64::STNPXi:
2251  case AArch64::STNPDi:
2252  case AArch64::LDPQi:
2253  case AArch64::STPQi:
2254  case AArch64::LDNPQi:
2255  case AArch64::STNPQi:
2256  case AArch64::LDPWi:
2257  case AArch64::LDPSi:
2258  case AArch64::STPWi:
2259  case AArch64::STPSi:
2260  case AArch64::LDNPWi:
2261  case AArch64::LDNPSi:
2262  case AArch64::STNPWi:
2263  case AArch64::STNPSi:
2264  case AArch64::LDG:
2265  case AArch64::STGPi:
2266  case AArch64::LD1B_IMM:
2267  case AArch64::LD1H_IMM:
2268  case AArch64::LD1W_IMM:
2269  case AArch64::LD1D_IMM:
2270  case AArch64::ST1B_IMM:
2271  case AArch64::ST1H_IMM:
2272  case AArch64::ST1W_IMM:
2273  case AArch64::ST1D_IMM:
2274  case AArch64::LD1B_H_IMM:
2275  case AArch64::LD1SB_H_IMM:
2276  case AArch64::LD1H_S_IMM:
2277  case AArch64::LD1SH_S_IMM:
2278  case AArch64::LD1W_D_IMM:
2279  case AArch64::LD1SW_D_IMM:
2280  case AArch64::ST1B_H_IMM:
2281  case AArch64::ST1H_S_IMM:
2282  case AArch64::ST1W_D_IMM:
2283  case AArch64::LD1B_S_IMM:
2284  case AArch64::LD1SB_S_IMM:
2285  case AArch64::LD1H_D_IMM:
2286  case AArch64::LD1SH_D_IMM:
2287  case AArch64::ST1B_S_IMM:
2288  case AArch64::ST1H_D_IMM:
2289  case AArch64::LD1B_D_IMM:
2290  case AArch64::LD1SB_D_IMM:
2291  case AArch64::ST1B_D_IMM:
2292  case AArch64::LD1RB_IMM:
2293  case AArch64::LD1RB_H_IMM:
2294  case AArch64::LD1RB_S_IMM:
2295  case AArch64::LD1RB_D_IMM:
2296  case AArch64::LD1RSB_H_IMM:
2297  case AArch64::LD1RSB_S_IMM:
2298  case AArch64::LD1RSB_D_IMM:
2299  case AArch64::LD1RH_IMM:
2300  case AArch64::LD1RH_S_IMM:
2301  case AArch64::LD1RH_D_IMM:
2302  case AArch64::LD1RSH_S_IMM:
2303  case AArch64::LD1RSH_D_IMM:
2304  case AArch64::LD1RW_IMM:
2305  case AArch64::LD1RW_D_IMM:
2306  case AArch64::LD1RSW_IMM:
2307  case AArch64::LD1RD_IMM:
2308  return 3;
2309  case AArch64::ADDG:
2310  case AArch64::STGOffset:
2311  case AArch64::LDR_PXI:
2312  case AArch64::STR_PXI:
2313  return 2;
2314  }
2315 }
2316 
2317 bool AArch64InstrInfo::isPairableLdStInst(const MachineInstr &MI) {
2318  switch (MI.getOpcode()) {
2319  default:
2320  return false;
2321  // Scaled instructions.
2322  case AArch64::STRSui:
2323  case AArch64::STRDui:
2324  case AArch64::STRQui:
2325  case AArch64::STRXui:
2326  case AArch64::STRWui:
2327  case AArch64::LDRSui:
2328  case AArch64::LDRDui:
2329  case AArch64::LDRQui:
2330  case AArch64::LDRXui:
2331  case AArch64::LDRWui:
2332  case AArch64::LDRSWui:
2333  // Unscaled instructions.
2334  case AArch64::STURSi:
2335  case AArch64::STRSpre:
2336  case AArch64::STURDi:
2337  case AArch64::STRDpre:
2338  case AArch64::STURQi:
2339  case AArch64::STRQpre:
2340  case AArch64::STURWi:
2341  case AArch64::STRWpre:
2342  case AArch64::STURXi:
2343  case AArch64::STRXpre:
2344  case AArch64::LDURSi:
2345  case AArch64::LDRSpre:
2346  case AArch64::LDURDi:
2347  case AArch64::LDRDpre:
2348  case AArch64::LDURQi:
2349  case AArch64::LDRQpre:
2350  case AArch64::LDURWi:
2351  case AArch64::LDRWpre:
2352  case AArch64::LDURXi:
2353  case AArch64::LDRXpre:
2354  case AArch64::LDURSWi:
2355  return true;
2356  }
2357 }
2358 
2359 unsigned AArch64InstrInfo::convertToFlagSettingOpc(unsigned Opc,
2360  bool &Is64Bit) {
2361  switch (Opc) {
2362  default:
2363  llvm_unreachable("Opcode has no flag setting equivalent!");
2364  // 32-bit cases:
2365  case AArch64::ADDWri:
2366  Is64Bit = false;
2367  return AArch64::ADDSWri;
2368  case AArch64::ADDWrr:
2369  Is64Bit = false;
2370  return AArch64::ADDSWrr;
2371  case AArch64::ADDWrs:
2372  Is64Bit = false;
2373  return AArch64::ADDSWrs;
2374  case AArch64::ADDWrx:
2375  Is64Bit = false;
2376  return AArch64::ADDSWrx;
2377  case AArch64::ANDWri:
2378  Is64Bit = false;
2379  return AArch64::ANDSWri;
2380  case AArch64::ANDWrr:
2381  Is64Bit = false;
2382  return AArch64::ANDSWrr;
2383  case AArch64::ANDWrs:
2384  Is64Bit = false;
2385  return AArch64::ANDSWrs;
2386  case AArch64::BICWrr:
2387  Is64Bit = false;
2388  return AArch64::BICSWrr;
2389  case AArch64::BICWrs:
2390  Is64Bit = false;
2391  return AArch64::BICSWrs;
2392  case AArch64::SUBWri:
2393  Is64Bit = false;
2394  return AArch64::SUBSWri;
2395  case AArch64::SUBWrr:
2396  Is64Bit = false;
2397  return AArch64::SUBSWrr;
2398  case AArch64::SUBWrs:
2399  Is64Bit = false;
2400  return AArch64::SUBSWrs;
2401  case AArch64::SUBWrx:
2402  Is64Bit = false;
2403  return AArch64::SUBSWrx;
2404  // 64-bit cases:
2405  case AArch64::ADDXri:
2406  Is64Bit = true;
2407  return AArch64::ADDSXri;
2408  case AArch64::ADDXrr:
2409  Is64Bit = true;
2410  return AArch64::ADDSXrr;
2411  case AArch64::ADDXrs:
2412  Is64Bit = true;
2413  return AArch64::ADDSXrs;
2414  case AArch64::ADDXrx:
2415  Is64Bit = true;
2416  return AArch64::ADDSXrx;
2417  case AArch64::ANDXri:
2418  Is64Bit = true;
2419  return AArch64::ANDSXri;
2420  case AArch64::ANDXrr:
2421  Is64Bit = true;
2422  return AArch64::ANDSXrr;
2423  case AArch64::ANDXrs:
2424  Is64Bit = true;
2425  return AArch64::ANDSXrs;
2426  case AArch64::BICXrr:
2427  Is64Bit = true;
2428  return AArch64::BICSXrr;
2429  case AArch64::BICXrs:
2430  Is64Bit = true;
2431  return AArch64::BICSXrs;
2432  case AArch64::SUBXri:
2433  Is64Bit = true;
2434  return AArch64::SUBSXri;
2435  case AArch64::SUBXrr:
2436  Is64Bit = true;
2437  return AArch64::SUBSXrr;
2438  case AArch64::SUBXrs:
2439  Is64Bit = true;
2440  return AArch64::SUBSXrs;
2441  case AArch64::SUBXrx:
2442  Is64Bit = true;
2443  return AArch64::SUBSXrx;
2444  }
2445 }
2446 
2447 // Is this a candidate for ld/st merging or pairing? For example, we don't
2448 // touch volatiles or load/stores that have a hint to avoid pair formation.
2449 bool AArch64InstrInfo::isCandidateToMergeOrPair(const MachineInstr &MI) const {
2450 
2451  bool IsPreLdSt = isPreLdSt(MI);
2452 
2453  // If this is a volatile load/store, don't mess with it.
2454  if (MI.hasOrderedMemoryRef())
2455  return false;
2456 
2457  // Make sure this is a reg/fi+imm (as opposed to an address reloc).
2458  // For Pre-inc LD/ST, the operand is shifted by one.
2459  assert((MI.getOperand(IsPreLdSt ? 2 : 1).isReg() ||
2460  MI.getOperand(IsPreLdSt ? 2 : 1).isFI()) &&
2461  "Expected a reg or frame index operand.");
2462 
2463  // For Pre-indexed addressing quadword instructions, the third operand is the
2464  // immediate value.
2465  bool IsImmPreLdSt = IsPreLdSt && MI.getOperand(3).isImm();
2466 
2467  if (!MI.getOperand(2).isImm() && !IsImmPreLdSt)
2468  return false;
2469 
2470  // Can't merge/pair if the instruction modifies the base register.
2471  // e.g., ldr x0, [x0]
2472  // This case will never occur with an FI base.
2473  // However, if the instruction is an LDR/STR<S,D,Q,W,X>pre, it can be merged.
2474  // For example:
2475  // ldr q0, [x11, #32]!
2476  // ldr q1, [x11, #16]
2477  // to
2478  // ldp q0, q1, [x11, #32]!
2479  if (MI.getOperand(1).isReg() && !IsPreLdSt) {
2480  Register BaseReg = MI.getOperand(1).getReg();
2482  if (MI.modifiesRegister(BaseReg, TRI))
2483  return false;
2484  }
2485 
2486  // Check if this load/store has a hint to avoid pair formation.
2487  // MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
2488  if (isLdStPairSuppressed(MI))
2489  return false;
2490 
2491  // Do not pair any callee-save store/reload instructions in the
2492  // prologue/epilogue if the CFI information encoded the operations as separate
2493  // instructions, as that will cause the size of the actual prologue to differ
2494  // from the prologue size recorded in the Windows CFI.
2495  const MCAsmInfo *MAI = MI.getMF()->getTarget().getMCAsmInfo();
2496  bool NeedsWinCFI = MAI->usesWindowsCFI() &&
2497  MI.getMF()->getFunction().needsUnwindTableEntry();
2498  if (NeedsWinCFI && (MI.getFlag(MachineInstr::FrameSetup) ||
2499  MI.getFlag(MachineInstr::FrameDestroy)))
2500  return false;
2501 
2502  // On some CPUs quad load/store pairs are slower than two single load/stores.
2503  if (Subtarget.isPaired128Slow()) {
2504  switch (MI.getOpcode()) {
2505  default:
2506  break;
2507  case AArch64::LDURQi:
2508  case AArch64::STURQi:
2509  case AArch64::LDRQui:
2510  case AArch64::STRQui:
2511  return false;
2512  }
2513  }
2514 
2515  return true;
2516 }
2517 
2520  int64_t &Offset, bool &OffsetIsScalable, unsigned &Width,
2521  const TargetRegisterInfo *TRI) const {
2522  if (!LdSt.mayLoadOrStore())
2523  return false;
2524 
2525  const MachineOperand *BaseOp;
2526  if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, OffsetIsScalable,
2527  Width, TRI))
2528  return false;
2529  BaseOps.push_back(BaseOp);
2530  return true;
2531 }
2532 
2535  const TargetRegisterInfo *TRI) const {
2536  const MachineOperand *Base; // Filled with the base operand of MI.
2537  int64_t Offset; // Filled with the offset of MI.
2538  bool OffsetIsScalable;
2539  if (!getMemOperandWithOffset(MemI, Base, Offset, OffsetIsScalable, TRI))
2540  return None;
2541 
2542  if (!Base->isReg())
2543  return None;
2544  ExtAddrMode AM;
2545  AM.BaseReg = Base->getReg();
2546  AM.Displacement = Offset;
2547  AM.ScaledReg = 0;
2548  return AM;
2549 }
2550 
2552  const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset,
2553  bool &OffsetIsScalable, unsigned &Width,
2554  const TargetRegisterInfo *TRI) const {
2555  assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
2556  // Handle only loads/stores with base register followed by immediate offset.
2557  if (LdSt.getNumExplicitOperands() == 3) {
2558  // Non-paired instruction (e.g., ldr x1, [x0, #8]).
2559  if ((!LdSt.getOperand(1).isReg() && !LdSt.getOperand(1).isFI()) ||
2560  !LdSt.getOperand(2).isImm())
2561  return false;
2562  } else if (LdSt.getNumExplicitOperands() == 4) {
2563  // Paired instruction (e.g., ldp x1, x2, [x0, #8]).
2564  if (!LdSt.getOperand(1).isReg() ||
2565  (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI()) ||
2566  !LdSt.getOperand(3).isImm())
2567  return false;
2568  } else
2569  return false;
2570 
2571  // Get the scaling factor for the instruction and set the width of the
2572  // memory access.
2573  TypeSize Scale(0U, false);
2574  int64_t Dummy1, Dummy2;
2575 
2576  // If this returns false, then it's an instruction we don't want to handle.
2577  if (!getMemOpInfo(LdSt.getOpcode(), Scale, Width, Dummy1, Dummy2))
2578  return false;
2579 
2580  // Compute the offset. The offset is calculated as the immediate operand
2581  // multiplied by the scaling factor. Unscaled instructions have a scaling
2582  // factor of 1.
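  // Worked example (illustrative): for "ldr x1, [x0, #16]" (LDRXui) the
  // immediate operand is 2 and the scale is 8, so Offset = 2 * 8 = 16 bytes;
  // for the unscaled "ldur x1, [x0, #16]" (LDURXi) the scale is 1 and
  // Offset = 16.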
2583  if (LdSt.getNumExplicitOperands() == 3) {
2584  BaseOp = &LdSt.getOperand(1);
2585  Offset = LdSt.getOperand(2).getImm() * Scale.getKnownMinSize();
2586  } else {
2587  assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands");
2588  BaseOp = &LdSt.getOperand(2);
2589  Offset = LdSt.getOperand(3).getImm() * Scale.getKnownMinSize();
2590  }
2591  OffsetIsScalable = Scale.isScalable();
2592 
2593  if (!BaseOp->isReg() && !BaseOp->isFI())
2594  return false;
2595 
2596  return true;
2597 }
2598 
2601  assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
2602  MachineOperand &OfsOp = LdSt.getOperand(LdSt.getNumExplicitOperands() - 1);
2603  assert(OfsOp.isImm() && "Offset operand wasn't immediate.");
2604  return OfsOp;
2605 }
2606 
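// Usage sketch (illustrative): querying the addressing properties of a plain
// 64-bit scaled load; the expected values follow from the LDRXui case below.
//   TypeSize Scale(0U, false);
//   unsigned Width;
//   int64_t MinOffset, MaxOffset;
//   AArch64InstrInfo::getMemOpInfo(AArch64::LDRXui, Scale, Width, MinOffset,
//                                  MaxOffset);
//   // Scale == 8 (fixed), Width == 8, MinOffset == 0, MaxOffset == 4095.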
2607 bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
2608  unsigned &Width, int64_t &MinOffset,
2609  int64_t &MaxOffset) {
2610  const unsigned SVEMaxBytesPerVector = AArch64::SVEMaxBitsPerVector / 8;
2611  switch (Opcode) {
2612  // Not a memory operation or something we want to handle.
2613  default:
2614  Scale = TypeSize::Fixed(0);
2615  Width = 0;
2616  MinOffset = MaxOffset = 0;
2617  return false;
2618  case AArch64::STRWpost:
2619  case AArch64::LDRWpost:
2620  Width = 32;
2621  Scale = TypeSize::Fixed(4);
2622  MinOffset = -256;
2623  MaxOffset = 255;
2624  break;
2625  case AArch64::LDURQi:
2626  case AArch64::STURQi:
2627  Width = 16;
2628  Scale = TypeSize::Fixed(1);
2629  MinOffset = -256;
2630  MaxOffset = 255;
2631  break;
2632  case AArch64::PRFUMi:
2633  case AArch64::LDURXi:
2634  case AArch64::LDURDi:
2635  case AArch64::STURXi:
2636  case AArch64::STURDi:
2637  Width = 8;
2638  Scale = TypeSize::Fixed(1);
2639  MinOffset = -256;
2640  MaxOffset = 255;
2641  break;
2642  case AArch64::LDURWi:
2643  case AArch64::LDURSi:
2644  case AArch64::LDURSWi:
2645  case AArch64::STURWi:
2646  case AArch64::STURSi:
2647  Width = 4;
2648  Scale = TypeSize::Fixed(1);
2649  MinOffset = -256;
2650  MaxOffset = 255;
2651  break;
2652  case AArch64::LDURHi:
2653  case AArch64::LDURHHi:
2654  case AArch64::LDURSHXi:
2655  case AArch64::LDURSHWi:
2656  case AArch64::STURHi:
2657  case AArch64::STURHHi:
2658  Width = 2;
2659  Scale = TypeSize::Fixed(1);
2660  MinOffset = -256;
2661  MaxOffset = 255;
2662  break;
2663  case AArch64::LDURBi:
2664  case AArch64::LDURBBi:
2665  case AArch64::LDURSBXi:
2666  case AArch64::LDURSBWi:
2667  case AArch64::STURBi:
2668  case AArch64::STURBBi:
2669  Width = 1;
2670  Scale = TypeSize::Fixed(1);
2671  MinOffset = -256;
2672  MaxOffset = 255;
2673  break;
2674  case AArch64::LDPQi:
2675  case AArch64::LDNPQi:
2676  case AArch64::STPQi:
2677  case AArch64::STNPQi:
2678  Scale = TypeSize::Fixed(16);
2679  Width = 32;
2680  MinOffset = -64;
2681  MaxOffset = 63;
2682  break;
2683  case AArch64::LDRQui:
2684  case AArch64::STRQui:
2685  Scale = TypeSize::Fixed(16);
2686  Width = 16;
2687  MinOffset = 0;
2688  MaxOffset = 4095;
2689  break;
2690  case AArch64::LDPXi:
2691  case AArch64::LDPDi:
2692  case AArch64::LDNPXi:
2693  case AArch64::LDNPDi:
2694  case AArch64::STPXi:
2695  case AArch64::STPDi:
2696  case AArch64::STNPXi:
2697  case AArch64::STNPDi:
2698  Scale = TypeSize::Fixed(8);
2699  Width = 16;
2700  MinOffset = -64;
2701  MaxOffset = 63;
2702  break;
2703  case AArch64::PRFMui:
2704  case AArch64::LDRXui:
2705  case AArch64::LDRDui:
2706  case AArch64::STRXui:
2707  case AArch64::STRDui:
2708  Scale = TypeSize::Fixed(8);
2709  Width = 8;
2710  MinOffset = 0;
2711  MaxOffset = 4095;
2712  break;
2713  case AArch64::StoreSwiftAsyncContext:
2714  // Store is an STRXui, but there might be an ADDXri in the expansion too.
2715  Scale = TypeSize::Fixed(1);
2716  Width = 8;
2717  MinOffset = 0;
2718  MaxOffset = 4095;
2719  break;
2720  case AArch64::LDPWi:
2721  case AArch64::LDPSi:
2722  case AArch64::LDNPWi:
2723  case AArch64::LDNPSi:
2724  case AArch64::STPWi:
2725  case AArch64::STPSi:
2726  case AArch64::STNPWi:
2727  case AArch64::STNPSi:
2728  Scale = TypeSize::Fixed(4);
2729  Width = 8;
2730  MinOffset = -64;
2731  MaxOffset = 63;
2732  break;
2733  case AArch64::LDRWui:
2734  case AArch64::LDRSui:
2735  case AArch64::LDRSWui:
2736  case AArch64::STRWui:
2737  case AArch64::STRSui:
2738  Scale = TypeSize::Fixed(4);
2739  Width = 4;
2740  MinOffset = 0;
2741  MaxOffset = 4095;
2742  break;
2743  case AArch64::LDRHui:
2744  case AArch64::LDRHHui:
2745  case AArch64::LDRSHWui:
2746  case AArch64::LDRSHXui:
2747  case AArch64::STRHui:
2748  case AArch64::STRHHui:
2749  Scale = TypeSize::Fixed(2);
2750  Width = 2;
2751  MinOffset = 0;
2752  MaxOffset = 4095;
2753  break;
2754  case AArch64::LDRBui:
2755  case AArch64::LDRBBui:
2756  case AArch64::LDRSBWui:
2757  case AArch64::LDRSBXui:
2758  case AArch64::STRBui:
2759  case AArch64::STRBBui:
2760  Scale = TypeSize::Fixed(1);
2761  Width = 1;
2762  MinOffset = 0;
2763  MaxOffset = 4095;
2764  break;
2765  case AArch64::STPXpre:
2766  case AArch64::LDPXpost:
2767  case AArch64::STPDpre:
2768  case AArch64::LDPDpost:
2769  Scale = TypeSize::Fixed(8);
2770  Width = 8;
2771  MinOffset = -512;
2772  MaxOffset = 504;
2773  break;
2774  case AArch64::STPQpre:
2775  case AArch64::LDPQpost:
2776  Scale = TypeSize::Fixed(16);
2777  Width = 16;
2778  MinOffset = -1024;
2779  MaxOffset = 1008;
2780  break;
2781  case AArch64::STRXpre:
2782  case AArch64::STRDpre:
2783  case AArch64::LDRXpost:
2784  case AArch64::LDRDpost:
2785  Scale = TypeSize::Fixed(1);
2786  Width = 8;
2787  MinOffset = -256;
2788  MaxOffset = 255;
2789  break;
2790  case AArch64::STRQpre:
2791  case AArch64::LDRQpost:
2792  Scale = TypeSize::Fixed(1);
2793  Width = 16;
2794  MinOffset = -256;
2795  MaxOffset = 255;
2796  break;
2797  case AArch64::ADDG:
2798  Scale = TypeSize::Fixed(16);
2799  Width = 0;
2800  MinOffset = 0;
2801  MaxOffset = 63;
2802  break;
2803  case AArch64::TAGPstack:
2804  Scale = TypeSize::Fixed(16);
2805  Width = 0;
2806  // TAGP with a negative offset turns into SUBP, which has a maximum offset
2807  // of 63 (not 64!).
2808  MinOffset = -63;
2809  MaxOffset = 63;
2810  break;
2811  case AArch64::LDG:
2812  case AArch64::STGOffset:
2813  case AArch64::STZGOffset:
2814  Scale = TypeSize::Fixed(16);
2815  Width = 16;
2816  MinOffset = -256;
2817  MaxOffset = 255;
2818  break;
2819  case AArch64::STR_ZZZZXI:
2820  case AArch64::LDR_ZZZZXI:
2821  Scale = TypeSize::Scalable(16);
2822  Width = SVEMaxBytesPerVector * 4;
2823  MinOffset = -256;
2824  MaxOffset = 252;
2825  break;
2826  case AArch64::STR_ZZZXI:
2827  case AArch64::LDR_ZZZXI:
2828  Scale = TypeSize::Scalable(16);
2829  Width = SVEMaxBytesPerVector * 3;
2830  MinOffset = -256;
2831  MaxOffset = 253;
2832  break;
2833  case AArch64::STR_ZZXI:
2834  case AArch64::LDR_ZZXI:
2835  Scale = TypeSize::Scalable(16);
2836  Width = SVEMaxBytesPerVector * 2;
2837  MinOffset = -256;
2838  MaxOffset = 254;
2839  break;
2840  case AArch64::LDR_PXI:
2841  case AArch64::STR_PXI:
2842  Scale = TypeSize::Scalable(2);
2843  Width = SVEMaxBytesPerVector / 8;
2844  MinOffset = -256;
2845  MaxOffset = 255;
2846  break;
2847  case AArch64::LDR_ZXI:
2848  case AArch64::STR_ZXI:
2849  Scale = TypeSize::Scalable(16);
2850  Width = SVEMaxBytesPerVector;
2851  MinOffset = -256;
2852  MaxOffset = 255;
2853  break;
2854  case AArch64::LD1B_IMM:
2855  case AArch64::LD1H_IMM:
2856  case AArch64::LD1W_IMM:
2857  case AArch64::LD1D_IMM:
2858  case AArch64::ST1B_IMM:
2859  case AArch64::ST1H_IMM:
2860  case AArch64::ST1W_IMM:
2861  case AArch64::ST1D_IMM:
2862  // A full vector's worth of data
2863  // Width = mbytes * elements
2864  Scale = TypeSize::Scalable(16);
2865  Width = SVEMaxBytesPerVector;
2866  MinOffset = -8;
2867  MaxOffset = 7;
2868  break;
2869  case AArch64::LD1B_H_IMM:
2870  case AArch64::LD1SB_H_IMM:
2871  case AArch64::LD1H_S_IMM:
2872  case AArch64::LD1SH_S_IMM:
2873  case AArch64::LD1W_D_IMM:
2874  case AArch64::LD1SW_D_IMM:
2875  case AArch64::ST1B_H_IMM:
2876  case AArch64::ST1H_S_IMM:
2877  case AArch64::ST1W_D_IMM:
2878  // A half vector's worth of data
2879  // Width = mbytes * elements
2880  Scale = TypeSize::Scalable(8);
2881  Width = SVEMaxBytesPerVector / 2;
2882  MinOffset = -8;
2883  MaxOffset = 7;
2884  break;
2885  case AArch64::LD1B_S_IMM:
2886  case AArch64::LD1SB_S_IMM:
2887  case AArch64::LD1H_D_IMM:
2888  case AArch64::LD1SH_D_IMM:
2889  case AArch64::ST1B_S_IMM:
2890  case AArch64::ST1H_D_IMM:
2891  // A quarter vector's worth of data
2892  // Width = mbytes * elements
2893  Scale = TypeSize::Scalable(4);
2894  Width = SVEMaxBytesPerVector / 4;
2895  MinOffset = -8;
2896  MaxOffset = 7;
2897  break;
2898  case AArch64::LD1B_D_IMM:
2899  case AArch64::LD1SB_D_IMM:
2900  case AArch64::ST1B_D_IMM:
2901  // An eighth vector's worth of data
2902  // Width = mbytes * elements
2903  Scale = TypeSize::Scalable(2);
2904  Width = SVEMaxBytesPerVector / 8;
2905  MinOffset = -8;
2906  MaxOffset = 7;
2907  break;
2908  case AArch64::ST2GOffset:
2909  case AArch64::STZ2GOffset:
2910  Scale = TypeSize::Fixed(16);
2911  Width = 32;
2912  MinOffset = -256;
2913  MaxOffset = 255;
2914  break;
2915  case AArch64::STGPi:
2916  Scale = TypeSize::Fixed(16);
2917  Width = 16;
2918  MinOffset = -64;
2919  MaxOffset = 63;
2920  break;
2921  case AArch64::LD1RB_IMM:
2922  case AArch64::LD1RB_H_IMM:
2923  case AArch64::LD1RB_S_IMM:
2924  case AArch64::LD1RB_D_IMM:
2925  case AArch64::LD1RSB_H_IMM:
2926  case AArch64::LD1RSB_S_IMM:
2927  case AArch64::LD1RSB_D_IMM:
2928  Scale = TypeSize::Fixed(1);
2929  Width = 1;
2930  MinOffset = 0;
2931  MaxOffset = 63;
2932  break;
2933  case AArch64::LD1RH_IMM:
2934  case AArch64::LD1RH_S_IMM:
2935  case AArch64::LD1RH_D_IMM:
2936  case AArch64::LD1RSH_S_IMM:
2937  case AArch64::LD1RSH_D_IMM:
2938  Scale = TypeSize::Fixed(2);
2939  Width = 2;
2940  MinOffset = 0;
2941  MaxOffset = 63;
2942  break;
2943  case AArch64::LD1RW_IMM:
2944  case AArch64::LD1RW_D_IMM:
2945  case AArch64::LD1RSW_IMM:
2946  Scale = TypeSize::Fixed(4);
2947  Width = 4;
2948  MinOffset = 0;
2949  MaxOffset = 63;
2950  break;
2951  case AArch64::LD1RD_IMM:
2952  Scale = TypeSize::Fixed(8);
2953  Width = 8;
2954  MinOffset = 0;
2955  MaxOffset = 63;
2956  break;
2957  }
2958 
2959  return true;
2960 }
2961 
2962 // Scaling factor for unscaled load or store.
2964  switch (Opc) {
2965  default:
2966  llvm_unreachable("Opcode has unknown scale!");
2967  case AArch64::LDRBBui:
2968  case AArch64::LDURBBi:
2969  case AArch64::LDRSBWui:
2970  case AArch64::LDURSBWi:
2971  case AArch64::STRBBui:
2972  case AArch64::STURBBi:
2973  return 1;
2974  case AArch64::LDRHHui:
2975  case AArch64::LDURHHi:
2976  case AArch64::LDRSHWui:
2977  case AArch64::LDURSHWi:
2978  case AArch64::STRHHui:
2979  case AArch64::STURHHi:
2980  return 2;
2981  case AArch64::LDRSui:
2982  case AArch64::LDURSi:
2983  case AArch64::LDRSpre:
2984  case AArch64::LDRSWui:
2985  case AArch64::LDURSWi:
2986  case AArch64::LDRWpre:
2987  case AArch64::LDRWui:
2988  case AArch64::LDURWi:
2989  case AArch64::STRSui:
2990  case AArch64::STURSi:
2991  case AArch64::STRSpre:
2992  case AArch64::STRWui:
2993  case AArch64::STURWi:
2994  case AArch64::STRWpre:
2995  case AArch64::LDPSi:
2996  case AArch64::LDPSWi:
2997  case AArch64::LDPWi:
2998  case AArch64::STPSi:
2999  case AArch64::STPWi:
3000  return 4;
3001  case AArch64::LDRDui:
3002  case AArch64::LDURDi:
3003  case AArch64::LDRDpre:
3004  case AArch64::LDRXui:
3005  case AArch64::LDURXi:
3006  case AArch64::LDRXpre:
3007  case AArch64::STRDui:
3008  case AArch64::STURDi:
3009  case AArch64::STRDpre:
3010  case AArch64::STRXui:
3011  case AArch64::STURXi:
3012  case AArch64::STRXpre:
3013  case AArch64::LDPDi:
3014  case AArch64::LDPXi:
3015  case AArch64::STPDi:
3016  case AArch64::STPXi:
3017  return 8;
3018  case AArch64::LDRQui:
3019  case AArch64::LDURQi:
3020  case AArch64::STRQui:
3021  case AArch64::STURQi:
3022  case AArch64::STRQpre:
3023  case AArch64::LDPQi:
3024  case AArch64::LDRQpre:
3025  case AArch64::STPQi:
3026  case AArch64::STGOffset:
3027  case AArch64::STZGOffset:
3028  case AArch64::ST2GOffset:
3029  case AArch64::STZ2GOffset:
3030  case AArch64::STGPi:
3031  return 16;
3032  }
3033 }
3034 
3035 bool AArch64InstrInfo::isPreLd(const MachineInstr &MI) {
3036  switch (MI.getOpcode()) {
3037  default:
3038  return false;
3039  case AArch64::LDRWpre:
3040  case AArch64::LDRXpre:
3041  case AArch64::LDRSpre:
3042  case AArch64::LDRDpre:
3043  case AArch64::LDRQpre:
3044  return true;
3045  }
3046 }
3047 
3048 bool AArch64InstrInfo::isPreSt(const MachineInstr &MI) {
3049  switch (MI.getOpcode()) {
3050  default:
3051  return false;
3052  case AArch64::STRWpre:
3053  case AArch64::STRXpre:
3054  case AArch64::STRSpre:
3055  case AArch64::STRDpre:
3056  case AArch64::STRQpre:
3057  return true;
3058  }
3059 }
3060 
3061 bool AArch64InstrInfo::isPreLdSt(const MachineInstr &MI) {
3062  return isPreLd(MI) || isPreSt(MI);
3063 }
3064 
3065 // Scale the unscaled offset. Returns false if the unscaled offset can't be
3066 // scaled.
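// Example (illustrative): an unscaled access at byte offset 16 with an 8-byte
// stride becomes element offset 2; a byte offset of 12 is not a multiple of 8,
// so it cannot be scaled and the function returns false.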
3067 static bool scaleOffset(unsigned Opc, int64_t &Offset) {
3068  int Scale = AArch64InstrInfo::getMemScale(Opc);
3069 
3070  // If the byte-offset isn't a multiple of the stride, we can't scale this
3071  // offset.
3072  if (Offset % Scale != 0)
3073  return false;
3074 
3075  // Convert the byte-offset used by unscaled into an "element" offset used
3076  // by the scaled pair load/store instructions.
3077  Offset /= Scale;
3078  return true;
3079 }
3080 
3081 static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) {
3082  if (FirstOpc == SecondOpc)
3083  return true;
3084  // We can also pair sign-ext and zero-ext instructions.
3085  switch (FirstOpc) {
3086  default:
3087  return false;
3088  case AArch64::LDRWui:
3089  case AArch64::LDURWi:
3090  return SecondOpc == AArch64::LDRSWui || SecondOpc == AArch64::LDURSWi;
3091  case AArch64::LDRSWui:
3092  case AArch64::LDURSWi:
3093  return SecondOpc == AArch64::LDRWui || SecondOpc == AArch64::LDURWi;
3094  }
3095  // These instructions can't be paired based on their opcodes.
3096  return false;
3097 }
3098 
3099 static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1,
3100  int64_t Offset1, unsigned Opcode1, int FI2,
3101  int64_t Offset2, unsigned Opcode2) {
3102  // Accesses through fixed stack object frame indices may access a different
3103  // fixed stack slot. Check that the object offsets + offsets match.
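  // Worked example (illustrative): two 8-byte fixed objects at byte offsets
  // -16 and -8, each accessed with an instruction offset of 0, give scaled
  // object offsets -2 and -1; since -2 + 0 + 1 == -1 + 0, the accesses are
  // treated as adjacent and may be clustered.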
3104  if (MFI.isFixedObjectIndex(FI1) && MFI.isFixedObjectIndex(FI2)) {
3105  int64_t ObjectOffset1 = MFI.getObjectOffset(FI1);
3106  int64_t ObjectOffset2 = MFI.getObjectOffset(FI2);
3107  assert(ObjectOffset1 <= ObjectOffset2 && "Object offsets are not ordered.");
3108  // Convert to scaled object offsets.
3109  int Scale1 = AArch64InstrInfo::getMemScale(Opcode1);
3110  if (ObjectOffset1 % Scale1 != 0)
3111  return false;
3112  ObjectOffset1 /= Scale1;
3113  int Scale2 = AArch64InstrInfo::getMemScale(Opcode2);
3114  if (ObjectOffset2 % Scale2 != 0)
3115  return false;
3116  ObjectOffset2 /= Scale2;
3117  ObjectOffset1 += Offset1;
3118  ObjectOffset2 += Offset2;
3119  return ObjectOffset1 + 1 == ObjectOffset2;
3120  }
3121 
3122  return FI1 == FI2;
3123 }
3124 
3125 /// Detect opportunities for ldp/stp formation.
3126 ///
3127 /// Only called for LdSt for which getMemOperandWithOffset returns true.
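/// Illustrative example (a sketch of the kind of sequence this enables): two
/// adjacent loads such as
/// \code
///   ldr x0, [x2, #8]
///   ldr x1, [x2, #16]
/// \endcode
/// are reported as clusterable so that a later pass can form
/// \code
///   ldp x0, x1, [x2, #8]
/// \endcode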
3130  ArrayRef<const MachineOperand *> BaseOps2, unsigned NumLoads,
3131  unsigned NumBytes) const {
3132  assert(BaseOps1.size() == 1 && BaseOps2.size() == 1);
3133  const MachineOperand &BaseOp1 = *BaseOps1.front();
3134  const MachineOperand &BaseOp2 = *BaseOps2.front();
3135  const MachineInstr &FirstLdSt = *BaseOp1.getParent();
3136  const MachineInstr &SecondLdSt = *BaseOp2.getParent();
3137  if (BaseOp1.getType() != BaseOp2.getType())
3138  return false;
3139 
3140  assert((BaseOp1.isReg() || BaseOp1.isFI()) &&
3141  "Only base registers and frame indices are supported.");
3142 
3143  // Check for both base regs and base FI.
3144  if (BaseOp1.isReg() && BaseOp1.getReg() != BaseOp2.getReg())
3145  return false;
3146 
3147  // Only cluster up to a single pair.
3148  if (NumLoads > 2)
3149  return false;
3150 
3151  if (!isPairableLdStInst(FirstLdSt) || !isPairableLdStInst(SecondLdSt))
3152  return false;
3153 
3154  // Can we pair these instructions based on their opcodes?
3155  unsigned FirstOpc = FirstLdSt.getOpcode();
3156  unsigned SecondOpc = SecondLdSt.getOpcode();
3157  if (!canPairLdStOpc(FirstOpc, SecondOpc))
3158  return false;
3159 
3160  // Can't merge volatiles or load/stores that have a hint to avoid pair
3161  // formation, for example.
3162  if (!isCandidateToMergeOrPair(FirstLdSt) ||
3163  !isCandidateToMergeOrPair(SecondLdSt))
3164  return false;
3165 
3166  // isCandidateToMergeOrPair guarantees that operand 2 is an immediate.
3167  int64_t Offset1 = FirstLdSt.getOperand(2).getImm();
3168  if (hasUnscaledLdStOffset(FirstOpc) && !scaleOffset(FirstOpc, Offset1))
3169  return false;
3170 
3171  int64_t Offset2 = SecondLdSt.getOperand(2).getImm();
3172  if (hasUnscaledLdStOffset(SecondOpc) && !scaleOffset(SecondOpc, Offset2))
3173  return false;
3174 
3175  // Pairwise instructions have a 7-bit signed offset field.
3176  if (Offset1 > 63 || Offset1 < -64)
3177  return false;
3178 
3179  // The caller should already have ordered FirstLdSt/SecondLdSt by offset.
3180  // Note: this does not hold for non-equal frame index bases.
3181  if (BaseOp1.isFI()) {
3182  assert((!BaseOp1.isIdenticalTo(BaseOp2) || Offset1 <= Offset2) &&
3183  "Caller should have ordered offsets.");
3184 
3185  const MachineFrameInfo &MFI =
3186  FirstLdSt.getParent()->getParent()->getFrameInfo();
3187  return shouldClusterFI(MFI, BaseOp1.getIndex(), Offset1, FirstOpc,
3188  BaseOp2.getIndex(), Offset2, SecondOpc);
3189  }
3190 
3191  assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
3192 
3193  return Offset1 + 1 == Offset2;
3194 }
3195 
3197  unsigned Reg, unsigned SubIdx,
3198  unsigned State,
3199  const TargetRegisterInfo *TRI) {
3200  if (!SubIdx)
3201  return MIB.addReg(Reg, State);
3202 
3204  return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
3205  return MIB.addReg(Reg, State, SubIdx);
3206 }
3207 
3208 static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
3209  unsigned NumRegs) {
3210  // We really want the positive remainder mod 32 here, which happens to be
3211  // easily obtainable with a mask.
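  // Example (illustrative): for a 2-register tuple copy where the source
  // encodings are {1, 2} and the destination encodings are {2, 3}, we get
  // (2 - 1) & 0x1f == 1 < 2, so copying sub-register 0 first would overwrite
  // source sub-register 1; the caller then copies the tuple in reverse order.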
3212  return ((DestReg - SrcReg) & 0x1f) < NumRegs;
3213 }
3214 
3217  const DebugLoc &DL, MCRegister DestReg,
3218  MCRegister SrcReg, bool KillSrc,
3219  unsigned Opcode,
3220  ArrayRef<unsigned> Indices) const {
3221  assert(Subtarget.hasNEON() && "Unexpected register copy without NEON");
3223  uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
3224  uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
3225  unsigned NumRegs = Indices.size();
3226 
3227  int SubReg = 0, End = NumRegs, Incr = 1;
3228  if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) {
3229  SubReg = NumRegs - 1;
3230  End = -1;
3231  Incr = -1;
3232  }
3233 
3234  for (; SubReg != End; SubReg += Incr) {
3235  const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
3236  AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
3237  AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI);
3238  AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
3239  }
3240 }
3241 
3244  DebugLoc DL, unsigned DestReg,
3245  unsigned SrcReg, bool KillSrc,
3246  unsigned Opcode, unsigned ZeroReg,
3247  llvm::ArrayRef<unsigned> Indices) const {
3249  unsigned NumRegs = Indices.size();
3250 
3251 #ifndef NDEBUG
3252  uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
3253  uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
3254  assert(DestEncoding % NumRegs == 0 && SrcEncoding % NumRegs == 0 &&
3255  "GPR reg sequences should not be able to overlap");
3256 #endif
3257 
3258  for (unsigned SubReg = 0; SubReg != NumRegs; ++SubReg) {
3259  const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
3260  AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
3261  MIB.addReg(ZeroReg);
3262  AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
3263  MIB.addImm(0);
3264  }
3265 }
3266 
3269  const DebugLoc &DL, MCRegister DestReg,
3270  MCRegister SrcReg, bool KillSrc) const {
3271  if (AArch64::GPR32spRegClass.contains(DestReg) &&
3272  (AArch64::GPR32spRegClass.contains(SrcReg) || SrcReg == AArch64::WZR)) {
3274 
3275  if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
3276  // If either operand is WSP, expand to ADD #0.
3277  if (Subtarget.hasZeroCycleRegMove()) {
3278  // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move.
3279  MCRegister DestRegX = TRI->getMatchingSuperReg(
3280  DestReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
3281  MCRegister SrcRegX = TRI->getMatchingSuperReg(
3282  SrcReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
3283  // This instruction is reading and writing X registers. This may upset
3284  // the register scavenger and machine verifier, so we need to indicate
3285  // that we are reading an undefined value from SrcRegX, but a proper
3286  // value from SrcReg.
3287  BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestRegX)
3288  .addReg(SrcRegX, RegState::Undef)
3289  .addImm(0)
3291  .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
3292  } else {
3293  BuildMI(MBB, I, DL, get(AArch64::ADDWri), DestReg)
3294  .addReg(SrcReg, getKillRegState(KillSrc))
3295  .addImm(0)
3297  }
3298  } else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroingGP()) {
3299  BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg)
3300  .addImm(0)
3302  } else {
3303  if (Subtarget.hasZeroCycleRegMove()) {
3304  // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move.
3305  MCRegister DestRegX = TRI->getMatchingSuperReg(
3306  DestReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
3307  MCRegister SrcRegX = TRI->getMatchingSuperReg(
3308  SrcReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
3309  // This instruction is reading and writing X registers. This may upset
3310  // the register scavenger and machine verifier, so we need to indicate
3311  // that we are reading an undefined value from SrcRegX, but a proper
3312  // value from SrcReg.
3313  BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestRegX)
3314  .addReg(AArch64::XZR)
3315  .addReg(SrcRegX, RegState::Undef)
3316  .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
3317  } else {
3318  // Otherwise, expand to ORR WZR.
3319  BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg)
3320  .addReg(AArch64::WZR)
3321  .addReg(SrcReg, getKillRegState(KillSrc));
3322  }
3323  }
3324  return;
3325  }
3326 
3327  // Copy a Predicate register by ORRing with itself.
3328  if (AArch64::PPRRegClass.contains(DestReg) &&
3329  AArch64::PPRRegClass.contains(SrcReg)) {
3330  assert(Subtarget.hasSVE() && "Unexpected SVE register.");
3331  BuildMI(MBB, I, DL, get(AArch64::ORR_PPzPP), DestReg)
3332  .addReg(SrcReg) // Pg
3333  .addReg(SrcReg)
3334  .addReg(SrcReg, getKillRegState(KillSrc));
3335  return;
3336  }
3337 
3338  // Copy a Z register by ORRing with itself.
3339  if (AArch64::ZPRRegClass.contains(DestReg) &&
3340  AArch64::ZPRRegClass.contains(SrcReg)) {
3341  assert(Subtarget.hasSVE() && "Unexpected SVE register.");
3342  BuildMI(MBB, I, DL, get(AArch64::ORR_ZZZ), DestReg)
3343  .addReg(SrcReg)
3344  .addReg(SrcReg, getKillRegState(KillSrc));
3345  return;
3346  }
3347 
3348  // Copy a Z register pair by copying the individual sub-registers.
3349  if (AArch64::ZPR2RegClass.contains(DestReg) &&
3350  AArch64::ZPR2RegClass.contains(SrcReg)) {
3351  static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1};
3352  copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
3353  Indices);
3354  return;
3355  }
3356 
3357  // Copy a Z register triple by copying the individual sub-registers.
3358  if (AArch64::ZPR3RegClass.contains(DestReg) &&
3359  AArch64::ZPR3RegClass.contains(SrcReg)) {
3360  static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
3361  AArch64::zsub2};
3362  copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
3363  Indices);
3364  return;
3365  }
3366 
3367  // Copy a Z register quad by copying the individual sub-registers.
3368  if (AArch64::ZPR4RegClass.contains(DestReg) &&
3369  AArch64::ZPR4RegClass.contains(SrcReg)) {
3370  static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
3371  AArch64::zsub2, AArch64::zsub3};
3372  copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
3373  Indices);
3374  return;
3375  }
3376 
3377  if (AArch64::GPR64spRegClass.contains(DestReg) &&
3378  (AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) {
3379  if (DestReg == AArch64::SP || SrcReg == AArch64::SP) {
3380  // If either operand is SP, expand to ADD #0.
3381  BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg)
3382  .addReg(SrcReg, getKillRegState(KillSrc))
3383  .addImm(0)
3385  } else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroingGP()) {
3386  BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg)
3387  .addImm(0)
3389  } else {
3390  // Otherwise, expand to ORR XZR.
3391  BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
3392  .addReg(AArch64::XZR)
3393  .addReg(SrcReg, getKillRegState(KillSrc));
3394  }
3395  return;
3396  }
3397 
3398  // Copy a DDDD register quad by copying the individual sub-registers.
3399  if (AArch64::DDDDRegClass.contains(DestReg) &&
3400  AArch64::DDDDRegClass.contains(SrcReg)) {
3401  static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
3402  AArch64::dsub2, AArch64::dsub3};
3403  copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
3404  Indices);
3405  return;
3406  }
3407 
3408  // Copy a DDD register triple by copying the individual sub-registers.
3409  if (AArch64::DDDRegClass.contains(DestReg) &&
3410  AArch64::DDDRegClass.contains(SrcReg)) {
3411  static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
3412  AArch64::dsub2};
3413  copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
3414  Indices);
3415  return;
3416  }
3417 
3418  // Copy a DD register pair by copying the individual sub-registers.
3419  if (AArch64::DDRegClass.contains(DestReg) &&
3420  AArch64::DDRegClass.contains(SrcReg)) {
3421  static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1};
3422  copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
3423  Indices);
3424  return;
3425  }
3426 
3427  // Copy a QQQQ register quad by copying the individual sub-registers.
3428  if (AArch64::QQQQRegClass.contains(DestReg) &&
3429  AArch64::QQQQRegClass.contains(SrcReg)) {
3430  static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
3431  AArch64::qsub2, AArch64::qsub3};
3432  copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
3433  Indices);
3434  return;
3435  }
3436 
3437  // Copy a QQQ register triple by copying the individual sub-registers.
3438  if (AArch64::QQQRegClass.contains(DestReg) &&
3439  AArch64::QQQRegClass.contains(SrcReg)) {
3440  static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
3441  AArch64::qsub2};
3442  copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
3443  Indices);
3444  return;
3445  }
3446 
3447  // Copy a QQ register pair by copying the individual sub-registers.
3448  if (AArch64::QQRegClass.contains(DestReg) &&
3449  AArch64::QQRegClass.contains(SrcReg)) {
3450  static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1};
3451  copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
3452  Indices);
3453  return;
3454  }
3455 
3456  if (AArch64::XSeqPairsClassRegClass.contains(DestReg) &&
3457  AArch64::XSeqPairsClassRegClass.contains(SrcReg)) {
3458  static const unsigned Indices[] = {AArch64::sube64, AArch64::subo64};
3459  copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRXrs,
3460  AArch64::XZR, Indices);
3461  return;
3462  }
3463 
3464  if (AArch64::WSeqPairsClassRegClass.contains(DestReg) &&
3465  AArch64::WSeqPairsClassRegClass.contains(SrcReg)) {
3466  static const unsigned Indices[] = {AArch64::sube32, AArch64::subo32};
3467  copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRWrs,
3468  AArch64::WZR, Indices);
3469  return;
3470  }
3471 
3472  if (AArch64::FPR128RegClass.contains(DestReg) &&
3473  AArch64::FPR128RegClass.contains(SrcReg)) {
3474  if (Subtarget.hasNEON()) {
3475  BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
3476  .addReg(SrcReg)
3477  .addReg(SrcReg, getKillRegState(KillSrc));
3478  } else {
3479  BuildMI(MBB, I, DL, get(AArch64::STRQpre))
3480  .addReg(AArch64::SP, RegState::Define)
3481  .addReg(SrcReg, getKillRegState(KillSrc))
3482  .addReg(AArch64::SP)
3483  .addImm(-16);
3484  BuildMI(MBB, I, DL, get(AArch64::LDRQpre))
3485  .addReg(AArch64::SP, RegState::Define)
3486  .addReg(DestReg, RegState::Define)
3487  .addReg(AArch64::SP)
3488  .addImm(16);
3489  }
3490  return;
3491  }
3492 
3493  if (AArch64::FPR64RegClass.contains(DestReg) &&
3494  AArch64::FPR64RegClass.contains(SrcReg)) {
3495  BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg)
3496  .addReg(SrcReg, getKillRegState(KillSrc));
3497  return;
3498  }
3499 
3500  if (AArch64::FPR32RegClass.contains(DestReg) &&
3501  AArch64::FPR32RegClass.contains(SrcReg)) {
3502  BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
3503  .addReg(SrcReg, getKillRegState(KillSrc));
3504  return;
3505  }
3506 
3507  if (AArch64::FPR16RegClass.contains(DestReg) &&
3508  AArch64::FPR16RegClass.contains(SrcReg)) {
3509  DestReg =
3510  RI.getMatchingSuperReg(DestReg, AArch64::hsub, &AArch64::FPR32RegClass);
3511  SrcReg =
3512  RI.getMatchingSuperReg(SrcReg, AArch64::hsub, &AArch64::FPR32RegClass);
3513  BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
3514  .addReg(SrcReg, getKillRegState(KillSrc));
3515  return;
3516  }
3517 
3518  if (AArch64::FPR8RegClass.contains(DestReg) &&
3519  AArch64::FPR8RegClass.contains(SrcReg)) {
3520  DestReg =
3521  RI.getMatchingSuperReg(DestReg, AArch64::bsub, &AArch64::FPR32RegClass);
3522  SrcReg =
3523  RI.getMatchingSuperReg(SrcReg, AArch64::bsub, &AArch64::FPR32RegClass);
3524  BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
3525  .addReg(SrcReg, getKillRegState(KillSrc));
3526  return;
3527  }
3528 
3529  // Copies between GPR64 and FPR64.
3530  if (AArch64::FPR64RegClass.contains(DestReg) &&
3531  AArch64::GPR64RegClass.contains(SrcReg)) {
3532  BuildMI(MBB, I, DL, get(AArch64::FMOVXDr), DestReg)
3533  .addReg(SrcReg, getKillRegState(KillSrc));
3534  return;
3535  }
3536  if (AArch64::GPR64RegClass.contains(DestReg) &&
3537  AArch64::FPR64RegClass.contains(SrcReg)) {
3538  BuildMI(MBB, I, DL, get(AArch64::FMOVDXr), DestReg)
3539  .addReg(SrcReg, getKillRegState(KillSrc));
3540  return;
3541  }
3542  // Copies between GPR32 and FPR32.
3543  if (AArch64::FPR32RegClass.contains(DestReg) &&
3544  AArch64::GPR32RegClass.contains(SrcReg)) {
3545  BuildMI(MBB, I, DL, get(AArch64::FMOVWSr), DestReg)
3546  .addReg(SrcReg, getKillRegState(KillSrc));
3547  return;
3548  }
3549  if (AArch64::GPR32RegClass.contains(DestReg) &&
3550  AArch64::FPR32RegClass.contains(SrcReg)) {
3551  BuildMI(MBB, I, DL, get(AArch64::FMOVSWr), DestReg)
3552  .addReg(SrcReg, getKillRegState(KillSrc));
3553  return;
3554  }
3555 
3556  if (DestReg == AArch64::NZCV) {
3557  assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy");
3558  BuildMI(MBB, I, DL, get(AArch64::MSR))
3559  .addImm(AArch64SysReg::NZCV)
3560  .addReg(SrcReg, getKillRegState(KillSrc))
3561  .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define);
3562  return;
3563  }
3564 
3565  if (SrcReg == AArch64::NZCV) {
3566  assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy");
3567  BuildMI(MBB, I, DL, get(AArch64::MRS), DestReg)
3568  .addImm(AArch64SysReg::NZCV)
3569  .addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc));
3570  return;
3571  }
3572 
3573 #ifndef NDEBUG
3575  errs() << TRI.getRegAsmName(DestReg) << " = COPY "
3576  << TRI.getRegAsmName(SrcReg) << "\n";
3577 #endif
3578  llvm_unreachable("unimplemented reg-to-reg copy");
3579 }
3580 
3583  MachineBasicBlock::iterator InsertBefore,
3584  const MCInstrDesc &MCID,
3585  Register SrcReg, bool IsKill,
3586  unsigned SubIdx0, unsigned SubIdx1, int FI,
3587  MachineMemOperand *MMO) {
3588  Register SrcReg0 = SrcReg;
3589  Register SrcReg1 = SrcReg;
3590  if (Register::isPhysicalRegister(SrcReg)) {
3591  SrcReg0 = TRI.getSubReg(SrcReg, SubIdx0);
3592  SubIdx0 = 0;
3593  SrcReg1 = TRI.getSubReg(SrcReg, SubIdx1);
3594  SubIdx1 = 0;
3595  }
3596  BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
3597  .addReg(SrcReg0, getKillRegState(IsKill), SubIdx0)
3598  .addReg(SrcReg1, getKillRegState(IsKill), SubIdx1)
3599  .addFrameIndex(FI)
3600  .addImm(0)
3601  .addMemOperand(MMO);
3602 }
3603 
3606  bool isKill, int FI, const TargetRegisterClass *RC,
3607  const TargetRegisterInfo *TRI) const {
3608  MachineFunction &MF = *MBB.getParent();
3609  MachineFrameInfo &MFI = MF.getFrameInfo();
3610 
3611  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
3612  MachineMemOperand *MMO =
3613  MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
3614  MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
3615  unsigned Opc = 0;
3616  bool Offset = true;
3617  unsigned StackID = TargetStackID::Default;
3618  switch (TRI->getSpillSize(*RC)) {
3619  case 1:
3620  if (AArch64::FPR8RegClass.hasSubClassEq(RC))
3621  Opc = AArch64::STRBui;
3622  break;
3623  case 2:
3624  if (AArch64::FPR16RegClass.hasSubClassEq(RC))
3625  Opc = AArch64::STRHui;
3626  else if (AArch64::PPRRegClass.hasSubClassEq(RC)) {
3627  assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
3628  Opc = AArch64::STR_PXI;
3629  StackID = TargetStackID::ScalableVector;
3630  }
3631  break;
3632  case 4:
3633  if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
3634  Opc = AArch64::STRWui;
3635  if (Register::isVirtualRegister(SrcReg))
3636  MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
3637  else
3638  assert(SrcReg != AArch64::WSP);
3639  } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
3640  Opc = AArch64::STRSui;
3641  break;
3642  case 8:
3643  if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
3644  Opc = AArch64::STRXui;
3645  if (Register::isVirtualRegister(SrcReg))
3646  MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
3647  else
3648  assert(SrcReg != AArch64::SP);
3649  } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
3650  Opc = AArch64::STRDui;
3651  } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
3652  storeRegPairToStackSlot(getRegisterInfo(), MBB, MBBI,
3653  get(AArch64::STPWi), SrcReg, isKill,
3654  AArch64::sube32, AArch64::subo32, FI, MMO);
3655  return;
3656  }
3657  break;
3658  case 16:
3659  if (AArch64::FPR128RegClass.hasSubClassEq(RC))
3660  Opc = AArch64::STRQui;
3661  else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
3662  assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
3663  Opc = AArch64::ST1Twov1d;
3664  Offset = false;
3665  } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
3666  storeRegPairToStackSlot(getRegisterInfo(), MBB, MBBI,
3667  get(AArch64::STPXi), SrcReg, isKill,
3668  AArch64::sube64, AArch64::subo64, FI, MMO);
3669  return;
3670  } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
3671  assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
3672  Opc = AArch64::STR_ZXI;
3673  StackID = TargetStackID::ScalableVector;
3674  }
3675  break;
3676  case 24:
3677  if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
3678  assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
3679  Opc = AArch64::ST1Threev1d;
3680  Offset = false;
3681  }
3682  break;
3683  case 32:
3684  if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
3685  assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
3686  Opc = AArch64::ST1Fourv1d;
3687  Offset = false;
3688  } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
3689  assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
3690  Opc = AArch64::ST1Twov2d;
3691  Offset = false;
3692  } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) {
3693  assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
3694  Opc = AArch64::STR_ZZXI;
3695  StackID = TargetStackID::ScalableVector;
3696  }
3697  break;
3698  case 48:
3699  if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
3700  assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
3701  Opc = AArch64::ST1Threev2d;
3702  Offset = false;
3703  } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
3704  assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
3705  Opc = AArch64::STR_ZZZXI;
3706  StackID = TargetStackID::ScalableVector;
3707  }
3708  break;
3709  case 64:
3710  if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
3711  assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
3712  Opc = AArch64::ST1Fourv2d;
3713  Offset = false;
3714  } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) {
3715  assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
3716  Opc = AArch64::STR_ZZZZXI;
3717  StackID = TargetStackID::ScalableVector;
3718  }
3719  break;
3720  }
3721  assert(Opc && "Unknown register class");
3722  MFI.setStackID(FI, StackID);
3723 
3724  const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DebugLoc(), get(Opc))
3725  .addReg(SrcReg, getKillRegState(isKill))
3726  .addFrameIndex(FI);
3727 
3728  if (Offset)
3729  MI.addImm(0);
3730  MI.addMemOperand(MMO);
3731 }
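// For illustration of the switch above: a 128-bit FPR spill of %q0 to %stack.0
// is emitted as
//   STRQui %q0, %stack.0, 0
// while an SVE Z-register spill selects STR_ZXI and additionally marks the
// slot as TargetStackID::ScalableVector, so that PEI later addresses it with a
// VL-scaled offset instead of a fixed byte offset.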
3732 
3733 static void loadRegPairFromStackSlot(const TargetRegisterInfo &TRI,
3734  MachineBasicBlock &MBB,
3735  MachineBasicBlock::iterator InsertBefore,
3736  const MCInstrDesc &MCID,
3737  Register DestReg, unsigned SubIdx0,
3738  unsigned SubIdx1, int FI,
3739  MachineMemOperand *MMO) {
3740  Register DestReg0 = DestReg;
3741  Register DestReg1 = DestReg;
3742  bool IsUndef = true;
3743  if (Register::isPhysicalRegister(DestReg)) {
3744  DestReg0 = TRI.getSubReg(DestReg, SubIdx0);
3745  SubIdx0 = 0;
3746  DestReg1 = TRI.getSubReg(DestReg, SubIdx1);
3747  SubIdx1 = 0;
3748  IsUndef = false;
3749  }
3750  BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
3751  .addReg(DestReg0, RegState::Define | getUndefRegState(IsUndef), SubIdx0)
3752  .addReg(DestReg1, RegState::Define | getUndefRegState(IsUndef), SubIdx1)
3753  .addFrameIndex(FI)
3754  .addImm(0)
3755  .addMemOperand(MMO);
3756 }
3757 
3758 void AArch64InstrInfo::loadRegFromStackSlot(
3759  MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg,
3760  int FI, const TargetRegisterClass *RC,
3761  const TargetRegisterInfo *TRI) const {
3762  MachineFunction &MF = *MBB.getParent();
3763  MachineFrameInfo &MFI = MF.getFrameInfo();
3764  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
3765  MachineMemOperand *MMO =
3766  MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
3767  MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
3768 
3769  unsigned Opc = 0;
3770  bool Offset = true;
3771  unsigned StackID = TargetStackID::Default;
3772  switch (TRI->getSpillSize(*RC)) {
3773  case 1:
3774  if (AArch64::FPR8RegClass.hasSubClassEq(RC))
3775  Opc = AArch64::LDRBui;
3776  break;
3777  case 2:
3778  if (AArch64::FPR16RegClass.hasSubClassEq(RC))
3779  Opc = AArch64::LDRHui;
3780  else if (AArch64::PPRRegClass.hasSubClassEq(RC)) {
3781  assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
3782  Opc = AArch64::LDR_PXI;
3783  StackID = TargetStackID::ScalableVector;
3784  }
3785  break;
3786  case 4:
3787  if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
3788  Opc = AArch64::LDRWui;
3789  if (Register::isVirtualRegister(DestReg))
3790  MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR32RegClass);
3791  else
3792  assert(DestReg != AArch64::WSP);
3793  } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
3794  Opc = AArch64::LDRSui;
3795  break;
3796  case 8:
3797  if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
3798  Opc = AArch64::LDRXui;
3799  if (Register::isVirtualRegister(DestReg))
3800  MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass);
3801  else
3802  assert(DestReg != AArch64::SP);
3803  } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
3804  Opc = AArch64::LDRDui;
3805  } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
3806  loadRegPairFromStackSlot(getRegisterInfo(), MBB, MBBI,
3807  get(AArch64::LDPWi), DestReg, AArch64::sube32,
3808  AArch64::subo32, FI, MMO);
3809  return;
3810  }
3811  break;
3812  case 16:
3813  if (AArch64::FPR128RegClass.hasSubClassEq(RC))
3814  Opc = AArch64::LDRQui;
3815  else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
3816  assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
3817  Opc = AArch64::LD1Twov1d;
3818  Offset = false;
3819  } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
3820  loadRegPairFromStackSlot(getRegisterInfo(), MBB, MBBI,
3821  get(AArch64::LDPXi), DestReg, AArch64::sube64,
3822  AArch64::subo64, FI, MMO);
3823  return;
3824  } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
3825  assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
3826  Opc = AArch64::LDR_ZXI;
3827  StackID = TargetStackID::ScalableVector;
3828  }
3829  break;
3830  case 24:
3831  if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
3832  assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
3833  Opc = AArch64::LD1Threev1d;
3834  Offset = false;
3835  }
3836  break;
3837  case 32:
3838  if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
3839  assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
3840  Opc = AArch64::LD1Fourv1d;
3841  Offset = false;
3842  } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
3843  assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
3844  Opc = AArch64::LD1Twov2d;
3845  Offset = false;
3846  } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) {
3847  assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
3848  Opc = AArch64::LDR_ZZXI;
3849  StackID = TargetStackID::ScalableVector;
3850  }
3851  break;
3852  case 48:
3853  if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
3854  assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
3855  Opc = AArch64::LD1Threev2d;
3856  Offset = false;
3857  } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
3858  assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
3859  Opc = AArch64::LDR_ZZZXI;
3860  StackID = TargetStackID::ScalableVector;
3861  }
3862  break;
3863  case 64:
3864  if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
3865  assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
3866  Opc = AArch64::LD1Fourv2d;
3867  Offset = false;
3868  } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) {
3869  assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
3870  Opc = AArch64::LDR_ZZZZXI;
3871  StackID = TargetStackID::ScalableVector;
3872  }
3873  break;
3874  }
3875 
3876  assert(Opc && "Unknown register class");
3877  MFI.setStackID(FI, StackID);
3878 
3879  const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DebugLoc(), get(Opc))
3880  .addReg(DestReg, getDefRegState(true))
3881  .addFrameIndex(FI);
3882  if (Offset)
3883  MI.addImm(0);
3884  MI.addMemOperand(MMO);
3885 }
3886 
3887 static bool isNZCVTouchedInInstructionRange(const MachineInstr &DefMI,
3888  const MachineInstr &UseMI,
3889  const TargetRegisterInfo *TRI) {
3890  return any_of(instructionsWithoutDebug(std::next(DefMI.getIterator()),
3891  UseMI.getIterator()),
3892  [TRI](const MachineInstr &I) {
3893  return I.modifiesRegister(AArch64::NZCV, TRI) ||
3894  I.readsRegister(AArch64::NZCV, TRI);
3895  });
3896 }
3897 
3898 void AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(
3899  const StackOffset &Offset, int64_t &ByteSized, int64_t &VGSized) {
3900  // The smallest scalable element supported by scaled SVE addressing
3901  // modes is the predicate, which is 2 scalable bytes in size. So the scalable
3902  // byte offset must always be a multiple of 2.
3903  assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset");
3904 
3905  // VGSized offsets are divided by '2', because the VG register is the
3906  // number of 64bit granules as opposed to 128bit vector chunks,
3907  // which is how the 'n' in e.g. MVT::nxv1i8 is modelled.
3908  // So, for a stack offset of 16 MVT::nxv1i8's, the size is n x 16 bytes.
3909  // VG = n * 2 and the dwarf offset must be VG * 8 bytes.
3910  ByteSized = Offset.getFixed();
3911  VGSized = Offset.getScalable() / 2;
3912 }
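// Worked example: a StackOffset of 16 fixed bytes plus 16 scalable bytes (one
// 128-bit SVE vector) decomposes into ByteSized = 16 and VGSized = 16 / 2 = 8,
// i.e. a DWARF expression of the form 16 + 8 * VG, since VG counts 64-bit
// granules (VG = 2 * n for n 128-bit vectors).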
3913 
3914 /// Returns the offset in parts to which this frame offset can be
3915 /// decomposed for the purpose of describing a frame offset.
3916 /// For non-scalable offsets this is simply its byte size.
3917 void AArch64InstrInfo::decomposeStackOffsetForFrameOffsets(
3918  const StackOffset &Offset, int64_t &NumBytes, int64_t &NumPredicateVectors,
3919  int64_t &NumDataVectors) {
3920  // The smallest scalable element supported by scaled SVE addressing
3921  // modes is the predicate, which is 2 scalable bytes in size. So the scalable
3922  // byte offset must always be a multiple of 2.
3923  assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset");
3924 
3925  NumBytes = Offset.getFixed();
3926  NumDataVectors = 0;
3927  NumPredicateVectors = Offset.getScalable() / 2;
3928  // This method is used to get the offsets to adjust the frame offset.
3929  // If the function requires ADDPL to be used and needs more than two ADDPL
3930  // instructions, part of the offset is folded into NumDataVectors so that it
3931  // uses ADDVL for part of it, reducing the number of ADDPL instructions.
3932  if (NumPredicateVectors % 8 == 0 || NumPredicateVectors < -64 ||
3933  NumPredicateVectors > 62) {
3934  NumDataVectors = NumPredicateVectors / 8;
3935  NumPredicateVectors -= NumDataVectors * 8;
3936  }
3937 }
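// For example, a scalable offset of 132 bytes corresponds to 66 predicate
// vectors; since 66 > 62, it is split into NumDataVectors = 66 / 8 = 8 and
// NumPredicateVectors = 66 - 8 * 8 = 2, so the adjustment can be emitted as a
// single ADDVL (by 8) plus a single ADDPL (by 2) rather than a chain of ADDPLs.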
3938 
3939 // Helper function to emit a frame offset adjustment from a given
3940 // pointer (SrcReg), stored into DestReg. This function is explicit
3941 // in that it requires the opcode.
3942 static void emitFrameOffsetAdj(MachineBasicBlock &MBB,
3943  MachineBasicBlock::iterator MBBI,
3944  const DebugLoc &DL, unsigned DestReg,
3945  unsigned SrcReg, int64_t Offset, unsigned Opc,
3946  const TargetInstrInfo *TII,
3947  MachineInstr::MIFlag Flag, bool NeedsWinCFI,
3948  bool *HasWinCFI) {
3949  int Sign = 1;
3950  unsigned MaxEncoding, ShiftSize;
3951  switch (Opc) {
3952  case AArch64::ADDXri:
3953  case AArch64::ADDSXri:
3954  case AArch64::SUBXri:
3955  case AArch64::SUBSXri:
3956  MaxEncoding = 0xfff;
3957  ShiftSize = 12;
3958  break;
3959  case AArch64::ADDVL_XXI:
3960  case AArch64::ADDPL_XXI:
3961  MaxEncoding = 31;
3962  ShiftSize = 0;
3963  if (Offset < 0) {
3964  MaxEncoding = 32;
3965  Sign = -1;
3966  Offset = -Offset;
3967  }
3968  break;
3969  default:
3970  llvm_unreachable("Unsupported opcode");
3971  }
3972 
3973  // FIXME: If the offset won't fit in 24-bits, compute the offset into a
3974  // scratch register. If DestReg is a virtual register, use it as the
3975  // scratch register; otherwise, create a new virtual register (to be
3976  // replaced by the scavenger at the end of PEI). That case can be optimized
3977  // slightly if DestReg is SP which is always 16-byte aligned, so the scratch
3978  // register can be loaded with offset%8 and the add/sub can use an extending
3979  // instruction with LSL#3.
3980  // Currently the function handles any offsets but generates a poor sequence
3981  // of code.
3982  // assert(Offset < (1 << 24) && "unimplemented reg plus immediate");
3983 
3984  const unsigned MaxEncodableValue = MaxEncoding << ShiftSize;
3985  Register TmpReg = DestReg;
3986  if (TmpReg == AArch64::XZR)
3987  TmpReg = MBB.getParent()->getRegInfo().createVirtualRegister(
3988  &AArch64::GPR64RegClass);
3989  do {
3990  uint64_t ThisVal = std::min<uint64_t>(Offset, MaxEncodableValue);
3991  unsigned LocalShiftSize = 0;
3992  if (ThisVal > MaxEncoding) {
3993  ThisVal = ThisVal >> ShiftSize;
3994  LocalShiftSize = ShiftSize;
3995  }
3996  assert((ThisVal >> ShiftSize) <= MaxEncoding &&
3997  "Encoding cannot handle value that big");
3998 
3999  Offset -= ThisVal << LocalShiftSize;
4000  if (Offset == 0)
4001  TmpReg = DestReg;
4002  auto MBI = BuildMI(MBB, MBBI, DL, TII->get(Opc), TmpReg)
4003  .addReg(SrcReg)
4004  .addImm(Sign * (int)ThisVal);
4005  if (ShiftSize)
4006  MBI = MBI.addImm(
4007  AArch64_AM::getShifterImm(AArch64_AM::LSL, LocalShiftSize));
4008  MBI = MBI.setMIFlag(Flag);
4009 
4010  if (NeedsWinCFI) {
4011  assert(Sign == 1 && "SEH directives should always have a positive sign");
4012  int Imm = (int)(ThisVal << LocalShiftSize);
4013  if ((DestReg == AArch64::FP && SrcReg == AArch64::SP) ||
4014  (SrcReg == AArch64::FP && DestReg == AArch64::SP)) {
4015  if (HasWinCFI)
4016  *HasWinCFI = true;
4017  if (Imm == 0)
4018  BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_SetFP)).setMIFlag(Flag);
4019  else
4020  BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_AddFP))
4021  .addImm(Imm)
4022  .setMIFlag(Flag);
4023  assert(Offset == 0 && "Expected remaining offset to be zero to "
4024  "emit a single SEH directive");
4025  } else if (DestReg == AArch64::SP) {
4026  if (HasWinCFI)
4027  *HasWinCFI = true;
4028  assert(SrcReg == AArch64::SP && "Unexpected SrcReg for SEH_StackAlloc");
4029  BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
4030  .addImm(Imm)
4031  .setMIFlag(Flag);
4032  }
4033  if (HasWinCFI)
4034  *HasWinCFI = true;
4035  }
4036 
4037  SrcReg = TmpReg;
4038  } while (Offset);
4039 }
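// As an illustration of the chunking loop above, materializing SP + 0x1234
// with ADDXri (12-bit immediate, optional LSL #12) takes two steps:
//   add xD, sp, #1, lsl #12   ; consumes 0x1000
//   add xD, xD, #0x234        ; remaining 0x234
// Scalable adjustments via ADDVL_XXI/ADDPL_XXI are chunked the same way, but
// with an immediate range of -32..31 and no shift.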
4040 
4041 void llvm::emitFrameOffset(MachineBasicBlock &MBB,
4042  MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
4043  unsigned DestReg, unsigned SrcReg,
4044  StackOffset Offset, const TargetInstrInfo *TII,
4045  MachineInstr::MIFlag Flag, bool SetNZCV,
4046  bool NeedsWinCFI, bool *HasWinCFI) {
4047  int64_t Bytes, NumPredicateVectors, NumDataVectors;
4048  AArch64InstrInfo::decomposeStackOffsetForFrameOffsets(
4049  Offset, Bytes, NumPredicateVectors, NumDataVectors);
4050 
4051  // First emit non-scalable frame offsets, or a simple 'mov'.
4052  if (Bytes || (!Offset && SrcReg != DestReg)) {
4053  assert((DestReg != AArch64::SP || Bytes % 8 == 0) &&
4054  "SP increment/decrement not 8-byte aligned");
4055  unsigned Opc = SetNZCV ? AArch64::ADDSXri : AArch64::ADDXri;
4056  if (Bytes < 0) {
4057  Bytes = -Bytes;
4058  Opc = SetNZCV ? AArch64::SUBSXri : AArch64::SUBXri;
4059  }
4060  emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, Bytes, Opc, TII, Flag,
4061  NeedsWinCFI, HasWinCFI);
4062  SrcReg = DestReg;
4063  }
4064 
4065  assert(!(SetNZCV && (NumPredicateVectors || NumDataVectors)) &&
4066  "SetNZCV not supported with SVE vectors");
4067  assert(!(NeedsWinCFI && (NumPredicateVectors || NumDataVectors)) &&
4068  "WinCFI not supported with SVE vectors");
4069 
4070  if (NumDataVectors) {
4071  emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, NumDataVectors,
4072  AArch64::ADDVL_XXI, TII, Flag, NeedsWinCFI, nullptr);
4073  SrcReg = DestReg;
4074  }
4075 
4076  if (NumPredicateVectors) {
4077  assert(DestReg != AArch64::SP && "Unaligned access to SP");
4078  emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, NumPredicateVectors,
4079  AArch64::ADDPL_XXI, TII, Flag, NeedsWinCFI, nullptr);
4080  }
4081 }
4082 
4083 MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
4084  MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
4085  MachineBasicBlock::iterator InsertPt, int FrameIndex,
4086  LiveIntervals *LIS, VirtRegMap *VRM) const {
4087  // This is a bit of a hack. Consider this instruction:
4088  //
4089  // %0 = COPY %sp; GPR64all:%0
4090  //
4091  // We explicitly chose GPR64all for the virtual register so such a copy might
4092  // be eliminated by RegisterCoalescer. However, that may not be possible, and
4093  // %0 may even spill. We can't spill %sp, and since it is in the GPR64all
4094  // register class, TargetInstrInfo::foldMemoryOperand() is going to try.
4095  //
4096  // To prevent that, we are going to constrain the %0 register class here.
4097  //
4098  // <rdar://problem/11522048>
4099  //
4100  if (MI.isFullCopy()) {
4101  Register DstReg = MI.getOperand(0).getReg();
4102  Register SrcReg = MI.getOperand(1).getReg();
4103  if (SrcReg == AArch64::SP && Register::isVirtualRegister(DstReg)) {
4104  MF.getRegInfo().constrainRegClass(DstReg, &AArch64::GPR64RegClass);
4105  return nullptr;
4106  }
4107  if (DstReg == AArch64::SP && Register::isVirtualRegister(SrcReg)) {
4108  MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
4109  return nullptr;
4110  }
4111  }
4112 
4113  // Handle the case where a copy is being spilled or filled but the source
4114  // and destination register class don't match. For example:
4115  //
4116  // %0 = COPY %xzr; GPR64common:%0
4117  //
4118  // In this case we can still safely fold away the COPY and generate the
4119  // following spill code:
4120  //
4121  // STRXui %xzr, %stack.0
4122  //
4123  // This also eliminates spilled cross register class COPYs (e.g. between x and
4124  // d regs) of the same size. For example:
4125  //
4126  // %0 = COPY %1; GPR64:%0, FPR64:%1
4127  //
4128  // will be filled as
4129  //
4130  // LDRDui %0, fi<#0>
4131  //
4132  // instead of
4133  //
4134  // LDRXui %Temp, fi<#0>
4135  // %0 = FMOV %Temp
4136  //
4137  if (MI.isCopy() && Ops.size() == 1 &&
4138  // Make sure we're only folding the explicit COPY defs/uses.
4139  (Ops[0] == 0 || Ops[0] == 1)) {
4140  bool IsSpill = Ops[0] == 0;
4141  bool IsFill = !IsSpill;
4142  const TargetRegisterInfo &TRI = getRegisterInfo();
4143  const MachineRegisterInfo &MRI = MF.getRegInfo();
4144  MachineBasicBlock &MBB = *MI.getParent();
4145  const MachineOperand &DstMO = MI.getOperand(0);
4146  const MachineOperand &SrcMO = MI.getOperand(1);
4147  Register DstReg = DstMO.getReg();
4148  Register SrcReg = SrcMO.getReg();
4149  // This is slightly expensive to compute for physical regs since
4150  // getMinimalPhysRegClass is slow.
4151  auto getRegClass = [&](unsigned Reg) {
4152  return Register::isVirtualRegister(Reg) ? MRI.getRegClass(Reg)
4153  : TRI.getMinimalPhysRegClass(Reg);
4154  };
4155 
4156  if (DstMO.getSubReg() == 0 && SrcMO.getSubReg() == 0) {
4157  assert(TRI.getRegSizeInBits(*getRegClass(DstReg)) ==
4158  TRI.getRegSizeInBits(*getRegClass(SrcReg)) &&
4159  "Mismatched register size in non subreg COPY");
4160  if (IsSpill)
4161  storeRegToStackSlot(MBB, InsertPt, SrcReg, SrcMO.isKill(), FrameIndex,
4162  getRegClass(SrcReg), &TRI);
4163  else
4164  loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex,
4165  getRegClass(DstReg), &TRI);
4166  return &*--InsertPt;
4167  }
4168 
4169  // Handle cases like spilling def of:
4170  //
4171  // %0:sub_32<def,read-undef> = COPY %wzr; GPR64common:%0
4172  //
4173  // where the physical register source can be widened and stored to the full
4174  // virtual reg destination stack slot, in this case producing:
4175  //
4176  // STRXui %xzr, %stack.0
4177  //
4178  if (IsSpill && DstMO.isUndef() && Register::isPhysicalRegister(SrcReg)) {
4179  assert(SrcMO.getSubReg() == 0 &&
4180  "Unexpected subreg on physical register");
4181  const TargetRegisterClass *SpillRC;
4182  unsigned SpillSubreg;
4183  switch (DstMO.getSubReg()) {
4184  default:
4185  SpillRC = nullptr;
4186  break;
4187  case AArch64::sub_32:
4188  case AArch64::ssub:
4189  if (AArch64::GPR32RegClass.contains(SrcReg)) {
4190  SpillRC = &AArch64::GPR64RegClass;
4191  SpillSubreg = AArch64::sub_32;
4192  } else if (AArch64::FPR32RegClass.contains(SrcReg)) {
4193  SpillRC = &AArch64::FPR64RegClass;
4194  SpillSubreg = AArch64::ssub;
4195  } else
4196  SpillRC = nullptr;
4197  break;
4198  case AArch64::dsub:
4199  if (AArch64::FPR64RegClass.contains(SrcReg)) {
4200  SpillRC = &AArch64::FPR128RegClass;
4201  SpillSubreg = AArch64::dsub;
4202  } else
4203  SpillRC = nullptr;
4204  break;
4205  }
4206 
4207  if (SpillRC)
4208  if (unsigned WidenedSrcReg =
4209  TRI.getMatchingSuperReg(SrcReg, SpillSubreg, SpillRC)) {
4210  storeRegToStackSlot(MBB, InsertPt, WidenedSrcReg, SrcMO.isKill(),
4211  FrameIndex, SpillRC, &TRI);
4212  return &*--InsertPt;
4213  }
4214  }
4215 
4216  // Handle cases like filling use of:
4217  //
4218  // %0:sub_32<def,read-undef> = COPY %1; GPR64:%0, GPR32:%1
4219  //
4220  // where we can load the full virtual reg source stack slot, into the subreg
4221  // destination, in this case producing:
4222  //
4223  // LDRWui %0:sub_32<def,read-undef>, %stack.0
4224  //
4225  if (IsFill && SrcMO.getSubReg() == 0 && DstMO.isUndef()) {
4226  const TargetRegisterClass *FillRC;
4227  switch (DstMO.getSubReg()) {
4228  default:
4229  FillRC = nullptr;
4230  break;
4231  case AArch64::sub_32:
4232  FillRC = &AArch64::GPR32RegClass;
4233  break;
4234  case AArch64::ssub:
4235  FillRC = &AArch64::FPR32RegClass;
4236  break;
4237  case AArch64::dsub:
4238  FillRC = &AArch64::FPR64RegClass;
4239  break;
4240  }
4241 
4242  if (FillRC) {
4243  assert(TRI.getRegSizeInBits(*getRegClass(SrcReg)) ==
4244  TRI.getRegSizeInBits(*FillRC) &&
4245  "Mismatched regclass size on folded subreg COPY");
4246  loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, FillRC, &TRI);
4247  MachineInstr &LoadMI = *--InsertPt;
4248  MachineOperand &LoadDst = LoadMI.getOperand(0);
4249  assert(LoadDst.getSubReg() == 0 && "unexpected subreg on fill load");
4250  LoadDst.setSubReg(DstMO.getSubReg());
4251  LoadDst.setIsUndef();
4252  return &LoadMI;
4253  }
4254  }
4255  }
4256 
4257  // Cannot fold.
4258  return nullptr;
4259 }
4260 
4261 int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI,
4262  StackOffset &SOffset,
4263  bool *OutUseUnscaledOp,
4264  unsigned *OutUnscaledOp,
4265  int64_t *EmittableOffset) {
4266  // Set output values in case of early exit.
4267  if (EmittableOffset)
4268  *EmittableOffset = 0;
4269  if (OutUseUnscaledOp)
4270  *OutUseUnscaledOp = false;
4271  if (OutUnscaledOp)
4272  *OutUnscaledOp = 0;
4273 
4274  // Exit early for structured vector spills/fills as they can't take an
4275  // immediate offset.
4276  switch (MI.getOpcode()) {
4277  default:
4278  break;
4279  case AArch64::LD1Twov2d:
4280  case AArch64::LD1Threev2d:
4281  case AArch64::LD1Fourv2d:
4282  case AArch64::LD1Twov1d:
4283  case AArch64::LD1Threev1d:
4284  case AArch64::LD1Fourv1d:
4285  case AArch64::ST1Twov2d:
4286  case AArch64::ST1Threev2d:
4287  case AArch64::ST1Fourv2d:
4288  case AArch64::ST1Twov1d:
4289  case AArch64::ST1Threev1d:
4290  case AArch64::ST1Fourv1d:
4291  case AArch64::IRG:
4292  case AArch64::IRGstack:
4293  case AArch64::STGloop:
4294  case AArch64::STZGloop:
4295  return AArch64FrameOffsetCannotUpdate;
4296  }
4297 
4298  // Get the min/max offset and the scale.
4299  TypeSize ScaleValue(0U, false);
4300  unsigned Width;
4301  int64_t MinOff, MaxOff;
4302  if (!AArch64InstrInfo::getMemOpInfo(MI.getOpcode(), ScaleValue, Width, MinOff,
4303  MaxOff))
4304  llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");
4305 
4306  // Construct the complete offset.
4307  bool IsMulVL = ScaleValue.isScalable();
4308  unsigned Scale = ScaleValue.getKnownMinSize();
4309  int64_t Offset = IsMulVL ? SOffset.getScalable() : SOffset.getFixed();
4310 
4311  const MachineOperand &ImmOpnd =
4312  MI.getOperand(AArch64InstrInfo::getLoadStoreImmIdx(MI.getOpcode()));
4313  Offset += ImmOpnd.getImm() * Scale;
4314 
4315  // If the offset doesn't match the scale, we rewrite the instruction to
4316  // use the unscaled instruction instead. Likewise, if we have a negative
4317  // offset and there is an unscaled op to use.
4318  Optional<unsigned> UnscaledOp =
4319  AArch64InstrInfo::getUnscaledLdSt(MI.getOpcode());
4320  bool useUnscaledOp = UnscaledOp && (Offset % Scale || Offset < 0);
4321  if (useUnscaledOp &&
4322  !AArch64InstrInfo::getMemOpInfo(*UnscaledOp, ScaleValue, Width, MinOff,
4323  MaxOff))
4324  llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");
4325 
4326  Scale = ScaleValue.getKnownMinSize();
4327  assert(IsMulVL == ScaleValue.isScalable() &&
4328  "Unscaled opcode has different value for scalable");
4329 
4330  int64_t Remainder = Offset % Scale;
4331  assert(!(Remainder && useUnscaledOp) &&
4332  "Cannot have remainder when using unscaled op");
4333 
4334  assert(MinOff < MaxOff && "Unexpected Min/Max offsets");
4335  int64_t NewOffset = Offset / Scale;
4336  if (MinOff <= NewOffset && NewOffset <= MaxOff)
4337  Offset = Remainder;
4338  else {
4339  NewOffset = NewOffset < 0 ? MinOff : MaxOff;
4340  Offset = Offset - NewOffset * Scale + Remainder;
4341  }
4342 
4343  if (EmittableOffset)
4344  *EmittableOffset = NewOffset;
4345  if (OutUseUnscaledOp)
4346  *OutUseUnscaledOp = useUnscaledOp;
4347  if (OutUnscaledOp && UnscaledOp)
4348  *OutUnscaledOp = *UnscaledOp;
4349 
4350  if (IsMulVL)
4351  SOffset = StackOffset::get(SOffset.getFixed(), Offset);
4352  else
4353  SOffset = StackOffset::get(Offset, SOffset.getScalable());
4354  return AArch64FrameOffsetCanUpdate |
4355  (SOffset ? 0 : AArch64FrameOffsetIsLegal);
4356 }
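// For instance, for a scaled load such as LDRXui (scale 8, unsigned offset), a
// combined byte offset of 20 is not a multiple of 8; because an unscaled twin
// (LDURXi) exists and 20 fits its signed range of -256..255, the routine
// reports UseUnscaledOp with an emittable offset of 20 and no remainder. When
// no unscaled twin exists, only the representable multiple is emitted and the
// leftover bytes are handed back to the caller in SOffset.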
4357 
4358 bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
4359  unsigned FrameReg, StackOffset &Offset,
4360  const AArch64InstrInfo *TII) {
4361  unsigned Opcode = MI.getOpcode();
4362  unsigned ImmIdx = FrameRegIdx + 1;
4363 
4364  if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) {
4365  Offset += StackOffset::getFixed(MI.getOperand(ImmIdx).getImm());
4366  emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(),
4367  MI.getOperand(0).getReg(), FrameReg, Offset, TII,
4368  MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri));
4369  MI.eraseFromParent();
4370  Offset = StackOffset();
4371  return true;
4372  }
4373 
4374  int64_t NewOffset;
4375  unsigned UnscaledOp;
4376  bool UseUnscaledOp;
4377  int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp,
4378  &UnscaledOp, &NewOffset);
4379  if (Status & AArch64FrameOffsetCanUpdate) {
4380  if (Status & AArch64FrameOffsetIsLegal)
4381  // Replace the FrameIndex with FrameReg.
4382  MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
4383  if (UseUnscaledOp)
4384  MI.setDesc(TII->get(UnscaledOp));
4385 
4386  MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset);
4387  return !Offset;
4388  }
4389 
4390  return false;
4391 }
4392 
4393 MCInst AArch64InstrInfo::getNop() const {
4394  return MCInstBuilder(AArch64::HINT).addImm(0);
4395 }
4396 
4397 // AArch64 supports MachineCombiner.
4398 bool AArch64InstrInfo::useMachineCombiner() const { return true; }
4399 
4400 // True when Opc sets flag
4401 static bool isCombineInstrSettingFlag(unsigned Opc) {
4402  switch (Opc) {
4403  case AArch64::ADDSWrr:
4404  case AArch64::ADDSWri:
4405  case AArch64::ADDSXrr:
4406  case AArch64::ADDSXri:
4407  case AArch64::SUBSWrr:
4408  case AArch64::SUBSXrr:
4409  // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
4410  case AArch64::SUBSWri:
4411  case AArch64::SUBSXri:
4412  return true;
4413  default:
4414  break;
4415  }
4416  return false;
4417 }
4418 
4419 // 32b Opcodes that can be combined with a MUL
4420 static bool isCombineInstrCandidate32(unsigned Opc) {
4421  switch (Opc) {
4422  case AArch64::ADDWrr:
4423  case AArch64::ADDWri:
4424  case AArch64::SUBWrr:
4425  case AArch64::ADDSWrr:
4426  case AArch64::ADDSWri:
4427  case AArch64::SUBSWrr:
4428  // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
4429  case AArch64::SUBWri:
4430  case AArch64::SUBSWri:
4431  return true;
4432  default:
4433  break;
4434  }
4435  return false;
4436 }
4437 
4438 // 64b Opcodes that can be combined with a MUL
4439 static bool isCombineInstrCandidate64(unsigned Opc) {
4440  switch (Opc) {
4441  case AArch64::ADDXrr:
4442  case AArch64::ADDXri:
4443  case AArch64::SUBXrr:
4444  case AArch64::ADDSXrr:
4445  case AArch64::ADDSXri:
4446  case AArch64::SUBSXrr:
4447  // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
4448  case AArch64::SUBXri:
4449  case AArch64::SUBSXri:
4450  case AArch64::ADDv8i8:
4451  case AArch64::ADDv16i8:
4452  case AArch64::ADDv4i16:
4453  case AArch64::ADDv8i16:
4454  case AArch64::ADDv2i32:
4455  case AArch64::ADDv4i32:
4456  case AArch64::SUBv8i8:
4457  case AArch64::SUBv16i8:
4458  case AArch64::SUBv4i16:
4459  case AArch64::SUBv8i16:
4460  case AArch64::SUBv2i32:
4461  case AArch64::SUBv4i32:
4462  return true;
4463  default:
4464  break;
4465  }
4466  return false;
4467 }
4468 
4469 // FP Opcodes that can be combined with a FMUL.
4470 static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
4471  switch (Inst.getOpcode()) {
4472  default:
4473  break;
4474  case AArch64::FADDHrr:
4475  case AArch64::FADDSrr:
4476  case AArch64::FADDDrr:
4477  case AArch64::FADDv4f16:
4478  case AArch64::FADDv8f16:
4479  case AArch64::FADDv2f32:
4480  case AArch64::FADDv2f64:
4481  case AArch64::FADDv4f32:
4482  case AArch64::FSUBHrr:
4483  case AArch64::FSUBSrr:
4484  case AArch64::FSUBDrr:
4485  case AArch64::FSUBv4f16:
4486  case AArch64::FSUBv8f16:
4487  case AArch64::FSUBv2f32:
4488  case AArch64::FSUBv2f64:
4489  case AArch64::FSUBv4f32:
4490  TargetOptions Options = Inst.getParent()->getParent()->getTarget().Options;
4491  // We can fuse FADD/FSUB with FMUL, if fusion is either allowed globally by
4492  // the target options or if FADD/FSUB has the contract fast-math flag.
4493  return Options.UnsafeFPMath ||
4494  Options.AllowFPOpFusion == FPOpFusion::Fast ||
4495  Inst.getFlag(MachineInstr::FmContract);
4496  return true;
4497  }
4498  return false;
4499 }
4500 
4501 // Opcodes that can be combined with a MUL
4502 static bool isCombineInstrCandidate(unsigned Opc) {
4503  return (isCombineInstrCandidate32(Opc) || isCombineInstrCandidate64(Opc));
4504 }
4505 
4506 //
4507 // Utility routine that checks if \param MO is defined by an
4508 // \param CombineOpc instruction in the basic block \param MBB
4509 static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO,
4510  unsigned CombineOpc, unsigned ZeroReg = 0,
4511  bool CheckZeroReg = false) {
4512  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
4513  MachineInstr *MI = nullptr;
4514 
4515  if (MO.isReg() && Register::isVirtualRegister(MO.getReg()))
4516  MI = MRI.getUniqueVRegDef(MO.getReg());
4517  // And it needs to be in the trace (otherwise, it won't have a depth).
4518  if (!MI || MI->getParent() != &MBB || (unsigned)MI->getOpcode() != CombineOpc)
4519  return false;
4520  // Must only be used by the user we combine with.
4521  if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
4522  return false;
4523 
4524  if (CheckZeroReg) {
4525  assert(MI->getNumOperands() >= 4 && MI->getOperand(0).isReg() &&
4526  MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
4527  MI->getOperand(3).isReg() && "MAdd/MSub must have at least 4 regs");
4528  // The third input reg must be zero.
4529  if (MI->getOperand(3).getReg() != ZeroReg)
4530  return false;
4531  }
4532 
4533  return true;
4534 }
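// Note that on AArch64 a plain integer multiply is already emitted as MADD
// with the zero register as the addend, e.g.
//   %m:gpr32 = MADDWrrr %a, %b, $wzr
// which is why callers pass ZeroReg/CheckZeroReg: the fusion is only valid
// when the candidate's third operand really is WZR/XZR.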
4535 
4536 //
4537 // Is \param MO defined by an integer multiply and can be combined?
4538 static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO,
4539  unsigned MulOpc, unsigned ZeroReg) {
4540  return canCombine(MBB, MO, MulOpc, ZeroReg, true);
4541 }
4542 
4543 //
4544 // Is \param MO defined by a floating-point multiply and can be combined?
4545 static bool canCombineWithFMUL(MachineBasicBlock &MBB, MachineOperand &MO,
4546  unsigned MulOpc) {
4547  return canCombine(MBB, MO, MulOpc);
4548 }
4549 
4550 // TODO: There are many more machine instruction opcodes to match:
4551 // 1. Other data types (integer, vectors)
4552 // 2. Other math / logic operations (xor, or)
4553 // 3. Other forms of the same operation (intrinsics and other variants)
4554 bool AArch64InstrInfo::isAssociativeAndCommutative(
4555  const MachineInstr &Inst) const {
4556  switch (Inst.getOpcode()) {
4557  case AArch64::FADDDrr:
4558  case AArch64::FADDSrr:
4559  case AArch64::FADDv2f32:
4560  case AArch64::FADDv2f64:
4561  case AArch64::FADDv4f32:
4562  case AArch64::FMULDrr:
4563  case AArch64::FMULSrr:
4564  case AArch64::FMULX32:
4565  case AArch64::FMULX64:
4566  case AArch64::FMULXv2f32:
4567  case AArch64::FMULXv2f64:
4568  case AArch64::FMULXv4f32:
4569  case AArch64::FMULv2f32:
4570  case AArch64::FMULv2f64:
4571  case AArch64::FMULv4f32:
4572  return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath;
4573  default:
4574  return false;
4575  }
4576 }
4577 
4578 /// Find instructions that can be turned into madd.
4579 static bool getMaddPatterns(MachineInstr &Root,
4580  SmallVectorImpl<MachineCombinerPattern> &Patterns) {
4581  unsigned Opc = Root.getOpcode();
4582  MachineBasicBlock &MBB = *Root.getParent();
4583  bool Found = false;
4584 
4585  if (!isCombineInstrCandidate(Opc))
4586  return false;
4587  if (isCombineInstrSettingFlag(Opc)) {
4588  int Cmp_NZCV = Root.findRegisterDefOperandIdx(AArch64::NZCV, true);
4589  // When NZCV is live bail out.
4590  if (Cmp_NZCV == -1)
4591  return false;
4592  unsigned NewOpc = convertToNonFlagSettingOpc(Root);
4593  // When opcode can't change bail out.
4594  // CHECKME: do we miss any cases for opcode conversion?
4595  if (NewOpc == Opc)
4596  return false;
4597  Opc = NewOpc;
4598  }
4599 
4600  auto setFound = [&](int Opcode, int Operand, unsigned ZeroReg,
4601  MachineCombinerPattern Pattern) {
4602  if (canCombineWithMUL(MBB, Root.getOperand(Operand), Opcode, ZeroReg)) {
4603  Patterns.push_back(Pattern);
4604  Found = true;
4605  }
4606  };
4607 
4608  auto setVFound = [&](int Opcode, int Operand, MachineCombinerPattern Pattern) {
4609  if (canCombine(MBB, Root.getOperand(Operand), Opcode)) {
4610  Patterns.push_back(Pattern);
4611  Found = true;
4612  }
4613  };
4614 
4615  typedef MachineCombinerPattern MCP;
4616 
4617  switch (Opc) {
4618  default:
4619  break;
4620  case AArch64::ADDWrr:
4621  assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
4622  "ADDWrr does not have register operands");
4623  setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULADDW_OP1);
4624  setFound(AArch64::MADDWrrr, 2, AArch64::WZR, MCP::MULADDW_OP2);
4625  break;
4626  case AArch64::ADDXrr:
4627  setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULADDX_OP1);
4628  setFound(AArch64::MADDXrrr, 2, AArch64::XZR, MCP::MULADDX_OP2);
4629  break;
4630  case AArch64::SUBWrr:
4631  setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULSUBW_OP1);
4632  setFound(AArch64::MADDWrrr, 2, AArch64::WZR, MCP::MULSUBW_OP2);
4633  break;
4634  case AArch64::SUBXrr:
4635  setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULSUBX_OP1);
4636  setFound(AArch64::MADDXrrr, 2, AArch64::XZR, MCP::MULSUBX_OP2);
4637  break;
4638  case AArch64::ADDWri:
4639  setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULADDWI_OP1);
4640  break;
4641  case AArch64::ADDXri:
4642  setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULADDXI_OP1);
4643  break;
4644  case AArch64::SUBWri:
4645  setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULSUBWI_OP1);
4646  break;
4647  case AArch64::SUBXri:
4648  setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULSUBXI_OP1);
4649  break;
4650  case AArch64::ADDv8i8:
4651  setVFound(AArch64::MULv8i8, 1, MCP::MULADDv8i8_OP1);
4652  setVFound(AArch64::MULv8i8, 2, MCP::MULADDv8i8_OP2);
4653  break;
4654  case AArch64::ADDv16i8:
4655  setVFound(AArch64::MULv16i8, 1, MCP::MULADDv16i8_OP1);
4656  setVFound(AArch64::MULv16i8, 2, MCP::MULADDv16i8_OP2);
4657  break;
4658  case AArch64::ADDv4i16:
4659  setVFound(AArch64::MULv4i16, 1, MCP::MULADDv4i16_OP1);
4660  setVFound(AArch64::MULv4i16, 2, MCP::MULADDv4i16_OP2);
4661  setVFound(AArch64::MULv4i16_indexed, 1, MCP::MULADDv4i16_indexed_OP1);
4662  setVFound(AArch64::MULv4i16_indexed, 2, MCP::MULADDv4i16_indexed_OP2);
4663  break;
4664  case AArch64::ADDv8i16:
4665  setVFound(AArch64::MULv8i16, 1, MCP::MULADDv8i16_OP1);
4666  setVFound(AArch64::MULv8i16, 2, MCP::MULADDv8i16_OP2);
4667  setVFound(AArch64::MULv8i16_indexed, 1, MCP::MULADDv8i16_indexed_OP1);
4668  setVFound(AArch64::MULv8i16_indexed, 2, MCP::MULADDv8i16_indexed_OP2);
4669  break;
4670  case AArch64::ADDv2i32:
4671  setVFound(AArch64::MULv2i32, 1, MCP::MULADDv2i32_OP1);
4672  setVFound(AArch64::MULv2i32, 2, MCP::MULADDv2i32_OP2);
4673  setVFound(AArch64::MULv2i32_indexed, 1, MCP::MULADDv2i32_indexed_OP1);
4674  setVFound(AArch64::MULv2i32_indexed, 2, MCP::MULADDv2i32_indexed_OP2);
4675  break;
4676  case AArch64::ADDv4i32:
4677  setVFound(AArch64::MULv4i32, 1, MCP::MULADDv4i32_OP1);
4678  setVFound(AArch64::MULv4i32, 2, MCP::MULADDv4i32_OP2);
4679  setVFound(AArch64::MULv4i32_indexed, 1, MCP::MULADDv4i32_indexed_OP1);
4680  setVFound(AArch64::MULv4i32_indexed, 2, MCP::MULADDv4i32_indexed_OP2);
4681  break;
4682  case AArch64::SUBv8i8:
4683  setVFound(AArch64::MULv8i8, 1, MCP::MULSUBv8i8_OP1);
4684  setVFound(AArch64::MULv8i8, 2, MCP::MULSUBv8i8_OP2);
4685  break;
4686  case AArch64::SUBv16i8:
4687  setVFound(AArch64::MULv16i8, 1, MCP::MULSUBv16i8_OP1);
4688  setVFound(AArch64::MULv16i8, 2, MCP::MULSUBv16i8_OP2);
4689  break;
4690  case AArch64::SUBv4i16:
4691  setVFound(AArch64::MULv4i16, 1, MCP::MULSUBv4i16_OP1);
4692  setVFound(AArch64::MULv4i16, 2, MCP::MULSUBv4i16_OP2);
4693  setVFound(AArch64::MULv4i16_indexed, 1, MCP::MULSUBv4i16_indexed_OP1);
4694  setVFound(AArch64::MULv4i16_indexed, 2, MCP::MULSUBv4i16_indexed_OP2);
4695  break;
4696  case AArch64::SUBv8i16:
4697  setVFound(AArch64::MULv8i16, 1, MCP::MULSUBv8i16_OP1);
4698  setVFound(AArch64::MULv8i16, 2, MCP::MULSUBv8i16_OP2);
4699  setVFound(AArch64::MULv8i16_indexed, 1, MCP::MULSUBv8i16_indexed_OP1);
4700  setVFound(AArch64::MULv8i16_indexed, 2, MCP::MULSUBv8i16_indexed_OP2);
4701  break;
4702  case AArch64::SUBv2i32:
4703  setVFound(AArch64::MULv2i32, 1, MCP::MULSUBv2i32_OP1);
4704  setVFound(AArch64::MULv2i32, 2, MCP::MULSUBv2i32_OP2);
4705  setVFound(AArch64::MULv2i32_indexed, 1, MCP::MULSUBv2i32_indexed_OP1);
4706  setVFound(AArch64::MULv2i32_indexed, 2, MCP::MULSUBv2i32_indexed_OP2);
4707  break;
4708  case AArch64::SUBv4i32:
4709  setVFound(AArch64::MULv4i32, 1, MCP::MULSUBv4i32_OP1);
4710  setVFound(AArch64::MULv4i32, 2, MCP::MULSUBv4i32_OP2);
4711  setVFound(AArch64::MULv4i32_indexed, 1, MCP::MULSUBv4i32_indexed_OP1);
4712  setVFound(AArch64::MULv4i32_indexed, 2, MCP::MULSUBv4i32_indexed_OP2);
4713  break;
4714  }
4715  return Found;
4716 }
4717 /// Floating-Point Support
4718 
4719 /// Find instructions that can be turned into madd.
4720 static bool getFMAPatterns(MachineInstr &Root,
4721  SmallVectorImpl<MachineCombinerPattern> &Patterns) {
4722 
4723  if (!isCombineInstrCandidateFP(Root))
4724  return false;
4725 
4726  MachineBasicBlock &MBB = *Root.getParent();
4727  bool Found = false;
4728 
4729  auto Match = [&](int Opcode, int Operand,
4730  MachineCombinerPattern Pattern) -> bool {
4731  if (canCombineWithFMUL(MBB, Root.getOperand(Operand), Opcode)) {
4732  Patterns.push_back(Pattern);
4733  return true;
4734  }
4735  return false;
4736  };
4737 
4738  typedef MachineCombinerPattern MCP;
4739 
4740  switch (Root.getOpcode()) {
4741  default:
4742  assert(false && "Unsupported FP instruction in combiner\n");
4743  break;
4744  case AArch64::FADDHrr:
4745  assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
4746  "FADDHrr does not have register operands");
4747 
4748  Found = Match(AArch64::FMULHrr, 1, MCP::FMULADDH_OP1);
4749  Found |= Match(AArch64::FMULHrr, 2, MCP::FMULADDH_OP2);
4750  break;
4751  case AArch64::FADDSrr:
4752  assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
4753  "FADDSrr does not have register operands");
4754 
4755  Found |= Match(AArch64::FMULSrr, 1, MCP::FMULADDS_OP1) ||
4756  Match(AArch64::FMULv1i32_indexed, 1, MCP::FMLAv1i32_indexed_OP1);
4757 
4758  Found |= Match(AArch64::FMULSrr, 2, MCP::FMULADDS_OP2) ||
4759  Match(AArch64::FMULv1i32_indexed, 2, MCP::FMLAv1i32_indexed_OP2);
4760  break;
4761  case AArch64::FADDDrr:
4762  Found |= Match(AArch64::FMULDrr, 1, MCP::FMULADDD_OP1) ||
4763  Match(AArch64::FMULv1i64_indexed, 1, MCP::FMLAv1i64_indexed_OP1);
4764 
4765  Found |= Match(AArch64::FMULDrr, 2, MCP::FMULADDD_OP2) ||
4766  Match(AArch64::FMULv1i64_indexed, 2, MCP::FMLAv1i64_indexed_OP2);
4767  break;
4768  case AArch64::FADDv4f16:
4769  Found |= Match(AArch64::FMULv4i16_indexed, 1, MCP::FMLAv4i16_indexed_OP1) ||
4770  Match(AArch64::FMULv4f16, 1, MCP::FMLAv4f16_OP1);
4771 
4772  Found |= Match(AArch64::FMULv4i16_indexed, 2, MCP::FMLAv4i16_indexed_OP2) ||
4773  Match(AArch64::FMULv4f16, 2, MCP::FMLAv4f16_OP2);
4774  break;
4775  case AArch64::FADDv8f16:
4776  Found |= Match(AArch64::FMULv8i16_indexed, 1, MCP::FMLAv8i16_indexed_OP1) ||
4777  Match(AArch64::FMULv8f16, 1, MCP::FMLAv8f16_OP1);
4778 
4779  Found |= Match(AArch64::FMULv8i16_indexed, 2, MCP::FMLAv8i16_indexed_OP2) ||
4780  Match(AArch64::FMULv8f16, 2, MCP::FMLAv8f16_OP2);
4781  break;
4782  case AArch64::FADDv2f32:
4783  Found |= Match(AArch64::FMULv2i32_indexed, 1, MCP::FMLAv2i32_indexed_OP1) ||
4784  Match(AArch64::FMULv2f32, 1, MCP::FMLAv2f32_OP1);
4785 
4786  Found |= Match(AArch64::FMULv2i32_indexed, 2, MCP::FMLAv2i32_indexed_OP2) ||
4787  Match(AArch64::FMULv2f32, 2, MCP::FMLAv2f32_OP2);
4788  break;
4789  case AArch64::FADDv2f64:
4790  Found |= Match(AArch64::FMULv2i64_indexed, 1, MCP::FMLAv2i64_indexed_OP1) ||
4791  Match(AArch64::FMULv2f64, 1, MCP::FMLAv2f64_OP1);
4792 
4793  Found |= Match(AArch64::FMULv2i64_indexed, 2, MCP::FMLAv2i64_indexed_OP2) ||
4794  Match(AArch64::FMULv2f64, 2, MCP::FMLAv2f64_OP2);
4795  break;
4796  case AArch64::FADDv4f32:
4797  Found |= Match(AArch64::FMULv4i32_indexed, 1, MCP::FMLAv4i32_indexed_OP1) ||
4798  Match(AArch64::FMULv4f32, 1, MCP::FMLAv4f32_OP1);
4799 
4800  Found |= Match(AArch64::FMULv4i32_indexed, 2, MCP::FMLAv4i32_indexed_OP2) ||
4801  Match(AArch64::FMULv4f32, 2, MCP::FMLAv4f32_OP2);
4802  break;
4803  case AArch64::FSUBHrr:
4804  Found = Match(AArch64::FMULHrr, 1, MCP::FMULSUBH_OP1);
4805  Found |= Match(AArch64::FMULHrr, 2, MCP::FMULSUBH_OP2);
4806  Found |= Match(AArch64::FNMULHrr, 1, MCP::FNMULSUBH_OP1);
4807  break;
4808  case AArch64::FSUBSrr:
4809  Found = Match(AArch64::FMULSrr, 1, MCP::FMULSUBS_OP1);
4810 
4811  Found |= Match(AArch64::FMULSrr, 2, MCP::FMULSUBS_OP2) ||
4812  Match(AArch64::FMULv1i32_indexed, 2, MCP::FMLSv1i32_indexed_OP2);
4813 
4814  Found |= Match(AArch64::FNMULSrr, 1, MCP::FNMULSUBS_OP1);
4815  break;
4816  case AArch64::FSUBDrr:
4817  Found = Match(AArch64::FMULDrr, 1, MCP::FMULSUBD_OP1);
4818 
4819  Found |= Match(AArch64::FMULDrr, 2, MCP::FMULSUBD_OP2) ||
4820  Match(AArch64::FMULv1i64_indexed, 2, MCP::FMLSv1i64_indexed_OP2);
4821 
4822  Found |= Match(AArch64::FNMULDrr, 1, MCP::FNMULSUBD_OP1);
4823  break;
4824  case AArch64::FSUBv4f16:
4825  Found |= Match(AArch64::FMULv4i16_indexed, 2, MCP::FMLSv4i16_indexed_OP2) ||
4826  Match(AArch64::FMULv4f16, 2, MCP::FMLSv4f16_OP2);
4827 
4828  Found |= Match(AArch64::FMULv4i16_indexed, 1, MCP::FMLSv4i16_indexed_OP1) ||
4829  Match(AArch64::FMULv4f16, 1, MCP::FMLSv4f16_OP1);
4830  break;
4831  case AArch64::FSUBv8f16:
4832  Found |= Match(AArch64::FMULv8i16_indexed, 2, MCP::FMLSv8i16_indexed_OP2) ||
4833  Match(AArch64::FMULv8f16, 2, MCP::FMLSv8f16_OP2);
4834 
4835  Found |= Match(AArch64::FMULv8i16_indexed, 1, MCP::FMLSv8i16_indexed_OP1) ||
4836  Match(AArch64::FMULv8f16, 1, MCP::FMLSv8f16_OP1);
4837  break;
4838  case AArch64::FSUBv2f32:
4839  Found |= Match(AArch64::FMULv2i32_indexed, 2, MCP::FMLSv2i32_indexed_OP2) ||
4840  Match(AArch64::FMULv2f32, 2, MCP::FMLSv2f32_OP2);
4841 
4842  Found |= Match(AArch64::FMULv2i32_indexed, 1, MCP::FMLSv2i32_indexed_OP1) ||
4843  Match(AArch64::FMULv2f32, 1, MCP::FMLSv2f32_OP1);
4844  break;
4845  case AArch64::FSUBv2f64:
4846  Found |= Match(AArch64::FMULv2i64_indexed, 2, MCP::FMLSv2i64_indexed_OP2) ||
4847  Match(AArch64::FMULv2f64, 2, MCP::FMLSv2f64_OP2);
4848 
4849  Found |= Match(AArch64::FMULv2i64_indexed, 1, MCP::FMLSv2i64_indexed_OP1) ||
4850  Match(AArch64::FMULv2f64, 1, MCP::FMLSv2f64_OP1);
4851  break;
4852  case AArch64::FSUBv4f32:
4853  Found |= Match(AArch64::FMULv4i32_indexed, 2, MCP::FMLSv4i32_indexed_OP2) ||
4854  Match(AArch64::FMULv4f32, 2, MCP::FMLSv4f32_OP2);
4855 
4856  Found |= Match(AArch64::FMULv4i32_indexed, 1, MCP::FMLSv4i32_indexed_OP1) ||
4857  Match(AArch64::FMULv4f32, 1, MCP::FMLSv4f32_OP1);
4858  break;
4859  }
4860  return Found;
4861 }
4862 
4863 /// Return true when a code sequence can improve throughput. It
4864 /// should be called only for instructions in loops.
4865 /// \param Pattern - combiner pattern
4866 bool AArch64InstrInfo::isThroughputPattern(
4867  MachineCombinerPattern Pattern) const {
4868  switch (Pattern) {
4869  default:
4870  break;
  // (case labels for the integer MULADD*/MULSUB* and floating-point
  //  FMLA*/FMLS* combiner patterns, elided from this listing, all fall
  //  through to the return below)
4966  return true;
4967  } // end switch (Pattern)
4968  return false;
4969 }
4970 /// Return true when there is potentially a faster code sequence for an
4971 /// instruction chain ending in \p Root. All potential patterns are listed in
4972 /// the \p Pattern vector. Pattern should be sorted in priority order since the
4973 /// pattern evaluator stops checking as soon as it finds a faster sequence.
4974 
4975 bool AArch64InstrInfo::getMachineCombinerPatterns(
4976  MachineInstr &Root, SmallVectorImpl<MachineCombinerPattern> &Patterns,
4977  bool DoRegPressureReduce) const {
4978  // Integer patterns
4979  if (getMaddPatterns(Root, Patterns))
4980  return true;
4981  // Floating point patterns
4982  if (getFMAPatterns(Root, Patterns))
4983  return true;
4984 
4985  return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns,
4986  DoRegPressureReduce);
4987 }
4988 
4989 enum class FMAInstKind { Default, Indexed, Accumulator };
4990 /// genFusedMultiply - Generate fused multiply instructions.
4991 /// This function supports both integer and floating point instructions.
4992 /// A typical example:
4993 /// F|MUL I=A,B,0
4994 /// F|ADD R,I,C
4995 /// ==> F|MADD R,A,B,C
4996 /// \param MF Containing MachineFunction
4997 /// \param MRI Register information
4998 /// \param TII Target information
4999 /// \param Root is the F|ADD instruction
5000 /// \param [out] InsInstrs is a vector of machine instructions and will
5001 /// contain the generated madd instruction
5002 /// \param IdxMulOpd is index of operand in Root that is the result of
5003 /// the F|MUL. In the example above IdxMulOpd is 1.
5004 /// \param MaddOpc the opcode of the f|madd instruction
5005 /// \param RC Register class of operands
5006 /// \param kind the kind of fma instruction (addressing mode) to be generated
5007 /// \param ReplacedAddend is the result register from the instruction
5008 /// replacing the non-combined operand, if any.
5009 static MachineInstr *
5010 genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI,
5011  const TargetInstrInfo *TII, MachineInstr &Root,
5012  SmallVectorImpl<MachineInstr *> &InsInstrs, unsigned IdxMulOpd,
5013  unsigned MaddOpc, const TargetRegisterClass *RC,
5014  FMAInstKind kind = FMAInstKind::Default,
5015  const Register *ReplacedAddend = nullptr) {
5016  assert(IdxMulOpd == 1 || IdxMulOpd == 2);
5017 
5018  unsigned IdxOtherOpd = IdxMulOpd == 1 ? 2 : 1;
5019  MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
5020  Register ResultReg = Root.getOperand(0).getReg();
5021  Register SrcReg0 = MUL->getOperand(1).getReg();
5022  bool Src0IsKill = MUL->getOperand(1).isKill();
5023  Register SrcReg1 = MUL->getOperand(2).getReg();
5024  bool Src1IsKill = MUL->getOperand(2).isKill();
5025 
5026  unsigned SrcReg2;
5027  bool Src2IsKill;
5028  if (ReplacedAddend) {
5029  // If we just generated a new addend, we must be its only use.
5030  SrcReg2 = *ReplacedAddend;
5031  Src2IsKill = true;
5032  } else {
5033  SrcReg2 = Root.getOperand(IdxOtherOpd).getReg();
5034  Src2IsKill = Root.getOperand(IdxOtherOpd).isKill();
5035  }
5036 
5037  if (Register::isVirtualRegister(ResultReg))
5038  MRI.constrainRegClass(ResultReg, RC);
5039  if (Register::isVirtualRegister(SrcReg0))
5040  MRI.constrainRegClass(SrcReg0, RC);
5041  if (Register::isVirtualRegister(SrcReg1))
5042  MRI.constrainRegClass(SrcReg1, RC);
5043  if (Register::isVirtualRegister(SrcReg2))
5044  MRI.constrainRegClass(SrcReg2, RC);
5045 
5046  MachineInstrBuilder MIB;
5047  if (kind == FMAInstKind::Default)
5048  MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
5049  .addReg(SrcReg0, getKillRegState(Src0IsKill))
5050  .addReg(SrcReg1, getKillRegState(Src1IsKill))
5051  .addReg(SrcReg2, getKillRegState(Src2IsKill));
5052  else if (kind == FMAInstKind::Indexed)
5053  MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
5054  .addReg(SrcReg2, getKillRegState(Src2IsKill))
5055  .addReg(SrcReg0, getKillRegState(Src0IsKill))
5056  .addReg(SrcReg1, getKillRegState(Src1IsKill))
5057  .addImm(MUL->getOperand(3).getImm());
5058  else if (kind == FMAInstKind::Accumulator)
5059  MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
5060  .addReg(SrcReg2, getKillRegState(Src2IsKill))
5061  .addReg(SrcReg0, getKillRegState(Src0IsKill))
5062  .addReg(SrcReg1, getKillRegState(Src1IsKill));
5063  else
5064  assert(false && "Invalid FMA instruction kind \n");
5065  // Insert the MADD (MADD, FMA, FMS, FMLA, FMLS)
5066  InsInstrs.push_back(MIB);
5067  return MUL;
5068 }
5069 
5070 /// genFusedMultiplyAcc - Helper to generate fused multiply accumulate
5071 /// instructions.
5072 ///
5073 /// \see genFusedMultiply
5074 static MachineInstr *genFusedMultiplyAcc(
5075  MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
5076  MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
5077  unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC) {
5078  return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
5079  FMAInstKind::Accumulator);
5080 }
5081 
5082 /// genNeg - Helper to generate an intermediate negation of the second operand
5083 /// of Root
5084 static Register genNeg(MachineFunction &MF, MachineRegisterInfo &MRI,
5085  const TargetInstrInfo *TII, MachineInstr &Root,
5086  SmallVectorImpl<MachineInstr *> &InsInstrs,
5087  DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
5088  unsigned MnegOpc, const TargetRegisterClass *RC) {
5089  Register NewVR = MRI.createVirtualRegister(RC);
5090  MachineInstrBuilder MIB =
5091  BuildMI(MF, Root.getDebugLoc(), TII->get(MnegOpc), NewVR)
5092  .add(Root.getOperand(2));
5093  InsInstrs.push_back(MIB);
5094 
5095  assert(InstrIdxForVirtReg.empty());
5096  InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
5097 
5098  return NewVR;
5099 }
5100 
5101 /// genFusedMultiplyAccNeg - Helper to generate fused multiply accumulate
5102 /// instructions with an additional negation of the accumulator
5103 static MachineInstr *genFusedMultiplyAccNeg(
5104  MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
5105  MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
5106  DenseMap<unsigned, unsigned> &InstrIdxForVirtReg, unsigned IdxMulOpd,
5107  unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC) {
5108  assert(IdxMulOpd == 1);
5109 
5110  Register NewVR =
5111  genNeg(MF, MRI, TII, Root, InsInstrs, InstrIdxForVirtReg, MnegOpc, RC);
5112  return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
5113  FMAInstKind::Accumulator, &NewVR);
5114 }
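// As a rough illustration, for a MULSUBv8i8_OP1 pattern rooted at
//   %m:fpr64 = MULv8i8 %a, %b
//   %d:fpr64 = SUBv8i8 %m, %c
// the helpers above emit something along the lines of
//   %n:fpr64 = NEGv8i8 %c
//   %d:fpr64 = MLAv8i8 %n, %a, %b   ; %d = %n + %a * %b = %a * %b - %c
// with the NEG recorded in InstrIdxForVirtReg so the combiner can account for
// the extra instruction's depth.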
5115 
5116 /// genFusedMultiplyIdx - Helper to generate fused multiply accumulate
5117 /// instructions.
5118 ///
5119 /// \see genFusedMultiply
5120 static MachineInstr *genFusedMultiplyIdx(
5121  MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
5122  MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
5123  unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC) {
5124  return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
5125  FMAInstKind::Indexed);
5126 }
5127 
5128 /// genFusedMultiplyIdxNeg - Helper to generate fused multiply accumulate
5129 /// instructions with an additional negation of the accumulator
5130 static MachineInstr *genFusedMultiplyIdxNeg(
5131  MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
5132  MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
5133  DenseMap<unsigned, unsigned> &InstrIdxForVirtReg, unsigned IdxMulOpd,
5134  unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC) {
5135  assert(IdxMulOpd == 1);
5136 
5137  Register NewVR =
5138  genNeg(MF, MRI, TII, Root, InsInstrs, InstrIdxForVirtReg, MnegOpc, RC);
5139 
5140  return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
5141  FMAInstKind::Indexed, &NewVR);
5142 }
5143 
5144 /// genMaddR - Generate madd instruction and combine mul and add using
5145 /// an extra virtual register
5146 /// Example - an ADD intermediate needs to be stored in a register:
5147 /// MUL I=A,B,0
5148 /// ADD R,I,Imm
5149 /// ==> ORR V, ZR, Imm
5150 /// ==> MADD R,A,B,V
5151 /// \param MF Containing MachineFunction
5152 /// \param MRI Register information
5153 /// \param TII Target information
5154 /// \param Root is the ADD instruction
5155 /// \param [out] InsInstrs is a vector of machine instructions and will
5156 /// contain the generated madd instruction
5157 /// \param IdxMulOpd is index of operand in Root that is the result of
5158 /// the MUL. In the example above IdxMulOpd is 1.
5159 /// \param MaddOpc the opcode of the madd instruction
5160 /// \param VR is a virtual register that holds the value of an ADD operand
5161 /// (V in the example above).
5162 /// \param RC Register class of operands
5163 static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI,
5164  const TargetInstrInfo *TII, MachineInstr &Root,
5165  SmallVectorImpl<MachineInstr *> &InsInstrs,
5166  unsigned IdxMulOpd, unsigned MaddOpc, unsigned VR,
5167  const TargetRegisterClass *RC) {
5168  assert(IdxMulOpd == 1 || IdxMulOpd == 2);
5169 
5170  MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
5171  Register ResultReg = Root.getOperand(0).getReg();
5172  Register SrcReg0 = MUL->getOperand(1).getReg();
5173  bool Src0IsKill = MUL->getOperand(1).isKill();
5174  Register SrcReg1 = MUL->getOperand(2).getReg();
5175  bool Src1IsKill = MUL->getOperand(2).isKill();
5176 
5177  if (Register::isVirtualRegister(ResultReg))
5178  MRI.constrainRegClass(ResultReg, RC);
5179  if (Register::isVirtualRegister(SrcReg0))
5180  MRI.constrainRegClass(SrcReg0, RC);
5181  if (Register::isVirtualRegister(SrcReg1))
5182  MRI.constrainRegClass(SrcReg1, RC);
5183  if (Register::isVirtualRegister(VR))
5184  MRI.constrainRegClass(VR, RC);
5185 
5186  MachineInstrBuilder MIB =
5187  BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
5188  .addReg(SrcReg0, getKillRegState(Src0IsKill))
5189  .addReg(SrcReg1, getKillRegState(Src1IsKill))
5190  .addReg(VR);
5191  // Insert the MADD
5192  InsInstrs.push_back(MIB);
5193  return MUL;
5194 }
5195 
5196 /// When getMachineCombinerPatterns() finds potential patterns,
5197 /// this function generates the instructions that could replace the
5198 /// original code sequence
5199 void AArch64InstrInfo::genAlternativeCodeSequence(
5200  MachineInstr &Root, MachineCombinerPattern Pattern,
5201  SmallVectorImpl<MachineInstr *> &InsInstrs,
5202  SmallVectorImpl<MachineInstr *> &DelInstrs,
5203  DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
5204  MachineBasicBlock &MBB = *Root.getParent();
5205  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
5206  MachineFunction &MF = *MBB.getParent();
5207  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();