docs/doxygen/AggressiveInstCombine_8cpp_source.html

//===- AggressiveInstCombine.cpp ------------------------------------------===//

//

// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

// See https://llvm.org/LICENSE.txt for license information.

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

//

//===----------------------------------------------------------------------===//

//

// This file implements the aggressive expression pattern combiner classes.

// Currently, it handles expression patterns for:

//  * Truncate instruction

//

//===----------------------------------------------------------------------===//


#include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h"

#include "AggressiveInstCombineInternal.h"

#include "llvm/ADT/Statistic.h"

#include "llvm/Analysis/AliasAnalysis.h"

#include "llvm/Analysis/AssumptionCache.h"

#include "llvm/Analysis/BasicAliasAnalysis.h"

#include "llvm/Analysis/ConstantFolding.h"

#include "llvm/Analysis/DomTreeUpdater.h"

#include "llvm/Analysis/GlobalsModRef.h"

#include "llvm/Analysis/TargetLibraryInfo.h"

#include "llvm/Analysis/TargetTransformInfo.h"

#include "llvm/Analysis/ValueTracking.h"

#include "llvm/IR/DataLayout.h"

#include "llvm/IR/Dominators.h"

#include "llvm/IR/Function.h"

#include "llvm/IR/IRBuilder.h"

#include "llvm/IR/Instruction.h"

#include "llvm/IR/MDBuilder.h"

#include "llvm/IR/PatternMatch.h"

#include "llvm/IR/ProfDataUtils.h"

#include "llvm/Support/Casting.h"

#include "llvm/Support/CommandLine.h"

#include "llvm/Transforms/Utils/BasicBlockUtils.h"

#include "llvm/Transforms/Utils/BuildLibCalls.h"

#include "llvm/Transforms/Utils/Local.h"


using namespace llvm;

using namespace PatternMatch;


#define DEBUG_TYPE "aggressive-instcombine"


// Boolean command-line option that is only declared here (extern); its
// definition lives in another translation unit.
namespace llvm {

extern cl::opt<bool> ProfcheckDisableMetadataFixes;

}


// Pass statistics. Each counter is bumped by the fold in this file that
// performs the corresponding rewrite.
STATISTIC(NumAnyOrAllBitsSet, "Number of any/all-bits-set patterns folded");

STATISTIC(NumGuardedRotates,

          "Number of guarded rotates transformed into funnel shifts");

STATISTIC(NumGuardedFunnelShifts,

          "Number of guarded funnel shifts transformed into funnel shifts");

STATISTIC(NumPopCountRecognized, "Number of popcount idioms recognized");

STATISTIC(NumSelectCTTZFolded,

          "Number of select-based split cttz patterns folded");

STATISTIC(NumSelectCTLZFolded,

          "Number of select-based split ctlz patterns folded");


// Hidden tuning knob: upper bound on instructions scanned (default 64).
static cl::opt<unsigned> MaxInstrsToScan(

    "aggressive-instcombine-max-scan-instrs", cl::init(64), cl::Hidden,

    cl::desc("Max number of instructions to scan for aggressive instcombine."));


// Hidden tuning knob: longest constant string for which a builtin string
// compare call is still considered for inlining (default 3).
static cl::opt<unsigned> StrNCmpInlineThreshold(

    "strncmp-inline-threshold", cl::init(3), cl::Hidden,

    cl::desc("The maximum length of a constant string for a builtin string cmp "

             "call eligible for inlining. The default value is 3."));


// Hidden tuning knob: longest constant string for which a memchr call is
// still considered for inlining (default 3).
static cl::opt<unsigned>

    MemChrInlineThreshold("memchr-inline-threshold", cl::init(3), cl::Hidden,

                          cl::desc("The maximum length of a constant string to "

                                   "inline a memchr call."));


/// Try to fold a select-based split cttz pattern into a single full-width cttz.

///

///   %lo = trunc iN %val to i(N/2)

///   %cmp = icmp eq i(N/2) %lo, 0

///   %shr = lshr iN %val, N/2

///   %hi = trunc iN %shr to i(N/2)

///   %cttz_hi = call i(N/2) @llvm.cttz.i(N/2)(i(N/2) %hi, ...)

///   %hi_plus = add/or_disjoint i(N/2) %cttz_hi, N/2

///   %cttz_lo = call i(N/2) @llvm.cttz.i(N/2)(i(N/2) %lo, ...)

///   %result = select i1 %cmp, i(N/2) %hi_plus, i(N/2) %cttz_lo

/// -->

///   %cttz_wide = call iN @llvm.cttz.iN(iN %val, i1 false)

///   %result = trunc iN %cttz_wide to i(N/2)

/// Alive proof (for i64/i32):  https://alive2.llvm.org/ce/z/-s14-s


static bool foldSelectSplitCTTZ(Instruction &I) {
  // The root must be a scalar-integer select; its type is the "half" type.
  Type *NarrowTy = I.getType();
  Value *SelCond, *OnTrue, *OnFalse;
  if (!match(&I,
             m_Select(m_Value(SelCond), m_Value(OnTrue), m_Value(OnFalse))) ||
      !NarrowTy->isIntegerTy())
    return false;

  // i1 and i2 are rejected: the wide count can reach 2 * width, which those
  // types cannot represent (cttz.i4 may return 4, which does not fit in i2).
  const unsigned NarrowBits = NarrowTy->getIntegerBitWidth();
  if (NarrowBits <= 2)
    return false;
  const unsigned WideBits = NarrowBits * 2;

  // The condition tests the low half against zero. With 'eq' the true arm is
  // the upper-half result; with 'ne' the arms are swapped.
  Value *LowHalf;
  const bool SelectsOnZero =
      match(SelCond,
            m_SpecificICmp(CmpInst::ICMP_EQ, m_Value(LowHalf), m_ZeroInt()));
  if (!SelectsOnZero &&
      !match(SelCond,
             m_SpecificICmp(CmpInst::ICMP_NE, m_Value(LowHalf), m_ZeroInt())))
    return false;
  Value *UpperCount = SelectsOnZero ? OnTrue : OnFalse;
  Value *LowerCount = SelectsOnZero ? OnFalse : OnTrue;

  // The low half is a truncation of a value exactly twice as wide.
  Value *Wide;
  if (!match(LowHalf, m_Trunc(m_Value(Wide))) ||
      !Wide->getType()->isIntegerTy(WideBits))
    return false;

  // Lower arm: single-use cttz of that very same truncated value.
  if (!match(LowerCount, m_OneUse(m_Cttz(m_Specific(LowHalf), m_Value()))))
    return false;

  // Upper arm: single-use (cttz(trunc(Wide >> N/2)) + N/2), where the add may
  // also be spelled as a disjoint 'or' and the cttz is itself single-use.
  Value *UpperCttz, *UpperHalf;
  if (!match(UpperCount, m_OneUse(m_AddLike(m_Value(UpperCttz),
                                            m_SpecificInt(NarrowBits)))) ||
      !match(UpperCttz, m_OneUse(m_Cttz(m_Value(UpperHalf), m_Value()))) ||
      !match(UpperHalf,
             m_Trunc(m_LShr(m_Specific(Wide), m_SpecificInt(NarrowBits)))))
    return false;

  // Emit one full-width cttz (defined at zero) and narrow it back.
  IRBuilder<> Builder(&I);
  Value *WideCount = Builder.CreateIntrinsic(
      Intrinsic::cttz, {Wide->getType()}, {Wide, Builder.getFalse()});
  Value *Narrowed = Builder.CreateTrunc(WideCount, NarrowTy);

  I.replaceAllUsesWith(Narrowed);
  ++NumSelectCTTZFolded;
  return true;
}


/// Same as foldSelectSplitCTTZ but for leading zeros (ctlz).

///

///   %shr = lshr iN %val, N/2

///   %hi = trunc iN %shr to i(N/2)

///   %cmp = icmp eq i(N/2) %hi, 0   (or icmp eq iN %shr, 0)

///   %lo = trunc iN %val to i(N/2)

///   %ctlz_lo = call i(N/2) @llvm.ctlz.i(N/2)(i(N/2) %lo, ...)

///   %lo_plus = add/or_disjoint i(N/2) %ctlz_lo, N/2

///   %ctlz_hi = call i(N/2) @llvm.ctlz.i(N/2)(i(N/2) %hi, ...)

///   %result = select i1 %cmp, i(N/2) %lo_plus, i(N/2) %ctlz_hi

/// -->

///   %ctlz_wide = call iN @llvm.ctlz.iN(iN %val, i1 false)

///   %result = trunc iN %ctlz_wide to i(N/2)

///

/// Alive proof (for i64/i32): https://alive2.llvm.org/ce/z/WfQepH


static bool foldSelectSplitCTLZ(Instruction &I) {
  // Folds a select that stitches two half-width ctlz calls together into a
  // single full-width ctlz followed by a trunc. Returns true when the uses of
  // I were replaced; I itself is left in place for later cleanup.
  Value *Cond, *TrueVal, *FalseVal;
  if (!match(&I, m_Select(m_Value(Cond), m_Value(TrueVal), m_Value(FalseVal))))
    return false;

  Type *HalfTy = I.getType();
  if (!HalfTy->isIntegerTy())
    return false;
  unsigned HalfWidth = HalfTy->getIntegerBitWidth();

  // Bail out on very small types (i1, i2): the full-width ctlz can return
  // values not representable in the half type (e.g., ctlz.i4 can return 4,
  // which doesn't fit in i2).
  if (HalfWidth <= 2)
    return false;

  unsigned FullWidth = HalfWidth * 2;

  // select (icmp eq HiPart, 0), LoResult, HiResult
  // or, with the arms swapped, select (icmp ne HiPart, 0), HiResult, LoResult.
  Value *HiPart;
  Value *LoResult, *HiResult;
  if (match(Cond,
            m_SpecificICmp(CmpInst::ICMP_EQ, m_Value(HiPart), m_ZeroInt()))) {
    LoResult = TrueVal;  // upper is zero: count in lower + N/2
    HiResult = FalseVal; // upper non-zero: count in upper
  } else if (match(Cond, m_SpecificICmp(CmpInst::ICMP_NE, m_Value(HiPart),
                                        m_ZeroInt()))) {
    LoResult = FalseVal;
    HiResult = TrueVal;
  } else {
    return false;
  }

  // Extract SrcVal from HiPart, which is either trunc(lshr(SrcVal, N/2)) or
  // the untruncated lshr(SrcVal, N/2).
  Value *SrcVal;
  if (!match(HiPart,
             m_Trunc(m_LShr(m_Value(SrcVal), m_SpecificInt(HalfWidth)))) &&
      !match(HiPart, m_LShr(m_Value(SrcVal), m_SpecificInt(HalfWidth))))
    return false;
  if (!SrcVal->getType()->isIntegerTy(FullWidth))
    return false;

  // The compared value is not required to be the ctlz operand, so its type is
  // otherwise unconstrained. If it was truncated to fewer than N/2 bits, the
  // compare only inspects part of the upper half and "HiPart == 0" no longer
  // implies that the whole upper half is zero; folding would be a miscompile.
  if (HiPart->getType()->getScalarSizeInBits() < HalfWidth)
    return false;

  // HiResult: ctlz(trunc(lshr(SrcVal, N/2)), _)
  Value *HiCtlzArg;
  if (!match(HiResult, m_OneUse(m_Ctlz(m_Value(HiCtlzArg), m_Value()))))
    return false;

  if (!match(HiCtlzArg,
             m_Trunc(m_LShr(m_Specific(SrcVal), m_SpecificInt(HalfWidth)))))
    return false;

  // LoResult: add/or_disjoint(ctlz(trunc(SrcVal), _), N/2)
  Value *CtlzLoCall;
  if (!match(LoResult, m_OneUse(m_AddLike(m_Value(CtlzLoCall),
                                          m_SpecificInt(HalfWidth)))))
    return false;

  Value *LoCtlzArg;
  if (!match(CtlzLoCall, m_OneUse(m_Ctlz(m_Value(LoCtlzArg), m_Value()))))
    return false;

  if (!match(LoCtlzArg, m_Trunc(m_Specific(SrcVal))))
    return false;

  // Match successful: one full-width ctlz (defined at zero), then narrow.
  IRBuilder<> Builder(&I);
  Value *CtlzWide = Builder.CreateIntrinsic(
      Intrinsic::ctlz, {SrcVal->getType()}, {SrcVal, Builder.getFalse()});
  Value *Trunc = Builder.CreateTrunc(CtlzWide, HalfTy);

  I.replaceAllUsesWith(Trunc);
  ++NumSelectCTLZFolded;
  return true;
}


/// Match a pattern for a bitwise funnel/rotate operation that partially guards

/// against undefined behavior by branching around the funnel-shift/rotation

/// when the shift amount is 0.


static bool foldGuardedFunnelShift(Instruction &I, const DominatorTree &DT) {
  // The merge point must be a phi with exactly two incoming values.
  auto *Merge = dyn_cast<PHINode>(&I);
  if (!Merge || Merge->getNumOperands() != 2)
    return false;

  // Restricting to power-of-2 widths is a profitability choice rather than a
  // correctness requirement: on targets without a funnel/rotate instruction
  // the intrinsic is expanded back into shift/logic ops.
  if (!isPowerOf2_32(I.getType()->getScalarSizeInBits()))
    return false;

  Value *ShVal0, *ShVal1, *ShAmt;

  // Classify Shifted as an open-coded fshl/fshr, filling in ShVal0, ShVal1
  // and ShAmt. The result is only accepted when Bypass is the operand that
  // the funnel shift yields for a zero shift amount (ShVal0 for fshl, ShVal1
  // for fshr).
  auto classifyFunnel = [&](Value *Shifted, Value *Bypass) -> Intrinsic::ID {
    const unsigned Width = Shifted->getType()->getScalarSizeInBits();

    // fshl(ShVal0, ShVal1, ShAmt)
    //  == (ShVal0 << ShAmt) | (ShVal1 >> (Width - ShAmt))
    if (match(Shifted,
              m_OneUse(m_c_Or(
                  m_Shl(m_Value(ShVal0), m_Value(ShAmt)),
                  m_LShr(m_Value(ShVal1),
                         m_Sub(m_SpecificInt(Width), m_Deferred(ShAmt)))))))
      return ShVal0 == Bypass ? Intrinsic::fshl : Intrinsic::not_intrinsic;

    // fshr(ShVal0, ShVal1, ShAmt)
    //  == (ShVal0 << (Width - ShAmt)) | (ShVal1 >> ShAmt)
    if (match(Shifted,
              m_OneUse(m_c_Or(
                  m_Shl(m_Value(ShVal0),
                        m_Sub(m_SpecificInt(Width), m_Value(ShAmt))),
                  m_LShr(m_Value(ShVal1), m_Deferred(ShAmt))))) &&
        ShVal1 == Bypass)
      return Intrinsic::fshr;

    return Intrinsic::not_intrinsic;
  };

  // One incoming value is the funnel/rotate, the other is its pass-through
  // source operand:
  // phi [ rotate(RotSrc, ShAmt), FunnelBB ], [ RotSrc, GuardBB ]
  // phi [ fshl(ShVal0, ShVal1, ShAmt), FunnelBB ], [ ShVal0, GuardBB ]
  // phi [ fshr(ShVal0, ShVal1, ShAmt), FunnelBB ], [ ShVal1, GuardBB ]
  Value *In0 = Merge->getOperand(0);
  Value *In1 = Merge->getOperand(1);
  unsigned FunnelIdx = 0;
  Intrinsic::ID IID = classifyFunnel(In0, In1);
  if (IID == Intrinsic::not_intrinsic) {
    // Retry with the roles of the two incoming values exchanged.
    IID = classifyFunnel(In1, In0);
    if (IID == Intrinsic::not_intrinsic)
      return false;
    FunnelIdx = 1;
  }
  assert((IID == Intrinsic::fshl || IID == Intrinsic::fshr) &&
         "Pattern must match funnel shift left or right");
  const unsigned GuardIdx = 1 - FunnelIdx;

  // The block supplying the pass-through value is the guard: it must end in
  // "br (ShAmt == 0), PhiBB, FunnelBB".
  BasicBlock *GuardBB = Merge->getIncomingBlock(GuardIdx);
  BasicBlock *FunnelBB = Merge->getIncomingBlock(FunnelIdx);
  Instruction *GuardTerm = GuardBB->getTerminator();

  // Both shifted values must dominate the guard's terminator.
  if (!DT.dominates(ShVal0, GuardTerm) || !DT.dominates(ShVal1, GuardTerm))
    return false;

  BasicBlock *PhiBB = Merge->getParent();
  if (!match(GuardTerm,
             m_Br(m_SpecificICmp(CmpInst::ICMP_EQ, m_Specific(ShAmt),
                                 m_ZeroInt()),
                  m_SpecificBB(PhiBB), m_SpecificBB(FunnelBB))))
    return false;

  IRBuilder<> Builder(PhiBB, PhiBB->getFirstInsertionPt());

  if (ShVal0 == ShVal1) {
    ++NumGuardedRotates;
  } else {
    ++NumGuardedFunnelShifts;
    // For a true funnel shift the guard also kept poison in the operand that
    // is not returned for a zero shift from reaching the result; the
    // intrinsic does not, so freeze that operand unless it is known safe.
    Value *&Unguarded = (IID == Intrinsic::fshl) ? ShVal1 : ShVal0;
    if (!llvm::isGuaranteedNotToBePoison(Unguarded))
      Unguarded = Builder.CreateFreeze(Unguarded);
  }

  // Matched a variation of:
  // GuardBB:
  //   %cmp = icmp eq i32 %ShAmt, 0
  //   br i1 %cmp, label %PhiBB, label %FunnelBB
  // FunnelBB:
  //   %sub = sub i32 32, %ShAmt
  //   %shr = lshr i32 %ShVal1, %sub
  //   %shl = shl i32 %ShVal0, %ShAmt
  //   %fsh = or i32 %shr, %shl
  //   br label %PhiBB
  // PhiBB:
  //   %cond = phi i32 [ %fsh, %FunnelBB ], [ %ShVal0, %GuardBB ]
  // -->
  // llvm.fshl.i32(i32 %ShVal0, i32 %ShVal1, i32 %ShAmt)
  Value *Funnel =
      Builder.CreateIntrinsic(IID, Merge->getType(), {ShVal0, ShVal1, ShAmt});
  Merge->replaceAllUsesWith(Funnel);
  return true;
}


/// This is used by foldAnyOrAllBitsSet() to capture a source value (Root) and

/// the bit indexes (Mask) needed by a masked compare. If we're matching a chain

/// of 'and' ops, then we also need to capture the fact that we saw an

/// "and X, 1", so that's an extra return value for that case.

namespace {

// Accumulator threaded through matchAndOrChain() while it walks a chain.
struct MaskOps {

  // Common source value of the chain; null until the first leaf is visited.
  Value *Root = nullptr;

  // One bit set for every bit index of Root that the chain reads.
  APInt Mask;

  // True when walking a chain of 'and' ops, false for a chain of 'or' ops.
  bool MatchAndChain;

  // Set once an "and X, 1" has been seen while walking an 'and' chain.
  bool FoundAnd1 = false;


  // Starts with an empty mask of BitWidth bits; MatchAnds selects the chain
  // kind.
  MaskOps(unsigned BitWidth, bool MatchAnds)

      : Mask(APInt::getZero(BitWidth)), MatchAndChain(MatchAnds) {}

};

} // namespace


/// This is a recursive helper for foldAnyOrAllBitsSet() that walks through a

/// chain of 'and' or 'or' instructions looking for shift ops of a common source

/// value. Examples:

///   or (or (or X, (X >> 3)), (X >> 5)), (X >> 8)

/// returns { X, 0x129 }

///   and (and (X >> 1), 1), (X >> 4)

/// returns { X, 0x12 }


static bool matchAndOrChain(Value *V, MaskOps &MOps) {
  Value *LHS, *RHS;
  if (!MOps.MatchAndChain) {
    // 'or' chain: descend into both operands.
    if (match(V, m_Or(m_Value(LHS), m_Value(RHS))))
      return matchAndOrChain(LHS, MOps) && matchAndOrChain(RHS, MOps);
  } else if (match(V, m_And(m_Value(LHS), m_One()))) {
    // 'and' chain: an "and X, 1" link proves the high bits are cleared, so
    // record that it was seen and keep walking through X.
    MOps.FoundAnd1 = true;
    return matchAndOrChain(LHS, MOps);
  } else if (match(V, m_And(m_Value(LHS), m_Value(RHS)))) {
    return matchAndOrChain(LHS, MOps) && matchAndOrChain(RHS, MOps);
  }

  // Leaf: either "lshr Src, C" (tests bit C of Src) or a bare value (tests
  // bit 0 of that value).
  Value *Source = V;
  Value *Shifted;
  const APInt *ShiftC = nullptr;
  if (match(V, m_LShr(m_Value(Shifted), m_APInt(ShiftC))))
    Source = Shifted;

  // The first leaf visited fixes the common source operand.
  if (!MOps.Root)
    MOps.Root = Source;

  uint64_t BitPos = 0;
  if (ShiftC) {
    // An out-of-range shift constant means the IR has not been simplified.
    if (ShiftC->uge(MOps.Mask.getBitWidth()))
      return false;
    BitPos = ShiftC->getZExtValue();
  }

  // Record the tested bit; succeed only if this leaf reads the common source.
  MOps.Mask.setBit(BitPos);
  return MOps.Root == Source;
}


/// Match patterns that correspond to "any-bits-set" and "all-bits-set".

/// These will include a chain of 'or' or 'and'-shifted bits from a

/// common source value:

/// and (or  (lshr X, C), ...), 1 --> (X & CMask) != 0

/// and (and (lshr X, C), ...), 1 --> (X & CMask) == CMask

/// Note: "any-bits-clear" and "all-bits-clear" are variations of these patterns

/// that differ only with a final 'not' of the result. We expect that final

/// 'not' to be folded with the compare that we create here (invert predicate).


static bool foldAnyOrAllBitsSet(Instruction &I) {
  // An i1 (or vector-of-i1) root is handled as "trunc (chain)"; any other
  // type is handled as a root 'and'.
  const bool RootIsTrunc = I.getType()->isIntOrIntVectorTy(1);
  bool WantAllBits;
  Value *ChainTop;
  if (RootIsTrunc) {
    if (match(&I, m_Trunc(m_OneUse(m_And(m_Value(), m_Value())))))
      WantAllBits = true;
    else if (match(&I, m_Trunc(m_OneUse(m_Or(m_Value(), m_Value())))))
      WantAllBits = false;
    else
      return false;
    ChainTop = I.getOperand(0);
  } else if (match(&I, m_c_And(m_OneUse(m_And(m_Value(), m_Value())),
                               m_Value()))) {
    // 'and' chain: the "and X, 1" may sit anywhere, so walk from the root.
    ChainTop = &I;
    WantAllBits = true;
  } else if (match(&I,
                   m_And(m_OneUse(m_Or(m_Value(), m_Value())), m_One()))) {
    // 'or' chain: the "and X, 1" must be the root; walk from the 'or'.
    ChainTop = I.getOperand(0);
    WantAllBits = false;
  } else {
    return false;
  }

  Type *Ty = ChainTop->getType();
  MaskOps MOps(Ty->getScalarSizeInBits(), WantAllBits);
  if (!matchAndOrChain(ChainTop, MOps))
    return false;
  // Without a trunc at the root, an 'and' chain is only valid if it contained
  // an "and X, 1".
  if (WantAllBits && !RootIsTrunc && !MOps.FoundAnd1)
    return false;

  // Replace all the shift/logic ops with one masked compare:
  //   all-bits-set: (Root & Mask) == Mask
  //   any-bits-set: (Root & Mask) != 0
  IRBuilder<> Builder(&I);
  Constant *MaskC = ConstantInt::get(Ty, MOps.Mask);
  Value *Masked = Builder.CreateAnd(MOps.Root, MaskC);
  Value *Test = WantAllBits ? Builder.CreateICmpEQ(Masked, MaskC)
                            : Builder.CreateIsNotNull(Masked);
  // A trunc root already has the compare's i1 type; otherwise widen it back.
  Value *Replacement = RootIsTrunc ? Test : Builder.CreateZExt(Test, Ty);
  I.replaceAllUsesWith(Replacement);
  ++NumAnyOrAllBitsSet;
  return true;
}


/// Helper function to replace an instruction with a popcount intrinsic.

/// This creates the ctpop intrinsic with an optional truncation appended at the

/// end, and replaces all uses of the instruction.


static void replaceWithPopCount(Instruction &I, Value *Root) {
  LLVM_DEBUG(dbgs() << "Recognized popcount intrinsic\n");

  Type *CountTy = Root->getType();
  Type *ResultTy = I.getType();
  const bool NeedsNarrowing = ResultTy != CountTy;

  // Count bits in Root's own type, then narrow only if I is a different type.
  IRBuilder<> Builder(&I);
  Value *Replacement =
      Builder.CreateIntrinsic(Intrinsic::ctpop, CountTy, {Root});
  if (NeedsNarrowing) {
    assert(CountTy->getScalarSizeInBits() > ResultTy->getScalarSizeInBits() &&
           "Only truncation is supported for now");
    Replacement = Builder.CreateTrunc(Replacement, ResultTy);
  }

  I.replaceAllUsesWith(Replacement);
  ++NumPopCountRecognized;
}


// Try to recognize below function as popcount intrinsic.

// This is the "best" algorithm from

// http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel

// Also used in TargetLowering::expandCTPOP().

//

// int popcount(unsigned int i) {

//   i = i - ((i >> 1) & 0x55555555);

//   i = (i & 0x33333333) + ((i >> 2) & 0x33333333);

//   i = ((i + (i >> 4)) & 0x0F0F0F0F);

//   return (i * 0x01010101) >> 24;

// }


static bool tryToRecognizePopCount(Instruction &I) {
  // The idiom ends in a logical shift right, so that is the root we look for.
  if (I.getOpcode() != Instruction::LShr)
    return false;

  Type *Ty = I.getType();
  if (!Ty->isIntOrIntVectorTy())
    return false;

  // FIXME: fix Len == 8 and other irregular type lengths.
  const unsigned Len = Ty->getScalarSizeInBits();
  if (Len > 128 || Len <= 8 || Len % 8 != 0)
    return false;

  // Byte patterns replicated across the full width.
  const APInt Mask55 = APInt::getSplat(Len, APInt(8, 0x55));
  const APInt Mask33 = APInt::getSplat(Len, APInt(8, 0x33));
  const APInt Mask0F = APInt::getSplat(Len, APInt(8, 0x0F));
  const APInt Mask01 = APInt::getSplat(Len, APInt(8, 0x01));
  const APInt MaskShift = APInt(Len, Len - 8);

  // Step 4: "(i * 0x01010101...) >> (Len - 8)".
  Value *ByteSums;
  if (!match(I.getOperand(0),
             m_Mul(m_Value(ByteSums), m_SpecificInt(Mask01))) ||
      !match(I.getOperand(1), m_SpecificInt(MaskShift)))
    return false;

  // Step 3: "((i + (i >> 4)) & 0x0F0F0F0F...)".
  Value *NibbleSums;
  if (!match(ByteSums,
             m_And(m_c_Add(m_LShr(m_Value(NibbleSums), m_SpecificInt(4)),
                           m_Deferred(NibbleSums)),
                   m_SpecificInt(Mask0F))))
    return false;

  // Step 2: "(i & 0x33333333...) + ((i >> 2) & 0x33333333...)".
  Value *PairSums;
  if (!match(NibbleSums,
             m_c_Add(m_And(m_Value(PairSums), m_SpecificInt(Mask33)),
                     m_And(m_LShr(m_Deferred(PairSums), m_SpecificInt(2)),
                           m_SpecificInt(Mask33)))))
    return false;

  // Step 1: "i - ((i >> 1) & 0x55555555...)", where the mask may be a subset
  // of 0x55... as long as the dropped bits are known to be zero anyway.
  Value *Root, *Subtrahend;
  const APInt *AndMask;
  if (!match(PairSums, m_Sub(m_Value(Root), m_Value(Subtrahend))) ||
      !match(Subtrahend, m_And(m_LShr(m_Specific(Root), m_SpecificInt(1)),
                               m_APInt(AndMask))))
    return false;

  if (*AndMask != Mask55) {
    if (!AndMask->isSubsetOf(Mask55))
      return false;

    // The bits missing from the mask must already be zero in the shifted
    // value being masked.
    APInt NeededMask = Mask55 & ~*AndMask;
    Value *MaskedOperand = cast<Instruction>(Subtrahend)->getOperand(0);
    if (!MaskedValueIsZero(MaskedOperand, NeededMask,
                           SimplifyQuery(I.getDataLayout())))
      return false;
  }

  replaceWithPopCount(I, Root);
  return true;
}


// Try to recognize below function as popcount intrinsic.

// Ref. Hacker's Delight

// int popcount32(unsigned int i) {

// uWord = (uWord & 0x55555555) + ((uWord>>1) & 0x55555555);

// uWord = (uWord & 0x33333333) + ((uWord>>2) & 0x33333333);

// uWord = (uWord & 0x0F0F0F0F) + ((uWord>>4) & 0x0F0F0F0F);

// uWord = (uWord & 0x00FF00FF) + ((uWord>>8) & 0x00FF00FF);

// return  (uWord & 0x0000FFFF) + (uWord>>16);

// }

// int popcount64(unsigned long i) {

// uWord = (uWord & 0x5555555555555555) + ((uWord>>1) & 0x5555555555555555);

// uWord = (uWord & 0x3333333333333333) + ((uWord>>2) & 0x3333333333333333);

// uWord = (uWord & 0x0F0F0F0F0F0F0F0F) + ((uWord>>4) & 0x0F0F0F0F0F0F0F0F);

// uWord = (uWord & 0x00FF00FF00FF00FF) + ((uWord>>8) & 0x00FF00FF00FF00FF);

// uWord = (uWord & 0x0000FFFF0000FFFF) + ((uWord>>16) & 0x0000FFFF0000FFFF);

// return  (uWord & 0x00000000FFFFFFFF) + (uWord>>32) & 0x00000000FFFFFFFF;

// }

//

// InstCombine may narrow AND masks when it can prove the removed bits are

// known zero (e.g. 0x0F0F0F0F -> 0x07070707). We accept such narrowed masks

// by checking they are subsets of the expected masks and verifying the missing

// bits are known zero via MaskedValueIsZero.


static bool tryToRecognizePopCount1(Instruction &I) {
  // Recognizes the "add pairs, then nibbles, then bytes, ..." popcount that
  // ends in an add, walking from the final add back to the source value.
  // Returns true when I's uses were replaced by a ctpop of that source.
  if (I.getOpcode() != Instruction::Add)
    return false;

  Type *Ty = I.getType();
  if (!Ty->isIntOrIntVectorTy())
    return false;

  unsigned Len = Ty->getScalarSizeInBits();
  if (Len > 64 || Len <= 8 || Len % 8 != 0)
    return false;

  // Len should be a power of 2 for the loop to work correctly
  if (!isPowerOf2_32(Len))
    return false;

  APInt Mask55 = APInt::getSplat(Len, APInt(8, 0x55));
  APInt Mask33 = APInt::getSplat(Len, APInt(8, 0x33));

  SimplifyQuery SQ(I.getDataLayout());

  // Check if CapturedMask is a valid (possibly narrowed) version of
  // ExpectedMask for the given Operand. Returns true if the masks match
  // exactly, or if CapturedMask is a subset and the missing bits are
  // known zero in the Operand.
  auto isValidNarrowedMask = [&](const APInt &CapturedMask,
                                 const APInt &ExpectedMask,
                                 Value *Operand) -> bool {
    if (CapturedMask == ExpectedMask)
      return true;
    if (!CapturedMask.isSubsetOf(ExpectedMask))
      return false;
    APInt NeededMask = ExpectedMask & ~CapturedMask;
    return MaskedValueIsZero(Operand, NeededMask, SQ);
  };

  // For "(x & M) + ((x >> S) & M)" patterns, both AND masks may be narrowed.
  // Require subsets of BaseMask and prove any implied missing bits are zero.
  // AndMask1 is the mask applied after the shift, so its missing bits are
  // moved back up by ShiftAmt to express them in terms of Val.
  auto narrowAddPairMasksOk = [&](const APInt &BaseMask, unsigned ShiftAmt,
                                  Value *Val, const APInt &AndMask1,
                                  const APInt &AndMask2) -> bool {
    if (!AndMask1.isSubsetOf(BaseMask) || !AndMask2.isSubsetOf(BaseMask))
      return false;
    APInt NeededShifted = (BaseMask & ~AndMask1).shl(ShiftAmt);
    APInt NeededUnshifted = BaseMask & ~AndMask2;
    APInt AllNeeded = NeededShifted | NeededUnshifted;
    return AllNeeded.isZero() || MaskedValueIsZero(Val, AllNeeded, SQ);
  };

  Value *ShiftOp;
  Value *Start = &I;
  // Walk the stages from the widest chunk (Len bits) down to 8-bit chunks.
  // Each stage adds the two halves of every Chunk-bit field.
  for (unsigned Chunk = Len; Chunk >= 8; Chunk /= 2) {
    const unsigned HalfChunk = Chunk / 2;
    APInt Mask = APInt::getSplat(Len, APInt::getLowBitsSet(Chunk, HalfChunk));
    const APInt *AndMask1 = nullptr, *AndMask2 = nullptr;

    // Matching "(uWord & Mask) + ((uWord>>Chunk/2) & Mask)".
    // Both masks might have been narrowed by InstCombine.
    if (match(Start,
              m_c_Add(m_And(m_LShr(m_Value(ShiftOp), m_SpecificInt(HalfChunk)),
                            m_APInt(AndMask1)),
                      m_And(m_Deferred(ShiftOp), m_APInt(AndMask2))))) {
      if (!narrowAddPairMasksOk(Mask, HalfChunk, ShiftOp, *AndMask1,
                                *AndMask2))
        return false;
    }
    // Matching "(uWord & Mask) + (uWord>>Chunk/2)".
    // The mask might have been narrowed by InstCombine.
    else if (match(Start,
                   m_c_Add(m_LShr(m_Value(ShiftOp), m_SpecificInt(HalfChunk)),
                           m_And(m_Deferred(ShiftOp), m_APInt(AndMask1))))) {
      if (!isValidNarrowedMask(*AndMask1, Mask, ShiftOp))
        return false;

      // The shifted operand carries no mask here. That equals the masked
      // form only if every bit the mask would have cleared is already zero.
      // In the last stage (Chunk == Len) the shift clears them all and the
      // set below is empty; in an inner stage the neighbouring field would
      // otherwise leak into the sum and the value would not be a popcount.
      APInt StrayBits = (~Mask).shl(HalfChunk);
      if (!StrayBits.isZero() && !MaskedValueIsZero(ShiftOp, StrayBits, SQ))
        return false;
    } else
      return false;
    Start = ShiftOp;
  }

  // Matching "uWord = (uWord & Mask33) + ((uWord>>2) & Mask33)".
  const APInt *AndMask1 = nullptr, *AndMask2 = nullptr;
  if (!match(Start, m_c_Add(m_And(m_LShr(m_Value(ShiftOp), m_SpecificInt(2)),
                                  m_APInt(AndMask1)),
                            m_And(m_Deferred(ShiftOp), m_APInt(AndMask2)))))
    return false;
  if (!narrowAddPairMasksOk(Mask33, 2, ShiftOp, *AndMask1, *AndMask2))
    return false;

  Start = ShiftOp;
  Value *Root;
  // Matching "uWord = (uWord & Mask55) + ((uWord>>1) & Mask55)".
  AndMask1 = nullptr;
  AndMask2 = nullptr;
  if (!match(Start, m_c_Add(m_And(m_LShr(m_Value(Root), m_SpecificInt(1)),
                                  m_APInt(AndMask1)),
                            m_And(m_Deferred(Root), m_APInt(AndMask2)))))
    return false;
  if (!narrowAddPairMasksOk(Mask55, 1, Root, *AndMask1, *AndMask2))
    return false;

  replaceWithPopCount(I, Root);
  return true;
}


// Try to recognize below function as popcount intrinsic.

// Ref. Hacker's Delight

// int popcnt(unsigned x) {

// x = x - ((x >> 1) & 0x55555555);

// x = (x & 0x33333333) + ((x >> 2) & 0x33333333);

// x = (x + (x >> 4)) & 0x0F0F0F0F;

// x = x + (x >> 8);

// x = x + (x >> 16);

// return x & 0x0000003F;

// }


// int popcnt(unsigned x) {

// x = x - ((x >> 1) & 0x55555555);

// x = x - 3*((x >> 2) & 0x33333333);

// x = (x + (x >> 4)) & 0x0F0F0F0F;

// x = x + (x >> 8);

// x = x + (x >> 16);

// return x & 0x0000003F;

// }


static bool tryToRecognizePopCount2n3(Instruction &I) {
  // Both variants end in "x & <small mask>", so the root must be an 'and'
  // with a constant.
  if (I.getOpcode() != Instruction::And)
    return false;
  if (!I.getType()->isIntOrIntVectorTy())
    return false;

  Value *Chain;
  const APInt *FinalMask;
  if (!match(&I, m_And(m_Value(Chain), m_APInt(FinalMask))))
    return false;

  // `(trunc (and x, C))` may have been canonicalized to `(and (trunc x), C)`.
  // Look through a single-use trunc so the wide computation is still seen;
  // Len then describes the wide type and the trunc is re-added when the
  // popcount is emitted.
  unsigned Len = I.getType()->getScalarSizeInBits();
  Value *Widened;
  if (match(Chain, m_OneUse(m_Trunc(m_Value(Widened))))) {
    Chain = Widened;
    Len = Chain->getType()->getScalarSizeInBits();
  }

  // Len must be a power of 2 in (8, 64] for the halving loop below to work.
  if (Len > 64 || Len <= 8 || Len % 8 != 0 || !isPowerOf2_32(Len))
    return false;

  const APInt Mask55 = APInt::getSplat(Len, APInt(8, 0x55));
  const APInt Mask33 = APInt::getSplat(Len, APInt(8, 0x33));
  const APInt Mask0F = APInt::getSplat(Len, APInt(8, 0x0F));

  // The final mask has to keep every bit needed to represent Len (all ones in
  // the low Log2(Len)+1 bits) and must not reach past the lowest byte, since
  // only that byte carries the count.
  const unsigned NumLenBits = Log2_32(Len) + 1;
  if (FinalMask->countTrailingOnes() < NumLenBits ||
      FinalMask->getActiveBits() > 8)
    return false;

  // Peel "x = x + (x >> Span/2)" for Span = Len, Len/2, ..., 16.
  Value *Inner;
  for (unsigned Span = Len; Span >= 16; Span /= 2) {
    if (!match(Chain, m_c_Add(m_LShr(m_Value(Inner), m_SpecificInt(Span / 2)),
                              m_Deferred(Inner))))
      return false;
    Chain = Inner;
  }

  // "x = (x + (x >> 4)) & 0x0F0F0F0F".
  if (!match(Chain, m_And(m_c_Add(m_LShr(m_Value(Inner), m_SpecificInt(4)),
                                  m_Deferred(Inner)),
                          m_SpecificInt(Mask0F))))
    return false;

  Value *PairCounts;
  llvm::APInt NegThree(/*BitWidth=*/Len, /*Value=*/-3,
                       /*isSigned=*/true);
  // Either "x = (x & 0x33333333) + ((x >> 2) & 0x33333333)" ...
  const bool IsMaskedAdd =
      match(Inner,
            m_c_Add(m_And(m_LShr(m_Value(PairCounts), m_SpecificInt(2)),
                          m_SpecificInt(Mask33)),
                    m_And(m_Deferred(PairCounts), m_SpecificInt(Mask33))));
  // ... or "x = x - 3*((x >> 2) & 0x33333333)", seen as an add of a multiply
  // by -3.
  if (!IsMaskedAdd &&
      !match(Inner,
             m_Add(m_Mul(m_And(m_LShr(m_Value(PairCounts), m_SpecificInt(2)),
                               m_SpecificInt(Mask33)),
                         m_SpecificInt(NegThree)),
                   m_Deferred(PairCounts))))
    return false;

  // "x = x - ((x >> 1) & 0x55555555)".
  Value *Root;
  if (!match(PairCounts,
             m_Sub(m_Value(Root),
                   m_And(m_LShr(m_Deferred(Root), m_SpecificInt(1)),
                         m_SpecificInt(Mask55)))))
    return false;

  replaceWithPopCount(I, Root);
  return true;
}


/// Fold smin(smax(fptosi(x), C1), C2) into llvm.fptosi.sat(x) when C1 and C2
/// are exactly the saturation bounds of a signed integer type. The transform
/// is not reversible: fptosi.sat is more defined than its input, producing a
/// valid value for every x, whereas the original fptosi yields poison for
/// values outside the range of the integer conversion. The reversed pattern
/// may use fmax and fmin instead. Because the fold cannot be undone and is not
/// always profitable, it is only performed when TTI reports the saturating
/// form as strictly cheaper.
static bool tryToFPToSat(Instruction &I, TargetTransformInfo &TTI) {
  // Accept either nesting order of the clamp around a single-use fptosi.
  // MinC is bound to the constant operand of the smin and MaxC to the constant
  // operand of the smax.
  Value *In;
  const APInt *MinC, *MaxC;
  const bool IsSMaxOfSMin =
      match(&I, m_SMax(m_OneUse(m_SMin(m_OneUse(m_FPToSI(m_Value(In))),
                                       m_APInt(MinC))),
                       m_APInt(MaxC)));
  if (!IsSMaxOfSMin &&
      !match(&I, m_SMin(m_OneUse(m_SMax(m_OneUse(m_FPToSI(m_Value(In))),
                                        m_APInt(MaxC))),
                        m_APInt(MinC))))
    return false;

  // The constants must form a saturating clamp: the smin bound has to be
  // 2^k - 1 and the smax bound has to be -2^k.
  const APInt UpperPlusOne = *MinC + 1;
  if (!UpperPlusOne.isPowerOf2())
    return false;
  if (-*MaxC != UpperPlusOne)
    return false;

  // The saturating conversion targets a (k + 1)-bit integer, vectorised to
  // match the original result type when needed.
  Type *IntTy = I.getType();
  Type *FpTy = In->getType();
  Type *SatTy = IntegerType::get(IntTy->getContext(),
                                 UpperPlusOne.exactLogBase2() + 1);
  if (auto *VecTy = dyn_cast<VectorType>(IntTy))
    SatTy = VectorType::get(SatTy, VecTy->getElementCount());

  const auto CostKind = TTI::TCK_RecipThroughput;

  // Cost of the replacement: fptosi.sat followed by a sign extension back to
  // the original integer type.
  InstructionCost SatCost = TTI.getIntrinsicInstrCost(
      IntrinsicCostAttributes(Intrinsic::fptosi_sat, SatTy, {In}, {FpTy}),
      CostKind);
  SatCost += TTI.getCastInstrCost(Instruction::SExt, IntTy, SatTy,
                                  TTI::CastContextHint::None, CostKind);

  // Cost of the existing sequence: fptosi + smin + smax.
  InstructionCost MinMaxCost =
      TTI.getCastInstrCost(Instruction::FPToSI, IntTy, FpTy,
                           TTI::CastContextHint::None, CostKind);
  MinMaxCost += TTI.getIntrinsicInstrCost(
      IntrinsicCostAttributes(Intrinsic::smin, IntTy, {IntTy}), CostKind);
  MinMaxCost += TTI.getIntrinsicInstrCost(
      IntrinsicCostAttributes(Intrinsic::smax, IntTy, {IntTy}), CostKind);

  // Only transform when it is a strict improvement.
  if (SatCost >= MinMaxCost)
    return false;

  IRBuilder<> Builder(&I);
  Value *Sat =
      Builder.CreateIntrinsic(Intrinsic::fptosi_sat, {SatTy, FpTy}, In);
  Value *Widened = Builder.CreateSExt(Sat, IntTy);
  I.replaceAllUsesWith(Widened);
  return true;
}


/// Try to replace a mathlib call to sqrt with the LLVM intrinsic. This avoids
/// pessimistic codegen that has to account for setting errno and can enable
/// vectorization.
///
/// The rewrite is performed when
///   (1) the call is a sqrt libcall (the caller is expected to have
///       established this; \p Func is not inspected here),
///   (2) no NaN can be produced, either because the call carries nnan or
///       because the argument cannot be ordered less than zero, and
///   (3) the target has a fast sqrt for the type, so the intrinsic would not
///       be lowered back to a libcall (which could change errno).
/// Under those conditions errno won't be set, so converting to the intrinsic
/// is safe. Returns true if \p Call was replaced and erased.
static bool foldSqrt(CallInst *Call, LibFunc Func, TargetTransformInfo &TTI,
                     TargetLibraryInfo &TLI, AssumptionCache &AC,
                     DominatorTree &DT) {
  Type *Ty = Call->getType();
  if (!TTI.haveFastSqrt(Ty))
    return false;

  Value *Arg = Call->getArgOperand(0);
  if (!Call->hasNoNaNs() &&
      !cannotBeOrderedLessThanZero(
          Arg, SimplifyQuery(Call->getDataLayout(), &TLI, &DT, &AC, Call)))
    return false;

  // Build the intrinsic in place of the call, passing the call itself as the
  // source of fast-math flags.
  IRBuilder<> Builder(Call);
  Value *NewSqrt =
      Builder.CreateIntrinsic(Intrinsic::sqrt, Ty, Arg, Call, "sqrt");
  Call->replaceAllUsesWith(NewSqrt);

  // A call with side effects is not trivially dead, so the old call has to be
  // erased explicitly.
  Call->eraseFromParent();
  return true;
}


// Check whether \p Table encodes a cttz lookup table.
// For every bit position B in [0, \p InputBits) the slot selected by a value
// with only bit B set, i.e. ((1 << B) * Mul >> Shift) & AndMask, must load
// (as \p AccessTy, at byte offset slot * GEPIdxFactor) the constant B.
static bool isCTTZTable(Constant *Table, const APInt &Mul, const APInt &Shift,
                        const APInt &AndMask, Type *AccessTy,
                        unsigned InputBits, const APInt &GEPIdxFactor,
                        const DataLayout &DL) {
  for (unsigned Bit = 0; Bit != InputBits; ++Bit) {
    // Table slot the hashing sequence produces for an isolated bit `Bit`.
    APInt Slot = APInt::getOneBitSet(InputBits, Bit) * Mul;
    Slot = Slot.lshr(Shift) & AndMask;

    Constant *Elem =
        ConstantFoldLoadFromConst(Table, AccessTy, Slot * GEPIdxFactor, DL);
    auto *ElemInt = dyn_cast_or_null<ConstantInt>(Elem);
    if (!ElemInt)
      return false;
    if (ElemInt->getValue() != Bit)
      return false;
  }

  return true;
}


// Try to recognize table-based ctz implementation.

// E.g., an example in C (for more cases please see the llvm/tests):

// int f(unsigned x) {

//    static const char table[32] =

//      {0, 1, 28, 2, 29, 14, 24, 3, 30,

//       22, 20, 15, 25, 17, 4, 8, 31, 27,

//       13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9};

//    return table[((unsigned)((x & -x) * 0x077CB531U)) >> 27];

// }

// this can be lowered to `cttz` instruction.

// There is also a special case when the element is 0.

//

// The (x & -x) sets the lowest non-zero bit to 1. The multiply is a de-bruijn

// sequence that contains each pattern of bits in it. The shift extracts

// the top bits after the multiply, and that index into the table should

// represent the number of trailing zeros in the original number.

//

// Here are some examples of LLVM IR for a 64-bit target:

//

// CASE 1:

// %sub = sub i32 0, %x

// %and = and i32 %sub, %x

// %mul = mul i32 %and, 125613361

// %shr = lshr i32 %mul, 27

// %idxprom = zext i32 %shr to i64

// %arrayidx = getelementptr inbounds [32 x i8], [32 x i8]* @ctz1.table, i64 0,

//     i64 %idxprom

// %0 = load i8, i8* %arrayidx, align 1, !tbaa !8

//

// CASE 2:

// %sub = sub i32 0, %x

// %and = and i32 %sub, %x

// %mul = mul i32 %and, 72416175

// %shr = lshr i32 %mul, 26

// %idxprom = zext i32 %shr to i64

// %arrayidx = getelementptr inbounds [64 x i16], [64 x i16]* @ctz2.table,

//     i64 0, i64 %idxprom

// %0 = load i16, i16* %arrayidx, align 2, !tbaa !8

//

// CASE 3:

// %sub = sub i32 0, %x

// %and = and i32 %sub, %x

// %mul = mul i32 %and, 81224991

// %shr = lshr i32 %mul, 27

// %idxprom = zext i32 %shr to i64

// %arrayidx = getelementptr inbounds [32 x i32], [32 x i32]* @ctz3.table,

//     i64 0, i64 %idxprom

// %0 = load i32, i32* %arrayidx, align 4, !tbaa !8

//

// CASE 4:

// %sub = sub i64 0, %x

// %and = and i64 %sub, %x

// %mul = mul i64 %and, 283881067100198605

// %shr = lshr i64 %mul, 58

// %arrayidx = getelementptr inbounds [64 x i8], [64 x i8]* @table, i64 0,

//     i64 %shr

// %0 = load i8, i8* %arrayidx, align 1, !tbaa !8

//

// All these can be lowered to @llvm.cttz.i32/64 intrinsics.


// Recognize a load from a constant de-Bruijn lookup table indexed by
// ((x & -x) * MulConst) >> ShiftConst and replace it with llvm.cttz(x).
// Returns true if the uses of \p I were rewritten.
static bool tryToRecognizeTableBasedCttz(Instruction &I, const DataLayout &DL) {
  LoadInst *LI = dyn_cast<LoadInst>(&I);
  if (!LI)
    return false;

  Type *AccessType = LI->getType();
  if (!AccessType->isIntegerTy())
    return false;

  GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(LI->getPointerOperand());
  if (!GEP || !GEP->hasNoUnsignedSignedWrap())
    return false;

  // The table must be a constant global whose contents are known.
  GlobalVariable *GVTable = dyn_cast<GlobalVariable>(GEP->getPointerOperand());
  if (!GVTable || !GVTable->hasInitializer() || !GVTable->isConstant())
    return false;

  // Require exactly one variable index and no constant displacement.
  unsigned BW = DL.getIndexTypeSizeInBits(GEP->getType());
  APInt ModOffset(BW, 0);
  SmallMapVector<Value *, APInt, 4> VarOffsets;
  if (!GEP->collectOffset(DL, BW, VarOffsets, ModOffset) ||
      VarOffsets.size() != 1 || ModOffset != 0)
    return false;
  auto [GepIdx, GEPScale] = VarOffsets.front();

  Value *X1;
  const APInt *MulConst, *ShiftConst, *AndCst = nullptr;
  // Check that the gep variable index is ((x & -x) * MulConst) >> ShiftConst.
  // This might be extended to the pointer index type, and if the gep index type
  // has been replaced with an i8 then a new And (and different ShiftConst) will
  // be present.
  auto MatchInner = m_LShr(
      m_Mul(m_c_And(m_Neg(m_Value(X1)), m_Deferred(X1)), m_APInt(MulConst)),
      m_APInt(ShiftConst));
  if (!match(GepIdx, m_CastOrSelf(MatchInner)) &&
      !match(GepIdx, m_CastOrSelf(m_And(MatchInner, m_APInt(AndCst)))))
    return false;

  unsigned InputBits = X1->getType()->getScalarSizeInBits();
  if (InputBits != 16 && InputBits != 32 && InputBits != 64 && InputBits != 128)
    return false;

  if (!GEPScale.isIntN(InputBits) ||
      !isCTTZTable(GVTable->getInitializer(), *MulConst, *ShiftConst,
                   AndCst ? *AndCst : APInt::getAllOnes(InputBits), AccessType,
                   InputBits, GEPScale.zextOrTrunc(InputBits), DL))
    return false;

  // The element at offset 0 is what the original code yields for x == 0.
  // isCTTZTable() only validated the slots reachable from single-bit inputs,
  // which need not include offset 0 (the shift amount is not constrained
  // here), so this load may not fold to an integer constant. Bail out instead
  // of asserting inside cast<>.
  auto *ZeroTableElem = dyn_cast_or_null<ConstantInt>(
      ConstantFoldLoadFromConst(GVTable->getInitializer(), AccessType, DL));
  if (!ZeroTableElem)
    return false;

  // Compare as APInt: getZExtValue() asserts on values wider than 64 bits.
  const APInt &ZeroVal = ZeroTableElem->getValue();
  bool DefinedForZero = ZeroVal == InputBits;

  // When cttz(0) does not match the table, the zero result is materialized in
  // x's type below. If the loaded type is wider than x, the table value must
  // still be representable in x's type for that to be correct.
  if (!DefinedForZero && !ZeroVal.isIntN(InputBits))
    return false;

  IRBuilder<> B(LI);
  // Zero is poison for the intrinsic unless the table agrees with cttz(0).
  ConstantInt *BoolConst = B.getInt1(!DefinedForZero);
  Type *XType = X1->getType();
  auto Cttz = B.CreateIntrinsic(Intrinsic::cttz, {XType}, {X1, BoolConst});
  Value *ZExtOrTrunc = nullptr;

  if (DefinedForZero) {
    ZExtOrTrunc = B.CreateZExtOrTrunc(Cttz, AccessType);
  } else {
    // If the value in elem 0 isn't the same as InputBits, we still want to
    // produce the value from the table.
    auto Cmp = B.CreateICmpEQ(X1, ConstantInt::get(XType, 0));
    // Build the constant directly in x's type. A plain zext of the table
    // element would be an invalid narrowing cast when AccessType is wider
    // than XType; the value was checked above to fit in InputBits.
    Constant *ZeroResult =
        ConstantInt::get(XType, ZeroVal.zextOrTrunc(InputBits));
    auto Select = B.CreateSelect(Cmp, ZeroResult, Cttz);

    // The true branch of select handles the cttz(0) case, which is rare.
    if (!ProfcheckDisableMetadataFixes) {
      if (Instruction *SelectI = dyn_cast<Instruction>(Select))
        SelectI->setMetadata(
            LLVMContext::MD_prof,
            MDBuilder(SelectI->getContext()).createUnlikelyBranchWeights());
    }

    // NOTE: If the table[0] is 0, but the cttz(0) is defined by the Target
    // it should be handled as: `cttz(x) & (typeSize - 1)`.

    ZExtOrTrunc = B.CreateZExtOrTrunc(Select, AccessType);
  }

  LI->replaceAllUsesWith(ZExtOrTrunc);

  return true;
}


// Check whether \p Table encodes a floor(log2) lookup table.
// For every B in [0, \p InputBits) the slot selected by a value whose low
// B + 1 bits are all set, i.e. (((1 << (B + 1)) - 1) * Mul) >> Shift, must
// load (as \p AccessTy, at byte offset slot * GEPIdxFactor) the constant B.
// In addition Mul >> Shift itself has to be zero.
static bool isLog2Table(Constant *Table, const APInt &Mul, const APInt &Shift,
                        Type *AccessTy, unsigned InputBits,
                        const APInt &GEPIdxFactor, const DataLayout &DL) {
  for (unsigned Bit = 0; Bit != InputBits; ++Bit) {
    // Input with every bit up to and including `Bit` set, as produced by the
    // shift-and-or smearing sequence.
    APInt Smeared = APInt::getLowBitsSet(InputBits, Bit + 1);
    APInt Slot = (Smeared * Mul).lshr(Shift);

    Constant *Elem =
        ConstantFoldLoadFromConst(Table, AccessTy, Slot * GEPIdxFactor, DL);
    auto *ElemInt = dyn_cast_or_null<ConstantInt>(Elem);
    if (!ElemInt)
      return false;
    if (ElemInt->getValue() != Bit)
      return false;
  }

  // Mul >> Shift is the slot selected by an input of 1; require it to be slot
  // 0, which is also the slot an input of zero selects.
  if (!Mul.lshr(Shift).isZero())
    return false;

  return true;
}


// Try to recognize table-based log2 implementation.

// E.g., an example in C (for more cases please see the llvm/tests):

// int f(unsigned v) {

//    static const char table[32] =

//    {0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30,

//     8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31};

//

//    v |= v >> 1; // first round down to one less than a power of 2

//    v |= v >> 2;

//    v |= v >> 4;

//    v |= v >> 8;

//    v |= v >> 16;

//

//    return table[(unsigned)(v * 0x07C4ACDDU) >> 27];

// }

// this can be lowered to `ctlz` instruction.

// There is also a special case when the element is 0.

//

// The >> and |= sequence sets all bits below the most significant set bit. The

// multiply is a de-bruijn sequence that contains each pattern of bits in it.

// The shift extracts the top bits after the multiply, and that index into the

// table should represent the floor log base 2 of the original number.

//

// Here are some examples of LLVM IR for a 64-bit target.

//

// CASE 1:

// %shr = lshr i32 %v, 1

// %or = or i32 %shr, %v

// %shr1 = lshr i32 %or, 2

// %or2 = or i32 %shr1, %or

// %shr3 = lshr i32 %or2, 4

// %or4 = or i32 %shr3, %or2

// %shr5 = lshr i32 %or4, 8

// %or6 = or i32 %shr5, %or4

// %shr7 = lshr i32 %or6, 16

// %or8 = or i32 %shr7, %or6

// %mul = mul i32 %or8, 130329821

// %shr9 = lshr i32 %mul, 27

// %idxprom = zext nneg i32 %shr9 to i64

// %arrayidx = getelementptr inbounds i8, ptr @table, i64 %idxprom

// %0 = load i8, ptr %arrayidx, align 1

//

// CASE 2:

// %shr = lshr i64 %v, 1

// %or = or i64 %shr, %v

// %shr1 = lshr i64 %or, 2

// %or2 = or i64 %shr1, %or

// %shr3 = lshr i64 %or2, 4

// %or4 = or i64 %shr3, %or2

// %shr5 = lshr i64 %or4, 8

// %or6 = or i64 %shr5, %or4

// %shr7 = lshr i64 %or6, 16

// %or8 = or i64 %shr7, %or6

// %shr9 = lshr i64 %or8, 32

// %or10 = or i64 %shr9, %or8

// %mul = mul i64 %or10, 285870213051386505

// %shr11 = lshr i64 %mul, 58

// %arrayidx = getelementptr inbounds i8, ptr @table, i64 %shr11

// %0 = load i8, ptr %arrayidx, align 1

//

// All these can be lowered to @llvm.ctlz.i32/64 intrinsics and a subtract.


// Recognize a load from a constant de-Bruijn lookup table indexed by
// (smear(x) * MulConst) >> ShiftConst, where smear(x) is the chain of
// "v |= v >> k" steps, and replace it with InputBits - 1 - ctlz(x), guarded by
// a select for x == 0. Returns true if the uses of \p I were rewritten.
static bool tryToRecognizeTableBasedLog2(Instruction &I, const DataLayout &DL,
                                         TargetTransformInfo &TTI) {
  LoadInst *LI = dyn_cast<LoadInst>(&I);
  if (!LI)
    return false;

  Type *AccessType = LI->getType();
  if (!AccessType->isIntegerTy())
    return false;

  GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(LI->getPointerOperand());
  if (!GEP || !GEP->hasNoUnsignedSignedWrap())
    return false;

  // The table must be a constant global whose contents are known.
  GlobalVariable *GVTable = dyn_cast<GlobalVariable>(GEP->getPointerOperand());
  if (!GVTable || !GVTable->hasInitializer() || !GVTable->isConstant())
    return false;

  // Require exactly one variable index and no constant displacement.
  unsigned BW = DL.getIndexTypeSizeInBits(GEP->getType());
  APInt ModOffset(BW, 0);
  SmallMapVector<Value *, APInt, 4> VarOffsets;
  if (!GEP->collectOffset(DL, BW, VarOffsets, ModOffset) ||
      VarOffsets.size() != 1 || ModOffset != 0)
    return false;
  auto [GepIdx, GEPScale] = VarOffsets.front();

  Value *X;
  const APInt *MulConst, *ShiftConst;
  // Check that the gep variable index is (x * MulConst) >> ShiftConst.
  auto MatchInner =
      m_LShr(m_Mul(m_Value(X), m_APInt(MulConst)), m_APInt(ShiftConst));
  if (!match(GepIdx, m_CastOrSelf(MatchInner)))
    return false;

  unsigned InputBits = X->getType()->getScalarSizeInBits();
  if (InputBits != 16 && InputBits != 32 && InputBits != 64 && InputBits != 128)
    return false;

  // Verify shift amount.
  // TODO: Allow other shift amounts when we have proper test coverage.
  if (*ShiftConst != InputBits - Log2_32(InputBits))
    return false;

  // Match the sequence of OR operations with right shifts by powers of 2,
  // walking from the outermost (largest shift) to the innermost one. After
  // the loop X is the original, un-smeared input.
  for (unsigned ShiftAmt = InputBits / 2; ShiftAmt != 0; ShiftAmt /= 2) {
    Value *Y;
    if (!match(X, m_c_Or(m_LShr(m_Value(Y), m_SpecificInt(ShiftAmt)),
                         m_Deferred(Y))))
      return false;
    X = Y;
  }

  if (!GEPScale.isIntN(InputBits) ||
      !isLog2Table(GVTable->getInitializer(), *MulConst, *ShiftConst,
                   AccessType, InputBits, GEPScale.zextOrTrunc(InputBits), DL))
    return false;

  // Element at offset 0: the value the original code yields for x == 0.
  ConstantInt *ZeroTableElem = cast<ConstantInt>(
      ConstantFoldLoadFromConst(GVTable->getInitializer(), AccessType, DL));

  // The zero result is materialized in X's type below. If the loaded type is
  // wider than X, the table value must be representable in X's type.
  const APInt &ZeroVal = ZeroTableElem->getValue();
  if (!ZeroVal.isIntN(InputBits))
    return false;

  // Use InputBits - 1 - ctlz(X) to compute log2(X).
  IRBuilder<> B(LI);
  ConstantInt *BoolConst = B.getTrue();
  Type *XType = X->getType();

  // Check that the backend has an efficient ctlz instruction.
  // FIXME: Teach the backend to emit the original code when ctlz isn't
  // supported like we do for cttz.
  IntrinsicCostAttributes Attrs(
      Intrinsic::ctlz, XType,
      {PoisonValue::get(XType), /*is_zero_poison=*/BoolConst});
  InstructionCost Cost =
      TTI.getIntrinsicInstrCost(Attrs, TargetTransformInfo::TCK_SizeAndLatency);
  if (Cost > TargetTransformInfo::TCC_Basic)
    return false;

  Value *Ctlz = B.CreateIntrinsic(Intrinsic::ctlz, {XType}, {X, BoolConst});

  Constant *InputBitsM1 = ConstantInt::get(XType, InputBits - 1);
  Value *Sub = B.CreateSub(InputBitsM1, Ctlz);

  // The table won't produce a sensible result for 0.
  Value *Cmp = B.CreateICmpEQ(X, ConstantInt::get(XType, 0));
  // Build the constant directly in X's type. A plain zext of the table element
  // would be an invalid narrowing cast when AccessType is wider than XType.
  Constant *ZeroResult =
      ConstantInt::get(XType, ZeroVal.zextOrTrunc(InputBits));
  Value *Select = B.CreateSelect(Cmp, ZeroResult, Sub);

  // The true branch of select handles the log2(0) case, which is rare.
  if (!ProfcheckDisableMetadataFixes) {
    if (Instruction *SelectI = dyn_cast<Instruction>(Select))
      SelectI->setMetadata(
          LLVMContext::MD_prof,
          MDBuilder(SelectI->getContext()).createUnlikelyBranchWeights());
  }

  Value *ZExtOrTrunc = B.CreateZExtOrTrunc(Select, AccessType);

  LI->replaceAllUsesWith(ZExtOrTrunc);

  return true;
}


/// This is used by foldLoadsRecursive() to capture a Root Load node which is
/// of type or(load, load) and recursively build the wide load. Also capture the
/// shift amount, zero extend type and loadSize.
struct LoadOps {
  /// Load with the lowest offset in the chain merged so far.
  LoadInst *Root = nullptr;
  /// Load at which the wide load is to be inserted.
  LoadInst *RootInsert = nullptr;
  /// Set once at least one pair of loads has been merged.
  bool FoundRoot = false;
  /// Accumulated size, in bits, of the loads merged so far.
  uint64_t LoadSize = 0;
  /// Shift amount to apply to the merged value.
  uint64_t Shift = 0;
  /// Type the merged load is zero-extended to. Initialized to null like the
  /// other pointer members so that a default-constructed LoadOps never holds
  /// an indeterminate value (foldConsecutiveLoads tests it for null).
  Type *ZextType = nullptr;
  /// Concatenated alias-analysis metadata of the merged loads.
  AAMDNodes AATags;
};


// Identify and merge consecutive loads recursively which are of the form

// (ZExt(L1) << shift1) | (ZExt(L2) << shift2) -> ZExt(L3) << shift1

// (ZExt(L1) << shift1) | ZExt(L2) -> ZExt(L3)

//
// \p V is the or-node currently examined and \p LOps accumulates the state of
// the chain merged so far (root load, insert point, total size, shift, zext
// type and AA tags). \p IsRoot is true only for the outermost or, which is
// allowed to have multiple uses. No IR is modified here; the function returns
// true when V extends (or starts) a mergeable chain and LOps has been updated.


static bool foldLoadsRecursive(Value *V, LoadOps &LOps, const DataLayout &DL,

                               AliasAnalysis &AA, bool IsRoot = false) {

  uint64_t ShAmt2;

  Value *X;

  Instruction *L1, *L2;


  // For the root instruction, allow multiple uses since the final result

  // may legitimately be used in multiple places. For intermediate values,

  // require single use to avoid creating duplicate loads.

  if (!IsRoot && !V->hasOneUse())

    return false;


  // V must be or(X, [shl] zext(L2)); X is the rest of the chain and ShAmt2
  // receives the shift amount applied to L2.
  if (!match(V, m_c_Or(m_Value(X),

                       m_OneUse(m_ShlOrSelf(m_OneUse(m_ZExt(m_Instruction(L2))),

                                            ShAmt2)))))

    return false;


  // Try to fold the inner part of the chain first. A failure is only fatal if
  // a root was already found deeper in the chain.
  if (!foldLoadsRecursive(X, LOps, DL, AA, /*IsRoot=*/false) && LOps.FoundRoot)

    // Avoid Partial chain merge.

    return false;


  // Check if the pattern has loads

  LoadInst *LI1 = LOps.Root;

  uint64_t ShAmt1 = LOps.Shift;

  // No root yet: X itself must be the first leaf, i.e. [shl] zext(L1).
  if (LOps.FoundRoot == false &&

      match(X, m_OneUse(

                   m_ShlOrSelf(m_OneUse(m_ZExt(m_Instruction(L1))), ShAmt1)))) {

    LI1 = dyn_cast<LoadInst>(L1);

  }

  LoadInst *LI2 = dyn_cast<LoadInst>(L2);


  // Reject if either side is not a load, both sides are the same load, either
  // load is not simple (atomic or volatile), or the address spaces differ.

  if (LI1 == LI2 || !LI1 || !LI2 || !LI1->isSimple() || !LI2->isSimple() ||

      LI1->getPointerAddressSpace() != LI2->getPointerAddressSpace())

    return false;


  // Check if Loads come from same BB.

  if (LI1->getParent() != LI2->getParent())

    return false;


  // Find the data layout

  bool IsBigEndian = DL.isBigEndian();


  // Check if loads are consecutive and same size.
  // Strip constant offsets from both pointers so that the bases and the
  // accumulated offsets can be compared.

  Value *Load1Ptr = LI1->getPointerOperand();

  APInt Offset1(DL.getIndexTypeSizeInBits(Load1Ptr->getType()), 0);

  Load1Ptr =

      Load1Ptr->stripAndAccumulateConstantOffsets(DL, Offset1,

                                                  /* AllowNonInbounds */ true);


  Value *Load2Ptr = LI2->getPointerOperand();

  APInt Offset2(DL.getIndexTypeSizeInBits(Load2Ptr->getType()), 0);

  Load2Ptr =

      Load2Ptr->stripAndAccumulateConstantOffsets(DL, Offset2,

                                                  /* AllowNonInbounds */ true);


  // Verify if both loads have same base pointers

  uint64_t LoadSize1 = LI1->getType()->getPrimitiveSizeInBits();

  uint64_t LoadSize2 = LI2->getType()->getPrimitiveSizeInBits();

  if (Load1Ptr != Load2Ptr)

    return false;


  // Make sure that there are no padding bits.

  if (!DL.typeSizeEqualsStoreSize(LI1->getType()) ||

      !DL.typeSizeEqualsStoreSize(LI2->getType()))

    return false;


  // Alias Analysis to check for stores b/w the loads.
  // Start is the current insert point (or LI1 when no root exists yet) and
  // End is LI2; they are swapped below if Start does not come first.

  LoadInst *Start = LOps.FoundRoot ? LOps.RootInsert : LI1, *End = LI2;

  MemoryLocation Loc;

  if (!Start->comesBefore(End)) {

    std::swap(Start, End);

    // If LOps.RootInsert comes after LI2, since we use LI2 as the new insert

    // point, we should make sure whether the memory region accessed by LOps

    // isn't modified.

    if (LOps.FoundRoot)

      Loc = MemoryLocation(

          LOps.Root->getPointerOperand(),

          LocationSize::precise(DL.getTypeStoreSize(

              IntegerType::get(LI1->getContext(), LOps.LoadSize))),

          LOps.AATags);

    else

      Loc = MemoryLocation::get(End);

  } else

    Loc = MemoryLocation::get(End);

  // Scan the instructions in [Start, End) for writes that may modify Loc,
  // giving up once more than MaxInstrsToScan instructions were visited.
  unsigned NumScanned = 0;

  for (Instruction &Inst :

       make_range(Start->getIterator(), End->getIterator())) {

    if (Inst.mayWriteToMemory() && isModSet(AA.getModRefInfo(&Inst, Loc)))

      return false;


    if (++NumScanned > MaxInstrsToScan)

      return false;

  }


  // Make sure Load with lower Offset is at LI1

  bool Reverse = false;

  if (Offset2.slt(Offset1)) {

    std::swap(LI1, LI2);

    std::swap(ShAmt1, ShAmt2);

    std::swap(Offset1, Offset2);

    std::swap(Load1Ptr, Load2Ptr);

    std::swap(LoadSize1, LoadSize2);

    Reverse = true;

  }


  // Big endian swap the shifts

  if (IsBigEndian)

    std::swap(ShAmt1, ShAmt2);


  // First load is always LI1. This is where we put the new load.

  // Use the merged load size available from LI1 for forward loads.
  // When the pair was swapped above, the already-merged chain is on the LI2
  // side, so its accumulated size replaces LoadSize2 instead.

  if (LOps.FoundRoot) {

    if (!Reverse)

      LoadSize1 = LOps.LoadSize;

    else

      LoadSize2 = LOps.LoadSize;

  }


  // Verify if shift amount and load index aligns and verifies that loads

  // are consecutive.
  // The shift difference must equal the size in bits of the part occupying
  // the lower bits, and the offset difference must equal the store size of
  // the lower-addressed part.

  uint64_t ShiftDiff = IsBigEndian ? LoadSize2 : LoadSize1;

  uint64_t PrevSize =

      DL.getTypeStoreSize(IntegerType::get(LI1->getContext(), LoadSize1));

  if ((ShAmt2 - ShAmt1) != ShiftDiff || (Offset2 - Offset1) != PrevSize)

    return false;


  // Update LOps

  AAMDNodes AATags1 = LOps.AATags;

  AAMDNodes AATags2 = LI2->getAAMetadata();

  if (LOps.FoundRoot == false) {

    LOps.FoundRoot = true;

    AATags1 = LI1->getAAMetadata();

  }

  LOps.LoadSize = LoadSize1 + LoadSize2;

  // Start is the earlier of the two instructions considered above.
  LOps.RootInsert = Start;


  // Concatenate the AATags of the Merged Loads.

  LOps.AATags = AATags1.concat(AATags2);


  LOps.Root = LI1;

  LOps.Shift = ShAmt1;

  LOps.ZextType = X->getType();

  return true;

}


// For a given BB instruction, evaluate all loads in the chain that form a
// pattern which suggests that the loads can be combined. The one and only use
// of the loads is to form a wider load. On success every use of \p I is
// replaced by the (optionally zero-extended and shifted) wide load and true is
// returned.
static bool foldConsecutiveLoads(Instruction &I, const DataLayout &DL,
                                 TargetTransformInfo &TTI, AliasAnalysis &AA,
                                 const DominatorTree &DT) {
  // Only consider load chains of scalar values.
  if (isa<VectorType>(I.getType()))
    return false;

  LoadOps LOps;
  const bool ChainMatched =
      foldLoadsRecursive(&I, LOps, DL, AA, /*IsRoot=*/true);
  if (!ChainMatched || !LOps.FoundRoot)
    return false;

  LoadInst *FirstLoad = LOps.Root;

  // Ask TTI whether the wide integer type is legal at all...
  IntegerType *WideTy = IntegerType::get(I.getContext(), LOps.LoadSize);
  if (!TTI.isTypeLegal(WideTy))
    return false;

  // ...and whether a load of that width at the first load's alignment is both
  // permitted and fast.
  unsigned IsFast = 0;
  const bool MisalignedOk = TTI.allowsMisalignedMemoryAccesses(
      I.getContext(), LOps.LoadSize, FirstLoad->getPointerAddressSpace(),
      FirstLoad->getAlign(), &IsFast);
  if (!MisalignedOk || !IsFast)
    return false;

  IRBuilder<> Builder(&I);
  Builder.SetInsertPoint(LOps.RootInsert);

  // If the first load's pointer does not dominate the insert point, rebuild it
  // as base + constant offset at the insert point.
  Value *WidePtr = FirstLoad->getPointerOperand();
  if (!DT.dominates(WidePtr, LOps.RootInsert)) {
    APInt ConstOffset(DL.getIndexTypeSizeInBits(WidePtr->getType()), 0);
    WidePtr = WidePtr->stripAndAccumulateConstantOffsets(
        DL, ConstOffset, /* AllowNonInbounds */ true);
    WidePtr = Builder.CreatePtrAdd(WidePtr, Builder.getInt(ConstOffset));
  }

  // Generate the wider load, inheriting name and AA metadata.
  LoadInst *WideLoad = Builder.CreateAlignedLoad(
      WideTy, WidePtr, FirstLoad->getAlign(), FirstLoad->isVolatile(), "");
  WideLoad->takeName(FirstLoad);
  if (LOps.AATags)
    WideLoad->setAAMetadata(LOps.AATags);

  // Zero extend if a zext type was recorded, then re-apply the first load's
  // shift when it is non-zero.
  Value *Replacement = WideLoad;
  if (LOps.ZextType)
    Replacement = Builder.CreateZExt(Replacement, LOps.ZextType);
  if (LOps.Shift)
    Replacement = Builder.CreateShl(Replacement, LOps.Shift);

  I.replaceAllUsesWith(Replacement);
  return true;
}


/// Describes one partial store: ValWidth bits of Val, starting at bit
/// ValOffset, written to PtrBase + PtrOffset by Store.
struct PartStore {
  // Pointer with constant offsets stripped, and the stripped offset.
  Value *PtrBase;
  APInt PtrOffset;
  // Source value and the bit range of it that is stored.
  Value *Val;
  uint64_t ValOffset;
  uint64_t ValWidth;
  // The store instruction itself.
  StoreInst *Store;

  /// Two parts can belong to the same group only if they share both the
  /// pointer base and the source value.
  bool isCompatibleWith(const PartStore &Other) const {
    if (PtrBase != Other.PtrBase)
      return false;
    return Val == Other.Val;
  }

  /// Order parts by signed pointer offset.
  bool operator<(const PartStore &Other) const {
    return PtrOffset.slt(Other.PtrOffset);
  }
};


/// Match \p I as a simple store of trunc(Val >> ValOffset) (the shift being
/// optional) of a padding-free integer type, and describe it as a PartStore.
/// Returns std::nullopt if \p I does not have that shape.
static std::optional<PartStore> matchPartStore(Instruction &I,
                                               const DataLayout &DL) {
  auto *SI = dyn_cast<StoreInst>(&I);
  if (!SI)
    return std::nullopt;
  if (!SI->isSimple())
    return std::nullopt;

  // Only integer values whose type size equals their store size qualify.
  Value *Stored = SI->getValueOperand();
  Type *StoredTy = Stored->getType();
  const bool IsPaddingFreeInt =
      StoredTy->isIntegerTy() && DL.typeSizeEqualsStoreSize(StoredTy);
  if (!IsPaddingFreeInt)
    return std::nullopt;

  uint64_t BitWidth = StoredTy->getPrimitiveSizeInBits();

  Value *Source;
  uint64_t BitOffset;
  if (!match(Stored, m_Trunc(m_LShrOrSelf(m_Value(Source), BitOffset))))
    return std::nullopt;

  // Split the address into a base pointer and an accumulated constant offset.
  Value *Ptr = SI->getPointerOperand();
  APInt ByteOffset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
  Value *Base = Ptr->stripAndAccumulateConstantOffsets(
      DL, ByteOffset, /*AllowNonInbounds=*/true);

  return PartStore{Base, ByteOffset, Source, BitOffset, BitWidth, SI};
}


/// Replace the stores in \p Parts by a single store of \p Width bits placed at
/// the first part's store. The caller has already established that the parts
/// are consecutive. Returns true if the stores were merged; nothing is changed
/// when fewer than two parts are given or when TTI rejects the wide store.
static bool mergeConsecutivePartStores(ArrayRef<PartStore> Parts,
                                       unsigned Width, const DataLayout &DL,
                                       TargetTransformInfo &TTI) {
  if (Parts.size() < 2)
    return false;

  const PartStore &Head = Parts.front();
  StoreInst *HeadStore = Head.Store;
  LLVMContext &Ctx = HeadStore->getContext();

  // Check whether combining the stores is profitable.
  // FIXME: We could generate smaller stores if we can't produce a large one.
  Type *WideTy = Type::getIntNTy(Ctx, Width);
  if (!TTI.isTypeLegal(WideTy))
    return false;
  unsigned IsFast = 0;
  if (!TTI.allowsMisalignedMemoryAccesses(
          Ctx, Width, HeadStore->getPointerAddressSpace(),
          HeadStore->getAlign(), &IsFast))
    return false;
  if (!IsFast)
    return false;

  // Generate the combined store: shift the source down to the first part's
  // bit offset (if any) and resize it to the wide type.
  IRBuilder<> Builder(HeadStore);
  Value *WideVal = Head.Val;
  if (Head.ValOffset != 0)
    WideVal = Builder.CreateLShr(WideVal, Head.ValOffset);
  WideVal = Builder.CreateZExtOrTrunc(WideVal, WideTy);
  StoreInst *WideStore = Builder.CreateAlignedStore(
      WideVal, HeadStore->getPointerOperand(), HeadStore->getAlign());

  // Merge various metadata onto the new store.
  SmallVector<Instruction *> OldStores;
  OldStores.reserve(Parts.size());
  SmallVector<DebugLoc> OldLocs;
  OldLocs.reserve(Parts.size());
  for (const PartStore &Part : Parts) {
    OldStores.push_back(Part.Store);
    OldLocs.push_back(Part.Store->getDebugLoc());
  }

  AAMDNodes MergedAA = HeadStore->getAAMetadata();
  for (const PartStore &Part : drop_begin(Parts))
    MergedAA = MergedAA.concat(Part.Store->getAAMetadata());

  WideStore->setAAMetadata(MergedAA);
  WideStore->mergeDIAssignID(OldStores);
  WideStore->setDebugLoc(DebugLoc::getMergedLocations(OldLocs));

  // Remove the old stores.
  for (Instruction *Old : OldStores)
    Old->eraseFromParent();

  return true;
}


/// Given parts of the same value stored relative to the same pointer base,
/// sort them by pointer offset, split them into maximal runs whose value
/// offsets are consistent with the pointer offsets and which cover the value
/// contiguously without overlap, and try to merge each run into one store.
/// Returns true if any run was merged.
static bool mergePartStores(SmallVectorImpl<PartStore> &Parts,
                            const DataLayout &DL, TargetTransformInfo &TTI) {
  if (Parts.size() < 2)
    return false;

  llvm::sort(Parts);

  bool Changed = false;
  // Start of the current run and the number of value bits it covers so far.
  const PartStore *RunBegin = &Parts[0];
  int64_t RunBits = 0;
  for (const PartStore &Part : Parts) {
    APInt PtrDelta = Part.PtrOffset - RunBegin->PtrOffset;
    int64_t ValDelta = Part.ValOffset - RunBegin->ValOffset;

    // The part continues the run when its byte distance matches its bit
    // distance and it begins exactly where the run currently ends.
    const bool ExtendsRun = PtrDelta * 8 == ValDelta && RunBits == ValDelta;
    if (ExtendsRun) {
      RunBits = ValDelta + Part.ValWidth;
      continue;
    }

    // Otherwise flush the run collected so far and start a new one here.
    Changed |= mergeConsecutivePartStores(ArrayRef(RunBegin, &Part), RunBits,
                                          DL, TTI);
    RunBegin = &Part;
    RunBits = Part.ValWidth;
  }

  // Flush the trailing run.
  Changed |= mergeConsecutivePartStores(ArrayRef(RunBegin, Parts.end()),
                                        RunBits, DL, TTI);
  return Changed;
}


/// Walk \p BB collecting groups of compatible partial stores and merge each
/// group into wider stores where possible. A group ends when an incompatible
/// partial store is seen, or when an instruction may throw or may read or
/// write memory based on the group's pointer. Returns true if anything changed.
static bool foldConsecutiveStores(BasicBlock &BB, const DataLayout &DL,
                                  TargetTransformInfo &TTI, AliasAnalysis &AA) {
  // FIXME: Add big endian support.
  if (DL.isBigEndian())
    return false;

  BatchAAResults BatchAA(AA);
  SmallVector<PartStore, 8> Parts;
  bool MadeChange = false;

  // Merge whatever has been collected and start over with an empty group.
  auto FlushParts = [&] {
    MadeChange |= mergePartStores(Parts, DL, TTI);
    Parts.clear();
  };

  for (Instruction &I : make_early_inc_range(BB)) {
    if (std::optional<PartStore> Part = matchPartStore(I, DL)) {
      // An incompatible part closes the current group and opens a new one.
      if (!Parts.empty() && !Part->isCompatibleWith(Parts[0]))
        FlushParts();
      Parts.push_back(std::move(*Part));
      continue;
    }

    // Anything else only matters while a group is open.
    if (Parts.empty())
      continue;

    const bool Interferes =
        I.mayThrow() ||
        (I.mayReadOrWriteMemory() &&
         isModOrRefSet(BatchAA.getModRefInfo(
             &I, MemoryLocation::getBeforeOrAfter(Parts[0].PtrBase))));
    if (Interferes)
      FlushParts();
  }

  FlushParts();
  return MadeChange;
}


/// Combine away instructions providing they are still equivalent when compared
/// against 0, i.e. they only matter through whether any bit is set.
///
/// \p V must be a single-use `or`. Each operand that is a nuw/nsw shl is
/// replaced by the shifted value, and operands that are themselves such or
/// chains are simplified recursively. Returns the newly built `or`, or nullptr
/// if nothing could be simplified.
static Value *optimizeShiftInOrChain(Value *V, IRBuilder<> &Builder) {
  auto *OrI = dyn_cast<Instruction>(V);
  if (!OrI)
    return nullptr;
  if (OrI->getOpcode() != Instruction::Or || !OrI->hasOneUse())
    return nullptr;

  // Look deeper into the chain of or's, combining away shl (so long as they are
  // nuw or nsw). Returns the operand unchanged if it cannot be simplified.
  auto Simplify = [&Builder](Value *Op) -> Value * {
    Value *Shifted;
    if (match(Op, m_CombineOr(m_NSWShl(m_Value(Shifted), m_Value()),
                              m_NUWShl(m_Value(Shifted), m_Value()))))
      return Shifted;
    if (Value *Rewritten = optimizeShiftInOrChain(Op, Builder))
      return Rewritten;
    return Op;
  };

  Value *OldLHS = OrI->getOperand(0);
  Value *OldRHS = OrI->getOperand(1);
  // Operand 0 is processed before operand 1 so that any instructions created
  // by the recursion are emitted in that order.
  Value *NewLHS = Simplify(OldLHS);
  Value *NewRHS = Simplify(OldRHS);

  if (NewLHS == OldLHS && NewRHS == OldRHS)
    return nullptr;
  return Builder.CreateOr(NewLHS, NewRHS);
}


static bool foldICmpOrChain(Instruction &I, const DataLayout &DL,
                            TargetTransformInfo &TTI, AliasAnalysis &AA,
                            const DominatorTree &DT) {
  // Only equality comparisons (eq/ne) against zero are handled.
  CmpPredicate Pred;
  Value *Chain;
  const bool IsCmpWithZero = match(&I, m_ICmp(Pred, m_Value(Chain), m_Zero()));
  if (!IsCmpWithZero || !ICmpInst::isEquality(Pred))
    return false;

  // If the compared value is an 'or', give the consecutive-load fold the first
  // chance, before any shifts are removed from the chain.
  if (auto *OrI = dyn_cast<Instruction>(Chain);
      OrI && OrI->getOpcode() == Instruction::Or &&
      foldConsecutiveLoads(*OrI, DL, TTI, AA, DT))
    return true;

  // icmp eq/ne or(shl(a), b), 0 -> icmp eq/ne or(a, b), 0
  IRBuilder<> Builder(&I);
  Value *Stripped = optimizeShiftInOrChain(Chain, Builder);
  if (!Stripped)
    return false;

  // Rebuild the comparison on the simplified chain, reusing the original zero
  // operand, and redirect all users of the old compare to it.
  Value *NewCmp = Builder.CreateICmp(Pred, Stripped, I.getOperand(1));
  I.replaceAllUsesWith(NewCmp);
  return true;
}


// Calculate GEP Stride and accumulated const ModOffset. Return Stride and

// ModOffset

static std::pair<APInt, APInt>
getStrideAndModOffsetOfGEP(Value *PtrOp, const DataLayout &DL) {
  const unsigned IndexBits = DL.getIndexTypeSizeInBits(PtrOp->getType());
  std::optional<APInt> MinStride;
  APInt ModOffset(IndexBits, 0);

  // Walk up the chain of GEPs. The minimum stride is the greatest common
  // divisor of the consecutive GEP index scales (c.f. Bézout's identity), and
  // the constant parts are accumulated into ModOffset by collectOffset().
  while (auto *GEP = dyn_cast<GEPOperator>(PtrOp)) {
    SmallMapVector<Value *, APInt, 4> VarOffsets;
    if (!GEP->collectOffset(DL, IndexBits, VarOffsets, ModOffset))
      break;

    const bool KeepFullScale = GEP->hasNoUnsignedSignedWrap();
    for (const auto &Entry : VarOffsets) {
      APInt Scale = Entry.second;
      // Without the no-unsigned-signed-wrap guarantee only the power-of-two
      // factor of the scale is kept.
      if (!KeepFullScale)
        Scale = APInt::getOneBitSet(Scale.getBitWidth(), Scale.countr_zero());

      MinStride = MinStride ? APIntOps::GreatestCommonDivisor(*MinStride, Scale)
                            : Scale;
    }

    PtrOp = GEP->getPointerOperand();
  }

  // The stride is only usable if the walk ended at a global variable and at
  // least one variable index contributed a scale. Otherwise fall back to a
  // stride of one with no offset; the caller can still check by alignment.
  if (!isa<GlobalVariable>(PtrOp) || !MinStride)
    return {APInt(IndexBits, 1), APInt(IndexBits, 0)};

  // GEP indices are signed, so reduce the accumulated constant offset to its
  // non-negative remainder modulo the minimum stride.
  ModOffset = ModOffset.srem(*MinStride);
  if (ModOffset.isNegative())
    ModOffset += *MinStride;

  return {*MinStride, ModOffset};
}


/// If the load reads from a constant patterned array and all valid loaded
/// results for the given alignment are the same constant, replace the load
/// with that constant. Returns true if the load was replaced.


static bool foldPatternedLoads(Instruction &I, const DataLayout &DL) {
  // Only non-volatile loads are candidates.
  auto *Load = dyn_cast<LoadInst>(&I);
  if (!Load)
    return false;
  if (Load->isVolatile())
    return false;

  // The load must come from a constant global with a definitive initializer.
  // Checking this first skips the expensive logic below in the common case.
  Value *Ptr = Load->getPointerOperand();
  auto *Global = dyn_cast<GlobalVariable>(getUnderlyingObject(Ptr));
  if (!Global)
    return false;
  if (!Global->isConstant() || !Global->hasDefinitiveInitializer())
    return false;

  // Bail on empty initializers and on ones larger than 4K, to bound the number
  // of offsets scanned below.
  Constant *Init = Global->getInitializer();
  const uint64_t InitSize = DL.getTypeAllocSize(Init->getType());
  if (InitSize == 0 || InitSize > 4096)
    return false;

  Type *LoadTy = Load->getType();
  const unsigned IndexBits = DL.getIndexTypeSizeInBits(Ptr->getType());
  auto [Stride, Offset] = getStrideAndModOffsetOfGEP(Ptr, DL);

  // Every possible offset is a multiple of the GEP stride, and every valid
  // offset is a multiple of the load alignment, so it is enough to check
  // multiples of the larger of the two.
  const Align LoadAlign = Load->getAlign();
  if (LoadAlign <= Global->getAlign().valueOrOne() &&
      Stride.getZExtValue() < LoadAlign.value()) {
    Offset = APInt(IndexBits, 0);
    Stride = APInt(IndexBits, LoadAlign.value());
  }

  // The value at the first candidate offset is the one all others must equal.
  Constant *Expected = ConstantFoldLoadFromConst(Init, LoadTy, Offset, DL);
  if (!Expected)
    return false;

  // Step through the remaining candidate offsets up to the last one at which
  // a load of this type still fits inside the initializer.
  const unsigned LastOffset = InitSize - DL.getTypeStoreSize(LoadTy);
  while (Offset.getZExtValue() <= LastOffset) {
    if (ConstantFoldLoadFromConst(Init, LoadTy, Offset, DL) != Expected)
      return false;
    Offset += Stride;
  }

  I.replaceAllUsesWith(Expected);
  return true;
}


namespace {
/// Helper that rewrites a single strcmp/strncmp call against a constant string
/// into an inline sequence of per-byte comparisons.
class StrNCmpInliner {
public:
  /// \p CI is the call being considered, \p Func identifies which library
  /// function it calls, \p DTU is the (possibly null) dominator tree updater
  /// that receives the CFG changes, and \p DL is the data layout used when
  /// querying the pointer operands.
  StrNCmpInliner(CallInst *CI, LibFunc Func, DomTreeUpdater *DTU,
                 const DataLayout &DL)
      : CI(CI), Func(Func), DTU(DTU), DL(DL) {}

  /// Try to inline the call. Returns true if the call was replaced (which
  /// also changes the CFG), false if it was left untouched.
  bool optimizeStrNCmp();

private:
  /// Emit the byte-by-byte comparison of \p LHS against the first \p N bytes
  /// of \p RHS and replace the call with the result. \p Swapped means the
  /// constant string was the first argument of the original call.
  void inlineCompare(Value *LHS, StringRef RHS, uint64_t N, bool Swapped);

  CallInst *CI;        // The strcmp/strncmp call being rewritten.
  LibFunc Func;        // Which library function CI calls.
  DomTreeUpdater *DTU; // May be null; checked before applying updates.
  const DataLayout &DL;
};

} // namespace


/// First we normalize calls to strncmp/strcmp to the form of

/// compare(s1, s2, N), which means comparing first N bytes of s1 and s2

/// (without considering '\0').

///

/// Examples:

///

/// \code

///   strncmp(s, "a", 3) -> compare(s, "a", 2)

///   strncmp(s, "abc", 3) -> compare(s, "abc", 3)

///   strncmp(s, "a\0b", 3) -> compare(s, "a\0b", 2)

///   strcmp(s, "a") -> compare(s, "a", 2)

///

///   char s2[] = {'a'}

///   strncmp(s, s2, 3) -> compare(s, s2, 3)

///

///   char s2[] = {'a', 'b', 'c', 'd'}

///   strncmp(s, s2, 3) -> compare(s, s2, 3)

/// \endcode

///

/// We only handle cases where N and exactly one of s1 and s2 are constant.

/// Cases that s1 and s2 are both constant are already handled by the

/// instcombine pass.

///

/// We do not handle cases where N > StrNCmpInlineThreshold.

///

/// We also do not handle cases where N < 2, which are already

/// handled by the instcombine pass.

///

bool StrNCmpInliner::optimizeStrNCmp() {

  if (StrNCmpInlineThreshold < 2)

    return false;


  if (!isOnlyUsedInZeroComparison(CI))

    return false;


  Value *Str1P = CI->getArgOperand(0);

  Value *Str2P = CI->getArgOperand(1);

  // Should be handled elsewhere.

  if (Str1P == Str2P)

    return false;


  StringRef Str1, Str2;

  bool HasStr1 = getConstantStringInfo(Str1P, Str1, /*TrimAtNul=*/false);

  bool HasStr2 = getConstantStringInfo(Str2P, Str2, /*TrimAtNul=*/false);

  if (HasStr1 == HasStr2)

    return false;


  // Note that '\0' and characters after it are not trimmed.

  StringRef Str = HasStr1 ? Str1 : Str2;

  Value *StrP = HasStr1 ? Str2P : Str1P;


  size_t Idx = Str.find('\0');

  uint64_t N = Idx == StringRef::npos ? UINT64_MAX : Idx + 1;

  if (Func == LibFunc_strncmp) {

    if (auto *ConstInt = dyn_cast<ConstantInt>(CI->getArgOperand(2)))

      N = std::min(N, ConstInt->getZExtValue());

    else

      return false;

  }

  // Now N means how many bytes we need to compare at most.

  if (N > Str.size() || N < 2 || N > StrNCmpInlineThreshold)

    return false;


  // Cases where StrP has two or more dereferenceable bytes might be better

  // optimized elsewhere.

  bool CanBeNull = false, CanBeFreed = false;

  if (StrP->getPointerDereferenceableBytes(DL, CanBeNull, CanBeFreed) > 1)

    return false;

  inlineCompare(StrP, Str, N, HasStr1);

  return true;

}


/// Convert

///

/// \code

///   ret = compare(s1, s2, N)

/// \endcode

///

/// into

///

/// \code

///   ret = (int)s1[0] - (int)s2[0]

///   if (ret != 0)

///     goto NE

///   ...

///   ret = (int)s1[N-2] - (int)s2[N-2]

///   if (ret != 0)

///     goto NE

///   ret = (int)s1[N-1] - (int)s2[N-1]

///   NE:

/// \endcode

///

/// CFG before and after the transformation:

///

/// (before)

/// BBCI

///

/// (after)

/// BBCI -> BBSubs[0] (sub,icmp) --NE-> BBNE -> BBTail

///                 |                    ^

///                 E                    |

///                 |                    |

///        BBSubs[1] (sub,icmp) --NE-----+

///                ...                   |

///        BBSubs[N-1]    (sub) ---------+

///

void StrNCmpInliner::inlineCompare(Value *LHS, StringRef RHS, uint64_t N,
                                   bool Swapped) {
  auto &Ctx = CI->getContext();
  IRBuilder<> B(Ctx);
  // We want these instructions to be recognized as inlined instructions for the
  // compare call, but we don't have a source location for the definition of
  // that function, since we're generating that code now. Because the generated
  // code is a viable point for a memory access error, we make the pragmatic
  // choice here to directly use CI's location so that we have useful
  // attribution for the generated code.
  B.SetCurrentDebugLocation(CI->getDebugLoc());

  // Split the block at the call; everything from the call onwards moves into
  // the ".tail" block.
  BasicBlock *BBCI = CI->getParent();
  BasicBlock *BBTail =
      SplitBlock(BBCI, CI, DTU, nullptr, nullptr, BBCI->getName() + ".tail");

  // One block per compared byte, plus the common "ne" join block. All of them
  // are placed before the tail block.
  SmallVector<BasicBlock *> BBSubs;
  for (uint64_t I = 0; I < N; ++I)
    BBSubs.push_back(
        BasicBlock::Create(Ctx, "sub_" + Twine(I), BBCI->getParent(), BBTail));
  BasicBlock *BBNE = BasicBlock::Create(Ctx, "ne", BBCI->getParent(), BBTail);

  // Redirect the branch created by SplitBlock to the first comparison block.
  cast<UncondBrInst>(BBCI->getTerminator())->setSuccessor(BBSubs[0]);

  // The join block merges the per-byte differences and continues to the tail.
  B.SetInsertPoint(BBNE);
  PHINode *Phi = B.CreatePHI(CI->getType(), N);
  B.CreateBr(BBTail);

  Type *RetTy = CI->getType();
  for (uint64_t I = 0; I < N; ++I) {
    B.SetInsertPoint(BBSubs[I]);
    // Load byte I of the non-constant string and widen it to the call's
    // result type; the constant side is materialized directly.
    Value *VL =
        B.CreateZExt(B.CreateLoad(B.getInt8Ty(),
                                  B.CreateInBoundsPtrAdd(LHS, B.getInt64(I))),
                     RetTy);
    Value *VR = ConstantInt::get(RetTy, static_cast<unsigned char>(RHS[I]));
    // Keep the operand order of the original call.
    Value *Sub = Swapped ? B.CreateSub(VR, VL) : B.CreateSub(VL, VR);
    if (I < N - 1) {
      // A non-zero difference decides the result; otherwise continue with the
      // next byte.
      CondBrInst *NEBranch = B.CreateCondBr(
          B.CreateICmpNE(Sub, ConstantInt::get(RetTy, 0)), BBNE,
          BBSubs[I + 1]);
      // No precise weights are available for this branch, so in a profiled
      // function it is explicitly marked as "unknown" (same helper as the
      // memchr expansion uses for its switch).
      setExplicitlyUnknownBranchWeightsIfProfiled(*NEBranch, DEBUG_TYPE);
    } else {
      // The last byte always flows into the join block.
      B.CreateBr(BBNE);
    }

    Phi->addIncoming(Sub, BBSubs[I]);
  }

  CI->replaceAllUsesWith(Phi);
  CI->eraseFromParent();

  if (DTU) {
    // Describe the new edges, and the removal of the BBCI -> BBTail edge that
    // SplitBlock had created.
    SmallVector<DominatorTree::UpdateType, 8> Updates;
    Updates.push_back({DominatorTree::Insert, BBCI, BBSubs[0]});
    for (uint64_t I = 0; I < N; ++I) {
      if (I < N - 1)
        Updates.push_back({DominatorTree::Insert, BBSubs[I], BBSubs[I + 1]});
      Updates.push_back({DominatorTree::Insert, BBSubs[I], BBNE});
    }
    Updates.push_back({DominatorTree::Insert, BBNE, BBTail});
    Updates.push_back({DominatorTree::Delete, BBCI, BBTail});
    DTU->applyUpdates(Updates);
  }
}


/// Convert memchr with a small constant string into a switch


static bool foldMemChr(CallInst *Call, DomTreeUpdater *DTU,
                       const DataLayout &DL) {
  // Calls searching for a constant character are not expanded here.
  Value *Needle = Call->getArgOperand(1);
  if (isa<Constant>(Needle))
    return false;

  // The searched buffer must be a constant string; bytes after a '\0' are
  // kept.
  StringRef Str;
  Value *Base = Call->getArgOperand(0);
  if (!getConstantStringInfo(Base, Str, /*TrimAtNul=*/false))
    return false;

  // The length must be a constant that does not exceed the string size and
  // stays within the inlining threshold.
  auto *LenC = dyn_cast<ConstantInt>(Call->getArgOperand(2));
  if (!LenC)
    return false;
  const uint64_t N = LenC->getZExtValue();
  // Ignore the case that n is larger than the size of string.
  if (N > Str.size())
    return false;
  if (N > MemChrInlineThreshold)
    return false;

  // Split at the call and replace the resulting terminator with a switch over
  // the searched byte; "not found" falls through to the continuation block.
  BasicBlock *Head = Call->getParent();
  BasicBlock *Cont = SplitBlock(Head, Call, DTU);
  IRBuilder<> IRB(Head);
  IRB.SetCurrentDebugLocation(Call->getDebugLoc());
  IntegerType *ByteTy = IRB.getInt8Ty();
  Head->getTerminator()->eraseFromParent();
  SwitchInst *Switch =
      IRB.CreateSwitch(IRB.CreateTrunc(Needle, ByteTy), Cont, N);
  // We can't know the precise weights here, as they would depend on the value
  // distribution of the searched character. So we just mark it as "unknown".
  setExplicitlyUnknownBranchWeightsIfProfiled(*Switch, DEBUG_TYPE);
  Type *IndexTy = DL.getIndexType(Call->getType());
  SmallVector<DominatorTree::UpdateType, 8> Updates;

  // The success block turns the index of the first occurrence into a pointer.
  BasicBlock *Found = BasicBlock::Create(Call->getContext(), "memchr.success",
                                         Head->getParent(), Cont);
  IRB.SetInsertPoint(Found);
  PHINode *IndexPHI = IRB.CreatePHI(IndexTy, N, "memchr.idx");
  Value *FirstOccursLocation = IRB.CreateInBoundsPtrAdd(Base, IndexPHI);
  IRB.CreateBr(Cont);
  if (DTU)
    Updates.push_back({DominatorTree::Insert, Found, Cont});

  // One case per distinct byte value. Since the string is scanned front to
  // back, the first time a value is seen is its first occurrence; repeats are
  // skipped.
  SmallPtrSet<ConstantInt *, 4> Seen;
  for (uint64_t Pos = 0; Pos < N; ++Pos) {
    ConstantInt *CaseVal =
        ConstantInt::get(ByteTy, static_cast<unsigned char>(Str[Pos]));
    const bool IsNew = Seen.insert(CaseVal).second;
    if (!IsNew)
      continue;

    BasicBlock *CaseBB = BasicBlock::Create(Call->getContext(), "memchr.case",
                                            Head->getParent(), Found);
    Switch->addCase(CaseVal, CaseBB);
    IRB.SetInsertPoint(CaseBB);
    IndexPHI->addIncoming(ConstantInt::get(IndexTy, Pos), CaseBB);
    IRB.CreateBr(Found);
    if (DTU) {
      Updates.push_back({DominatorTree::Insert, Head, CaseBB});
      Updates.push_back({DominatorTree::Insert, CaseBB, Found});
    }
  }

  // Merge the two outcomes: null from the switch default, or the pointer
  // computed in the success block.
  PHINode *Result =
      PHINode::Create(Call->getType(), 2, Call->getName(), Cont->begin());
  Result->addIncoming(Constant::getNullValue(Call->getType()), Head);
  Result->addIncoming(FirstOccursLocation, Found);

  Call->replaceAllUsesWith(Result);
  Call->eraseFromParent();

  if (DTU)
    DTU->applyUpdates(Updates);

  return true;
}


static bool foldLibCalls(Instruction &I, TargetTransformInfo &TTI,
                         TargetLibraryInfo &TLI, AssumptionCache &AC,
                         DominatorTree &DT, const DataLayout &DL,
                         bool &MadeCFGChange) {
  // Only direct calls that may be treated as builtins are considered.
  auto *Call = dyn_cast<CallInst>(&I);
  if (!Call)
    return false;
  if (Call->isNoBuiltin())
    return false;

  Function *Callee = Call->getCalledFunction();
  if (!Callee)
    return false;

  // The callee must be a recognized library function that is emittable for
  // this module.
  LibFunc Kind;
  if (!TLI.getLibFunc(*Callee, Kind))
    return false;
  if (!isLibFuncEmittable(Call->getModule(), &TLI, Kind))
    return false;

  DomTreeUpdater DTU(&DT, DomTreeUpdater::UpdateStrategy::Lazy);

  switch (Kind) {
  case LibFunc_sqrt:
  case LibFunc_sqrtf:
  case LibFunc_sqrtl:
    // The sqrt fold reports its own result and does not flag a CFG change.
    return foldSqrt(Call, Kind, TTI, TLI, AC, DT);
  case LibFunc_strcmp:
  case LibFunc_strncmp:
    if (!StrNCmpInliner(Call, Kind, &DTU, DL).optimizeStrNCmp())
      return false;
    break;
  case LibFunc_memchr:
    if (!foldMemChr(Call, &DTU, DL))
      return false;
    break;
  default:
    return false;
  }

  // Both the string-compare and the memchr expansions introduce new blocks.
  MadeCFGChange = true;
  return true;
}


/// Match high part of long multiplication.

///

/// Considering a multiply made up of high and low parts, we can split the

/// multiply into:

///  x * y == (xh*T + xl) * (yh*T + yl)

/// where xh == x>>32 and xl == x & 0xffffffff. T = 2^32.

/// This expands to

///  xh*yh*T*T + xh*yl*T + xl*yh*T + xl*yl

/// which can be drawn as

/// [  xh*yh  ]

///      [  xh*yl  ]

///      [  xl*yh  ]

///           [  xl*yl  ]

/// We are looking for the "high" half, which is xh*yh + xh*yl>>32 + xl*yh>>32 +

/// some carrys. The carry makes this difficult and there are multiple ways of

/// representing it. The ones we attempt to support here are:

///  Carry:  xh*yh + carry + lowsum

///          carry = lowsum < xh*yl ? 0x100000000 : 0

///          lowsum = xh*yl + xl*yh + (xl*yl>>32)

///  Ladder: xh*yh + c2>>32 + c3>>32

///          c2 = xh*yl + (xl*yl>>32); c3 = c2&0xffffffff + xl*yh

///       or c2 = (xl*yh&0xffffffff) + xh*yl + (xl*yl>>32); c3 = xl*yh

///  Carry4: xh*yh + carry + crosssum>>32 + (xl*yl + crosssum&0xffffffff) >> 32

///          crosssum = xh*yl + xl*yh

///          carry = crosssum < xh*yl ? 0x100000000 : 0

///  Ladder4: xh*yh + (xl*yh)>>32 + (xh*yl)>>32 + low>>32;

///          low = (xl*yl)>>32 + (xl*yh)&0xffffffff + (xh*yl)&0xffffffff

///

/// They all start by matching xh*yh + 2 or 3 other operands. The bottom of the

/// tree is xh*yh, xh*yl, xl*yh and xl*yl.


// Try to recognize \p I as the root of a hand-written "high half of a
// multiplication" computation and, if it matches, replace it with a zext / mul
// / lshr / trunc sequence. Returns true if \p I was replaced. The comments
// below use 32/0xffffffff for a 64-bit type; the code uses BitWidth / 2.
static bool foldMulHigh(Instruction &I) {
  Type *Ty = I.getType();
  if (!Ty->isIntOrIntVectorTy())
    return false;

  // The type must split evenly into a low and a high half.
  unsigned BitWidth = Ty->getScalarSizeInBits();
  APInt LowMask = APInt::getLowBitsSet(BitWidth, BitWidth / 2);
  if (BitWidth % 2 != 0)
    return false;

  // Build trunc((zext(X) * zext(Y)) >> BitWidth) in a type twice as wide,
  // give it I's name and redirect all users of I to it.
  auto CreateMulHigh = [&](Value *X, Value *Y) {
    IRBuilder<> Builder(&I);
    Type *NTy = Ty->getWithNewBitWidth(BitWidth * 2);
    Value *XExt = Builder.CreateZExt(X, NTy);
    Value *YExt = Builder.CreateZExt(Y, NTy);
    Value *Mul = Builder.CreateMul(XExt, YExt, "", /*HasNUW=*/true);
    Value *High = Builder.CreateLShr(Mul, BitWidth);
    Value *Res = Builder.CreateTrunc(High, Ty, "", /*HasNUW=*/true);
    Res->takeName(&I);
    I.replaceAllUsesWith(Res);
    LLVM_DEBUG(dbgs() << "Created long multiply from parts of " << *X << " and "
                      << *Y << "\n");
    return true;
  };

  // Common check routines for X_lo*Y_lo and X_hi*Y_lo
  // CheckLoLo: XlYl == (X & LowMask) * (Y & LowMask), in either operand order.
  auto CheckLoLo = [&](Value *XlYl, Value *X, Value *Y) {
    return match(XlYl, m_c_Mul(m_And(m_Specific(X), m_SpecificInt(LowMask)),
                               m_And(m_Specific(Y), m_SpecificInt(LowMask))));
  };
  // CheckHiLo: XhYl == (X >> BitWidth/2) * (Y & LowMask), in either operand
  // order. Calling it with (Y, X) checks for Yh*Xl instead.
  auto CheckHiLo = [&](Value *XhYl, Value *X, Value *Y) {
    return match(XhYl,
                 m_c_Mul(m_LShr(m_Specific(X), m_SpecificInt(BitWidth / 2)),
                         m_And(m_Specific(Y), m_SpecificInt(LowMask))));
  };

  // "Carry" form: xh*yh + carry + lowsum>>32, where the carry is a select on
  // an unsigned-less-than compare.
  auto FoldMulHighCarry = [&](Value *X, Value *Y, Instruction *Carry,
                              Instruction *B) {
    // Looking for LowSum >> 32 and carry (select)
    if (Carry->getOpcode() != Instruction::Select)
      std::swap(Carry, B);

    // Carry = LowSum < XhYl ? 0x100000000 : 0
    Value *LowSum, *XhYl;
    if (!match(Carry,
               m_OneUse(m_Select(
                   m_OneUse(m_SpecificICmp(ICmpInst::ICMP_ULT, m_Value(LowSum),
                                           m_Value(XhYl))),
                   m_SpecificInt(APInt::getOneBitSet(BitWidth, BitWidth / 2)),
                   m_Zero()))))
      return false;

    // XhYl can be Xh*Yl or Xl*Yh
    if (!CheckHiLo(XhYl, X, Y)) {
      if (CheckHiLo(XhYl, Y, X))
        std::swap(X, Y);
      else
        return false;
    }
    // XhYl may be used by the compare and by LowSum, but by nothing else.
    if (XhYl->hasNUsesOrMore(3))
      return false;

    // B = LowSum >> 32
    if (!match(B, m_OneUse(m_LShr(m_Specific(LowSum),
                                  m_SpecificInt(BitWidth / 2)))) ||
        LowSum->hasNUsesOrMore(3))
      return false;

    // LowSum = XhYl + XlYh + XlYl>>32
    // The three addends may be associated in any of three ways.
    Value *XlYh, *XlYl;
    auto XlYlHi = m_LShr(m_Value(XlYl), m_SpecificInt(BitWidth / 2));
    if (!match(LowSum,
               m_c_Add(m_Specific(XhYl),
                       m_OneUse(m_c_Add(m_OneUse(m_Value(XlYh)), XlYlHi)))) &&
        !match(LowSum, m_c_Add(m_OneUse(m_Value(XlYh)),
                               m_OneUse(m_c_Add(m_Specific(XhYl), XlYlHi)))) &&
        !match(LowSum,
               m_c_Add(XlYlHi, m_OneUse(m_c_Add(m_Specific(XhYl),
                                                m_OneUse(m_Value(XlYh)))))))
      return false;

    // Check XlYl and XlYh
    if (!CheckLoLo(XlYl, X, Y))
      return false;
    if (!CheckHiLo(XlYh, Y, X))
      return false;

    return CreateMulHigh(X, Y);
  };

  // "Ladder" form: two shifted partial sums added to xh*yh.
  auto FoldMulHighLadder = [&](Value *X, Value *Y, Instruction *A,
                               Instruction *B) {
    //  xh*yh + c2>>32 + c3>>32
    //    c2 = xh*yl + (xl*yl>>32); c3 = c2&0xffffffff + xl*yh
    // or c2 = (xl*yh&0xffffffff) + xh*yl + (xl*yl>>32); c3 = xl*yh
    Value *XlYh, *XhYl, *XlYl, *C2, *C3;
    // Strip off the two expected shifts.
    if (!match(A, m_LShr(m_Value(C2), m_SpecificInt(BitWidth / 2))) ||
        !match(B, m_LShr(m_Value(C3), m_SpecificInt(BitWidth / 2))))
      return false;

    // If C3 is the one shaped like a nested add, treat it as C2 instead.
    if (match(C3, m_c_Add(m_Add(m_Value(), m_Value()), m_Value())))
      std::swap(C2, C3);
    // Try to match c2 = (xl*yh&0xffffffff) + xh*yl + (xl*yl>>32)
    // (three associations of the addends; the masked addend is C3 itself).
    if (match(C2,
              m_c_Add(m_c_Add(m_And(m_Specific(C3), m_SpecificInt(LowMask)),
                              m_Value(XlYh)),
                      m_LShr(m_Value(XlYl), m_SpecificInt(BitWidth / 2)))) ||
        match(C2, m_c_Add(m_c_Add(m_And(m_Specific(C3), m_SpecificInt(LowMask)),
                                  m_LShr(m_Value(XlYl),
                                         m_SpecificInt(BitWidth / 2))),
                          m_Value(XlYh))) ||
        match(C2, m_c_Add(m_c_Add(m_LShr(m_Value(XlYl),
                                         m_SpecificInt(BitWidth / 2)),
                                  m_Value(XlYh)),
                          m_And(m_Specific(C3), m_SpecificInt(LowMask))))) {
      XhYl = C3;
    } else {
      // Match c3 = c2&0xffffffff + xl*yh
      // If it does not match this way around, retry with C2 and C3 exchanged.
      if (!match(C3, m_c_Add(m_And(m_Specific(C2), m_SpecificInt(LowMask)),
                             m_Value(XlYh))))
        std::swap(C2, C3);
      if (!match(C3, m_c_Add(m_OneUse(
                                 m_And(m_Specific(C2), m_SpecificInt(LowMask))),
                             m_Value(XlYh))) ||
          !C3->hasOneUse() || C2->hasNUsesOrMore(3))
        return false;

      // Match c2 = xh*yl + (xl*yl >> 32)
      if (!match(C2, m_c_Add(m_LShr(m_Value(XlYl), m_SpecificInt(BitWidth / 2)),
                             m_Value(XhYl))))
        return false;
    }

    // Match XhYl and XlYh - they can appear either way around.
    if (!CheckHiLo(XlYh, Y, X))
      std::swap(XlYh, XhYl);
    if (!CheckHiLo(XlYh, Y, X))
      return false;
    if (!CheckHiLo(XhYl, X, Y))
      return false;
    if (!CheckLoLo(XlYl, X, Y))
      return false;

    return CreateMulHigh(X, Y);
  };

  // "Ladder4" form: xh*yh plus three shifted operands.
  auto FoldMulHighLadder4 = [&](Value *X, Value *Y, Instruction *A,
                                Instruction *B, Instruction *C) {
    ///  Ladder4: xh*yh + (xl*yh)>>32 + (xh*yl)>>32 + low>>32;
    ///           low = (xl*yl)>>32 + (xl*yh)&0xffffffff + (xh*yl)&0xffffffff

    // Find A = Low >> 32 and B/C = XhYl>>32, XlYh>>32.
    // Rotate the shifted-add operand into A.
    auto ShiftAdd =
        m_LShr(m_Add(m_Value(), m_Value()), m_SpecificInt(BitWidth / 2));
    if (!match(A, ShiftAdd))
      std::swap(A, B);
    if (!match(A, ShiftAdd))
      std::swap(A, C);
    Value *Low;
    if (!match(A, m_LShr(m_OneUse(m_Value(Low)), m_SpecificInt(BitWidth / 2))))
      return false;

    // Match B == XhYl>>32 and C == XlYh>>32
    // (or the other way around; each product may have at most two uses).
    Value *XhYl, *XlYh;
    if (!match(B, m_LShr(m_Value(XhYl), m_SpecificInt(BitWidth / 2))) ||
        !match(C, m_LShr(m_Value(XlYh), m_SpecificInt(BitWidth / 2))))
      return false;
    if (!CheckHiLo(XhYl, X, Y))
      std::swap(XhYl, XlYh);
    if (!CheckHiLo(XhYl, X, Y) || XhYl->hasNUsesOrMore(3))
      return false;
    if (!CheckHiLo(XlYh, Y, X) || XlYh->hasNUsesOrMore(3))
      return false;

    // Match Low as XlYl>>32 + XhYl&0xffffffff + XlYh&0xffffffff
    // The three addends may be associated in any of three ways.
    Value *XlYl;
    if (!match(
            Low,
            m_c_Add(
                m_OneUse(m_c_Add(
                    m_OneUse(m_And(m_Specific(XhYl), m_SpecificInt(LowMask))),
                    m_OneUse(m_And(m_Specific(XlYh), m_SpecificInt(LowMask))))),
                m_OneUse(
                    m_LShr(m_Value(XlYl), m_SpecificInt(BitWidth / 2))))) &&
        !match(
            Low,
            m_c_Add(
                m_OneUse(m_c_Add(
                    m_OneUse(m_And(m_Specific(XhYl), m_SpecificInt(LowMask))),
                    m_OneUse(
                        m_LShr(m_Value(XlYl), m_SpecificInt(BitWidth / 2))))),
                m_OneUse(m_And(m_Specific(XlYh), m_SpecificInt(LowMask))))) &&
        !match(
            Low,
            m_c_Add(
                m_OneUse(m_c_Add(
                    m_OneUse(m_And(m_Specific(XlYh), m_SpecificInt(LowMask))),
                    m_OneUse(
                        m_LShr(m_Value(XlYl), m_SpecificInt(BitWidth / 2))))),
                m_OneUse(m_And(m_Specific(XhYl), m_SpecificInt(LowMask))))))
      return false;
    if (!CheckLoLo(XlYl, X, Y))
      return false;

    return CreateMulHigh(X, Y);
  };

  // "Carry4" form: xh*yh plus a select carry and two shifted operands.
  auto FoldMulHighCarry4 = [&](Value *X, Value *Y, Instruction *Carry,
                               Instruction *B, Instruction *C) {
    //  xh*yh + carry + crosssum>>32 + (xl*yl + crosssum&0xffffffff) >> 32
    //  crosssum = xh*yl+xl*yh
    //  carry = crosssum < xh*yl ? 0x100000000 : 0
    // Rotate the select into Carry.
    if (Carry->getOpcode() != Instruction::Select)
      std::swap(Carry, B);
    if (Carry->getOpcode() != Instruction::Select)
      std::swap(Carry, C);

    // Carry = CrossSum < XhYl ? 0x100000000 : 0
    Value *CrossSum, *XhYl;
    if (!match(Carry,
               m_OneUse(m_Select(
                   m_OneUse(m_SpecificICmp(ICmpInst::ICMP_ULT,
                                           m_Value(CrossSum), m_Value(XhYl))),
                   m_SpecificInt(APInt::getOneBitSet(BitWidth, BitWidth / 2)),
                   m_Zero()))))
      return false;

    // B must be CrossSum >> 32; exchange B and C if it is the other one.
    if (!match(B, m_LShr(m_Specific(CrossSum), m_SpecificInt(BitWidth / 2))))
      std::swap(B, C);
    if (!match(B, m_LShr(m_Specific(CrossSum), m_SpecificInt(BitWidth / 2))))
      return false;

    // C = LowAccum >> 32 with LowAccum = (XlYl >> 32) + (CrossSum & LowMask).
    Value *XlYl, *LowAccum;
    if (!match(C, m_LShr(m_Value(LowAccum), m_SpecificInt(BitWidth / 2))) ||
        !match(LowAccum, m_c_Add(m_OneUse(m_LShr(m_Value(XlYl),
                                                 m_SpecificInt(BitWidth / 2))),
                                 m_OneUse(m_And(m_Specific(CrossSum),
                                                m_SpecificInt(LowMask))))) ||
        LowAccum->hasNUsesOrMore(3))
      return false;
    if (!CheckLoLo(XlYl, X, Y))
      return false;

    // XhYl can be Xh*Yl or Xl*Yh; exchange X and Y in the latter case.
    if (!CheckHiLo(XhYl, X, Y))
      std::swap(X, Y);
    if (!CheckHiLo(XhYl, X, Y))
      return false;
    // CrossSum = XhYl + XlYh, with bounded use counts on both values.
    Value *XlYh;
    if (!match(CrossSum, m_c_Add(m_Specific(XhYl), m_OneUse(m_Value(XlYh)))) ||
        !CheckHiLo(XlYh, Y, X) || CrossSum->hasNUsesOrMore(4) ||
        XhYl->hasNUsesOrMore(3))
      return false;

    return CreateMulHigh(X, Y);
  };

  // X and Y are the two inputs, A, B and C are other parts of the pattern
  // (crosssum>>32, carry, etc).
  Value *X, *Y;
  Instruction *A, *B, *C;
  auto HiHi = m_OneUse(m_Mul(m_LShr(m_Value(X), m_SpecificInt(BitWidth / 2)),
                             m_LShr(m_Value(Y), m_SpecificInt(BitWidth / 2))));
  // Three-operand sums: xh*yh + A + B. If neither form folds, fall through to
  // the four-operand patterns below.
  if ((match(&I, m_c_Add(HiHi, m_OneUse(m_Add(m_Instruction(A),
                                              m_Instruction(B))))) ||
       match(&I, m_c_Add(m_Instruction(A),
                         m_OneUse(m_c_Add(HiHi, m_Instruction(B)))))) &&
      A->hasOneUse() && B->hasOneUse())
    if (FoldMulHighCarry(X, Y, A, B) || FoldMulHighLadder(X, Y, A, B))
      return true;

  // Four-operand sums: xh*yh + A + B + C, in any of the listed associations.
  if ((match(&I, m_c_Add(HiHi, m_OneUse(m_c_Add(
                                   m_Instruction(A),
                                   m_OneUse(m_Add(m_Instruction(B),
                                                  m_Instruction(C))))))) ||
       match(&I, m_c_Add(m_Instruction(A),
                         m_OneUse(m_c_Add(
                             HiHi, m_OneUse(m_Add(m_Instruction(B),
                                                  m_Instruction(C))))))) ||
       match(&I, m_c_Add(m_Instruction(A),
                         m_OneUse(m_c_Add(
                             m_Instruction(B),
                             m_OneUse(m_c_Add(HiHi, m_Instruction(C))))))) ||
       match(&I,
             m_c_Add(m_OneUse(m_c_Add(HiHi, m_Instruction(A))),
                     m_OneUse(m_Add(m_Instruction(B), m_Instruction(C)))))) &&
      A->hasOneUse() && B->hasOneUse() && C->hasOneUse())
    return FoldMulHighCarry4(X, Y, A, B, C) ||
           FoldMulHighLadder4(X, Y, A, B, C);

  return false;
}


/// This is the entry point for folds that could be implemented in regular

/// InstCombine, but they are separated because they are not expected to

/// occur frequently and/or have more than a constant-length pattern match.


static bool foldUnusualPatterns(Function &F, DominatorTree &DT,
                                TargetTransformInfo &TTI,
                                TargetLibraryInfo &TLI, AliasAnalysis &AA,
                                AssumptionCache &AC, bool &MadeCFGChange) {
  const DataLayout &DL = F.getDataLayout();
  bool Changed = false;

  for (BasicBlock &Block : F) {
    // Ignore unreachable basic blocks.
    if (!DT.isReachableFromEntry(&Block))
      continue;

    // Walk the block backwards for efficiency. We're matching a chain of
    // use->defs, so we're more likely to succeed by starting from the bottom.
    // Also, we want to avoid matching partial patterns.
    // TODO: It would be more efficient if we removed dead instructions
    // iteratively in this loop rather than waiting until the end.
    for (Instruction &Inst : make_early_inc_range(llvm::reverse(Block))) {
      Changed |= foldAnyOrAllBitsSet(Inst);
      Changed |= foldGuardedFunnelShift(Inst, DT);
      Changed |= foldSelectSplitCTTZ(Inst);
      Changed |= foldSelectSplitCTLZ(Inst);
      Changed |= tryToRecognizePopCount(Inst);
      Changed |= tryToRecognizePopCount1(Inst);
      Changed |= tryToRecognizePopCount2n3(Inst);
      Changed |= tryToFPToSat(Inst, TTI);
      Changed |= tryToRecognizeTableBasedCttz(Inst, DL);
      Changed |= tryToRecognizeTableBasedLog2(Inst, DL, TTI);
      Changed |= foldConsecutiveLoads(Inst, DL, TTI, AA, DT);
      Changed |= foldPatternedLoads(Inst, DL);
      Changed |= foldICmpOrChain(Inst, DL, TTI, AA, DT);
      Changed |= foldMulHigh(Inst);
      // NOTE: foldLibCalls may erase the instruction it is given, so it has
      // to stay the last fold in this sequence; nothing may touch `Inst`
      // after it.
      Changed |= foldLibCalls(Inst, TTI, TLI, AC, DT, DL, MadeCFGChange);
    }

    // Stores are handled in one separate pass over the block to avoid
    // redundantly scanning them multiple times.
    Changed |= foldConsecutiveStores(Block, DL, TTI, AA);
  }

  if (!Changed)
    return false;

  // We're done with transforms, so remove dead instructions.
  for (BasicBlock &Block : F)
    SimplifyInstructionsInBlock(&Block);
  return true;
}


/// This is the entry point for all transforms. Pass manager differences are

/// handled in the callers of this function.


static bool runImpl(Function &F, AssumptionCache &AC, TargetTransformInfo &TTI,
                    TargetLibraryInfo &TLI, DominatorTree &DT,
                    AliasAnalysis &AA, bool &MadeCFGChange) {
  // First run the truncate-expression combiner over the function ...
  const DataLayout &DL = F.getDataLayout();
  TruncInstCombine TIC(AC, TLI, DL, DT);
  const bool TruncChanged = TIC.run(F);

  // ... then the remaining pattern folds. Both steps always run.
  const bool PatternsChanged =
      foldUnusualPatterns(F, DT, TTI, TLI, AA, AC, MadeCFGChange);

  return TruncChanged || PatternsChanged;
}


PreservedAnalyses AggressiveInstCombinePass::run(Function &F,
                                                 FunctionAnalysisManager &AM) {
  // Fetch the analyses the transforms depend on.
  auto &AC = AM.getResult<AssumptionAnalysis>(F);
  auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
  auto &TTI = AM.getResult<TargetIRAnalysis>(F);
  auto &AA = AM.getResult<AAManager>(F);

  bool MadeCFGChange = false;
  const bool Changed = runImpl(F, AC, TTI, TLI, DT, AA, MadeCFGChange);

  // No changes, all analyses are preserved.
  if (!Changed)
    return PreservedAnalyses::all();

  // Something changed. If the CFG itself was modified, only the dominator
  // tree is marked as preserved; otherwise all CFG analyses are.
  PreservedAnalyses PA;
  if (MadeCFGChange)
    PA.preserve<DominatorTreeAnalysis>();
  else
    PA.preserveSet<CFGAnalyses>();
  return PA;
}


assert
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")

Select
AMDGPU Register Bank Select
Definition AMDGPURegBankSelect.cpp:68

PHI
Rewrite undef for PHI
Definition AMDGPURewriteUndefForPHI.cpp:98

DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition ARMSLSHardening.cpp:73

AggressiveInstCombineInternal.h

replaceWithPopCount
static void replaceWithPopCount(Instruction &I, Value *Root)
Helper function to replace an instruction with a popcount intrinsic.
Definition AggressiveInstCombine.cpp:492

tryToRecognizePopCount
static bool tryToRecognizePopCount(Instruction &I)
Definition AggressiveInstCombine.cpp:519

foldSqrt
static bool foldSqrt(CallInst *Call, LibFunc Func, TargetTransformInfo &TTI, TargetLibraryInfo &TLI, AssumptionCache &AC, DominatorTree &DT)
Try to replace a mathlib call to sqrt with the LLVM intrinsic.
Definition AggressiveInstCombine.cpp:881

isLog2Table
static bool isLog2Table(Constant *Table, const APInt &Mul, const APInt &Shift, Type *AccessTy, unsigned InputBits, const APInt &GEPIdxFactor, const DataLayout &DL)
Definition AggressiveInstCombine.cpp:1076

foldAnyOrAllBitsSet
static bool foldAnyOrAllBitsSet(Instruction &I)
Match patterns that correspond to "any-bits-set" and "all-bits-set".
Definition AggressiveInstCombine.cpp:442

MemChrInlineThreshold
static cl::opt< unsigned > MemChrInlineThreshold("memchr-inline-threshold", cl::init(3), cl::Hidden, cl::desc("The maximum length of a constant string to " "inline a memchr call."))

tryToFPToSat
static bool tryToFPToSat(Instruction &I, TargetTransformInfo &TTI)
Fold smin(smax(fptosi(x), C1), C2) to llvm.fptosi.sat(x), providing C1 and C2 saturate the value of t...
Definition AggressiveInstCombine.cpp:826

StrNCmpInlineThreshold
static cl::opt< unsigned > StrNCmpInlineThreshold("strncmp-inline-threshold", cl::init(3), cl::Hidden, cl::desc("The maximum length of a constant string for a builtin string cmp " "call eligible for inlining. The default value is 3."))

matchAndOrChain
static bool matchAndOrChain(Value *V, MaskOps &MOps)
This is a recursive helper for foldAnyOrAllBitsSet() that walks through a chain of 'and' or 'or' inst...
Definition AggressiveInstCombine.cpp:396

foldMemChr
static bool foldMemChr(CallInst *Call, DomTreeUpdater *DTU, const DataLayout &DL)
Convert memchr with a small constant string into a switch.
Definition AggressiveInstCombine.cpp:1987

tryToRecognizePopCount2n3
static bool tryToRecognizePopCount2n3(Instruction &I)
Definition AggressiveInstCombine.cpp:731

optimizeShiftInOrChain
static Value * optimizeShiftInOrChain(Value *V, IRBuilder<> &Builder)
Combine away instructions providing they are still equivalent when compared against 0.
Definition AggressiveInstCombine.cpp:1645

foldConsecutiveLoads
static bool foldConsecutiveLoads(Instruction &I, const DataLayout &DL, TargetTransformInfo &TTI, AliasAnalysis &AA, const DominatorTree &DT)
Definition AggressiveInstCombine.cpp:1421

foldGuardedFunnelShift
static bool foldGuardedFunnelShift(Instruction &I, const DominatorTree &DT)
Match a pattern for a bitwise funnel/rotate operation that partially guards against undefined behavio...
Definition AggressiveInstCombine.cpp:258

tryToRecognizeTableBasedCttz
static bool tryToRecognizeTableBasedCttz(Instruction &I, const DataLayout &DL)
Definition AggressiveInstCombine.cpp:988

mergePartStores
static bool mergePartStores(SmallVectorImpl< PartStore > &Parts, const DataLayout &DL, TargetTransformInfo &TTI)
Definition AggressiveInstCombine.cpp:1571

foldLoadsRecursive
static bool foldLoadsRecursive(Value *V, LoadOps &LOps, const DataLayout &DL, AliasAnalysis &AA, bool IsRoot=false)
Definition AggressiveInstCombine.cpp:1272

mergeConsecutivePartStores
static bool mergeConsecutivePartStores(ArrayRef< PartStore > Parts, unsigned Width, const DataLayout &DL, TargetTransformInfo &TTI)
Definition AggressiveInstCombine.cpp:1521

MaxInstrsToScan
static cl::opt< unsigned > MaxInstrsToScan("aggressive-instcombine-max-scan-instrs", cl::init(64), cl::Hidden, cl::desc("Max number of instructions to scan for aggressive instcombine."))

foldSelectSplitCTTZ
static bool foldSelectSplitCTTZ(Instruction &I)
Try to fold a select-based split cttz pattern into a single full-width cttz.
Definition AggressiveInstCombine.cpp:89

foldSelectSplitCTLZ
static bool foldSelectSplitCTLZ(Instruction &I)
Same as foldSelectSplitCTTZ but for leading zeros (ctlz).
Definition AggressiveInstCombine.cpp:174

tryToRecognizePopCount1
static bool tryToRecognizePopCount1(Instruction &I)
Definition AggressiveInstCombine.cpp:610

foldICmpOrChain
static bool foldICmpOrChain(Instruction &I, const DataLayout &DL, TargetTransformInfo &TTI, AliasAnalysis &AA, const DominatorTree &DT)
Definition AggressiveInstCombine.cpp:1673

isCTTZTable
static bool isCTTZTable(Constant *Table, const APInt &Mul, const APInt &Shift, const APInt &AndMask, Type *AccessTy, unsigned InputBits, const APInt &GEPIdxFactor, const DataLayout &DL)
Definition AggressiveInstCombine.cpp:913

matchPartStore
static std::optional< PartStore > matchPartStore(Instruction &I, const DataLayout &DL)
Definition AggressiveInstCombine.cpp:1497

foldConsecutiveStores
static bool foldConsecutiveStores(BasicBlock &BB, const DataLayout &DL, TargetTransformInfo &TTI, AliasAnalysis &AA)
Definition AggressiveInstCombine.cpp:1604

getStrideAndModOffsetOfGEP
static std::pair< APInt, APInt > getStrideAndModOffsetOfGEP(Value *PtrOp, const DataLayout &DL)
Definition AggressiveInstCombine.cpp:1702

foldPatternedLoads
static bool foldPatternedLoads(Instruction &I, const DataLayout &DL)
If C is a constant patterned array and all valid loaded results for given alignment are same to a con...
Definition AggressiveInstCombine.cpp:1743

tryToRecognizeTableBasedLog2
static bool tryToRecognizeTableBasedLog2(Instruction &I, const DataLayout &DL, TargetTransformInfo &TTI)
Definition AggressiveInstCombine.cpp:1156

foldLibCalls
static bool foldLibCalls(Instruction &I, TargetTransformInfo &TTI, TargetLibraryInfo &TLI, AssumptionCache &AC, DominatorTree &DT, const DataLayout &DL, bool &MadeCFGChange)
Definition AggressiveInstCombine.cpp:2066

foldMulHigh
static bool foldMulHigh(Instruction &I)
Match high part of long multiplication.
Definition AggressiveInstCombine.cpp:2139

foldUnusualPatterns
static bool foldUnusualPatterns(Function &F, DominatorTree &DT, TargetTransformInfo &TTI, TargetLibraryInfo &TLI, AliasAnalysis &AA, AssumptionCache &AC, bool &MadeCFGChange)
This is the entry point for folds that could be implemented in regular InstCombine,...
Definition AggressiveInstCombine.cpp:2435

AggressiveInstCombine.h
AggressiveInstCombiner - Combine expression patterns to form expressions with fewer,...

AliasAnalysis.h

AssumptionCache.h

BasicAliasAnalysis.h
This is the interface for LLVM's primary stateless and local alias analysis.

BasicBlockUtils.h

X
#define X(NUM, ENUM, NAME)
Definition ELF.h:853

BuildLibCalls.h

A
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")

E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")

B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")

Casting.h

CommandLine.h

ConstantFolding.h

DataLayout.h

DomTreeUpdater.h

Dominators.h

runImpl
static bool runImpl(Function &F, const TargetLowering &TLI, const LibcallLoweringInfo &Libcalls, AssumptionCache *AC)
Definition ExpandIRInsts.cpp:1159

DEBUG_TYPE
#define DEBUG_TYPE
Definition GenericCycleImpl.h:31

GlobalsModRef.h
This is the interface for a simple mod/ref and alias analysis over globals.

GEP
Hexagon Common GEP
Definition HexagonCommonGEP.cpp:164

getAlign
static MaybeAlign getAlign(Value *Ptr)
Definition IRBuilder.cpp:528

IRBuilder.h

Function.h

Instruction.h

matchFunnelShift
static Instruction * matchFunnelShift(Instruction &Or, InstCombinerImpl &IC)
Match UB-safe variants of the funnel shift intrinsic.
Definition InstCombineAndOrXor.cpp:3116

F
#define F(x, y, z)
Definition MD5.cpp:54

I
#define I(x, y, z)
Definition MD5.cpp:57

MDBuilder.h

High
uint64_t High
Definition NVVMIntrRange.cpp:46

PatternMatch.h

ProfDataUtils.h
This file contains the declarations for profiling metadata utility functions.

Cond
const SmallVectorImpl< MachineOperand > & Cond
Definition RISCVRedundantCopyElimination.cpp:73

MaskShift
static const MCExpr * MaskShift(const MCExpr *Val, uint32_t Mask, uint32_t Shift, MCContext &Ctx)
Definition SIProgramInfo.cpp:164

Statistic.h
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...

STATISTIC
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171

LLVM_DEBUG
#define LLVM_DEBUG(...)
Definition Debug.h:119

Y
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")

TargetLibraryInfo.h

TargetTransformInfo.h
This pass exposes codegen information to IR-level passes.

Local.h

ValueTracking.h

RHS
Value * RHS
Definition X86PartialReduction.cpp:81

LHS
Value * LHS
Definition X86PartialReduction.cpp:80

Mul
BinaryOperator * Mul
Definition X86PartialReduction.cpp:75

llvm::AAManager
A manager for alias analyses.
Definition AliasAnalysis.h:1020

llvm::APInt
Class for arbitrary precision integers.
Definition APInt.h:78

llvm::APInt::getAllOnes
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235

llvm::APInt::getZExtValue
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1563

llvm::APInt::getActiveBits
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition APInt.h:1535

llvm::APInt::setBit
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition APInt.h:1353

llvm::APInt::isZero
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381

llvm::APInt::getBitWidth
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1511

llvm::APInt::isNegative
bool isNegative() const
Determine sign of this APInt.
Definition APInt.h:330

llvm::APInt::getSplat
static LLVM_ABI APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition APInt.cpp:652

llvm::APInt::srem
LLVM_ABI APInt srem(const APInt &RHS) const
Function for signed remainder operation.
Definition APInt.cpp:1788

llvm::APInt::isSubsetOf
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1264

llvm::APInt::getLowBitsSet
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:307

llvm::APInt::slt
bool slt(const APInt &RHS) const
Signed less than comparison.
Definition APInt.h:1137

llvm::APInt::countTrailingOnes
unsigned countTrailingOnes() const
Definition APInt.h:1685

llvm::APInt::getOneBitSet
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition APInt.h:240

llvm::APInt::uge
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition APInt.h:1228

llvm::AggressiveInstCombinePass::run
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Definition AggressiveInstCombine.cpp:2498

llvm::AnalysisManager::getResult
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition PassManager.h:434

llvm::ArrayRef
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40

llvm::ArrayRef::front
const T & front() const
Get the first element.
Definition ArrayRef.h:144

llvm::ArrayRef::size
size_t size() const
Get the array size.
Definition ArrayRef.h:141

llvm::AssumptionAnalysis
A function analysis which provides an AssumptionCache.
Definition AssumptionCache.h:180

llvm::AssumptionCache
A cache of @llvm.assume calls within a function.
Definition AssumptionCache.h:44

llvm::BasicBlock
LLVM Basic Block Representation.
Definition BasicBlock.h:62

llvm::BasicBlock::begin
iterator begin()
Instruction iterator methods.
Definition BasicBlock.h:461

llvm::BasicBlock::getFirstInsertionPt
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition BasicBlock.cpp:366

llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213

llvm::BasicBlock::Create
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition BasicBlock.h:206

llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
Definition BasicBlock.h:237

llvm::BatchAAResults
This class is a wrapper over an AAResults, and it is intended to be used only when there are no IR ch...
Definition AliasAnalysis.h:662

llvm::BatchAAResults::getModRefInfo
ModRefInfo getModRefInfo(const Instruction *I, const std::optional< MemoryLocation > &OptLoc)
Definition AliasAnalysis.h:687

llvm::CFGAnalyses
Represents analyses that only rely on functions' control flow.
Definition Analysis.h:73

llvm::CallBase::getArgOperand
Value * getArgOperand(unsigned i) const
Definition InstrTypes.h:1357

llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition Instructions.h:1531

llvm::CmpInst::ICMP_ULT
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:765

llvm::CmpInst::ICMP_EQ
@ ICMP_EQ
equal
Definition InstrTypes.h:761

llvm::CmpInst::ICMP_NE
@ ICMP_NE
not equal
Definition InstrTypes.h:762

llvm::CmpPredicate
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
Definition CmpPredicate.h:23

llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition Constants.h:87

llvm::ConstantInt::getZExtValue
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:168

llvm::Constant
This is an important base class in LLVM.
Definition Constant.h:43

llvm::Constant::getNullValue
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition Constants.cpp:399

llvm::DataLayout
A parsed version of the target data layout string, and methods for querying it.
Definition DataLayout.h:64

llvm::DebugLoc::getMergedLocations
static LLVM_ABI DebugLoc getMergedLocations(ArrayRef< DebugLoc > Locs)
Try to combine the vector of locations passed as input in a single one.
Definition DebugLoc.cpp:166

llvm::DomTreeUpdater
Definition DomTreeUpdater.h:34

llvm::DominatorTreeAnalysis
Analysis pass which computes a DominatorTree.
Definition Dominators.h:278

llvm::DominatorTreeBase< BasicBlock, false >::Insert
static constexpr UpdateKind Insert
Definition GenericDomTree.h:286

llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:159

llvm::DominatorTree::isReachableFromEntry
LLVM_ABI bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
Definition Dominators.cpp:322

llvm::DominatorTree::dominates
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Definition Dominators.cpp:123

llvm::Function
Definition Function.h:65

llvm::GenericDomTreeUpdater::applyUpdates
void applyUpdates(ArrayRef< UpdateT > Updates)
Submit updates to all available trees.
Definition GenericDomTreeUpdaterImpl.h:59

llvm::GetElementPtrInst
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition Instructions.h:968

llvm::GlobalVariable
Definition GlobalVariable.h:41

llvm::GlobalVariable::getInitializer
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
Definition GlobalVariable.h:155

llvm::GlobalVariable::hasInitializer
bool hasInitializer() const
Definitions have initializers, declarations don't.
Definition GlobalVariable.h:111

llvm::GlobalVariable::isConstant
bool isConstant() const
If the value is a global constant, its value is immutable throughout the runtime execution of the pro...
Definition GlobalVariable.h:183

llvm::ICmpInst::isEquality
static bool isEquality(Predicate P)
Return true if this predicate is either EQ or NE.
Definition Instructions.h:1335

llvm::IRBuilderBase::SetCurrentDebugLocation
void SetCurrentDebugLocation(const DebugLoc &L)
Set location information used by debugging information.
Definition IRBuilder.h:247

llvm::IRBuilderBase::CreateBr
UncondBrInst * CreateBr(BasicBlock *Dest)
Create an unconditional 'br label X' instruction.
Definition IRBuilder.h:1232

llvm::IRBuilderBase::CreatePHI
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Definition IRBuilder.h:2539

llvm::IRBuilderBase::CreateSwitch
SwitchInst * CreateSwitch(Value *V, BasicBlock *Dest, unsigned NumCases=10, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a switch instruction with the specified value, default dest, and with a hint for the number of...
Definition IRBuilder.h:1261

llvm::IRBuilderBase::CreateTrunc
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition IRBuilder.h:2106

llvm::IRBuilderBase::SetInsertPoint
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition IRBuilder.h:207

llvm::IRBuilderBase::CreateInBoundsPtrAdd
Value * CreateInBoundsPtrAdd(Value *Ptr, Value *Offset, const Twine &Name="")
Definition IRBuilder.h:2096

llvm::IRBuilderBase::getInt8Ty
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Definition IRBuilder.h:576

llvm::IRBuilder
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2858

llvm::InstructionCost
Definition InstructionCost.h:30

llvm::Instruction
Definition Instruction.h:69

llvm::Instruction::getDebugLoc
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition Instruction.h:545

llvm::Instruction::setAAMetadata
LLVM_ABI void setAAMetadata(const AAMDNodes &N)
Sets the AA metadata on this instruction from the AAMDNodes structure.
Definition Metadata.cpp:1852

llvm::Instruction::eraseFromParent
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition Instruction.cpp:112

llvm::Instruction::getFunction
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
Definition Instruction.cpp:90

llvm::Instruction::getAAMetadata
LLVM_ABI AAMDNodes getAAMetadata() const
Returns the AA metadata for this instruction.
Definition Metadata.cpp:1822

llvm::IntegerType
Class to represent integer types.
Definition DerivedTypes.h:42

llvm::IntegerType::get
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition Type.cpp:354

llvm::IntrinsicCostAttributes
Definition TargetTransformInfo.h:181

llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68

llvm::LoadInst
An instruction for reading from memory.
Definition Instructions.h:181

llvm::LoadInst::getPointerAddressSpace
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Definition Instructions.h:266

llvm::LoadInst::getPointerOperand
Value * getPointerOperand()
Definition Instructions.h:260

llvm::LoadInst::isSimple
bool isSimple() const
Definition Instructions.h:252

llvm::LocationSize::precise
static LocationSize precise(uint64_t Value)
Definition MemoryLocation.h:95

llvm::MDBuilder
Definition MDBuilder.h:37

llvm::MDBuilder::createUnlikelyBranchWeights
LLVM_ABI MDNode * createUnlikelyBranchWeights()
Return metadata containing two branch weights, with significant bias towards false destination.
Definition MDBuilder.cpp:48

llvm::MapVector::size
size_type size() const
Definition MapVector.h:58

llvm::MapVector::front
std::pair< KeyT, ValueT > & front()
Definition MapVector.h:81

llvm::MemoryLocation
Representation for a specific memory location.
Definition MemoryLocation.h:217

llvm::MemoryLocation::get
static LLVM_ABI MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
Definition MemoryLocation.cpp:36

llvm::MemoryLocation::getBeforeOrAfter
static MemoryLocation getBeforeOrAfter(const Value *Ptr, const AAMDNodes &AATags=AAMDNodes())
Return a location that may access any location before or after Ptr, while remaining within the underl...
Definition MemoryLocation.h:285

llvm::PHINode
Definition Instructions.h:2661

llvm::PHINode::addIncoming
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Definition Instructions.h:2795

llvm::PHINode::Create
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
Definition Instructions.h:2695

llvm::PoisonValue::get
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Definition Constants.cpp:2056

llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112

llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118

llvm::PreservedAnalyses::preserveSet
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
Definition Analysis.h:151

llvm::PreservedAnalyses::preserve
PreservedAnalyses & preserve()
Mark an analysis as preserved.
Definition Analysis.h:132

llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition SmallPtrSet.h:387

llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.
Definition SmallPtrSet.h:533

llvm::SmallVectorImpl
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition SmallVector.h:581

llvm::SmallVectorImpl::reserve
void reserve(size_type N)
Definition SmallVector.h:671

llvm::SmallVectorImpl::clear
void clear()
Definition SmallVector.h:618

llvm::SmallVectorTemplateBase::push_back
void push_back(const T &Elt)
Definition SmallVector.h:423

llvm::SmallVectorTemplateCommon::end
iterator end()
Definition SmallVector.h:278

llvm::SmallVectorTemplateCommon::size
size_t size() const
Definition SmallVector.h:83

llvm::SmallVectorTemplateCommon::empty
bool empty() const
Definition SmallVector.h:86

llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition SmallVector.h:1225

llvm::StoreInst
An instruction for storing to memory.
Definition Instructions.h:297

llvm::StringRef
Represent a constant reference to a string, i.e.
Definition StringRef.h:56

llvm::StringRef::npos
static constexpr size_t npos
Definition StringRef.h:58

llvm::SwitchInst
Multiway switch.
Definition Instructions.h:3341

llvm::TargetIRAnalysis
Analysis pass providing the TargetTransformInfo.
Definition TargetTransformInfo.h:2117

llvm::TargetLibraryAnalysis
Analysis pass providing the TargetLibraryInfo.
Definition TargetLibraryInfo.h:602

llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition TargetLibraryInfo.h:266

llvm::TargetLibraryInfo::getLibFunc
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
Definition TargetLibraryInfo.h:333

llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition TargetTransformInfo.h:271

llvm::TargetTransformInfo::VectorInstrContext::None
@ None
The insert/extract is not used with a load/store.
Definition TargetTransformInfo.h:1068

llvm::TargetTransformInfo::TCK_RecipThroughput
@ TCK_RecipThroughput
Reciprocal throughput.
Definition TargetTransformInfo.h:335

llvm::TargetTransformInfo::TCK_SizeAndLatency
@ TCK_SizeAndLatency
The weighted sum of size and latency.
Definition TargetTransformInfo.h:338

llvm::TargetTransformInfo::TCC_Basic
@ TCC_Basic
The cost of a typical 'add' instruction.
Definition TargetTransformInfo.h:361

llvm::TruncInstCombine
Definition AggressiveInstCombineInternal.h:52

llvm::TruncInstCombine::run
bool run(Function &F)
Perform TruncInst pattern optimization on given function.
Definition TruncInstCombine.cpp:524

llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46

llvm::Type::getIntegerBitWidth
LLVM_ABI unsigned getIntegerBitWidth() const
Definition DerivedTypes.h:107

llvm::Type::getPrimitiveSizeInBits
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:201

llvm::Type::getWithNewBitWidth
LLVM_ABI Type * getWithNewBitWidth(unsigned NewBitWidth) const
Given an integer or vector type, change the lane bitwidth to NewBitwidth, whilst keeping the old numb...
Definition DerivedTypes.h:824

llvm::Type::getScalarSizeInBits
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:236

llvm::Type::isIntegerTy
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:257

llvm::Type::getIntNTy
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:317

llvm::Value
LLVM Value Representation.
Definition Value.h:75

llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255

llvm::Value::hasOneUse
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439

llvm::Value::replaceAllUsesWith
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:549

llvm::Value::getContext
LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.h:258

llvm::Value::hasNUsesOrMore
LLVM_ABI bool hasNUsesOrMore(unsigned N) const
Return true if this value has N uses or more.
Definition Value.cpp:154

llvm::Value::stripAndAccumulateConstantOffsets
LLVM_ABI const Value * stripAndAccumulateConstantOffsets(const DataLayout &DL, APInt &Offset, bool AllowNonInbounds, bool AllowInvariantGroup=false, function_ref< bool(Value &Value, APInt &Offset)> ExternalAnalysis=nullptr, bool LookThroughIntToPtr=false) const
Accumulate the constant offset this value has compared to a base pointer.

llvm::Value::getPointerDereferenceableBytes
LLVM_ABI uint64_t getPointerDereferenceableBytes(const DataLayout &DL, bool &CanBeNull, bool &CanBeFreed) const
Returns the number of bytes known to be dereferenceable for the pointer value.
Definition Value.cpp:890

llvm::Value::getName
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:318

llvm::Value::takeName
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition Value.cpp:399

llvm::VectorType::get
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct a VectorType.

llvm::cl::opt
Definition CommandLine.h:1454

llvm::ilist_detail::node_parent_access::getParent
const ParentTy * getParent() const
Definition ilist_node.h:34

uint64_t

Call
CallInst * Call
Definition ObjCARCOpts.cpp:2356

Changed
Changed
Definition ObjCARCOpts.cpp:2366

UINT64_MAX
#define UINT64_MAX
Definition DataTypes.h:77

llvm::AA
Abstract Attribute helper functions.
Definition Attributor.h:165

llvm::APIntOps::GreatestCommonDivisor
LLVM_ABI APInt GreatestCommonDivisor(APInt A, APInt B)
Compute GCD of two unsigned APInt values.
Definition APInt.cpp:830

llvm::CallingConv::Fast
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41

llvm::CallingConv::C
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34

llvm::ISD::BasicBlock
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81

llvm::Intrinsic::not_intrinsic
@ not_intrinsic
Definition Intrinsics.h:49

llvm::Intrinsic::ID
unsigned ID
Definition GenericSSAContext.h:28

llvm::Loc
Definition DwarfDebug.h:130

llvm::MIPatternMatch::m_ZeroInt
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
Definition MIPatternMatch.h:278

llvm::MIPatternMatch::m_Neg
BinaryOp_match< SpecificConstantMatch, SrcTy, TargetOpcode::G_SUB > m_Neg(const SrcTy &&Src)
Matches a register negated by a G_SUB.
Definition MIPatternMatch.h:935

llvm::MIPatternMatch::m_OneUse
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
Definition MIPatternMatch.h:56

llvm::PatternMatchHelpers::m_CombineOr
match_combine_or< Ty... > m_CombineOr(const Ty &...Ps)
Combine pattern matchers matching any of Ps patterns.
Definition PatternMatchHelpers.h:56

llvm::PatternMatch
Definition PatternMatch.h:51

llvm::PatternMatch::m_And
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
Definition PatternMatch.h:1264

llvm::PatternMatch::m_LShrOrSelf
ShiftLike_match< LHS, Instruction::LShr > m_LShrOrSelf(const LHS &L, uint64_t &R)
Matches lshr L, ConstShAmt or L itself (R will be set to zero in this case).
Definition PatternMatch.h:1326

llvm::PatternMatch::m_Add
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
Definition PatternMatch.h:1150

llvm::PatternMatch::m_CastOrSelf
match_combine_or< CastInst_match< OpTy, CastInst >, OpTy > m_CastOrSelf(const OpTy &Op)
Matches any cast or self. Used to ignore casts.
Definition PatternMatch.h:2185

llvm::PatternMatch::m_APInt
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
Definition PatternMatch.h:261

llvm::PatternMatch::m_c_And
BinaryOp_match< LHS, RHS, Instruction::And, true > m_c_And(const LHS &L, const RHS &R)
Matches an And with LHS and RHS in either order.
Definition PatternMatch.h:3088

llvm::PatternMatch::m_Trunc
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
Definition PatternMatch.h:2191

llvm::PatternMatch::m_SpecificInt
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
Definition PatternMatch.h:1028

llvm::PatternMatch::match
bool match(Val *V, const Pattern &P)
Definition PatternMatch.h:53

llvm::PatternMatch::m_Instruction
match_bind< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
Definition PatternMatch.h:822

llvm::PatternMatch::m_Deferred
match_deferred< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
Definition PatternMatch.h:947

llvm::PatternMatch::m_Specific
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
Definition PatternMatch.h:939

llvm::PatternMatch::m_One
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
Definition PatternMatch.h:560

llvm::PatternMatch::m_Select
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
Definition PatternMatch.h:1896

llvm::PatternMatch::m_SMin
MaxMin_match< ICmpInst, LHS, RHS, smin_pred_ty > m_SMin(const LHS &L, const RHS &R)
Definition PatternMatch.h:2558

llvm::PatternMatch::m_Value
auto m_Value()
Match an arbitrary value and ignore it.
Definition PatternMatch.h:135

llvm::PatternMatch::m_ShlOrSelf
ShiftLike_match< LHS, Instruction::Shl > m_ShlOrSelf(const LHS &L, uint64_t &R)
Matches shl L, ConstShAmt or L itself (R will be set to zero in this case).
Definition PatternMatch.h:1319

llvm::PatternMatch::m_Mul
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
Definition PatternMatch.h:1216

llvm::PatternMatch::m_SpecificBB
specific_bbval m_SpecificBB(BasicBlock *BB)
Match a specific basic block value.
Definition PatternMatch.h:1061

llvm::PatternMatch::m_NSWShl
OverflowingBinaryOp_match< LHS, RHS, Instruction::Shl, OverflowingBinaryOperator::NoSignedWrap > m_NSWShl(const LHS &L, const RHS &R)
Definition PatternMatch.h:1400

llvm::PatternMatch::m_SpecificICmp
SpecificCmpClass_match< LHS, RHS, ICmpInst > m_SpecificICmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
Definition PatternMatch.h:1785

llvm::PatternMatch::m_ZExt
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
Definition PatternMatch.h:2223

llvm::PatternMatch::m_NUWShl
OverflowingBinaryOp_match< LHS, RHS, Instruction::Shl, OverflowingBinaryOperator::NoUnsignedWrap > m_NUWShl(const LHS &L, const RHS &R)
Definition PatternMatch.h:1443

llvm::PatternMatch::m_c_Add
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches an Add with LHS and RHS in either order.
Definition PatternMatch.h:3074

llvm::PatternMatch::m_AddLike
match_combine_or< BinaryOp_match< LHS, RHS, Instruction::Add >, DisjointOr_match< LHS, RHS > > m_AddLike(const LHS &L, const RHS &R)
Match either "add" or "or disjoint".
Definition PatternMatch.h:1504

llvm::PatternMatch::m_FPToSI
CastInst_match< OpTy, FPToSIInst > m_FPToSI(const OpTy &Op)
Definition PatternMatch.h:2370

llvm::PatternMatch::m_SMax
MaxMin_match< ICmpInst, LHS, RHS, smax_pred_ty > m_SMax(const LHS &L, const RHS &R)
Definition PatternMatch.h:2552

llvm::PatternMatch::m_LShr
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
Definition PatternMatch.h:1288

llvm::PatternMatch::m_ICmp
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
Definition PatternMatch.h:1722

llvm::PatternMatch::m_Shl
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
Definition PatternMatch.h:1282

llvm::PatternMatch::m_Cttz
m_Intrinsic_Ty< Opnd0, Opnd1 >::Ty m_Cttz(const Opnd0 &Op0, const Opnd1 &Op1)
Definition PatternMatch.h:2952

llvm::PatternMatch::m_Br
brc_match< Cond_t, match_bind< BasicBlock >, match_bind< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
Definition PatternMatch.h:2430

llvm::PatternMatch::m_Ctlz
m_Intrinsic_Ty< Opnd0, Opnd1 >::Ty m_Ctlz(const Opnd0 &Op0, const Opnd1 &Op1)
Definition PatternMatch.h:2946

llvm::PatternMatch::m_Or
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
Definition PatternMatch.h:1270

llvm::PatternMatch::m_Zero
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
Definition PatternMatch.h:589

llvm::PatternMatch::m_c_Or
BinaryOp_match< LHS, RHS, Instruction::Or, true > m_c_Or(const LHS &L, const RHS &R)
Matches an Or with LHS and RHS in either order.
Definition PatternMatch.h:3095

llvm::PatternMatch::m_c_Mul
BinaryOp_match< LHS, RHS, Instruction::Mul, true > m_c_Mul(const LHS &L, const RHS &R)
Matches a Mul with LHS and RHS in either order.
Definition PatternMatch.h:3081

llvm::PatternMatch::m_Sub
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
Definition PatternMatch.h:1162

llvm::SI
Definition SIInstrInfo.h:1902

llvm::cl::Hidden
@ Hidden
Definition CommandLine.h:138

llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition CommandLine.h:444

llvm::codeview::CompileSym2Flags::EC
@ EC
Definition CodeView.h:432

llvm::codeview::PublicSymFlags::Function
@ Function
Definition CodeView.h:408

llvm::rdf::Phi
NodeAddr< PhiNode * > Phi
Definition RDFGraph.h:390

llvm::sampleprof::Base
@ Base
Definition Discriminator.h:58

llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition FunctionInfo.h:25

llvm::drop_begin
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:315

llvm::ThreadPriority::Low
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
Definition Threading.h:280

llvm::Value
FunctionAddr VTableAddr Value
Definition InstrProf.h:137

llvm::ProfcheckDisableMetadataFixes
cl::opt< bool > ProfcheckDisableMetadataFixes
Definition LoopInfo.cpp:60

llvm::TailFoldingOpts::Reverse
@ Reverse
Definition AArch64BaseInfo.h:670

llvm::setExplicitlyUnknownBranchWeightsIfProfiled
LLVM_ABI void setExplicitlyUnknownBranchWeightsIfProfiled(Instruction &I, StringRef PassName, const Function *F=nullptr)
Like setExplicitlyUnknownBranchWeights(...), but only sets unknown branch weights in the new instruct...
Definition ProfDataUtils.cpp:279

llvm::dyn_cast
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643

llvm::isOnlyUsedInZeroComparison
LLVM_ABI bool isOnlyUsedInZeroComparison(const Instruction *CxtI)
Definition ValueTracking.cpp:257

llvm::getConstantStringInfo
LLVM_ABI bool getConstantStringInfo(const Value *V, StringRef &Str, bool TrimAtNul=true)
This function computes the length of a null-terminated C string pointed to by V.
Definition ValueTracking.cpp:6748

llvm::make_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
Definition iterator_range.h:70

llvm::make_early_inc_range
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:633

llvm::SimplifyInstructionsInBlock
LLVM_ABI bool SimplifyInstructionsInBlock(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr)
Scan the specified basic block and try to simplify any instructions in it and recursively delete dead...
Definition Local.cpp:723

llvm::setExplicitlyUnknownBranchWeights
LLVM_ABI void setExplicitlyUnknownBranchWeights(Instruction &I, StringRef PassName)
Specify that the branch weights for this terminator cannot be known at compile time.
Definition ProfDataUtils.cpp:269

llvm::MaskedValueIsZero
LLVM_ABI bool MaskedValueIsZero(const Value *V, const APInt &Mask, const SimplifyQuery &SQ, unsigned Depth=0)
Return true if 'V & Mask' is known to be zero.
Definition ValueTracking.cpp:319

llvm::isLibFuncEmittable
LLVM_ABI bool isLibFuncEmittable(const Module *M, const TargetLibraryInfo *TLI, LibFunc TheLibFunc)
Check whether the library function is available on the target and also that, in the current Module, it is a...
Definition BuildLibCalls.cpp:1555

llvm::dyn_cast_or_null
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753

llvm::Log2_32
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331

llvm::reverse
auto reverse(ContainerTy &&C)
Definition STLExtras.h:407

llvm::isPowerOf2_32
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279

llvm::isModSet
bool isModSet(const ModRefInfo MRI)
Definition ModRef.h:49

llvm::sort
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1635

llvm::dbgs
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209

llvm::isModOrRefSet
bool isModOrRefSet(const ModRefInfo MRI)
Definition ModRef.h:43

llvm::ConstantFoldLoadFromConst
LLVM_ABI Constant * ConstantFoldLoadFromConst(Constant *C, Type *Ty, const APInt &Offset, const DataLayout &DL)
Extract value of C at the given Offset reinterpreted as Ty.
Definition ConstantFolding.cpp:846

llvm::SmallVector
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
Definition SmallVector.h:1151

llvm::isa
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547

llvm::IRMemLocation::Other
@ Other
Any other memory.
Definition ModRef.h:68

llvm::IRMemLocation::First
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:74

llvm::TTI
TargetTransformInfo TTI
Definition TargetTransformInfo.h:266

llvm::IRBuilder
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >

llvm::RecurKind::Sub
@ Sub
Subtraction of integers.
Definition IVDescriptors.h:38

llvm::SplitBlock
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the specified block at the specified instruction.
Definition BasicBlockUtils.cpp:1049

llvm::ArrayRef
ArrayRef(const T &OneElt) -> ArrayRef< T >

llvm::BitWidth
constexpr unsigned BitWidth
Definition BitmaskEnum.h:219

llvm::cast
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559

llvm::FunctionAnalysisManager
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
Definition PassManager.h:586

llvm::isGuaranteedNotToBePoison
LLVM_ABI bool isGuaranteedNotToBePoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Returns true if V cannot be poison, but may be undef.
Definition ValueTracking.cpp:7951

llvm::getUnderlyingObject
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
Definition ValueTracking.cpp:6940

llvm::AliasAnalysis
AAResults AliasAnalysis
Temporary typedef for legacy code that uses a generic AliasAnalysis pointer or reference.
Definition AliasAnalysis.h:746

llvm::cannotBeOrderedLessThanZero
LLVM_ABI bool cannotBeOrderedLessThanZero(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Return true if we can prove that the specified FP value is either NaN or never less than -0....
Definition ValueTracking.cpp:6191

std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:876

N
#define N

LoadOps
This is used by foldLoadsRecursive() to capture a Root Load node which is of type or(load,...
Definition AggressiveInstCombine.cpp:1259

LoadOps::ZextType
Type * ZextType
Definition AggressiveInstCombine.cpp:1265

LoadOps::Shift
uint64_t Shift
Definition AggressiveInstCombine.cpp:1264

LoadOps::AATags
AAMDNodes AATags
Definition AggressiveInstCombine.cpp:1266

LoadOps::RootInsert
LoadInst * RootInsert
Definition AggressiveInstCombine.cpp:1261

LoadOps::Root
LoadInst * Root
Definition AggressiveInstCombine.cpp:1260

LoadOps::LoadSize
uint64_t LoadSize
Definition AggressiveInstCombine.cpp:1263

LoadOps::FoundRoot
bool FoundRoot
Definition AggressiveInstCombine.cpp:1262

PartStore
ValWidth bits starting at ValOffset of Val stored at PtrBase+PtrOffset.
Definition AggressiveInstCombine.cpp:1480

PartStore::ValOffset
uint64_t ValOffset
Definition AggressiveInstCombine.cpp:1484

PartStore::ValWidth
uint64_t ValWidth
Definition AggressiveInstCombine.cpp:1485

PartStore::PtrOffset
APInt PtrOffset
Definition AggressiveInstCombine.cpp:1482

PartStore::Store
StoreInst * Store
Definition AggressiveInstCombine.cpp:1486

PartStore::operator<
bool operator<(const PartStore &Other) const
Definition AggressiveInstCombine.cpp:1492

PartStore::isCompatibleWith
bool isCompatibleWith(const PartStore &Other) const
Definition AggressiveInstCombine.cpp:1488

PartStore::PtrBase
Value * PtrBase
Definition AggressiveInstCombine.cpp:1481

PartStore::Val
Value * Val
Definition AggressiveInstCombine.cpp:1483

llvm::AAMDNodes
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
Definition Metadata.h:763

llvm::AAMDNodes::concat
LLVM_ABI AAMDNodes concat(const AAMDNodes &Other) const
Determine the best AAMDNodes after concatenating two different locations together.
Definition TypeBasedAliasAnalysis.cpp:553

llvm::MIPatternMatch::And
Matching combinators.
Definition MIPatternMatch.h:314

llvm::SimplifyQuery
Definition SimplifyQuery.h:71

llvm::SmallMapVector
A MapVector that performs no allocations if smaller than a certain size.
Definition MapVector.h:334

llvm::cl::desc
Definition CommandLine.h:410