File: | lib/CodeGen/InterleavedLoadCombinePass.cpp |
Warning: | line 965, column 31 Called C++ object pointer is null |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | //===- InterleavedLoadCombine.cpp - Combine Interleaved Loads ---*- C++ -*-===// | |||
2 | // | |||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | |||
4 | // See https://llvm.org/LICENSE.txt for license information. | |||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | |||
6 | // | |||
7 | //===----------------------------------------------------------------------===// | |||
8 | // | |||
9 | // \file | |||
10 | // | |||
11 | // This file defines the interleaved-load-combine pass. The pass searches for | |||
12 | // ShuffleVectorInstruction that execute interleaving loads. If a matching | |||
13 | // pattern is found, it adds a combined load and further instructions in a | |||
14 | // pattern that is detectable by InterleavedAccesPass. The old instructions are | |||
15 | // left dead to be removed later. The pass is specifically designed to be | |||
16 | // executed just before InterleavedAccesPass to find any left-over instances | |||
17 | // that are not detected within former passes. | |||
18 | // | |||
19 | //===----------------------------------------------------------------------===// | |||
20 | ||||
21 | #include "llvm/ADT/Statistic.h" | |||
22 | #include "llvm/Analysis/MemoryLocation.h" | |||
23 | #include "llvm/Analysis/MemorySSA.h" | |||
24 | #include "llvm/Analysis/MemorySSAUpdater.h" | |||
25 | #include "llvm/Analysis/OptimizationRemarkEmitter.h" | |||
26 | #include "llvm/Analysis/TargetTransformInfo.h" | |||
27 | #include "llvm/CodeGen/Passes.h" | |||
28 | #include "llvm/CodeGen/TargetLowering.h" | |||
29 | #include "llvm/CodeGen/TargetPassConfig.h" | |||
30 | #include "llvm/CodeGen/TargetSubtargetInfo.h" | |||
31 | #include "llvm/IR/DataLayout.h" | |||
32 | #include "llvm/IR/Dominators.h" | |||
33 | #include "llvm/IR/Function.h" | |||
34 | #include "llvm/IR/Instructions.h" | |||
35 | #include "llvm/IR/LegacyPassManager.h" | |||
36 | #include "llvm/IR/Module.h" | |||
37 | #include "llvm/Pass.h" | |||
38 | #include "llvm/Support/Debug.h" | |||
39 | #include "llvm/Support/ErrorHandling.h" | |||
40 | #include "llvm/Support/raw_ostream.h" | |||
41 | #include "llvm/Target/TargetMachine.h" | |||
42 | ||||
43 | #include <algorithm> | |||
44 | #include <cassert> | |||
45 | #include <list> | |||
46 | ||||
47 | using namespace llvm; | |||
48 | ||||
49 | #define DEBUG_TYPE"interleaved-load-combine" "interleaved-load-combine" | |||
50 | ||||
51 | namespace { | |||
52 | ||||
53 | /// Statistic counter | |||
54 | STATISTIC(NumInterleavedLoadCombine, "Number of combined loads")static llvm::Statistic NumInterleavedLoadCombine = {"interleaved-load-combine" , "NumInterleavedLoadCombine", "Number of combined loads", {0 }, {false}}; | |||
55 | ||||
56 | /// Option to disable the pass | |||
57 | static cl::opt<bool> DisableInterleavedLoadCombine( | |||
58 | "disable-" DEBUG_TYPE"interleaved-load-combine", cl::init(false), cl::Hidden, | |||
59 | cl::desc("Disable combining of interleaved loads")); | |||
60 | ||||
61 | struct VectorInfo; | |||
62 | ||||
63 | struct InterleavedLoadCombineImpl { | |||
64 | public: | |||
65 | InterleavedLoadCombineImpl(Function &F, DominatorTree &DT, MemorySSA &MSSA, | |||
66 | TargetMachine &TM) | |||
67 | : F(F), DT(DT), MSSA(MSSA), | |||
68 | TLI(*TM.getSubtargetImpl(F)->getTargetLowering()), | |||
69 | TTI(TM.getTargetTransformInfo(F)) {} | |||
70 | ||||
71 | /// Scan the function for interleaved load candidates and execute the | |||
72 | /// replacement if applicable. | |||
73 | bool run(); | |||
74 | ||||
75 | private: | |||
76 | /// Function this pass is working on | |||
77 | Function &F; | |||
78 | ||||
79 | /// Dominator Tree Analysis | |||
80 | DominatorTree &DT; | |||
81 | ||||
82 | /// Memory Alias Analyses | |||
83 | MemorySSA &MSSA; | |||
84 | ||||
85 | /// Target Lowering Information | |||
86 | const TargetLowering &TLI; | |||
87 | ||||
88 | /// Target Transform Information | |||
89 | const TargetTransformInfo TTI; | |||
90 | ||||
91 | /// Find the instruction in sets LIs that dominates all others, return nullptr | |||
92 | /// if there is none. | |||
93 | LoadInst *findFirstLoad(const std::set<LoadInst *> &LIs); | |||
94 | ||||
95 | /// Replace interleaved load candidates. It does additional | |||
96 | /// analyses if this makes sense. Returns true on success and false | |||
97 | /// of nothing has been changed. | |||
98 | bool combine(std::list<VectorInfo> &InterleavedLoad, | |||
99 | OptimizationRemarkEmitter &ORE); | |||
100 | ||||
101 | /// Given a set of VectorInfo containing candidates for a given interleave | |||
102 | /// factor, find a set that represents a 'factor' interleaved load. | |||
103 | bool findPattern(std::list<VectorInfo> &Candidates, | |||
104 | std::list<VectorInfo> &InterleavedLoad, unsigned Factor, | |||
105 | const DataLayout &DL); | |||
106 | }; // InterleavedLoadCombine | |||
107 | ||||
108 | /// First Order Polynomial on an n-Bit Integer Value | |||
109 | /// | |||
110 | /// Polynomial(Value) = Value * B + A + E*2^(n-e) | |||
111 | /// | |||
112 | /// A and B are the coefficients. E*2^(n-e) is an error within 'e' most | |||
113 | /// significant bits. It is introduced if an exact computation cannot be proven | |||
114 | /// (e.q. division by 2). | |||
115 | /// | |||
116 | /// As part of this optimization multiple loads will be combined. It necessary | |||
117 | /// to prove that loads are within some relative offset to each other. This | |||
118 | /// class is used to prove relative offsets of values loaded from memory. | |||
119 | /// | |||
120 | /// Representing an integer in this form is sound since addition in two's | |||
121 | /// complement is associative (trivial) and multiplication distributes over the | |||
122 | /// addition (see Proof(1) in Polynomial::mul). Further, both operations | |||
123 | /// commute. | |||
124 | // | |||
125 | // Example: | |||
126 | // declare @fn(i64 %IDX, <4 x float>* %PTR) { | |||
127 | // %Pa1 = add i64 %IDX, 2 | |||
128 | // %Pa2 = lshr i64 %Pa1, 1 | |||
129 | // %Pa3 = getelementptr inbounds <4 x float>, <4 x float>* %PTR, i64 %Pa2 | |||
130 | // %Va = load <4 x float>, <4 x float>* %Pa3 | |||
131 | // | |||
132 | // %Pb1 = add i64 %IDX, 4 | |||
133 | // %Pb2 = lshr i64 %Pb1, 1 | |||
134 | // %Pb3 = getelementptr inbounds <4 x float>, <4 x float>* %PTR, i64 %Pb2 | |||
135 | // %Vb = load <4 x float>, <4 x float>* %Pb3 | |||
136 | // ... } | |||
137 | // | |||
138 | // The goal is to prove that two loads load consecutive addresses. | |||
139 | // | |||
140 | // In this case the polynomials are constructed by the following | |||
141 | // steps. | |||
142 | // | |||
143 | // The number tag #e specifies the error bits. | |||
144 | // | |||
145 | // Pa_0 = %IDX #0 | |||
146 | // Pa_1 = %IDX + 2 #0 | add 2 | |||
147 | // Pa_2 = %IDX/2 + 1 #1 | lshr 1 | |||
148 | // Pa_3 = %IDX/2 + 1 #1 | GEP, step signext to i64 | |||
149 | // Pa_4 = (%IDX/2)*16 + 16 #0 | GEP, multiply index by sizeof(4) for floats | |||
150 | // Pa_5 = (%IDX/2)*16 + 16 #0 | GEP, add offset of leading components | |||
151 | // | |||
152 | // Pb_0 = %IDX #0 | |||
153 | // Pb_1 = %IDX + 4 #0 | add 2 | |||
154 | // Pb_2 = %IDX/2 + 2 #1 | lshr 1 | |||
155 | // Pb_3 = %IDX/2 + 2 #1 | GEP, step signext to i64 | |||
156 | // Pb_4 = (%IDX/2)*16 + 32 #0 | GEP, multiply index by sizeof(4) for floats | |||
157 | // Pb_5 = (%IDX/2)*16 + 16 #0 | GEP, add offset of leading components | |||
158 | // | |||
159 | // Pb_5 - Pa_5 = 16 #0 | subtract to get the offset | |||
160 | // | |||
161 | // Remark: %PTR is not maintained within this class. So in this instance the | |||
162 | // offset of 16 can only be assumed if the pointers are equal. | |||
163 | // | |||
164 | class Polynomial { | |||
165 | /// Operations on B | |||
166 | enum BOps { | |||
167 | LShr, | |||
168 | Mul, | |||
169 | SExt, | |||
170 | Trunc, | |||
171 | }; | |||
172 | ||||
173 | /// Number of Error Bits e | |||
174 | unsigned ErrorMSBs; | |||
175 | ||||
176 | /// Value | |||
177 | Value *V; | |||
178 | ||||
179 | /// Coefficient B | |||
180 | SmallVector<std::pair<BOps, APInt>, 4> B; | |||
181 | ||||
182 | /// Coefficient A | |||
183 | APInt A; | |||
184 | ||||
185 | public: | |||
186 | Polynomial(Value *V) : ErrorMSBs((unsigned)-1), V(V), B(), A() { | |||
187 | IntegerType *Ty = dyn_cast<IntegerType>(V->getType()); | |||
188 | if (Ty) { | |||
189 | ErrorMSBs = 0; | |||
190 | this->V = V; | |||
191 | A = APInt(Ty->getBitWidth(), 0); | |||
192 | } | |||
193 | } | |||
194 | ||||
195 | Polynomial(const APInt &A, unsigned ErrorMSBs = 0) | |||
196 | : ErrorMSBs(ErrorMSBs), V(NULL__null), B(), A(A) {} | |||
197 | ||||
198 | Polynomial(unsigned BitWidth, uint64_t A, unsigned ErrorMSBs = 0) | |||
199 | : ErrorMSBs(ErrorMSBs), V(NULL__null), B(), A(BitWidth, A) {} | |||
200 | ||||
201 | Polynomial() : ErrorMSBs((unsigned)-1), V(NULL__null), B(), A() {} | |||
202 | ||||
203 | /// Increment and clamp the number of undefined bits. | |||
204 | void incErrorMSBs(unsigned amt) { | |||
205 | if (ErrorMSBs == (unsigned)-1) | |||
206 | return; | |||
207 | ||||
208 | ErrorMSBs += amt; | |||
209 | if (ErrorMSBs > A.getBitWidth()) | |||
210 | ErrorMSBs = A.getBitWidth(); | |||
211 | } | |||
212 | ||||
213 | /// Decrement and clamp the number of undefined bits. | |||
214 | void decErrorMSBs(unsigned amt) { | |||
215 | if (ErrorMSBs == (unsigned)-1) | |||
216 | return; | |||
217 | ||||
218 | if (ErrorMSBs > amt) | |||
219 | ErrorMSBs -= amt; | |||
220 | else | |||
221 | ErrorMSBs = 0; | |||
222 | } | |||
223 | ||||
224 | /// Apply an add on the polynomial | |||
225 | Polynomial &add(const APInt &C) { | |||
226 | // Note: Addition is associative in two's complement even when in case of | |||
227 | // signed overflow. | |||
228 | // | |||
229 | // Error bits can only propagate into higher significant bits. As these are | |||
230 | // already regarded as undefined, there is no change. | |||
231 | // | |||
232 | // Theorem: Adding a constant to a polynomial does not change the error | |||
233 | // term. | |||
234 | // | |||
235 | // Proof: | |||
236 | // | |||
237 | // Since the addition is associative and commutes: | |||
238 | // | |||
239 | // (B + A + E*2^(n-e)) + C = B + (A + C) + E*2^(n-e) | |||
240 | // [qed] | |||
241 | ||||
242 | if (C.getBitWidth() != A.getBitWidth()) { | |||
243 | ErrorMSBs = (unsigned)-1; | |||
244 | return *this; | |||
245 | } | |||
246 | ||||
247 | A += C; | |||
248 | return *this; | |||
249 | } | |||
250 | ||||
251 | /// Apply a multiplication onto the polynomial. | |||
252 | Polynomial &mul(const APInt &C) { | |||
253 | // Note: Multiplication distributes over the addition | |||
254 | // | |||
255 | // Theorem: Multiplication distributes over the addition | |||
256 | // | |||
257 | // Proof(1): | |||
258 | // | |||
259 | // (B+A)*C =- | |||
260 | // = (B + A) + (B + A) + .. {C Times} | |||
261 | // addition is associative and commutes, hence | |||
262 | // = B + B + .. {C Times} .. + A + A + .. {C times} | |||
263 | // = B*C + A*C | |||
264 | // (see (function add) for signed values and overflows) | |||
265 | // [qed] | |||
266 | // | |||
267 | // Theorem: If C has c trailing zeros, errors bits in A or B are shifted out | |||
268 | // to the left. | |||
269 | // | |||
270 | // Proof(2): | |||
271 | // | |||
272 | // Let B' and A' be the n-Bit inputs with some unknown errors EA, | |||
273 | // EB at e leading bits. B' and A' can be written down as: | |||
274 | // | |||
275 | // B' = B + 2^(n-e)*EB | |||
276 | // A' = A + 2^(n-e)*EA | |||
277 | // | |||
278 | // Let C' be an input with c trailing zero bits. C' can be written as | |||
279 | // | |||
280 | // C' = C*2^c | |||
281 | // | |||
282 | // Therefore we can compute the result by using distributivity and | |||
283 | // commutativity. | |||
284 | // | |||
285 | // (B'*C' + A'*C') = [B + 2^(n-e)*EB] * C' + [A + 2^(n-e)*EA] * C' = | |||
286 | // = [B + 2^(n-e)*EB + A + 2^(n-e)*EA] * C' = | |||
287 | // = (B'+A') * C' = | |||
288 | // = [B + 2^(n-e)*EB + A + 2^(n-e)*EA] * C' = | |||
289 | // = [B + A + 2^(n-e)*EB + 2^(n-e)*EA] * C' = | |||
290 | // = (B + A) * C' + [2^(n-e)*EB + 2^(n-e)*EA)] * C' = | |||
291 | // = (B + A) * C' + [2^(n-e)*EB + 2^(n-e)*EA)] * C*2^c = | |||
292 | // = (B + A) * C' + C*(EB + EA)*2^(n-e)*2^c = | |||
293 | // | |||
294 | // Let EC be the final error with EC = C*(EB + EA) | |||
295 | // | |||
296 | // = (B + A)*C' + EC*2^(n-e)*2^c = | |||
297 | // = (B + A)*C' + EC*2^(n-(e-c)) | |||
298 | // | |||
299 | // Since EC is multiplied by 2^(n-(e-c)) the resulting error contains c | |||
300 | // less error bits than the input. c bits are shifted out to the left. | |||
301 | // [qed] | |||
302 | ||||
303 | if (C.getBitWidth() != A.getBitWidth()) { | |||
304 | ErrorMSBs = (unsigned)-1; | |||
305 | return *this; | |||
306 | } | |||
307 | ||||
308 | // Multiplying by one is a no-op. | |||
309 | if (C.isOneValue()) { | |||
310 | return *this; | |||
311 | } | |||
312 | ||||
313 | // Multiplying by zero removes the coefficient B and defines all bits. | |||
314 | if (C.isNullValue()) { | |||
315 | ErrorMSBs = 0; | |||
316 | deleteB(); | |||
317 | } | |||
318 | ||||
319 | // See Proof(2): Trailing zero bits indicate a left shift. This removes | |||
320 | // leading bits from the result even if they are undefined. | |||
321 | decErrorMSBs(C.countTrailingZeros()); | |||
322 | ||||
323 | A *= C; | |||
324 | pushBOperation(Mul, C); | |||
325 | return *this; | |||
326 | } | |||
327 | ||||
328 | /// Apply a logical shift right on the polynomial | |||
329 | Polynomial &lshr(const APInt &C) { | |||
330 | // Theorem(1): (B + A + E*2^(n-e)) >> 1 => (B >> 1) + (A >> 1) + E'*2^(n-e') | |||
331 | // where | |||
332 | // e' = e + 1, | |||
333 | // E is a e-bit number, | |||
334 | // E' is a e'-bit number, | |||
335 | // holds under the following precondition: | |||
336 | // pre(1): A % 2 = 0 | |||
337 | // pre(2): e < n, (see Theorem(2) for the trivial case with e=n) | |||
338 | // where >> expresses a logical shift to the right, with adding zeros. | |||
339 | // | |||
340 | // We need to show that for every, E there is a E' | |||
341 | // | |||
342 | // B = b_h * 2^(n-1) + b_m * 2 + b_l | |||
343 | // A = a_h * 2^(n-1) + a_m * 2 (pre(1)) | |||
344 | // | |||
345 | // where a_h, b_h, b_l are single bits, and a_m, b_m are (n-2) bit numbers | |||
346 | // | |||
347 | // Let X = (B + A + E*2^(n-e)) >> 1 | |||
348 | // Let Y = (B >> 1) + (A >> 1) + E*2^(n-e) >> 1 | |||
349 | // | |||
350 | // X = [B + A + E*2^(n-e)] >> 1 = | |||
351 | // = [ b_h * 2^(n-1) + b_m * 2 + b_l + | |||
352 | // + a_h * 2^(n-1) + a_m * 2 + | |||
353 | // + E * 2^(n-e) ] >> 1 = | |||
354 | // | |||
355 | // The sum is built by putting the overflow of [a_m + b+n] into the term | |||
356 | // 2^(n-1). As there are no more bits beyond 2^(n-1) the overflow within | |||
357 | // this bit is discarded. This is expressed by % 2. | |||
358 | // | |||
359 | // The bit in position 0 cannot overflow into the term (b_m + a_m). | |||
360 | // | |||
361 | // = [ ([b_h + a_h + (b_m + a_m) >> (n-2)] % 2) * 2^(n-1) + | |||
362 | // + ((b_m + a_m) % 2^(n-2)) * 2 + | |||
363 | // + b_l + E * 2^(n-e) ] >> 1 = | |||
364 | // | |||
365 | // The shift is computed by dividing the terms by 2 and by cutting off | |||
366 | // b_l. | |||
367 | // | |||
368 | // = ([b_h + a_h + (b_m + a_m) >> (n-2)] % 2) * 2^(n-2) + | |||
369 | // + ((b_m + a_m) % 2^(n-2)) + | |||
370 | // + E * 2^(n-(e+1)) = | |||
371 | // | |||
372 | // by the definition in the Theorem e+1 = e' | |||
373 | // | |||
374 | // = ([b_h + a_h + (b_m + a_m) >> (n-2)] % 2) * 2^(n-2) + | |||
375 | // + ((b_m + a_m) % 2^(n-2)) + | |||
376 | // + E * 2^(n-e') = | |||
377 | // | |||
378 | // Compute Y by applying distributivity first | |||
379 | // | |||
380 | // Y = (B >> 1) + (A >> 1) + E*2^(n-e') = | |||
381 | // = (b_h * 2^(n-1) + b_m * 2 + b_l) >> 1 + | |||
382 | // + (a_h * 2^(n-1) + a_m * 2) >> 1 + | |||
383 | // + E * 2^(n-e) >> 1 = | |||
384 | // | |||
385 | // Again, the shift is computed by dividing the terms by 2 and by cutting | |||
386 | // off b_l. | |||
387 | // | |||
388 | // = b_h * 2^(n-2) + b_m + | |||
389 | // + a_h * 2^(n-2) + a_m + | |||
390 | // + E * 2^(n-(e+1)) = | |||
391 | // | |||
392 | // Again, the sum is built by putting the overflow of [a_m + b+n] into | |||
393 | // the term 2^(n-1). But this time there is room for a second bit in the | |||
394 | // term 2^(n-2) we add this bit to a new term and denote it o_h in a | |||
395 | // second step. | |||
396 | // | |||
397 | // = ([b_h + a_h + (b_m + a_m) >> (n-2)] >> 1) * 2^(n-1) + | |||
398 | // + ([b_h + a_h + (b_m + a_m) >> (n-2)] % 2) * 2^(n-2) + | |||
399 | // + ((b_m + a_m) % 2^(n-2)) + | |||
400 | // + E * 2^(n-(e+1)) = | |||
401 | // | |||
402 | // Let o_h = [b_h + a_h + (b_m + a_m) >> (n-2)] >> 1 | |||
403 | // Further replace e+1 by e'. | |||
404 | // | |||
405 | // = o_h * 2^(n-1) + | |||
406 | // + ([b_h + a_h + (b_m + a_m) >> (n-2)] % 2) * 2^(n-2) + | |||
407 | // + ((b_m + a_m) % 2^(n-2)) + | |||
408 | // + E * 2^(n-e') = | |||
409 | // | |||
410 | // Move o_h into the error term and construct E'. To ensure that there is | |||
411 | // no 2^x with negative x, this step requires pre(2) (e < n). | |||
412 | // | |||
413 | // = ([b_h + a_h + (b_m + a_m) >> (n-2)] % 2) * 2^(n-2) + | |||
414 | // + ((b_m + a_m) % 2^(n-2)) + | |||
415 | // + o_h * 2^(e'-1) * 2^(n-e') + | pre(2), move 2^(e'-1) | |||
416 | // | out of the old exponent | |||
417 | // + E * 2^(n-e') = | |||
418 | // = ([b_h + a_h + (b_m + a_m) >> (n-2)] % 2) * 2^(n-2) + | |||
419 | // + ((b_m + a_m) % 2^(n-2)) + | |||
420 | // + [o_h * 2^(e'-1) + E] * 2^(n-e') + | move 2^(e'-1) out of | |||
421 | // | the old exponent | |||
422 | // | |||
423 | // Let E' = o_h * 2^(e'-1) + E | |||
424 | // | |||
425 | // = ([b_h + a_h + (b_m + a_m) >> (n-2)] % 2) * 2^(n-2) + | |||
426 | // + ((b_m + a_m) % 2^(n-2)) + | |||
427 | // + E' * 2^(n-e') | |||
428 | // | |||
429 | // Because X and Y are distinct only in there error terms and E' can be | |||
430 | // constructed as shown the theorem holds. | |||
431 | // [qed] | |||
432 | // | |||
433 | // For completeness in case of the case e=n it is also required to show that | |||
434 | // distributivity can be applied. | |||
435 | // | |||
436 | // In this case Theorem(1) transforms to (the pre-condition on A can also be | |||
437 | // dropped) | |||
438 | // | |||
439 | // Theorem(2): (B + A + E) >> 1 => (B >> 1) + (A >> 1) + E' | |||
440 | // where | |||
441 | // A, B, E, E' are two's complement numbers with the same bit | |||
442 | // width | |||
443 | // | |||
444 | // Let A + B + E = X | |||
445 | // Let (B >> 1) + (A >> 1) = Y | |||
446 | // | |||
447 | // Therefore we need to show that for every X and Y there is an E' which | |||
448 | // makes the equation | |||
449 | // | |||
450 | // X = Y + E' | |||
451 | // | |||
452 | // hold. This is trivially the case for E' = X - Y. | |||
453 | // | |||
454 | // [qed] | |||
455 | // | |||
456 | // Remark: Distributing lshr with and arbitrary number n can be expressed as | |||
457 | // ((((B + A) lshr 1) lshr 1) ... ) {n times}. | |||
458 | // This construction induces n additional error bits at the left. | |||
459 | ||||
460 | if (C.getBitWidth() != A.getBitWidth()) { | |||
461 | ErrorMSBs = (unsigned)-1; | |||
462 | return *this; | |||
463 | } | |||
464 | ||||
465 | if (C.isNullValue()) | |||
466 | return *this; | |||
467 | ||||
468 | // Test if the result will be zero | |||
469 | unsigned shiftAmt = C.getZExtValue(); | |||
470 | if (shiftAmt >= C.getBitWidth()) | |||
471 | return mul(APInt(C.getBitWidth(), 0)); | |||
472 | ||||
473 | // The proof that shiftAmt LSBs are zero for at least one summand is only | |||
474 | // possible for the constant number. | |||
475 | // | |||
476 | // If this can be proven add shiftAmt to the error counter | |||
477 | // `ErrorMSBs`. Otherwise set all bits as undefined. | |||
478 | if (A.countTrailingZeros() < shiftAmt) | |||
479 | ErrorMSBs = A.getBitWidth(); | |||
480 | else | |||
481 | incErrorMSBs(shiftAmt); | |||
482 | ||||
483 | // Apply the operation. | |||
484 | pushBOperation(LShr, C); | |||
485 | A = A.lshr(shiftAmt); | |||
486 | ||||
487 | return *this; | |||
488 | } | |||
489 | ||||
490 | /// Apply a sign-extend or truncate operation on the polynomial. | |||
491 | Polynomial &sextOrTrunc(unsigned n) { | |||
492 | if (n < A.getBitWidth()) { | |||
493 | // Truncate: Clearly undefined Bits on the MSB side are removed | |||
494 | // if there are any. | |||
495 | decErrorMSBs(A.getBitWidth() - n); | |||
496 | A = A.trunc(n); | |||
497 | pushBOperation(Trunc, APInt(sizeof(n) * 8, n)); | |||
498 | } | |||
499 | if (n > A.getBitWidth()) { | |||
500 | // Extend: Clearly extending first and adding later is different | |||
501 | // to adding first and extending later in all extended bits. | |||
502 | incErrorMSBs(n - A.getBitWidth()); | |||
503 | A = A.sext(n); | |||
504 | pushBOperation(SExt, APInt(sizeof(n) * 8, n)); | |||
505 | } | |||
506 | ||||
507 | return *this; | |||
508 | } | |||
509 | ||||
510 | /// Test if there is a coefficient B. | |||
511 | bool isFirstOrder() const { return V != nullptr; } | |||
512 | ||||
513 | /// Test coefficient B of two Polynomials are equal. | |||
514 | bool isCompatibleTo(const Polynomial &o) const { | |||
515 | // The polynomial use different bit width. | |||
516 | if (A.getBitWidth() != o.A.getBitWidth()) | |||
517 | return false; | |||
518 | ||||
519 | // If neither Polynomial has the Coefficient B. | |||
520 | if (!isFirstOrder() && !o.isFirstOrder()) | |||
521 | return true; | |||
522 | ||||
523 | // The index variable is different. | |||
524 | if (V != o.V) | |||
525 | return false; | |||
526 | ||||
527 | // Check the operations. | |||
528 | if (B.size() != o.B.size()) | |||
529 | return false; | |||
530 | ||||
531 | auto ob = o.B.begin(); | |||
532 | for (auto &b : B) { | |||
533 | if (b != *ob) | |||
534 | return false; | |||
535 | ob++; | |||
536 | } | |||
537 | ||||
538 | return true; | |||
539 | } | |||
540 | ||||
541 | /// Subtract two polynomials, return an undefined polynomial if | |||
542 | /// subtraction is not possible. | |||
543 | Polynomial operator-(const Polynomial &o) const { | |||
544 | // Return an undefined polynomial if incompatible. | |||
545 | if (!isCompatibleTo(o)) | |||
546 | return Polynomial(); | |||
547 | ||||
548 | // If the polynomials are compatible (meaning they have the same | |||
549 | // coefficient on B), B is eliminated. Thus a polynomial solely | |||
550 | // containing A is returned | |||
551 | return Polynomial(A - o.A, std::max(ErrorMSBs, o.ErrorMSBs)); | |||
552 | } | |||
553 | ||||
554 | /// Subtract a constant from a polynomial, | |||
555 | Polynomial operator-(uint64_t C) const { | |||
556 | Polynomial Result(*this); | |||
557 | Result.A -= C; | |||
558 | return Result; | |||
559 | } | |||
560 | ||||
561 | /// Add a constant to a polynomial, | |||
562 | Polynomial operator+(uint64_t C) const { | |||
563 | Polynomial Result(*this); | |||
564 | Result.A += C; | |||
565 | return Result; | |||
566 | } | |||
567 | ||||
568 | /// Returns true if it can be proven that two Polynomials are equal. | |||
569 | bool isProvenEqualTo(const Polynomial &o) { | |||
570 | // Subtract both polynomials and test if it is fully defined and zero. | |||
571 | Polynomial r = *this - o; | |||
572 | return (r.ErrorMSBs == 0) && (!r.isFirstOrder()) && (r.A.isNullValue()); | |||
573 | } | |||
574 | ||||
575 | /// Print the polynomial into a stream. | |||
576 | void print(raw_ostream &OS) const { | |||
577 | OS << "[{#ErrBits:" << ErrorMSBs << "} "; | |||
578 | ||||
579 | if (V) { | |||
580 | for (auto b : B) | |||
581 | OS << "("; | |||
582 | OS << "(" << *V << ") "; | |||
583 | ||||
584 | for (auto b : B) { | |||
585 | switch (b.first) { | |||
586 | case LShr: | |||
587 | OS << "LShr "; | |||
588 | break; | |||
589 | case Mul: | |||
590 | OS << "Mul "; | |||
591 | break; | |||
592 | case SExt: | |||
593 | OS << "SExt "; | |||
594 | break; | |||
595 | case Trunc: | |||
596 | OS << "Trunc "; | |||
597 | break; | |||
598 | } | |||
599 | ||||
600 | OS << b.second << ") "; | |||
601 | } | |||
602 | } | |||
603 | ||||
604 | OS << "+ " << A << "]"; | |||
605 | } | |||
606 | ||||
607 | private: | |||
608 | void deleteB() { | |||
609 | V = nullptr; | |||
610 | B.clear(); | |||
611 | } | |||
612 | ||||
613 | void pushBOperation(const BOps Op, const APInt &C) { | |||
614 | if (isFirstOrder()) { | |||
615 | B.push_back(std::make_pair(Op, C)); | |||
616 | return; | |||
617 | } | |||
618 | } | |||
619 | }; | |||
620 | ||||
621 | #ifndef NDEBUG | |||
622 | static raw_ostream &operator<<(raw_ostream &OS, const Polynomial &S) { | |||
623 | S.print(OS); | |||
624 | return OS; | |||
625 | } | |||
626 | #endif | |||
627 | ||||
628 | /// VectorInfo stores abstract the following information for each vector | |||
629 | /// element: | |||
630 | /// | |||
631 | /// 1) The the memory address loaded into the element as Polynomial | |||
632 | /// 2) a set of load instruction necessary to construct the vector, | |||
633 | /// 3) a set of all other instructions that are necessary to create the vector and | |||
634 | /// 4) a pointer value that can be used as relative base for all elements. | |||
635 | struct VectorInfo { | |||
636 | private: | |||
637 | VectorInfo(const VectorInfo &c) : VTy(c.VTy) { | |||
638 | llvm_unreachable(::llvm::llvm_unreachable_internal("Copying VectorInfo is neither implemented nor necessary," , "/build/llvm-toolchain-snapshot-9~svn359426/lib/CodeGen/InterleavedLoadCombinePass.cpp" , 639) | |||
639 | "Copying VectorInfo is neither implemented nor necessary,")::llvm::llvm_unreachable_internal("Copying VectorInfo is neither implemented nor necessary," , "/build/llvm-toolchain-snapshot-9~svn359426/lib/CodeGen/InterleavedLoadCombinePass.cpp" , 639); | |||
640 | } | |||
641 | ||||
642 | public: | |||
643 | /// Information of a Vector Element | |||
644 | struct ElementInfo { | |||
645 | /// Offset Polynomial. | |||
646 | Polynomial Ofs; | |||
647 | ||||
648 | /// The Load Instruction used to Load the entry. LI is null if the pointer | |||
649 | /// of the load instruction does not point on to the entry | |||
650 | LoadInst *LI; | |||
651 | ||||
652 | ElementInfo(Polynomial Offset = Polynomial(), LoadInst *LI = nullptr) | |||
653 | : Ofs(Offset), LI(LI) {} | |||
654 | }; | |||
655 | ||||
656 | /// Basic-block the load instructions are within | |||
657 | BasicBlock *BB; | |||
658 | ||||
659 | /// Pointer value of all participation load instructions | |||
660 | Value *PV; | |||
661 | ||||
662 | /// Participating load instructions | |||
663 | std::set<LoadInst *> LIs; | |||
664 | ||||
665 | /// Participating instructions | |||
666 | std::set<Instruction *> Is; | |||
667 | ||||
668 | /// Final shuffle-vector instruction | |||
669 | ShuffleVectorInst *SVI; | |||
670 | ||||
671 | /// Information of the offset for each vector element | |||
672 | ElementInfo *EI; | |||
673 | ||||
674 | /// Vector Type | |||
675 | VectorType *const VTy; | |||
676 | ||||
677 | VectorInfo(VectorType *VTy) | |||
678 | : BB(nullptr), PV(nullptr), LIs(), Is(), SVI(nullptr), VTy(VTy) { | |||
679 | EI = new ElementInfo[VTy->getNumElements()]; | |||
680 | } | |||
681 | ||||
682 | virtual ~VectorInfo() { delete[] EI; } | |||
683 | ||||
684 | unsigned getDimension() const { return VTy->getNumElements(); } | |||
685 | ||||
686 | /// Test if the VectorInfo can be part of an interleaved load with the | |||
687 | /// specified factor. | |||
688 | /// | |||
689 | /// \param Factor of the interleave | |||
690 | /// \param DL Targets Datalayout | |||
691 | /// | |||
692 | /// \returns true if this is possible and false if not | |||
693 | bool isInterleaved(unsigned Factor, const DataLayout &DL) const { | |||
694 | unsigned Size = DL.getTypeAllocSize(VTy->getElementType()); | |||
695 | for (unsigned i = 1; i < getDimension(); i++) { | |||
696 | if (!EI[i].Ofs.isProvenEqualTo(EI[0].Ofs + i * Factor * Size)) { | |||
697 | return false; | |||
698 | } | |||
699 | } | |||
700 | return true; | |||
701 | } | |||
702 | ||||
703 | /// Recursively computes the vector information stored in V. | |||
704 | /// | |||
705 | /// This function delegates the work to specialized implementations | |||
706 | /// | |||
707 | /// \param V Value to operate on | |||
708 | /// \param Result Result of the computation | |||
709 | /// | |||
710 | /// \returns false if no sensible information can be gathered. | |||
711 | static bool compute(Value *V, VectorInfo &Result, const DataLayout &DL) { | |||
712 | ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(V); | |||
713 | if (SVI) | |||
| ||||
714 | return computeFromSVI(SVI, Result, DL); | |||
715 | LoadInst *LI = dyn_cast<LoadInst>(V); | |||
716 | if (LI) | |||
717 | return computeFromLI(LI, Result, DL); | |||
718 | BitCastInst *BCI = dyn_cast<BitCastInst>(V); | |||
719 | if (BCI) | |||
720 | return computeFromBCI(BCI, Result, DL); | |||
721 | return false; | |||
722 | } | |||
723 | ||||
724 | /// BitCastInst specialization to compute the vector information. | |||
725 | /// | |||
726 | /// \param BCI BitCastInst to operate on | |||
727 | /// \param Result Result of the computation | |||
728 | /// | |||
729 | /// \returns false if no sensible information can be gathered. | |||
730 | static bool computeFromBCI(BitCastInst *BCI, VectorInfo &Result, | |||
731 | const DataLayout &DL) { | |||
732 | Instruction *Op = dyn_cast<Instruction>(BCI->getOperand(0)); | |||
733 | ||||
734 | if (!Op) | |||
735 | return false; | |||
736 | ||||
737 | VectorType *VTy = dyn_cast<VectorType>(Op->getType()); | |||
738 | if (!VTy) | |||
739 | return false; | |||
740 | ||||
741 | // We can only cast from large to smaller vectors | |||
742 | if (Result.VTy->getNumElements() % VTy->getNumElements()) | |||
743 | return false; | |||
744 | ||||
745 | unsigned Factor = Result.VTy->getNumElements() / VTy->getNumElements(); | |||
746 | unsigned NewSize = DL.getTypeAllocSize(Result.VTy->getElementType()); | |||
747 | unsigned OldSize = DL.getTypeAllocSize(VTy->getElementType()); | |||
748 | ||||
749 | if (NewSize * Factor != OldSize) | |||
750 | return false; | |||
751 | ||||
752 | VectorInfo Old(VTy); | |||
753 | if (!compute(Op, Old, DL)) | |||
754 | return false; | |||
755 | ||||
756 | for (unsigned i = 0; i < Result.VTy->getNumElements(); i += Factor) { | |||
757 | for (unsigned j = 0; j < Factor; j++) { | |||
758 | Result.EI[i + j] = | |||
759 | ElementInfo(Old.EI[i / Factor].Ofs + j * NewSize, | |||
760 | j == 0 ? Old.EI[i / Factor].LI : nullptr); | |||
761 | } | |||
762 | } | |||
763 | ||||
764 | Result.BB = Old.BB; | |||
765 | Result.PV = Old.PV; | |||
766 | Result.LIs.insert(Old.LIs.begin(), Old.LIs.end()); | |||
767 | Result.Is.insert(Old.Is.begin(), Old.Is.end()); | |||
768 | Result.Is.insert(BCI); | |||
769 | Result.SVI = nullptr; | |||
770 | ||||
771 | return true; | |||
772 | } | |||
773 | ||||
/// ShuffleVectorInst specialization to compute vector information.
///
/// \param SVI ShuffleVectorInst to operate on
/// \param Result Result of the computation
///
/// Compute the left and the right side vector information and merge them by
/// applying the shuffle operation. This function also ensures that the left
/// and right side have compatible loads. This means that all loads are with
/// in the same basic block and are based on the same pointer.
///
/// \returns false if no sensible information can be gathered.
static bool computeFromSVI(ShuffleVectorInst *SVI, VectorInfo &Result,
                           const DataLayout &DL) {
  VectorType *ArgTy = dyn_cast<VectorType>(SVI->getOperand(0)->getType());
  assert(ArgTy && "ShuffleVector Operand is not a VectorType")((ArgTy && "ShuffleVector Operand is not a VectorType" ) ? static_cast<void> (0) : __assert_fail ("ArgTy && \"ShuffleVector Operand is not a VectorType\"" , "/build/llvm-toolchain-snapshot-9~svn359426/lib/CodeGen/InterleavedLoadCombinePass.cpp" , 788, __PRETTY_FUNCTION__));

  // Compute the left hand vector information.
  // A failed recursion is recorded as BB == nullptr; the merge logic below
  // treats such a side as "no information" rather than failing outright.
  VectorInfo LHS(ArgTy);
  if (!compute(SVI->getOperand(0), LHS, DL))
    LHS.BB = nullptr;

  // Compute the right hand vector information.
  VectorInfo RHS(ArgTy);
  if (!compute(SVI->getOperand(1), RHS, DL))
    RHS.BB = nullptr;

  // Neither operand produced sensible results?
  if (!LHS.BB && !RHS.BB)
    return false;
  // Only RHS produced sensible results?
  else if (!LHS.BB) {
    Result.BB = RHS.BB;
    Result.PV = RHS.PV;
  }
  // Only LHS produced sensible results?
  else if (!RHS.BB) {
    Result.BB = LHS.BB;
    Result.PV = LHS.PV;
  }
  // Both operands produced sensible results? They are only compatible when
  // they live in the same basic block and are based on the same pointer.
  else if ((LHS.BB == RHS.BB) && (LHS.PV == RHS.PV)) {
    Result.BB = LHS.BB;
    Result.PV = LHS.PV;
  }
  // Both operands produced sensible results but they are incompatible.
  else {
    return false;
  }

  // Merge and apply the operation on the offset information.
  if (LHS.BB) {
    Result.LIs.insert(LHS.LIs.begin(), LHS.LIs.end());
    Result.Is.insert(LHS.Is.begin(), LHS.Is.end());
  }
  if (RHS.BB) {
    Result.LIs.insert(RHS.LIs.begin(), RHS.LIs.end());
    Result.Is.insert(RHS.Is.begin(), RHS.Is.end());
  }
  Result.Is.insert(SVI);
  Result.SVI = SVI;

  // Translate each shuffle mask index into the ElementInfo of the operand it
  // selects from. Undef lanes (negative indices) and lanes taken from a side
  // without information become empty ElementInfos.
  int j = 0;
  for (int i : SVI->getShuffleMask()) {
    assert((i < 2 * (signed)ArgTy->getNumElements()) &&(((i < 2 * (signed)ArgTy->getNumElements()) && "Invalid ShuffleVectorInst (index out of bounds)" ) ? static_cast<void> (0) : __assert_fail ("(i < 2 * (signed)ArgTy->getNumElements()) && \"Invalid ShuffleVectorInst (index out of bounds)\"" , "/build/llvm-toolchain-snapshot-9~svn359426/lib/CodeGen/InterleavedLoadCombinePass.cpp" , 838, __PRETTY_FUNCTION__))
           "Invalid ShuffleVectorInst (index out of bounds)")(((i < 2 * (signed)ArgTy->getNumElements()) && "Invalid ShuffleVectorInst (index out of bounds)" ) ? static_cast<void> (0) : __assert_fail ("(i < 2 * (signed)ArgTy->getNumElements()) && \"Invalid ShuffleVectorInst (index out of bounds)\"" , "/build/llvm-toolchain-snapshot-9~svn359426/lib/CodeGen/InterleavedLoadCombinePass.cpp" , 838, __PRETTY_FUNCTION__));

    if (i < 0)
      Result.EI[j] = ElementInfo();
    else if (i < (signed)ArgTy->getNumElements()) {
      // Index selects from the left operand.
      if (LHS.BB)
        Result.EI[j] = LHS.EI[i];
      else
        Result.EI[j] = ElementInfo();
    } else {
      // Index selects from the right operand; rebase into RHS's range.
      if (RHS.BB)
        Result.EI[j] = RHS.EI[i - ArgTy->getNumElements()];
      else
        Result.EI[j] = ElementInfo();
    }
    j++;
  }

  return true;
}
858 | ||||
859 | /// LoadInst specialization to compute vector information. | |||
860 | /// | |||
861 | /// This function also acts as abort condition to the recursion. | |||
862 | /// | |||
863 | /// \param LI LoadInst to operate on | |||
864 | /// \param Result Result of the computation | |||
865 | /// | |||
866 | /// \returns false if no sensible information can be gathered. | |||
867 | static bool computeFromLI(LoadInst *LI, VectorInfo &Result, | |||
868 | const DataLayout &DL) { | |||
869 | Value *BasePtr; | |||
870 | Polynomial Offset; | |||
871 | ||||
872 | if (LI->isVolatile()) | |||
873 | return false; | |||
874 | ||||
875 | if (LI->isAtomic()) | |||
876 | return false; | |||
877 | ||||
878 | // Get the base polynomial | |||
879 | computePolynomialFromPointer(*LI->getPointerOperand(), Offset, BasePtr, DL); | |||
880 | ||||
881 | Result.BB = LI->getParent(); | |||
882 | Result.PV = BasePtr; | |||
883 | Result.LIs.insert(LI); | |||
884 | Result.Is.insert(LI); | |||
885 | ||||
886 | for (unsigned i = 0; i < Result.getDimension(); i++) { | |||
887 | Value *Idx[2] = { | |||
888 | ConstantInt::get(Type::getInt32Ty(LI->getContext()), 0), | |||
889 | ConstantInt::get(Type::getInt32Ty(LI->getContext()), i), | |||
890 | }; | |||
891 | int64_t Ofs = DL.getIndexedOffsetInType(Result.VTy, makeArrayRef(Idx, 2)); | |||
892 | Result.EI[i] = ElementInfo(Offset + Ofs, i == 0 ? LI : nullptr); | |||
893 | } | |||
894 | ||||
895 | return true; | |||
896 | } | |||
897 | ||||
898 | /// Recursively compute polynomial of a value. | |||
899 | /// | |||
900 | /// \param BO Input binary operation | |||
901 | /// \param Result Result polynomial | |||
902 | static void computePolynomialBinOp(BinaryOperator &BO, Polynomial &Result) { | |||
903 | Value *LHS = BO.getOperand(0); | |||
904 | Value *RHS = BO.getOperand(1); | |||
905 | ||||
906 | // Find the RHS Constant if any | |||
907 | ConstantInt *C = dyn_cast<ConstantInt>(RHS); | |||
908 | if ((!C) && BO.isCommutative()) { | |||
909 | C = dyn_cast<ConstantInt>(LHS); | |||
910 | if (C) | |||
911 | std::swap(LHS, RHS); | |||
912 | } | |||
913 | ||||
914 | switch (BO.getOpcode()) { | |||
915 | case Instruction::Add: | |||
916 | if (!C) | |||
917 | break; | |||
918 | ||||
919 | computePolynomial(*LHS, Result); | |||
920 | Result.add(C->getValue()); | |||
921 | return; | |||
922 | ||||
923 | case Instruction::LShr: | |||
924 | if (!C) | |||
925 | break; | |||
926 | ||||
927 | computePolynomial(*LHS, Result); | |||
928 | Result.lshr(C->getValue()); | |||
929 | return; | |||
930 | ||||
931 | default: | |||
932 | break; | |||
933 | } | |||
934 | ||||
935 | Result = Polynomial(&BO); | |||
936 | } | |||
937 | ||||
938 | /// Recursively compute polynomial of a value | |||
939 | /// | |||
940 | /// \param V input value | |||
941 | /// \param Result result polynomial | |||
942 | static void computePolynomial(Value &V, Polynomial &Result) { | |||
943 | if (isa<BinaryOperator>(&V)) | |||
944 | computePolynomialBinOp(*dyn_cast<BinaryOperator>(&V), Result); | |||
945 | else | |||
946 | Result = Polynomial(&V); | |||
947 | } | |||
948 | ||||
949 | /// Compute the Polynomial representation of a Pointer type. | |||
950 | /// | |||
951 | /// \param Ptr input pointer value | |||
952 | /// \param Result result polynomial | |||
953 | /// \param BasePtr pointer the polynomial is based on | |||
954 | /// \param DL Datalayout of the target machine | |||
955 | static void computePolynomialFromPointer(Value &Ptr, Polynomial &Result, | |||
956 | Value *&BasePtr, | |||
957 | const DataLayout &DL) { | |||
958 | // Not a pointer type? Return an undefined polynomial | |||
959 | PointerType *PtrTy = dyn_cast<PointerType>(Ptr.getType()); | |||
960 | if (!PtrTy) { | |||
961 | Result = Polynomial(); | |||
962 | BasePtr = nullptr; | |||
963 | } | |||
964 | unsigned PointerBits = | |||
965 | DL.getIndexSizeInBits(PtrTy->getPointerAddressSpace()); | |||
| ||||
966 | ||||
967 | /// Skip pointer casts. Return Zero polynomial otherwise | |||
968 | if (isa<CastInst>(&Ptr)) { | |||
969 | CastInst &CI = *cast<CastInst>(&Ptr); | |||
970 | switch (CI.getOpcode()) { | |||
971 | case Instruction::BitCast: | |||
972 | computePolynomialFromPointer(*CI.getOperand(0), Result, BasePtr, DL); | |||
973 | break; | |||
974 | default: | |||
975 | BasePtr = &Ptr; | |||
976 | Polynomial(PointerBits, 0); | |||
977 | break; | |||
978 | } | |||
979 | } | |||
980 | /// Resolve GetElementPtrInst. | |||
981 | else if (isa<GetElementPtrInst>(&Ptr)) { | |||
982 | GetElementPtrInst &GEP = *cast<GetElementPtrInst>(&Ptr); | |||
983 | ||||
984 | APInt BaseOffset(PointerBits, 0); | |||
985 | ||||
986 | // Check if we can compute the Offset with accumulateConstantOffset | |||
987 | if (GEP.accumulateConstantOffset(DL, BaseOffset)) { | |||
988 | Result = Polynomial(BaseOffset); | |||
989 | BasePtr = GEP.getPointerOperand(); | |||
990 | return; | |||
991 | } else { | |||
992 | // Otherwise we allow that the last index operand of the GEP is | |||
993 | // non-constant. | |||
994 | unsigned idxOperand, e; | |||
995 | SmallVector<Value *, 4> Indices; | |||
996 | for (idxOperand = 1, e = GEP.getNumOperands(); idxOperand < e; | |||
997 | idxOperand++) { | |||
998 | ConstantInt *IDX = dyn_cast<ConstantInt>(GEP.getOperand(idxOperand)); | |||
999 | if (!IDX) | |||
1000 | break; | |||
1001 | Indices.push_back(IDX); | |||
1002 | } | |||
1003 | ||||
1004 | // It must also be the last operand. | |||
1005 | if (idxOperand + 1 != e) { | |||
1006 | Result = Polynomial(); | |||
1007 | BasePtr = nullptr; | |||
1008 | return; | |||
1009 | } | |||
1010 | ||||
1011 | // Compute the polynomial of the index operand. | |||
1012 | computePolynomial(*GEP.getOperand(idxOperand), Result); | |||
1013 | ||||
1014 | // Compute base offset from zero based index, excluding the last | |||
1015 | // variable operand. | |||
1016 | BaseOffset = | |||
1017 | DL.getIndexedOffsetInType(GEP.getSourceElementType(), Indices); | |||
1018 | ||||
1019 | // Apply the operations of GEP to the polynomial. | |||
1020 | unsigned ResultSize = DL.getTypeAllocSize(GEP.getResultElementType()); | |||
1021 | Result.sextOrTrunc(PointerBits); | |||
1022 | Result.mul(APInt(PointerBits, ResultSize)); | |||
1023 | Result.add(BaseOffset); | |||
1024 | BasePtr = GEP.getPointerOperand(); | |||
1025 | } | |||
1026 | } | |||
1027 | // All other instructions are handled by using the value as base pointer and | |||
1028 | // a zero polynomial. | |||
1029 | else { | |||
1030 | BasePtr = &Ptr; | |||
1031 | Polynomial(DL.getIndexSizeInBits(PtrTy->getPointerAddressSpace()), 0); | |||
1032 | } | |||
1033 | } | |||
1034 | ||||
1035 | #ifndef NDEBUG | |||
1036 | void print(raw_ostream &OS) const { | |||
1037 | if (PV) | |||
1038 | OS << *PV; | |||
1039 | else | |||
1040 | OS << "(none)"; | |||
1041 | OS << " + "; | |||
1042 | for (unsigned i = 0; i < getDimension(); i++) | |||
1043 | OS << ((i == 0) ? "[" : ", ") << EI[i].Ofs; | |||
1044 | OS << "]"; | |||
1045 | } | |||
1046 | #endif | |||
1047 | }; | |||
1048 | ||||
1049 | } // anonymous namespace | |||
1050 | ||||
// Search Candidates for a group of Factor VectorInfos that together form one
// interleaved load: all members share the same vector type, basic block and
// base pointer, and their lane-0 offsets are provably spaced exactly one
// element size apart. On success the group is spliced out of Candidates into
// InterleavedLoad (in offset order) and true is returned.
bool InterleavedLoadCombineImpl::findPattern(
    std::list<VectorInfo> &Candidates, std::list<VectorInfo> &InterleavedLoad,
    unsigned Factor, const DataLayout &DL) {
  for (auto C0 = Candidates.begin(), E0 = Candidates.end(); C0 != E0; ++C0) {
    unsigned i;
    // Try to find an interleaved load using the front of Worklist as first line
    unsigned Size = DL.getTypeAllocSize(C0->VTy->getElementType());

    // List containing iterators pointing to the VectorInfos of the candidates
    std::vector<std::list<VectorInfo>::iterator> Res(Factor, Candidates.end());

    for (auto C = Candidates.begin(), E = Candidates.end(); C != E; C++) {
      if (C->VTy != C0->VTy)
        continue;
      if (C->BB != C0->BB)
        continue;
      if (C->PV != C0->PV)
        continue;

      // Check the current value matches any of factor - 1 remaining lines
      for (i = 1; i < Factor; i++) {
        if (C->EI[0].Ofs.isProvenEqualTo(C0->EI[0].Ofs + i * Size)) {
          Res[i] = C;
        }
      }

      // Stop once every partner line has been found. Note that Res
      // accumulates across iterations of this loop, so each Res[i] may have
      // been filled in by a different candidate C.
      for (i = 1; i < Factor; i++) {
        if (Res[i] == Candidates.end())
          break;
      }
      if (i == Factor) {
        Res[0] = C0;
        break;
      }
    }

    if (Res[0] != Candidates.end()) {
      // Move the result into the output
      for (unsigned i = 0; i < Factor; i++) {
        InterleavedLoad.splice(InterleavedLoad.end(), Candidates, Res[i]);
      }

      return true;
    }
  }
  return false;
}
1098 | ||||
// Return the member of \p LIs that appears first in program order. All loads
// are required (and asserted) to live in the same basic block.
LoadInst *
InterleavedLoadCombineImpl::findFirstLoad(const std::set<LoadInst *> &LIs) {
  assert(!LIs.empty() && "No load instructions given.")((!LIs.empty() && "No load instructions given.") ? static_cast <void> (0) : __assert_fail ("!LIs.empty() && \"No load instructions given.\"" , "/build/llvm-toolchain-snapshot-9~svn359426/lib/CodeGen/InterleavedLoadCombinePass.cpp" , 1101, __PRETTY_FUNCTION__));

  // All LIs are within the same BB. Select the first for a reference.
  BasicBlock *BB = (*LIs.begin())->getParent();
  // Walk the block front-to-back and stop at the first instruction that is a
  // member of the set.
  BasicBlock::iterator FLI =
      std::find_if(BB->begin(), BB->end(), [&LIs](Instruction &I) -> bool {
        return is_contained(LIs, &I);
      });
  assert(FLI != BB->end())((FLI != BB->end()) ? static_cast<void> (0) : __assert_fail ("FLI != BB->end()", "/build/llvm-toolchain-snapshot-9~svn359426/lib/CodeGen/InterleavedLoadCombinePass.cpp" , 1109, __PRETTY_FUNCTION__));

  return cast<LoadInst>(FLI);
}
1113 | ||||
// Replace the Factor ShuffleVectorInsts of \p InterleavedLoad with a single
// wide load followed by strided shuffles -- the exact pattern
// InterleavedAccessPass recognizes. Returns false (IR untouched) if any
// legality or profitability check fails; the replaced instructions are left
// dead for later cleanup.
bool InterleavedLoadCombineImpl::combine(std::list<VectorInfo> &InterleavedLoad,
                                         OptimizationRemarkEmitter &ORE) {
  LLVM_DEBUG(dbgs() << "Checking interleaved load\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("interleaved-load-combine")) { dbgs() << "Checking interleaved load\n" ; } } while (false);

  // The insertion point is the LoadInst which loads the first values. The
  // following tests are used to prove that the combined load can be inserted
  // just before InsertionPoint.
  LoadInst *InsertionPoint = InterleavedLoad.front().EI[0].LI;

  // Test if the offset is computed
  if (!InsertionPoint)
    return false;

  std::set<LoadInst *> LIs;
  std::set<Instruction *> Is;
  std::set<Instruction *> SVIs;

  unsigned InterleavedCost;
  unsigned InstructionCost = 0;

  // Get the interleave factor
  unsigned Factor = InterleavedLoad.size();

  // Merge all input sets used in analysis
  for (auto &VI : InterleavedLoad) {
    // Generate a set of all load instructions to be combined
    LIs.insert(VI.LIs.begin(), VI.LIs.end());

    // Generate a set of all instructions taking part in load
    // interleaved. This list excludes the instructions necessary for the
    // polynomial construction.
    Is.insert(VI.Is.begin(), VI.Is.end());

    // Generate the set of the final ShuffleVectorInst.
    SVIs.insert(VI.SVI);
  }

  // There is nothing to combine.
  if (LIs.size() < 2)
    return false;

  // Test if all participating instruction will be dead after the
  // transformation. If intermediate results are used, no performance gain can
  // be expected. Also sum the cost of the Instructions being left dead.
  for (auto &I : Is) {
    // Compute the old cost
    InstructionCost +=
        TTI.getInstructionCost(I, TargetTransformInfo::TCK_Latency);

    // The final SVIs are allowed not to be dead, all uses will be replaced
    if (SVIs.find(I) != SVIs.end())
      continue;

    // If there are users outside the set to be eliminated, we abort the
    // transformation. No gain can be expected.
    for (const auto &U : I->users()) {
      if (Is.find(dyn_cast<Instruction>(U)) == Is.end())
        return false;
    }
  }

  // We know that all LoadInst are within the same BB. This guarantees that
  // either everything or nothing is loaded.
  LoadInst *First = findFirstLoad(LIs);

  // To be safe that the loads can be combined, iterate over all loads and test
  // that the corresponding defining access dominates first LI. This guarantees
  // that there are no aliasing stores in between the loads.
  auto FMA = MSSA.getMemoryAccess(First);
  for (auto LI : LIs) {
    auto MADef = MSSA.getMemoryAccess(LI)->getDefiningAccess();
    if (!MSSA.dominates(MADef, FMA))
      return false;
  }
  assert(!LIs.empty() && "There are no LoadInst to combine")((!LIs.empty() && "There are no LoadInst to combine") ? static_cast<void> (0) : __assert_fail ("!LIs.empty() && \"There are no LoadInst to combine\"" , "/build/llvm-toolchain-snapshot-9~svn359426/lib/CodeGen/InterleavedLoadCombinePass.cpp" , 1188, __PRETTY_FUNCTION__));

  // It is necessary that insertion point dominates all final ShuffleVectorInst.
  for (auto &VI : InterleavedLoad) {
    if (!DT.dominates(InsertionPoint, VI.SVI))
      return false;
  }

  // All checks are done. Add instructions detectable by InterleavedAccessPass
  // The old instructions are left dead.
  IRBuilder<> Builder(InsertionPoint);
  Type *ETy = InterleavedLoad.front().SVI->getType()->getElementType();
  unsigned ElementsPerSVI =
      InterleavedLoad.front().SVI->getType()->getNumElements();
  VectorType *ILTy = VectorType::get(ETy, Factor * ElementsPerSVI);

  // Estimate the cost of the wide interleaved load and compare it against the
  // summed latency of the instructions it would replace; bail out unless it
  // is strictly cheaper.
  SmallVector<unsigned, 4> Indices;
  for (unsigned i = 0; i < Factor; i++)
    Indices.push_back(i);
  InterleavedCost = TTI.getInterleavedMemoryOpCost(
      Instruction::Load, ILTy, Factor, Indices, InsertionPoint->getAlignment(),
      InsertionPoint->getPointerAddressSpace());

  if (InterleavedCost >= InstructionCost) {
    return false;
  }

  // Create a pointer cast for the wide load.
  auto CI = Builder.CreatePointerCast(InsertionPoint->getOperand(0),
                                      ILTy->getPointerTo(),
                                      "interleaved.wide.ptrcast");

  // Create the wide load and update the MemorySSA.
  auto LI = Builder.CreateAlignedLoad(ILTy, CI, InsertionPoint->getAlignment(),
                                      "interleaved.wide.load");
  auto MSSAU = MemorySSAUpdater(&MSSA);
  MemoryUse *MSSALoad = cast<MemoryUse>(MSSAU.createMemoryAccessBefore(
      LI, nullptr, MSSA.getMemoryAccess(InsertionPoint)));
  MSSAU.insertUse(MSSALoad);

  // Create the final SVIs and replace all uses.
  int i = 0;
  for (auto &VI : InterleavedLoad) {
    // Shuffle i extracts every Factor-th element of the wide load, starting
    // at offset i -- i.e. the de-interleaved lane i.
    SmallVector<uint32_t, 4> Mask;
    for (unsigned j = 0; j < ElementsPerSVI; j++)
      Mask.push_back(i + j * Factor);

    Builder.SetInsertPoint(VI.SVI);
    auto SVI = Builder.CreateShuffleVector(LI, UndefValue::get(LI->getType()),
                                           Mask, "interleaved.shuffle");
    VI.SVI->replaceAllUsesWith(SVI);
    i++;
  }

  NumInterleavedLoadCombine++;
  ORE.emit([&]() {
    return OptimizationRemark(DEBUG_TYPE"interleaved-load-combine", "Combined Interleaved Load", LI)
           << "Load interleaved combined with factor "
           << ore::NV("Factor", Factor);
  });

  return true;
}
1251 | ||||
1252 | bool InterleavedLoadCombineImpl::run() { | |||
1253 | OptimizationRemarkEmitter ORE(&F); | |||
1254 | bool changed = false; | |||
1255 | unsigned MaxFactor = TLI.getMaxSupportedInterleaveFactor(); | |||
1256 | ||||
1257 | auto &DL = F.getParent()->getDataLayout(); | |||
1258 | ||||
1259 | // Start with the highest factor to avoid combining and recombining. | |||
1260 | for (unsigned Factor = MaxFactor; Factor >= 2; Factor--) { | |||
1261 | std::list<VectorInfo> Candidates; | |||
1262 | ||||
1263 | for (BasicBlock &BB : F) { | |||
1264 | for (Instruction &I : BB) { | |||
1265 | if (auto SVI = dyn_cast<ShuffleVectorInst>(&I)) { | |||
1266 | ||||
1267 | Candidates.emplace_back(SVI->getType()); | |||
1268 | ||||
1269 | if (!VectorInfo::computeFromSVI(SVI, Candidates.back(), DL)) { | |||
1270 | Candidates.pop_back(); | |||
1271 | continue; | |||
1272 | } | |||
1273 | ||||
1274 | if (!Candidates.back().isInterleaved(Factor, DL)) { | |||
1275 | Candidates.pop_back(); | |||
1276 | } | |||
1277 | } | |||
1278 | } | |||
1279 | } | |||
1280 | ||||
1281 | std::list<VectorInfo> InterleavedLoad; | |||
1282 | while (findPattern(Candidates, InterleavedLoad, Factor, DL)) { | |||
1283 | if (combine(InterleavedLoad, ORE)) { | |||
1284 | changed = true; | |||
1285 | } else { | |||
1286 | // Remove the first element of the Interleaved Load but put the others | |||
1287 | // back on the list and continue searching | |||
1288 | Candidates.splice(Candidates.begin(), InterleavedLoad, | |||
1289 | std::next(InterleavedLoad.begin()), | |||
1290 | InterleavedLoad.end()); | |||
1291 | } | |||
1292 | InterleavedLoad.clear(); | |||
1293 | } | |||
1294 | } | |||
1295 | ||||
1296 | return changed; | |||
1297 | } | |||
1298 | ||||
1299 | namespace { | |||
/// This pass combines interleaved loads into a pattern detectable by
/// InterleavedAccessPass.
struct InterleavedLoadCombine : public FunctionPass {
  static char ID; // Pass identification.

  InterleavedLoadCombine() : FunctionPass(ID) {
    initializeInterleavedLoadCombinePass(*PassRegistry::getPassRegistry());
  }

  StringRef getPassName() const override {
    return "Interleaved Load Combine Pass";
  }

  /// Run InterleavedLoadCombineImpl on \p F; returns true if the IR changed.
  /// Does nothing when disabled on the command line or when no
  /// TargetPassConfig (and therefore no TargetMachine) is available.
  bool runOnFunction(Function &F) override {
    if (DisableInterleavedLoadCombine)
      return false;

    auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
    if (!TPC)
      return false;

    LLVM_DEBUG(dbgs() << "*** " << getPassName() << ": " << F.getName()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("interleaved-load-combine")) { dbgs() << "*** " << getPassName() << ": " << F.getName() << "\n" ; } } while (false)
               << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("interleaved-load-combine")) { dbgs() << "*** " << getPassName() << ": " << F.getName() << "\n" ; } } while (false);

    return InterleavedLoadCombineImpl(
               F, getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
               getAnalysis<MemorySSAWrapperPass>().getMSSA(),
               TPC->getTM<TargetMachine>())
        .run();
  }

  /// The implementation transforms IR and needs dominator tree and MemorySSA
  /// analyses.
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<MemorySSAWrapperPass>();
    AU.addRequired<DominatorTreeWrapperPass>();
    FunctionPass::getAnalysisUsage(AU);
  }

private:
};
1339 | } // anonymous namespace | |||
1340 | ||||
char InterleavedLoadCombine::ID = 0;

// Legacy pass registration. The INITIALIZE_PASS_* macros below (shown fused
// with their preprocessor expansion) define
// initializeInterleavedLoadCombinePassOnce(), which registers the pass and
// its DominatorTree/MemorySSA dependencies exactly once via llvm::call_once.
INITIALIZE_PASS_BEGIN(static void *initializeInterleavedLoadCombinePassOnce(PassRegistry &Registry) {
    InterleavedLoadCombine, DEBUG_TYPE,static void *initializeInterleavedLoadCombinePassOnce(PassRegistry &Registry) {
    "Combine interleaved loads into wide loads and shufflevector instructions",static void *initializeInterleavedLoadCombinePassOnce(PassRegistry &Registry) {
    false, false)static void *initializeInterleavedLoadCombinePassOnce(PassRegistry &Registry) {
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)initializeDominatorTreeWrapperPassPass(Registry);
INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)initializeMemorySSAWrapperPassPass(Registry);
INITIALIZE_PASS_END(PassInfo *PI = new PassInfo( "Combine interleaved loads into wide loads and shufflevector instructions" , "interleaved-load-combine", &InterleavedLoadCombine::ID , PassInfo::NormalCtor_t(callDefaultCtor<InterleavedLoadCombine >), false, false); Registry.registerPass(*PI, true); return PI; } static llvm::once_flag InitializeInterleavedLoadCombinePassFlag ; void llvm::initializeInterleavedLoadCombinePass(PassRegistry &Registry) { llvm::call_once(InitializeInterleavedLoadCombinePassFlag , initializeInterleavedLoadCombinePassOnce, std::ref(Registry )); }
    InterleavedLoadCombine, DEBUG_TYPE,PassInfo *PI = new PassInfo( "Combine interleaved loads into wide loads and shufflevector instructions" , "interleaved-load-combine", &InterleavedLoadCombine::ID , PassInfo::NormalCtor_t(callDefaultCtor<InterleavedLoadCombine >), false, false); Registry.registerPass(*PI, true); return PI; } static llvm::once_flag InitializeInterleavedLoadCombinePassFlag ; void llvm::initializeInterleavedLoadCombinePass(PassRegistry &Registry) { llvm::call_once(InitializeInterleavedLoadCombinePassFlag , initializeInterleavedLoadCombinePassOnce, std::ref(Registry )); }
    "Combine interleaved loads into wide loads and shufflevector instructions",PassInfo *PI = new PassInfo( "Combine interleaved loads into wide loads and shufflevector instructions" , "interleaved-load-combine", &InterleavedLoadCombine::ID , PassInfo::NormalCtor_t(callDefaultCtor<InterleavedLoadCombine >), false, false); Registry.registerPass(*PI, true); return PI; } static llvm::once_flag InitializeInterleavedLoadCombinePassFlag ; void llvm::initializeInterleavedLoadCombinePass(PassRegistry &Registry) { llvm::call_once(InitializeInterleavedLoadCombinePassFlag , initializeInterleavedLoadCombinePassOnce, std::ref(Registry )); }
    false, false)PassInfo *PI = new PassInfo( "Combine interleaved loads into wide loads and shufflevector instructions" , "interleaved-load-combine", &InterleavedLoadCombine::ID , PassInfo::NormalCtor_t(callDefaultCtor<InterleavedLoadCombine >), false, false); Registry.registerPass(*PI, true); return PI; } static llvm::once_flag InitializeInterleavedLoadCombinePassFlag ; void llvm::initializeInterleavedLoadCombinePass(PassRegistry &Registry) { llvm::call_once(InitializeInterleavedLoadCombinePassFlag , initializeInterleavedLoadCombinePassOnce, std::ref(Registry )); }
1353 | ||||
1354 | FunctionPass * | |||
1355 | llvm::createInterleavedLoadCombinePass() { | |||
1356 | auto P = new InterleavedLoadCombine(); | |||
1357 | return P; | |||
1358 | } |
1 | //===- llvm/Support/Casting.h - Allow flexible, checked, casts --*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file defines the isa<X>(), cast<X>(), dyn_cast<X>(), cast_or_null<X>(), |
10 | // and dyn_cast_or_null<X>() templates. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #ifndef LLVM_SUPPORT_CASTING_H |
15 | #define LLVM_SUPPORT_CASTING_H |
16 | |
17 | #include "llvm/Support/Compiler.h" |
18 | #include "llvm/Support/type_traits.h" |
19 | #include <cassert> |
20 | #include <memory> |
21 | #include <type_traits> |
22 | |
23 | namespace llvm { |
24 | |
25 | //===----------------------------------------------------------------------===// |
26 | // isa<x> Support Templates |
27 | //===----------------------------------------------------------------------===// |
28 | |
// Define a template that can be specialized by smart pointers to reflect the
// fact that they are automatically dereferenced, and are not involved with the
// template selection process...  the default implementation is a noop.
//
// Primary template: a type "simplifies" to itself.
template<typename From> struct simplify_type {
  using SimpleType = From; // The real type this represents...

  // An accessor to get the real value...
  static SimpleType &getSimplifiedValue(From &Val) { return Val; }
};
39 | |
// const-qualified specialization: simplify the underlying non-const type,
// then re-apply constness to the result (placing it past a pointer, if any).
template<typename From> struct simplify_type<const From> {
  using NonConstSimpleType = typename simplify_type<From>::SimpleType;
  using SimpleType =
      typename add_const_past_pointer<NonConstSimpleType>::type;
  using RetType =
      typename add_lvalue_reference_if_not_pointer<SimpleType>::type;

  static RetType getSimplifiedValue(const From& Val) {
    // Safe: the non-const overload does not mutate Val.
    return simplify_type<From>::getSimplifiedValue(const_cast<From&>(Val));
  }
};
51 | |
// The core of the implementation of isa<X> is here; To and From should be
// the names of classes.  This template can be specialized to customize the
// implementation of isa<> without rewriting it from scratch.
template <typename To, typename From, typename Enabler = void>
struct isa_impl {
  // Default behavior: defer to the target class's classof() predicate.
  static inline bool doit(const From &Val) {
    return To::classof(&Val);
  }
};
61 | |
/// Always allow upcasts, and perform no dynamic check for them.
// Selected via SFINAE whenever To is a base class of From.
template <typename To, typename From>
struct isa_impl<
    To, From, typename std::enable_if<std::is_base_of<To, From>::value>::type> {
  static inline bool doit(const From &) { return true; }
};
68 | |
// isa_impl_cl strips pointer/const qualifiers from From before dispatching to
// isa_impl. Primary template: plain reference.
template <typename To, typename From> struct isa_impl_cl {
  static inline bool doit(const From &Val) {
    return isa_impl<To, From>::doit(Val);
  }
};
74 | |
// Specialization for const references.
template <typename To, typename From> struct isa_impl_cl<To, const From> {
  static inline bool doit(const From &Val) {
    return isa_impl<To, From>::doit(Val);
  }
};
80 | |
// Specialization for std::unique_ptr: dereferences the smart pointer, which
// must not be null.
template <typename To, typename From>
struct isa_impl_cl<To, const std::unique_ptr<From>> {
  static inline bool doit(const std::unique_ptr<From> &Val) {
    assert(Val && "isa<> used on a null pointer")((Val && "isa<> used on a null pointer") ? static_cast <void> (0) : __assert_fail ("Val && \"isa<> used on a null pointer\"" , "/build/llvm-toolchain-snapshot-9~svn359426/include/llvm/Support/Casting.h" , 84, __PRETTY_FUNCTION__));
    return isa_impl_cl<To, From>::doit(*Val);
  }
};
88 | |
// Specialization for raw pointers, which must not be null.
template <typename To, typename From> struct isa_impl_cl<To, From*> {
  static inline bool doit(const From *Val) {
    assert(Val && "isa<> used on a null pointer")((Val && "isa<> used on a null pointer") ? static_cast <void> (0) : __assert_fail ("Val && \"isa<> used on a null pointer\"" , "/build/llvm-toolchain-snapshot-9~svn359426/include/llvm/Support/Casting.h" , 91, __PRETTY_FUNCTION__));
    return isa_impl<To, From>::doit(*Val);
  }
};
95 | |
// Specialization for const-qualified raw pointers (T *const).
template <typename To, typename From> struct isa_impl_cl<To, From*const> {
  static inline bool doit(const From *Val) {
    assert(Val && "isa<> used on a null pointer")((Val && "isa<> used on a null pointer") ? static_cast <void> (0) : __assert_fail ("Val && \"isa<> used on a null pointer\"" , "/build/llvm-toolchain-snapshot-9~svn359426/include/llvm/Support/Casting.h" , 98, __PRETTY_FUNCTION__));
    return isa_impl<To, From>::doit(*Val);
  }
};
102 | |
// Specialization for pointer-to-const (const T *).
template <typename To, typename From> struct isa_impl_cl<To, const From*> {
  static inline bool doit(const From *Val) {
    assert(Val && "isa<> used on a null pointer")((Val && "isa<> used on a null pointer") ? static_cast <void> (0) : __assert_fail ("Val && \"isa<> used on a null pointer\"" , "/build/llvm-toolchain-snapshot-9~svn359426/include/llvm/Support/Casting.h" , 105, __PRETTY_FUNCTION__));
    return isa_impl<To, From>::doit(*Val);
  }
};
109 | |
110 | template <typename To, typename From> struct isa_impl_cl<To, const From*const> { |
111 | static inline bool doit(const From *Val) { |
112 | assert(Val && "isa<> used on a null pointer")((Val && "isa<> used on a null pointer") ? static_cast <void> (0) : __assert_fail ("Val && \"isa<> used on a null pointer\"" , "/build/llvm-toolchain-snapshot-9~svn359426/include/llvm/Support/Casting.h" , 112, __PRETTY_FUNCTION__)); |
113 | return isa_impl<To, From>::doit(*Val); |
114 | } |
115 | }; |
116 | |
// isa_impl_wrap - Recursively apply simplify_type to the source value until
// From is its own SimpleType, then hand the fully simplified value off to
// isa_impl_cl.  The recursion terminates at the <To, FromTy, FromTy> partial
// specialization below.
template<typename To, typename From, typename SimpleFrom>
struct isa_impl_wrap {
  // When From != SimplifiedType, we can simplify the type some more by using
  // the simplify_type template.
  static bool doit(const From &Val) {
    return isa_impl_wrap<To, SimpleFrom,
      typename simplify_type<SimpleFrom>::SimpleType>::doit(
                          simplify_type<const From>::getSimplifiedValue(Val));
  }
};

// Base case of the recursion: From == SimpleType, nothing left to simplify.
template<typename To, typename FromTy>
struct isa_impl_wrap<To, FromTy, FromTy> {
  // When From == SimpleType, we are as simple as we are going to get.
  static bool doit(const FromTy &Val) {
    return isa_impl_cl<To,FromTy>::doit(Val);
  }
};
135 | |
// isa<X> - Return true if the parameter to the template is an instance of the
// template type argument. Used like this:
//
//  if (isa<Type>(myVal)) { ... }
//
// Note: the pointer specializations of isa_impl_cl assert that Val is
// non-null; pass only non-null values here (see isa_and_nonnull below for
// possibly-null input).
template <class X, class Y> LLVM_NODISCARD inline bool isa(const Y &Val) {
  return isa_impl_wrap<X, const Y,
                       typename simplify_type<const Y>::SimpleType>::doit(Val);
}
145 | |
146 | // isa_and_nonnull<X> - Functionally identical to isa, except that a null value |
147 | // is accepted. |
148 | // |
149 | template <class X, class Y> |
150 | LLVM_NODISCARD[[clang::warn_unused_result]] inline bool isa_and_nonnull(const Y &Val) { |
151 | if (!Val) |
152 | return false; |
153 | return isa<X>(Val); |
154 | } |
155 | |
//===----------------------------------------------------------------------===//
//                          cast<x> Support Templates
//===----------------------------------------------------------------------===//

template<class To, class From> struct cast_retty;

// Calculate what type the 'cast' function should return, based on a requested
// type of To and a source type of From: pointer-ness and const-ness of the
// source are mirrored onto the result type.
template<class To, class From> struct cast_retty_impl {
  using ret_type = To &;       // Normal case, return Ty&
};
template<class To, class From> struct cast_retty_impl<To, const From> {
  using ret_type = const To &; // Const value case, return const Ty&
};

template<class To, class From> struct cast_retty_impl<To, From*> {
  using ret_type = To *;       // Pointer arg case, return Ty*
};

template<class To, class From> struct cast_retty_impl<To, const From*> {
  using ret_type = const To *; // Constant pointer arg case, return const Ty*
};

template<class To, class From> struct cast_retty_impl<To, const From*const> {
  using ret_type = const To *; // Constant pointer arg case, return const Ty*
};

// std::unique_ptr<From> casts to std::unique_ptr<To-flavored pointee>,
// i.e. ownership transfers through the cast.
template <class To, class From>
struct cast_retty_impl<To, std::unique_ptr<From>> {
private:
  // Reuse the raw-pointer rule to get the (possibly const) pointee type.
  using PointerType = typename cast_retty_impl<To, From *>::ret_type;
  using ResultType = typename std::remove_pointer<PointerType>::type;

public:
  using ret_type = std::unique_ptr<ResultType>;
};
192 | |
// cast_retty_wrap - Compute the cast return type after first reducing From
// through simplify_type, then applying cast_retty_impl to the reduced type.
template<class To, class From, class SimpleFrom>
struct cast_retty_wrap {
  // When the simplified type and the from type are not the same, use the type
  // simplifier to reduce the type, then reuse cast_retty_impl to get the
  // resultant type.
  using ret_type = typename cast_retty<To, SimpleFrom>::ret_type;
};

template<class To, class FromTy>
struct cast_retty_wrap<To, FromTy, FromTy> {
  // When the simplified type is equal to the from type, use it directly.
  using ret_type = typename cast_retty_impl<To,FromTy>::ret_type;
};

// cast_retty - Public entry point: the type that cast<To>(From) returns.
template<class To, class From>
struct cast_retty {
  using ret_type = typename cast_retty_wrap<
      To, From, typename simplify_type<From>::SimpleType>::ret_type;
};
212 | |
// Ensure the non-simple values are converted using the simplify_type template
// that may be specialized by smart pointers...
//
template<class To, class From, class SimpleFrom> struct cast_convert_val {
  // This is not a simple type, use the template to simplify it...
  static typename cast_retty<To, From>::ret_type doit(From &Val) {
    return cast_convert_val<To, SimpleFrom,
      typename simplify_type<SimpleFrom>::SimpleType>::doit(
                          simplify_type<From>::getSimplifiedValue(Val));
  }
};

template<class To, class FromTy> struct cast_convert_val<To,FromTy,FromTy> {
  // This _is_ a simple type, just cast it.
  // The C-style cast (combined with const_cast) is deliberate here: it
  // performs whichever conversion the ret_type relationship requires.
  static typename cast_retty<To, FromTy>::ret_type doit(const FromTy &Val) {
    typename cast_retty<To, FromTy>::ret_type Res2
     = (typename cast_retty<To, FromTy>::ret_type)const_cast<FromTy&>(Val);
    return Res2;
  }
};

// is_simple_type<X> - True when simplify_type<X> is the identity, i.e. X
// needs no further reduction before the isa/cast machinery can act on it.
template <class X> struct is_simple_type {
  static const bool value =
      std::is_same<X, typename simplify_type<X>::SimpleType>::value;
};
238 | |
// cast<X> - Return the argument parameter cast to the specified type.  This
// casting operator asserts that the type is correct, so it does not return null
// on failure.  It does not allow a null argument (use cast_or_null for that).
// It is typically used like this:
//
//  cast<Instruction>(myVal)->getParent()
//

// Overload for non-simple const values: enable_if keeps this out of overload
// resolution when Y is already its own SimpleType (the Y& overload handles
// that case).
template <class X, class Y>
inline typename std::enable_if<!is_simple_type<Y>::value,
                               typename cast_retty<X, const Y>::ret_type>::type
cast(const Y &Val) {
  assert(isa<X>(Val) && "cast<Ty>() argument of incompatible type!");
  return cast_convert_val<
      X, const Y, typename simplify_type<const Y>::SimpleType>::doit(Val);
}

// Overload for mutable references.
template <class X, class Y>
inline typename cast_retty<X, Y>::ret_type cast(Y &Val) {
  assert(isa<X>(Val) && "cast<Ty>() argument of incompatible type!");
  return cast_convert_val<X, Y,
                          typename simplify_type<Y>::SimpleType>::doit(Val);
}

// Overload for raw pointers (the most common form).
template <class X, class Y>
inline typename cast_retty<X, Y *>::ret_type cast(Y *Val) {
  assert(isa<X>(Val) && "cast<Ty>() argument of incompatible type!");
  return cast_convert_val<X, Y*,
                          typename simplify_type<Y*>::SimpleType>::doit(Val);
}

// Overload for unique_ptr rvalues: ownership is released from Val and
// transferred into the returned unique_ptr of the target type.
template <class X, class Y>
inline typename cast_retty<X, std::unique_ptr<Y>>::ret_type
cast(std::unique_ptr<Y> &&Val) {
  assert(isa<X>(Val.get()) && "cast<Ty>() argument of incompatible type!");
  using ret_type = typename cast_retty<X, std::unique_ptr<Y>>::ret_type;
  return ret_type(
      cast_convert_val<X, Y *, typename simplify_type<Y *>::SimpleType>::doit(
          Val.release()));
}
278 | |
// cast_or_null<X> - Functionally identical to cast, except that a null value
// is accepted and propagated as a null result instead of asserting.
//

// Overload for non-simple const values.
template <class X, class Y>
LLVM_NODISCARD inline
    typename std::enable_if<!is_simple_type<Y>::value,
                            typename cast_retty<X, const Y>::ret_type>::type
    cast_or_null(const Y &Val) {
  if (!Val)
    return nullptr;
  assert(isa<X>(Val) && "cast_or_null<Ty>() argument of incompatible type!");
  return cast<X>(Val);
}

// Overload for non-simple mutable values.
template <class X, class Y>
LLVM_NODISCARD inline
    typename std::enable_if<!is_simple_type<Y>::value,
                            typename cast_retty<X, Y>::ret_type>::type
    cast_or_null(Y &Val) {
  if (!Val)
    return nullptr;
  assert(isa<X>(Val) && "cast_or_null<Ty>() argument of incompatible type!");
  return cast<X>(Val);
}

// Overload for raw pointers.
template <class X, class Y>
LLVM_NODISCARD inline typename cast_retty<X, Y *>::ret_type
cast_or_null(Y *Val) {
  if (!Val) return nullptr;
  assert(isa<X>(Val) && "cast_or_null<Ty>() argument of incompatible type!");
  return cast<X>(Val);
}

// Overload for unique_ptr rvalues; null stays null, otherwise ownership is
// transferred through cast<X>.
template <class X, class Y>
inline typename cast_retty<X, std::unique_ptr<Y>>::ret_type
cast_or_null(std::unique_ptr<Y> &&Val) {
  if (!Val)
    return nullptr;
  return cast<X>(std::move(Val));
}
319 | |
// dyn_cast<X> - Return the argument parameter cast to the specified type.  This
// casting operator returns null if the argument is of the wrong type, so it can
// be used to test for a type as well as cast if successful.  This should be
// used in the context of an if statement like this:
//
//  if (const Instruction *I = dyn_cast<Instruction>(myVal)) { ... }
//
// Note: like isa<>, dyn_cast does NOT accept a null argument — use
// dyn_cast_or_null for possibly-null values.

// Overload for non-simple const values (enable_if defers simple types to the
// Y& overload below).
template <class X, class Y>
LLVM_NODISCARD inline
    typename std::enable_if<!is_simple_type<Y>::value,
                            typename cast_retty<X, const Y>::ret_type>::type
    dyn_cast(const Y &Val) {
  return isa<X>(Val) ? cast<X>(Val) : nullptr;
}
335 | |
336 | template <class X, class Y> |
337 | LLVM_NODISCARD[[clang::warn_unused_result]] inline typename cast_retty<X, Y>::ret_type dyn_cast(Y &Val) { |
338 | return isa<X>(Val) ? cast<X>(Val) : nullptr; |
339 | } |
340 | |
341 | template <class X, class Y> |
342 | LLVM_NODISCARD[[clang::warn_unused_result]] inline typename cast_retty<X, Y *>::ret_type dyn_cast(Y *Val) { |
343 | return isa<X>(Val) ? cast<X>(Val) : nullptr; |
344 | } |
345 | |
// dyn_cast_or_null<X> - Functionally identical to dyn_cast, except that a null
// value is accepted (and yields a null result).
//

// Overload for non-simple const values.
template <class X, class Y>
LLVM_NODISCARD inline
    typename std::enable_if<!is_simple_type<Y>::value,
                            typename cast_retty<X, const Y>::ret_type>::type
    dyn_cast_or_null(const Y &Val) {
  return (Val && isa<X>(Val)) ? cast<X>(Val) : nullptr;
}

// Overload for non-simple mutable values.
template <class X, class Y>
LLVM_NODISCARD inline
    typename std::enable_if<!is_simple_type<Y>::value,
                            typename cast_retty<X, Y>::ret_type>::type
    dyn_cast_or_null(Y &Val) {
  return (Val && isa<X>(Val)) ? cast<X>(Val) : nullptr;
}
364 | |
365 | template <class X, class Y> |
366 | LLVM_NODISCARD[[clang::warn_unused_result]] inline typename cast_retty<X, Y *>::ret_type |
367 | dyn_cast_or_null(Y *Val) { |
368 | return (Val && isa<X>(Val)) ? cast<X>(Val) : nullptr; |
369 | } |
370 | |
// unique_dyn_cast<X> - Given a unique_ptr<Y>, try to return a unique_ptr<X>,
// taking ownership of the input pointer iff isa<X>(Val) is true.  If the
// cast is successful, From refers to nullptr on exit and the casted value
// is returned.  If the cast is unsuccessful, the function returns nullptr
// and From is unchanged.
template <class X, class Y>
LLVM_NODISCARD inline auto unique_dyn_cast(std::unique_ptr<Y> &Val)
    -> decltype(cast<X>(Val)) {
  if (!isa<X>(Val))
    return nullptr;
  return cast<X>(std::move(Val));
}

// Rvalue overload: binds the temporary to an lvalue and forwards to the
// overload above; ownership transfers on success.
template <class X, class Y>
LLVM_NODISCARD inline auto unique_dyn_cast(std::unique_ptr<Y> &&Val)
    -> decltype(cast<X>(Val)) {
  return unique_dyn_cast<X, Y>(Val);
}

// unique_dyn_cast_or_null<X> - Functionally identical to unique_dyn_cast,
// except that a null value is accepted.
template <class X, class Y>
LLVM_NODISCARD inline auto unique_dyn_cast_or_null(std::unique_ptr<Y> &Val)
    -> decltype(cast<X>(Val)) {
  if (!Val)
    return nullptr;
  return unique_dyn_cast<X, Y>(Val);
}

// Rvalue overload of unique_dyn_cast_or_null.
template <class X, class Y>
LLVM_NODISCARD inline auto unique_dyn_cast_or_null(std::unique_ptr<Y> &&Val)
    -> decltype(cast<X>(Val)) {
  return unique_dyn_cast_or_null<X, Y>(Val);
}
405 | |
406 | } // end namespace llvm |
407 | |
408 | #endif // LLVM_SUPPORT_CASTING_H |