doxygen/NaryReassociate_8cpp_source.html

//===- NaryReassociate.cpp - Reassociate n-ary expressions ----------------===//

//

// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

// See https://llvm.org/LICENSE.txt for license information.

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

//

//===----------------------------------------------------------------------===//

//

// This pass reassociates n-ary add expressions and eliminates the redundancy

// exposed by the reassociation.

//

// A motivating example:

//

//   void foo(int a, int b) {

//     bar(a + b);

//     bar((a + 2) + b);

//   }

//

// An ideal compiler should reassociate (a + 2) + b to (a + b) + 2 and simplify

// the above code to

//

//   int t = a + b;

//   bar(t);

//   bar(t + 2);

//

// However, the Reassociate pass is unable to do that because it processes each

// instruction individually and believes (a + 2) + b is the best form according

// to its rank system.

//

// To address this limitation, NaryReassociate reassociates an expression in a

// form that reuses existing instructions. As a result, NaryReassociate can

// reassociate (a + 2) + b in the example to (a + b) + 2 because it detects that

// (a + b) is computed before.

//

// NaryReassociate works as follows. For every instruction in the form of (a +

// b) + c, it checks whether a + c or b + c is already computed by a dominating

// instruction. If so, it then reassociates (a + b) + c into (a + c) + b or (b +

// c) + a and removes the redundancy accordingly. To efficiently look up whether

// an expression is computed before, we store each instruction seen and its SCEV

// into an SCEV-to-instruction map.

//

// Although the algorithm pattern-matches only ternary additions, it

// automatically handles many >3-ary expressions by walking through the function

// in the depth-first order. For example, given

//

//   (a + c) + d

//   ((a + b) + c) + d

//

// NaryReassociate first rewrites (a + b) + c to (a + c) + b, and then rewrites

// ((a + c) + b) + d into ((a + c) + d) + b.

//

// Finally, the above dominator-based algorithm may need to be run multiple

// iterations before emitting optimal code. One source of this need is that we

// only split an operand when it is used only once. The above algorithm can

// eliminate an instruction and decrease the usage count of its operands. As a

// result, an instruction that previously had multiple uses may become a

// single-use instruction and thus eligible for split consideration. For

// example,

//

//   ac = a + c

//   ab = a + b

//   abc = ab + c

//   ab2 = ab + b

//   ab2c = ab2 + c

//

// In the first iteration, we cannot reassociate abc to ac+b because ab is used

// twice. However, we can reassociate ab2c to abc+b in the first iteration. As a

// result, ab2 becomes dead and ab will be used only once in the second

// iteration.

//

// Limitations and TODO items:

//

// 1) We only consider n-ary adds and muls for now. This should be extended

// and generalized.

//

//===----------------------------------------------------------------------===//


#include "llvm/Transforms/Scalar/NaryReassociate.h"

#include "llvm/ADT/DepthFirstIterator.h"

#include "llvm/ADT/SmallVector.h"

#include "llvm/Analysis/AssumptionCache.h"

#include "llvm/Analysis/ScalarEvolution.h"

#include "llvm/Analysis/ScalarEvolutionExpressions.h"

#include "llvm/Analysis/TargetLibraryInfo.h"

#include "llvm/Analysis/TargetTransformInfo.h"

#include "llvm/Analysis/ValueTracking.h"

#include "llvm/IR/BasicBlock.h"

#include "llvm/IR/Constants.h"

#include "llvm/IR/DataLayout.h"

#include "llvm/IR/DerivedTypes.h"

#include "llvm/IR/Dominators.h"

#include "llvm/IR/Function.h"

#include "llvm/IR/GetElementPtrTypeIterator.h"

#include "llvm/IR/IRBuilder.h"

#include "llvm/IR/InstrTypes.h"

#include "llvm/IR/Instruction.h"

#include "llvm/IR/Instructions.h"

#include "llvm/IR/Module.h"

#include "llvm/IR/Operator.h"

#include "llvm/IR/PatternMatch.h"

#include "llvm/IR/Type.h"

#include "llvm/IR/Value.h"

#include "llvm/IR/ValueHandle.h"

#include "llvm/InitializePasses.h"

#include "llvm/Pass.h"

#include "llvm/Support/Casting.h"

#include "llvm/Support/ErrorHandling.h"

#include "llvm/Transforms/Scalar.h"

#include "llvm/Transforms/Utils/Local.h"

#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"

#include <cassert>

#include <cstdint>


using namespace llvm;

using namespace PatternMatch;


#define DEBUG_TYPE "nary-reassociate"


namespace {


/// Legacy pass-manager wrapper around NaryReassociatePass. It declares the
/// analyses the transformation needs and forwards the actual work to the
/// NaryReassociatePass instance it holds.
class NaryReassociateLegacyPass : public FunctionPass {
  /// The implementation that runOnFunction delegates to.
  NaryReassociatePass Impl;

public:
  static char ID;

  /// Registers the pass with the global PassRegistry on construction.
  NaryReassociateLegacyPass() : FunctionPass(ID) {
    initializeNaryReassociateLegacyPassPass(*PassRegistry::getPassRegistry());
  }

  /// No module-level setup is performed; always reports "no change".
  bool doInitialization(Module &M) override { return false; }

  bool runOnFunction(Function &F) override;

  /// Declares the analyses that must be available before the pass runs and
  /// the ones that remain valid afterwards.
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    // Inputs consumed by runOnFunction.
    AU.addRequired<AssumptionCacheTracker>();
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addRequired<ScalarEvolutionWrapperPass>();
    AU.addRequired<TargetLibraryInfoWrapperPass>();
    AU.addRequired<TargetTransformInfoWrapperPass>();

    // Analyses reported as still valid after the pass.
    AU.addPreserved<DominatorTreeWrapperPass>();
    AU.addPreserved<ScalarEvolutionWrapperPass>();
    AU.addPreserved<TargetLibraryInfoWrapperPass>();
    AU.setPreservesCFG();
  }
};


} // end anonymous namespace


// Definition of the static ID member that the NaryReassociateLegacyPass
// constructor hands to the FunctionPass base class.
char NaryReassociateLegacyPass::ID = 0;


// Pass registration. The BEGIN/END macros receive the pass class, its
// argument string ("nary-reassociate"), its display name ("Nary
// reassociation"), and the cfg/analysis flags (both false). The dependency
// list in between names the same five passes that getAnalysisUsage() marks as
// required.
INITIALIZE_PASS_BEGIN(NaryReassociateLegacyPass, "nary-reassociate",

                      "Nary reassociation", false, false)

INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)

INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)

INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)

INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)

INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)

INITIALIZE_PASS_END(NaryReassociateLegacyPass, "nary-reassociate",

                    "Nary reassociation", false, false)


/// Factory used by legacy pass-manager clients: returns a freshly allocated
/// NaryReassociateLegacyPass.
FunctionPass *llvm::createNaryReassociatePass() {
  FunctionPass *LegacyPass = new NaryReassociateLegacyPass();
  return LegacyPass;
}


/// Legacy entry point: fetches the required analyses for \p F and delegates to
/// NaryReassociatePass::runImpl. Returns whatever runImpl reports, or false
/// when skipFunction() says the function should not be processed.
bool NaryReassociateLegacyPass::runOnFunction(Function &F) {
  if (skipFunction(F))
    return false;

  // Collect the analysis results in the same order as they are passed on.
  AssumptionCache &Assumptions =
      getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
  DominatorTree &DomTree = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  ScalarEvolution &ScalarEvo = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
  auto &LibInfo = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
  auto &TargetInfo = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);

  return Impl.runImpl(F, &Assumptions, &DomTree, &ScalarEvo, &LibInfo,
                      &TargetInfo);
}


/// New pass-manager entry point. Pulls the analyses out of \p AM, runs the
/// transformation, and reports which analyses are still valid: everything if
/// nothing changed, otherwise the CFG analyses plus ScalarEvolution.
PreservedAnalyses NaryReassociatePass::run(Function &F,
                                           FunctionAnalysisManager &AM) {
  auto &Assumptions = AM.getResult<AssumptionAnalysis>(F);
  auto &DomTree = AM.getResult<DominatorTreeAnalysis>(F);
  auto &ScalarEvo = AM.getResult<ScalarEvolutionAnalysis>(F);
  auto &LibInfo = AM.getResult<TargetLibraryAnalysis>(F);
  auto &TargetInfo = AM.getResult<TargetIRAnalysis>(F);

  const bool MadeChange =
      runImpl(F, &Assumptions, &DomTree, &ScalarEvo, &LibInfo, &TargetInfo);
  if (!MadeChange)
    return PreservedAnalyses::all();

  PreservedAnalyses Preserved;
  Preserved.preserveSet<CFGAnalyses>();
  Preserved.preserve<ScalarEvolutionAnalysis>();
  return Preserved;
}


/// Shared driver for both pass managers. Stores the supplied analyses in the
/// pass members, then repeats doOneIteration() until an iteration makes no
/// change. Returns true if any iteration changed \p F.
bool NaryReassociatePass::runImpl(Function &F, AssumptionCache *AC_,
                                  DominatorTree *DT_, ScalarEvolution *SE_,
                                  TargetLibraryInfo *TLI_,
                                  TargetTransformInfo *TTI_) {
  AC = AC_;
  DT = DT_;
  SE = SE_;
  TLI = TLI_;
  TTI = TTI_;
  DL = &F.getDataLayout();

  // Iterate to a fixed point: the loop stops on the first iteration that
  // reports no change.
  bool EverChanged = false;
  while (doOneIteration(F))
    EverChanged = true;
  return EverChanged;
}


/// Runs one sweep over the function: every instruction is offered to
/// tryReassociate(); successful rewrites replace the original's uses and the
/// original is queued for deletion. Returns true if any instruction was
/// rewritten.
bool NaryReassociatePass::doOneIteration(Function &F) {
  SeenExprs.clear();

  bool MadeChange = false;
  SmallVector<WeakTrackingVH, 16> PendingDeletes;

  // Visit the basic blocks in a depth-first traversal of the dominator tree,
  // recording each visited instruction's SCEV in SeenExprs as we go.
  for (const auto DomNode : depth_first(DT)) {
    for (Instruction &Inst : *DomNode->getBlock()) {
      const SCEV *OldExpr = nullptr;
      Instruction *Replacement = tryReassociate(&Inst, OldExpr);

      if (!Replacement) {
        // Not rewritten: remember the instruction under its SCEV, if
        // tryReassociate computed one.
        if (OldExpr)
          SeenExprs[OldExpr].push_back(WeakTrackingVH(&Inst));
        continue;
      }

      MadeChange = true;
      Inst.replaceAllUsesWith(Replacement);

      // The original is now dead; it is deleted after the traversal.
      PendingDeletes.push_back(WeakTrackingVH(&Inst));

      // Record the rewritten instruction in place of the original.
      const SCEV *NewExpr = SE->getSCEV(Replacement);
      SeenExprs[NewExpr].push_back(WeakTrackingVH(Replacement));

      // Ideally NewExpr would equal OldExpr because the replacement is
      // equivalent to the original. However, ScalarEvolution::getSCEV may
      // weaken nsw, making the two differ. For example, suppose we
      // reassociate
      //   I = &a[sext(i +nsw j)] // assuming sizeof(a[0]) = 4
      // to
      //   NewI = &a[sext(i)] + sext(j).
      //
      // ScalarEvolution computes
      //   getSCEV(I)    = a + 4 * sext(i + j)
      //   getSCEV(NewI) = a + 4 * sext(i) + 4 * sext(j)
      // which are different SCEVs.
      //
      // To alleviate this, the replacement is also filed under OldExpr so
      // that both the pre- and post-rewrite SCEV map to it.
      //
      // This improvement is exercised in @reassociate_gep_nsw in
      // nary-gep.ll.
      if (NewExpr != OldExpr)
        SeenExprs[OldExpr].push_back(WeakTrackingVH(Replacement));
    }
  }

  // Delete the queued instructions; the callback makes ScalarEvolution forget
  // each value that is removed along the way.
  RecursivelyDeleteTriviallyDeadInstructionsPermissive(
      PendingDeletes, TLI, nullptr, [this](Value *V) { SE->forgetValue(V); });

  return MadeChange;
}


/// Checks whether \p I is a min/max of the flavour selected by \p PredT and,
/// if so, stores its SCEV in \p OrigSCEV and tries to reassociate it with the
/// operands in both orders. Returns the replacement instruction, or nullptr
/// when \p I does not match or no reassociation was found.
template <typename PredT>
Instruction *
NaryReassociatePass::matchAndReassociateMinOrMax(Instruction *I,
                                                 const SCEV *&OrigSCEV) {
  using MatcherT =
      MaxMin_match<ICmpInst, bind_ty<Value>, bind_ty<Value>, PredT>;

  Value *Op0 = nullptr;
  Value *Op1 = nullptr;
  MatcherT Matcher(m_Value(Op0), m_Value(Op1));

  if (!match(I, Matcher))
    return nullptr;

  OrigSCEV = SE->getSCEV(I);

  // First attempt: treat Op0 as the nested min/max.
  if (auto *Rewritten = dyn_cast_or_null<Instruction>(
          tryReassociateMinOrMax(I, Matcher, Op0, Op1)))
    return Rewritten;

  // Second attempt with the operands swapped; yields nullptr on failure.
  return dyn_cast_or_null<Instruction>(
      tryReassociateMinOrMax(I, Matcher, Op1, Op0));
}


/// Dispatches \p I to the reassociation routine for its kind (add/mul, GEP,
/// or integer min/max). Whenever a routine is attempted, \p OrigSCEV receives
/// the SCEV of \p I. Returns the replacement instruction, or nullptr if \p I
/// is left as is.
Instruction *NaryReassociatePass::tryReassociate(Instruction *I,
                                                 const SCEV *&OrigSCEV) {
  if (!SE->isSCEVable(I->getType()))
    return nullptr;

  const unsigned Opcode = I->getOpcode();
  if (Opcode == Instruction::Add || Opcode == Instruction::Mul) {
    OrigSCEV = SE->getSCEV(I);
    return tryReassociateBinaryOp(cast<BinaryOperator>(I));
  }
  if (Opcode == Instruction::GetElementPtr) {
    OrigSCEV = SE->getSCEV(I);
    return tryReassociateGEP(cast<GetElementPtrInst>(I));
  }

  // TODO: Currently min/max reassociation is restricted to integer types only
  // due to use of SCEVExpander which may introduce incompatible forms of
  // min/max for pointer types.
  if (!I->getType()->isIntegerTy())
    return nullptr;

  // Try each signed/unsigned min/max flavour in turn; the first one that
  // produces a replacement wins.
  if (Instruction *Res = matchAndReassociateMinOrMax<umin_pred_ty>(I, OrigSCEV))
    return Res;
  if (Instruction *Res = matchAndReassociateMinOrMax<smin_pred_ty>(I, OrigSCEV))
    return Res;
  if (Instruction *Res = matchAndReassociateMinOrMax<umax_pred_ty>(I, OrigSCEV))
    return Res;
  return matchAndReassociateMinOrMax<smax_pred_ty>(I, OrigSCEV);
}


/// Returns true when the target's cost model rates \p GEP as TCC_Free.
static bool isGEPFoldable(GetElementPtrInst *GEP,
                          const TargetTransformInfo *TTI) {
  SmallVector<const Value *, 4> IndexOperands(GEP->indices());
  const auto Cost = TTI->getGEPCost(GEP->getSourceElementType(),
                                    GEP->getPointerOperand(), IndexOperands);
  return Cost == TargetTransformInfo::TCC_Free;
}


/// Tries to reassociate \p GEP at each of its sequential indices, in operand
/// order, and returns the first replacement found. Returns nullptr when the
/// GEP is foldable (not worth rewriting) or no index yields a replacement.
Instruction *NaryReassociatePass::tryReassociateGEP(GetElementPtrInst *GEP) {
  // Not worth reassociating GEP if it is foldable.
  if (isGEPFoldable(GEP, TTI))
    return nullptr;

  gep_type_iterator TypeIt = gep_type_begin(*GEP);
  const unsigned NumOperands = GEP->getNumOperands();
  // Operand 0 is skipped, so index number (OpIdx - 1) is what gets passed on.
  for (unsigned OpIdx = 1; OpIdx != NumOperands; ++OpIdx, ++TypeIt) {
    if (!TypeIt.isSequential())
      continue;
    if (GetElementPtrInst *Rewritten = tryReassociateGEPAtIndex(
            GEP, OpIdx - 1, TypeIt.getIndexedType()))
      return Rewritten;
  }
  return nullptr;
}


/// Returns true when the integer type of \p Index is narrower than the index
/// width the data layout reports for the address space of \p GEP's type.
bool NaryReassociatePass::requiresSignExtension(Value *Index,
                                                GetElementPtrInst *GEP) {
  const unsigned AddrSpace = GEP->getType()->getPointerAddressSpace();
  const unsigned PointerIndexBits = DL->getIndexSizeInBits(AddrSpace);
  const unsigned IndexBits =
      cast<IntegerType>(Index->getType())->getBitWidth();
  return IndexBits < PointerIndexBits;
}


/// Tries to reassociate \p GEP at its I-th index. The index (looking through
/// a sext, or a zext of a known non-negative value) must be an add; both
/// operand orders of that add are then tried. Returns the new GEP or nullptr.
GetElementPtrInst *
NaryReassociatePass::tryReassociateGEPAtIndex(GetElementPtrInst *GEP,
                                              unsigned I, Type *IndexedType) {
  SimplifyQuery SQ(*DL, DT, AC, GEP);

  // Index number I lives in operand I + 1.
  Value *IndexToSplit = GEP->getOperand(I + 1);
  if (auto *SignExt = dyn_cast<SExtInst>(IndexToSplit)) {
    IndexToSplit = SignExt->getOperand(0);
  } else if (auto *ZeroExt = dyn_cast<ZExtInst>(IndexToSplit)) {
    // zext can be treated as sext if the source is non-negative.
    Value *Narrow = ZeroExt->getOperand(0);
    if (isKnownNonNegative(Narrow, SQ))
      IndexToSplit = Narrow;
  }

  auto *Sum = dyn_cast<AddOperator>(IndexToSplit);
  if (!Sum)
    return nullptr;

  // If the I-th index needs sext and the underlying add is not known to be
  // free of signed overflow, we cannot split the add because
  //   sext(LHS + RHS) != sext(LHS) + sext(RHS).
  if (requiresSignExtension(IndexToSplit, GEP) &&
      computeOverflowForSignedAdd(Sum, SQ) != OverflowResult::NeverOverflows)
    return nullptr;

  Value *First = Sum->getOperand(0);
  Value *Second = Sum->getOperand(1);

  // IndexToSplit = First + Second.
  if (GetElementPtrInst *Rewritten =
          tryReassociateGEPAtIndex(GEP, I, First, Second, IndexedType))
    return Rewritten;

  // Symmetrically, try IndexToSplit = Second + First, unless both operands
  // are the same value and the attempt would be a repeat.
  if (First == Second)
    return nullptr;
  return tryReassociateGEPAtIndex(GEP, I, Second, First, IndexedType);
}


/// Tries to rewrite \p GEP, whose I-th index equals LHS + RHS, on top of an
/// earlier instruction recorded in SeenExprs whose SCEV equals GEP's SCEV with
/// the I-th index replaced by \p LHS:
///   NewGEP = &Candidate[RHS * (sizeof(IndexedType) / sizeof(element))]
///
/// \param GEP          the GEP to reassociate; new code is built at its
///                     position via an IRBuilder constructed from it.
/// \param I            zero-based number of the index being split.
/// \param LHS          the addend folded into the candidate expression.
/// \param RHS          the addend applied on top of the candidate.
/// \param IndexedType  the type passed by the caller for the I-th index; its
///                     alloc size scales \p RHS.
/// \returns the new GEP (which takes over GEP's name and inbounds flag), or
///          nullptr when no dominating candidate exists or the sizes involved
///          do not allow the rewrite.
GetElementPtrInst *
NaryReassociatePass::tryReassociateGEPAtIndex(GetElementPtrInst *GEP,
                                              unsigned I, Value *LHS,
                                              Value *RHS, Type *IndexedType) {
  // Look for GEP's closest dominator that has the same SCEV as GEP except that
  // the I-th index is replaced with LHS.
  SmallVector<const SCEV *, 4> IndexExprs;
  for (Use &Index : GEP->indices())
    IndexExprs.push_back(SE->getSCEV(Index));
  // Replace the I-th index with LHS.
  IndexExprs[I] = SE->getSCEV(LHS);
  // NOTE(review): this reads operand I, whereas the 3-argument overload reads
  // the I-th index from operand I + 1 — confirm which operand is intended.
  Type *GEPArgType = SE->getEffectiveSCEVType(GEP->getOperand(I)->getType());
  Type *LHSType = SE->getEffectiveSCEVType(LHS->getType());
  size_t LHSSize = DL->getTypeSizeInBits(LHSType).getFixedValue();
  size_t GEPArgSize = DL->getTypeSizeInBits(GEPArgType).getFixedValue();
  if (isKnownNonNegative(LHS, SimplifyQuery(*DL, DT, AC, GEP)) &&
      LHSSize < GEPArgSize) {
    // Zero-extend LHS if it is non-negative. InstCombine canonicalizes sext to
    // zext if the source operand is proved non-negative. We should do that
    // consistently so that CandidateExpr more likely appears before. See
    // @reassociate_gep_assume for an example of this canonicalization.
    IndexExprs[I] = SE->getZeroExtendExpr(IndexExprs[I], GEPArgType);
  }
  const SCEV *CandidateExpr = SE->getGEPExpr(cast<GEPOperator>(GEP),
                                             IndexExprs);

  Value *Candidate = findClosestMatchingDominator(CandidateExpr, GEP);
  if (Candidate == nullptr)
    return nullptr;

  IRBuilder<> Builder(GEP);
  // Candidate should have the same pointer type as GEP.
  assert(Candidate->getType() == GEP->getType() &&
         "Candidate and GEP must have the same type");

  // NewGEP = (char *)Candidate + RHS * sizeof(IndexedType)
  uint64_t IndexedSize = DL->getTypeAllocSize(IndexedType);
  Type *ElementType = GEP->getResultElementType();
  uint64_t ElementSize = DL->getTypeAllocSize(ElementType);
  // Because I is not necessarily the last index of the GEP, the size of the
  // type at the I-th index (IndexedSize) is not necessarily divisible by
  // ElementSize. For example,
  //
  // #pragma pack(1)
  // struct S {
  //   int a[3];
  //   int64 b[8];
  // };
  // #pragma pack()
  //
  // sizeof(S) = 100 is indivisible by sizeof(int64) = 8.
  //
  // A zero ElementSize (zero-sized result element type) is rejected first:
  // the modulo and the division below would otherwise divide by zero.
  //
  // TODO: bail out on this case for now. We could emit uglygep.
  if (ElementSize == 0 || IndexedSize % ElementSize != 0)
    return nullptr;

  // NewGEP = &Candidate[RHS * (sizeof(IndexedType) / sizeof(Candidate[0])));
  Type *PtrIdxTy = DL->getIndexType(GEP->getType());
  if (RHS->getType() != PtrIdxTy)
    RHS = Builder.CreateSExtOrTrunc(RHS, PtrIdxTy);
  if (IndexedSize != ElementSize) {
    // Rescale RHS from units of IndexedType to units of the element type.
    RHS = Builder.CreateMul(
        RHS, ConstantInt::get(PtrIdxTy, IndexedSize / ElementSize));
  }
  GetElementPtrInst *NewGEP =
      cast<GetElementPtrInst>(Builder.CreateGEP(ElementType, Candidate, RHS));
  NewGEP->setIsInBounds(GEP->isInBounds());
  NewGEP->takeName(GEP);
  return NewGEP;
}


/// Tries to reassociate the add/mul \p I, treating first its left and then
/// its right operand as the nested (A op B) sub-expression. Returns the
/// replacement instruction or nullptr.
Instruction *NaryReassociatePass::tryReassociateBinaryOp(BinaryOperator *I) {
  // There is no need to reassociate 0.
  if (SE->getSCEV(I)->isZero())
    return nullptr;

  Value *Op0 = I->getOperand(0);
  Value *Op1 = I->getOperand(1);

  if (Instruction *Rewritten = tryReassociateBinaryOp(Op0, Op1, I))
    return Rewritten;
  // Commuted attempt; yields nullptr if it fails as well.
  return tryReassociateBinaryOp(Op1, Op0, I);
}


/// Given I = LHS op RHS, checks whether LHS is itself (A op B) with the same
/// opcode and, if so, tries to rebuild I as (A op RHS) op B or
/// (B op RHS) op A from an already-seen instruction. Returns the replacement
/// or nullptr.
Instruction *NaryReassociatePass::tryReassociateBinaryOp(Value *LHS, Value *RHS,
                                                         BinaryOperator *I) {
  Value *A = nullptr;
  Value *B = nullptr;
  // To be conservative, we reassociate I only when it is the only user of
  // (A op B).
  if (!LHS->hasOneUse() || !matchTernaryOp(I, LHS, A, B))
    return nullptr;

  // I = (A op B) op RHS
  //   = (A op RHS) op B or (B op RHS) op A
  const SCEV *AExpr = SE->getSCEV(A);
  const SCEV *BExpr = SE->getSCEV(B);
  const SCEV *RHSExpr = SE->getSCEV(RHS);

  // (A op RHS) op B — skipped when B and RHS have the same SCEV.
  if (BExpr != RHSExpr) {
    const SCEV *Combined = getBinarySCEV(I, AExpr, RHSExpr);
    if (Instruction *Rewritten = tryReassociatedBinaryOp(Combined, B, I))
      return Rewritten;
  }

  // (B op RHS) op A — skipped when A and RHS have the same SCEV.
  if (AExpr != RHSExpr) {
    const SCEV *Combined = getBinarySCEV(I, BExpr, RHSExpr);
    if (Instruction *Rewritten = tryReassociatedBinaryOp(Combined, A, I))
      return Rewritten;
  }

  return nullptr;
}


/// Looks up an already-seen instruction that computes \p LHSExpr and dominates
/// \p I; if one exists, creates "that instruction op RHS" at I's position with
/// I's opcode, debug location and name. Returns the new instruction, or
/// nullptr when no such instruction is found.
Instruction *NaryReassociatePass::tryReassociatedBinaryOp(const SCEV *LHSExpr,
                                                          Value *RHS,
                                                          BinaryOperator *I) {
  Instruction *Dominator = findClosestMatchingDominator(LHSExpr, I);
  if (!Dominator)
    return nullptr;

  Instruction *Replacement = nullptr;
  const unsigned Opcode = I->getOpcode();
  if (Opcode == Instruction::Add)
    Replacement =
        BinaryOperator::CreateAdd(Dominator, RHS, "", I->getIterator());
  else if (Opcode == Instruction::Mul)
    Replacement =
        BinaryOperator::CreateMul(Dominator, RHS, "", I->getIterator());
  else
    llvm_unreachable("Unexpected instruction.");

  Replacement->setDebugLoc(I->getDebugLoc());
  Replacement->takeName(I);
  return Replacement;
}


/// Returns true if \p V is a binary operation with the same opcode as \p I
/// (add or mul), binding its two operands to \p Op1 and \p Op2.
bool NaryReassociatePass::matchTernaryOp(BinaryOperator *I, Value *V,
                                         Value *&Op1, Value *&Op2) {
  const unsigned Opcode = I->getOpcode();
  if (Opcode == Instruction::Add)
    return match(V, m_Add(m_Value(Op1), m_Value(Op2)));
  if (Opcode == Instruction::Mul)
    return match(V, m_Mul(m_Value(Op1), m_Value(Op2)));
  // Only add and mul are expected here.
  llvm_unreachable("Unexpected instruction.");
}


/// Builds the SCEV for "LHS op RHS", where op is the opcode of \p I (add or
/// mul).
const SCEV *NaryReassociatePass::getBinarySCEV(BinaryOperator *I,
                                               const SCEV *LHS,
                                               const SCEV *RHS) {
  const unsigned Opcode = I->getOpcode();
  if (Opcode == Instruction::Add)
    return SE->getAddExpr(LHS, RHS);
  if (Opcode == Instruction::Mul)
    return SE->getMulExpr(LHS, RHS);
  // Only add and mul are expected here.
  llvm_unreachable("Unexpected instruction.");
}


/// Searches the SeenExprs entry for \p CandidateExpr, most recently added
/// first, for an instruction that dominates \p Dominatee and that
/// ScalarEvolution accepts for reuse. Every entry examined is popped,
/// including the one that is returned. Returns nullptr if no entry qualifies.
Instruction *
NaryReassociatePass::findClosestMatchingDominator(const SCEV *CandidateExpr,
                                                  Instruction *Dominatee) {
  auto Entry = SeenExprs.find(CandidateExpr);
  if (Entry == SeenExprs.end())
    return nullptr;

  // Because we process the basic blocks in pre-order of the dominator tree, a
  // candidate that doesn't dominate the current instruction won't dominate any
  // future instruction either. Therefore, we pop it out of the stack. This
  // optimization makes the algorithm O(n).
  auto &Stack = Entry->second;
  while (!Stack.empty()) {
    // The stack holds WeakTrackingVHs, so an entry can be nullptr if the
    // instruction was removed during rewriting.
    Value *Top = Stack.pop_back_val();
    if (!Top)
      continue;

    auto *Seen = cast<Instruction>(Top);
    if (!DT->dominates(Seen, Dominatee))
      continue;

    // Make sure that the instruction is safe to reuse without introducing
    // poison.
    SmallVector<Instruction *> NeedFlagsDropped;
    if (!SE->canReuseInstruction(CandidateExpr, Seen, NeedFlagsDropped))
      continue;

    for (Instruction *Inst : NeedFlagsDropped)
      Inst->dropPoisonGeneratingAnnotations();

    return Seen;
  }
  return nullptr;
}


/// Maps the predicate type of a min/max matcher to the corresponding SCEV
/// expression kind. Only the static type of \p MM is inspected.
template <typename MaxMinT> static SCEVTypes convertToSCEVype(MaxMinT &MM) {
  using Pred = typename MaxMinT::PredType;

  if constexpr (std::is_same_v<smax_pred_ty, Pred>)
    return scSMaxExpr;
  else if constexpr (std::is_same_v<umax_pred_ty, Pred>)
    return scUMaxExpr;
  else if constexpr (std::is_same_v<smin_pred_ty, Pred>)
    return scSMinExpr;
  else if constexpr (std::is_same_v<umin_pred_ty, Pred>)
    return scUMinExpr;

  llvm_unreachable("Can't convert MinMax pattern to SCEV type");
  return scUnknown;
}


// Tries to rewrite I = minmax(minmax(A, B), RHS) so that an already-seen
// min/max of two of the three operands is reused.
//
// Parameters:
//  I           - instruction matched by the MaxMinT matcher
//  MaxMinMatch - min/max idiom matcher (only its type is used)
//  LHS         - operand of I that is expected to be the nested min/max
//  RHS         - the other operand of I
//
// Returns the value that replaces I, or nullptr if LHS is not a matching
// min/max, LHS has users other than I (directly or through a single-user
// intermediary), or no reusable sub-expression is found.
template <typename MaxMinT>
Value *NaryReassociatePass::tryReassociateMinOrMax(Instruction *I,
                                                   MaxMinT MaxMinMatch,
                                                   Value *LHS, Value *RHS) {
  Value *A = nullptr;
  Value *B = nullptr;
  MaxMinT m_MaxMin(m_Value(A), m_Value(B));
  if (!match(LHS, m_MaxMin))
    return nullptr;

  // The optimization is profitable only if LHS can be removed in the end.
  // In other words LHS should be used (directly or indirectly) by I only.
  auto KeepsLHSAlive = [&](auto *U) {
    if (U == I)
      return false;
    return !(U->hasOneUser() && *U->users().begin() == I);
  };
  if (LHS->hasNUsesOrMore(3) || llvm::any_of(LHS->users(), KeepsLHSAlive))
    return nullptr;

  const SCEVTypes MinMaxKind = convertToSCEVype(m_MaxMin);

  // Looks for an existing minmax(XExpr, YExpr); when found, expands
  // minmax(Rest, existing) at I's position and returns it.
  auto ReuseExisting = [&](const SCEV *XExpr, const SCEV *YExpr,
                           Value *Rest) -> Value * {
    SmallVector<const SCEV *, 2> InnerOps{YExpr, XExpr};
    const SCEV *InnerExpr = SE->getMinMaxExpr(MinMaxKind, InnerOps);

    Instruction *Existing = findClosestMatchingDominator(InnerExpr, I);
    if (!Existing)
      return nullptr;

    LLVM_DEBUG(dbgs() << "NARY: Found common sub-expr: " << *Existing << "\n");

    SmallVector<const SCEV *, 2> OuterOps{SE->getUnknown(Rest),
                                          SE->getUnknown(Existing)};
    const SCEV *OuterExpr = SE->getMinMaxExpr(MinMaxKind, OuterOps);

    SCEVExpander Expander(*SE, *DL, "nary-reassociate");
    Value *Result = Expander.expandCodeFor(OuterExpr, I->getType(), I);
    Result->setName(Twine(I->getName()).concat(".nary"));

    LLVM_DEBUG(dbgs() << "NARY: Deleting:  " << *I << "\n"
                      << "NARY: Inserting: " << *Result << "\n");
    return Result;
  };

  const SCEV *AExpr = SE->getSCEV(A);
  const SCEV *BExpr = SE->getSCEV(B);
  const SCEV *RHSExpr = SE->getSCEV(RHS);

  // Try (A op RHS) op B.
  if (BExpr != RHSExpr)
    if (Value *Result = ReuseExisting(AExpr, RHSExpr, B))
      return Result;

  // Try (RHS op B) op A.
  if (AExpr != RHSExpr)
    if (Value *Result = ReuseExisting(RHSExpr, BExpr, A))
      return Result;

  return nullptr;
}

assert
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")

DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition ARMSLSHardening.cpp:73

AssumptionCache.h

A
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")

E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")

B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")

Casting.h

Constants.h
This file contains the declarations for the subclasses of Constant, which represent the different fla...

DataLayout.h

DepthFirstIterator.h
This file builds on the ADT/GraphTraits.h file to build generic depth first graph iterator.

DerivedTypes.h

Dominators.h

runOnFunction
static bool runOnFunction(Function &F, bool PostInlining)
Definition EntryExitInstrumenter.cpp:103

runImpl
static bool runImpl(Function &F, const TargetLowering &TLI, const LibcallLoweringInfo &Libcalls, AssumptionCache *AC)
Definition ExpandFp.cpp:994

GetElementPtrTypeIterator.h

GEP
Hexagon Common GEP
Definition HexagonCommonGEP.cpp:164

IRBuilder.h

BasicBlock.h

Function.h

Instruction.h

Module.h
Module.h This file contains the declarations for the Module class.

Operator.h

Type.h

Value.h

InitializePasses.h

InstrTypes.h

Instructions.h

TemplateParamKind::Type
@ Type
Definition ItaniumDemangle.h:1243

F
#define F(x, y, z)
Definition MD5.cpp:54

I
#define I(x, y, z)
Definition MD5.cpp:57

convertToSCEVype
static SCEVTypes convertToSCEVype(MaxMinT &MM)
Definition NaryReassociate.cpp:588

isGEPFoldable
static bool isGEPFoldable(GetElementPtrInst *GEP, const TargetTransformInfo *TTI)
Definition NaryReassociate.cpp:328

NaryReassociate.h

INITIALIZE_PASS_DEPENDENCY
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42

INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44

INITIALIZE_PASS_BEGIN
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39

Pass.h

PatternMatch.h

ScalarEvolutionExpander.h

ScalarEvolutionExpressions.h

ScalarEvolution.h

Scalar.h

SmallVector.h
This file defines the SmallVector class.

LLVM_DEBUG
#define LLVM_DEBUG(...)
Definition Debug.h:114

TargetLibraryInfo.h

TargetTransformInfo.h
This pass exposes codegen information to IR-level passes.

Local.h

ValueHandle.h

ValueTracking.h

RHS
Value * RHS
Definition X86PartialReduction.cpp:81

LHS
Value * LHS
Definition X86PartialReduction.cpp:80

Node
Definition ItaniumDemangle.h:166

llvm::AnalysisManager::getResult
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition PassManager.h:411

llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition PassAnalysisSupport.h:48

llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition PassAnalysisSupport.h:76

llvm::AnalysisUsage::addPreserved
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Definition PassAnalysisSupport.h:99

llvm::AnalysisUsage::setPreservesCFG
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition Pass.cpp:270

llvm::AssumptionAnalysis
A function analysis which provides an AssumptionCache.
Definition AssumptionCache.h:180

llvm::AssumptionCacheTracker
An immutable pass that tracks lazily created AssumptionCache objects.
Definition AssumptionCache.h:211

llvm::AssumptionCache
A cache of @llvm.assume calls within a function.
Definition AssumptionCache.h:44

llvm::BasicBlock
LLVM Basic Block Representation.
Definition BasicBlock.h:62

llvm::CFGAnalyses
Represents analyses that only rely on functions' control flow.
Definition Analysis.h:73

llvm::DominatorTreeAnalysis
Analysis pass which computes a DominatorTree.
Definition Dominators.h:283

llvm::DominatorTreeWrapperPass
Legacy analysis pass which computes a DominatorTree.
Definition Dominators.h:321

llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:164

llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314

llvm::Function
Definition Function.h:64

llvm::GetElementPtrInst
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition Instructions.h:950

llvm::GetElementPtrInst::setIsInBounds
LLVM_ABI void setIsInBounds(bool b=true)
Set or clear the inbounds flag on this GEP instruction.
Definition Instructions.cpp:1631

llvm::Instruction
Definition Instruction.h:69

llvm::Instruction::setDebugLoc
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition Instruction.h:510

llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67

llvm::NaryReassociatePass
Definition NaryReassociate.h:102

llvm::NaryReassociatePass::runImpl
bool runImpl(Function &F, AssumptionCache *AC_, DominatorTree *DT_, ScalarEvolution *SE_, TargetLibraryInfo *TLI_, TargetTransformInfo *TTI_)
Definition NaryReassociate.cpp:199

llvm::NaryReassociatePass::run
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Definition NaryReassociate.cpp:182

llvm::PassRegistry::getPassRegistry
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition PassRegistry.cpp:23

llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112

llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118

llvm::PreservedAnalyses::preserveSet
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
Definition Analysis.h:151

llvm::PreservedAnalyses::preserve
PreservedAnalyses & preserve()
Mark an analysis as preserved.
Definition Analysis.h:132

llvm::SCEV
This class represents an analyzed expression in the program.
Definition ScalarEvolution.h:72

llvm::ScalarEvolutionAnalysis
Analysis pass that exposes the ScalarEvolution for a function.
Definition ScalarEvolution.h:2379

llvm::ScalarEvolutionWrapperPass
Definition ScalarEvolution.h:2411

llvm::ScalarEvolution
The main scalar evolution driver.
Definition ScalarEvolution.h:457

llvm::SmallVectorTemplateBase::push_back
void push_back(const T &Elt)
Definition SmallVector.h:417

llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition SmallVector.h:1203

llvm::TargetIRAnalysis
Analysis pass providing the TargetTransformInfo.
Definition TargetTransformInfo.h:2018

llvm::TargetLibraryAnalysis
Analysis pass providing the TargetLibraryInfo.
Definition TargetLibraryInfo.h:610

llvm::TargetLibraryInfoWrapperPass
Definition TargetLibraryInfo.h:635

llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition TargetLibraryInfo.h:266

llvm::TargetTransformInfoWrapperPass
Wrapper pass for TargetTransformInfo.
Definition TargetTransformInfo.h:2075

llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition TargetTransformInfo.h:277

llvm::TargetTransformInfo::TCC_Free
@ TCC_Free
Expected to fold away in lowering.
Definition TargetTransformInfo.h:358

llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256

llvm::Value::setName
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
Definition Value.cpp:390

llvm::Value::hasOneUse
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439

llvm::Value::users
iterator_range< user_iterator > users()
Definition Value.h:426

llvm::Value::hasNUsesOrMore
LLVM_ABI bool hasNUsesOrMore(unsigned N) const
Return true if this value has N uses or more.
Definition Value.cpp:158

llvm::Value::takeName
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition Value.cpp:396

llvm::WeakTrackingVH
Value handle that is nullable, but tries to track the Value.
Definition ValueHandle.h:205

llvm::generic_gep_type_iterator::isSequential
bool isSequential() const
Definition GetElementPtrTypeIterator.h:147

llvm::generic_gep_type_iterator::getIndexedType
Type * getIndexedType() const
Definition GetElementPtrTypeIterator.h:102

Changed
Changed
Definition ObjCARCOpts.cpp:2369

ErrorHandling.h

llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition ErrorHandling.h:164

false
Definition MachinePipeliner.cpp:244

llvm::CallingConv::ID
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition CallingConv.h:24

llvm::CallingConv::C
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34

llvm::PatternMatch
Definition PatternMatch.h:47

llvm::PatternMatch::m_Add
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
Definition PatternMatch.h:1208

llvm::PatternMatch::match
bool match(Val *V, const Pattern &P)
Definition PatternMatch.h:49

llvm::PatternMatch::m_Mul
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
Definition PatternMatch.h:1274

llvm::PatternMatch::m_Value
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition PatternMatch.h:105

llvm::dwarf::Index
Index
Definition Dwarf.h:903

llvm::dxil::ElementType
ElementType
The element type of an SRV or UAV resource.
Definition DXILABI.h:60

llvm::sandboxir::Instruction
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
Definition BasicBlock.h:73

llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition AddressRanges.h:18

llvm::Value
FunctionAddr VTableAddr Value
Definition InstrProf.h:137

llvm::dyn_cast
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643

llvm::createNaryReassociatePass
LLVM_ABI FunctionPass * createNaryReassociatePass()
Definition NaryReassociate.cpp:165

llvm::dyn_cast_or_null
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753

llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1732

llvm::dbgs
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207

llvm::gep_type_iterator
generic_gep_type_iterator<> gep_type_iterator
Definition GetElementPtrTypeIterator.h:171

llvm::SmallVector
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference sizeof(SmallVector<T, 0>).
Definition SmallVector.h:1129

llvm::TTI
TargetTransformInfo TTI
Definition TargetTransformInfo.h:272

llvm::IRBuilder
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >

llvm::computeOverflowForSignedAdd
LLVM_ABI OverflowResult computeOverflowForSignedAdd(const WithCache< const Value * > &LHS, const WithCache< const Value * > &RHS, const SimplifyQuery &SQ)
Definition ValueTracking.cpp:7944

llvm::initializeNaryReassociateLegacyPassPass
LLVM_ABI void initializeNaryReassociateLegacyPassPass(PassRegistry &)

llvm::RecursivelyDeleteTriviallyDeadInstructionsPermissive
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructionsPermissive(SmallVectorImpl< WeakTrackingVH > &DeadInsts, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
Same functionality as RecursivelyDeleteTriviallyDeadInstructions, but allow instructions that are not trivially dead. These will be ignored.
Definition Local.cpp:548

llvm::cast
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559

llvm::gep_type_begin
gep_type_iterator gep_type_begin(const User *GEP)
Definition GetElementPtrTypeIterator.h:173

llvm::SCEVTypes
SCEVTypes
Definition ScalarEvolutionExpressions.h:38

llvm::scUMinExpr
@ scUMinExpr
Definition ScalarEvolutionExpressions.h:52

llvm::scSMaxExpr
@ scSMaxExpr
Definition ScalarEvolutionExpressions.h:51

llvm::scUnknown
@ scUnknown
Definition ScalarEvolutionExpressions.h:56

llvm::scSMinExpr
@ scSMinExpr
Definition ScalarEvolutionExpressions.h:53

llvm::scUMaxExpr
@ scUMaxExpr
Definition ScalarEvolutionExpressions.h:50

llvm::depth_first
iterator_range< df_iterator< T > > depth_first(const T &G)
Definition DepthFirstIterator.h:233

llvm::FunctionAnalysisManager
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
Definition PassManager.h:563

llvm::isKnownNonNegative
LLVM_ABI bool isKnownNonNegative(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Returns true if the given value is known to be non-negative.
Definition ValueTracking.cpp:281