LLVM 17.0.0git
Go to the documentation of this file.
1//===- SLPVectorizer.h ------------------------------------------*- C++ -*-===//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
8// This pass implements the Bottom Up SLP vectorizer. It detects consecutive
9// stores that can be put together into vector-stores. Next, it attempts to
10// construct a vectorizable tree using the use-def chains. If a profitable tree
11// was found, the SLP vectorizer performs vectorization on the tree.
13// The pass is inspired by the work described in the paper:
14// "Loop-Aware SLP in GCC" by Ira Rosen, Dorit Nuzman, Ayal Zaks.
21#include "llvm/ADT/ArrayRef.h"
22#include "llvm/ADT/MapVector.h"
23#include "llvm/ADT/SetVector.h"
25#include "llvm/IR/PassManager.h"
27namespace llvm {
29class AAResults;
30class AssumptionCache;
31class BasicBlock;
32class CmpInst;
33class DemandedBits;
34class DominatorTree;
35class Function;
36class GetElementPtrInst;
37class InsertElementInst;
38class InsertValueInst;
39class Instruction;
40class LoopInfo;
41class OptimizationRemarkEmitter;
42class PHINode;
43class ScalarEvolution;
44class StoreInst;
45class TargetLibraryInfo;
46class TargetTransformInfo;
47class Value;
48class WeakTrackingVH;
50/// A private "module" namespace for types and utilities used by this pass.
51/// These are implementation details and should not be used by clients.
52namespace slpvectorizer {
54class BoUpSLP;
56} // end namespace slpvectorizer
58struct SLPVectorizerPass : public PassInfoMixin<SLPVectorizerPass> {
65 ScalarEvolution *SE = nullptr;
68 AAResults *AA = nullptr;
69 LoopInfo *LI = nullptr;
70 DominatorTree *DT = nullptr;
71 AssumptionCache *AC = nullptr;
72 DemandedBits *DB = nullptr;
73 const DataLayout *DL = nullptr;
78 // Glue for old PM.
80 TargetLibraryInfo *TLI_, AAResults *AA_, LoopInfo *LI_,
85 /// Collect store and getelementptr instructions and organize them
86 /// according to the underlying object of their pointer operands. We sort the
87 /// instructions by their underlying objects to reduce the cost of
88 /// consecutive access queries.
89 ///
90 /// TODO: We can further reduce this cost if we flush the chain creation
91 /// every time we run into a memory barrier.
92 void collectSeedInstructions(BasicBlock *BB);
94 /// Try to vectorize a list of operands.
95 /// \param MaxVFOnly Vectorize only using maximal allowed register size.
96 /// \returns true if a value was vectorized.
97 bool tryToVectorizeList(ArrayRef<Value *> VL, slpvectorizer::BoUpSLP &R,
98 bool MaxVFOnly = false);
100 /// Try to vectorize a chain that may start at the operands of \p I.
101 bool tryToVectorize(Instruction *I, slpvectorizer::BoUpSLP &R);
103 /// Try to vectorize chains that may start at the operands of
104 /// instructions in \p Insts.
105 bool tryToVectorize(ArrayRef<WeakTrackingVH> Insts,
108 /// Vectorize the store instructions collected in Stores.
109 bool vectorizeStoreChains(slpvectorizer::BoUpSLP &R);
111 /// Vectorize the index computations of the getelementptr instructions
112 /// collected in GEPs.
113 bool vectorizeGEPIndices(BasicBlock *BB, slpvectorizer::BoUpSLP &R);
115 /// Try to find a horizontal reduction or, failing that, collect instructions
116 /// for postponed vectorization attempts.
117 /// \a P if not null designates the phi node the reduction is fed into
118 /// (with reduction operators \a Root or one of its operands, in a basic block
119 /// \a BB).
120 /// \returns true if a horizontal reduction was matched and reduced.
121 /// \returns false if \a V is null or not an instruction,
122 /// or a horizontal reduction was not matched or not possible.
123 bool vectorizeHorReduction(PHINode *P, Instruction *Root, BasicBlock *BB,
126 SmallVectorImpl<WeakTrackingVH> &PostponedInsts);
128 /// Make an attempt to vectorize reduction and then try to vectorize
129 /// postponed binary operations.
130 /// \returns true on any successful vectorization.
131 bool vectorizeRootInstruction(PHINode *P, Instruction *Root, BasicBlock *BB,
135 /// Try to vectorize trees that start at insertvalue instructions.
136 bool vectorizeInsertValueInst(InsertValueInst *IVI, BasicBlock *BB,
139 /// Try to vectorize trees that start at insertelement instructions.
140 bool vectorizeInsertElementInst(InsertElementInst *IEI, BasicBlock *BB,
143 /// Tries to vectorize \p CmpInsts. \returns true on success.
144 template <typename ItT>
145 bool vectorizeCmpInsts(iterator_range<ItT> CmpInsts, BasicBlock *BB,
148 /// Tries to vectorize constructs started from InsertValueInst or
149 /// InsertElementInst instructions.
150 bool vectorizeInserts(InstSetVector &Instructions, BasicBlock *BB,
153 /// Scan the basic block and look for patterns that are likely to start
154 /// a vectorization chain.
155 bool vectorizeChainsInBlock(BasicBlock *BB, slpvectorizer::BoUpSLP &R);
157 bool vectorizeStoreChain(ArrayRef<Value *> Chain, slpvectorizer::BoUpSLP &R,
158 unsigned Idx, unsigned MinVF);
160 bool vectorizeStores(ArrayRef<StoreInst *> Stores, slpvectorizer::BoUpSLP &R);
162 /// The store instructions in a basic block organized by base pointer.
163 StoreListMap Stores;
165 /// The getelementptr instructions in a basic block organized by base pointer.
166 GEPListMap GEPs;
169} // end namespace llvm
Returns the sub type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
This file implements a map that provides insertion order iteration.
#define P(N)
This header defines various interfaces for pass management in LLVM.
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallVector class.
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:620
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
Definition: BasicBlock.h:56
A parsed version of the target data layout string, and methods for querying it.
Definition: DataLayout.h:110
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
This instruction inserts a single (scalar) element into a VectorType value.
This instruction inserts a struct field or array element value into an aggregate value.
The optimization diagnostic interface.
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:152
The main scalar evolution driver.
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:312
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:577
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
Provides information about what library functions are available for the current target.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
A range adaptor for a pair of iterators.
Bottom Up SLP Vectorizer.
@ BasicBlock
Various leaf nodes.
Definition: ISDOpcodes.h:71
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
A CRTP mix-in to automatically provide informational APIs needed for passes.
Definition: PassManager.h:371
ScalarEvolution * SE
Definition: SLPVectorizer.h:65
AssumptionCache * AC
Definition: SLPVectorizer.h:71
DominatorTree * DT
Definition: SLPVectorizer.h:70
TargetLibraryInfo * TLI
Definition: SLPVectorizer.h:67
const DataLayout * DL
Definition: SLPVectorizer.h:73
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
bool runImpl(Function &F, ScalarEvolution *SE_, TargetTransformInfo *TTI_, TargetLibraryInfo *TLI_, AAResults *AA_, LoopInfo *LI_, DominatorTree *DT_, AssumptionCache *AC_, DemandedBits *DB_, OptimizationRemarkEmitter *ORE_)