37 switch (
II->getIntrinsicID()) {
39 case Intrinsic::vector_reduce_fadd:
40 case Intrinsic::vector_reduce_fmul:
41 case Intrinsic::vector_reduce_add:
42 case Intrinsic::vector_reduce_mul:
43 case Intrinsic::vector_reduce_and:
44 case Intrinsic::vector_reduce_or:
45 case Intrinsic::vector_reduce_xor:
46 case Intrinsic::vector_reduce_smax:
47 case Intrinsic::vector_reduce_smin:
48 case Intrinsic::vector_reduce_umax:
49 case Intrinsic::vector_reduce_umin:
50 case Intrinsic::vector_reduce_fmax:
51 case Intrinsic::vector_reduce_fmin:
52 if (
TTI->shouldExpandReduction(
II))
60 for (
auto *
II : Worklist) {
65 TTI->getPreferredExpandedReductionShuffle(
II);
70 Builder.setFastMathFlags(FMF);
73 case Intrinsic::vector_reduce_fadd:
74 case Intrinsic::vector_reduce_fmul: {
77 Value *Acc =
II->getArgOperand(0);
78 Value *Vec =
II->getArgOperand(1);
96 case Intrinsic::vector_reduce_and:
97 case Intrinsic::vector_reduce_or: {
105 Value *Vec =
II->getArgOperand(0);
107 unsigned NumElts = FTy->getNumElements();
111 if (FTy->getElementType() == Builder.getInt1Ty()) {
112 Rdx = Builder.CreateBitCast(Vec, Builder.getIntNTy(NumElts));
113 if (
ID == Intrinsic::vector_reduce_and) {
114 Rdx = Builder.CreateICmpEQ(
117 assert(
ID == Intrinsic::vector_reduce_or &&
"Expected or reduction.");
118 Rdx = Builder.CreateIsNotNull(Rdx);
126 case Intrinsic::vector_reduce_add:
127 case Intrinsic::vector_reduce_mul:
128 case Intrinsic::vector_reduce_xor:
129 case Intrinsic::vector_reduce_smax:
130 case Intrinsic::vector_reduce_smin:
131 case Intrinsic::vector_reduce_umax:
132 case Intrinsic::vector_reduce_umin: {
133 Value *Vec =
II->getArgOperand(0);
147 case Intrinsic::vector_reduce_fmax:
148 case Intrinsic::vector_reduce_fmin: {
151 Value *Vec =
II->getArgOperand(0);
161 II->replaceAllUsesWith(Rdx);
162 II->eraseFromParent();
171 ExpandReductions() : FunctionPass(ID) {}
174 const auto *
TTI =&getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
F);
175 auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
176 auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
177 auto *DT = DTWP ? &DTWP->getDomTree() :
nullptr;
178 auto *LI = LIWP ? &LIWP->getLoopInfo() :
nullptr;
179 return expandReductions(
F,
TTI, DT, LI);
182 void getAnalysisUsage(AnalysisUsage &AU)
const override {
190char ExpandReductions::ID;
192 "Expand reduction intrinsics",
false,
false)
198 return new ExpandReductions();
206 if (!expandReductions(
F, &
TTI, DT, LI))
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Expand Atomic instructions
static bool runOnFunction(Function &F, bool PostInlining)
uint64_t IntrinsicInst * II
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
Analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
LLVM_ABI PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Convenience struct for specifying and reasoning about fast-math flags.
bool allowReassoc() const
Flag queries.
FunctionPass class - This class is used to implement most global optimizations.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Analysis pass that exposes the LoopInfo for a function.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
PreservedAnalyses & preserve()
Mark an analysis as preserved.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Analysis pass providing the TargetTransformInfo.
The instances of the Type class are immutable: once they are created, they are never changed.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
This is an optimization pass for GlobalISel generic memory operations.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI Value * getReductionIdentity(Intrinsic::ID RdxID, Type *Ty, FastMathFlags FMF)
Given information about an @llvm.vector.reduce.* intrinsic, return the identity value for the reduction.
LLVM_ABI unsigned getArithmeticReductionInstruction(Intrinsic::ID RdxID)
Returns the arithmetic instruction opcode used when expanding a reduction.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
LLVM_ABI Value * getShuffleReduction(IRBuilderBase &Builder, Value *Src, unsigned Op, TargetTransformInfo::ReductionShuffle RS, RecurKind MinMaxKind=RecurKind::None)
Generates a vector reduction using shufflevectors to reduce the value.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
RecurKind
These are the kinds of recurrences that we support.
LLVM_ABI FunctionPass * createExpandReductionsPass()
This pass expands the reduction intrinsics into sequences of shuffles.
LLVM_ABI Value * expandReductionViaLoop(IRBuilderBase &Builder, Value *Vec, unsigned RdxOpcode, Value *Acc, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr)
Expand a scalable vector reduction into a runtime loop that applies RdxOpcode element by element,...
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI RecurKind getMinMaxReductionRecurKind(Intrinsic::ID RdxID)
Returns the recurrence kind used when expanding a min/max reduction.
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI Value * getOrderedReduction(IRBuilderBase &Builder, Value *Acc, Value *Src, unsigned Op, RecurKind MinMaxKind=RecurKind::None)
Generates an ordered vector reduction using extracts to reduce the value.