31 case Intrinsic::vector_reduce_fadd:
32 return Instruction::FAdd;
33 case Intrinsic::vector_reduce_fmul:
34 return Instruction::FMul;
35 case Intrinsic::vector_reduce_add:
36 return Instruction::Add;
37 case Intrinsic::vector_reduce_mul:
38 return Instruction::Mul;
39 case Intrinsic::vector_reduce_and:
40 return Instruction::And;
41 case Intrinsic::vector_reduce_or:
42 return Instruction::Or;
43 case Intrinsic::vector_reduce_xor:
44 return Instruction::Xor;
45 case Intrinsic::vector_reduce_smax:
46 case Intrinsic::vector_reduce_smin:
47 case Intrinsic::vector_reduce_umax:
48 case Intrinsic::vector_reduce_umin:
49 return Instruction::ICmp;
50 case Intrinsic::vector_reduce_fmax:
51 case Intrinsic::vector_reduce_fmin:
52 return Instruction::FCmp;
60 case Intrinsic::vector_reduce_smax:
61 return RecurKind::SMax;
62 case Intrinsic::vector_reduce_smin:
63 return RecurKind::SMin;
64 case Intrinsic::vector_reduce_umax:
65 return RecurKind::UMax;
66 case Intrinsic::vector_reduce_umin:
67 return RecurKind::UMin;
68 case Intrinsic::vector_reduce_fmax:
69 return RecurKind::FMax;
70 case Intrinsic::vector_reduce_fmin:
71 return RecurKind::FMin;
73 return RecurKind::None;
81 if (
auto *II = dyn_cast<IntrinsicInst>(&
I)) {
82 switch (II->getIntrinsicID()) {
84 case Intrinsic::vector_reduce_fadd:
85 case Intrinsic::vector_reduce_fmul:
86 case Intrinsic::vector_reduce_add:
87 case Intrinsic::vector_reduce_mul:
88 case Intrinsic::vector_reduce_and:
89 case Intrinsic::vector_reduce_or:
90 case Intrinsic::vector_reduce_xor:
91 case Intrinsic::vector_reduce_smax:
92 case Intrinsic::vector_reduce_smin:
93 case Intrinsic::vector_reduce_umax:
94 case Intrinsic::vector_reduce_umin:
95 case Intrinsic::vector_reduce_fmax:
96 case Intrinsic::vector_reduce_fmin:
105 for (
auto *II : Worklist) {
107 isa<FPMathOperator>(II) ? II->getFastMathFlags() :
FastMathFlags{};
111 Value *Rdx =
nullptr;
114 Builder.setFastMathFlags(FMF);
117 case Intrinsic::vector_reduce_fadd:
118 case Intrinsic::vector_reduce_fmul: {
121 Value *Acc = II->getArgOperand(0);
122 Value *Vec = II->getArgOperand(1);
127 cast<FixedVectorType>(Vec->
getType())->getNumElements()))
132 Acc, Rdx,
"bin.rdx");
136 case Intrinsic::vector_reduce_and:
137 case Intrinsic::vector_reduce_or: {
145 Value *Vec = II->getArgOperand(0);
146 auto *FTy = cast<FixedVectorType>(Vec->
getType());
147 unsigned NumElts = FTy->getNumElements();
151 if (FTy->getElementType() == Builder.getInt1Ty()) {
152 Rdx = Builder.CreateBitCast(Vec, Builder.getIntNTy(NumElts));
153 if (
ID == Intrinsic::vector_reduce_and) {
154 Rdx = Builder.CreateICmpEQ(
155 Rdx, ConstantInt::getAllOnesValue(Rdx->
getType()));
157 assert(
ID == Intrinsic::vector_reduce_or &&
"Expected or reduction.");
158 Rdx = Builder.CreateIsNotNull(Rdx);
166 case Intrinsic::vector_reduce_add:
167 case Intrinsic::vector_reduce_mul:
168 case Intrinsic::vector_reduce_xor:
169 case Intrinsic::vector_reduce_smax:
170 case Intrinsic::vector_reduce_smin:
171 case Intrinsic::vector_reduce_umax:
172 case Intrinsic::vector_reduce_umin: {
173 Value *Vec = II->getArgOperand(0);
175 cast<FixedVectorType>(Vec->
getType())->getNumElements()))
181 case Intrinsic::vector_reduce_fmax:
182 case Intrinsic::vector_reduce_fmin: {
185 Value *Vec = II->getArgOperand(0);
187 cast<FixedVectorType>(Vec->
getType())->getNumElements()) ||
195 II->replaceAllUsesWith(Rdx);
196 II->eraseFromParent();
210 const auto *
TTI =&getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
F);
211 return expandReductions(
F,
TTI);
221char ExpandReductions::ID;
223 "Expand reduction intrinsics",
false,
false)
229 return new ExpandReductions();
235 if (!expandReductions(
F, &
TTI))
expand Expand reduction intrinsics
static Expected< BitVector > expand(StringRef S, StringRef Original)
Select target instructions out of generic instructions
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Straight line strength reduction
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
void setPreservesCFG()
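This function should be called by the pass if and only if it does not: (1) add or remove basic blocks from the function, or (2) modify terminator instructions in any way.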
Represents analyses that only rely on functions' control flow.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Convenience struct for specifying and reasoning about fast-math flags.
bool allowReassoc() const
Flag queries.
FunctionPass class - This class is used to implement most global optimizations.
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overridden by subclasses to do the per-function processing of the pass.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
void preserveSet()
Mark an analysis set as preserved.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Analysis pass providing the TargetTransformInfo.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
This is an optimization pass for GlobalISel generic memory operations.
void initializeExpandReductionsPass(PassRegistry &)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Value * getShuffleReduction(IRBuilderBase &Builder, Value *Src, unsigned Op, RecurKind MinMaxKind=RecurKind::None)
Generates a vector reduction using shufflevectors to reduce the value.
RecurKind
These are the kinds of recurrences that we support.
FunctionPass * createExpandReductionsPass()
This pass expands the reduction intrinsics into sequences of shuffles.
Value * getOrderedReduction(IRBuilderBase &Builder, Value *Acc, Value *Src, unsigned Op, RecurKind MinMaxKind=RecurKind::None)
Generates an ordered vector reduction using extracts to reduce the value.