65#include "llvm/IR/IntrinsicsARM.h"
77#define DEBUG_TYPE "mve-laneinterleave"
81 cl::desc(
"Enable interleave MVE vector operation lowering"));
106char MVELaneInterleaving::ID = 0;
112 return new MVELaneInterleaving();
129 for (
auto *E : Exts) {
130 if (isa<FPExtInst>(E) || !isa<LoadInst>(E->getOperand(0))) {
135 for (
auto *
T : Truncs) {
136 if (
T->hasOneUse() && !isa<StoreInst>(*
T->user_begin())) {
144 for (
auto *E : Exts) {
145 if (!E->hasOneUse() ||
146 cast<Instruction>(*E->user_begin())->getOpcode() != Instruction::Mul) {
158 if (!isa<Instruction>(Start->getOperand(0)))
162 std::vector<Instruction *> Worklist;
163 Worklist.push_back(Start);
164 Worklist.push_back(cast<Instruction>(Start->getOperand(0)));
172 while (!Worklist.empty()) {
176 switch (
I->getOpcode()) {
178 case Instruction::Trunc:
179 case Instruction::FPTrunc:
186 case Instruction::SExt:
187 case Instruction::ZExt:
188 case Instruction::FPExt:
191 for (
auto *
Use :
I->users())
192 Worklist.push_back(cast<Instruction>(
Use));
196 case Instruction::Call: {
201 if (
II->getIntrinsicID() == Intrinsic::vector_reduce_add) {
208 switch (
II->getIntrinsicID()) {
210 case Intrinsic::smin:
211 case Intrinsic::smax:
212 case Intrinsic::umin:
213 case Intrinsic::umax:
214 case Intrinsic::sadd_sat:
215 case Intrinsic::ssub_sat:
216 case Intrinsic::uadd_sat:
217 case Intrinsic::usub_sat:
218 case Intrinsic::minnum:
219 case Intrinsic::maxnum:
220 case Intrinsic::fabs:
222 case Intrinsic::ceil:
223 case Intrinsic::floor:
224 case Intrinsic::rint:
225 case Intrinsic::round:
226 case Intrinsic::trunc:
234 case Instruction::Add:
235 case Instruction::Sub:
236 case Instruction::Mul:
237 case Instruction::AShr:
238 case Instruction::LShr:
239 case Instruction::Shl:
240 case Instruction::ICmp:
241 case Instruction::FCmp:
242 case Instruction::FAdd:
243 case Instruction::FMul:
244 case Instruction::Select:
248 for (
Use &
Op :
I->operands()) {
249 if (!isa<FixedVectorType>(
Op->getType()))
251 if (isa<Instruction>(
Op))
252 Worklist.push_back(cast<Instruction>(&
Op));
257 for (
auto *
Use :
I->users())
258 Worklist.push_back(cast<Instruction>(
Use));
261 case Instruction::ShuffleVector:
263 if (cast<ShuffleVectorInst>(
I)->isZeroEltSplat())
277 dbgs() <<
"Found group:\n Exts:\n";
279 dbgs() <<
" " << *
I <<
"\n";
282 dbgs() <<
" " << *
I <<
"\n";
283 dbgs() <<
" OtherLeafs:\n";
284 for (
auto *
I : OtherLeafs)
285 dbgs() <<
" " << *
I->get() <<
" of " << *
I->getUser() <<
"\n";
286 dbgs() <<
" Truncs:\n";
287 for (
auto *
I : Truncs)
288 dbgs() <<
" " << *
I <<
"\n";
289 dbgs() <<
" Reducts:\n";
290 for (
auto *
I : Reducts)
291 dbgs() <<
" " << *
I <<
"\n";
295 "Expected some truncs or reductions");
299 auto *VT = !Truncs.
empty()
300 ? cast<FixedVectorType>(Truncs[0]->
getType())
301 : cast<FixedVectorType>(Exts[0]->getOperand(0)->
getType());
305 unsigned NumElts = VT->getNumElements();
306 unsigned BaseElts = VT->getScalarSizeInBits() == 16
308 : (VT->getScalarSizeInBits() == 8 ? 16 : 0);
309 if (BaseElts == 0 || NumElts % BaseElts != 0) {
313 if (Start->getOperand(0)->getType()->getScalarSizeInBits() !=
314 VT->getScalarSizeInBits() * 2) {
319 if (
I->getOperand(0)->getType() != VT) {
324 if (
I->getType() != VT) {
333 return I->getOpcode() == Instruction::Mul ||
334 I->getOpcode() == Instruction::Select ||
335 I->getOpcode() == Instruction::ICmp;
348 for (
unsigned Base = 0;
Base < NumElts;
Base += BaseElts) {
349 for (
unsigned i = 0; i < BaseElts / 2; i++)
351 for (
unsigned i = 0; i < BaseElts / 2; i++)
354 for (
unsigned Base = 0;
Base < NumElts;
Base += BaseElts) {
355 for (
unsigned i = 0; i < BaseElts / 2; i++) {
365 bool FPext = isa<FPExtInst>(
I);
366 bool Sext = isa<SExtInst>(
I);
370 I->replaceAllUsesWith(Ext);
374 for (
Use *
I : OtherLeafs) {
378 I->getUser()->setOperand(
I->getOperandNo(), Shuffle);
387 I->replaceAllUsesWith(Shuf);
388 cast<Instruction>(Shuf)->setOperand(0,
I);
399 if (
auto *
II = dyn_cast<IntrinsicInst>(&
I))
400 return II->getIntrinsicID() == Intrinsic::vector_reduce_add;
404bool MVELaneInterleaving::runOnFunction(
Function &
F) {
407 auto &TPC = getAnalysis<TargetPassConfig>();
410 if (!
ST->hasMVEIntegerOps())
413 bool Changed =
false;
417 if (((
I.getType()->isVectorTy() &&
418 (isa<TruncInst>(
I) || isa<FPTruncInst>(
I))) ||
Expand Atomic instructions
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static bool isProfitableToInterleave(SmallSetVector< Instruction *, 4 > &Exts, SmallSetVector< Instruction *, 4 > &Truncs)
static bool tryInterleave(Instruction *Start, SmallPtrSetImpl< Instruction * > &Visited)
cl::opt< bool > EnableInterleave("enable-mve-interleave", cl::Hidden, cl::init(true), cl::desc("Enable interleave MVE vector operation lowering"))
static bool isAddReduction(Instruction &I)
uint64_t IntrinsicInst * II
const char LLVMTargetMachineRef TM
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file implements a set that has insertion order iteration characteristics.
static SymbolRef::Type getType(const Symbol *Sym)
This file describes how to lower LLVM code to machine code.
Target-Independent Code Generator Pass Configuration Options pass.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
void setPreservesCFG()
This function should be called by the pass, iff they do not:
This class represents an Operation in the Expression.
FunctionPass class - This class is used to implement most global optimizations.
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overridden by subclasses to do the per-function processing of the pass.
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Value * CreateFPExt(Value *V, Type *DestTy, const Twine &Name="")
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
A wrapper class for inspecting calls to intrinsic functions.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Pass interface - Implemented by all 'passes'.
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
bool empty() const
Determine if the SetVector is empty or not.
bool insert(const value_type &X)
Insert a new element into the SetVector.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.
A SetVector that performs no allocations if smaller than a certain size.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Primary interface to the complete machine description for the target machine.
Target-Independent Code Generator Pass Configuration Options.
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Pass * createMVELaneInterleavingPass()
void initializeMVELaneInterleavingPass(PassRegistry &)
auto reverse(ContainerTy &&C)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.