77#define DEBUG_TYPE "interleaved-access"
80 "lower-interleaved-accesses",
81 cl::desc(
"Enable lowering interleaved accesses to intrinsics"),
86class InterleavedAccessImpl {
87 friend class InterleavedAccess;
90 InterleavedAccessImpl() =
default;
92 : DT(DT), TLI(TLI), MaxFactor(TLI->getMaxSupportedInterleaveFactor()) {}
100 unsigned MaxFactor = 0
u;
103 bool lowerInterleavedLoad(
LoadInst *LI,
107 bool lowerInterleavedStore(
StoreInst *SI,
138 InterleavedAccessImpl Impl;
163 InterleavedAccessImpl Impl(DT, TLI);
164 bool Changed = Impl.runOnFunction(
F);
174char InterleavedAccess::ID = 0;
176bool InterleavedAccess::runOnFunction(
Function &
F) {
177 auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
181 LLVM_DEBUG(
dbgs() <<
"*** " << getPassName() <<
": " <<
F.getName() <<
"\n");
183 Impl.DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
185 Impl.TLI =
TM.getSubtargetImpl(
F)->getTargetLowering();
186 Impl.MaxFactor = Impl.TLI->getMaxSupportedInterleaveFactor();
188 return Impl.runOnFunction(
F);
192 "Lower interleaved memory accesses to target specific intrinsics",
false,
200 return new InterleavedAccess();
214 for (; i < Mask.size(); i++)
215 if (Mask[i] >= 0 &&
static_cast<unsigned>(Mask[i]) !=
Index + i * Factor)
218 if (i == Mask.size())
231 unsigned &
Index,
unsigned MaxFactor,
232 unsigned NumLoadElements) {
237 for (Factor = 2; Factor <= MaxFactor; Factor++) {
239 if (Mask.size() * Factor > NumLoadElements)
260 unsigned MaxFactor) {
266 for (Factor = 2; Factor <= MaxFactor; Factor++) {
274bool InterleavedAccessImpl::lowerInterleavedLoad(
291 auto *Extract = dyn_cast<ExtractElementInst>(
User);
292 if (Extract && isa<ConstantInt>(Extract->getIndexOperand())) {
296 if (
auto *BI = dyn_cast<BinaryOperator>(
User)) {
297 if (!BI->user_empty() &&
all_of(BI->users(), [](
auto *U) {
298 auto *SVI = dyn_cast<ShuffleVectorInst>(U);
299 return SVI && isa<UndefValue>(SVI->getOperand(1));
301 for (
auto *SVI : BI->users())
302 BinOpShuffles.
insert(cast<ShuffleVectorInst>(SVI));
306 auto *SVI = dyn_cast<ShuffleVectorInst>(
User);
307 if (!SVI || !isa<UndefValue>(SVI->getOperand(1)))
313 if (Shuffles.
empty() && BinOpShuffles.
empty())
316 unsigned Factor,
Index;
318 unsigned NumLoadElements =
319 cast<FixedVectorType>(LI->
getType())->getNumElements();
320 auto *FirstSVI = Shuffles.
size() > 0 ? Shuffles[0] : BinOpShuffles[0];
329 Type *VecTy = FirstSVI->getType();
333 for (
auto *Shuffle : Shuffles) {
334 if (Shuffle->getType() != VecTy)
340 assert(Shuffle->getShuffleMask().size() <= NumLoadElements);
343 for (
auto *Shuffle : BinOpShuffles) {
344 if (Shuffle->getType() != VecTy)
350 assert(Shuffle->getShuffleMask().size() <= NumLoadElements);
352 if (cast<Instruction>(Shuffle->getOperand(0))->getOperand(0) == LI)
354 if (cast<Instruction>(Shuffle->getOperand(0))->getOperand(1) == LI)
360 if (!tryReplaceExtracts(Extracts, Shuffles))
363 bool BinOpShuffleChanged =
364 replaceBinOpShuffles(BinOpShuffles.getArrayRef(), Shuffles, LI);
366 LLVM_DEBUG(
dbgs() <<
"IA: Found an interleaved load: " << *LI <<
"\n");
369 if (!TLI->lowerInterleavedLoad(LI, Shuffles, Indices, Factor)) {
371 return !Extracts.
empty() || BinOpShuffleChanged;
380bool InterleavedAccessImpl::replaceBinOpShuffles(
383 for (
auto *SVI : BinOpShuffles) {
388 return Idx < (int)cast<FixedVectorType>(BIOp0Ty)->getNumElements();
394 Mask, SVI->
getName(), insertPos);
400 SVI->replaceAllUsesWith(NewBI);
402 <<
"\n With : " << *NewSVI1 <<
"\n And : "
403 << *NewSVI2 <<
"\n And : " << *NewBI <<
"\n");
405 if (NewSVI1->getOperand(0) == LI)
407 if (NewSVI2->getOperand(0) == LI)
411 return !BinOpShuffles.empty();
414bool InterleavedAccessImpl::tryReplaceExtracts(
419 if (Extracts.
empty())
426 for (
auto *Extract : Extracts) {
428 auto *IndexOperand = cast<ConstantInt>(Extract->getIndexOperand());
429 auto Index = IndexOperand->getSExtValue();
434 for (
auto *Shuffle : Shuffles) {
437 if (!DT->dominates(Shuffle, Extract))
444 Shuffle->getShuffleMask(Indices);
445 for (
unsigned I = 0;
I < Indices.
size(); ++
I)
446 if (Indices[
I] ==
Index) {
447 assert(Extract->getOperand(0) == Shuffle->getOperand(0) &&
448 "Vector operations do not match");
449 ReplacementMap[Extract] = std::make_pair(Shuffle,
I);
454 if (ReplacementMap.
count(Extract))
460 if (!ReplacementMap.
count(Extract))
466 for (
auto &Replacement : ReplacementMap) {
467 auto *Extract = Replacement.first;
468 auto *
Vector = Replacement.second.first;
469 auto Index = Replacement.second.second;
470 Builder.SetInsertPoint(Extract);
471 Extract->replaceAllUsesWith(Builder.CreateExtractElement(
Vector,
Index));
472 Extract->eraseFromParent();
478bool InterleavedAccessImpl::lowerInterleavedStore(
483 auto *SVI = dyn_cast<ShuffleVectorInst>(
SI->getValueOperand());
484 if (!SVI || !SVI->hasOneUse() || isa<ScalableVectorType>(SVI->getType()))
492 LLVM_DEBUG(
dbgs() <<
"IA: Found an interleaved store: " << *SI <<
"\n");
495 if (!TLI->lowerInterleavedStore(SI, SVI, Factor))
504bool InterleavedAccessImpl::lowerDeinterleaveIntrinsic(
511 LLVM_DEBUG(
dbgs() <<
"IA: Found a deinterleave intrinsic: " << *DI <<
"\n");
514 if (!TLI->lowerDeinterleaveIntrinsicToLoad(DI, LI))
523bool InterleavedAccessImpl::lowerInterleaveIntrinsic(
530 if (!SI || !
SI->isSimple())
533 LLVM_DEBUG(
dbgs() <<
"IA: Found an interleave intrinsic: " << *II <<
"\n");
536 if (!TLI->lowerInterleaveIntrinsicToStore(II, SI))
545bool InterleavedAccessImpl::runOnFunction(
Function &
F) {
548 bool Changed =
false;
551 if (
auto *LI = dyn_cast<LoadInst>(&
I))
552 Changed |= lowerInterleavedLoad(LI, DeadInsts);
554 if (
auto *SI = dyn_cast<StoreInst>(&
I))
555 Changed |= lowerInterleavedStore(SI, DeadInsts);
557 if (
auto *II = dyn_cast<IntrinsicInst>(&
I)) {
560 if (II->
getIntrinsicID() == Intrinsic::experimental_vector_deinterleave2)
561 Changed |= lowerDeinterleaveIntrinsic(II, DeadInsts);
562 if (II->
getIntrinsicID() == Intrinsic::experimental_vector_interleave2)
563 Changed |= lowerInterleaveIntrinsic(II, DeadInsts);
567 for (
auto *
I : DeadInsts)
568 I->eraseFromParent();
Expand Atomic instructions
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Returns the sub type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx.
This file defines the DenseMap class.
static bool runOnFunction(Function &F, bool PostInlining)
expand Expand reduction intrinsics
static bool isDeInterleaveMask(ArrayRef< int > Mask, unsigned &Factor, unsigned &Index, unsigned MaxFactor, unsigned NumLoadElements)
Check if the mask is a DE-interleave mask for an interleaved load.
static cl::opt< bool > LowerInterleavedAccesses("lower-interleaved-accesses", cl::desc("Enable lowering interleaved accesses to intrinsics"), cl::init(true), cl::Hidden)
static bool isReInterleaveMask(ShuffleVectorInst *SVI, unsigned &Factor, unsigned MaxFactor)
Check if the mask can be used in an interleaved store.
static bool isDeInterleaveMaskOfFactor(ArrayRef< int > Mask, unsigned Factor, unsigned &Index)
Check if the mask is a DE-interleave mask of the given factor Factor like: <Index,...
This file contains the declaration of the InterleavedAccessPass class, its corresponding pass name is...
FunctionAnalysisManager FAM
const char LLVMTargetMachineRef TM
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallVector class.
This file describes how to lower LLVM code to machine code.
Target-Independent Code Generator Pass Configuration Options pass.
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
void setPreservesCFG()
This function should be called by the pass, iff they do not:
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
bool empty() const
empty - Check if the array is empty.
InstListType::iterator iterator
Instruction iterators...
BinaryOps getOpcode() const
static BinaryOperator * CreateWithCopiedFlags(BinaryOps Opc, Value *V1, Value *V2, Value *CopyO, const Twine &Name, BasicBlock::iterator InsertBefore)
Represents analyses that only rely on functions' control flow.
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Analysis pass which computes a DominatorTree.
Legacy analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
FunctionPass class - This class is used to implement most global optimizations.
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overridden by subclasses to do the per-function processing of the pass.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM)
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
An instruction for reading from memory.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
static PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
void preserveSet()
Mark an analysis set as preserved.
bool empty() const
Determine if the SetVector is empty or not.
bool insert(const value_type &X)
Insert a new element into the SetVector.
This instruction constructs a fixed permutation of two input vectors.
static void getShuffleMask(const Constant *Mask, SmallVectorImpl< int > &Result)
Convert the input shuffle mask operand to a vector of integers.
bool isInterleave(unsigned Factor)
Return if this shuffle interleaves its two input vectors together.
A SetVector that performs no allocations if smaller than a certain size.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
StringRef - Represent a constant reference to a string, i.e.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Primary interface to the complete machine description for the target machine.
virtual const TargetSubtargetInfo * getSubtargetImpl(const Function &) const
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
virtual const TargetLowering * getTargetLowering() const
The instances of the Type class are immutable: once they are created, they are never changed.
Value * getOperand(unsigned i) const
Type * getType() const
All values are typed, get the type of this value.
bool hasOneUse() const
Return true if there is exactly one use of this value.
iterator_range< user_iterator > users()
StringRef getName() const
Return a constant reference to the value's name.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
void initializeInterleavedAccessPass(PassRegistry &)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
FunctionPass * createInterleavedAccessPass()
InterleavedAccess Pass - This pass identifies and matches interleaved memory accesses to target speci...