78#define DEBUG_TYPE "interleaved-access"
81 "lower-interleaved-accesses",
82 cl::desc(
"Enable lowering interleaved accesses to intrinsics"),
87class InterleavedAccessImpl {
88 friend class InterleavedAccess;
91 InterleavedAccessImpl() =
default;
93 : DT(DT), TLI(TLI), MaxFactor(TLI->getMaxSupportedInterleaveFactor()) {}
97 DominatorTree *DT =
nullptr;
98 const TargetLowering *TLI =
nullptr;
101 unsigned MaxFactor = 0
u;
104 bool lowerInterleavedLoad(Instruction *Load,
105 SmallSetVector<Instruction *, 32> &DeadInsts);
108 bool lowerInterleavedStore(Instruction *Store,
109 SmallSetVector<Instruction *, 32> &DeadInsts);
113 bool lowerDeinterleaveIntrinsic(IntrinsicInst *
II,
114 SmallSetVector<Instruction *, 32> &DeadInsts);
118 bool lowerInterleaveIntrinsic(IntrinsicInst *
II,
119 SmallSetVector<Instruction *, 32> &DeadInsts);
134 SmallVectorImpl<ShuffleVectorInst *> &Shuffles,
139 InterleavedAccessImpl Impl;
144 InterleavedAccess() : FunctionPass(ID) {}
146 StringRef getPassName()
const override {
return "Interleaved Access Pass"; }
150 void getAnalysisUsage(AnalysisUsage &AU)
const override {
161 auto *TLI = TM->getSubtargetImpl(
F)->getTargetLowering();
162 InterleavedAccessImpl Impl(DT, TLI);
163 bool Changed = Impl.runOnFunction(
F);
173char InterleavedAccess::ID = 0;
175bool InterleavedAccess::runOnFunction(
Function &
F) {
179 auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
183 LLVM_DEBUG(
dbgs() <<
"*** " << getPassName() <<
": " <<
F.getName() <<
"\n");
185 Impl.DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
190 return Impl.runOnFunction(
F);
194 "Lower interleaved memory accesses to target specific intrinsics",
false,
198 "Lower interleaved memory accesses to target specific intrinsics",
false,
202 return new InterleavedAccess();
211 unsigned &Index,
unsigned MaxFactor,
212 unsigned NumLoadElements) {
217 for (Factor = 2; Factor <= MaxFactor; Factor++) {
219 if (Mask.size() * Factor > NumLoadElements)
240 unsigned MaxFactor) {
246 for (Factor = 2; Factor <= MaxFactor; Factor++) {
255 switch (
II->getIntrinsicID()) {
258 case Intrinsic::vp_load:
259 case Intrinsic::masked_load:
260 return II->getOperand(1);
261 case Intrinsic::vp_store:
262 case Intrinsic::masked_store:
263 return II->getOperand(2);
272static std::pair<Value *, APInt>
getMask(
Value *WideMask,
unsigned Factor,
275static std::pair<Value *, APInt>
getMask(
Value *WideMask,
unsigned Factor,
277 return getMask(WideMask, Factor, LeafValueTy->getElementCount());
280bool InterleavedAccessImpl::lowerInterleavedLoad(
290 if (LI && !LI->isSimple())
312 if (!BI->user_empty() &&
314 for (
auto *SVI : BI->users())
326 if (Shuffles.
empty() && BinOpShuffles.
empty())
329 unsigned Factor,
Index;
331 unsigned NumLoadElements =
333 auto *FirstSVI = Shuffles.
size() > 0 ? Shuffles[0] : BinOpShuffles[0];
346 for (
auto *Shuffle : Shuffles) {
347 if (Shuffle->getType() != VecTy)
350 Shuffle->getShuffleMask(), Factor, Index))
353 assert(Shuffle->getShuffleMask().size() <= NumLoadElements);
356 for (
auto *Shuffle : BinOpShuffles) {
357 if (Shuffle->getType() != VecTy)
360 Shuffle->getShuffleMask(), Factor, Index))
363 assert(Shuffle->getShuffleMask().size() <= NumLoadElements);
373 if (!tryReplaceExtracts(Extracts, Shuffles))
376 bool BinOpShuffleChanged =
377 replaceBinOpShuffles(BinOpShuffles.getArrayRef(), Shuffles, Load);
382 LLVM_DEBUG(
dbgs() <<
"IA: Found an interleaved load: " << *Load <<
"\n");
389 LLVM_DEBUG(
dbgs() <<
"IA: Found an interleaved vp.load or masked.load: "
392 <<
" and actual factor " << GapMask.popcount() <<
"\n");
398 Indices, Factor, GapMask))
400 return !Extracts.
empty() || BinOpShuffleChanged;
408bool InterleavedAccessImpl::replaceBinOpShuffles(
411 for (
auto *SVI : BinOpShuffles) {
416 return Idx < (int)cast<FixedVectorType>(BIOp0Ty)->getNumElements();
422 Mask, SVI->getName(), insertPos);
425 SVI->getName(), insertPos);
428 SVI->replaceAllUsesWith(NewBI);
430 <<
"\n With : " << *NewSVI1 <<
"\n And : "
431 << *NewSVI2 <<
"\n And : " << *NewBI <<
"\n");
433 if (NewSVI1->getOperand(0) == Load)
435 if (NewSVI2->getOperand(0) == Load)
439 return !BinOpShuffles.empty();
442bool InterleavedAccessImpl::tryReplaceExtracts(
447 if (Extracts.
empty())
454 for (
auto *Extract : Extracts) {
457 auto Index = IndexOperand->getSExtValue();
462 for (
auto *Shuffle : Shuffles) {
465 if (!DT->dominates(Shuffle, Extract))
472 Shuffle->getShuffleMask(Indices);
473 for (
unsigned I = 0;
I < Indices.
size(); ++
I)
474 if (Indices[
I] == Index) {
475 assert(Extract->getOperand(0) == Shuffle->getOperand(0) &&
476 "Vector operations do not match");
477 ReplacementMap[Extract] = std::make_pair(Shuffle,
I);
482 if (ReplacementMap.
count(Extract))
488 if (!ReplacementMap.
count(Extract))
494 for (
auto &Replacement : ReplacementMap) {
495 auto *Extract = Replacement.first;
496 auto *
Vector = Replacement.second.first;
497 auto Index = Replacement.second.second;
498 Builder.SetInsertPoint(Extract);
499 Extract->replaceAllUsesWith(Builder.CreateExtractElement(
Vector, Index));
500 Extract->eraseFromParent();
506bool InterleavedAccessImpl::lowerInterleavedStore(
514 StoredValue =
SI->getValueOperand();
516 assert(
II->getIntrinsicID() == Intrinsic::vp_store ||
517 II->getIntrinsicID() == Intrinsic::masked_store);
518 StoredValue =
II->getArgOperand(0);
525 unsigned NumStoredElements =
531 assert(NumStoredElements % Factor == 0 &&
532 "number of stored element should be a multiple of Factor");
537 LLVM_DEBUG(
dbgs() <<
"IA: Found an interleaved store: " << *Store <<
"\n");
540 unsigned LaneMaskLen = NumStoredElements / Factor;
546 LLVM_DEBUG(
dbgs() <<
"IA: Found an interleaved vp.store or masked.store: "
549 <<
" and actual factor " << GapMask.popcount() <<
"\n");
554 if (!TLI->lowerInterleavedStore(Store, Mask, SVI, Factor, GapMask))
569 unsigned LeafMaskLen,
APInt &GapMask) {
571 for (
unsigned F = 0U;
F < Factor; ++
F) {
573 for (
unsigned Idx = 0U; Idx < LeafMaskLen; ++Idx) {
575 if (!
C->isZeroValue()) {
586static std::pair<Value *, APInt>
getMask(
Value *WideMask,
unsigned Factor,
593 Value *RefArg =
nullptr;
596 for (
auto [Idx, Arg] :
enumerate(IMI->args())) {
598 GapMask.clearBit(Idx);
604 else if (RefArg != Arg)
605 return {
nullptr, GapMask};
611 return {RefArg ? RefArg : IMI->getArgOperand(0), GapMask};
617 AndOp && AndOp->getOpcode() == Instruction::And) {
618 auto [MaskLHS, GapMaskLHS] =
619 getMask(AndOp->getOperand(0), Factor, LeafValueEC);
620 auto [MaskRHS, GapMaskRHS] =
621 getMask(AndOp->getOperand(1), Factor, LeafValueEC);
622 if (!MaskLHS || !MaskRHS)
623 return {
nullptr, GapMask};
626 return {
IRBuilder<>(AndOp).CreateAnd(MaskLHS, MaskRHS),
627 GapMaskLHS & GapMaskRHS};
631 if (
auto *
Splat = ConstMask->getSplatValue())
638 getGapMask(*ConstMask, Factor, LeafMaskLen, GapMask);
644 for (
unsigned Idx = 0U; Idx < LeafMaskLen * Factor; ++Idx) {
645 if (!GapMask[Idx % Factor])
647 Constant *
C = ConstMask->getAggregateElement(Idx);
648 if (LeafMask[Idx / Factor] && LeafMask[Idx / Factor] !=
C)
649 return {
nullptr, GapMask};
650 LeafMask[Idx / Factor] =
C;
658 Type *Op1Ty = SVI->getOperand(1)->getType();
660 return {
nullptr, GapMask};
665 unsigned NumSrcElts =
669 NumSrcElts * 2, StartIndexes) &&
671 llvm::all_of(SVI->getShuffleMask(), [&NumSrcElts](
int Idx) {
672 return Idx < (int)NumSrcElts;
677 return {Builder.CreateExtractVector(LeafMaskTy, SVI->getOperand(0),
683 return {
nullptr, GapMask};
686bool InterleavedAccessImpl::lowerDeinterleaveIntrinsic(
689 if (!LoadedVal || !LoadedVal->
hasOneUse())
698 assert(Factor &&
"unexpected deinterleave intrinsic");
705 LLVM_DEBUG(
dbgs() <<
"IA: Found a load with deinterleave intrinsic " << *DI
706 <<
" and factor = " << Factor <<
"\n");
709 if (
II->getIntrinsicID() != Intrinsic::masked_load &&
710 II->getIntrinsicID() != Intrinsic::vp_load)
714 APInt GapMask(Factor, 0);
715 std::tie(Mask, GapMask) =
722 if (GapMask.popcount() != Factor)
725 LLVM_DEBUG(
dbgs() <<
"IA: Found a vp.load or masked.load with deinterleave"
726 <<
" intrinsic " << *DI <<
" and factor = "
731 if (!TLI->lowerDeinterleaveIntrinsicToLoad(LoadedVal, Mask, DI))
736 DeadInsts.
insert(LoadedVal);
740bool InterleavedAccessImpl::lowerInterleaveIntrinsic(
754 assert(Factor &&
"unexpected interleave intrinsic");
758 if (
II->getIntrinsicID() != Intrinsic::masked_store &&
759 II->getIntrinsicID() != Intrinsic::vp_store)
762 APInt GapMask(Factor, 0);
763 std::tie(Mask, GapMask) =
770 if (GapMask.popcount() != Factor)
773 LLVM_DEBUG(
dbgs() <<
"IA: Found a vp.store or masked.store with interleave"
774 <<
" intrinsic " << *IntII <<
" and factor = "
780 LLVM_DEBUG(
dbgs() <<
"IA: Found a store with interleave intrinsic "
781 << *IntII <<
" and factor = " << Factor <<
"\n");
785 if (!TLI->lowerInterleaveIntrinsicToStore(StoredBy, Mask, InterleaveValues))
789 DeadInsts.
insert(StoredBy);
794bool InterleavedAccessImpl::runOnFunction(
Function &
F) {
804 Changed |= lowerInterleavedLoad(&
I, DeadInsts);
809 Changed |= lowerInterleavedStore(&
I, DeadInsts);
813 Changed |= lowerDeinterleaveIntrinsic(
II, DeadInsts);
815 Changed |= lowerInterleaveIntrinsic(
II, DeadInsts);
819 for (
auto *
I : DeadInsts)
820 I->eraseFromParent();
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Expand Atomic instructions
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file defines the DenseMap class.
static bool runOnFunction(Function &F, bool PostInlining)
static bool isDeInterleaveMask(ArrayRef< int > Mask, unsigned &Factor, unsigned &Index, unsigned MaxFactor, unsigned NumLoadElements)
Check if the mask is a DE-interleave mask for an interleaved load.
static void getGapMask(const Constant &MaskConst, unsigned Factor, unsigned LeafMaskLen, APInt &GapMask)
static cl::opt< bool > LowerInterleavedAccesses("lower-interleaved-accesses", cl::desc("Enable lowering interleaved accesses to intrinsics"), cl::init(true), cl::Hidden)
static bool isReInterleaveMask(ShuffleVectorInst *SVI, unsigned &Factor, unsigned MaxFactor)
Check if the mask can be used in an interleaved store.
static Value * getMaskOperand(IntrinsicInst *II)
static std::pair< Value *, APInt > getMask(Value *WideMask, unsigned Factor, ElementCount LeafValueEC)
This file contains the declaration of the InterleavedAccessPass class, its corresponding pass name is...
uint64_t IntrinsicInst * II
FunctionAnalysisManager FAM
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallVector class.
static SymbolRef::Type getType(const Symbol *Sym)
This file describes how to lower LLVM code to machine code.
Target-Independent Code Generator Pass Configuration Options pass.
Class for arbitrary precision integers.
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
void clearBit(unsigned BitPosition)
Set a given bit to 0.
unsigned getBitWidth() const
Return the number of bits in the APInt.
AnalysisUsage & addRequired()
LLVM_ABI void setPreservesCFG()
This function should be called by the pass iff it does not add or remove basic blocks from the function and does not modify terminator instructions in any way.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
bool empty() const
empty - Check if the array is empty.
InstListType::iterator iterator
Instruction iterators...
BinaryOps getOpcode() const
static BinaryOperator * CreateWithCopiedFlags(BinaryOps Opc, Value *V1, Value *V2, Value *CopyO, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Represents analyses that only rely on functions' control flow.
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
static LLVM_ABI Constant * getSplat(ElementCount EC, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
static LLVM_ABI Constant * get(ArrayRef< Constant * > V)
This is an important base class in LLVM.
LLVM_ABI Constant * getAggregateElement(unsigned Elt) const
For aggregates (struct/array/vector) return the constant that corresponds to the specified element if...
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Analysis pass which computes a DominatorTree.
Legacy analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
static constexpr ElementCount getFixed(ScalarTy MinVal)
FunctionPass class - This class is used to implement most global optimizations.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM)
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
void insert_range(Range &&R)
bool empty() const
Determine if the SetVector is empty or not.
bool insert(const value_type &X)
Insert a new element into the SetVector.
This instruction constructs a fixed permutation of two input vectors.
static LLVM_ABI void getShuffleMask(const Constant *Mask, SmallVectorImpl< int > &Result)
Convert the input shuffle mask operand to a vector of integers.
static LLVM_ABI bool isDeInterleaveMaskOfFactor(ArrayRef< int > Mask, unsigned Factor, unsigned &Index)
Check if the mask is a DE-interleave mask of the given factor Factor like: <Index,...
LLVM_ABI bool isInterleave(unsigned Factor)
Return if this shuffle interleaves its two input vectors together.
static LLVM_ABI bool isInterleaveMask(ArrayRef< int > Mask, unsigned Factor, unsigned NumInputElts, SmallVectorImpl< unsigned > &StartIndexes)
Return true if the mask interleaves one or more input vectors together.
A SetVector that performs no allocations if smaller than a certain size.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
virtual unsigned getMaxSupportedInterleaveFactor() const
Get the maximum supported factor for interleaved memory accesses.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Primary interface to the complete machine description for the target machine.
virtual const TargetSubtargetInfo * getSubtargetImpl(const Function &) const
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
virtual const TargetLowering * getTargetLowering() const
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Base class of all SIMD vector types.
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct a VectorType.
constexpr ScalarTy getFixedValue() const
constexpr bool isFixed() const
Returns true if the quantity is not scaled by vscale.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
TwoOps_match< ValueOpTy, PointerOpTy, Instruction::Store > m_Store(const ValueOpTy &ValueOp, const PointerOpTy &PointerOp)
Matches StoreInst.
bool match(Val *V, const Pattern &P)
auto match_fn(const Pattern &P)
A match functor that can be used as a UnaryPredicate in functional algorithms like all_of.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
auto m_Undef()
Match an arbitrary undef constant.
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
initializer< Ty > init(const Ty &Val)
Context & getContext() const
This is an optimization pass for GlobalISel generic memory operations.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
constexpr auto equal_to(T &&Arg)
Functor variant of std::equal_to that can be used as a UnaryPredicate in functional algorithms like a...
LLVM_ABI unsigned getDeinterleaveIntrinsicFactor(Intrinsic::ID ID)
Returns the corresponding factor of llvm.vector.deinterleaveN intrinsics.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
LLVM_ABI unsigned getInterleaveIntrinsicFactor(Intrinsic::ID ID)
Returns the corresponding factor of llvm.vector.interleaveN intrinsics.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
LLVM_ABI FunctionPass * createInterleavedAccessPass()
InterleavedAccess Pass - This pass identifies and matches interleaved memory accesses to target speci...
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
LLVM_ABI VectorType * getDeinterleavedVectorType(IntrinsicInst *DI)
Given a deinterleaveN intrinsic, return the (narrow) vector type of each factor.
ArrayRef(const T &OneElt) -> ArrayRef< T >
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.