76#define DEBUG_TYPE "interleaved-access"
79 "lower-interleaved-accesses",
80 cl::desc(
"Enable lowering interleaved accesses to intrinsics"),
107 unsigned MaxFactor = 0
u;
110 bool lowerInterleavedLoad(
LoadInst *LI,
114 bool lowerInterleavedStore(
StoreInst *SI,
146char InterleavedAccess::ID = 0;
149 "Lower interleaved memory accesses to target specific intrinsics",
false,
157 return new InterleavedAccess();
171 for (; i < Mask.size(); i++)
172 if (Mask[i] >= 0 &&
static_cast<unsigned>(Mask[i]) !=
Index + i * Factor)
175 if (i == Mask.size())
188 unsigned &
Index,
unsigned MaxFactor,
189 unsigned NumLoadElements) {
194 for (Factor = 2; Factor <= MaxFactor; Factor++) {
196 if (Mask.size() * Factor > NumLoadElements)
217 unsigned MaxFactor) {
223 for (Factor = 2; Factor <= MaxFactor; Factor++) {
231bool InterleavedAccess::lowerInterleavedLoad(
248 auto *Extract = dyn_cast<ExtractElementInst>(
User);
249 if (Extract && isa<ConstantInt>(Extract->getIndexOperand())) {
253 if (
auto *BI = dyn_cast<BinaryOperator>(
User)) {
254 if (
all_of(BI->users(), [](
auto *U) {
255 auto *SVI = dyn_cast<ShuffleVectorInst>(U);
256 return SVI && isa<UndefValue>(SVI->getOperand(1));
258 for (
auto *SVI : BI->users())
259 BinOpShuffles.
insert(cast<ShuffleVectorInst>(SVI));
263 auto *SVI = dyn_cast<ShuffleVectorInst>(
User);
264 if (!SVI || !isa<UndefValue>(SVI->getOperand(1)))
270 if (Shuffles.
empty() && BinOpShuffles.
empty())
273 unsigned Factor,
Index;
275 unsigned NumLoadElements =
276 cast<FixedVectorType>(LI->
getType())->getNumElements();
277 auto *FirstSVI = Shuffles.
size() > 0 ? Shuffles[0] : BinOpShuffles[0];
286 Type *VecTy = FirstSVI->getType();
290 for (
auto *Shuffle : Shuffles) {
291 if (Shuffle->getType() != VecTy)
297 assert(Shuffle->getShuffleMask().size() <= NumLoadElements);
300 for (
auto *Shuffle : BinOpShuffles) {
301 if (Shuffle->getType() != VecTy)
307 assert(Shuffle->getShuffleMask().size() <= NumLoadElements);
309 if (cast<Instruction>(Shuffle->getOperand(0))->getOperand(0) == LI)
311 if (cast<Instruction>(Shuffle->getOperand(0))->getOperand(1) == LI)
317 if (!tryReplaceExtracts(Extracts, Shuffles))
320 bool BinOpShuffleChanged =
321 replaceBinOpShuffles(BinOpShuffles.getArrayRef(), Shuffles, LI);
323 LLVM_DEBUG(
dbgs() <<
"IA: Found an interleaved load: " << *LI <<
"\n");
326 if (!TLI->lowerInterleavedLoad(LI, Shuffles, Indices, Factor)) {
328 return !Extracts.
empty() || BinOpShuffleChanged;
337bool InterleavedAccess::replaceBinOpShuffles(
340 for (
auto *SVI : BinOpShuffles) {
345 return Idx < (int)cast<FixedVectorType>(BIOp0Ty)->getNumElements();
356 SVI->replaceAllUsesWith(NewBI);
358 <<
"\n With : " << *NewSVI1 <<
"\n And : "
359 << *NewSVI2 <<
"\n And : " << *NewBI <<
"\n");
361 if (NewSVI1->getOperand(0) == LI)
363 if (NewSVI2->getOperand(0) == LI)
367 return !BinOpShuffles.empty();
370bool InterleavedAccess::tryReplaceExtracts(
375 if (Extracts.
empty())
382 for (
auto *Extract : Extracts) {
384 auto *IndexOperand = cast<ConstantInt>(Extract->getIndexOperand());
385 auto Index = IndexOperand->getSExtValue();
390 for (
auto *Shuffle : Shuffles) {
393 if (!DT->dominates(Shuffle, Extract))
400 Shuffle->getShuffleMask(Indices);
401 for (
unsigned I = 0;
I < Indices.
size(); ++
I)
402 if (Indices[
I] ==
Index) {
403 assert(Extract->getOperand(0) == Shuffle->getOperand(0) &&
404 "Vector operations do not match");
405 ReplacementMap[Extract] = std::make_pair(Shuffle,
I);
410 if (ReplacementMap.
count(Extract))
416 if (!ReplacementMap.
count(Extract))
422 for (
auto &Replacement : ReplacementMap) {
423 auto *Extract = Replacement.first;
424 auto *
Vector = Replacement.second.first;
425 auto Index = Replacement.second.second;
426 Builder.SetInsertPoint(Extract);
428 Extract->eraseFromParent();
434bool InterleavedAccess::lowerInterleavedStore(
439 auto *SVI = dyn_cast<ShuffleVectorInst>(
SI->getValueOperand());
440 if (!SVI || !SVI->hasOneUse() || isa<ScalableVectorType>(SVI->getType()))
448 LLVM_DEBUG(
dbgs() <<
"IA: Found an interleaved store: " << *SI <<
"\n");
451 if (!TLI->lowerInterleavedStore(SI, SVI, Factor))
460bool InterleavedAccess::lowerDeinterleaveIntrinsic(
467 LLVM_DEBUG(
dbgs() <<
"IA: Found a deinterleave intrinsic: " << *DI <<
"\n");
470 if (!TLI->lowerDeinterleaveIntrinsicToLoad(DI, LI))
479bool InterleavedAccess::lowerInterleaveIntrinsic(
486 if (!SI || !
SI->isSimple())
489 LLVM_DEBUG(
dbgs() <<
"IA: Found an interleave intrinsic: " << *II <<
"\n");
492 if (!TLI->lowerInterleaveIntrinsicToStore(II, SI))
501bool InterleavedAccess::runOnFunction(
Function &
F) {
502 auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
506 LLVM_DEBUG(
dbgs() <<
"*** " << getPassName() <<
": " <<
F.getName() <<
"\n");
508 DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
510 TLI =
TM.getSubtargetImpl(
F)->getTargetLowering();
511 MaxFactor = TLI->getMaxSupportedInterleaveFactor();
515 bool Changed =
false;
518 if (
auto *LI = dyn_cast<LoadInst>(&
I))
519 Changed |= lowerInterleavedLoad(LI, DeadInsts);
521 if (
auto *SI = dyn_cast<StoreInst>(&
I))
522 Changed |= lowerInterleavedStore(SI, DeadInsts);
524 if (
auto *II = dyn_cast<IntrinsicInst>(&
I)) {
527 if (II->
getIntrinsicID() == Intrinsic::experimental_vector_deinterleave2)
528 Changed |= lowerDeinterleaveIntrinsic(II, DeadInsts);
529 if (II->
getIntrinsicID() == Intrinsic::experimental_vector_interleave2)
530 Changed |= lowerInterleaveIntrinsic(II, DeadInsts);
534 for (
auto *
I : DeadInsts)
535 I->eraseFromParent();
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Returns the sub type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx.
This file defines the DenseMap class.
Select target instructions out of generic instructions
static bool isDeInterleaveMask(ArrayRef< int > Mask, unsigned &Factor, unsigned &Index, unsigned MaxFactor, unsigned NumLoadElements)
Check if the mask is a DE-interleave mask for an interleaved load.
static cl::opt< bool > LowerInterleavedAccesses("lower-interleaved-accesses", cl::desc("Enable lowering interleaved accesses to intrinsics"), cl::init(true), cl::Hidden)
static bool isReInterleaveMask(ShuffleVectorInst *SVI, unsigned &Factor, unsigned MaxFactor)
Check if the mask can be used in an interleaved store.
Lower interleaved memory accesses to target specific intrinsics
static bool isDeInterleaveMaskOfFactor(ArrayRef< int > Mask, unsigned Factor, unsigned &Index)
Check if the mask is a DE-interleave mask of the given factor Factor like: <Index,...
const char LLVMTargetMachineRef TM
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallVector class.
This file describes how to lower LLVM code to machine code.
Target-Independent Code Generator Pass Configuration Options pass.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
void setPreservesCFG()
This function should be called by the pass, iff they do not:
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
bool empty() const
empty - Check if the array is empty.
BinaryOps getOpcode() const
static BinaryOperator * CreateWithCopiedFlags(BinaryOps Opc, Value *V1, Value *V2, Value *CopyO, const Twine &Name="", Instruction *InsertBefore=nullptr)
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Legacy analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
FunctionPass class - This class is used to implement most global optimizations.
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overridden by subclasses to do the per-function processing of the pass.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
An instruction for reading from memory.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
static PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
bool empty() const
Determine if the SetVector is empty or not.
bool insert(const value_type &X)
Insert a new element into the SetVector.
This instruction constructs a fixed permutation of two input vectors.
static void getShuffleMask(const Constant *Mask, SmallVectorImpl< int > &Result)
Convert the input shuffle mask operand to a vector of integers.
bool isInterleave(unsigned Factor)
Return if this shuffle interleaves its two input vectors together.
A SetVector that performs no allocations if smaller than a certain size.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
StringRef - Represent a constant reference to a string, i.e.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Primary interface to the complete machine description for the target machine.
The instances of the Type class are immutable: once they are created, they are never changed.
Value * getOperand(unsigned i) const
Type * getType() const
All values are typed, get the type of this value.
bool hasOneUse() const
Return true if there is exactly one use of this value.
iterator_range< user_iterator > users()
StringRef getName() const
Return a constant reference to the value's name.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
void initializeInterleavedAccessPass(PassRegistry &)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
void append_range(Container &C, Range &&R)
Wrapper function to append a range to a container.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
FunctionPass * createInterleavedAccessPass()
InterleavedAccess Pass - This pass identifies and matches interleaved memory accesses to target speci...