75#define DEBUG_TYPE "interleaved-access"
78 "lower-interleaved-accesses",
79 cl::desc(
"Enable lowering interleaved accesses to intrinsics"),
106 unsigned MaxFactor = 0
u;
109 bool lowerInterleavedLoad(
LoadInst *LI,
113 bool lowerInterleavedStore(
StoreInst *SI,
135char InterleavedAccess::ID = 0;
138 "Lower interleaved memory accesses to target specific intrinsics",
false,
146 return new InterleavedAccess();
160 for (; i < Mask.size(); i++)
161 if (Mask[i] >= 0 &&
static_cast<unsigned>(Mask[i]) !=
Index + i * Factor)
164 if (i == Mask.size())
177 unsigned &
Index,
unsigned MaxFactor,
178 unsigned NumLoadElements) {
183 for (Factor = 2; Factor <= MaxFactor; Factor++) {
185 if (Mask.size() * Factor > NumLoadElements)
206 unsigned MaxFactor) {
212 for (Factor = 2; Factor <= MaxFactor; Factor++) {
220bool InterleavedAccess::lowerInterleavedLoad(
237 auto *Extract = dyn_cast<ExtractElementInst>(
User);
238 if (Extract && isa<ConstantInt>(Extract->getIndexOperand())) {
242 if (
auto *BI = dyn_cast<BinaryOperator>(
User)) {
243 if (
all_of(BI->users(), [](
auto *U) {
244 auto *SVI = dyn_cast<ShuffleVectorInst>(U);
245 return SVI && isa<UndefValue>(SVI->getOperand(1));
247 for (
auto *SVI : BI->users())
248 BinOpShuffles.
insert(cast<ShuffleVectorInst>(SVI));
252 auto *SVI = dyn_cast<ShuffleVectorInst>(
User);
253 if (!SVI || !isa<UndefValue>(SVI->getOperand(1)))
259 if (Shuffles.
empty() && BinOpShuffles.
empty())
262 unsigned Factor,
Index;
264 unsigned NumLoadElements =
265 cast<FixedVectorType>(LI->
getType())->getNumElements();
266 auto *FirstSVI = Shuffles.
size() > 0 ? Shuffles[0] : BinOpShuffles[0];
275 Type *VecTy = FirstSVI->getType();
279 for (
auto *Shuffle : Shuffles) {
280 if (Shuffle->getType() != VecTy)
286 assert(Shuffle->getShuffleMask().size() <= NumLoadElements);
289 for (
auto *Shuffle : BinOpShuffles) {
290 if (Shuffle->getType() != VecTy)
296 assert(Shuffle->getShuffleMask().size() <= NumLoadElements);
298 if (cast<Instruction>(Shuffle->getOperand(0))->getOperand(0) == LI)
300 if (cast<Instruction>(Shuffle->getOperand(0))->getOperand(1) == LI)
306 if (!tryReplaceExtracts(Extracts, Shuffles))
309 bool BinOpShuffleChanged =
310 replaceBinOpShuffles(BinOpShuffles.getArrayRef(), Shuffles, LI);
312 LLVM_DEBUG(
dbgs() <<
"IA: Found an interleaved load: " << *LI <<
"\n");
315 if (!TLI->lowerInterleavedLoad(LI, Shuffles, Indices, Factor)) {
317 return !Extracts.
empty() || BinOpShuffleChanged;
326bool InterleavedAccess::replaceBinOpShuffles(
329 for (
auto *SVI : BinOpShuffles) {
334 return Idx < (int)cast<FixedVectorType>(BIOp0Ty)->getNumElements();
345 SVI->replaceAllUsesWith(NewBI);
347 <<
"\n With : " << *NewSVI1 <<
"\n And : "
348 << *NewSVI2 <<
"\n And : " << *NewBI <<
"\n");
350 if (NewSVI1->getOperand(0) == LI)
352 if (NewSVI2->getOperand(0) == LI)
356 return !BinOpShuffles.empty();
359bool InterleavedAccess::tryReplaceExtracts(
364 if (Extracts.
empty())
371 for (
auto *Extract : Extracts) {
373 auto *IndexOperand = cast<ConstantInt>(Extract->getIndexOperand());
374 auto Index = IndexOperand->getSExtValue();
379 for (
auto *Shuffle : Shuffles) {
382 if (!DT->dominates(Shuffle, Extract))
389 Shuffle->getShuffleMask(Indices);
390 for (
unsigned I = 0;
I < Indices.
size(); ++
I)
391 if (Indices[
I] ==
Index) {
392 assert(Extract->getOperand(0) == Shuffle->getOperand(0) &&
393 "Vector operations do not match");
394 ReplacementMap[Extract] = std::make_pair(Shuffle,
I);
399 if (ReplacementMap.
count(Extract))
405 if (!ReplacementMap.
count(Extract))
411 for (
auto &Replacement : ReplacementMap) {
412 auto *Extract = Replacement.first;
413 auto *
Vector = Replacement.second.first;
414 auto Index = Replacement.second.second;
415 Builder.SetInsertPoint(Extract);
417 Extract->eraseFromParent();
423bool InterleavedAccess::lowerInterleavedStore(
428 auto *SVI = dyn_cast<ShuffleVectorInst>(
SI->getValueOperand());
429 if (!SVI || !SVI->hasOneUse() || isa<ScalableVectorType>(SVI->getType()))
437 LLVM_DEBUG(
dbgs() <<
"IA: Found an interleaved store: " << *SI <<
"\n");
440 if (!TLI->lowerInterleavedStore(SI, SVI, Factor))
449bool InterleavedAccess::runOnFunction(
Function &
F) {
450 auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
454 LLVM_DEBUG(
dbgs() <<
"*** " << getPassName() <<
": " <<
F.getName() <<
"\n");
456 DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
458 TLI =
TM.getSubtargetImpl(
F)->getTargetLowering();
459 MaxFactor = TLI->getMaxSupportedInterleaveFactor();
463 bool Changed =
false;
466 if (
auto *LI = dyn_cast<LoadInst>(&
I))
467 Changed |= lowerInterleavedLoad(LI, DeadInsts);
469 if (
auto *SI = dyn_cast<StoreInst>(&
I))
470 Changed |= lowerInterleavedStore(SI, DeadInsts);
473 for (
auto *
I : DeadInsts)
474 I->eraseFromParent();
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Returns the sub type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx.
This file defines the DenseMap class.
static bool isDeInterleaveMask(ArrayRef< int > Mask, unsigned &Factor, unsigned &Index, unsigned MaxFactor, unsigned NumLoadElements)
Check if the mask is a DE-interleave mask for an interleaved load.
static cl::opt< bool > LowerInterleavedAccesses("lower-interleaved-accesses", cl::desc("Enable lowering interleaved accesses to intrinsics"), cl::init(true), cl::Hidden)
static bool isReInterleaveMask(ShuffleVectorInst *SVI, unsigned &Factor, unsigned MaxFactor)
Check if the mask can be used in an interleaved store.
Lower interleaved memory accesses to target specific intrinsics
static bool isDeInterleaveMaskOfFactor(ArrayRef< int > Mask, unsigned Factor, unsigned &Index)
Check if the mask is a DE-interleave mask of the given factor Factor like: <Index,...
print must be executed print the must be executed context for all instructions
const char LLVMTargetMachineRef TM
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallVector class.
This file describes how to lower LLVM code to machine code.
Target-Independent Code Generator Pass Configuration Options pass.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
void setPreservesCFG()
This function should be called by the pass, iff they do not:
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
bool empty() const
empty - Check if the array is empty.
BinaryOps getOpcode() const
static BinaryOperator * CreateWithCopiedFlags(BinaryOps Opc, Value *V1, Value *V2, Instruction *CopyO, const Twine &Name="", Instruction *InsertBefore=nullptr)
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Legacy analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
FunctionPass class - This class is used to implement most global optimizations.
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overridden by subclasses to do the per-function processing of the pass.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
An instruction for reading from memory.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
static PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
bool insert(const value_type &X)
Insert a new element into the SetVector.
bool empty() const
Determine if the SetVector is empty or not.
This instruction constructs a fixed permutation of two input vectors.
static void getShuffleMask(const Constant *Mask, SmallVectorImpl< int > &Result)
Convert the input shuffle mask operand to a vector of integers.
bool isInterleave(unsigned Factor)
Return if this shuffle interleaves its two input vectors together.
A SetVector that performs no allocations if smaller than a certain size.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
StringRef - Represent a constant reference to a string, i.e.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Primary interface to the complete machine description for the target machine.
The instances of the Type class are immutable: once they are created, they are never changed.
Value * getOperand(unsigned i) const
Type * getType() const
All values are typed, get the type of this value.
iterator_range< user_iterator > users()
StringRef getName() const
Return a constant reference to the value's name.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
void initializeInterleavedAccessPass(PassRegistry &)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
void append_range(Container &C, Range &&R)
Wrapper function to append a range to a container.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
FunctionPass * createInterleavedAccessPass()
InterleavedAccess Pass - This pass identifies and matches interleaved memory accesses to target speci...