// Debug category string for this file.
75 #define DEBUG_TYPE "interleaved-access"
// Name and help text of a command-line option; the option's declaration line
// is not part of this excerpt.
78 "lower-interleaved-accesses",
79 cl::desc(
"Enable lowering interleaved accesses to intrinsics"),
// Human-readable pass name; it is also printed in the per-function debug
// banner further down in this file.
92 StringRef getPassName()
const override {
return "Interleaved Access Pass"; }
// Declaration of the load-lowering helper. The remainder of the parameter
// list is not visible in this excerpt.
109 bool lowerInterleavedLoad(
LoadInst *LI,
// Description string for the pass; the surrounding call it is an argument of
// is not visible in this excerpt.
138 "Lower interleaved memory accesses to target specific intrinsics",
false,
// Hands back a freshly allocated pass object.
// NOTE(review): presumably the caller (pass manager) takes ownership of the
// raw pointer — confirm.
146 return new InterleavedAccess();
// Walk i across the mask; the loop body is not visible in this excerpt.
160 for (;
i <
Mask.size();
i++)
// i == Mask.size() means the loop above ran off the end of the mask.
// NOTE(review): presumably the hidden loop body breaks on the first
// mismatching element, so reaching the end signals a full match — confirm.
164 if (
i ==
Mask.size())
// Index is taken by non-const reference, so it is presumably an out-parameter
// (the write to it is not visible here). MaxFactor and NumLoadElements bound
// the search below.
177 unsigned &
Index,
unsigned MaxFactor,
178 unsigned NumLoadElements) {
// Try every candidate factor from 2 up to and including MaxFactor.
183 for (Factor = 2; Factor <= MaxFactor; Factor++) {
// A mask of this size at this factor would span more elements than
// NumLoadElements. The action taken in that case is not visible in this
// excerpt.
185 if (
Mask.size() * Factor > NumLoadElements)
// Tail of the parameter list: MaxFactor caps the factor search, OpNumElts is
// used for the range check at the end of this fragment.
206 unsigned MaxFactor,
unsigned OpNumElts) {
207 unsigned NumElts =
Mask.size();
// Try every candidate factor from 2 up to and including MaxFactor.
212 for (Factor = 2; Factor <= MaxFactor; Factor++) {
// Factor must divide the mask length evenly (non-zero remainder takes the
// branch; its body is not visible — presumably it skips this factor).
213 if (NumElts % Factor)
// Number of mask elements belonging to each of the Factor lanes.
// NOTE(review): LaneLen is unsigned, so the `LaneLen - 1` bounds used below
// wrap around if LaneLen is 0 — confirm empty masks are rejected earlier.
216 unsigned LaneLen = NumElts / Factor;
// Examine each lane I in [0, Factor).
224 for (;
I < Factor;
I++) {
// SavedLaneValue is deliberately left uninitialized; every read visible in
// this excerpt is guarded by SavedNoUndefs > 0.
// NOTE(review): the increment of SavedNoUndefs is not visible here — confirm
// it can only become non-zero after SavedLaneValue has been assigned.
225 unsigned SavedLaneValue;
226 unsigned SavedNoUndefs = 0;
// Compare each element of lane I with its successor, which sits Factor
// positions further along in the mask.
229 for (J = 0; J < LaneLen - 1; J++) {
231 unsigned Lane = J * Factor +
I;
232 unsigned NextLane = Lane + Factor;
// Mask entries are read as signed; negative entries are handled separately
// below (presumably they denote undef elements — confirm).
233 int LaneValue =
Mask[Lane];
234 int NextLaneValue =
Mask[NextLane];
// Two non-negative neighbours must be consecutive (next == current + 1);
// the action on violation is not visible in this excerpt.
237 if (LaneValue >= 0 && NextLaneValue >= 0 &&
238 LaneValue + 1 != NextLaneValue)
// A non-negative value followed by a negative one: remember the last
// non-negative value so a later non-negative entry can be checked against it.
242 if (LaneValue >= 0 && NextLaneValue < 0) {
243 SavedLaneValue = LaneValue;
// Inside a run of negative entries after a saved value: when the next entry
// is non-negative it must equal SavedLaneValue + SavedNoUndefs.
252 if (SavedNoUndefs > 0 && LaneValue < 0) {
254 if (NextLaneValue >= 0 &&
255 SavedLaneValue + SavedNoUndefs != (
unsigned)NextLaneValue)
267 }
// If the last element of lane I is non-negative, derive StartMask from it by
// subtracting J.
else if (
Mask[(LaneLen - 1) * Factor +
I] >= 0) {
269 StartMask =
Mask[(LaneLen - 1) * Factor +
I] - J;
270 }
// Otherwise fall back to the saved value, adjusted by how many of the
// LaneLen - 1 steps were not counted in SavedNoUndefs.
else if (SavedNoUndefs > 0) {
272 StartMask = SavedLaneValue - (LaneLen - 1 - SavedNoUndefs);
// The lane's index range [StartMask, StartMask + LaneLen) is checked against
// twice OpNumElts. NOTE(review): the factor of 2 presumably reflects the two
// concatenated shuffle operands — confirm. Action on failure is not visible.
279 if (StartMask + LaneLen > OpNumElts*2)
// Attempts to lower a load whose users are shuffles into a target-specific
// interleaved load. Only part of the body is visible in this excerpt; the
// comments below describe the visible statements only.
291 bool InterleavedAccess::lowerInterleavedLoad(
// Users that are extractelement instructions with a constant index are
// collected in Extracts.
308 auto *Extract = dyn_cast<ExtractElementInst>(
User);
309 if (Extract && isa<ConstantInt>(Extract->getIndexOperand())) {
310 Extracts.push_back(Extract);
// Binary-operator users: a predicate tests whether a user of the binop is a
// shufflevector (the call that applies the predicate is not visible). Every
// user of the binop is then cast<> to ShuffleVectorInst and inserted into
// BinOpShuffles, so the hidden check presumably requires all users to be
// shuffles — confirm.
313 if (
auto *BI = dyn_cast<BinaryOperator>(
User)) {
315 [](
auto *U) { return isa<ShuffleVectorInst>(U); })) {
316 for (
auto *SVI : BI->users())
317 BinOpShuffles.
insert(cast<ShuffleVectorInst>(SVI));
// Any remaining user must be a shufflevector whose second operand is undef;
// the action for other users is not visible in this excerpt.
321 auto *SVI = dyn_cast<ShuffleVectorInst>(
User);
322 if (!SVI || !isa<UndefValue>(SVI->getOperand(1)))
325 Shuffles.push_back(SVI);
// No shuffles of either kind were found (action not visible in this excerpt).
328 if (Shuffles.empty() && BinOpShuffles.
empty())
331 unsigned Factor,
Index;
// cast<FixedVectorType> asserts on mismatch.
// NOTE(review): presumably the load type is verified to be a fixed vector on
// a line not visible here — confirm.
333 unsigned NumLoadElements =
334 cast<FixedVectorType>(LI->
getType())->getNumElements();
// Take the first direct shuffle if there is one, otherwise the first
// binop-fed shuffle. Relies on at least one of the two containers being
// non-empty (see the emptiness test above).
335 auto *FirstSVI = Shuffles.size() > 0 ? Shuffles[0] : BinOpShuffles[0];
// Result type of the first shuffle; every other shuffle is compared with it.
344 Type *VecTy = FirstSVI->getType();
348 for (
auto *Shuffle : Shuffles) {
349 if (
Shuffle->getType() != VecTy)
356 Indices.push_back(
Index);
358 for (
auto *Shuffle : BinOpShuffles) {
359 if (
Shuffle->getType() != VecTy)
// Index is recorded once for each operand of the feeding instruction that is
// the load itself, so it is pushed twice when both operands are LI.
367 if (cast<Instruction>(
Shuffle->getOperand(0))->getOperand(0) == LI)
368 Indices.push_back(
Index);
369 if (cast<Instruction>(
Shuffle->getOperand(0))->getOperand(1) == LI)
370 Indices.push_back(
Index);
// Failure to replace the extracts takes the branch (body not visible).
375 if (!tryReplaceExtracts(Extracts, Shuffles))
// Rewrites the binop-fed shuffles; replaceBinOpShuffles appends new shuffles
// that read directly from LI to Shuffles.
378 bool BinOpShuffleChanged =
379 replaceBinOpShuffles(BinOpShuffles.getArrayRef(), Shuffles, LI);
381 LLVM_DEBUG(
dbgs() <<
"IA: Found an interleaved load: " << *LI <<
"\n");
// If the target hook declines, still report a change when extracts were
// replaced or binop shuffles were rewritten above.
384 if (!TLI->lowerInterleavedLoad(LI, Shuffles, Indices, Factor)) {
386 return !Extracts.empty() || BinOpShuffleChanged;
// The original load is queued for deletion by the caller-provided list.
391 DeadInsts.push_back(LI);
// Rewrites each shuffle in BinOpShuffles; from the visible lines, each one is
// replaced by NewBI, and the debug output names two new shuffles (NewSVI1,
// NewSVI2) alongside it. Only part of the body is visible in this excerpt.
395 bool InterleavedAccess::replaceBinOpShuffles(
398 for (
auto *SVI : BinOpShuffles) {
// Predicate body: Idx is below the element count of BIOp0Ty (a fixed vector
// type; cast<> asserts otherwise).
403 return Idx < (int)cast<FixedVectorType>(BIOp0Ty)->getNumElements();
// Tails of two calls that receive SVI's name and SVI itself.
// NOTE(review): presumably these create the new shuffle/binop named after SVI
// and inserted before it — confirm against the hidden call heads.
408 Mask, SVI->getName(), SVI);
411 SVI->getName(), SVI);
414 SVI->replaceAllUsesWith(NewBI);
416 <<
"\n With : " << *NewSVI1 <<
"\n And : "
417 << *NewSVI2 <<
"\n And : " << *NewBI <<
"\n");
// New shuffles whose first operand is the load are appended to Shuffles (an
// in/out list shared with the caller).
419 if (NewSVI1->getOperand(0) == LI)
420 Shuffles.push_back(NewSVI1);
421 if (NewSVI2->getOperand(0) == LI)
422 Shuffles.push_back(NewSVI2);
// Reports a change exactly when there was at least one shuffle to process.
425 return !BinOpShuffles.empty();
// Tries to re-express each extractelement in Extracts as an extract from one
// of the given shuffles. Only part of the body is visible in this excerpt.
428 bool InterleavedAccess::tryReplaceExtracts(
// Empty input takes the branch (body not visible in this excerpt).
433 if (Extracts.
empty())
440 for (
auto *Extract : Extracts) {
// cast<ConstantInt> asserts that the extract index is a constant; the value
// is read sign-extended.
442 auto *IndexOperand = cast<ConstantInt>(Extract->getIndexOperand());
443 auto Index = IndexOperand->getSExtValue();
448 for (
auto *Shuffle : Shuffles) {
// The dominance test on the shuffle/extract pair; the action when it fails is
// not visible (presumably the shuffle is skipped — confirm).
451 if (!DT->dominates(Shuffle, Extract))
// Search the shuffle mask for the position I whose entry equals the extract
// index, and record (Shuffle, I) as the replacement for this extract.
458 Shuffle->getShuffleMask(Indices);
459 for (
unsigned I = 0;
I < Indices.size(); ++
I)
460 if (Indices[
I] ==
Index) {
462 "Vector operations do not match");
463 ReplacementMap[Extract] = std::make_pair(Shuffle,
I);
// A replacement has been recorded for this extract (action not visible;
// presumably stops scanning further shuffles).
468 if (ReplacementMap.
count(Extract))
// No replacement was found for this extract (action not visible; presumably
// the whole transformation is abandoned — confirm).
474 if (!ReplacementMap.
count(Extract))
// Apply the recorded replacements: build a new extractelement on the shuffle
// result at the original extract's position, redirect all uses, then erase
// the original extract.
480 for (
auto &Replacement : ReplacementMap) {
481 auto *Extract = Replacement.first;
482 auto *
Vector = Replacement.second.first;
483 auto Index = Replacement.second.second;
484 Builder.SetInsertPoint(Extract);
485 Extract->replaceAllUsesWith(
Builder.CreateExtractElement(Vector,
Index));
486 Extract->eraseFromParent();
// Attempts to lower a store of a shufflevector result into a target-specific
// interleaved store. Only part of the body is visible in this excerpt.
492 bool InterleavedAccess::lowerInterleavedStore(
// Tests that the stored value is a shufflevector, has exactly one use, and is
// not of scalable vector type; the action on failure is not visible.
497 auto *SVI = dyn_cast<ShuffleVectorInst>(
SI->getValueOperand());
498 if (!SVI || !SVI->hasOneUse() || isa<ScalableVectorType>(SVI->getType()))
// Element count of the shuffle's first operand (cast<> asserts it is a fixed
// vector); the variable receiving it is not visible in this excerpt.
504 cast<FixedVectorType>(SVI->getOperand(0)->getType())->getNumElements();
// The target hook declining takes the branch (body not visible).
511 if (!TLI->lowerInterleavedStore(
SI, SVI, Factor))
// Queue the store before the shuffle it uses: the dead list is erased in
// insertion order elsewhere in this file, so the user goes before its operand.
515 DeadInsts.push_back(
SI);
516 DeadInsts.push_back(SVI);
// Per-function driver fragment; the function header and several statements
// are not visible in this excerpt.
// getAnalysisIfAvailable may yield null; the handling of that case is not
// visible here.
521 auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
525 LLVM_DEBUG(
dbgs() <<
"*** " << getPassName() <<
": " <<
F.getName() <<
"\n");
// Cache the dominator tree and the target lowering for this function's
// subtarget in members used by the lowering helpers.
527 DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
529 TLI =
TM.getSubtargetImpl(
F)->getTargetLowering();
// Largest interleave factor the target reports as supported.
530 MaxFactor = TLI->getMaxSupportedInterleaveFactor();
534 bool Changed =
false;
// Dispatch each instruction I by kind: loads and stores go to their
// respective lowering helpers, and any reported change is accumulated.
537 if (
auto *LI = dyn_cast<LoadInst>(&
I))
538 Changed |= lowerInterleavedLoad(LI, DeadInsts);
540 if (
auto *
SI = dyn_cast<StoreInst>(&
I))
541 Changed |= lowerInterleavedStore(
SI, DeadInsts);
// Erase the queued instructions in the order they were pushed.
544 for (
auto *
I : DeadInsts)
545 I->eraseFromParent();