LLVM 22.0.0git
InterleavedAccessPass.cpp
Go to the documentation of this file.
1//===- InterleavedAccessPass.cpp ------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the Interleaved Access pass, which identifies
10// interleaved memory accesses and transforms them into target specific
11// intrinsics.
12//
13// An interleaved load reads data from memory into several vectors, with
14// DE-interleaving the data on a factor. An interleaved store writes several
15// vectors to memory with RE-interleaving the data on a factor.
16//
17// As interleaved accesses are difficult to identified in CodeGen (mainly
18// because the VECTOR_SHUFFLE DAG node is quite different from the shufflevector
19// IR), we identify and transform them to intrinsics in this pass so the
20// intrinsics can be easily matched into target specific instructions later in
21// CodeGen.
22//
23// E.g. An interleaved load (Factor = 2):
24// %wide.vec = load <8 x i32>, <8 x i32>* %ptr
25// %v0 = shuffle <8 x i32> %wide.vec, <8 x i32> poison, <0, 2, 4, 6>
26// %v1 = shuffle <8 x i32> %wide.vec, <8 x i32> poison, <1, 3, 5, 7>
27//
28// It could be transformed into a ld2 intrinsic in AArch64 backend or a vld2
29// intrinsic in ARM backend.
30//
31// In X86, this can be further optimized into a set of target
32// specific loads followed by an optimized sequence of shuffles.
33//
34// E.g. An interleaved store (Factor = 3):
35// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
36// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
37// store <12 x i32> %i.vec, <12 x i32>* %ptr
38//
39// It could be transformed into a st3 intrinsic in AArch64 backend or a vst3
40// intrinsic in ARM backend.
41//
42// Similarly, a set of interleaved stores can be transformed into an optimized
43// sequence of shuffles followed by a set of target specific stores for X86.
44//
45//===----------------------------------------------------------------------===//
46
47#include "llvm/ADT/ArrayRef.h"
48#include "llvm/ADT/DenseMap.h"
49#include "llvm/ADT/SetVector.h"
56#include "llvm/IR/Constants.h"
57#include "llvm/IR/Dominators.h"
58#include "llvm/IR/Function.h"
59#include "llvm/IR/IRBuilder.h"
61#include "llvm/IR/Instruction.h"
66#include "llvm/Pass.h"
69#include "llvm/Support/Debug.h"
73#include <cassert>
74#include <utility>
75
76using namespace llvm;
77
78#define DEBUG_TYPE "interleaved-access"
79
81 "lower-interleaved-accesses",
82 cl::desc("Enable lowering interleaved accesses to intrinsics"),
83 cl::init(true), cl::Hidden);
84
85namespace {
86
87class InterleavedAccessImpl {
88 friend class InterleavedAccess;
89
90public:
91 InterleavedAccessImpl() = default;
92 InterleavedAccessImpl(DominatorTree *DT, const TargetLowering *TLI)
93 : DT(DT), TLI(TLI), MaxFactor(TLI->getMaxSupportedInterleaveFactor()) {}
94 bool runOnFunction(Function &F);
95
96private:
97 DominatorTree *DT = nullptr;
98 const TargetLowering *TLI = nullptr;
99
100 /// The maximum supported interleave factor.
101 unsigned MaxFactor = 0u;
102
103 /// Transform an interleaved load into target specific intrinsics.
104 bool lowerInterleavedLoad(Instruction *Load,
105 SmallSetVector<Instruction *, 32> &DeadInsts);
106
107 /// Transform an interleaved store into target specific intrinsics.
108 bool lowerInterleavedStore(Instruction *Store,
109 SmallSetVector<Instruction *, 32> &DeadInsts);
110
111 /// Transform a load and a deinterleave intrinsic into target specific
112 /// instructions.
113 bool lowerDeinterleaveIntrinsic(IntrinsicInst *II,
114 SmallSetVector<Instruction *, 32> &DeadInsts);
115
116 /// Transform an interleave intrinsic and a store into target specific
117 /// instructions.
118 bool lowerInterleaveIntrinsic(IntrinsicInst *II,
119 SmallSetVector<Instruction *, 32> &DeadInsts);
120
121 /// Returns true if the uses of an interleaved load by the
122 /// extractelement instructions in \p Extracts can be replaced by uses of the
123 /// shufflevector instructions in \p Shuffles instead. If so, the necessary
124 /// replacements are also performed.
125 bool tryReplaceExtracts(ArrayRef<ExtractElementInst *> Extracts,
127
128 /// Given a number of shuffles of the form shuffle(binop(x,y)), convert them
129 /// to binop(shuffle(x), shuffle(y)) to allow the formation of an
130 /// interleaving load. Any newly created shuffles that operate on \p LI will
131 /// be added to \p Shuffles. Returns true, if any changes to the IR have been
132 /// made.
133 bool replaceBinOpShuffles(ArrayRef<ShuffleVectorInst *> BinOpShuffles,
134 SmallVectorImpl<ShuffleVectorInst *> &Shuffles,
135 Instruction *LI);
136};
137
138class InterleavedAccess : public FunctionPass {
139 InterleavedAccessImpl Impl;
140
141public:
142 static char ID;
143
144 InterleavedAccess() : FunctionPass(ID) {
146 }
147
148 StringRef getPassName() const override { return "Interleaved Access Pass"; }
149
150 bool runOnFunction(Function &F) override;
151
152 void getAnalysisUsage(AnalysisUsage &AU) const override {
153 AU.addRequired<DominatorTreeWrapperPass>();
154 AU.setPreservesCFG();
155 }
156};
157
158} // end anonymous namespace.
159
162 auto *DT = &FAM.getResult<DominatorTreeAnalysis>(F);
163 auto *TLI = TM->getSubtargetImpl(F)->getTargetLowering();
164 InterleavedAccessImpl Impl(DT, TLI);
165 bool Changed = Impl.runOnFunction(F);
166
167 if (!Changed)
168 return PreservedAnalyses::all();
169
172 return PA;
173}
174
175char InterleavedAccess::ID = 0;
176
177bool InterleavedAccess::runOnFunction(Function &F) {
178 if (skipFunction(F))
179 return false;
180
181 auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
182 if (!TPC || !LowerInterleavedAccesses)
183 return false;
184
185 LLVM_DEBUG(dbgs() << "*** " << getPassName() << ": " << F.getName() << "\n");
186
187 Impl.DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
188 auto &TM = TPC->getTM<TargetMachine>();
189 Impl.TLI = TM.getSubtargetImpl(F)->getTargetLowering();
190 Impl.MaxFactor = Impl.TLI->getMaxSupportedInterleaveFactor();
191
192 return Impl.runOnFunction(F);
193}
194
196 "Lower interleaved memory accesses to target specific intrinsics", false,
197 false)
200 "Lower interleaved memory accesses to target specific intrinsics", false,
201 false)
202
204 return new InterleavedAccess();
205}
206
207/// Check if the mask is a DE-interleave mask for an interleaved load.
208///
209/// E.g. DE-interleave masks (Factor = 2) could be:
210/// <0, 2, 4, 6> (mask of index 0 to extract even elements)
211/// <1, 3, 5, 7> (mask of index 1 to extract odd elements)
212static bool isDeInterleaveMask(ArrayRef<int> Mask, unsigned &Factor,
213 unsigned &Index, unsigned MaxFactor,
214 unsigned NumLoadElements) {
215 if (Mask.size() < 2)
216 return false;
217
218 // Check potential Factors.
219 for (Factor = 2; Factor <= MaxFactor; Factor++) {
220 // Make sure we don't produce a load wider than the input load.
221 if (Mask.size() * Factor > NumLoadElements)
222 return false;
223 if (ShuffleVectorInst::isDeInterleaveMaskOfFactor(Mask, Factor, Index))
224 return true;
225 }
226
227 return false;
228}
229
230/// Check if the mask can be used in an interleaved store.
231//
232/// It checks for a more general pattern than the RE-interleave mask.
233/// I.e. <x, y, ... z, x+1, y+1, ...z+1, x+2, y+2, ...z+2, ...>
234/// E.g. For a Factor of 2 (LaneLen=4): <4, 32, 5, 33, 6, 34, 7, 35>
235/// E.g. For a Factor of 3 (LaneLen=4): <4, 32, 16, 5, 33, 17, 6, 34, 18, 7, 35, 19>
236/// E.g. For a Factor of 4 (LaneLen=2): <8, 2, 12, 4, 9, 3, 13, 5>
237///
238/// The particular case of an RE-interleave mask is:
239/// I.e. <0, LaneLen, ... , LaneLen*(Factor - 1), 1, LaneLen + 1, ...>
240/// E.g. For a Factor of 2 (LaneLen=4): <0, 4, 1, 5, 2, 6, 3, 7>
241static bool isReInterleaveMask(ShuffleVectorInst *SVI, unsigned &Factor,
242 unsigned MaxFactor) {
243 unsigned NumElts = SVI->getShuffleMask().size();
244 if (NumElts < 4)
245 return false;
246
247 // Check potential Factors.
248 for (Factor = 2; Factor <= MaxFactor; Factor++) {
249 if (SVI->isInterleave(Factor))
250 return true;
251 }
252
253 return false;
254}
255
257 switch (II->getIntrinsicID()) {
258 default:
259 llvm_unreachable("Unexpected intrinsic");
260 case Intrinsic::vp_load:
261 case Intrinsic::masked_load:
262 return II->getOperand(1);
263 case Intrinsic::vp_store:
264 case Intrinsic::masked_store:
265 return II->getOperand(2);
266 }
267}
268
// Return a pair of
// (1) The corresponded deinterleaved mask, or nullptr if there is no valid
// mask.
// (2) Some mask effectively skips a certain field, and this element is a mask
// in which inactive lanes represent fields that are skipped (i.e. "gaps").
static std::pair<Value *, APInt> getMask(Value *WideMask, unsigned Factor,
                                         ElementCount LeafValueEC);

// Convenience overload: derive the per-leaf element count from the leaf
// vector type and forward to the ElementCount form above.
static std::pair<Value *, APInt> getMask(Value *WideMask, unsigned Factor,
                                         VectorType *LeafValueTy) {
  return getMask(WideMask, Factor, LeafValueTy->getElementCount());
}
281
282bool InterleavedAccessImpl::lowerInterleavedLoad(
284 if (isa<ScalableVectorType>(Load->getType()))
285 return false;
286
287 auto *LI = dyn_cast<LoadInst>(Load);
288 auto *II = dyn_cast<IntrinsicInst>(Load);
289 if (!LI && !II)
290 return false;
291
292 if (LI && !LI->isSimple())
293 return false;
294
295 // Check if all users of this load are shufflevectors. If we encounter any
296 // users that are extractelement instructions or binary operators, we save
297 // them to later check if they can be modified to extract from one of the
298 // shufflevectors instead of the load.
299
302 // BinOpShuffles need to be handled a single time in case both operands of the
303 // binop are the same load.
305
306 for (auto *User : Load->users()) {
307 auto *Extract = dyn_cast<ExtractElementInst>(User);
308 if (Extract && isa<ConstantInt>(Extract->getIndexOperand())) {
309 Extracts.push_back(Extract);
310 continue;
311 }
312 if (auto *BI = dyn_cast<BinaryOperator>(User)) {
313 using namespace PatternMatch;
314 if (!BI->user_empty() &&
315 all_of(BI->users(), match_fn(m_Shuffle(m_Value(), m_Undef())))) {
316 for (auto *SVI : BI->users())
317 BinOpShuffles.insert(cast<ShuffleVectorInst>(SVI));
318 continue;
319 }
320 }
322 if (!SVI || !isa<UndefValue>(SVI->getOperand(1)))
323 return false;
324
325 Shuffles.push_back(SVI);
326 }
327
328 if (Shuffles.empty() && BinOpShuffles.empty())
329 return false;
330
331 unsigned Factor, Index;
332
333 unsigned NumLoadElements =
334 cast<FixedVectorType>(Load->getType())->getNumElements();
335 auto *FirstSVI = Shuffles.size() > 0 ? Shuffles[0] : BinOpShuffles[0];
336 // Check if the first shufflevector is DE-interleave shuffle.
337 if (!isDeInterleaveMask(FirstSVI->getShuffleMask(), Factor, Index, MaxFactor,
338 NumLoadElements))
339 return false;
340
341 // Holds the corresponding index for each DE-interleave shuffle.
343
344 VectorType *VecTy = cast<VectorType>(FirstSVI->getType());
345
346 // Check if other shufflevectors are also DE-interleaved of the same type
347 // and factor as the first shufflevector.
348 for (auto *Shuffle : Shuffles) {
349 if (Shuffle->getType() != VecTy)
350 return false;
352 Shuffle->getShuffleMask(), Factor, Index))
353 return false;
354
355 assert(Shuffle->getShuffleMask().size() <= NumLoadElements);
356 Indices.push_back(Index);
357 }
358 for (auto *Shuffle : BinOpShuffles) {
359 if (Shuffle->getType() != VecTy)
360 return false;
362 Shuffle->getShuffleMask(), Factor, Index))
363 return false;
364
365 assert(Shuffle->getShuffleMask().size() <= NumLoadElements);
366
367 if (cast<Instruction>(Shuffle->getOperand(0))->getOperand(0) == Load)
368 Indices.push_back(Index);
369 if (cast<Instruction>(Shuffle->getOperand(0))->getOperand(1) == Load)
370 Indices.push_back(Index);
371 }
372
373 // Try and modify users of the load that are extractelement instructions to
374 // use the shufflevector instructions instead of the load.
375 if (!tryReplaceExtracts(Extracts, Shuffles))
376 return false;
377
378 bool BinOpShuffleChanged =
379 replaceBinOpShuffles(BinOpShuffles.getArrayRef(), Shuffles, Load);
380
381 Value *Mask = nullptr;
382 auto GapMask = APInt::getAllOnes(Factor);
383 if (LI) {
384 LLVM_DEBUG(dbgs() << "IA: Found an interleaved load: " << *Load << "\n");
385 } else {
386 // Check mask operand. Handle both all-true/false and interleaved mask.
387 std::tie(Mask, GapMask) = getMask(getMaskOperand(II), Factor, VecTy);
388 if (!Mask)
389 return false;
390
391 LLVM_DEBUG(dbgs() << "IA: Found an interleaved vp.load or masked.load: "
392 << *Load << "\n");
393 LLVM_DEBUG(dbgs() << "IA: With nominal factor " << Factor
394 << " and actual factor " << GapMask.popcount() << "\n");
395 }
396
397 // Try to create target specific intrinsics to replace the load and
398 // shuffles.
399 if (!TLI->lowerInterleavedLoad(cast<Instruction>(Load), Mask, Shuffles,
400 Indices, Factor, GapMask))
401 // If Extracts is not empty, tryReplaceExtracts made changes earlier.
402 return !Extracts.empty() || BinOpShuffleChanged;
403
404 DeadInsts.insert_range(Shuffles);
405
406 DeadInsts.insert(Load);
407 return true;
408}
409
410bool InterleavedAccessImpl::replaceBinOpShuffles(
411 ArrayRef<ShuffleVectorInst *> BinOpShuffles,
413 for (auto *SVI : BinOpShuffles) {
414 BinaryOperator *BI = cast<BinaryOperator>(SVI->getOperand(0));
415 Type *BIOp0Ty = BI->getOperand(0)->getType();
416 ArrayRef<int> Mask = SVI->getShuffleMask();
417 assert(all_of(Mask, [&](int Idx) {
418 return Idx < (int)cast<FixedVectorType>(BIOp0Ty)->getNumElements();
419 }));
420
421 BasicBlock::iterator insertPos = SVI->getIterator();
422 auto *NewSVI1 =
423 new ShuffleVectorInst(BI->getOperand(0), PoisonValue::get(BIOp0Ty),
424 Mask, SVI->getName(), insertPos);
425 auto *NewSVI2 = new ShuffleVectorInst(
426 BI->getOperand(1), PoisonValue::get(BI->getOperand(1)->getType()), Mask,
427 SVI->getName(), insertPos);
429 BI->getOpcode(), NewSVI1, NewSVI2, BI, BI->getName(), insertPos);
430 SVI->replaceAllUsesWith(NewBI);
431 LLVM_DEBUG(dbgs() << " Replaced: " << *BI << "\n And : " << *SVI
432 << "\n With : " << *NewSVI1 << "\n And : "
433 << *NewSVI2 << "\n And : " << *NewBI << "\n");
435 if (NewSVI1->getOperand(0) == Load)
436 Shuffles.push_back(NewSVI1);
437 if (NewSVI2->getOperand(0) == Load)
438 Shuffles.push_back(NewSVI2);
439 }
440
441 return !BinOpShuffles.empty();
442}
443
444bool InterleavedAccessImpl::tryReplaceExtracts(
447 // If there aren't any extractelement instructions to modify, there's nothing
448 // to do.
449 if (Extracts.empty())
450 return true;
451
452 // Maps extractelement instructions to vector-index pairs. The extractlement
453 // instructions will be modified to use the new vector and index operands.
455
456 for (auto *Extract : Extracts) {
457 // The vector index that is extracted.
458 auto *IndexOperand = cast<ConstantInt>(Extract->getIndexOperand());
459 auto Index = IndexOperand->getSExtValue();
460
461 // Look for a suitable shufflevector instruction. The goal is to modify the
462 // extractelement instruction (which uses an interleaved load) to use one
463 // of the shufflevector instructions instead of the load.
464 for (auto *Shuffle : Shuffles) {
465 // If the shufflevector instruction doesn't dominate the extract, we
466 // can't create a use of it.
467 if (!DT->dominates(Shuffle, Extract))
468 continue;
469
470 // Inspect the indices of the shufflevector instruction. If the shuffle
471 // selects the same index that is extracted, we can modify the
472 // extractelement instruction.
473 SmallVector<int, 4> Indices;
474 Shuffle->getShuffleMask(Indices);
475 for (unsigned I = 0; I < Indices.size(); ++I)
476 if (Indices[I] == Index) {
477 assert(Extract->getOperand(0) == Shuffle->getOperand(0) &&
478 "Vector operations do not match");
479 ReplacementMap[Extract] = std::make_pair(Shuffle, I);
480 break;
481 }
482
483 // If we found a suitable shufflevector instruction, stop looking.
484 if (ReplacementMap.count(Extract))
485 break;
486 }
487
488 // If we did not find a suitable shufflevector instruction, the
489 // extractelement instruction cannot be modified, so we must give up.
490 if (!ReplacementMap.count(Extract))
491 return false;
492 }
493
494 // Finally, perform the replacements.
495 IRBuilder<> Builder(Extracts[0]->getContext());
496 for (auto &Replacement : ReplacementMap) {
497 auto *Extract = Replacement.first;
498 auto *Vector = Replacement.second.first;
499 auto Index = Replacement.second.second;
500 Builder.SetInsertPoint(Extract);
501 Extract->replaceAllUsesWith(Builder.CreateExtractElement(Vector, Index));
502 Extract->eraseFromParent();
503 }
504
505 return true;
506}
507
508bool InterleavedAccessImpl::lowerInterleavedStore(
510 Value *StoredValue;
511 auto *SI = dyn_cast<StoreInst>(Store);
512 auto *II = dyn_cast<IntrinsicInst>(Store);
513 if (SI) {
514 if (!SI->isSimple())
515 return false;
516 StoredValue = SI->getValueOperand();
517 } else {
518 assert(II->getIntrinsicID() == Intrinsic::vp_store ||
519 II->getIntrinsicID() == Intrinsic::masked_store);
520 StoredValue = II->getArgOperand(0);
521 }
522
523 auto *SVI = dyn_cast<ShuffleVectorInst>(StoredValue);
524 if (!SVI || !SVI->hasOneUse() || isa<ScalableVectorType>(SVI->getType()))
525 return false;
526
527 unsigned NumStoredElements =
528 cast<FixedVectorType>(SVI->getType())->getNumElements();
529 // Check if the shufflevector is RE-interleave shuffle.
530 unsigned Factor;
531 if (!isReInterleaveMask(SVI, Factor, MaxFactor))
532 return false;
533 assert(NumStoredElements % Factor == 0 &&
534 "number of stored element should be a multiple of Factor");
535
536 Value *Mask = nullptr;
537 auto GapMask = APInt::getAllOnes(Factor);
538 if (SI) {
539 LLVM_DEBUG(dbgs() << "IA: Found an interleaved store: " << *Store << "\n");
540 } else {
541 // Check mask operand. Handle both all-true/false and interleaved mask.
542 unsigned LaneMaskLen = NumStoredElements / Factor;
543 std::tie(Mask, GapMask) = getMask(getMaskOperand(II), Factor,
544 ElementCount::getFixed(LaneMaskLen));
545 if (!Mask)
546 return false;
547
548 LLVM_DEBUG(dbgs() << "IA: Found an interleaved vp.store or masked.store: "
549 << *Store << "\n");
550 LLVM_DEBUG(dbgs() << "IA: With nominal factor " << Factor
551 << " and actual factor " << GapMask.popcount() << "\n");
552 }
553
554 // Try to create target specific intrinsics to replace the store and
555 // shuffle.
556 if (!TLI->lowerInterleavedStore(Store, Mask, SVI, Factor, GapMask))
557 return false;
558
559 // Already have a new target specific interleaved store. Erase the old store.
560 DeadInsts.insert(Store);
561 DeadInsts.insert(SVI);
562 return true;
563}
564
565// A wide mask <1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0> could be used to skip the
566// last field in a factor-of-three interleaved store or deinterleaved load (in
567// which case LeafMaskLen is 4). Such (wide) mask is also known as gap mask.
568// This helper function tries to detect this pattern and return the actual
569// factor we're accessing, which is 2 in this example.
570static void getGapMask(const Constant &MaskConst, unsigned Factor,
571 unsigned LeafMaskLen, APInt &GapMask) {
572 assert(GapMask.getBitWidth() == Factor);
573 for (unsigned F = 0U; F < Factor; ++F) {
574 bool AllZero = true;
575 for (unsigned Idx = 0U; Idx < LeafMaskLen; ++Idx) {
576 Constant *C = MaskConst.getAggregateElement(F + Idx * Factor);
577 if (!C->isZeroValue()) {
578 AllZero = false;
579 break;
580 }
581 }
582 // All mask bits on this field are zero, skipping it.
583 if (AllZero)
584 GapMask.clearBit(F);
585 }
586}
587
static std::pair<Value *, APInt> getMask(Value *WideMask, unsigned Factor,
                                         ElementCount LeafValueEC) {
  // Start with every field assumed active; bits are cleared as gaps are found.
  auto GapMask = APInt::getAllOnes(Factor);

  // Case 1: the wide mask is itself a vector.interleaveN of per-leaf masks.
  if (auto *IMI = dyn_cast<IntrinsicInst>(WideMask)) {
    if (unsigned F = getInterleaveIntrinsicFactor(IMI->getIntrinsicID());
        F && F == Factor) {
      Value *RefArg = nullptr;
      // Check if all the intrinsic arguments are the same, except those that
      // are zeros, which we mark as gaps in the gap mask.
      for (auto [Idx, Arg] : enumerate(IMI->args())) {
        if (auto *C = dyn_cast<Constant>(Arg); C && C->isZeroValue()) {
          GapMask.clearBit(Idx);
          continue;
        }

        if (!RefArg)
          RefArg = Arg;
        else if (RefArg != Arg)
          return {nullptr, GapMask};
      }

      // In a very rare occasion, all the intrinsic arguments might be zeros,
      // in which case we still want to return an all-zeros constant instead of
      // nullptr.
      return {RefArg ? RefArg : IMI->getArgOperand(0), GapMask};
    }
  }

  // Masks that are assembled from bitwise AND: recurse into both operands and
  // combine the results.
  if (auto *AndOp = dyn_cast<BinaryOperator>(WideMask);
      AndOp && AndOp->getOpcode() == Instruction::And) {
    auto [MaskLHS, GapMaskLHS] =
        getMask(AndOp->getOperand(0), Factor, LeafValueEC);
    auto [MaskRHS, GapMaskRHS] =
        getMask(AndOp->getOperand(1), Factor, LeafValueEC);
    if (!MaskLHS || !MaskRHS)
      return {nullptr, GapMask};
    // Using IRBuilder here so that any trivial constants could be folded right
    // away.
    return {IRBuilder<>(AndOp).CreateAnd(MaskLHS, MaskRHS),
            GapMaskLHS & GapMaskRHS};
  }

  // Case 2: a constant wide mask.
  if (auto *ConstMask = dyn_cast<Constant>(WideMask)) {
    if (auto *Splat = ConstMask->getSplatValue())
      // All-ones or all-zeros mask.
      return {ConstantVector::getSplat(LeafValueEC, Splat), GapMask};

    if (LeafValueEC.isFixed()) {
      unsigned LeafMaskLen = LeafValueEC.getFixedValue();
      // First, check if we use a gap mask to skip some of the factors / fields.
      getGapMask(*ConstMask, Factor, LeafMaskLen, GapMask);

      SmallVector<Constant *, 8> LeafMask(LeafMaskLen, nullptr);
      // If this is a fixed-length constant mask, each lane / leaf has to
      // use the same mask. This is done by checking if every group with Factor
      // number of elements in the interleaved mask has homogeneous values.
      for (unsigned Idx = 0U; Idx < LeafMaskLen * Factor; ++Idx) {
        // Lanes belonging to skipped (gap) fields are irrelevant.
        if (!GapMask[Idx % Factor])
          continue;
        Constant *C = ConstMask->getAggregateElement(Idx);
        if (LeafMask[Idx / Factor] && LeafMask[Idx / Factor] != C)
          return {nullptr, GapMask};
        LeafMask[Idx / Factor] = C;
      }

      return {ConstantVector::get(LeafMask), GapMask};
    }
  }

  // Case 3: the wide mask is a shufflevector that replicates one leaf mask.
  if (auto *SVI = dyn_cast<ShuffleVectorInst>(WideMask)) {
    Type *Op1Ty = SVI->getOperand(1)->getType();
    if (!isa<FixedVectorType>(Op1Ty))
      return {nullptr, GapMask};

    // Check that the shuffle mask is: a) an interleave, b) all of the same
    // set of the elements, and c) contained by the first source. (c) could
    // be relaxed if desired.
    unsigned NumSrcElts =
        cast<FixedVectorType>(SVI->getOperand(1)->getType())->getNumElements();
    SmallVector<unsigned> StartIndexes;
    if (ShuffleVectorInst::isInterleaveMask(SVI->getShuffleMask(), Factor,
                                            NumSrcElts * 2, StartIndexes) &&
        llvm::all_of(StartIndexes, [](unsigned Start) { return Start == 0; }) &&
        llvm::all_of(SVI->getShuffleMask(), [&NumSrcElts](int Idx) {
          return Idx < (int)NumSrcElts;
        })) {
      auto *LeafMaskTy =
          VectorType::get(Type::getInt1Ty(SVI->getContext()), LeafValueEC);
      IRBuilder<> Builder(SVI);
      return {Builder.CreateExtractVector(LeafMaskTy, SVI->getOperand(0),
                                          uint64_t(0)),
              GapMask};
    }
  }

  // No recognizable per-leaf mask structure.
  return {nullptr, GapMask};
}
687
688bool InterleavedAccessImpl::lowerDeinterleaveIntrinsic(
690 Instruction *LoadedVal = dyn_cast<Instruction>(DI->getOperand(0));
691 if (!LoadedVal || !LoadedVal->hasOneUse())
692 return false;
693
694 auto *LI = dyn_cast<LoadInst>(LoadedVal);
695 auto *II = dyn_cast<IntrinsicInst>(LoadedVal);
696 if (!LI && !II)
697 return false;
698
699 const unsigned Factor = getDeinterleaveIntrinsicFactor(DI->getIntrinsicID());
700 assert(Factor && "unexpected deinterleave intrinsic");
701
702 Value *Mask = nullptr;
703 if (LI) {
704 if (!LI->isSimple())
705 return false;
706
707 LLVM_DEBUG(dbgs() << "IA: Found a load with deinterleave intrinsic " << *DI
708 << " and factor = " << Factor << "\n");
709 } else {
710 assert(II);
711 if (II->getIntrinsicID() != Intrinsic::masked_load &&
712 II->getIntrinsicID() != Intrinsic::vp_load)
713 return false;
714
715 // Check mask operand. Handle both all-true/false and interleaved mask.
716 APInt GapMask(Factor, 0);
717 std::tie(Mask, GapMask) =
719 if (!Mask)
720 return false;
721 // We haven't supported gap mask if it's deinterleaving using intrinsics.
722 // Yet it is possible that we already changed the IR, hence returning true
723 // here.
724 if (GapMask.popcount() != Factor)
725 return true;
726
727 LLVM_DEBUG(dbgs() << "IA: Found a vp.load or masked.load with deinterleave"
728 << " intrinsic " << *DI << " and factor = "
729 << Factor << "\n");
730 }
731
732 // Try and match this with target specific intrinsics.
733 if (!TLI->lowerDeinterleaveIntrinsicToLoad(LoadedVal, Mask, DI))
734 return false;
735
736 DeadInsts.insert(DI);
737 // We now have a target-specific load, so delete the old one.
738 DeadInsts.insert(LoadedVal);
739 return true;
740}
741
742bool InterleavedAccessImpl::lowerInterleaveIntrinsic(
744 if (!IntII->hasOneUse())
745 return false;
746 Instruction *StoredBy = dyn_cast<Instruction>(IntII->user_back());
747 if (!StoredBy)
748 return false;
749 auto *SI = dyn_cast<StoreInst>(StoredBy);
750 auto *II = dyn_cast<IntrinsicInst>(StoredBy);
751 if (!SI && !II)
752 return false;
753
754 SmallVector<Value *, 8> InterleaveValues(IntII->args());
755 const unsigned Factor = getInterleaveIntrinsicFactor(IntII->getIntrinsicID());
756 assert(Factor && "unexpected interleave intrinsic");
757
758 Value *Mask = nullptr;
759 if (II) {
760 if (II->getIntrinsicID() != Intrinsic::masked_store &&
761 II->getIntrinsicID() != Intrinsic::vp_store)
762 return false;
763 // Check mask operand. Handle both all-true/false and interleaved mask.
764 APInt GapMask(Factor, 0);
765 std::tie(Mask, GapMask) =
766 getMask(getMaskOperand(II), Factor,
767 cast<VectorType>(InterleaveValues[0]->getType()));
768 if (!Mask)
769 return false;
770 // We haven't supported gap mask if it's interleaving using intrinsics. Yet
771 // it is possible that we already changed the IR, hence returning true here.
772 if (GapMask.popcount() != Factor)
773 return true;
774
775 LLVM_DEBUG(dbgs() << "IA: Found a vp.store or masked.store with interleave"
776 << " intrinsic " << *IntII << " and factor = "
777 << Factor << "\n");
778 } else {
779 if (!SI->isSimple())
780 return false;
781
782 LLVM_DEBUG(dbgs() << "IA: Found a store with interleave intrinsic "
783 << *IntII << " and factor = " << Factor << "\n");
784 }
785
786 // Try and match this with target specific intrinsics.
787 if (!TLI->lowerInterleaveIntrinsicToStore(StoredBy, Mask, InterleaveValues))
788 return false;
789
790 // We now have a target-specific store, so delete the old one.
791 DeadInsts.insert(StoredBy);
792 DeadInsts.insert(IntII);
793 return true;
794}
795
796bool InterleavedAccessImpl::runOnFunction(Function &F) {
797 // Holds dead instructions that will be erased later.
799 bool Changed = false;
800
801 using namespace PatternMatch;
802 for (auto &I : instructions(F)) {
806 Changed |= lowerInterleavedLoad(&I, DeadInsts);
807
811 Changed |= lowerInterleavedStore(&I, DeadInsts);
812
813 if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
814 if (getDeinterleaveIntrinsicFactor(II->getIntrinsicID()))
815 Changed |= lowerDeinterleaveIntrinsic(II, DeadInsts);
816 else if (getInterleaveIntrinsicFactor(II->getIntrinsicID()))
817 Changed |= lowerInterleaveIntrinsic(II, DeadInsts);
818 }
819 }
820
821 for (auto *I : DeadInsts)
822 I->eraseFromParent();
823
824 return Changed;
825}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Expand Atomic instructions
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file defines the DenseMap class.
static bool runOnFunction(Function &F, bool PostInlining)
#define DEBUG_TYPE
static bool isDeInterleaveMask(ArrayRef< int > Mask, unsigned &Factor, unsigned &Index, unsigned MaxFactor, unsigned NumLoadElements)
Check if the mask is a DE-interleave mask for an interleaved load.
static void getGapMask(const Constant &MaskConst, unsigned Factor, unsigned LeafMaskLen, APInt &GapMask)
static cl::opt< bool > LowerInterleavedAccesses("lower-interleaved-accesses", cl::desc("Enable lowering interleaved accesses to intrinsics"), cl::init(true), cl::Hidden)
static bool isReInterleaveMask(ShuffleVectorInst *SVI, unsigned &Factor, unsigned MaxFactor)
Check if the mask can be used in an interleaved store.
static Value * getMaskOperand(IntrinsicInst *II)
static std::pair< Value *, APInt > getMask(Value *WideMask, unsigned Factor, ElementCount LeafValueEC)
This file contains the declaration of the InterleavedAccessPass class, its corresponding pass name is...
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
uint64_t IntrinsicInst * II
FunctionAnalysisManager FAM
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallVector class.
#define LLVM_DEBUG(...)
Definition Debug.h:114
static SymbolRef::Type getType(const Symbol *Sym)
Definition TapiFile.cpp:39
This file describes how to lower LLVM code to machine code.
Target-Independent Code Generator Pass Configuration Options pass.
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:234
void clearBit(unsigned BitPosition)
Set a given bit to 0.
Definition APInt.h:1406
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1488
AnalysisUsage & addRequired()
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition Pass.cpp:270
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
bool empty() const
empty - Check if the array is empty.
Definition ArrayRef.h:138
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
BinaryOps getOpcode() const
Definition InstrTypes.h:374
static BinaryOperator * CreateWithCopiedFlags(BinaryOps Opc, Value *V1, Value *V2, Value *CopyO, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Definition InstrTypes.h:219
Represents analyses that only rely on functions' control flow.
Definition Analysis.h:73
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
static LLVM_ABI Constant * getSplat(ElementCount EC, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
static LLVM_ABI Constant * get(ArrayRef< Constant * > V)
This is an important base class in LLVM.
Definition Constant.h:43
LLVM_ABI Constant * getAggregateElement(unsigned Elt) const
For aggregates (struct/array/vector) return the constant that corresponds to the specified element if...
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition DenseMap.h:174
Analysis pass which computes a DominatorTree.
Definition Dominators.h:284
Legacy analysis pass which computes a DominatorTree.
Definition Dominators.h:322
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:165
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition TypeSize.h:310
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2788
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM)
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
Definition Analysis.h:151
void insert_range(Range &&R)
Definition SetVector.h:175
bool empty() const
Determine if the SetVector is empty or not.
Definition SetVector.h:99
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:150
This instruction constructs a fixed permutation of two input vectors.
static LLVM_ABI void getShuffleMask(const Constant *Mask, SmallVectorImpl< int > &Result)
Convert the input shuffle mask operand to a vector of integers.
static LLVM_ABI bool isDeInterleaveMaskOfFactor(ArrayRef< int > Mask, unsigned Factor, unsigned &Index)
Check if the mask is a DE-interleave mask of the given factor Factor like: <Index,...
LLVM_ABI bool isInterleave(unsigned Factor)
Return if this shuffle interleaves its two input vectors together.
static LLVM_ABI bool isInterleaveMask(ArrayRef< int > Mask, unsigned Factor, unsigned NumInputElts, SmallVectorImpl< unsigned > &StartIndexes)
Return true if the mask interleaves one or more input vectors together.
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:338
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Primary interface to the complete machine description for the target machine.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
Definition Type.cpp:294
Value * getOperand(unsigned i) const
Definition User.h:232
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
Base class of all SIMD vector types.
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct a VectorType.
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:201
constexpr bool isFixed() const
Returns true if the quantity is not scaled by vscale.
Definition TypeSize.h:172
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
TwoOps_match< ValueOpTy, PointerOpTy, Instruction::Store > m_Store(const ValueOpTy &ValueOp, const PointerOpTy &PointerOp)
Matches StoreInst.
bool match(Val *V, const Pattern &P)
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
auto m_Undef()
Match an arbitrary undef constant.
MatchFunctor< Val, Pattern > match_fn(const Pattern &P)
A match functor that can be used as a UnaryPredicate in functional algorithms like all_of.
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
initializer< Ty > init(const Ty &Val)
Context & getContext() const
Definition BasicBlock.h:99
This is an optimization pass for GlobalISel generic memory operations.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1725
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition Local.cpp:533
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2472
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
LLVM_ABI void initializeInterleavedAccessPass(PassRegistry &)
LLVM_ABI unsigned getDeinterleaveIntrinsicFactor(Intrinsic::ID ID)
Returns the corresponding factor of llvm.vector.deinterleaveN intrinsics.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI unsigned getInterleaveIntrinsicFactor(Intrinsic::ID ID)
Returns the corresponding factor of llvm.vector.interleaveN intrinsics.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ABI FunctionPass * createInterleavedAccessPass()
InterleavedAccess Pass - This pass identifies and matches interleaved memory accesses to target speci...
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
LLVM_ABI VectorType * getDeinterleavedVectorType(IntrinsicInst *DI)
Given a deinterleaveN intrinsic, return the (narrow) vector type of each factor.
ArrayRef(const T &OneElt) -> ArrayRef< T >
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.