//===- InterleavedAccessPass.cpp ------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the Interleaved Access pass, which identifies
// interleaved memory accesses and transforms them into target specific
// intrinsics.
//
// An interleaved load reads data from memory into several vectors,
// DE-interleaving the data by a factor. An interleaved store writes several
// vectors to memory, RE-interleaving the data by a factor.
//
// As interleaved accesses are difficult to identify in CodeGen (mainly
// because the VECTOR_SHUFFLE DAG node is quite different from the shufflevector
// IR), we identify and transform them to intrinsics in this pass so the
// intrinsics can be easily matched into target specific instructions later in
// CodeGen.
//
// E.g. An interleaved load (Factor = 2):
//        %wide.vec = load <8 x i32>, <8 x i32>* %ptr
//        %v0 = shuffle <8 x i32> %wide.vec, <8 x i32> poison, <0, 2, 4, 6>
//        %v1 = shuffle <8 x i32> %wide.vec, <8 x i32> poison, <1, 3, 5, 7>
//
// It could be transformed into a ld2 intrinsic in AArch64 backend or a vld2
// intrinsic in ARM backend.
//
// In X86, this can be further optimized into a set of target
// specific loads followed by an optimized sequence of shuffles.
//
// E.g. An interleaved store (Factor = 3):
//        %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
//                 <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
//        store <12 x i32> %i.vec, <12 x i32>* %ptr
//
// It could be transformed into a st3 intrinsic in AArch64 backend or a vst3
// intrinsic in ARM backend.
//
// Similarly, a set of interleaved stores can be transformed into an optimized
// sequence of shuffles followed by a set of target specific stores for X86.
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/InterleavedAccess.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Utils/Local.h"
#include <cassert>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "interleaved-access"

static cl::opt<bool> LowerInterleavedAccesses(
    "lower-interleaved-accesses",
    cl::desc("Enable lowering interleaved accesses to intrinsics"),
    cl::init(true), cl::Hidden);

namespace {

class InterleavedAccessImpl {
  friend class InterleavedAccess;

public:
  InterleavedAccessImpl() = default;
  InterleavedAccessImpl(DominatorTree *DT, const TargetLowering *TLI)
      : DT(DT), TLI(TLI), MaxFactor(TLI->getMaxSupportedInterleaveFactor()) {}
  bool runOnFunction(Function &F);

private:
  DominatorTree *DT = nullptr;
  const TargetLowering *TLI = nullptr;

  /// The maximum supported interleave factor.
  unsigned MaxFactor = 0u;

  /// Transform an interleaved load into target specific intrinsics.
  bool lowerInterleavedLoad(Instruction *Load,
                            SmallSetVector<Instruction *, 32> &DeadInsts);

  /// Transform an interleaved store into target specific intrinsics.
  bool lowerInterleavedStore(Instruction *Store,
                             SmallSetVector<Instruction *, 32> &DeadInsts);

  /// Transform a load and a deinterleave intrinsic into target specific
  /// instructions.
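  /// E.g. (an illustrative sketch, Factor = 2):
  ///   %wide = load <8 x i32>, ptr %ptr
  ///   %deint = call {<4 x i32>, <4 x i32>}
  ///            @llvm.vector.deinterleave2.v8i32(<8 x i32> %wide)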
  bool lowerDeinterleaveIntrinsic(IntrinsicInst *II,
                                  SmallSetVector<Instruction *, 32> &DeadInsts);

  /// Transform an interleave intrinsic and a store into target specific
  /// instructions.
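  /// E.g. (an illustrative sketch, Factor = 2):
  ///   %wide = call <8 x i32> @llvm.vector.interleave2.v8i32(<4 x i32> %v0,
  ///                                                         <4 x i32> %v1)
  ///   store <8 x i32> %wide, ptr %ptr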
  bool lowerInterleaveIntrinsic(IntrinsicInst *II,
                                SmallSetVector<Instruction *, 32> &DeadInsts);

  /// Returns true if the uses of an interleaved load by the
  /// extractelement instructions in \p Extracts can be replaced by uses of the
  /// shufflevector instructions in \p Shuffles instead. If so, the necessary
  /// replacements are also performed.
  bool tryReplaceExtracts(ArrayRef<ExtractElementInst *> Extracts,
                          ArrayRef<ShuffleVectorInst *> Shuffles);

  /// Given a number of shuffles of the form shuffle(binop(x,y)), convert them
  /// to binop(shuffle(x), shuffle(y)) to allow the formation of an
  /// interleaved load. Any newly created shuffles that operate on \p LI will
  /// be added to \p Shuffles. Returns true if any changes to the IR have been
  /// made.
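  /// For instance (an illustrative sketch):
  ///   %bo = add <8 x i32> %wide.vec, %wide.vec2
  ///   %s  = shuffle <8 x i32> %bo, <8 x i32> poison, <0, 2, 4, 6>
  /// becomes
  ///   %s0 = shuffle <8 x i32> %wide.vec, <8 x i32> poison, <0, 2, 4, 6>
  ///   %s1 = shuffle <8 x i32> %wide.vec2, <8 x i32> poison, <0, 2, 4, 6>
  ///   %s  = add <4 x i32> %s0, %s1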
  bool replaceBinOpShuffles(ArrayRef<ShuffleVectorInst *> BinOpShuffles,
                            SmallVectorImpl<ShuffleVectorInst *> &Shuffles,
                            Instruction *LI);
};

class InterleavedAccess : public FunctionPass {
  InterleavedAccessImpl Impl;

public:
  static char ID;

  InterleavedAccess() : FunctionPass(ID) {}

  StringRef getPassName() const override { return "Interleaved Access Pass"; }

  bool runOnFunction(Function &F) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.setPreservesCFG();
  }
};

} // end anonymous namespace.

PreservedAnalyses InterleavedAccessPass::run(Function &F,
                                             FunctionAnalysisManager &FAM) {
  auto *DT = &FAM.getResult<DominatorTreeAnalysis>(F);
  auto *TLI = TM->getSubtargetImpl(F)->getTargetLowering();
  InterleavedAccessImpl Impl(DT, TLI);
  bool Changed = Impl.runOnFunction(F);

  if (!Changed)
    return PreservedAnalyses::all();

  PreservedAnalyses PA;
  PA.preserveSet<CFGAnalyses>();
  return PA;
}

char InterleavedAccess::ID = 0;

bool InterleavedAccess::runOnFunction(Function &F) {
  if (skipFunction(F))
    return false;

  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
  if (!TPC || !LowerInterleavedAccesses)
    return false;

  LLVM_DEBUG(dbgs() << "*** " << getPassName() << ": " << F.getName() << "\n");

  Impl.DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  auto &TM = TPC->getTM<TargetMachine>();
  Impl.TLI = TM.getSubtargetImpl(F)->getTargetLowering();
  Impl.MaxFactor = Impl.TLI->getMaxSupportedInterleaveFactor();

  return Impl.runOnFunction(F);
}

INITIALIZE_PASS_BEGIN(InterleavedAccess, DEBUG_TYPE,
    "Lower interleaved memory accesses to target specific intrinsics", false,
    false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_END(InterleavedAccess, DEBUG_TYPE,
    "Lower interleaved memory accesses to target specific intrinsics", false,
    false)

FunctionPass *llvm::createInterleavedAccessPass() {
  return new InterleavedAccess();
}

/// Check if the mask is a DE-interleave mask for an interleaved load.
///
/// E.g. DE-interleave masks (Factor = 2) could be:
///     <0, 2, 4, 6>    (mask of index 0 to extract even elements)
///     <1, 3, 5, 7>    (mask of index 1 to extract odd elements)
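///
/// In general, element i of a matching mask equals i * Factor + Index. For
/// instance (an illustrative case, assuming MaxFactor >= 4 and a 16-element
/// input load), <1, 5, 9, 13> matches Factor = 4 with Index = 1.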
static bool isDeInterleaveMask(ArrayRef<int> Mask, unsigned &Factor,
                               unsigned &Index, unsigned MaxFactor,
                               unsigned NumLoadElements) {
  if (Mask.size() < 2)
    return false;

  // Check potential Factors.
  for (Factor = 2; Factor <= MaxFactor; Factor++) {
    // Make sure we don't produce a load wider than the input load.
    if (Mask.size() * Factor > NumLoadElements)
      return false;
    if (ShuffleVectorInst::isDeInterleaveMaskOfFactor(Mask, Factor, Index))
      return true;
  }

  return false;
}

/// Check if the mask can be used in an interleaved store.
///
/// It checks for a more general pattern than the RE-interleave mask.
/// I.e. <x, y, ... z, x+1, y+1, ...z+1, x+2, y+2, ...z+2, ...>
/// E.g. For a Factor of 2 (LaneLen=4): <4, 32, 5, 33, 6, 34, 7, 35>
/// E.g. For a Factor of 3 (LaneLen=4): <4, 32, 16, 5, 33, 17, 6, 34, 18, 7, 35, 19>
/// E.g. For a Factor of 4 (LaneLen=2): <8, 2, 12, 4, 9, 3, 13, 5>
///
/// The particular case of an RE-interleave mask is:
/// I.e. <0, LaneLen, ... , LaneLen*(Factor - 1), 1, LaneLen + 1, ...>
/// E.g. For a Factor of 2 (LaneLen=4): <0, 4, 1, 5, 2, 6, 3, 7>
static bool isReInterleaveMask(ShuffleVectorInst *SVI, unsigned &Factor,
                               unsigned MaxFactor) {
  unsigned NumElts = SVI->getShuffleMask().size();
  if (NumElts < 4)
    return false;

  // Check potential Factors.
  for (Factor = 2; Factor <= MaxFactor; Factor++) {
    if (SVI->isInterleave(Factor))
      return true;
  }

  return false;
}
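
// The wide mask operand sits at a different index for loads and stores. A
// sketch of the two vp forms (illustrative):
//   %v = call <8 x i32> @llvm.vp.load.v8i32.p0(ptr %p, <8 x i1> %m, i32 %evl)
//   call void @llvm.vp.store.v8i32.p0(<8 x i32> %v, ptr %p, <8 x i1> %m, i32 %evl)
// The mask %m is operand 1 of vp.load and operand 2 of vp.store, matching the
// switch below.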

static Value *getMaskOperand(IntrinsicInst *II) {
  switch (II->getIntrinsicID()) {
  default:
    llvm_unreachable("Unexpected intrinsic");
  case Intrinsic::vp_load:
  case Intrinsic::masked_load:
    return II->getOperand(1);
  case Intrinsic::vp_store:
  case Intrinsic::masked_store:
    return II->getOperand(2);
  }
}

// Return a pair of:
// (1) The corresponding deinterleaved mask, or nullptr if there is no valid
//     mask.
// (2) A gap mask: some wide masks effectively skip a whole field, and this
//     element is a mask in which inactive lanes represent the fields that are
//     skipped (i.e. the "gaps").
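//
// For example (an illustrative case), with Factor = 3 and a leaf length of 2,
// the wide mask <1, 1, 0, 1, 1, 0> yields the leaf mask <1, 1> together with
// gap mask 0b011: the third field is entirely inactive, i.e. a gap.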
static std::pair<Value *, APInt> getMask(Value *WideMask, unsigned Factor,
                                         ElementCount LeafValueEC);

static std::pair<Value *, APInt> getMask(Value *WideMask, unsigned Factor,
                                         VectorType *LeafValueTy) {
  return getMask(WideMask, Factor, LeafValueTy->getElementCount());
}

bool InterleavedAccessImpl::lowerInterleavedLoad(
    Instruction *Load, SmallSetVector<Instruction *, 32> &DeadInsts) {
  if (isa<ScalableVectorType>(Load->getType()))
    return false;

  auto *LI = dyn_cast<LoadInst>(Load);
  auto *II = dyn_cast<IntrinsicInst>(Load);
  if (!LI && !II)
    return false;

  if (LI && !LI->isSimple())
    return false;

  // Check if all users of this load are shufflevectors. If we encounter any
  // users that are extractelement instructions or binary operators, we save
  // them to later check if they can be modified to extract from one of the
  // shufflevectors instead of the load.

  SmallVector<ShuffleVectorInst *, 4> Shuffles;
  SmallVector<ExtractElementInst *, 4> Extracts;
  // BinOpShuffles need to be handled a single time in case both operands of the
  // binop are the same load.
  SmallSetVector<ShuffleVectorInst *, 4> BinOpShuffles;

  for (auto *User : Load->users()) {
    auto *Extract = dyn_cast<ExtractElementInst>(User);
    if (Extract && isa<ConstantInt>(Extract->getIndexOperand())) {
      Extracts.push_back(Extract);
      continue;
    }
    if (auto *BI = dyn_cast<BinaryOperator>(User)) {
      using namespace PatternMatch;
      if (!BI->user_empty() &&
          all_of(BI->users(), match_fn(m_Shuffle(m_Value(), m_Undef())))) {
        for (auto *SVI : BI->users())
          BinOpShuffles.insert(cast<ShuffleVectorInst>(SVI));
        continue;
      }
    }
    auto *SVI = dyn_cast<ShuffleVectorInst>(User);
    if (!SVI || !isa<UndefValue>(SVI->getOperand(1)))
      return false;

    Shuffles.push_back(SVI);
  }

  if (Shuffles.empty() && BinOpShuffles.empty())
    return false;

  unsigned Factor, Index;

  unsigned NumLoadElements =
      cast<FixedVectorType>(Load->getType())->getNumElements();
  auto *FirstSVI = Shuffles.size() > 0 ? Shuffles[0] : BinOpShuffles[0];
  // Check if the first shufflevector is a DE-interleave shuffle.
  if (!isDeInterleaveMask(FirstSVI->getShuffleMask(), Factor, Index, MaxFactor,
                          NumLoadElements))
    return false;

  // Holds the corresponding index for each DE-interleave shuffle.
  SmallVector<unsigned, 4> Indices;

  VectorType *VecTy = cast<VectorType>(FirstSVI->getType());

  // Check if other shufflevectors are also DE-interleaved of the same type
  // and factor as the first shufflevector.
  for (auto *Shuffle : Shuffles) {
    if (Shuffle->getType() != VecTy)
      return false;
    if (!ShuffleVectorInst::isDeInterleaveMaskOfFactor(
            Shuffle->getShuffleMask(), Factor, Index))
      return false;

    assert(Shuffle->getShuffleMask().size() <= NumLoadElements);
    Indices.push_back(Index);
  }
  for (auto *Shuffle : BinOpShuffles) {
    if (Shuffle->getType() != VecTy)
      return false;
    if (!ShuffleVectorInst::isDeInterleaveMaskOfFactor(
            Shuffle->getShuffleMask(), Factor, Index))
      return false;

    assert(Shuffle->getShuffleMask().size() <= NumLoadElements);

    if (cast<Instruction>(Shuffle->getOperand(0))->getOperand(0) == Load)
      Indices.push_back(Index);
    if (cast<Instruction>(Shuffle->getOperand(0))->getOperand(1) == Load)
      Indices.push_back(Index);
  }

  // Try and modify users of the load that are extractelement instructions to
  // use the shufflevector instructions instead of the load.
  if (!tryReplaceExtracts(Extracts, Shuffles))
    return false;

  bool BinOpShuffleChanged =
      replaceBinOpShuffles(BinOpShuffles.getArrayRef(), Shuffles, Load);

  Value *Mask = nullptr;
  auto GapMask = APInt::getAllOnes(Factor);
  if (LI) {
    LLVM_DEBUG(dbgs() << "IA: Found an interleaved load: " << *Load << "\n");
  } else {
    // Check mask operand. Handle both all-true/false and interleaved mask.
    std::tie(Mask, GapMask) = getMask(getMaskOperand(II), Factor, VecTy);
    if (!Mask)
      return false;

    LLVM_DEBUG(dbgs() << "IA: Found an interleaved vp.load or masked.load: "
                      << *Load << "\n");
    LLVM_DEBUG(dbgs() << "IA: With nominal factor " << Factor
                      << " and actual factor " << GapMask.popcount() << "\n");
  }

  // Try to create target specific intrinsics to replace the load and
  // shuffles.
  if (!TLI->lowerInterleavedLoad(cast<Instruction>(Load), Mask, Shuffles,
                                 Indices, Factor, GapMask))
    // If Extracts is not empty, tryReplaceExtracts made changes earlier.
    return !Extracts.empty() || BinOpShuffleChanged;

  DeadInsts.insert_range(Shuffles);

  DeadInsts.insert(Load);
  return true;
}

bool InterleavedAccessImpl::replaceBinOpShuffles(
    ArrayRef<ShuffleVectorInst *> BinOpShuffles,
    SmallVectorImpl<ShuffleVectorInst *> &Shuffles, Instruction *Load) {
  for (auto *SVI : BinOpShuffles) {
    BinaryOperator *BI = cast<BinaryOperator>(SVI->getOperand(0));
    Type *BIOp0Ty = BI->getOperand(0)->getType();
    ArrayRef<int> Mask = SVI->getShuffleMask();
    assert(all_of(Mask, [&](int Idx) {
      return Idx < (int)cast<FixedVectorType>(BIOp0Ty)->getNumElements();
    }));

    BasicBlock::iterator insertPos = SVI->getIterator();
    auto *NewSVI1 =
        new ShuffleVectorInst(BI->getOperand(0), PoisonValue::get(BIOp0Ty),
                              Mask, SVI->getName(), insertPos);
    auto *NewSVI2 = new ShuffleVectorInst(
        BI->getOperand(1), PoisonValue::get(BI->getOperand(1)->getType()), Mask,
        SVI->getName(), insertPos);
    auto *NewBI = BinaryOperator::CreateWithCopiedFlags(
        BI->getOpcode(), NewSVI1, NewSVI2, BI, BI->getName(), insertPos);
    SVI->replaceAllUsesWith(NewBI);
    LLVM_DEBUG(dbgs() << "  Replaced: " << *BI << "\n  And     : " << *SVI
                      << "\n  With    : " << *NewSVI1 << "\n  And     : "
                      << *NewSVI2 << "\n  And     : " << *NewBI << "\n");
    RecursivelyDeleteTriviallyDeadInstructions(SVI);
    if (NewSVI1->getOperand(0) == Load)
      Shuffles.push_back(NewSVI1);
    if (NewSVI2->getOperand(0) == Load)
      Shuffles.push_back(NewSVI2);
  }

  return !BinOpShuffles.empty();
}

bool InterleavedAccessImpl::tryReplaceExtracts(
    ArrayRef<ExtractElementInst *> Extracts,
    ArrayRef<ShuffleVectorInst *> Shuffles) {
  // If there aren't any extractelement instructions to modify, there's nothing
  // to do.
  if (Extracts.empty())
    return true;

  // Maps extractelement instructions to vector-index pairs. The extractelement
  // instructions will be modified to use the new vector and index operands.
  DenseMap<ExtractElementInst *, std::pair<Value *, int>> ReplacementMap;

  for (auto *Extract : Extracts) {
    // The vector index that is extracted.
    auto *IndexOperand = cast<ConstantInt>(Extract->getIndexOperand());
    auto Index = IndexOperand->getSExtValue();

    // Look for a suitable shufflevector instruction. The goal is to modify the
    // extractelement instruction (which uses an interleaved load) to use one
    // of the shufflevector instructions instead of the load.
    for (auto *Shuffle : Shuffles) {
      // If the shufflevector instruction doesn't dominate the extract, we
      // can't create a use of it.
      if (!DT->dominates(Shuffle, Extract))
        continue;

      // Inspect the indices of the shufflevector instruction. If the shuffle
      // selects the same index that is extracted, we can modify the
      // extractelement instruction.
      SmallVector<int, 4> Indices;
      Shuffle->getShuffleMask(Indices);
      for (unsigned I = 0; I < Indices.size(); ++I)
        if (Indices[I] == Index) {
          assert(Extract->getOperand(0) == Shuffle->getOperand(0) &&
                 "Vector operations do not match");
          ReplacementMap[Extract] = std::make_pair(Shuffle, I);
          break;
        }

      // If we found a suitable shufflevector instruction, stop looking.
      if (ReplacementMap.count(Extract))
        break;
    }

    // If we did not find a suitable shufflevector instruction, the
    // extractelement instruction cannot be modified, so we must give up.
    if (!ReplacementMap.count(Extract))
      return false;
  }

  // Finally, perform the replacements.
  IRBuilder<> Builder(Extracts[0]->getContext());
  for (auto &Replacement : ReplacementMap) {
    auto *Extract = Replacement.first;
    auto *Vector = Replacement.second.first;
    auto Index = Replacement.second.second;
    Builder.SetInsertPoint(Extract);
    Extract->replaceAllUsesWith(Builder.CreateExtractElement(Vector, Index));
    Extract->eraseFromParent();
  }

  return true;
}

bool InterleavedAccessImpl::lowerInterleavedStore(
    Instruction *Store, SmallSetVector<Instruction *, 32> &DeadInsts) {
  Value *StoredValue;
  auto *SI = dyn_cast<StoreInst>(Store);
  auto *II = dyn_cast<IntrinsicInst>(Store);
  if (SI) {
    if (!SI->isSimple())
      return false;
    StoredValue = SI->getValueOperand();
  } else {
    assert(II->getIntrinsicID() == Intrinsic::vp_store ||
           II->getIntrinsicID() == Intrinsic::masked_store);
    StoredValue = II->getArgOperand(0);
  }

  auto *SVI = dyn_cast<ShuffleVectorInst>(StoredValue);
  if (!SVI || !SVI->hasOneUse() || isa<ScalableVectorType>(SVI->getType()))
    return false;

  unsigned NumStoredElements =
      cast<FixedVectorType>(SVI->getType())->getNumElements();
  // Check if the shufflevector is a RE-interleave shuffle.
  unsigned Factor;
  if (!isReInterleaveMask(SVI, Factor, MaxFactor))
    return false;
  assert(NumStoredElements % Factor == 0 &&
         "number of stored elements should be a multiple of Factor");

  Value *Mask = nullptr;
  auto GapMask = APInt::getAllOnes(Factor);
  if (SI) {
    LLVM_DEBUG(dbgs() << "IA: Found an interleaved store: " << *Store << "\n");
  } else {
    // Check mask operand. Handle both all-true/false and interleaved mask.
    unsigned LaneMaskLen = NumStoredElements / Factor;
    std::tie(Mask, GapMask) = getMask(getMaskOperand(II), Factor,
                                      ElementCount::getFixed(LaneMaskLen));
    if (!Mask)
      return false;

    LLVM_DEBUG(dbgs() << "IA: Found an interleaved vp.store or masked.store: "
                      << *Store << "\n");
    LLVM_DEBUG(dbgs() << "IA: With nominal factor " << Factor
                      << " and actual factor " << GapMask.popcount() << "\n");
  }

  // Try to create target specific intrinsics to replace the store and
  // shuffle.
  if (!TLI->lowerInterleavedStore(Store, Mask, SVI, Factor, GapMask))
    return false;

  // Already have a new target specific interleaved store. Erase the old store.
  DeadInsts.insert(Store);
  DeadInsts.insert(SVI);
  return true;
}

// A wide mask <1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0> could be used to skip the
// last field in a factor-of-three interleaved store or deinterleaved load (in
// which case LeafMaskLen is 4). Such a (wide) mask is also known as a gap
// mask. This helper function tries to detect this pattern and, via GapMask,
// report the actual factor we're accessing, which is 2 in this example.
static void getGapMask(const Constant &MaskConst, unsigned Factor,
                       unsigned LeafMaskLen, APInt &GapMask) {
  assert(GapMask.getBitWidth() == Factor);
  for (unsigned F = 0U; F < Factor; ++F) {
    bool AllZero = true;
    for (unsigned Idx = 0U; Idx < LeafMaskLen; ++Idx) {
      Constant *C = MaskConst.getAggregateElement(F + Idx * Factor);
      if (!C->isZeroValue()) {
        AllZero = false;
        break;
      }
    }
    // All mask bits on this field are zero; skip it.
    if (AllZero)
      GapMask.clearBit(F);
  }
}

static std::pair<Value *, APInt> getMask(Value *WideMask, unsigned Factor,
                                         ElementCount LeafValueEC) {
  auto GapMask = APInt::getAllOnes(Factor);

  if (auto *IMI = dyn_cast<IntrinsicInst>(WideMask)) {
    if (unsigned F = getInterleaveIntrinsicFactor(IMI->getIntrinsicID());
        F && F == Factor) {
      Value *RefArg = nullptr;
      // Check if all the intrinsic arguments are the same, except those that
      // are zeros, which we mark as gaps in the gap mask.
      for (auto [Idx, Arg] : enumerate(IMI->args())) {
        if (auto *C = dyn_cast<Constant>(Arg); C && C->isZeroValue()) {
          GapMask.clearBit(Idx);
          continue;
        }

        if (!RefArg)
          RefArg = Arg;
        else if (RefArg != Arg)
          return {nullptr, GapMask};
      }

      // In a very rare occasion, all the intrinsic arguments might be zeros,
      // in which case we still want to return an all-zeros constant instead of
      // nullptr.
      return {RefArg ? RefArg : IMI->getArgOperand(0), GapMask};
    }
  }

  // Masks that are assembled from bitwise AND.
  if (auto *AndOp = dyn_cast<BinaryOperator>(WideMask);
      AndOp && AndOp->getOpcode() == Instruction::And) {
    auto [MaskLHS, GapMaskLHS] =
        getMask(AndOp->getOperand(0), Factor, LeafValueEC);
    auto [MaskRHS, GapMaskRHS] =
        getMask(AndOp->getOperand(1), Factor, LeafValueEC);
    if (!MaskLHS || !MaskRHS)
      return {nullptr, GapMask};
    // Using IRBuilder here so that any trivial constants could be folded right
    // away.
    return {IRBuilder<>(AndOp).CreateAnd(MaskLHS, MaskRHS),
            GapMaskLHS & GapMaskRHS};
  }

  if (auto *ConstMask = dyn_cast<Constant>(WideMask)) {
    if (auto *Splat = ConstMask->getSplatValue())
      // All-ones or all-zeros mask.
      return {ConstantVector::getSplat(LeafValueEC, Splat), GapMask};

    if (LeafValueEC.isFixed()) {
      unsigned LeafMaskLen = LeafValueEC.getFixedValue();
      // First, check if we use a gap mask to skip some of the factors /
      // fields.
      getGapMask(*ConstMask, Factor, LeafMaskLen, GapMask);

      SmallVector<Constant *, 8> LeafMask(LeafMaskLen, nullptr);
      // If this is a fixed-length constant mask, each lane / leaf has to
      // use the same mask. This is done by checking if every group with Factor
      // number of elements in the interleaved mask has homogeneous values.
      for (unsigned Idx = 0U; Idx < LeafMaskLen * Factor; ++Idx) {
        if (!GapMask[Idx % Factor])
          continue;
        Constant *C = ConstMask->getAggregateElement(Idx);
        if (LeafMask[Idx / Factor] && LeafMask[Idx / Factor] != C)
          return {nullptr, GapMask};
        LeafMask[Idx / Factor] = C;
      }

      return {ConstantVector::get(LeafMask), GapMask};
    }
  }

  if (auto *SVI = dyn_cast<ShuffleVectorInst>(WideMask)) {
    Type *Op1Ty = SVI->getOperand(1)->getType();
    if (!isa<FixedVectorType>(Op1Ty))
      return {nullptr, GapMask};

    // Check that the shuffle mask is: a) an interleave, b) all of the same
    // set of the elements, and c) contained by the first source. (c) could
    // be relaxed if desired.
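    // E.g. (an illustrative sketch, Factor = 2):
    //   %wide.m = shufflevector <4 x i1> %m, <4 x i1> poison,
    //             <0, 0, 1, 1, 2, 2, 3, 3>
    // interleaves %m with itself, so the leaf mask is %m (its first
    // LeafValueEC elements, extracted below).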
    unsigned NumSrcElts =
        cast<FixedVectorType>(SVI->getOperand(1)->getType())->getNumElements();
    SmallVector<unsigned> StartIndexes;
    if (ShuffleVectorInst::isInterleaveMask(SVI->getShuffleMask(), Factor,
                                            NumSrcElts * 2, StartIndexes) &&
        llvm::all_of(StartIndexes, equal_to(0)) &&
        llvm::all_of(SVI->getShuffleMask(), [&NumSrcElts](int Idx) {
          return Idx < (int)NumSrcElts;
        })) {
      auto *LeafMaskTy =
          VectorType::get(Type::getInt1Ty(SVI->getContext()), LeafValueEC);
      IRBuilder<> Builder(SVI);
      return {Builder.CreateExtractVector(LeafMaskTy, SVI->getOperand(0),
                                          uint64_t(0)),
              GapMask};
    }
  }

  return {nullptr, GapMask};
}

bool InterleavedAccessImpl::lowerDeinterleaveIntrinsic(
    IntrinsicInst *DI, SmallSetVector<Instruction *, 32> &DeadInsts) {
  Instruction *LoadedVal = dyn_cast<Instruction>(DI->getOperand(0));
  if (!LoadedVal || !LoadedVal->hasOneUse())
    return false;

  auto *LI = dyn_cast<LoadInst>(LoadedVal);
  auto *II = dyn_cast<IntrinsicInst>(LoadedVal);
  if (!LI && !II)
    return false;

  const unsigned Factor = getDeinterleaveIntrinsicFactor(DI->getIntrinsicID());
  assert(Factor && "unexpected deinterleave intrinsic");

  Value *Mask = nullptr;
  if (LI) {
    if (!LI->isSimple())
      return false;

    LLVM_DEBUG(dbgs() << "IA: Found a load with deinterleave intrinsic " << *DI
                      << " and factor = " << Factor << "\n");
  } else {
    assert(II);
    if (II->getIntrinsicID() != Intrinsic::masked_load &&
        II->getIntrinsicID() != Intrinsic::vp_load)
      return false;

    // Check mask operand. Handle both all-true/false and interleaved mask.
    APInt GapMask(Factor, 0);
    std::tie(Mask, GapMask) =
        getMask(getMaskOperand(II), Factor, getDeinterleavedVectorType(DI));
    if (!Mask)
      return false;
    // Gap masks are not yet supported when deinterleaving using intrinsics.
    // Yet it is possible that we have already changed the IR, hence we return
    // true here.
    if (GapMask.popcount() != Factor)
      return true;

    LLVM_DEBUG(dbgs() << "IA: Found a vp.load or masked.load with deinterleave"
                      << " intrinsic " << *DI << " and factor = " << Factor
                      << "\n");
  }

  // Try and match this with target specific intrinsics.
  if (!TLI->lowerDeinterleaveIntrinsicToLoad(LoadedVal, Mask, DI))
    return false;

  DeadInsts.insert(DI);
  // We now have a target-specific load, so delete the old one.
  DeadInsts.insert(LoadedVal);
  return true;
}

bool InterleavedAccessImpl::lowerInterleaveIntrinsic(
    IntrinsicInst *IntII, SmallSetVector<Instruction *, 32> &DeadInsts) {
  if (!IntII->hasOneUse())
    return false;
  Instruction *StoredBy = dyn_cast<Instruction>(IntII->user_back());
  if (!StoredBy)
    return false;
  auto *SI = dyn_cast<StoreInst>(StoredBy);
  auto *II = dyn_cast<IntrinsicInst>(StoredBy);
  if (!SI && !II)
    return false;

  SmallVector<Value *, 8> InterleaveValues(IntII->args());
  const unsigned Factor = getInterleaveIntrinsicFactor(IntII->getIntrinsicID());
  assert(Factor && "unexpected interleave intrinsic");

  Value *Mask = nullptr;
  if (II) {
    if (II->getIntrinsicID() != Intrinsic::masked_store &&
        II->getIntrinsicID() != Intrinsic::vp_store)
      return false;
    // Check mask operand. Handle both all-true/false and interleaved mask.
    APInt GapMask(Factor, 0);
    std::tie(Mask, GapMask) =
        getMask(getMaskOperand(II), Factor,
                cast<VectorType>(InterleaveValues[0]->getType()));
    if (!Mask)
      return false;
    // Gap masks are not yet supported when interleaving using intrinsics. Yet
    // it is possible that we have already changed the IR, hence we return
    // true here.
    if (GapMask.popcount() != Factor)
      return true;

    LLVM_DEBUG(dbgs() << "IA: Found a vp.store or masked.store with interleave"
                      << " intrinsic " << *IntII << " and factor = " << Factor
                      << "\n");
  } else {
    if (!SI->isSimple())
      return false;

    LLVM_DEBUG(dbgs() << "IA: Found a store with interleave intrinsic "
                      << *IntII << " and factor = " << Factor << "\n");
  }

  // Try and match this with target specific intrinsics.
  if (!TLI->lowerInterleaveIntrinsicToStore(StoredBy, Mask, InterleaveValues))
    return false;

  // We now have a target-specific store, so delete the old one.
  DeadInsts.insert(StoredBy);
  DeadInsts.insert(IntII);
  return true;
}

bool InterleavedAccessImpl::runOnFunction(Function &F) {
  // Holds dead instructions that will be erased later.
  SmallSetVector<Instruction *, 32> DeadInsts;
  bool Changed = false;

  using namespace PatternMatch;
  for (auto &I : instructions(F)) {
    if (match(&I, m_CombineOr(m_Load(m_Value()),
                              m_Intrinsic<Intrinsic::vp_load>())) ||
        match(&I, m_Intrinsic<Intrinsic::masked_load>()))
      Changed |= lowerInterleavedLoad(&I, DeadInsts);

    if (match(&I, m_CombineOr(m_Store(m_Value(), m_Value()),
                              m_Intrinsic<Intrinsic::vp_store>())) ||
        match(&I, m_Intrinsic<Intrinsic::masked_store>()))
      Changed |= lowerInterleavedStore(&I, DeadInsts);

    if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
      if (getDeinterleaveIntrinsicFactor(II->getIntrinsicID()))
        Changed |= lowerDeinterleaveIntrinsic(II, DeadInsts);
      else if (getInterleaveIntrinsicFactor(II->getIntrinsicID()))
        Changed |= lowerInterleaveIntrinsic(II, DeadInsts);
    }
  }

  for (auto *I : DeadInsts)
    I->eraseFromParent();

  return Changed;
}