LLVM 23.0.0git
VectorUtils.cpp
Go to the documentation of this file.
1//===----------- VectorUtils.cpp - Vectorizer utility functions -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines vectorizer utilities.
10//
11//===----------------------------------------------------------------------===//
12
23#include "llvm/IR/Constants.h"
25#include "llvm/IR/IRBuilder.h"
28#include "llvm/IR/Value.h"
30
31#define DEBUG_TYPE "vectorutils"
32
33using namespace llvm;
34using namespace llvm::PatternMatch;
35
36/// Maximum factor for an interleaved memory access.
38 "max-interleave-group-factor", cl::Hidden,
39 cl::desc("Maximum factor for an interleaved access group (default = 8)"),
40 cl::init(8));
41
42/// Return true if all of the intrinsic's arguments and return type are scalars
43/// for the scalar form of the intrinsic, and vectors for the vector form of the
44/// intrinsic (except operands that are marked as always being scalar by
45/// isVectorIntrinsicWithScalarOpAtArg).
47 switch (ID) {
48 case Intrinsic::abs: // Begin integer bit-manipulation.
49 case Intrinsic::bswap:
50 case Intrinsic::bitreverse:
51 case Intrinsic::ctpop:
52 case Intrinsic::ctlz:
53 case Intrinsic::cttz:
54 case Intrinsic::fshl:
55 case Intrinsic::fshr:
56 case Intrinsic::smax:
57 case Intrinsic::smin:
58 case Intrinsic::umax:
59 case Intrinsic::umin:
60 case Intrinsic::sadd_sat:
61 case Intrinsic::ssub_sat:
62 case Intrinsic::uadd_sat:
63 case Intrinsic::usub_sat:
64 case Intrinsic::smul_fix:
65 case Intrinsic::smul_fix_sat:
66 case Intrinsic::umul_fix:
67 case Intrinsic::umul_fix_sat:
68 case Intrinsic::uadd_with_overflow:
69 case Intrinsic::sadd_with_overflow:
70 case Intrinsic::usub_with_overflow:
71 case Intrinsic::ssub_with_overflow:
72 case Intrinsic::umul_with_overflow:
73 case Intrinsic::smul_with_overflow:
74 case Intrinsic::sqrt: // Begin floating-point.
75 case Intrinsic::asin:
76 case Intrinsic::acos:
77 case Intrinsic::atan:
78 case Intrinsic::atan2:
79 case Intrinsic::sin:
80 case Intrinsic::cos:
81 case Intrinsic::sincos:
82 case Intrinsic::sincospi:
83 case Intrinsic::tan:
84 case Intrinsic::sinh:
85 case Intrinsic::cosh:
86 case Intrinsic::tanh:
87 case Intrinsic::exp:
88 case Intrinsic::exp10:
89 case Intrinsic::exp2:
90 case Intrinsic::frexp:
91 case Intrinsic::ldexp:
92 case Intrinsic::log:
93 case Intrinsic::log10:
94 case Intrinsic::log2:
95 case Intrinsic::fabs:
96 case Intrinsic::minnum:
97 case Intrinsic::maxnum:
98 case Intrinsic::minimum:
99 case Intrinsic::maximum:
100 case Intrinsic::minimumnum:
101 case Intrinsic::maximumnum:
102 case Intrinsic::modf:
103 case Intrinsic::copysign:
104 case Intrinsic::floor:
105 case Intrinsic::ceil:
106 case Intrinsic::trunc:
107 case Intrinsic::rint:
108 case Intrinsic::nearbyint:
109 case Intrinsic::round:
110 case Intrinsic::roundeven:
111 case Intrinsic::pow:
112 case Intrinsic::fma:
113 case Intrinsic::fmuladd:
114 case Intrinsic::is_fpclass:
115 case Intrinsic::powi:
116 case Intrinsic::canonicalize:
117 case Intrinsic::fptosi_sat:
118 case Intrinsic::fptoui_sat:
119 case Intrinsic::lround:
120 case Intrinsic::llround:
121 case Intrinsic::lrint:
122 case Intrinsic::llrint:
123 case Intrinsic::ucmp:
124 case Intrinsic::scmp:
125 case Intrinsic::clmul:
126 return true;
127 default:
128 return false;
129 }
130}
131
138
139/// Identifies if the vector form of the intrinsic has a scalar operand.
/// \returns true when operand \p ScalarOpdIdx of intrinsic \p ID must remain
/// scalar when the intrinsic is vectorized. \p TTI, if provided, answers the
/// question for target-specific intrinsics.
// NOTE(review): the signature line and the TTI/target-intrinsic guard before
// the first return are missing from this extraction — confirm upstream.
 141 unsigned ScalarOpdIdx,
 142 const TargetTransformInfo *TTI) {
 143
 145 return TTI->isTargetIntrinsicWithScalarOpAtArg(ID, ScalarOpdIdx);
 146
 147 // Vector predication intrinsics have the EVL as the last operand.
 148 if (VPIntrinsic::getVectorLengthParamPos(ID) == ScalarOpdIdx)
 149 return true;
 150
 151 switch (ID) {
 // For these, operand 1 (an immediate flag, an exponent, or an extract
 // index) stays scalar in the vector form.
 152 case Intrinsic::abs:
 153 case Intrinsic::vp_abs:
 154 case Intrinsic::ctlz:
 155 case Intrinsic::vp_ctlz:
 156 case Intrinsic::cttz:
 157 case Intrinsic::vp_cttz:
 158 case Intrinsic::is_fpclass:
 159 case Intrinsic::vp_is_fpclass:
 160 case Intrinsic::powi:
 161 case Intrinsic::vector_extract:
 162 return (ScalarOpdIdx == 1);
 // Fixed-point multiplies keep the scale (operand 2) scalar.
 163 case Intrinsic::smul_fix:
 164 case Intrinsic::smul_fix_sat:
 165 case Intrinsic::umul_fix:
 166 case Intrinsic::umul_fix_sat:
 167 return (ScalarOpdIdx == 2);
 // vp.splice keeps its immediate offset (operand 2) and second EVL
 // (operand 4) scalar.
 168 case Intrinsic::experimental_vp_splice:
 169 return ScalarOpdIdx == 2 || ScalarOpdIdx == 4;
 170 default:
 171 return false;
 172 }
 173}
174
// Returns true if the (return value or) operand at index \p OpdIdx overloads
// the intrinsic's type signature. OpdIdx == -1 denotes the return type;
// OpdIdx >= 0 denotes the argument at that index.
// NOTE(review): the function name line and two guard lines (a TTI check
// before the first return, and presumably a VP-cast check before the second)
// are missing from this extraction — confirm upstream.
 176 Intrinsic::ID ID, int OpdIdx, const TargetTransformInfo *TTI) {
 177 assert(ID != Intrinsic::not_intrinsic && "Not an intrinsic!");
 178
 180 return TTI->isTargetIntrinsicWithOverloadTypeAtArg(ID, OpdIdx);
 181
 183 return OpdIdx == -1 || OpdIdx == 0;
 184
 185 switch (ID) {
 // Overloaded on the return type and on operand 0.
 186 case Intrinsic::fptosi_sat:
 187 case Intrinsic::fptoui_sat:
 188 case Intrinsic::lround:
 189 case Intrinsic::llround:
 190 case Intrinsic::lrint:
 191 case Intrinsic::llrint:
 192 case Intrinsic::vp_lrint:
 193 case Intrinsic::vp_llrint:
 194 case Intrinsic::ucmp:
 195 case Intrinsic::scmp:
 196 case Intrinsic::vector_extract:
 197 return OpdIdx == -1 || OpdIdx == 0;
 // Overloaded only on operand 0 (struct-returning math ops and class tests).
 198 case Intrinsic::modf:
 199 case Intrinsic::sincos:
 200 case Intrinsic::sincospi:
 201 case Intrinsic::is_fpclass:
 202 case Intrinsic::vp_is_fpclass:
 203 return OpdIdx == 0;
 // Overloaded on the return type and on operand 1 (the exponent).
 204 case Intrinsic::powi:
 205 case Intrinsic::ldexp:
 206 return OpdIdx == -1 || OpdIdx == 1;
 207 default:
 // By default only the return type participates in overloading.
 208 return OpdIdx == -1;
 209 }
 210}
211
// Returns true if field \p RetIdx of the intrinsic's struct return type
// participates in overloading.
// NOTE(review): the function name line and a TTI guard line before the first
// return are missing from this extraction — confirm upstream.
 213 Intrinsic::ID ID, int RetIdx, const TargetTransformInfo *TTI) {
 214
 216 return TTI->isTargetIntrinsicWithStructReturnOverloadAtField(ID, RetIdx);
 217
 218 switch (ID) {
 // frexp overloads on both struct fields (fraction and exponent).
 219 case Intrinsic::frexp:
 220 return RetIdx == 0 || RetIdx == 1;
 221 default:
 // Everything else overloads only on the first field.
 222 return RetIdx == 0;
 223 }
 224}
225
226/// Returns intrinsic ID for call.
227/// For the input call instruction it finds mapping intrinsic and returns
228/// its ID, in case it does not found it return not_intrinsic.
230 const TargetLibraryInfo *TLI) {
234
235 if (isTriviallyVectorizable(ID) || ID == Intrinsic::lifetime_start ||
236 ID == Intrinsic::lifetime_end || ID == Intrinsic::assume ||
237 ID == Intrinsic::experimental_noalias_scope_decl ||
238 ID == Intrinsic::sideeffect || ID == Intrinsic::pseudoprobe)
239 return ID;
241}
242
 // Map llvm.vector.interleaveN intrinsics to their interleave factor N;
 // any other intrinsic yields 0.
 244 switch (ID) {
 245 case Intrinsic::vector_interleave2:
 246 return 2;
 247 case Intrinsic::vector_interleave3:
 248 return 3;
 249 case Intrinsic::vector_interleave4:
 250 return 4;
 251 case Intrinsic::vector_interleave5:
 252 return 5;
 253 case Intrinsic::vector_interleave6:
 254 return 6;
 255 case Intrinsic::vector_interleave7:
 256 return 7;
 257 case Intrinsic::vector_interleave8:
 258 return 8;
 259 default:
 260 return 0;
 261 }
 262}
263
 // Map llvm.vector.deinterleaveN intrinsics to their deinterleave factor N;
 // any other intrinsic yields 0.
 265 switch (ID) {
 266 case Intrinsic::vector_deinterleave2:
 267 return 2;
 268 case Intrinsic::vector_deinterleave3:
 269 return 3;
 270 case Intrinsic::vector_deinterleave4:
 271 return 4;
 272 case Intrinsic::vector_deinterleave5:
 273 return 5;
 274 case Intrinsic::vector_deinterleave6:
 275 return 6;
 276 case Intrinsic::vector_deinterleave7:
 277 return 7;
 278 case Intrinsic::vector_deinterleave8:
 279 return 8;
 280 default:
 281 return 0;
 282 }
 283}
284
 // All fields of a deinterleave intrinsic's struct result have the same
 // vector type, so returning the type of field 0 is representative.
 // NOTE(review): the signature line and the initializer computing Factor
 // (presumably from the deinterleave intrinsic ID) are missing from this
 // extraction — confirm upstream.
 286 [[maybe_unused]] unsigned Factor =
 288 ArrayRef<Type *> DISubtypes = DI->getType()->subtypes();
 289 assert(Factor && Factor == DISubtypes.size() &&
 290 "unexpected deinterleave factor or result type");
 291 return cast<VectorType>(DISubtypes[0]);
 292}
293
294/// Given a vector and an element number, see if the scalar value is
295/// already around as a register, for example if it were inserted then extracted
296/// from the vector.
/// \returns the scalar Value for element \p EltNo of \p V if one can be
/// identified, poison for a provably out-of-range/undef access, or nullptr
/// when nothing is known.
297Value *llvm::findScalarElement(Value *V, unsigned EltNo) {
298 assert(V->getType()->isVectorTy() && "Not looking at a vector?");
299 VectorType *VTy = cast<VectorType>(V->getType());
300 // For fixed-length vector, return poison for out of range access.
301 if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
302 unsigned Width = FVTy->getNumElements();
303 if (EltNo >= Width)
304 return PoisonValue::get(FVTy->getElementType());
305 }
306
// Constants expose their elements directly.
307 if (Constant *C = dyn_cast<Constant>(V))
308 return C->getAggregateElement(EltNo);
309
// NOTE(review): a guard line binding 'III' (apparently an insertelement
// dyn_cast of V) is missing from this extraction — confirm upstream.
311 // If this is an insert to a variable element, we don't know what it is.
312 uint64_t IIElt;
313 if (!match(III->getOperand(2), m_ConstantInt(IIElt)))
314 return nullptr;
315
316 // If this is an insert to the element we are looking for, return the
317 // inserted value.
318 if (EltNo == IIElt)
319 return III->getOperand(1);
320
321 // Guard against infinite loop on malformed, unreachable IR.
322 if (III == III->getOperand(0))
323 return nullptr;
324
325 // Otherwise, the insertelement doesn't modify the value, recurse on its
326 // vector input.
327 return findScalarElement(III->getOperand(0), EltNo);
328 }
329
// NOTE(review): the line binding 'SVI' (apparently a shufflevector dyn_cast
// of V) is missing from this extraction — confirm upstream.
331 // Restrict the following transformation to fixed-length vector.
332 if (SVI && isa<FixedVectorType>(SVI->getType())) {
333 unsigned LHSWidth =
334 cast<FixedVectorType>(SVI->getOperand(0)->getType())->getNumElements();
// Chase the mask element back into whichever shuffle source supplies it.
335 int InEl = SVI->getMaskValue(EltNo);
336 if (InEl < 0)
337 return PoisonValue::get(VTy->getElementType());
338 if (InEl < (int)LHSWidth)
339 return findScalarElement(SVI->getOperand(0), InEl);
340 return findScalarElement(SVI->getOperand(1), InEl - LHSWidth);
341 }
342
343 // Extract a value from a vector add operation with a constant zero.
344 // TODO: Use getBinOpIdentity() to generalize this.
345 Value *Val; Constant *C;
346 if (match(V, m_Add(m_Value(Val), m_Constant(C))))
347 if (Constant *Elt = C->getAggregateElement(EltNo))
348 if (Elt->isNullValue())
349 return findScalarElement(Val, EltNo);
350
351 // If the vector is a splat then we can trivially find the scalar element.
353 if (Value *Splat = getSplatValue(V))
354 if (EltNo < VTy->getElementCount().getKnownMinValue())
355 return Splat;
356
357 // Otherwise, we don't know.
358 return nullptr;
359}
360
 // Returns the single mask value all defined elements share if the mask is a
 // splat, otherwise -1. An all-undef mask also yields -1.
 362 int SplatIndex = -1;
 363 for (int M : Mask) {
 364 // Ignore invalid (undefined) mask elements.
 365 if (M < 0)
 366 continue;
 367
 368 // There can be only 1 non-negative mask element value if this is a splat.
 369 if (SplatIndex != -1 && SplatIndex != M)
 370 return -1;
 371
 372 // Initialize the splat index to the 1st non-negative mask element.
 373 SplatIndex = M;
 374 }
 375 assert((SplatIndex == -1 || SplatIndex >= 0) && "Negative index?");
 376 return SplatIndex;
 377}
378
379/// Get splat value if the input is a splat vector or return nullptr.
380/// This function is not fully general. It checks only 2 cases:
381/// the input value is (1) a splat constant vector or (2) a sequence
382/// of instructions that broadcasts a scalar at element 0.
384 if (isa<VectorType>(V->getType()))
385 if (auto *C = dyn_cast<Constant>(V))
386 return C->getSplatValue();
387
388 // shuf (inselt ?, Splat, 0), ?, <0, undef, 0, ...>
389 Value *Splat;
390 if (match(V,
392 m_Value(), m_ZeroMask())))
393 return Splat;
394
395 return nullptr;
396}
397
// Returns true if \p V is provably a splat. \p Index == -1 matches any splat
// lane; otherwise the splat must be of element \p Index. \p Depth bounds the
// recursion through binops/selects.
398bool llvm::isSplatValue(const Value *V, int Index, unsigned Depth) {
399 assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
400
401 if (isa<VectorType>(V->getType())) {
402 if (isa<UndefValue>(V))
403 return true;
404 // FIXME: We can allow undefs, but if Index was specified, we may want to
405 // check that the constant is defined at that index.
406 if (auto *C = dyn_cast<Constant>(V))
407 return C->getSplatValue() != nullptr;
408 }
409
410 if (auto *Shuf = dyn_cast<ShuffleVectorInst>(V)) {
411 // FIXME: We can safely allow undefs here. If Index was specified, we will
412 // check that the mask elt is defined at the required index.
413 if (!all_equal(Shuf->getShuffleMask()))
414 return false;
415
416 // Match any index.
417 if (Index == -1)
418 return true;
419
420 // Match a specific element. The mask should be defined at and match the
421 // specified index.
422 return Shuf->getMaskValue(Index) == Index;
423 }
424
425 // The remaining tests are all recursive, so bail out if we hit the limit.
// NOTE(review): the depth-limit 'if' condition belonging to this return is
// missing from this extraction — confirm upstream before relying on Depth
// handling here.
427 return false;
428
429 // If both operands of a binop are splats, the result is a splat.
430 Value *X, *Y, *Z;
431 if (match(V, m_BinOp(m_Value(X), m_Value(Y))))
432 return isSplatValue(X, Index, Depth) && isSplatValue(Y, Index, Depth);
433
434 // If all operands of a select are splats, the result is a splat.
435 if (match(V, m_Select(m_Value(X), m_Value(Y), m_Value(Z))))
436 return isSplatValue(X, Index, Depth) && isSplatValue(Y, Index, Depth) &&
437 isSplatValue(Z, Index, Depth);
438
439 // TODO: Add support for unary ops (fneg), casts, intrinsics (overflow ops).
440
441 return false;
442}
443
// Computes which elements of the two shuffle sources are demanded, given the
// demanded elements of the shuffle result. Returns false when a demanded
// result element is undef in the mask (unless \p AllowUndefElts), i.e. when
// nothing can be concluded about the sources.
// NOTE(review): the signature line (with SrcWidth/Mask parameters) is missing
// from this extraction — confirm upstream.
 445 const APInt &DemandedElts, APInt &DemandedLHS,
 446 APInt &DemandedRHS, bool AllowUndefElts) {
 447 DemandedLHS = DemandedRHS = APInt::getZero(SrcWidth);
 448
 449 // Early out if we don't demand any elements.
 450 if (DemandedElts.isZero())
 451 return true;
 452
 453 // Simple case of a shuffle with zeroinitializer.
 454 if (all_of(Mask, equal_to(0))) {
 455 DemandedLHS.setBit(0);
 456 return true;
 457 }
 458
 459 for (unsigned I = 0, E = Mask.size(); I != E; ++I) {
 460 int M = Mask[I];
 461 assert((-1 <= M) && (M < (SrcWidth * 2)) &&
 462 "Invalid shuffle mask constant");
 463
 464 if (!DemandedElts[I] || (AllowUndefElts && (M < 0)))
 465 continue;
 466
 467 // For undef elements, we don't know anything about the common state of
 468 // the shuffle result.
 469 if (M < 0)
 470 return false;
 471
 // Mask values below SrcWidth pick from the LHS source, the rest from RHS.
 472 if (M < SrcWidth)
 473 DemandedLHS.setBit(M);
 474 else
 475 DemandedRHS.setBit(M - SrcWidth);
 476 }
 477
 478 return true;
 479}
480
// Matches masks where every defined element is drawn from at most two
// "slides": each element i is characterized by a (source, diff) pair, with
// source = 0/1 (first/second input) and diff = i - (Mask[i] % NumElts). The
// two pairs found are recorded in \p SrcInfo.
// NOTE(review): the signature line (with Mask/NumElts parameters) is missing
// from this extraction — confirm upstream.
 482 std::array<std::pair<int, int>, 2> &SrcInfo) {
 // SignalValue is an impossible diff, used to mark an unused SrcInfo slot.
 483 const int SignalValue = NumElts * 2;
 484 SrcInfo[0] = {-1, SignalValue};
 485 SrcInfo[1] = {-1, SignalValue};
 486 for (auto [i, M] : enumerate(Mask)) {
 487 if (M < 0)
 488 continue;
 489 int Src = M >= NumElts;
 490 int Diff = (int)i - (M % NumElts);
 491 bool Match = false;
 // Claim the first free slot, or match an already-recorded slide.
 492 for (int j = 0; j < 2; j++) {
 493 auto &[SrcE, DiffE] = SrcInfo[j];
 494 if (SrcE == -1) {
 495 assert(DiffE == SignalValue);
 496 SrcE = Src;
 497 DiffE = Diff;
 498 }
 499 if (SrcE == Src && DiffE == Diff) {
 500 Match = true;
 501 break;
 502 }
 503 }
 504 if (!Match)
 505 return false;
 506 }
 507 // Avoid all undef masks
 508 return SrcInfo[0].first != -1;
 509}
510
// Replicates each mask element Scale times into ScaledMask, rescaling the
// indices: element M becomes Scale*M .. Scale*M+Scale-1. Negative (sentinel)
// elements are replicated unchanged. Narrowing always succeeds.
// NOTE(review): the signature line (with Scale/Mask parameters) is missing
// from this extraction — confirm upstream.
 512 SmallVectorImpl<int> &ScaledMask) {
 513 assert(Scale > 0 && "Unexpected scaling factor");
 514
 515 // Fast-path: if no scaling, then it is just a copy.
 516 if (Scale == 1) {
 517 ScaledMask.assign(Mask.begin(), Mask.end());
 518 return;
 519 }
 520
 521 ScaledMask.clear();
 522 for (int MaskElt : Mask) {
 523 if (MaskElt >= 0) {
 524 assert(((uint64_t)Scale * MaskElt + (Scale - 1)) <= INT32_MAX &&
 525 "Overflowed 32-bits");
 526 }
 527 for (int SliceElt = 0; SliceElt != Scale; ++SliceElt)
 528 ScaledMask.push_back(MaskElt < 0 ? MaskElt : Scale * MaskElt + SliceElt);
 529 }
 530}
531
// Attempts to combine each group of Scale consecutive mask elements into one
// wide element. Succeeds only if every group is either a run of consecutive
// indices starting at a multiple of Scale, or a uniform negative sentinel.
// NOTE(review): the signature line (with Scale/Mask parameters) is missing
// from this extraction — confirm upstream.
 533 SmallVectorImpl<int> &ScaledMask) {
 534 assert(Scale > 0 && "Unexpected scaling factor");
 535
 536 // Fast-path: if no scaling, then it is just a copy.
 537 if (Scale == 1) {
 538 ScaledMask.assign(Mask.begin(), Mask.end());
 539 return true;
 540 }
 541
 542 // We must map the original elements down evenly to a type with less elements.
 543 int NumElts = Mask.size();
 544 if (NumElts % Scale != 0)
 545 return false;
 546
 547 ScaledMask.clear();
 548 ScaledMask.reserve(NumElts / Scale);
 549
 550 // Step through the input mask by splitting into Scale-sized slices.
 551 do {
 552 ArrayRef<int> MaskSlice = Mask.take_front(Scale);
 553 assert((int)MaskSlice.size() == Scale && "Expected Scale-sized slice.");
 554
 555 // The first element of the slice determines how we evaluate this slice.
 556 int SliceFront = MaskSlice.front();
 557 if (SliceFront < 0) {
 558 // Negative values (undef or other "sentinel" values) must be equal across
 559 // the entire slice.
 560 if (!all_equal(MaskSlice))
 561 return false;
 562 ScaledMask.push_back(SliceFront);
 563 } else {
 564 // A positive mask element must be cleanly divisible.
 565 if (SliceFront % Scale != 0)
 566 return false;
 567 // Elements of the slice must be consecutive.
 568 for (int i = 1; i < Scale; ++i)
 569 if (MaskSlice[i] != SliceFront + i)
 570 return false;
 571 ScaledMask.push_back(SliceFront / Scale);
 572 }
 573 Mask = Mask.drop_front(Scale);
 574 } while (!Mask.empty());
 575
 576 assert((int)ScaledMask.size() * Scale == NumElts && "Unexpected scaled mask");
 577
 578 // All elements of the original mask can be scaled down to map to the elements
 579 // of a mask with wider elements.
 580 return true;
 581}
582
// Pairwise widening: folds adjacent element pairs (2i, 2i+1) of M into one
// element of NewMask. Unlike the Scale-based overload above, this version
// tolerates a single undef inside a pair as long as the defined element pins
// an aligned pair. Fails (and clears NewMask) on any incompatible pair.
// NOTE(review): the signature line (with the M parameter) is missing from
// this extraction — confirm upstream.
 584 SmallVectorImpl<int> &NewMask) {
 585 unsigned NumElts = M.size();
 586 if (NumElts % 2 != 0)
 587 return false;
 588
 589 NewMask.clear();
 590 for (unsigned i = 0; i < NumElts; i += 2) {
 591 int M0 = M[i];
 592 int M1 = M[i + 1];
 593
 594 // If both elements are undef, new mask is undef too.
 595 if (M0 == -1 && M1 == -1) {
 596 NewMask.push_back(-1);
 597 continue;
 598 }
 599
 // Undef low half: accept if the defined high half is the odd element of
 // an aligned pair.
 600 if (M0 == -1 && M1 != -1 && (M1 % 2) == 1) {
 601 NewMask.push_back(M1 / 2);
 602 continue;
 603 }
 604
 // Defined, even-aligned low half: accept a matching consecutive high
 // half, or an undef high half.
 605 if (M0 != -1 && (M0 % 2) == 0 && ((M0 + 1) == M1 || M1 == -1)) {
 606 NewMask.push_back(M0 / 2);
 607 continue;
 608 }
 609
 610 NewMask.clear();
 611 return false;
 612 }
 613
 614 assert(NewMask.size() == NumElts / 2 && "Incorrect size for mask!");
 615 return true;
 616}
617
618bool llvm::scaleShuffleMaskElts(unsigned NumDstElts, ArrayRef<int> Mask,
619 SmallVectorImpl<int> &ScaledMask) {
620 unsigned NumSrcElts = Mask.size();
621 assert(NumSrcElts > 0 && NumDstElts > 0 && "Unexpected scaling factor");
622
623 // Fast-path: if no scaling, then it is just a copy.
624 if (NumSrcElts == NumDstElts) {
625 ScaledMask.assign(Mask.begin(), Mask.end());
626 return true;
627 }
628
629 // Ensure we can find a whole scale factor.
630 assert(((NumSrcElts % NumDstElts) == 0 || (NumDstElts % NumSrcElts) == 0) &&
631 "Unexpected scaling factor");
632
633 if (NumSrcElts > NumDstElts) {
634 int Scale = NumSrcElts / NumDstElts;
635 return widenShuffleMaskElts(Scale, Mask, ScaledMask);
636 }
637
638 int Scale = NumDstElts / NumSrcElts;
639 narrowShuffleMaskElts(Scale, Mask, ScaledMask);
640 return true;
641}
642
// Repeatedly widens Mask by every factor from 2 upward until no widening
// applies, leaving the widest-element equivalent of the mask in ScaledMask.
// NOTE(review): the signature line (with the Mask parameter) is missing from
// this extraction — confirm upstream.
 644 SmallVectorImpl<int> &ScaledMask) {
 // Two scratch buffers are ping-ponged so that InputMask always references
 // storage distinct from the buffer being written.
 645 std::array<SmallVector<int, 16>, 2> TmpMasks;
 646 SmallVectorImpl<int> *Output = &TmpMasks[0], *Tmp = &TmpMasks[1];
 647 ArrayRef<int> InputMask = Mask;
 648 for (unsigned Scale = 2; Scale <= InputMask.size(); ++Scale) {
 649 while (widenShuffleMaskElts(Scale, InputMask, *Output)) {
 650 InputMask = *Output;
 651 std::swap(Output, Tmp);
 652 }
 653 }
 654 ScaledMask.assign(InputMask.begin(), InputMask.end());
 655}
656
658 ArrayRef<int> Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs,
659 unsigned NumOfUsedRegs, function_ref<void()> NoInputAction,
660 function_ref<void(ArrayRef<int>, unsigned, unsigned)> SingleInputAction,
661 function_ref<void(ArrayRef<int>, unsigned, unsigned, bool)>
662 ManyInputsAction) {
663 SmallVector<SmallVector<SmallVector<int>>> Res(NumOfDestRegs);
664 // Try to perform better estimation of the permutation.
665 // 1. Split the source/destination vectors into real registers.
666 // 2. Do the mask analysis to identify which real registers are
667 // permuted.
668 int Sz = Mask.size();
669 unsigned SzDest = Sz / NumOfDestRegs;
670 unsigned SzSrc = Sz / NumOfSrcRegs;
671 for (unsigned I = 0; I < NumOfDestRegs; ++I) {
672 auto &RegMasks = Res[I];
673 RegMasks.assign(2 * NumOfSrcRegs, {});
674 // Check that the values in dest registers are in the one src
675 // register.
676 for (unsigned K = 0; K < SzDest; ++K) {
677 int Idx = I * SzDest + K;
678 if (Idx == Sz)
679 break;
680 if (Mask[Idx] >= 2 * Sz || Mask[Idx] == PoisonMaskElem)
681 continue;
682 int MaskIdx = Mask[Idx] % Sz;
683 int SrcRegIdx = MaskIdx / SzSrc + (Mask[Idx] >= Sz ? NumOfSrcRegs : 0);
684 // Add a cost of PermuteTwoSrc for each new source register permute,
685 // if we have more than one source registers.
686 if (RegMasks[SrcRegIdx].empty())
687 RegMasks[SrcRegIdx].assign(SzDest, PoisonMaskElem);
688 RegMasks[SrcRegIdx][K] = MaskIdx % SzSrc;
689 }
690 }
691 // Process split mask.
692 for (unsigned I : seq<unsigned>(NumOfUsedRegs)) {
693 auto &Dest = Res[I];
694 int NumSrcRegs =
695 count_if(Dest, [](ArrayRef<int> Mask) { return !Mask.empty(); });
696 switch (NumSrcRegs) {
697 case 0:
698 // No input vectors were used!
699 NoInputAction();
700 break;
701 case 1: {
702 // Find the only mask with at least single undef mask elem.
703 auto *It =
704 find_if(Dest, [](ArrayRef<int> Mask) { return !Mask.empty(); });
705 unsigned SrcReg = std::distance(Dest.begin(), It);
706 SingleInputAction(*It, SrcReg, I);
707 break;
708 }
709 default: {
710 // The first mask is a permutation of a single register. Since we have >2
711 // input registers to shuffle, we merge the masks for 2 first registers
712 // and generate a shuffle of 2 registers rather than the reordering of the
713 // first register and then shuffle with the second register. Next,
714 // generate the shuffles of the resulting register + the remaining
715 // registers from the list.
716 auto &&CombineMasks = [](MutableArrayRef<int> FirstMask,
717 ArrayRef<int> SecondMask) {
718 for (int Idx = 0, VF = FirstMask.size(); Idx < VF; ++Idx) {
719 if (SecondMask[Idx] != PoisonMaskElem) {
720 assert(FirstMask[Idx] == PoisonMaskElem &&
721 "Expected undefined mask element.");
722 FirstMask[Idx] = SecondMask[Idx] + VF;
723 }
724 }
725 };
726 auto &&NormalizeMask = [](MutableArrayRef<int> Mask) {
727 for (int Idx = 0, VF = Mask.size(); Idx < VF; ++Idx) {
728 if (Mask[Idx] != PoisonMaskElem)
729 Mask[Idx] = Idx;
730 }
731 };
732 int SecondIdx;
733 bool NewReg = true;
734 do {
735 int FirstIdx = -1;
736 SecondIdx = -1;
737 MutableArrayRef<int> FirstMask, SecondMask;
738 for (unsigned I : seq<unsigned>(2 * NumOfSrcRegs)) {
739 SmallVectorImpl<int> &RegMask = Dest[I];
740 if (RegMask.empty())
741 continue;
742
743 if (FirstIdx == SecondIdx) {
744 FirstIdx = I;
745 FirstMask = RegMask;
746 continue;
747 }
748 SecondIdx = I;
749 SecondMask = RegMask;
750 CombineMasks(FirstMask, SecondMask);
751 ManyInputsAction(FirstMask, FirstIdx, SecondIdx, NewReg);
752 NewReg = false;
753 NormalizeMask(FirstMask);
754 RegMask.clear();
755 SecondMask = FirstMask;
756 SecondIdx = FirstIdx;
757 }
758 if (FirstIdx != SecondIdx && SecondIdx >= 0) {
759 CombineMasks(SecondMask, FirstMask);
760 ManyInputsAction(SecondMask, SecondIdx, FirstIdx, NewReg);
761 NewReg = false;
762 Dest[FirstIdx].clear();
763 NormalizeMask(SecondMask);
764 }
765 } while (SecondIdx >= 0);
766 break;
767 }
768 }
769 }
770}
771
772void llvm::getHorizDemandedEltsForFirstOperand(unsigned VectorBitWidth,
773 const APInt &DemandedElts,
774 APInt &DemandedLHS,
775 APInt &DemandedRHS) {
776 assert(VectorBitWidth >= 128 && "Vectors smaller than 128 bit not supported");
777 int NumLanes = VectorBitWidth / 128;
778 int NumElts = DemandedElts.getBitWidth();
779 int NumEltsPerLane = NumElts / NumLanes;
780 int HalfEltsPerLane = NumEltsPerLane / 2;
781
782 DemandedLHS = APInt::getZero(NumElts);
783 DemandedRHS = APInt::getZero(NumElts);
784
785 // Map DemandedElts to the horizontal operands.
786 for (int Idx = 0; Idx != NumElts; ++Idx) {
787 if (!DemandedElts[Idx])
788 continue;
789 int LaneIdx = (Idx / NumEltsPerLane) * NumEltsPerLane;
790 int LocalIdx = Idx % NumEltsPerLane;
791 if (LocalIdx < HalfEltsPerLane) {
792 DemandedLHS.setBit(LaneIdx + 2 * LocalIdx);
793 } else {
794 LocalIdx -= HalfEltsPerLane;
795 DemandedRHS.setBit(LaneIdx + 2 * LocalIdx);
796 }
797 }
798}
799
802 const TargetTransformInfo *TTI) {
803
804 // DemandedBits will give us every value's live-out bits. But we want
805 // to ensure no extra casts would need to be inserted, so every DAG
806 // of connected values must have the same minimum bitwidth.
812 SmallPtrSet<Instruction *, 4> InstructionSet;
814
815 // Determine the roots. We work bottom-up, from truncs or icmps.
816 bool SeenExtFromIllegalType = false;
817 for (auto *BB : Blocks)
818 for (auto &I : *BB) {
819 InstructionSet.insert(&I);
820
821 if (TTI && (isa<ZExtInst>(&I) || isa<SExtInst>(&I)) &&
822 !TTI->isTypeLegal(I.getOperand(0)->getType()))
823 SeenExtFromIllegalType = true;
824
825 // Only deal with non-vector integers up to 64-bits wide.
826 if ((isa<TruncInst>(&I) || isa<ICmpInst>(&I)) &&
827 !I.getType()->isVectorTy() &&
828 I.getOperand(0)->getType()->getScalarSizeInBits() <= 64) {
829 // Don't make work for ourselves. If we know the loaded type is legal,
830 // don't add it to the worklist.
831 if (TTI && isa<TruncInst>(&I) && TTI->isTypeLegal(I.getType()))
832 continue;
833
834 Worklist.push_back(&I);
835 Roots.insert(&I);
836 }
837 }
838 // Early exit.
839 if (Worklist.empty() || (TTI && !SeenExtFromIllegalType))
840 return MinBWs;
841
842 // Now proceed breadth-first, unioning values together.
843 while (!Worklist.empty()) {
844 Instruction *I = Worklist.pop_back_val();
845 Value *Leader = ECs.getOrInsertLeaderValue(I);
846
847 if (!Visited.insert(I).second)
848 continue;
849
850 // If we encounter a type that is larger than 64 bits, we can't represent
851 // it so bail out.
852 if (DB.getDemandedBits(I).getBitWidth() > 64)
854
855 uint64_t V = DB.getDemandedBits(I).getZExtValue();
856 DBits[Leader] |= V;
857 DBits[I] = V;
858
859 // Casts, loads and instructions outside of our range terminate a chain
860 // successfully.
862 !InstructionSet.count(I))
863 continue;
864
865 // Unsafe casts terminate a chain unsuccessfully. We can't do anything
866 // useful with bitcasts, ptrtoints or inttoptrs and it'd be unsafe to
867 // transform anything that relies on them.
869 !I->getType()->isIntegerTy()) {
870 DBits[Leader] |= ~0ULL;
871 continue;
872 }
873
874 // We don't modify the types of PHIs. Reductions will already have been
875 // truncated if possible, and inductions' sizes will have been chosen by
876 // indvars.
877 if (isa<PHINode>(I))
878 continue;
879
880 // Don't modify the types of operands of a call, as doing that would cause a
881 // signature mismatch.
882 if (isa<CallBase>(I))
883 continue;
884
885 if (DBits[Leader] == ~0ULL)
886 // All bits demanded, no point continuing.
887 continue;
888
889 for (Value *O : I->operands()) {
890 ECs.unionSets(Leader, O);
891 if (auto *OI = dyn_cast<Instruction>(O))
892 Worklist.push_back(OI);
893 }
894 }
895
896 // Now we've discovered all values, walk them to see if there are
897 // any users we didn't see. If there are, we can't optimize that
898 // chain.
899 for (auto &I : DBits)
900 for (auto *U : I.first->users())
901 if (U->getType()->isIntegerTy() && DBits.count(U) == 0)
902 DBits[ECs.getOrInsertLeaderValue(I.first)] |= ~0ULL;
903
904 for (const auto &E : ECs) {
905 if (!E->isLeader())
906 continue;
907 uint64_t LeaderDemandedBits = 0;
908 for (Value *M : ECs.members(*E))
909 LeaderDemandedBits |= DBits[M];
910
911 uint64_t MinBW = llvm::bit_width(LeaderDemandedBits);
912 // Round up to a power of 2
913 MinBW = llvm::bit_ceil(MinBW);
914
915 // We don't modify the types of PHIs. Reductions will already have been
916 // truncated if possible, and inductions' sizes will have been chosen by
917 // indvars.
918 // If we are required to shrink a PHI, abandon this entire equivalence class.
919 bool Abort = false;
920 for (Value *M : ECs.members(*E))
921 if (isa<PHINode>(M) && MinBW < M->getType()->getScalarSizeInBits()) {
922 Abort = true;
923 break;
924 }
925 if (Abort)
926 continue;
927
928 for (Value *M : ECs.members(*E)) {
929 auto *MI = dyn_cast<Instruction>(M);
930 if (!MI)
931 continue;
932 Type *Ty = M->getType();
933 if (Roots.count(MI))
934 Ty = MI->getOperand(0)->getType();
935
936 if (MinBW >= Ty->getScalarSizeInBits())
937 continue;
938
939 // If any of M's operands demand more bits than MinBW then M cannot be
940 // performed safely in MinBW.
941 auto *Call = dyn_cast<CallBase>(MI);
942 auto Ops = Call ? Call->args() : MI->operands();
943 if (any_of(Ops, [&DB, MinBW](Use &U) {
944 auto *CI = dyn_cast<ConstantInt>(U);
945 // For constants shift amounts, check if the shift would result in
946 // poison.
947 if (CI &&
949 U.getOperandNo() == 1)
950 return CI->uge(MinBW);
951 uint64_t BW = bit_width(DB.getDemandedBits(&U).getZExtValue());
952 return bit_ceil(BW) > MinBW;
953 }))
954 continue;
955
956 MinBWs[MI] = MinBW;
957 }
958 }
959
960 return MinBWs;
961}
962
963/// Add all access groups in @p AccGroups to @p List.
964template <typename ListT>
965static void addToAccessGroupList(ListT &List, MDNode *AccGroups) {
966 // Interpret an access group as a list containing itself.
967 if (AccGroups->getNumOperands() == 0) {
968 assert(isValidAsAccessGroup(AccGroups) && "Node must be an access group");
969 List.insert(AccGroups);
970 return;
971 }
972
973 for (const auto &AccGroupListOp : AccGroups->operands()) {
974 auto *Item = cast<MDNode>(AccGroupListOp.get());
975 assert(isValidAsAccessGroup(Item) && "List item must be an access group");
976 List.insert(Item);
977 }
978}
979
// Returns a metadata node holding the union of the two access-group lists.
// Trivial cases (either side null, or identical nodes) avoid building a new
// node; a singleton union returns the lone group directly.
980MDNode *llvm::uniteAccessGroups(MDNode *AccGroups1, MDNode *AccGroups2) {
981 if (!AccGroups1)
982 return AccGroups2;
983 if (!AccGroups2)
984 return AccGroups1;
985 if (AccGroups1 == AccGroups2)
986 return AccGroups1;
987
// NOTE(review): the declaration of 'Union' (presumably a small set-vector of
// Metadata*) is missing from this extraction — confirm upstream.
989 addToAccessGroupList(Union, AccGroups1);
990 addToAccessGroupList(Union, AccGroups2);
991
992 if (Union.size() == 0)
993 return nullptr;
994 if (Union.size() == 1)
995 return cast<MDNode>(Union.front());
996
997 LLVMContext &Ctx = AccGroups1->getContext();
998 return MDNode::get(Ctx, Union.getArrayRef());
999}
1000
1002 const Instruction *Inst2) {
1003 bool MayAccessMem1 = Inst1->mayReadOrWriteMemory();
1004 bool MayAccessMem2 = Inst2->mayReadOrWriteMemory();
1005
1006 if (!MayAccessMem1 && !MayAccessMem2)
1007 return nullptr;
1008 if (!MayAccessMem1)
1009 return Inst2->getMetadata(LLVMContext::MD_access_group);
1010 if (!MayAccessMem2)
1011 return Inst1->getMetadata(LLVMContext::MD_access_group);
1012
1013 MDNode *MD1 = Inst1->getMetadata(LLVMContext::MD_access_group);
1014 MDNode *MD2 = Inst2->getMetadata(LLVMContext::MD_access_group);
1015 if (!MD1 || !MD2)
1016 return nullptr;
1017 if (MD1 == MD2)
1018 return MD1;
1019
1020 // Use set for scalable 'contains' check.
1021 SmallPtrSet<Metadata *, 4> AccGroupSet2;
1022 addToAccessGroupList(AccGroupSet2, MD2);
1023
1024 SmallVector<Metadata *, 4> Intersection;
1025 if (MD1->getNumOperands() == 0) {
1026 assert(isValidAsAccessGroup(MD1) && "Node must be an access group");
1027 if (AccGroupSet2.count(MD1))
1028 Intersection.push_back(MD1);
1029 } else {
1030 for (const MDOperand &Node : MD1->operands()) {
1031 auto *Item = cast<MDNode>(Node.get());
1032 assert(isValidAsAccessGroup(Item) && "List item must be an access group");
1033 if (AccGroupSet2.count(Item))
1034 Intersection.push_back(Item);
1035 }
1036 }
1037
1038 if (Intersection.size() == 0)
1039 return nullptr;
1040 if (Intersection.size() == 1)
1041 return cast<MDNode>(Intersection.front());
1042
1043 LLVMContext &Ctx = Inst1->getContext();
1044 return MDNode::get(Ctx, Intersection);
1045}
1046
1047/// Add metadata from \p Inst to \p Metadata, if it can be preserved after
1048/// vectorization.
1050 Instruction *Inst,
1051 SmallVectorImpl<std::pair<unsigned, MDNode *>> &Metadata) {
1053 static const unsigned SupportedIDs[] = {
1054 LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
1055 LLVMContext::MD_noalias, LLVMContext::MD_fpmath,
1056 LLVMContext::MD_nontemporal, LLVMContext::MD_invariant_load,
1057 LLVMContext::MD_access_group, LLVMContext::MD_mmra};
1058
1059 // Remove any unsupported metadata kinds from Metadata.
1060 for (unsigned Idx = 0; Idx != Metadata.size();) {
1061 if (is_contained(SupportedIDs, Metadata[Idx].first)) {
1062 ++Idx;
1063 } else {
1064 // Swap element to end and remove it.
1065 std::swap(Metadata[Idx], Metadata.back());
1066 Metadata.pop_back();
1067 }
1068 }
1069}
1070
1071/// \returns \p I after propagating metadata from \p VL.
1073 if (VL.empty())
1074 return Inst;
1077
1078 for (auto &[Kind, MD] : Metadata) {
1079 // Skip MMRA metadata if the instruction cannot have it.
1080 if (Kind == LLVMContext::MD_mmra && !canInstructionHaveMMRAs(*Inst))
1081 continue;
1082
1083 for (int J = 1, E = VL.size(); MD && J != E; ++J) {
1084 const Instruction *IJ = cast<Instruction>(VL[J]);
1085 MDNode *IMD = IJ->getMetadata(Kind);
1086
1087 switch (Kind) {
1088 case LLVMContext::MD_mmra: {
1089 MD = MMRAMetadata::combine(Inst->getContext(), MD, IMD);
1090 break;
1091 }
1092 case LLVMContext::MD_tbaa:
1093 MD = MDNode::getMostGenericTBAA(MD, IMD);
1094 break;
1095 case LLVMContext::MD_alias_scope:
1097 break;
1098 case LLVMContext::MD_fpmath:
1099 MD = MDNode::getMostGenericFPMath(MD, IMD);
1100 break;
1101 case LLVMContext::MD_noalias:
1102 case LLVMContext::MD_nontemporal:
1103 case LLVMContext::MD_invariant_load:
1104 MD = MDNode::intersect(MD, IMD);
1105 break;
1106 case LLVMContext::MD_access_group:
1107 MD = intersectAccessGroups(Inst, IJ);
1108 break;
1109 default:
1110 llvm_unreachable("unhandled metadata");
1111 }
1112 }
1113
1114 Inst->setMetadata(Kind, MD);
1115 }
1116
1117 return Inst;
1118}
1119
1120Constant *
1122 const InterleaveGroup<Instruction> &Group) {
1123 // All 1's means mask is not needed.
1124 if (Group.isFull())
1125 return nullptr;
1126
1127 // TODO: support reversed access.
1128 assert(!Group.isReverse() && "Reversed group not supported.");
1129
1131 for (unsigned i = 0; i < VF; i++)
1132 for (unsigned j = 0; j < Group.getFactor(); ++j) {
1133 unsigned HasMember = Group.getMember(j) ? 1 : 0;
1134 Mask.push_back(Builder.getInt1(HasMember));
1135 }
1136
1137 return ConstantVector::get(Mask);
1138}
1139
1141llvm::createReplicatedMask(unsigned ReplicationFactor, unsigned VF) {
1142 SmallVector<int, 16> MaskVec;
1143 for (unsigned i = 0; i < VF; i++)
1144 for (unsigned j = 0; j < ReplicationFactor; j++)
1145 MaskVec.push_back(i);
1146
1147 return MaskVec;
1148}
1149
1151 unsigned NumVecs) {
1153 for (unsigned i = 0; i < VF; i++)
1154 for (unsigned j = 0; j < NumVecs; j++)
1155 Mask.push_back(j * VF + i);
1156
1157 return Mask;
1158}
1159
1161llvm::createStrideMask(unsigned Start, unsigned Stride, unsigned VF) {
1163 for (unsigned i = 0; i < VF; i++)
1164 Mask.push_back(Start + i * Stride);
1165
1166 return Mask;
1167}
1168
1170 unsigned NumInts,
1171 unsigned NumUndefs) {
1173 for (unsigned i = 0; i < NumInts; i++)
1174 Mask.push_back(Start + i);
1175
1176 for (unsigned i = 0; i < NumUndefs; i++)
1177 Mask.push_back(-1);
1178
1179 return Mask;
1180}
1181
1183 unsigned NumElts) {
1184 // Avoid casts in the loop and make sure we have a reasonable number.
1185 int NumEltsSigned = NumElts;
1186 assert(NumEltsSigned > 0 && "Expected smaller or non-zero element count");
1187
1188 // If the mask chooses an element from operand 1, reduce it to choose from the
1189 // corresponding element of operand 0. Undef mask elements are unchanged.
1190 SmallVector<int, 16> UnaryMask;
1191 for (int MaskElt : Mask) {
1192 assert((MaskElt < NumEltsSigned * 2) && "Expected valid shuffle mask");
1193 int UnaryElt = MaskElt >= NumEltsSigned ? MaskElt - NumEltsSigned : MaskElt;
1194 UnaryMask.push_back(UnaryElt);
1195 }
1196 return UnaryMask;
1197}
1198
1199/// A helper function for concatenating vectors. This function concatenates two
1200/// vectors having the same element type. If the second vector has fewer
1201/// elements than the first, it is padded with undefs.
1203 Value *V2) {
1204 VectorType *VecTy1 = dyn_cast<VectorType>(V1->getType());
1205 VectorType *VecTy2 = dyn_cast<VectorType>(V2->getType());
1206 assert(VecTy1 && VecTy2 &&
1207 VecTy1->getScalarType() == VecTy2->getScalarType() &&
1208 "Expect two vectors with the same element type");
1209
1210 unsigned NumElts1 = cast<FixedVectorType>(VecTy1)->getNumElements();
1211 unsigned NumElts2 = cast<FixedVectorType>(VecTy2)->getNumElements();
1212 assert(NumElts1 >= NumElts2 && "Unexpect the first vector has less elements");
1213
1214 if (NumElts1 > NumElts2) {
1215 // Extend with UNDEFs.
1216 V2 = Builder.CreateShuffleVector(
1217 V2, createSequentialMask(0, NumElts2, NumElts1 - NumElts2));
1218 }
1219
1220 return Builder.CreateShuffleVector(
1221 V1, V2, createSequentialMask(0, NumElts1 + NumElts2, 0));
1222}
1223
1225 ArrayRef<Value *> Vecs) {
1226 unsigned NumVecs = Vecs.size();
1227 assert(NumVecs > 1 && "Should be at least two vectors");
1228
1230 ResList.append(Vecs.begin(), Vecs.end());
1231 do {
1233 for (unsigned i = 0; i < NumVecs - 1; i += 2) {
1234 Value *V0 = ResList[i], *V1 = ResList[i + 1];
1235 assert((V0->getType() == V1->getType() || i == NumVecs - 2) &&
1236 "Only the last vector may have a different type");
1237
1238 TmpList.push_back(concatenateTwoVectors(Builder, V0, V1));
1239 }
1240
1241 // Push the last vector if the total number of vectors is odd.
1242 if (NumVecs % 2 != 0)
1243 TmpList.push_back(ResList[NumVecs - 1]);
1244
1245 ResList = TmpList;
1246 NumVecs = ResList.size();
1247 } while (NumVecs > 1);
1248
1249 return ResList[0];
1250}
1251
1253 assert(isa<VectorType>(Mask->getType()) &&
1254 isa<IntegerType>(Mask->getType()->getScalarType()) &&
1255 cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() ==
1256 1 &&
1257 "Mask must be a vector of i1");
1258
1259 auto *ConstMask = dyn_cast<Constant>(Mask);
1260 if (!ConstMask)
1261 return false;
1262 if (ConstMask->isNullValue() || isa<UndefValue>(ConstMask))
1263 return true;
1264 if (isa<ScalableVectorType>(ConstMask->getType()))
1265 return false;
1266 for (unsigned
1267 I = 0,
1268 E = cast<FixedVectorType>(ConstMask->getType())->getNumElements();
1269 I != E; ++I) {
1270 if (auto *MaskElt = ConstMask->getAggregateElement(I))
1271 if (MaskElt->isNullValue() || isa<UndefValue>(MaskElt))
1272 continue;
1273 return false;
1274 }
1275 return true;
1276}
1277
1279 assert(isa<VectorType>(Mask->getType()) &&
1280 isa<IntegerType>(Mask->getType()->getScalarType()) &&
1281 cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() ==
1282 1 &&
1283 "Mask must be a vector of i1");
1284
1285 auto *ConstMask = dyn_cast<Constant>(Mask);
1286 if (!ConstMask)
1287 return false;
1288 if (ConstMask->isAllOnesValue() || isa<UndefValue>(ConstMask))
1289 return true;
1290 if (isa<ScalableVectorType>(ConstMask->getType()))
1291 return false;
1292 for (unsigned
1293 I = 0,
1294 E = cast<FixedVectorType>(ConstMask->getType())->getNumElements();
1295 I != E; ++I) {
1296 if (auto *MaskElt = ConstMask->getAggregateElement(I))
1297 if (MaskElt->isAllOnesValue() || isa<UndefValue>(MaskElt))
1298 continue;
1299 return false;
1300 }
1301 return true;
1302}
1303
1305 assert(isa<VectorType>(Mask->getType()) &&
1306 isa<IntegerType>(Mask->getType()->getScalarType()) &&
1307 cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() ==
1308 1 &&
1309 "Mask must be a vector of i1");
1310
1311 auto *ConstMask = dyn_cast<Constant>(Mask);
1312 if (!ConstMask)
1313 return false;
1314 if (ConstMask->isAllOnesValue() || isa<UndefValue>(ConstMask))
1315 return true;
1316 if (isa<ScalableVectorType>(ConstMask->getType()))
1317 return false;
1318 for (unsigned
1319 I = 0,
1320 E = cast<FixedVectorType>(ConstMask->getType())->getNumElements();
1321 I != E; ++I) {
1322 if (auto *MaskElt = ConstMask->getAggregateElement(I))
1323 if (MaskElt->isAllOnesValue() || isa<UndefValue>(MaskElt))
1324 return true;
1325 }
1326 return false;
1327}
1328
1329/// TODO: This is a lot like known bits, but for
1330/// vectors. Is there something we can common this with?
1332 assert(isa<FixedVectorType>(Mask->getType()) &&
1333 isa<IntegerType>(Mask->getType()->getScalarType()) &&
1334 cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() ==
1335 1 &&
1336 "Mask must be a fixed width vector of i1");
1337
1338 const unsigned VWidth =
1339 cast<FixedVectorType>(Mask->getType())->getNumElements();
1340 APInt DemandedElts = APInt::getAllOnes(VWidth);
1341 if (auto *CV = dyn_cast<ConstantVector>(Mask))
1342 for (unsigned i = 0; i < VWidth; i++)
1343 if (CV->getAggregateElement(i)->isNullValue())
1344 DemandedElts.clearBit(i);
1345 return DemandedElts;
1346}
1347
1348bool InterleavedAccessInfo::isStrided(int Stride) {
1349 unsigned Factor = std::abs(Stride);
1350 return Factor >= 2 && Factor <= MaxInterleaveGroupFactor;
1351}
1352
1353void InterleavedAccessInfo::collectConstStrideAccesses(
1355 const DenseMap<Value*, const SCEV*> &Strides) {
1356 auto &DL = TheLoop->getHeader()->getDataLayout();
1357
1358 // Since it's desired that the load/store instructions be maintained in
1359 // "program order" for the interleaved access analysis, we have to visit the
1360 // blocks in the loop in reverse postorder (i.e., in a topological order).
1361 // Such an ordering will ensure that any load/store that may be executed
1362 // before a second load/store will precede the second load/store in
1363 // AccessStrideInfo.
1364 LoopBlocksDFS DFS(TheLoop);
1365 DFS.perform(LI);
1366 for (BasicBlock *BB : make_range(DFS.beginRPO(), DFS.endRPO()))
1367 for (auto &I : *BB) {
1369 if (!Ptr)
1370 continue;
1371 Type *ElementTy = getLoadStoreType(&I);
1372
1373 // Currently, codegen doesn't support cases where the type size doesn't
1374 // match the alloc size. Skip them for now.
1375 uint64_t Size = DL.getTypeAllocSize(ElementTy);
1376 if (Size * 8 != DL.getTypeSizeInBits(ElementTy))
1377 continue;
1378
1379 // We don't check wrapping here because we don't know yet if Ptr will be
1380 // part of a full group or a group with gaps. Checking wrapping for all
1381 // pointers (even those that end up in groups with no gaps) will be overly
1382 // conservative. For full groups, wrapping should be ok since if we would
1383 // wrap around the address space we would do a memory access at nullptr
1384 // even without the transformation. The wrapping checks are therefore
1385 // deferred until after we've formed the interleaved groups.
1386 int64_t Stride = getPtrStride(PSE, ElementTy, Ptr, TheLoop, *DT, Strides,
1387 /*Assume=*/true, /*ShouldCheckWrap=*/false)
1388 .value_or(0);
1389
1390 const SCEV *Scev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
1391 AccessStrideInfo[&I] = StrideDescriptor(Stride, Scev, Size,
1393 }
1394}
1395
1396// Analyze interleaved accesses and collect them into interleaved load and
1397// store groups.
1398//
1399// When generating code for an interleaved load group, we effectively hoist all
1400// loads in the group to the location of the first load in program order. When
1401// generating code for an interleaved store group, we sink all stores to the
1402// location of the last store. This code motion can change the order of load
1403// and store instructions and may break dependences.
1404//
1405// The code generation strategy mentioned above ensures that we won't violate
1406// any write-after-read (WAR) dependences.
1407//
1408// E.g., for the WAR dependence: a = A[i]; // (1)
1409// A[i] = b; // (2)
1410//
1411// The store group of (2) is always inserted at or below (2), and the load
1412// group of (1) is always inserted at or above (1). Thus, the instructions will
1413// never be reordered. All other dependences are checked to ensure the
1414// correctness of the instruction reordering.
1415//
1416// The algorithm visits all memory accesses in the loop in bottom-up program
1417// order. Program order is established by traversing the blocks in the loop in
1418// reverse postorder when collecting the accesses.
1419//
1420// We visit the memory accesses in bottom-up order because it can simplify the
1421// construction of store groups in the presence of write-after-write (WAW)
1422// dependences.
1423//
1424// E.g., for the WAW dependence: A[i] = a; // (1)
1425// A[i] = b; // (2)
1426// A[i + 1] = c; // (3)
1427//
1428// We will first create a store group with (3) and (2). (1) can't be added to
1429// this group because it and (2) are dependent. However, (1) can be grouped
1430// with other accesses that may precede it in program order. Note that a
1431// bottom-up order does not imply that WAW dependences should not be checked.
1433 bool EnablePredicatedInterleavedMemAccesses) {
1434 LLVM_DEBUG(dbgs() << "LV: Analyzing interleaved accesses...\n");
1435 const auto &Strides = LAI->getSymbolicStrides();
1436
1437 // Holds all accesses with a constant stride.
1439 collectConstStrideAccesses(AccessStrideInfo, Strides);
1440
1441 if (AccessStrideInfo.empty())
1442 return;
1443
1444 // Collect the dependences in the loop.
1445 collectDependences();
1446
1447 // Holds all interleaved store groups temporarily.
1449 // Holds all interleaved load groups temporarily.
1451 // Groups added to this set cannot have new members added.
1452 SmallPtrSet<InterleaveGroup<Instruction> *, 4> CompletedLoadGroups;
1453
1454 // Search in bottom-up program order for pairs of accesses (A and B) that can
1455 // form interleaved load or store groups. In the algorithm below, access A
1456 // precedes access B in program order. We initialize a group for B in the
1457 // outer loop of the algorithm, and then in the inner loop, we attempt to
1458 // insert each A into B's group if:
1459 //
1460 // 1. A and B have the same stride,
1461 // 2. A and B have the same memory object size, and
1462 // 3. A belongs in B's group according to its distance from B.
1463 //
1464 // Special care is taken to ensure group formation will not break any
1465 // dependences.
1466 for (auto BI = AccessStrideInfo.rbegin(), E = AccessStrideInfo.rend();
1467 BI != E; ++BI) {
1468 Instruction *B = BI->first;
1469 StrideDescriptor DesB = BI->second;
1470
1471 // Initialize a group for B if it has an allowable stride. Even if we don't
1472 // create a group for B, we continue with the bottom-up algorithm to ensure
1473 // we don't break any of B's dependences.
1474 InterleaveGroup<Instruction> *GroupB = nullptr;
1475 if (isStrided(DesB.Stride) &&
1476 (!isPredicated(B->getParent()) || EnablePredicatedInterleavedMemAccesses)) {
1477 GroupB = getInterleaveGroup(B);
1478 if (!GroupB) {
1479 LLVM_DEBUG(dbgs() << "LV: Creating an interleave group with:" << *B
1480 << '\n');
1481 GroupB = createInterleaveGroup(B, DesB.Stride, DesB.Alignment);
1482 if (B->mayWriteToMemory())
1483 StoreGroups.insert(GroupB);
1484 else
1485 LoadGroups.insert(GroupB);
1486 }
1487 }
1488
1489 for (auto AI = std::next(BI); AI != E; ++AI) {
1490 Instruction *A = AI->first;
1491 StrideDescriptor DesA = AI->second;
1492
1493 // Our code motion strategy implies that we can't have dependences
1494 // between accesses in an interleaved group and other accesses located
1495 // between the first and last member of the group. Note that this also
1496 // means that a group can't have more than one member at a given offset.
1497 // The accesses in a group can have dependences with other accesses, but
1498 // we must ensure we don't extend the boundaries of the group such that
1499 // we encompass those dependent accesses.
1500 //
1501 // For example, assume we have the sequence of accesses shown below in a
1502 // stride-2 loop:
1503 //
1504 // (1, 2) is a group | A[i] = a; // (1)
1505 // | A[i-1] = b; // (2) |
1506 // A[i-3] = c; // (3)
1507 // A[i] = d; // (4) | (2, 4) is not a group
1508 //
1509 // Because accesses (2) and (3) are dependent, we can group (2) with (1)
1510 // but not with (4). If we did, the dependent access (3) would be within
1511 // the boundaries of the (2, 4) group.
1512 auto DependentMember = [&](InterleaveGroup<Instruction> *Group,
1513 StrideEntry *A) -> Instruction * {
1514 for (uint32_t Index = 0; Index < Group->getFactor(); ++Index) {
1515 Instruction *MemberOfGroupB = Group->getMember(Index);
1516 if (MemberOfGroupB && !canReorderMemAccessesForInterleavedGroups(
1517 A, &*AccessStrideInfo.find(MemberOfGroupB)))
1518 return MemberOfGroupB;
1519 }
1520 return nullptr;
1521 };
1522
1523 auto GroupA = getInterleaveGroup(A);
1524 // If A is a load, dependencies are tolerable, there's nothing to do here.
1525 // If both A and B belong to the same (store) group, they are independent,
1526 // even if dependencies have not been recorded.
1527 // If both GroupA and GroupB are null, there's nothing to do here.
1528 if (A->mayWriteToMemory() && GroupA != GroupB) {
1529 Instruction *DependentInst = nullptr;
1530 // If GroupB is a load group, we have to compare AI against all
1531 // members of GroupB because if any load within GroupB has a dependency
1532 // on AI, we need to mark GroupB as complete and also release the
1533 // store GroupA (if A belongs to one). The former prevents incorrect
1534 // hoisting of load B above store A while the latter prevents incorrect
1535 // sinking of store A below load B.
1536 if (GroupB && LoadGroups.contains(GroupB))
1537 DependentInst = DependentMember(GroupB, &*AI);
1538 else if (!canReorderMemAccessesForInterleavedGroups(&*AI, &*BI))
1539 DependentInst = B;
1540
1541 if (DependentInst) {
1542 // A has a store dependence on B (or on some load within GroupB) and
1543 // is part of a store group. Release A's group to prevent illegal
1544 // sinking of A below B. A will then be free to form another group
1545 // with instructions that precede it.
1546 if (GroupA && StoreGroups.contains(GroupA)) {
1547 LLVM_DEBUG(dbgs() << "LV: Invalidated store group due to "
1548 "dependence between "
1549 << *A << " and " << *DependentInst << '\n');
1550 StoreGroups.remove(GroupA);
1551 releaseGroup(GroupA);
1552 }
1553 // If B is a load and part of an interleave group, no earlier loads
1554 // can be added to B's interleave group, because this would mean the
1555 // DependentInst would move across store A. Mark the interleave group
1556 // as complete.
1557 if (GroupB && LoadGroups.contains(GroupB)) {
1558 LLVM_DEBUG(dbgs() << "LV: Marking interleave group for " << *B
1559 << " as complete.\n");
1560 CompletedLoadGroups.insert(GroupB);
1561 }
1562 }
1563 }
1564 if (CompletedLoadGroups.contains(GroupB)) {
1565 // Skip trying to add A to B, continue to look for other conflicting A's
1566 // in groups to be released.
1567 continue;
1568 }
1569
1570 // At this point, we've checked for illegal code motion. If either A or B
1571 // isn't strided, there's nothing left to do.
1572 if (!isStrided(DesA.Stride) || !isStrided(DesB.Stride))
1573 continue;
1574
1575 // Ignore A if it's already in a group or isn't the same kind of memory
1576 // operation as B.
1577 // Note that mayReadFromMemory() isn't mutually exclusive to
1578 // mayWriteToMemory in the case of atomic loads. We shouldn't see those
1579 // here, canVectorizeMemory() should have returned false - except for the
1580 // case we asked for optimization remarks.
1581 if (isInterleaved(A) ||
1582 (A->mayReadFromMemory() != B->mayReadFromMemory()) ||
1583 (A->mayWriteToMemory() != B->mayWriteToMemory()))
1584 continue;
1585
1586 // Check rules 1 and 2. Ignore A if its stride or size is different from
1587 // that of B.
1588 if (DesA.Stride != DesB.Stride || DesA.Size != DesB.Size)
1589 continue;
1590
1591 // Ignore A if the memory object of A and B don't belong to the same
1592 // address space
1594 continue;
1595
1596 // Calculate the distance from A to B.
1597 const SCEVConstant *DistToB = dyn_cast<SCEVConstant>(
1598 PSE.getSE()->getMinusSCEV(DesA.Scev, DesB.Scev));
1599 if (!DistToB)
1600 continue;
1601 int64_t DistanceToB = DistToB->getAPInt().getSExtValue();
1602
1603 // Check rule 3. Ignore A if its distance to B is not a multiple of the
1604 // size.
1605 if (DistanceToB % static_cast<int64_t>(DesB.Size))
1606 continue;
1607
1608 // All members of a predicated interleave-group must have the same predicate,
1609 // and currently must reside in the same BB.
1610 BasicBlock *BlockA = A->getParent();
1611 BasicBlock *BlockB = B->getParent();
1612 if ((isPredicated(BlockA) || isPredicated(BlockB)) &&
1613 (!EnablePredicatedInterleavedMemAccesses || BlockA != BlockB))
1614 continue;
1615
1616 // The index of A is the index of B plus A's distance to B in multiples
1617 // of the size.
1618 int IndexA =
1619 GroupB->getIndex(B) + DistanceToB / static_cast<int64_t>(DesB.Size);
1620
1621 // Try to insert A into B's group.
1622 if (GroupB->insertMember(A, IndexA, DesA.Alignment)) {
1623 LLVM_DEBUG(dbgs() << "LV: Inserted:" << *A << '\n'
1624 << " into the interleave group with" << *B
1625 << '\n');
1626 InterleaveGroupMap[A] = GroupB;
1627
1628 // Set the first load in program order as the insert position.
1629 if (A->mayReadFromMemory())
1630 GroupB->setInsertPos(A);
1631 }
1632 } // Iteration over A accesses.
1633 } // Iteration over B accesses.
1634
1635 auto InvalidateGroupIfMemberMayWrap = [&](InterleaveGroup<Instruction> *Group,
1636 int Index,
1637 const char *FirstOrLast) -> bool {
1638 Instruction *Member = Group->getMember(Index);
1639 assert(Member && "Group member does not exist");
1640 Value *MemberPtr = getLoadStorePointerOperand(Member);
1641 Type *AccessTy = getLoadStoreType(Member);
1642 if (getPtrStride(PSE, AccessTy, MemberPtr, TheLoop, *DT, Strides,
1643 /*Assume=*/false, /*ShouldCheckWrap=*/true)
1644 .value_or(0))
1645 return false;
1646 LLVM_DEBUG(dbgs() << "LV: Invalidate candidate interleaved group due to "
1647 << FirstOrLast
1648 << " group member potentially pointer-wrapping.\n");
1649 releaseGroup(Group);
1650 return true;
1651 };
1652
1653 // Remove interleaved groups with gaps whose memory
1654 // accesses may wrap around. We have to revisit the getPtrStride analysis,
1655 // this time with ShouldCheckWrap=true, since collectConstStrideAccesses does
1656 // not check wrapping (see documentation there).
1657 // FORNOW we use Assume=false;
1658 // TODO: Change to Assume=true but making sure we don't exceed the threshold
1659 // of runtime SCEV assumptions checks (thereby potentially failing to
1660 // vectorize altogether).
1661 // Additional optional optimizations:
1662 // TODO: If we are peeling the loop and we know that the first pointer doesn't
1663 // wrap then we can deduce that all pointers in the group don't wrap.
1664 // This means that we can forcefully peel the loop in order to only have to
1665 // check the first pointer for no-wrap. When we'll change to use Assume=true
1666 // we'll only need at most one runtime check per interleaved group.
1667 for (auto *Group : LoadGroups) {
1668 // Case 1: A full group. Can Skip the checks; For full groups, if the wide
1669 // load would wrap around the address space we would do a memory access at
1670 // nullptr even without the transformation.
1671 if (Group->isFull())
1672 continue;
1673
1674 // Case 2: If first and last members of the group don't wrap this implies
1675 // that all the pointers in the group don't wrap.
1676 // So we check only group member 0 (which is always guaranteed to exist),
1677 // and group member Factor - 1; If the latter doesn't exist we rely on
1678 // peeling (if it is a non-reversed access -- see Case 3).
1679 if (InvalidateGroupIfMemberMayWrap(Group, 0, "first"))
1680 continue;
1681 if (Group->getMember(Group->getFactor() - 1))
1682 InvalidateGroupIfMemberMayWrap(Group, Group->getFactor() - 1, "last");
1683 else {
1684 // Case 3: A non-reversed interleaved load group with gaps: We need
1685 // to execute at least one scalar epilogue iteration. This will ensure
1686 // we don't speculatively access memory out-of-bounds. We only need
1687 // to look for a member at index factor - 1, since every group must have
1688 // a member at index zero.
1689 if (Group->isReverse()) {
1690 LLVM_DEBUG(
1691 dbgs() << "LV: Invalidate candidate interleaved group due to "
1692 "a reverse access with gaps.\n");
1693 releaseGroup(Group);
1694 continue;
1695 }
1696 LLVM_DEBUG(
1697 dbgs() << "LV: Interleaved group requires epilogue iteration.\n");
1698 RequiresScalarEpilogue = true;
1699 }
1700 }
1701
1702 for (auto *Group : StoreGroups) {
1703 // Case 1: A full group. Can Skip the checks; For full groups, if the wide
1704 // store would wrap around the address space we would do a memory access at
1705 // nullptr even without the transformation.
1706 if (Group->isFull())
1707 continue;
1708
1709 // Interleave-store-group with gaps is implemented using masked wide store.
1710 // Remove interleaved store groups with gaps if
1711 // masked-interleaved-accesses are not enabled by the target.
1712 if (!EnablePredicatedInterleavedMemAccesses) {
1713 LLVM_DEBUG(
1714 dbgs() << "LV: Invalidate candidate interleaved store group due "
1715 "to gaps.\n");
1716 releaseGroup(Group);
1717 continue;
1718 }
1719
1720 // Case 2: If first and last members of the group don't wrap this implies
1721 // that all the pointers in the group don't wrap.
1722 // So we check only group member 0 (which is always guaranteed to exist),
1723 // and the last group member. Case 3 (scalar epilog) is not relevant for
1724 // stores with gaps, which are implemented with masked-store (rather than
1725 // speculative access, as in loads).
1726 if (InvalidateGroupIfMemberMayWrap(Group, 0, "first"))
1727 continue;
1728 for (int Index = Group->getFactor() - 1; Index > 0; Index--)
1729 if (Group->getMember(Index)) {
1730 InvalidateGroupIfMemberMayWrap(Group, Index, "last");
1731 break;
1732 }
1733 }
1734}
1735
1737 // If no group had triggered the requirement to create an epilogue loop,
1738 // there is nothing to do.
1740 return;
1741
1742 // Release groups requiring scalar epilogues. Note that this also removes them
1743 // from InterleaveGroups.
1744 bool ReleasedGroup = InterleaveGroups.remove_if([&](auto *Group) {
1745 if (!Group->requiresScalarEpilogue())
1746 return false;
1747 LLVM_DEBUG(
1748 dbgs()
1749 << "LV: Invalidate candidate interleaved group due to gaps that "
1750 "require a scalar epilogue (not allowed under optsize) and cannot "
1751 "be masked (not enabled). \n");
1752 releaseGroupWithoutRemovingFromSet(Group);
1753 return true;
1754 });
1755 assert(ReleasedGroup && "At least one group must be invalidated, as a "
1756 "scalar epilogue was required");
1757 (void)ReleasedGroup;
1758 RequiresScalarEpilogue = false;
1759}
1760
1761template <typename InstT>
1762void InterleaveGroup<InstT>::addMetadata(InstT *NewInst) const {
1763 llvm_unreachable("addMetadata can only be used for Instruction");
1764}
1765
1766namespace llvm {
1767template <>
1772} // namespace llvm
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
#define X(NUM, ENUM, NAME)
Definition ELF.h:851
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Generic implementation of equivalence classes through the use Tarjan's efficient union-find algorithm...
IRTranslator LLVM IR MI
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define I(x, y, z)
Definition MD5.cpp:57
This file provides utility for Memory Model Relaxation Annotations (MMRAs).
This file defines the SmallVector class.
#define LLVM_DEBUG(...)
Definition Debug.h:114
static unsigned getScalarSizeInBits(Type *Ty)
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static SymbolRef::Type getType(const Symbol *Sym)
Definition TapiFile.cpp:39
This pass exposes codegen information to IR-level passes.
static Value * concatenateTwoVectors(IRBuilderBase &Builder, Value *V1, Value *V2)
A helper function for concatenating vectors.
static cl::opt< unsigned > MaxInterleaveGroupFactor("max-interleave-group-factor", cl::Hidden, cl::desc("Maximum factor for an interleaved access group (default = 8)"), cl::init(8))
Maximum factor for an interleaved memory access.
static void addToAccessGroupList(ListT &List, MDNode *AccGroups)
Add all access groups in AccGroups to List.
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
void clearBit(unsigned BitPosition)
Set a given bit to 0.
Definition APInt.h:1429
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition APInt.h:1353
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1511
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:201
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1585
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
const T & front() const
front - Get the first element.
Definition ArrayRef.h:145
iterator end() const
Definition ArrayRef.h:131
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
iterator begin() const
Definition ArrayRef.h:130
bool empty() const
empty - Check if the array is empty.
Definition ArrayRef.h:137
LLVM Basic Block Representation.
Definition BasicBlock.h:62
This class represents a function call, abstracting a target machine's calling convention.
static LLVM_ABI Constant * get(ArrayRef< Constant * > V)
This is an important base class in LLVM.
Definition Constant.h:43
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition DenseMap.h:174
EquivalenceClasses - This represents a collection of equivalence classes and supports three efficient...
iterator_range< member_iterator > members(const ECValue &ECV) const
const ElemTy & getOrInsertLeaderValue(const ElemTy &V)
getOrInsertLeaderValue - Return the leader for the specified value that is in the set.
member_iterator unionSets(const ElemTy &V1, const ElemTy &V2)
union - Merge the two equivalence sets for the specified values, inserting them if they do not alread...
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
This instruction inserts a single (scalar) element into a VectorType value.
bool mayReadOrWriteMemory() const
Return true if this instruction may read or write memory.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
void getAllMetadataOtherThanDebugLoc(SmallVectorImpl< std::pair< unsigned, MDNode * > > &MDs) const
This does the same thing as getAllMetadata, except that it filters out the debug location.
The group of interleaved loads/stores sharing the same stride and close to each other.
uint32_t getFactor() const
InstTy * getMember(uint32_t Index) const
Get the member with the given index Index.
bool isFull() const
Return true if this group is full, i.e. it has no gaps.
uint32_t getIndex(const InstTy *Instr) const
Get the index for the given member.
void setInsertPos(InstTy *Inst)
bool isReverse() const
void addMetadata(InstTy *NewInst) const
Add metadata (e.g.
bool insertMember(InstTy *Instr, int32_t Index, Align NewAlign)
Try to insert a new member Instr with index Index and alignment NewAlign.
InterleaveGroup< Instruction > * getInterleaveGroup(const Instruction *Instr) const
Get the interleave group that Instr belongs to.
bool requiresScalarEpilogue() const
Returns true if an interleaved group that may access memory out-of-bounds requires a scalar epilogue ...
bool isInterleaved(Instruction *Instr) const
Check if Instr belongs to any interleave group.
LLVM_ABI void analyzeInterleaving(bool EnableMaskedInterleavedGroup)
Analyze the interleaved accesses and collect them in interleave groups.
LLVM_ABI void invalidateGroupsRequiringScalarEpilogue()
Invalidate groups that require a scalar epilogue (due to gaps).
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
Metadata node.
Definition Metadata.h:1080
static LLVM_ABI MDNode * getMostGenericAliasScope(MDNode *A, MDNode *B)
static LLVM_ABI MDNode * getMostGenericTBAA(MDNode *A, MDNode *B)
ArrayRef< MDOperand > operands() const
Definition Metadata.h:1442
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1572
static LLVM_ABI MDNode * getMostGenericFPMath(MDNode *A, MDNode *B)
unsigned getNumOperands() const
Return number of MDNode operands.
Definition Metadata.h:1450
static LLVM_ABI MDNode * intersect(MDNode *A, MDNode *B)
LLVMContext & getContext() const
Definition Metadata.h:1244
Tracking metadata reference owned by Metadata.
Definition Metadata.h:902
static LLVM_ABI MDNode * combine(LLVMContext &Ctx, const MMRAMetadata &A, const MMRAMetadata &B)
Combines A and B according to MMRA semantics.
This class implements a map that also provides access to all stored values in a deterministic order.
Definition MapVector.h:36
reverse_iterator rend()
Definition MapVector.h:74
iterator find(const KeyT &Key)
Definition MapVector.h:154
bool empty() const
Definition MapVector.h:77
reverse_iterator rbegin()
Definition MapVector.h:70
Root of the metadata hierarchy.
Definition Metadata.h:64
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memory), i.e. a start pointer and a length.
Definition ArrayRef.h:298
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
This class represents a constant integer value.
const APInt & getAPInt() const
bool remove(const value_type &X)
Remove an item from the set vector.
Definition SetVector.h:181
bool contains(const_arg_type key) const
Check if the SetVector contains the given key.
Definition SetVector.h:252
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:151
This instruction constructs a fixed permutation of two input vectors.
int getMaskValue(unsigned Elt) const
Return the shuffle mask value of this instruction for the given element index.
VectorType * getType() const
Overload to return most specific vector type.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
bool remove_if(UnaryPredicate P)
Remove elements that match the given predicate.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:339
This class consists of common code factored out of the SmallVector class to reduce code duplication based on the SmallVector 'N' template parameter.
void assign(size_type NumElts, ValueParamT Elt)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Provides information about what library functions are available for the current target.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
ArrayRef< Type * > subtypes() const
Definition Type.h:383
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
Value * getOperand(unsigned i) const
Definition User.h:207
static LLVM_ABI bool isVPCast(Intrinsic::ID ID)
static LLVM_ABI std::optional< unsigned > getVectorLengthParamPos(Intrinsic::ID IntrinsicID)
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.h:258
Base class of all SIMD vector types.
Type * getElementType() const
An efficient, type-erasing, non-owning reference to a callable.
CallInst * Call
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
LLVM_ABI bool isTriviallyScalarizable(ID id)
Returns true if the intrinsic is trivially scalarizable.
LLVM_ABI bool isTargetIntrinsic(ID IID)
isTargetIntrinsic - Returns true if IID is an intrinsic specific to a certain target.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
bool match(Val *V, const Pattern &P)
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
auto m_BinOp()
Match an arbitrary binary operation and ignore it.
auto m_Value()
Match an arbitrary value and ignore it.
auto m_Constant()
Match an arbitrary Constant and ignore it.
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
auto m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1739
unsigned getLoadStoreAddressSpace(const Value *I)
A helper function that returns the address space of the pointer operand of load or store instruction.
LLVM_ABI Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI)
Returns intrinsic ID for call.
LLVM_ABI bool canInstructionHaveMMRAs(const Instruction &I)
LLVM_ABI APInt possiblyDemandedEltsInMask(Value *Mask)
Given a mask vector of the form <Y x i1>, return an APInt (of bitwidth Y) for each lane which may be active.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2554
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
LLVM_ABI llvm::SmallVector< int, 16 > createUnaryMask(ArrayRef< int > Mask, unsigned NumElts)
Given a shuffle mask for a binary shuffle, create the equivalent shuffle mask assuming both operands ...
LLVM_ABI void getMetadataToPropagate(Instruction *Inst, SmallVectorImpl< std::pair< unsigned, MDNode * > > &Metadata)
Add metadata from Inst to Metadata, if it can be preserved after vectorization.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
Definition bit.h:325
LLVM_ABI Value * concatenateVectors(IRBuilderBase &Builder, ArrayRef< Value * > Vecs)
Concatenate a list of vectors.
Align getLoadStoreAlignment(const Value *I)
A helper function that returns the alignment of load or store instruction.
LLVM_ABI bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
LLVM_ABI Instruction * propagateMetadata(Instruction *I, ArrayRef< Value * > VL)
Specifically, let Kinds = [MD_tbaa, MD_alias_scope, MD_noalias, MD_fpmath, MD_nontemporal,...
LLVM_ABI Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
T bit_ceil(T Value)
Returns the smallest integral power of two no smaller than Value if Value is nonzero.
Definition bit.h:362
constexpr auto equal_to(T &&Arg)
Functor variant of std::equal_to that can be used as a UnaryPredicate in functional algorithms like a...
Definition STLExtras.h:2173
LLVM_ABI MDNode * intersectAccessGroups(const Instruction *Inst1, const Instruction *Inst2)
Compute the access-group list of access groups that Inst1 and Inst2 are both in.
unsigned M1(unsigned Val)
Definition VE.h:377
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1746
LLVM_ABI bool getShuffleDemandedElts(int SrcWidth, ArrayRef< int > Mask, const APInt &DemandedElts, APInt &DemandedLHS, APInt &DemandedRHS, bool AllowUndefElts=false)
Transform a shuffle mask's output demanded element mask into demanded element masks for the 2 operand...
LLVM_ABI bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned element.
LLVM_ABI Constant * createBitMaskForGaps(IRBuilderBase &Builder, unsigned VF, const InterleaveGroup< Instruction > &Group)
Create a mask that filters the members of an interleave group where there are gaps.
constexpr unsigned MaxAnalysisRecursionDepth
LLVM_ABI llvm::SmallVector< int, 16 > createStrideMask(unsigned Start, unsigned Stride, unsigned VF)
Create a stride shuffle mask.
LLVM_ABI void getHorizDemandedEltsForFirstOperand(unsigned VectorBitWidth, const APInt &DemandedElts, APInt &DemandedLHS, APInt &DemandedRHS)
Compute the demanded elements mask of horizontal binary operations.
LLVM_ABI llvm::SmallVector< int, 16 > createReplicatedMask(unsigned ReplicationFactor, unsigned VF)
Create a mask with replicated elements.
LLVM_ABI unsigned getDeinterleaveIntrinsicFactor(Intrinsic::ID ID)
Returns the corresponding factor of llvm.vector.deinterleaveN intrinsics.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI unsigned getInterleaveIntrinsicFactor(Intrinsic::ID ID)
Returns the corresponding factor of llvm.vector.interleaveN intrinsics.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ABI bool maskIsAllOneOrUndef(Value *Mask)
Given a mask vector of i1, return true if all of the elements of this predicate mask are known to be true or undef.
constexpr int PoisonMaskElem
LLVM_ABI bool isTriviallyScalarizable(Intrinsic::ID ID)
Identify if the intrinsic is trivially scalarizable.
LLVM_ABI bool isValidAsAccessGroup(MDNode *AccGroup)
Return whether an MDNode might represent an access group.
LLVM_ABI Intrinsic::ID getIntrinsicForCallSite(const CallBase &CB, const TargetLibraryInfo *TLI)
Map a call instruction to an intrinsic ID.
LLVM_ABI bool isVectorIntrinsicWithStructReturnOverloadAtField(Intrinsic::ID ID, int RetIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic that returns a struct is overloaded at the struct elem...
TargetTransformInfo TTI
LLVM_ABI void narrowShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Replace each shuffle mask index with the scaled sequential indices for an equivalent mask of narrowed...
LLVM_ABI bool isMaskedSlidePair(ArrayRef< int > Mask, int NumElts, std::array< std::pair< int, int >, 2 > &SrcInfo)
Does this shuffle mask represent either one slide shuffle or a pair of two slide shuffles,...
LLVM_ABI VectorType * getDeinterleavedVectorType(IntrinsicInst *DI)
Given a deinterleaveN intrinsic, return the (narrow) vector type of each factor.
LLVM_ABI llvm::SmallVector< int, 16 > createInterleaveMask(unsigned VF, unsigned NumVecs)
Create an interleave shuffle mask.
LLVM_ABI bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic has a scalar operand.
LLVM_ABI const SCEV * replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE, const DenseMap< Value *, const SCEV * > &PtrToStride, Value *Ptr)
Return the SCEV corresponding to a pointer with the symbolic stride replaced with constant one,...
LLVM_ABI Value * findScalarElement(Value *V, unsigned EltNo)
Given a vector and an element number, see if the scalar value is already around as a register,...
LLVM_ABI MDNode * uniteAccessGroups(MDNode *AccGroups1, MDNode *AccGroups2)
Compute the union of two access-group lists.
unsigned M0(unsigned Val)
Definition VE.h:376
auto make_second_range(ContainerTy &&c)
Given a container of pairs, return a range over the second elements.
Definition STLExtras.h:1409
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition STLExtras.h:2019
LLVM_ABI bool maskIsAllZeroOrUndef(Value *Mask)
Given a mask vector of i1, return true if all of the elements of this predicate mask are known to be false or undef.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1772
LLVM_ABI void getShuffleMaskWithWidestElts(ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Repetitively apply widenShuffleMaskElts() for as long as it succeeds, to get the shuffle mask with wi...
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1947
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
LLVM_ABI void processShuffleMasks(ArrayRef< int > Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs, unsigned NumOfUsedRegs, function_ref< void()> NoInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned)> SingleInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned, bool)> ManyInputsAction)
Splits and processes shuffle mask depending on the number of input and output registers.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer lists are equal or the list.
Definition STLExtras.h:2166
LLVM_ABI bool maskContainsAllOneOrUndef(Value *Mask)
Given a mask vector of i1, return true if any of the elements of this predicate mask are known to be true or undef.
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition Sequence.h:305
LLVM_ABI bool isTriviallyVectorizable(Intrinsic::ID ID)
Identify if the intrinsic is trivially vectorizable.
LLVM_ABI llvm::SmallVector< int, 16 > createSequentialMask(unsigned Start, unsigned NumInts, unsigned NumUndefs)
Create a sequential shuffle mask.
LLVM_ABI std::optional< int64_t > getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, const Loop *Lp, const DominatorTree &DT, const DenseMap< Value *, const SCEV * > &StridesMap=DenseMap< Value *, const SCEV * >(), bool Assume=false, bool ShouldCheckWrap=true)
If the pointer has a constant stride return it in units of the access type size.
LLVM_ABI bool isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, int OpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic is overloaded on the type of the operand at index OpdI...
LLVM_ABI MapVector< Instruction *, uint64_t > computeMinimumValueSizes(ArrayRef< BasicBlock * > Blocks, DemandedBits &DB, const TargetTransformInfo *TTI=nullptr)
Compute a map of integer instructions to their minimum legal type size.
LLVM_ABI bool scaleShuffleMaskElts(unsigned NumDstElts, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Attempt to narrow/widen the Mask shuffle mask to the NumDstElts target width.
LLVM_ABI int getSplatIndex(ArrayRef< int > Mask)
If all non-negative Mask elements are the same value, return that value.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872