LLVM 23.0.0git
VectorUtils.cpp
Go to the documentation of this file.
1//===----------- VectorUtils.cpp - Vectorizer utility functions -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines vectorizer utilities.
10//
11//===----------------------------------------------------------------------===//
12
23#include "llvm/IR/Constants.h"
25#include "llvm/IR/IRBuilder.h"
28#include "llvm/IR/Value.h"
30
31#define DEBUG_TYPE "vectorutils"
32
33using namespace llvm;
34using namespace llvm::PatternMatch;
35
36/// Maximum factor for an interleaved memory access.
38 "max-interleave-group-factor", cl::Hidden,
39 cl::desc("Maximum factor for an interleaved access group (default = 8)"),
40 cl::init(8));
41
42/// Return true if all of the intrinsic's arguments and return type are scalars
43/// for the scalar form of the intrinsic, and vectors for the vector form of the
44/// intrinsic (except operands that are marked as always being scalar by
45/// isVectorIntrinsicWithScalarOpAtArg).
47 switch (ID) {
48 case Intrinsic::abs: // Begin integer bit-manipulation.
49 case Intrinsic::bswap:
50 case Intrinsic::bitreverse:
51 case Intrinsic::ctpop:
52 case Intrinsic::ctlz:
53 case Intrinsic::cttz:
54 case Intrinsic::fshl:
55 case Intrinsic::fshr:
56 case Intrinsic::smax:
57 case Intrinsic::smin:
58 case Intrinsic::umax:
59 case Intrinsic::umin:
60 case Intrinsic::sadd_sat:
61 case Intrinsic::ssub_sat:
62 case Intrinsic::uadd_sat:
63 case Intrinsic::usub_sat:
64 case Intrinsic::smul_fix:
65 case Intrinsic::smul_fix_sat:
66 case Intrinsic::umul_fix:
67 case Intrinsic::umul_fix_sat:
68 case Intrinsic::uadd_with_overflow:
69 case Intrinsic::sadd_with_overflow:
70 case Intrinsic::usub_with_overflow:
71 case Intrinsic::ssub_with_overflow:
72 case Intrinsic::umul_with_overflow:
73 case Intrinsic::smul_with_overflow:
74 case Intrinsic::sqrt: // Begin floating-point.
75 case Intrinsic::asin:
76 case Intrinsic::acos:
77 case Intrinsic::atan:
78 case Intrinsic::atan2:
79 case Intrinsic::sin:
80 case Intrinsic::cos:
81 case Intrinsic::sincos:
82 case Intrinsic::sincospi:
83 case Intrinsic::tan:
84 case Intrinsic::sinh:
85 case Intrinsic::cosh:
86 case Intrinsic::tanh:
87 case Intrinsic::exp:
88 case Intrinsic::exp10:
89 case Intrinsic::exp2:
90 case Intrinsic::frexp:
91 case Intrinsic::ldexp:
92 case Intrinsic::log:
93 case Intrinsic::log10:
94 case Intrinsic::log2:
95 case Intrinsic::fabs:
96 case Intrinsic::minnum:
97 case Intrinsic::maxnum:
98 case Intrinsic::minimum:
99 case Intrinsic::maximum:
100 case Intrinsic::minimumnum:
101 case Intrinsic::maximumnum:
102 case Intrinsic::modf:
103 case Intrinsic::copysign:
104 case Intrinsic::floor:
105 case Intrinsic::ceil:
106 case Intrinsic::trunc:
107 case Intrinsic::rint:
108 case Intrinsic::nearbyint:
109 case Intrinsic::round:
110 case Intrinsic::roundeven:
111 case Intrinsic::pow:
112 case Intrinsic::fma:
113 case Intrinsic::fmuladd:
114 case Intrinsic::is_fpclass:
115 case Intrinsic::powi:
116 case Intrinsic::canonicalize:
117 case Intrinsic::fptosi_sat:
118 case Intrinsic::fptoui_sat:
119 case Intrinsic::lround:
120 case Intrinsic::llround:
121 case Intrinsic::lrint:
122 case Intrinsic::llrint:
123 case Intrinsic::ucmp:
124 case Intrinsic::scmp:
125 case Intrinsic::clmul:
126 return true;
127 default:
128 return false;
129 }
130}
131
138
139/// Identifies if the vector form of the intrinsic has a scalar operand.
141 unsigned ScalarOpdIdx,
142 const TargetTransformInfo *TTI) {
143
145 return TTI->isTargetIntrinsicWithScalarOpAtArg(ID, ScalarOpdIdx);
146
147 // Vector predication intrinsics have the EVL as the last operand.
148 if (VPIntrinsic::getVectorLengthParamPos(ID) == ScalarOpdIdx)
149 return true;
150
151 switch (ID) {
152 case Intrinsic::abs:
153 case Intrinsic::vp_abs:
154 case Intrinsic::ctlz:
155 case Intrinsic::vp_ctlz:
156 case Intrinsic::cttz:
157 case Intrinsic::vp_cttz:
158 case Intrinsic::is_fpclass:
159 case Intrinsic::vp_is_fpclass:
160 case Intrinsic::powi:
161 case Intrinsic::vector_extract:
162 return (ScalarOpdIdx == 1);
163 case Intrinsic::smul_fix:
164 case Intrinsic::smul_fix_sat:
165 case Intrinsic::umul_fix:
166 case Intrinsic::umul_fix_sat:
167 return (ScalarOpdIdx == 2);
168 case Intrinsic::experimental_vp_splice:
169 return ScalarOpdIdx == 2 || ScalarOpdIdx == 4;
170 case Intrinsic::experimental_vp_strided_load:
171 return ScalarOpdIdx == 0 || ScalarOpdIdx == 1;
172 case Intrinsic::loop_dependence_war_mask:
173 return true;
174 default:
175 return false;
176 }
177}
178
180 Intrinsic::ID ID, int OpdIdx, const TargetTransformInfo *TTI) {
181 assert(ID != Intrinsic::not_intrinsic && "Not an intrinsic!");
182
184 return TTI->isTargetIntrinsicWithOverloadTypeAtArg(ID, OpdIdx);
185
187 return OpdIdx == -1 || OpdIdx == 0;
188
189 switch (ID) {
190 case Intrinsic::fptosi_sat:
191 case Intrinsic::fptoui_sat:
192 case Intrinsic::lround:
193 case Intrinsic::llround:
194 case Intrinsic::lrint:
195 case Intrinsic::llrint:
196 case Intrinsic::vp_lrint:
197 case Intrinsic::vp_llrint:
198 case Intrinsic::ucmp:
199 case Intrinsic::scmp:
200 case Intrinsic::vector_extract:
201 case Intrinsic::loop_dependence_war_mask:
202 return OpdIdx == -1 || OpdIdx == 0;
203 case Intrinsic::modf:
204 case Intrinsic::sincos:
205 case Intrinsic::sincospi:
206 case Intrinsic::is_fpclass:
207 case Intrinsic::vp_is_fpclass:
208 return OpdIdx == 0;
209 case Intrinsic::powi:
210 case Intrinsic::ldexp:
211 return OpdIdx == -1 || OpdIdx == 1;
212 case Intrinsic::experimental_vp_strided_load:
213 return OpdIdx == -1 || OpdIdx == 0 || OpdIdx == 1;
214 default:
215 return OpdIdx == -1;
216 }
217}
218
220 Intrinsic::ID ID, int RetIdx, const TargetTransformInfo *TTI) {
221
223 return TTI->isTargetIntrinsicWithStructReturnOverloadAtField(ID, RetIdx);
224
225 switch (ID) {
226 case Intrinsic::frexp:
227 return RetIdx == 0 || RetIdx == 1;
228 default:
229 return RetIdx == 0;
230 }
231}
232
233/// Returns intrinsic ID for call.
234/// For the input call instruction it finds mapping intrinsic and returns
235/// its ID, in case it does not found it return not_intrinsic.
237 const TargetLibraryInfo *TLI) {
241
242 if (isTriviallyVectorizable(ID) || ID == Intrinsic::lifetime_start ||
243 ID == Intrinsic::lifetime_end || ID == Intrinsic::assume ||
244 ID == Intrinsic::experimental_noalias_scope_decl ||
245 ID == Intrinsic::sideeffect || ID == Intrinsic::pseudoprobe)
246 return ID;
248}
249
251 switch (ID) {
252 case Intrinsic::vector_interleave2:
253 return 2;
254 case Intrinsic::vector_interleave3:
255 return 3;
256 case Intrinsic::vector_interleave4:
257 return 4;
258 case Intrinsic::vector_interleave5:
259 return 5;
260 case Intrinsic::vector_interleave6:
261 return 6;
262 case Intrinsic::vector_interleave7:
263 return 7;
264 case Intrinsic::vector_interleave8:
265 return 8;
266 default:
267 return 0;
268 }
269}
270
272 switch (ID) {
273 case Intrinsic::vector_deinterleave2:
274 return 2;
275 case Intrinsic::vector_deinterleave3:
276 return 3;
277 case Intrinsic::vector_deinterleave4:
278 return 4;
279 case Intrinsic::vector_deinterleave5:
280 return 5;
281 case Intrinsic::vector_deinterleave6:
282 return 6;
283 case Intrinsic::vector_deinterleave7:
284 return 7;
285 case Intrinsic::vector_deinterleave8:
286 return 8;
287 default:
288 return 0;
289 }
290}
291
293 [[maybe_unused]] unsigned Factor =
295 ArrayRef<Type *> DISubtypes = DI->getType()->subtypes();
296 assert(Factor && Factor == DISubtypes.size() &&
297 "unexpected deinterleave factor or result type");
298 return cast<VectorType>(DISubtypes[0]);
299}
300
301/// Given a vector and an element number, see if the scalar value is
302/// already around as a register, for example if it were inserted then extracted
303/// from the vector.
304Value *llvm::findScalarElement(Value *V, unsigned EltNo) {
305 assert(V->getType()->isVectorTy() && "Not looking at a vector?");
306 VectorType *VTy = cast<VectorType>(V->getType());
307 // For fixed-length vector, return poison for out of range access.
308 if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
309 unsigned Width = FVTy->getNumElements();
310 if (EltNo >= Width)
311 return PoisonValue::get(FVTy->getElementType());
312 }
313
314 if (Constant *C = dyn_cast<Constant>(V))
315 return C->getAggregateElement(EltNo);
316
318 // If this is an insert to a variable element, we don't know what it is.
319 uint64_t IIElt;
320 if (!match(III->getOperand(2), m_ConstantInt(IIElt)))
321 return nullptr;
322
323 // If this is an insert to the element we are looking for, return the
324 // inserted value.
325 if (EltNo == IIElt)
326 return III->getOperand(1);
327
328 // Guard against infinite loop on malformed, unreachable IR.
329 if (III == III->getOperand(0))
330 return nullptr;
331
332 // Otherwise, the insertelement doesn't modify the value, recurse on its
333 // vector input.
334 return findScalarElement(III->getOperand(0), EltNo);
335 }
336
338 // Restrict the following transformation to fixed-length vector.
339 if (SVI && isa<FixedVectorType>(SVI->getType())) {
340 unsigned LHSWidth =
341 cast<FixedVectorType>(SVI->getOperand(0)->getType())->getNumElements();
342 int InEl = SVI->getMaskValue(EltNo);
343 if (InEl < 0)
344 return PoisonValue::get(VTy->getElementType());
345 if (InEl < (int)LHSWidth)
346 return findScalarElement(SVI->getOperand(0), InEl);
347 return findScalarElement(SVI->getOperand(1), InEl - LHSWidth);
348 }
349
350 // Extract a value from a vector add operation with a constant zero.
351 // TODO: Use getBinOpIdentity() to generalize this.
352 Value *Val; Constant *C;
353 if (match(V, m_Add(m_Value(Val), m_Constant(C))))
354 if (Constant *Elt = C->getAggregateElement(EltNo))
355 if (Elt->isNullValue())
356 return findScalarElement(Val, EltNo);
357
358 // If the vector is a splat then we can trivially find the scalar element.
360 if (Value *Splat = getSplatValue(V))
361 if (EltNo < VTy->getElementCount().getKnownMinValue())
362 return Splat;
363
364 // Otherwise, we don't know.
365 return nullptr;
366}
367
369 int SplatIndex = -1;
370 for (int M : Mask) {
371 // Ignore invalid (undefined) mask elements.
372 if (M < 0)
373 continue;
374
375 // There can be only 1 non-negative mask element value if this is a splat.
376 if (SplatIndex != -1 && SplatIndex != M)
377 return -1;
378
379 // Initialize the splat index to the 1st non-negative mask element.
380 SplatIndex = M;
381 }
382 assert((SplatIndex == -1 || SplatIndex >= 0) && "Negative index?");
383 return SplatIndex;
384}
385
386/// Get splat value if the input is a splat vector or return nullptr.
387/// This function is not fully general. It checks only 2 cases:
388/// the input value is (1) a splat constant vector or (2) a sequence
389/// of instructions that broadcasts a scalar at element 0.
391 if (isa<VectorType>(V->getType()))
392 if (auto *C = dyn_cast<Constant>(V))
393 return C->getSplatValue();
394
395 // shuf (inselt ?, Splat, 0), ?, <0, undef, 0, ...>
396 Value *Splat;
397 if (match(V,
399 m_Value(), m_ZeroMask())))
400 return Splat;
401
402 return nullptr;
403}
404
405bool llvm::isSplatValue(const Value *V, int Index, unsigned Depth) {
406 assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
407
408 if (isa<VectorType>(V->getType())) {
409 if (isa<UndefValue>(V))
410 return true;
411 // FIXME: We can allow undefs, but if Index was specified, we may want to
412 // check that the constant is defined at that index.
413 if (auto *C = dyn_cast<Constant>(V))
414 return C->getSplatValue() != nullptr;
415 }
416
417 if (auto *Shuf = dyn_cast<ShuffleVectorInst>(V)) {
418 // FIXME: We can safely allow undefs here. If Index was specified, we will
419 // check that the mask elt is defined at the required index.
420 if (!all_equal(Shuf->getShuffleMask()))
421 return false;
422
423 // Match any index.
424 if (Index == -1)
425 return true;
426
427 // Match a specific element. The mask should be defined at and match the
428 // specified index.
429 return Shuf->getMaskValue(Index) == Index;
430 }
431
432 // The remaining tests are all recursive, so bail out if we hit the limit.
434 return false;
435
436 // If both operands of a binop are splats, the result is a splat.
437 Value *X, *Y, *Z;
438 if (match(V, m_BinOp(m_Value(X), m_Value(Y))))
439 return isSplatValue(X, Index, Depth) && isSplatValue(Y, Index, Depth);
440
441 // If all operands of a select are splats, the result is a splat.
442 if (match(V, m_Select(m_Value(X), m_Value(Y), m_Value(Z))))
443 return isSplatValue(X, Index, Depth) && isSplatValue(Y, Index, Depth) &&
444 isSplatValue(Z, Index, Depth);
445
446 // TODO: Add support for unary ops (fneg), casts, intrinsics (overflow ops).
447
448 return false;
449}
450
452 const APInt &DemandedElts, APInt &DemandedLHS,
453 APInt &DemandedRHS, bool AllowUndefElts) {
454 DemandedLHS = DemandedRHS = APInt::getZero(SrcWidth);
455
456 // Early out if we don't demand any elements.
457 if (DemandedElts.isZero())
458 return true;
459
460 // Simple case of a shuffle with zeroinitializer.
461 if (all_of(Mask, equal_to(0))) {
462 DemandedLHS.setBit(0);
463 return true;
464 }
465
466 for (unsigned I = 0, E = Mask.size(); I != E; ++I) {
467 int M = Mask[I];
468 assert((-1 <= M) && (M < (SrcWidth * 2)) &&
469 "Invalid shuffle mask constant");
470
471 if (!DemandedElts[I] || (AllowUndefElts && (M < 0)))
472 continue;
473
474 // For undef elements, we don't know anything about the common state of
475 // the shuffle result.
476 if (M < 0)
477 return false;
478
479 if (M < SrcWidth)
480 DemandedLHS.setBit(M);
481 else
482 DemandedRHS.setBit(M - SrcWidth);
483 }
484
485 return true;
486}
487
489 std::array<std::pair<int, int>, 2> &SrcInfo) {
490 const int SignalValue = NumElts * 2;
491 SrcInfo[0] = {-1, SignalValue};
492 SrcInfo[1] = {-1, SignalValue};
493 for (auto [i, M] : enumerate(Mask)) {
494 if (M < 0)
495 continue;
496 int Src = M >= NumElts;
497 int Diff = (int)i - (M % NumElts);
498 bool Match = false;
499 for (int j = 0; j < 2; j++) {
500 auto &[SrcE, DiffE] = SrcInfo[j];
501 if (SrcE == -1) {
502 assert(DiffE == SignalValue);
503 SrcE = Src;
504 DiffE = Diff;
505 }
506 if (SrcE == Src && DiffE == Diff) {
507 Match = true;
508 break;
509 }
510 }
511 if (!Match)
512 return false;
513 }
514 // Avoid all undef masks
515 return SrcInfo[0].first != -1;
516}
517
519 SmallVectorImpl<int> &ScaledMask) {
520 assert(Scale > 0 && "Unexpected scaling factor");
521
522 // Fast-path: if no scaling, then it is just a copy.
523 if (Scale == 1) {
524 ScaledMask.assign(Mask.begin(), Mask.end());
525 return;
526 }
527
528 ScaledMask.clear();
529 for (int MaskElt : Mask) {
530 if (MaskElt >= 0) {
531 assert(((uint64_t)Scale * MaskElt + (Scale - 1)) <= INT32_MAX &&
532 "Overflowed 32-bits");
533 }
534 for (int SliceElt = 0; SliceElt != Scale; ++SliceElt)
535 ScaledMask.push_back(MaskElt < 0 ? MaskElt : Scale * MaskElt + SliceElt);
536 }
537}
538
540 SmallVectorImpl<int> &ScaledMask) {
541 assert(Scale > 0 && "Unexpected scaling factor");
542
543 // Fast-path: if no scaling, then it is just a copy.
544 if (Scale == 1) {
545 ScaledMask.assign(Mask.begin(), Mask.end());
546 return true;
547 }
548
549 // We must map the original elements down evenly to a type with less elements.
550 int NumElts = Mask.size();
551 if (NumElts % Scale != 0)
552 return false;
553
554 ScaledMask.clear();
555 ScaledMask.reserve(NumElts / Scale);
556
557 // Step through the input mask by splitting into Scale-sized slices.
558 do {
559 ArrayRef<int> MaskSlice = Mask.take_front(Scale);
560 assert((int)MaskSlice.size() == Scale && "Expected Scale-sized slice.");
561
562 // The first element of the slice determines how we evaluate this slice.
563 int SliceFront = MaskSlice.front();
564 if (SliceFront < 0) {
565 // Negative values (undef or other "sentinel" values) must be equal across
566 // the entire slice.
567 if (!all_equal(MaskSlice))
568 return false;
569 ScaledMask.push_back(SliceFront);
570 } else {
571 // A positive mask element must be cleanly divisible.
572 if (SliceFront % Scale != 0)
573 return false;
574 // Elements of the slice must be consecutive.
575 for (int i = 1; i < Scale; ++i)
576 if (MaskSlice[i] != SliceFront + i)
577 return false;
578 ScaledMask.push_back(SliceFront / Scale);
579 }
580 Mask = Mask.drop_front(Scale);
581 } while (!Mask.empty());
582
583 assert((int)ScaledMask.size() * Scale == NumElts && "Unexpected scaled mask");
584
585 // All elements of the original mask can be scaled down to map to the elements
586 // of a mask with wider elements.
587 return true;
588}
589
591 SmallVectorImpl<int> &NewMask) {
592 unsigned NumElts = M.size();
593 if (NumElts % 2 != 0)
594 return false;
595
596 NewMask.clear();
597 for (unsigned i = 0; i < NumElts; i += 2) {
598 int M0 = M[i];
599 int M1 = M[i + 1];
600
601 // If both elements are undef, new mask is undef too.
602 if (M0 == -1 && M1 == -1) {
603 NewMask.push_back(-1);
604 continue;
605 }
606
607 if (M0 == -1 && M1 != -1 && (M1 % 2) == 1) {
608 NewMask.push_back(M1 / 2);
609 continue;
610 }
611
612 if (M0 != -1 && (M0 % 2) == 0 && ((M0 + 1) == M1 || M1 == -1)) {
613 NewMask.push_back(M0 / 2);
614 continue;
615 }
616
617 NewMask.clear();
618 return false;
619 }
620
621 assert(NewMask.size() == NumElts / 2 && "Incorrect size for mask!");
622 return true;
623}
624
625bool llvm::scaleShuffleMaskElts(unsigned NumDstElts, ArrayRef<int> Mask,
626 SmallVectorImpl<int> &ScaledMask) {
627 unsigned NumSrcElts = Mask.size();
628 assert(NumSrcElts > 0 && NumDstElts > 0 && "Unexpected scaling factor");
629
630 // Fast-path: if no scaling, then it is just a copy.
631 if (NumSrcElts == NumDstElts) {
632 ScaledMask.assign(Mask.begin(), Mask.end());
633 return true;
634 }
635
636 // Ensure we can find a whole scale factor.
637 assert(((NumSrcElts % NumDstElts) == 0 || (NumDstElts % NumSrcElts) == 0) &&
638 "Unexpected scaling factor");
639
640 if (NumSrcElts > NumDstElts) {
641 int Scale = NumSrcElts / NumDstElts;
642 return widenShuffleMaskElts(Scale, Mask, ScaledMask);
643 }
644
645 int Scale = NumDstElts / NumSrcElts;
646 narrowShuffleMaskElts(Scale, Mask, ScaledMask);
647 return true;
648}
649
651 SmallVectorImpl<int> &ScaledMask) {
652 std::array<SmallVector<int, 16>, 2> TmpMasks;
653 SmallVectorImpl<int> *Output = &TmpMasks[0], *Tmp = &TmpMasks[1];
654 ArrayRef<int> InputMask = Mask;
655 for (unsigned Scale = 2; Scale <= InputMask.size(); ++Scale) {
656 while (widenShuffleMaskElts(Scale, InputMask, *Output)) {
657 InputMask = *Output;
658 std::swap(Output, Tmp);
659 }
660 }
661 ScaledMask.assign(InputMask.begin(), InputMask.end());
662}
663
665 ArrayRef<int> Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs,
666 unsigned NumOfUsedRegs, function_ref<void()> NoInputAction,
667 function_ref<void(ArrayRef<int>, unsigned, unsigned)> SingleInputAction,
668 function_ref<void(ArrayRef<int>, unsigned, unsigned, bool)>
669 ManyInputsAction) {
670 SmallVector<SmallVector<SmallVector<int>>> Res(NumOfDestRegs);
671 // Try to perform better estimation of the permutation.
672 // 1. Split the source/destination vectors into real registers.
673 // 2. Do the mask analysis to identify which real registers are
674 // permuted.
675 int Sz = Mask.size();
676 unsigned SzDest = Sz / NumOfDestRegs;
677 unsigned SzSrc = Sz / NumOfSrcRegs;
678 for (unsigned I = 0; I < NumOfDestRegs; ++I) {
679 auto &RegMasks = Res[I];
680 RegMasks.assign(2 * NumOfSrcRegs, {});
681 // Check that the values in dest registers are in the one src
682 // register.
683 for (unsigned K = 0; K < SzDest; ++K) {
684 int Idx = I * SzDest + K;
685 if (Idx == Sz)
686 break;
687 if (Mask[Idx] >= 2 * Sz || Mask[Idx] == PoisonMaskElem)
688 continue;
689 int MaskIdx = Mask[Idx] % Sz;
690 int SrcRegIdx = MaskIdx / SzSrc + (Mask[Idx] >= Sz ? NumOfSrcRegs : 0);
691 // Add a cost of PermuteTwoSrc for each new source register permute,
692 // if we have more than one source registers.
693 if (RegMasks[SrcRegIdx].empty())
694 RegMasks[SrcRegIdx].assign(SzDest, PoisonMaskElem);
695 RegMasks[SrcRegIdx][K] = MaskIdx % SzSrc;
696 }
697 }
698 // Process split mask.
699 for (unsigned I : seq<unsigned>(NumOfUsedRegs)) {
700 auto &Dest = Res[I];
701 int NumSrcRegs =
702 count_if(Dest, [](ArrayRef<int> Mask) { return !Mask.empty(); });
703 switch (NumSrcRegs) {
704 case 0:
705 // No input vectors were used!
706 NoInputAction();
707 break;
708 case 1: {
709 // Find the only mask with at least single undef mask elem.
710 auto *It =
711 find_if(Dest, [](ArrayRef<int> Mask) { return !Mask.empty(); });
712 unsigned SrcReg = std::distance(Dest.begin(), It);
713 SingleInputAction(*It, SrcReg, I);
714 break;
715 }
716 default: {
717 // The first mask is a permutation of a single register. Since we have >2
718 // input registers to shuffle, we merge the masks for 2 first registers
719 // and generate a shuffle of 2 registers rather than the reordering of the
720 // first register and then shuffle with the second register. Next,
721 // generate the shuffles of the resulting register + the remaining
722 // registers from the list.
723 auto &&CombineMasks = [](MutableArrayRef<int> FirstMask,
724 ArrayRef<int> SecondMask) {
725 for (int Idx = 0, VF = FirstMask.size(); Idx < VF; ++Idx) {
726 if (SecondMask[Idx] != PoisonMaskElem) {
727 assert(FirstMask[Idx] == PoisonMaskElem &&
728 "Expected undefined mask element.");
729 FirstMask[Idx] = SecondMask[Idx] + VF;
730 }
731 }
732 };
733 auto &&NormalizeMask = [](MutableArrayRef<int> Mask) {
734 for (int Idx = 0, VF = Mask.size(); Idx < VF; ++Idx) {
735 if (Mask[Idx] != PoisonMaskElem)
736 Mask[Idx] = Idx;
737 }
738 };
739 int SecondIdx;
740 bool NewReg = true;
741 do {
742 int FirstIdx = -1;
743 SecondIdx = -1;
744 MutableArrayRef<int> FirstMask, SecondMask;
745 for (unsigned I : seq<unsigned>(2 * NumOfSrcRegs)) {
746 SmallVectorImpl<int> &RegMask = Dest[I];
747 if (RegMask.empty())
748 continue;
749
750 if (FirstIdx == SecondIdx) {
751 FirstIdx = I;
752 FirstMask = RegMask;
753 continue;
754 }
755 SecondIdx = I;
756 SecondMask = RegMask;
757 CombineMasks(FirstMask, SecondMask);
758 ManyInputsAction(FirstMask, FirstIdx, SecondIdx, NewReg);
759 NewReg = false;
760 NormalizeMask(FirstMask);
761 RegMask.clear();
762 SecondMask = FirstMask;
763 SecondIdx = FirstIdx;
764 }
765 if (FirstIdx != SecondIdx && SecondIdx >= 0) {
766 CombineMasks(SecondMask, FirstMask);
767 ManyInputsAction(SecondMask, SecondIdx, FirstIdx, NewReg);
768 NewReg = false;
769 Dest[FirstIdx].clear();
770 NormalizeMask(SecondMask);
771 }
772 } while (SecondIdx >= 0);
773 break;
774 }
775 }
776 }
777}
778
779void llvm::getHorizDemandedEltsForFirstOperand(unsigned VectorBitWidth,
780 const APInt &DemandedElts,
781 APInt &DemandedLHS,
782 APInt &DemandedRHS) {
783 assert(VectorBitWidth >= 128 && "Vectors smaller than 128 bit not supported");
784 int NumLanes = VectorBitWidth / 128;
785 int NumElts = DemandedElts.getBitWidth();
786 int NumEltsPerLane = NumElts / NumLanes;
787 int HalfEltsPerLane = NumEltsPerLane / 2;
788
789 DemandedLHS = APInt::getZero(NumElts);
790 DemandedRHS = APInt::getZero(NumElts);
791
792 // Map DemandedElts to the horizontal operands.
793 for (int Idx = 0; Idx != NumElts; ++Idx) {
794 if (!DemandedElts[Idx])
795 continue;
796 int LaneIdx = (Idx / NumEltsPerLane) * NumEltsPerLane;
797 int LocalIdx = Idx % NumEltsPerLane;
798 if (LocalIdx < HalfEltsPerLane) {
799 DemandedLHS.setBit(LaneIdx + 2 * LocalIdx);
800 } else {
801 LocalIdx -= HalfEltsPerLane;
802 DemandedRHS.setBit(LaneIdx + 2 * LocalIdx);
803 }
804 }
805}
806
809 const TargetTransformInfo *TTI) {
810
811 // DemandedBits will give us every value's live-out bits. But we want
812 // to ensure no extra casts would need to be inserted, so every DAG
813 // of connected values must have the same minimum bitwidth.
819 SmallPtrSet<Instruction *, 4> InstructionSet;
821
822 // Determine the roots. We work bottom-up, from truncs or icmps.
823 bool SeenExtFromIllegalType = false;
824 for (auto *BB : Blocks)
825 for (auto &I : *BB) {
826 InstructionSet.insert(&I);
827
828 if (TTI && (isa<ZExtInst>(&I) || isa<SExtInst>(&I)) &&
829 !TTI->isTypeLegal(I.getOperand(0)->getType()))
830 SeenExtFromIllegalType = true;
831
832 // Only deal with non-vector integers up to 64-bits wide.
833 if ((isa<TruncInst>(&I) || isa<ICmpInst>(&I)) &&
834 !I.getType()->isVectorTy() &&
835 I.getOperand(0)->getType()->getScalarSizeInBits() <= 64) {
836 // Don't make work for ourselves. If we know the loaded type is legal,
837 // don't add it to the worklist.
838 if (TTI && isa<TruncInst>(&I) && TTI->isTypeLegal(I.getType()))
839 continue;
840
841 Worklist.push_back(&I);
842 Roots.insert(&I);
843 }
844 }
845 // Early exit.
846 if (Worklist.empty() || (TTI && !SeenExtFromIllegalType))
847 return MinBWs;
848
849 // Now proceed breadth-first, unioning values together.
850 while (!Worklist.empty()) {
851 Instruction *I = Worklist.pop_back_val();
852 Value *Leader = ECs.getOrInsertLeaderValue(I);
853
854 if (!Visited.insert(I).second)
855 continue;
856
857 // If we encounter a type that is larger than 64 bits, we can't represent
858 // it so bail out.
859 if (DB.getDemandedBits(I).getBitWidth() > 64)
861
862 uint64_t V = DB.getDemandedBits(I).getZExtValue();
863 DBits[Leader] |= V;
864 DBits[I] = V;
865
866 // Casts, loads and instructions outside of our range terminate a chain
867 // successfully.
869 !InstructionSet.count(I))
870 continue;
871
872 // Unsafe casts terminate a chain unsuccessfully. We can't do anything
873 // useful with bitcasts, ptrtoints or inttoptrs and it'd be unsafe to
874 // transform anything that relies on them.
876 !I->getType()->isIntegerTy()) {
877 DBits[Leader] |= ~0ULL;
878 continue;
879 }
880
881 // We don't modify the types of PHIs. Reductions will already have been
882 // truncated if possible, and inductions' sizes will have been chosen by
883 // indvars.
884 if (isa<PHINode>(I))
885 continue;
886
887 // Don't modify the types of operands of a call, as doing that would cause a
888 // signature mismatch.
889 if (isa<CallBase>(I))
890 continue;
891
892 if (DBits[Leader] == ~0ULL)
893 // All bits demanded, no point continuing.
894 continue;
895
896 for (Value *O : I->operands()) {
897 ECs.unionSets(Leader, O);
898 if (auto *OI = dyn_cast<Instruction>(O))
899 Worklist.push_back(OI);
900 }
901 }
902
903 // Now we've discovered all values, walk them to see if there are
904 // any users we didn't see. If there are, we can't optimize that
905 // chain.
906 for (auto &I : DBits)
907 for (auto *U : I.first->users())
908 if (U->getType()->isIntegerTy() && DBits.count(U) == 0)
909 DBits[ECs.getOrInsertLeaderValue(I.first)] |= ~0ULL;
910
911 for (const auto &E : ECs) {
912 if (!E->isLeader())
913 continue;
914 uint64_t LeaderDemandedBits = 0;
915 for (Value *M : ECs.members(*E))
916 LeaderDemandedBits |= DBits[M];
917
918 uint64_t MinBW = llvm::bit_width(LeaderDemandedBits);
919 // Round up to a power of 2
920 MinBW = llvm::bit_ceil(MinBW);
921
922 // We don't modify the types of PHIs. Reductions will already have been
923 // truncated if possible, and inductions' sizes will have been chosen by
924 // indvars.
925 // If we are required to shrink a PHI, abandon this entire equivalence class.
926 bool Abort = false;
927 for (Value *M : ECs.members(*E))
928 if (isa<PHINode>(M) && MinBW < M->getType()->getScalarSizeInBits()) {
929 Abort = true;
930 break;
931 }
932 if (Abort)
933 continue;
934
935 for (Value *M : ECs.members(*E)) {
936 auto *MI = dyn_cast<Instruction>(M);
937 if (!MI)
938 continue;
939 Type *Ty = M->getType();
940 if (Roots.count(MI))
941 Ty = MI->getOperand(0)->getType();
942
943 if (MinBW >= Ty->getScalarSizeInBits())
944 continue;
945
946 // If any of M's operands demand more bits than MinBW then M cannot be
947 // performed safely in MinBW.
948 auto *Call = dyn_cast<CallBase>(MI);
949 auto Ops = Call ? Call->args() : MI->operands();
950 if (any_of(Ops, [&DB, MinBW](Use &U) {
951 auto *CI = dyn_cast<ConstantInt>(U);
952 // For constants shift amounts, check if the shift would result in
953 // poison.
954 if (CI &&
956 U.getOperandNo() == 1)
957 return CI->uge(MinBW);
958 uint64_t BW = bit_width(DB.getDemandedBits(&U).getZExtValue());
959 return bit_ceil(BW) > MinBW;
960 }))
961 continue;
962
963 MinBWs[MI] = MinBW;
964 }
965 }
966
967 return MinBWs;
968}
969
970/// Add all access groups in @p AccGroups to @p List.
971template <typename ListT>
972static void addToAccessGroupList(ListT &List, MDNode *AccGroups) {
973 // Interpret an access group as a list containing itself.
974 if (AccGroups->getNumOperands() == 0) {
975 assert(isValidAsAccessGroup(AccGroups) && "Node must be an access group");
976 List.insert(AccGroups);
977 return;
978 }
979
980 for (const auto &AccGroupListOp : AccGroups->operands()) {
981 auto *Item = cast<MDNode>(AccGroupListOp.get());
982 assert(isValidAsAccessGroup(Item) && "List item must be an access group");
983 List.insert(Item);
984 }
985}
986
987MDNode *llvm::uniteAccessGroups(MDNode *AccGroups1, MDNode *AccGroups2) {
988 if (!AccGroups1)
989 return AccGroups2;
990 if (!AccGroups2)
991 return AccGroups1;
992 if (AccGroups1 == AccGroups2)
993 return AccGroups1;
994
996 addToAccessGroupList(Union, AccGroups1);
997 addToAccessGroupList(Union, AccGroups2);
998
999 if (Union.size() == 0)
1000 return nullptr;
1001 if (Union.size() == 1)
1002 return cast<MDNode>(Union.front());
1003
1004 LLVMContext &Ctx = AccGroups1->getContext();
1005 return MDNode::get(Ctx, Union.getArrayRef());
1006}
1007
1009 const Instruction *Inst2) {
1010 bool MayAccessMem1 = Inst1->mayReadOrWriteMemory();
1011 bool MayAccessMem2 = Inst2->mayReadOrWriteMemory();
1012
1013 if (!MayAccessMem1 && !MayAccessMem2)
1014 return nullptr;
1015 if (!MayAccessMem1)
1016 return Inst2->getMetadata(LLVMContext::MD_access_group);
1017 if (!MayAccessMem2)
1018 return Inst1->getMetadata(LLVMContext::MD_access_group);
1019
1020 MDNode *MD1 = Inst1->getMetadata(LLVMContext::MD_access_group);
1021 MDNode *MD2 = Inst2->getMetadata(LLVMContext::MD_access_group);
1022 if (!MD1 || !MD2)
1023 return nullptr;
1024 if (MD1 == MD2)
1025 return MD1;
1026
1027 // Use set for scalable 'contains' check.
1028 SmallPtrSet<Metadata *, 4> AccGroupSet2;
1029 addToAccessGroupList(AccGroupSet2, MD2);
1030
1031 SmallVector<Metadata *, 4> Intersection;
1032 if (MD1->getNumOperands() == 0) {
1033 assert(isValidAsAccessGroup(MD1) && "Node must be an access group");
1034 if (AccGroupSet2.count(MD1))
1035 Intersection.push_back(MD1);
1036 } else {
1037 for (const MDOperand &Node : MD1->operands()) {
1038 auto *Item = cast<MDNode>(Node.get());
1039 assert(isValidAsAccessGroup(Item) && "List item must be an access group");
1040 if (AccGroupSet2.count(Item))
1041 Intersection.push_back(Item);
1042 }
1043 }
1044
1045 if (Intersection.size() == 0)
1046 return nullptr;
1047 if (Intersection.size() == 1)
1048 return cast<MDNode>(Intersection.front());
1049
1050 LLVMContext &Ctx = Inst1->getContext();
1051 return MDNode::get(Ctx, Intersection);
1052}
1053
1054/// Add metadata from \p Inst to \p Metadata, if it can be preserved after
1055/// vectorization.
1057 Instruction *Inst,
1058 SmallVectorImpl<std::pair<unsigned, MDNode *>> &Metadata) {
1060 static const unsigned SupportedIDs[] = {
1061 LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
1062 LLVMContext::MD_noalias, LLVMContext::MD_fpmath,
1063 LLVMContext::MD_nontemporal, LLVMContext::MD_invariant_load,
1064 LLVMContext::MD_access_group, LLVMContext::MD_mmra};
1065
1066 // Remove any unsupported metadata kinds from Metadata.
1067 for (unsigned Idx = 0; Idx != Metadata.size();) {
1068 if (is_contained(SupportedIDs, Metadata[Idx].first)) {
1069 ++Idx;
1070 } else {
1071 // Swap element to end and remove it.
1072 std::swap(Metadata[Idx], Metadata.back());
1073 Metadata.pop_back();
1074 }
1075 }
1076}
1077
1078/// \returns \p I after propagating metadata from \p VL.
1080 if (VL.empty())
1081 return Inst;
1084
1085 for (auto &[Kind, MD] : Metadata) {
1086 // Skip MMRA metadata if the instruction cannot have it.
1087 if (Kind == LLVMContext::MD_mmra && !canInstructionHaveMMRAs(*Inst))
1088 continue;
1089
1090 for (int J = 1, E = VL.size(); MD && J != E; ++J) {
1091 const Instruction *IJ = cast<Instruction>(VL[J]);
1092 MDNode *IMD = IJ->getMetadata(Kind);
1093
1094 switch (Kind) {
1095 case LLVMContext::MD_mmra: {
1096 MD = MMRAMetadata::combine(Inst->getContext(), MD, IMD);
1097 break;
1098 }
1099 case LLVMContext::MD_tbaa:
1100 MD = MDNode::getMostGenericTBAA(MD, IMD);
1101 break;
1102 case LLVMContext::MD_alias_scope:
1104 break;
1105 case LLVMContext::MD_fpmath:
1106 MD = MDNode::getMostGenericFPMath(MD, IMD);
1107 break;
1108 case LLVMContext::MD_noalias:
1109 case LLVMContext::MD_nontemporal:
1110 case LLVMContext::MD_invariant_load:
1111 MD = MDNode::intersect(MD, IMD);
1112 break;
1113 case LLVMContext::MD_access_group:
1114 MD = intersectAccessGroups(Inst, IJ);
1115 break;
1116 default:
1117 llvm_unreachable("unhandled metadata");
1118 }
1119 }
1120
1121 Inst->setMetadata(Kind, MD);
1122 }
1123
1124 return Inst;
1125}
1126
1127Constant *
1129 const InterleaveGroup<Instruction> &Group) {
1130 // All 1's means mask is not needed.
1131 if (Group.isFull())
1132 return nullptr;
1133
1134 // TODO: support reversed access.
1135 assert(!Group.isReverse() && "Reversed group not supported.");
1136
1138 for (unsigned i = 0; i < VF; i++)
1139 for (unsigned j = 0; j < Group.getFactor(); ++j) {
1140 unsigned HasMember = Group.getMember(j) ? 1 : 0;
1141 Mask.push_back(Builder.getInt1(HasMember));
1142 }
1143
1144 return ConstantVector::get(Mask);
1145}
1146
1148llvm::createReplicatedMask(unsigned ReplicationFactor, unsigned VF) {
1149 SmallVector<int, 16> MaskVec;
1150 for (unsigned i = 0; i < VF; i++)
1151 for (unsigned j = 0; j < ReplicationFactor; j++)
1152 MaskVec.push_back(i);
1153
1154 return MaskVec;
1155}
1156
1158 unsigned NumVecs) {
1160 for (unsigned i = 0; i < VF; i++)
1161 for (unsigned j = 0; j < NumVecs; j++)
1162 Mask.push_back(j * VF + i);
1163
1164 return Mask;
1165}
1166
1168llvm::createStrideMask(unsigned Start, unsigned Stride, unsigned VF) {
1170 for (unsigned i = 0; i < VF; i++)
1171 Mask.push_back(Start + i * Stride);
1172
1173 return Mask;
1174}
1175
1177 unsigned NumInts,
1178 unsigned NumUndefs) {
1180 for (unsigned i = 0; i < NumInts; i++)
1181 Mask.push_back(Start + i);
1182
1183 for (unsigned i = 0; i < NumUndefs; i++)
1184 Mask.push_back(-1);
1185
1186 return Mask;
1187}
1188
1190 unsigned NumElts) {
1191 // Avoid casts in the loop and make sure we have a reasonable number.
1192 int NumEltsSigned = NumElts;
1193 assert(NumEltsSigned > 0 && "Expected smaller or non-zero element count");
1194
1195 // If the mask chooses an element from operand 1, reduce it to choose from the
1196 // corresponding element of operand 0. Undef mask elements are unchanged.
1197 SmallVector<int, 16> UnaryMask;
1198 for (int MaskElt : Mask) {
1199 assert((MaskElt < NumEltsSigned * 2) && "Expected valid shuffle mask");
1200 int UnaryElt = MaskElt >= NumEltsSigned ? MaskElt - NumEltsSigned : MaskElt;
1201 UnaryMask.push_back(UnaryElt);
1202 }
1203 return UnaryMask;
1204}
1205
1206/// A helper function for concatenating vectors. This function concatenates two
1207/// vectors having the same element type. If the second vector has fewer
1208/// elements than the first, it is padded with undefs.
1210 Value *V2) {
1211 VectorType *VecTy1 = dyn_cast<VectorType>(V1->getType());
1212 VectorType *VecTy2 = dyn_cast<VectorType>(V2->getType());
1213 assert(VecTy1 && VecTy2 &&
1214 VecTy1->getScalarType() == VecTy2->getScalarType() &&
1215 "Expect two vectors with the same element type");
1216
1217 unsigned NumElts1 = cast<FixedVectorType>(VecTy1)->getNumElements();
1218 unsigned NumElts2 = cast<FixedVectorType>(VecTy2)->getNumElements();
1219 assert(NumElts1 >= NumElts2 && "Unexpect the first vector has less elements");
1220
1221 if (NumElts1 > NumElts2) {
1222 // Extend with UNDEFs.
1223 V2 = Builder.CreateShuffleVector(
1224 V2, createSequentialMask(0, NumElts2, NumElts1 - NumElts2));
1225 }
1226
1227 return Builder.CreateShuffleVector(
1228 V1, V2, createSequentialMask(0, NumElts1 + NumElts2, 0));
1229}
1230
1232 ArrayRef<Value *> Vecs) {
1233 unsigned NumVecs = Vecs.size();
1234 assert(NumVecs > 1 && "Should be at least two vectors");
1235
1237 ResList.append(Vecs.begin(), Vecs.end());
1238 do {
1240 for (unsigned i = 0; i < NumVecs - 1; i += 2) {
1241 Value *V0 = ResList[i], *V1 = ResList[i + 1];
1242 assert((V0->getType() == V1->getType() || i == NumVecs - 2) &&
1243 "Only the last vector may have a different type");
1244
1245 TmpList.push_back(concatenateTwoVectors(Builder, V0, V1));
1246 }
1247
1248 // Push the last vector if the total number of vectors is odd.
1249 if (NumVecs % 2 != 0)
1250 TmpList.push_back(ResList[NumVecs - 1]);
1251
1252 ResList = TmpList;
1253 NumVecs = ResList.size();
1254 } while (NumVecs > 1);
1255
1256 return ResList[0];
1257}
1258
1260 assert(isa<VectorType>(Mask->getType()) &&
1261 isa<IntegerType>(Mask->getType()->getScalarType()) &&
1262 cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() ==
1263 1 &&
1264 "Mask must be a vector of i1");
1265
1266 auto *ConstMask = dyn_cast<Constant>(Mask);
1267 if (!ConstMask)
1268 return false;
1269 if (ConstMask->isNullValue() || isa<UndefValue>(ConstMask))
1270 return true;
1271 if (isa<ScalableVectorType>(ConstMask->getType()))
1272 return false;
1273 for (unsigned
1274 I = 0,
1275 E = cast<FixedVectorType>(ConstMask->getType())->getNumElements();
1276 I != E; ++I) {
1277 if (auto *MaskElt = ConstMask->getAggregateElement(I))
1278 if (MaskElt->isNullValue() || isa<UndefValue>(MaskElt))
1279 continue;
1280 return false;
1281 }
1282 return true;
1283}
1284
1286 assert(isa<VectorType>(Mask->getType()) &&
1287 isa<IntegerType>(Mask->getType()->getScalarType()) &&
1288 cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() ==
1289 1 &&
1290 "Mask must be a vector of i1");
1291
1292 auto *ConstMask = dyn_cast<Constant>(Mask);
1293 if (!ConstMask)
1294 return false;
1295 if (ConstMask->isAllOnesValue() || isa<UndefValue>(ConstMask))
1296 return true;
1297 if (isa<ScalableVectorType>(ConstMask->getType()))
1298 return false;
1299 for (unsigned
1300 I = 0,
1301 E = cast<FixedVectorType>(ConstMask->getType())->getNumElements();
1302 I != E; ++I) {
1303 if (auto *MaskElt = ConstMask->getAggregateElement(I))
1304 if (MaskElt->isAllOnesValue() || isa<UndefValue>(MaskElt))
1305 continue;
1306 return false;
1307 }
1308 return true;
1309}
1310
1312 assert(isa<VectorType>(Mask->getType()) &&
1313 isa<IntegerType>(Mask->getType()->getScalarType()) &&
1314 cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() ==
1315 1 &&
1316 "Mask must be a vector of i1");
1317
1318 auto *ConstMask = dyn_cast<Constant>(Mask);
1319 if (!ConstMask)
1320 return false;
1321 if (ConstMask->isAllOnesValue() || isa<UndefValue>(ConstMask))
1322 return true;
1323 if (isa<ScalableVectorType>(ConstMask->getType()))
1324 return false;
1325 for (unsigned
1326 I = 0,
1327 E = cast<FixedVectorType>(ConstMask->getType())->getNumElements();
1328 I != E; ++I) {
1329 if (auto *MaskElt = ConstMask->getAggregateElement(I))
1330 if (MaskElt->isAllOnesValue() || isa<UndefValue>(MaskElt))
1331 return true;
1332 }
1333 return false;
1334}
1335
1336/// TODO: This is a lot like known bits, but for
1337/// vectors. Is there something we can common this with?
1339 assert(isa<FixedVectorType>(Mask->getType()) &&
1340 isa<IntegerType>(Mask->getType()->getScalarType()) &&
1341 cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() ==
1342 1 &&
1343 "Mask must be a fixed width vector of i1");
1344
1345 const unsigned VWidth =
1346 cast<FixedVectorType>(Mask->getType())->getNumElements();
1347 APInt DemandedElts = APInt::getAllOnes(VWidth);
1348 if (auto *CV = dyn_cast<ConstantVector>(Mask))
1349 for (unsigned i = 0; i < VWidth; i++)
1350 if (CV->getAggregateElement(i)->isNullValue())
1351 DemandedElts.clearBit(i);
1352 return DemandedElts;
1353}
1354
1355bool InterleavedAccessInfo::isStrided(int Stride) {
1356 unsigned Factor = std::abs(Stride);
1357 return Factor >= 2 && Factor <= MaxInterleaveGroupFactor;
1358}
1359
1360void InterleavedAccessInfo::collectConstStrideAccesses(
1362 const DenseMap<Value*, const SCEV*> &Strides) {
1363 auto &DL = TheLoop->getHeader()->getDataLayout();
1364
1365 // Since it's desired that the load/store instructions be maintained in
1366 // "program order" for the interleaved access analysis, we have to visit the
1367 // blocks in the loop in reverse postorder (i.e., in a topological order).
1368 // Such an ordering will ensure that any load/store that may be executed
1369 // before a second load/store will precede the second load/store in
1370 // AccessStrideInfo.
1371 LoopBlocksDFS DFS(TheLoop);
1372 DFS.perform(LI);
1373 for (BasicBlock *BB : make_range(DFS.beginRPO(), DFS.endRPO()))
1374 for (auto &I : *BB) {
1376 if (!Ptr)
1377 continue;
1378 Type *ElementTy = getLoadStoreType(&I);
1379
1380 // Currently, codegen doesn't support cases where the type size doesn't
1381 // match the alloc size. Skip them for now.
1382 uint64_t Size = DL.getTypeAllocSize(ElementTy);
1383 if (Size * 8 != DL.getTypeSizeInBits(ElementTy))
1384 continue;
1385
1386 // We don't check wrapping here because we don't know yet if Ptr will be
1387 // part of a full group or a group with gaps. Checking wrapping for all
1388 // pointers (even those that end up in groups with no gaps) will be overly
1389 // conservative. For full groups, wrapping should be ok since if we would
1390 // wrap around the address space we would do a memory access at nullptr
1391 // even without the transformation. The wrapping checks are therefore
1392 // deferred until after we've formed the interleaved groups.
1393 int64_t Stride = getPtrStride(PSE, ElementTy, Ptr, TheLoop, *DT, Strides,
1394 /*Assume=*/true, /*ShouldCheckWrap=*/false)
1395 .value_or(0);
1396
1397 const SCEV *Scev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
1398 AccessStrideInfo[&I] = StrideDescriptor(Stride, Scev, Size,
1400 }
1401}
1402
1403// Analyze interleaved accesses and collect them into interleaved load and
1404// store groups.
1405//
1406// When generating code for an interleaved load group, we effectively hoist all
1407// loads in the group to the location of the first load in program order. When
1408// generating code for an interleaved store group, we sink all stores to the
1409// location of the last store. This code motion can change the order of load
1410// and store instructions and may break dependences.
1411//
1412// The code generation strategy mentioned above ensures that we won't violate
1413// any write-after-read (WAR) dependences.
1414//
1415// E.g., for the WAR dependence: a = A[i]; // (1)
1416// A[i] = b; // (2)
1417//
1418// The store group of (2) is always inserted at or below (2), and the load
1419// group of (1) is always inserted at or above (1). Thus, the instructions will
1420// never be reordered. All other dependences are checked to ensure the
1421// correctness of the instruction reordering.
1422//
1423// The algorithm visits all memory accesses in the loop in bottom-up program
1424// order. Program order is established by traversing the blocks in the loop in
1425// reverse postorder when collecting the accesses.
1426//
1427// We visit the memory accesses in bottom-up order because it can simplify the
1428// construction of store groups in the presence of write-after-write (WAW)
1429// dependences.
1430//
1431// E.g., for the WAW dependence: A[i] = a; // (1)
1432// A[i] = b; // (2)
1433// A[i + 1] = c; // (3)
1434//
1435// We will first create a store group with (3) and (2). (1) can't be added to
1436// this group because it and (2) are dependent. However, (1) can be grouped
1437// with other accesses that may precede it in program order. Note that a
1438// bottom-up order does not imply that WAW dependences should not be checked.
1440 bool EnablePredicatedInterleavedMemAccesses) {
1441 LLVM_DEBUG(dbgs() << "LV: Analyzing interleaved accesses...\n");
1442 const auto &Strides = LAI->getSymbolicStrides();
1443
1444 // Holds all accesses with a constant stride.
1446 collectConstStrideAccesses(AccessStrideInfo, Strides);
1447
1448 if (AccessStrideInfo.empty())
1449 return;
1450
1451 // Collect the dependences in the loop.
1452 collectDependences();
1453
1454 // Holds all interleaved store groups temporarily.
1456 // Holds all interleaved load groups temporarily.
1458 // Groups added to this set cannot have new members added.
1459 SmallPtrSet<InterleaveGroup<Instruction> *, 4> CompletedLoadGroups;
1460
1461 // Search in bottom-up program order for pairs of accesses (A and B) that can
1462 // form interleaved load or store groups. In the algorithm below, access A
1463 // precedes access B in program order. We initialize a group for B in the
1464 // outer loop of the algorithm, and then in the inner loop, we attempt to
1465 // insert each A into B's group if:
1466 //
1467 // 1. A and B have the same stride,
1468 // 2. A and B have the same memory object size, and
1469 // 3. A belongs in B's group according to its distance from B.
1470 //
1471 // Special care is taken to ensure group formation will not break any
1472 // dependences.
1473 for (auto BI = AccessStrideInfo.rbegin(), E = AccessStrideInfo.rend();
1474 BI != E; ++BI) {
1475 Instruction *B = BI->first;
1476 StrideDescriptor DesB = BI->second;
1477
1478 // Initialize a group for B if it has an allowable stride. Even if we don't
1479 // create a group for B, we continue with the bottom-up algorithm to ensure
1480 // we don't break any of B's dependences.
1481 InterleaveGroup<Instruction> *GroupB = nullptr;
1482 if (isStrided(DesB.Stride) &&
1483 (!isPredicated(B->getParent()) || EnablePredicatedInterleavedMemAccesses)) {
1484 GroupB = getInterleaveGroup(B);
1485 if (!GroupB) {
1486 LLVM_DEBUG(dbgs() << "LV: Creating an interleave group with:" << *B
1487 << '\n');
1488 GroupB = createInterleaveGroup(B, DesB.Stride, DesB.Alignment);
1489 if (B->mayWriteToMemory())
1490 StoreGroups.insert(GroupB);
1491 else
1492 LoadGroups.insert(GroupB);
1493 }
1494 }
1495
1496 for (auto AI = std::next(BI); AI != E; ++AI) {
1497 Instruction *A = AI->first;
1498 StrideDescriptor DesA = AI->second;
1499
1500 // Our code motion strategy implies that we can't have dependences
1501 // between accesses in an interleaved group and other accesses located
1502 // between the first and last member of the group. Note that this also
1503 // means that a group can't have more than one member at a given offset.
1504 // The accesses in a group can have dependences with other accesses, but
1505 // we must ensure we don't extend the boundaries of the group such that
1506 // we encompass those dependent accesses.
1507 //
1508 // For example, assume we have the sequence of accesses shown below in a
1509 // stride-2 loop:
1510 //
1511 // (1, 2) is a group | A[i] = a; // (1)
1512 // | A[i-1] = b; // (2) |
1513 // A[i-3] = c; // (3)
1514 // A[i] = d; // (4) | (2, 4) is not a group
1515 //
1516 // Because accesses (2) and (3) are dependent, we can group (2) with (1)
1517 // but not with (4). If we did, the dependent access (3) would be within
1518 // the boundaries of the (2, 4) group.
1519 auto DependentMember = [&](InterleaveGroup<Instruction> *Group,
1520 StrideEntry *A) -> Instruction * {
1521 for (uint32_t Index = 0; Index < Group->getFactor(); ++Index) {
1522 Instruction *MemberOfGroupB = Group->getMember(Index);
1523 if (MemberOfGroupB && !canReorderMemAccessesForInterleavedGroups(
1524 A, &*AccessStrideInfo.find(MemberOfGroupB)))
1525 return MemberOfGroupB;
1526 }
1527 return nullptr;
1528 };
1529
1530 auto GroupA = getInterleaveGroup(A);
1531 // If A is a load, dependencies are tolerable, there's nothing to do here.
1532 // If both A and B belong to the same (store) group, they are independent,
1533 // even if dependencies have not been recorded.
1534 // If both GroupA and GroupB are null, there's nothing to do here.
1535 if (A->mayWriteToMemory() && GroupA != GroupB) {
1536 Instruction *DependentInst = nullptr;
1537 // If GroupB is a load group, we have to compare AI against all
1538 // members of GroupB because if any load within GroupB has a dependency
1539 // on AI, we need to mark GroupB as complete and also release the
1540 // store GroupA (if A belongs to one). The former prevents incorrect
1541 // hoisting of load B above store A while the latter prevents incorrect
1542 // sinking of store A below load B.
1543 if (GroupB && LoadGroups.contains(GroupB))
1544 DependentInst = DependentMember(GroupB, &*AI);
1545 else if (!canReorderMemAccessesForInterleavedGroups(&*AI, &*BI))
1546 DependentInst = B;
1547
1548 if (DependentInst) {
1549 // A has a store dependence on B (or on some load within GroupB) and
1550 // is part of a store group. Release A's group to prevent illegal
1551 // sinking of A below B. A will then be free to form another group
1552 // with instructions that precede it.
1553 if (GroupA && StoreGroups.contains(GroupA)) {
1554 LLVM_DEBUG(dbgs() << "LV: Invalidated store group due to "
1555 "dependence between "
1556 << *A << " and " << *DependentInst << '\n');
1557 StoreGroups.remove(GroupA);
1558 releaseGroup(GroupA);
1559 }
1560 // If B is a load and part of an interleave group, no earlier loads
1561 // can be added to B's interleave group, because this would mean the
1562 // DependentInst would move across store A. Mark the interleave group
1563 // as complete.
1564 if (GroupB && LoadGroups.contains(GroupB)) {
1565 LLVM_DEBUG(dbgs() << "LV: Marking interleave group for " << *B
1566 << " as complete.\n");
1567 CompletedLoadGroups.insert(GroupB);
1568 }
1569 }
1570 }
1571 if (CompletedLoadGroups.contains(GroupB)) {
1572 // Skip trying to add A to B, continue to look for other conflicting A's
1573 // in groups to be released.
1574 continue;
1575 }
1576
1577 // At this point, we've checked for illegal code motion. If either A or B
1578 // isn't strided, there's nothing left to do.
1579 if (!isStrided(DesA.Stride) || !isStrided(DesB.Stride))
1580 continue;
1581
1582 // Ignore A if it's already in a group or isn't the same kind of memory
1583 // operation as B.
1584 // Note that mayReadFromMemory() isn't mutually exclusive to
1585 // mayWriteToMemory in the case of atomic loads. We shouldn't see those
1586 // here, canVectorizeMemory() should have returned false - except for the
1587 // case we asked for optimization remarks.
1588 if (isInterleaved(A) ||
1589 (A->mayReadFromMemory() != B->mayReadFromMemory()) ||
1590 (A->mayWriteToMemory() != B->mayWriteToMemory()))
1591 continue;
1592
1593 // Check rules 1 and 2. Ignore A if its stride or size is different from
1594 // that of B.
1595 if (DesA.Stride != DesB.Stride || DesA.Size != DesB.Size)
1596 continue;
1597
1598 // Ignore A if the memory object of A and B don't belong to the same
1599 // address space
1601 continue;
1602
1603 // Calculate the distance from A to B.
1604 const SCEVConstant *DistToB = dyn_cast<SCEVConstant>(
1605 PSE.getSE()->getMinusSCEV(DesA.Scev, DesB.Scev));
1606 if (!DistToB)
1607 continue;
1608 int64_t DistanceToB = DistToB->getAPInt().getSExtValue();
1609
1610 // Check rule 3. Ignore A if its distance to B is not a multiple of the
1611 // size.
1612 if (DistanceToB % static_cast<int64_t>(DesB.Size))
1613 continue;
1614
1615 // All members of a predicated interleave-group must have the same predicate,
1616 // and currently must reside in the same BB.
1617 BasicBlock *BlockA = A->getParent();
1618 BasicBlock *BlockB = B->getParent();
1619 if ((isPredicated(BlockA) || isPredicated(BlockB)) &&
1620 (!EnablePredicatedInterleavedMemAccesses || BlockA != BlockB))
1621 continue;
1622
1623 // The index of A is the index of B plus A's distance to B in multiples
1624 // of the size.
1625 int IndexA =
1626 GroupB->getIndex(B) + DistanceToB / static_cast<int64_t>(DesB.Size);
1627
1628 // Try to insert A into B's group.
1629 if (GroupB->insertMember(A, IndexA, DesA.Alignment)) {
1630 LLVM_DEBUG(dbgs() << "LV: Inserted:" << *A << '\n'
1631 << " into the interleave group with" << *B
1632 << '\n');
1633 InterleaveGroupMap[A] = GroupB;
1634
1635 // Set the first load in program order as the insert position.
1636 if (A->mayReadFromMemory())
1637 GroupB->setInsertPos(A);
1638 }
1639 } // Iteration over A accesses.
1640 } // Iteration over B accesses.
1641
1642 auto InvalidateGroupIfMemberMayWrap = [&](InterleaveGroup<Instruction> *Group,
1643 int Index,
1644 const char *FirstOrLast) -> bool {
1645 Instruction *Member = Group->getMember(Index);
1646 assert(Member && "Group member does not exist");
1647 Value *MemberPtr = getLoadStorePointerOperand(Member);
1648 Type *AccessTy = getLoadStoreType(Member);
1649 if (getPtrStride(PSE, AccessTy, MemberPtr, TheLoop, *DT, Strides,
1650 /*Assume=*/false, /*ShouldCheckWrap=*/true)
1651 .value_or(0))
1652 return false;
1653 LLVM_DEBUG(dbgs() << "LV: Invalidate candidate interleaved group due to "
1654 << FirstOrLast
1655 << " group member potentially pointer-wrapping.\n");
1656 releaseGroup(Group);
1657 return true;
1658 };
1659
1660 // Remove interleaved groups with gaps whose memory
1661 // accesses may wrap around. We have to revisit the getPtrStride analysis,
1662 // this time with ShouldCheckWrap=true, since collectConstStrideAccesses does
1663 // not check wrapping (see documentation there).
1664 // FORNOW we use Assume=false;
1665 // TODO: Change to Assume=true but making sure we don't exceed the threshold
1666 // of runtime SCEV assumptions checks (thereby potentially failing to
1667 // vectorize altogether).
1668 // Additional optional optimizations:
1669 // TODO: If we are peeling the loop and we know that the first pointer doesn't
1670 // wrap then we can deduce that all pointers in the group don't wrap.
1671 // This means that we can forcefully peel the loop in order to only have to
1672 // check the first pointer for no-wrap. When we'll change to use Assume=true
1673 // we'll only need at most one runtime check per interleaved group.
1674 for (auto *Group : LoadGroups) {
1675 // Case 1: A full group. Can Skip the checks; For full groups, if the wide
1676 // load would wrap around the address space we would do a memory access at
1677 // nullptr even without the transformation.
1678 if (Group->isFull())
1679 continue;
1680
1681 // Case 2: If first and last members of the group don't wrap this implies
1682 // that all the pointers in the group don't wrap.
1683 // So we check only group member 0 (which is always guaranteed to exist),
1684 // and group member Factor - 1; If the latter doesn't exist we rely on
1685 // peeling (if it is a non-reversed access -- see Case 3).
1686 if (InvalidateGroupIfMemberMayWrap(Group, 0, "first"))
1687 continue;
1688 if (Group->getMember(Group->getFactor() - 1))
1689 InvalidateGroupIfMemberMayWrap(Group, Group->getFactor() - 1, "last");
1690 else {
1691 // Case 3: A non-reversed interleaved load group with gaps: We need
1692 // to execute at least one scalar epilogue iteration. This will ensure
1693 // we don't speculatively access memory out-of-bounds. We only need
1694 // to look for a member at index factor - 1, since every group must have
1695 // a member at index zero.
1696 if (Group->isReverse()) {
1697 LLVM_DEBUG(
1698 dbgs() << "LV: Invalidate candidate interleaved group due to "
1699 "a reverse access with gaps.\n");
1700 releaseGroup(Group);
1701 continue;
1702 }
1703 LLVM_DEBUG(
1704 dbgs() << "LV: Interleaved group requires epilogue iteration.\n");
1705 RequiresScalarEpilogue = true;
1706 }
1707 }
1708
1709 for (auto *Group : StoreGroups) {
1710 // Case 1: A full group. Can Skip the checks; For full groups, if the wide
1711 // store would wrap around the address space we would do a memory access at
1712 // nullptr even without the transformation.
1713 if (Group->isFull())
1714 continue;
1715
1716 // Interleave-store-group with gaps is implemented using masked wide store.
1717 // Remove interleaved store groups with gaps if
1718 // masked-interleaved-accesses are not enabled by the target.
1719 if (!EnablePredicatedInterleavedMemAccesses) {
1720 LLVM_DEBUG(
1721 dbgs() << "LV: Invalidate candidate interleaved store group due "
1722 "to gaps.\n");
1723 releaseGroup(Group);
1724 continue;
1725 }
1726
1727 // Case 2: If first and last members of the group don't wrap this implies
1728 // that all the pointers in the group don't wrap.
1729 // So we check only group member 0 (which is always guaranteed to exist),
1730 // and the last group member. Case 3 (scalar epilog) is not relevant for
1731 // stores with gaps, which are implemented with masked-store (rather than
1732 // speculative access, as in loads).
1733 if (InvalidateGroupIfMemberMayWrap(Group, 0, "first"))
1734 continue;
1735 for (int Index = Group->getFactor() - 1; Index > 0; Index--)
1736 if (Group->getMember(Index)) {
1737 InvalidateGroupIfMemberMayWrap(Group, Index, "last");
1738 break;
1739 }
1740 }
1741}
1742
1744 // If no group had triggered the requirement to create an epilogue loop,
1745 // there is nothing to do.
1747 return;
1748
1749 // Release groups requiring scalar epilogues. Note that this also removes them
1750 // from InterleaveGroups.
1751 bool ReleasedGroup = InterleaveGroups.remove_if([&](auto *Group) {
1752 if (!Group->requiresScalarEpilogue())
1753 return false;
1754 LLVM_DEBUG(
1755 dbgs()
1756 << "LV: Invalidate candidate interleaved group due to gaps that "
1757 "require a scalar epilogue (not allowed under optsize) and cannot "
1758 "be masked (not enabled). \n");
1759 releaseGroupWithoutRemovingFromSet(Group);
1760 return true;
1761 });
1762 assert(ReleasedGroup && "At least one group must be invalidated, as a "
1763 "scalar epilogue was required");
1764 (void)ReleasedGroup;
1765 RequiresScalarEpilogue = false;
1766}
1767
1768template <typename InstT>
1769void InterleaveGroup<InstT>::addMetadata(InstT *NewInst) const {
1770 llvm_unreachable("addMetadata can only be used for Instruction");
1771}
1772
1773namespace llvm {
1774template <>
1779} // namespace llvm
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
#define X(NUM, ENUM, NAME)
Definition ELF.h:853
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Generic implementation of equivalence classes through the use Tarjan's efficient union-find algorithm...
IRTranslator LLVM IR MI
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define I(x, y, z)
Definition MD5.cpp:57
This file provides utility for Memory Model Relaxation Annotations (MMRAs).
This file defines the SmallVector class.
#define LLVM_DEBUG(...)
Definition Debug.h:119
static unsigned getScalarSizeInBits(Type *Ty)
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static SymbolRef::Type getType(const Symbol *Sym)
Definition TapiFile.cpp:39
This pass exposes codegen information to IR-level passes.
static Value * concatenateTwoVectors(IRBuilderBase &Builder, Value *V1, Value *V2)
A helper function for concatenating vectors.
static cl::opt< unsigned > MaxInterleaveGroupFactor("max-interleave-group-factor", cl::Hidden, cl::desc("Maximum factor for an interleaved access group (default = 8)"), cl::init(8))
Maximum factor for an interleaved memory access.
static void addToAccessGroupList(ListT &List, MDNode *AccGroups)
Add all access groups in AccGroups to List.
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
void clearBit(unsigned BitPosition)
Set a given bit to 0.
Definition APInt.h:1429
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition APInt.h:1353
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1511
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:201
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1585
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
const T & front() const
Get the first element.
Definition ArrayRef.h:144
iterator end() const
Definition ArrayRef.h:130
size_t size() const
Get the array size.
Definition ArrayRef.h:141
iterator begin() const
Definition ArrayRef.h:129
bool empty() const
Check if the array is empty.
Definition ArrayRef.h:136
LLVM Basic Block Representation.
Definition BasicBlock.h:62
This class represents a function call, abstracting a target machine's calling convention.
static LLVM_ABI Constant * get(ArrayRef< Constant * > V)
This is an important base class in LLVM.
Definition Constant.h:43
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition DenseMap.h:174
This represents a collection of equivalence classes and supports three efficient operations: insert a...
iterator_range< member_iterator > members(const ECValue &ECV) const
const ElemTy & getOrInsertLeaderValue(const ElemTy &V)
Return the leader for the specified value that is in the set.
member_iterator unionSets(const ElemTy &V1, const ElemTy &V2)
Merge the two equivalence sets for the specified values, inserting them if they do not already exist ...
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
This instruction inserts a single (scalar) element into a VectorType value.
bool mayReadOrWriteMemory() const
Return true if this instruction may read or write memory.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
void getAllMetadataOtherThanDebugLoc(SmallVectorImpl< std::pair< unsigned, MDNode * > > &MDs) const
This does the same thing as getAllMetadata, except that it filters out the debug location.
The group of interleaved loads/stores sharing the same stride and close to each other.
uint32_t getFactor() const
InstTy * getMember(uint32_t Index) const
Get the member with the given index Index.
bool isFull() const
Return true if this group is full, i.e. it has no gaps.
uint32_t getIndex(const InstTy *Instr) const
Get the index for the given member.
void setInsertPos(InstTy *Inst)
bool isReverse() const
void addMetadata(InstTy *NewInst) const
Add metadata (e.g.
bool insertMember(InstTy *Instr, int32_t Index, Align NewAlign)
Try to insert a new member Instr with index Index and alignment NewAlign.
InterleaveGroup< Instruction > * getInterleaveGroup(const Instruction *Instr) const
Get the interleave group that Instr belongs to.
bool requiresScalarEpilogue() const
Returns true if an interleaved group that may access memory out-of-bounds requires a scalar epilogue ...
bool isInterleaved(Instruction *Instr) const
Check if Instr belongs to any interleave group.
LLVM_ABI void analyzeInterleaving(bool EnableMaskedInterleavedGroup)
Analyze the interleaved accesses and collect them in interleave groups.
LLVM_ABI void invalidateGroupsRequiringScalarEpilogue()
Invalidate groups that require a scalar epilogue (due to gaps).
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
Metadata node.
Definition Metadata.h:1075
static LLVM_ABI MDNode * getMostGenericAliasScope(MDNode *A, MDNode *B)
static LLVM_ABI MDNode * getMostGenericTBAA(MDNode *A, MDNode *B)
ArrayRef< MDOperand > operands() const
Definition Metadata.h:1437
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1567
static LLVM_ABI MDNode * getMostGenericFPMath(MDNode *A, MDNode *B)
unsigned getNumOperands() const
Return number of MDNode operands.
Definition Metadata.h:1445
static LLVM_ABI MDNode * intersect(MDNode *A, MDNode *B)
LLVMContext & getContext() const
Definition Metadata.h:1239
Tracking metadata reference owned by Metadata.
Definition Metadata.h:897
static LLVM_ABI MDNode * combine(LLVMContext &Ctx, const MMRAMetadata &A, const MMRAMetadata &B)
Combines A and B according to MMRA semantics.
This class implements a map that also provides access to all stored values in a deterministic order.
Definition MapVector.h:38
iterator find(const KeyT &Key)
Definition MapVector.h:156
bool empty() const
Definition MapVector.h:79
reverse_iterator rend()
Definition MapVector.h:76
reverse_iterator rbegin()
Definition MapVector.h:72
Root of the metadata hierarchy.
Definition Metadata.h:64
Represent a mutable reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:294
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
This class represents a constant integer value.
const APInt & getAPInt() const
bool remove(const value_type &X)
Remove an item from the set vector.
Definition SetVector.h:181
bool contains(const_arg_type key) const
Check if the SetVector contains the given key.
Definition SetVector.h:252
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:151
This instruction constructs a fixed permutation of two input vectors.
int getMaskValue(unsigned Elt) const
Return the shuffle mask value of this instruction for the given element index.
VectorType * getType() const
Overload to return most specific vector type.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
bool remove_if(UnaryPredicate P)
Remove elements that match the given predicate.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:339
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void assign(size_type NumElts, ValueParamT Elt)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Provides information about what library functions are available for the current target.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
ArrayRef< Type * > subtypes() const
Definition Type.h:381
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
Value * getOperand(unsigned i) const
Definition User.h:207
static LLVM_ABI bool isVPCast(Intrinsic::ID ID)
static LLVM_ABI std::optional< unsigned > getVectorLengthParamPos(Intrinsic::ID IntrinsicID)
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.h:258
Base class of all SIMD vector types.
Type * getElementType() const
An efficient, type-erasing, non-owning reference to a callable.
CallInst * Call
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
LLVM_ABI bool isTriviallyScalarizable(ID id)
Returns true if the intrinsic is trivially scalarizable.
LLVM_ABI bool isTargetIntrinsic(ID IID)
isTargetIntrinsic - Returns true if IID is an intrinsic specific to a certain target.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
bool match(Val *V, const Pattern &P)
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
auto m_BinOp()
Match an arbitrary binary operation and ignore it.
auto m_Value()
Match an arbitrary value and ignore it.
auto m_Constant()
Match an arbitrary Constant and ignore it.
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
auto m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1738
unsigned getLoadStoreAddressSpace(const Value *I)
A helper function that returns the address space of the pointer operand of load or store instruction.
LLVM_ABI Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI)
Returns intrinsic ID for call.
LLVM_ABI bool canInstructionHaveMMRAs(const Instruction &I)
LLVM_ABI APInt possiblyDemandedEltsInMask(Value *Mask)
Given a mask vector of the form <Y x i1>, return an APInt (of bitwidth Y) for each lane which may be ...
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2553
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
LLVM_ABI llvm::SmallVector< int, 16 > createUnaryMask(ArrayRef< int > Mask, unsigned NumElts)
Given a shuffle mask for a binary shuffle, create the equivalent shuffle mask assuming both operands ...
LLVM_ABI void getMetadataToPropagate(Instruction *Inst, SmallVectorImpl< std::pair< unsigned, MDNode * > > &Metadata)
Add metadata from Inst to Metadata, if it can be preserved after vectorization.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
Definition bit.h:325
LLVM_ABI Value * concatenateVectors(IRBuilderBase &Builder, ArrayRef< Value * > Vecs)
Concatenate a list of vectors.
Align getLoadStoreAlignment(const Value *I)
A helper function that returns the alignment of load or store instruction.
LLVM_ABI bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
LLVM_ABI Instruction * propagateMetadata(Instruction *I, ArrayRef< Value * > VL)
Specifically, let Kinds = [MD_tbaa, MD_alias_scope, MD_noalias, MD_fpmath, MD_nontemporal,...
LLVM_ABI Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
T bit_ceil(T Value)
Returns the smallest integral power of two no smaller than Value if Value is nonzero.
Definition bit.h:362
constexpr auto equal_to(T &&Arg)
Functor variant of std::equal_to that can be used as a UnaryPredicate in functional algorithms like a...
Definition STLExtras.h:2172
LLVM_ABI MDNode * intersectAccessGroups(const Instruction *Inst1, const Instruction *Inst2)
Compute the access-group list of access groups that Inst1 and Inst2 are both in.
unsigned M1(unsigned Val)
Definition VE.h:377
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1745
LLVM_ABI bool getShuffleDemandedElts(int SrcWidth, ArrayRef< int > Mask, const APInt &DemandedElts, APInt &DemandedLHS, APInt &DemandedRHS, bool AllowUndefElts=false)
Transform a shuffle mask's output demanded element mask into demanded element masks for the 2 operand...
LLVM_ABI bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned el...
LLVM_ABI Constant * createBitMaskForGaps(IRBuilderBase &Builder, unsigned VF, const InterleaveGroup< Instruction > &Group)
Create a mask that filters the members of an interleave group where there are gaps.
constexpr unsigned MaxAnalysisRecursionDepth
LLVM_ABI llvm::SmallVector< int, 16 > createStrideMask(unsigned Start, unsigned Stride, unsigned VF)
Create a stride shuffle mask.
LLVM_ABI void getHorizDemandedEltsForFirstOperand(unsigned VectorBitWidth, const APInt &DemandedElts, APInt &DemandedLHS, APInt &DemandedRHS)
Compute the demanded elements mask of horizontal binary operations.
LLVM_ABI llvm::SmallVector< int, 16 > createReplicatedMask(unsigned ReplicationFactor, unsigned VF)
Create a mask with replicated elements.
LLVM_ABI unsigned getDeinterleaveIntrinsicFactor(Intrinsic::ID ID)
Returns the corresponding factor of llvm.vector.deinterleaveN intrinsics.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
LLVM_ABI unsigned getInterleaveIntrinsicFactor(Intrinsic::ID ID)
Returns the corresponding factor of llvm.vector.interleaveN intrinsics.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ABI bool maskIsAllOneOrUndef(Value *Mask)
Given a mask vector of i1, Return true if all of the elements of this predicate mask are known to be ...
constexpr int PoisonMaskElem
LLVM_ABI bool isTriviallyScalarizable(Intrinsic::ID ID)
Identify if the intrinsic is trivially scalarizable.
LLVM_ABI bool isValidAsAccessGroup(MDNode *AccGroup)
Return whether an MDNode might represent an access group.
LLVM_ABI Intrinsic::ID getIntrinsicForCallSite(const CallBase &CB, const TargetLibraryInfo *TLI)
Map a call instruction to an intrinsic ID.
LLVM_ABI bool isVectorIntrinsicWithStructReturnOverloadAtField(Intrinsic::ID ID, int RetIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic that returns a struct is overloaded at the struct elem...
TargetTransformInfo TTI
LLVM_ABI void narrowShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Replace each shuffle mask index with the scaled sequential indices for an equivalent mask of narrowed...
LLVM_ABI bool isMaskedSlidePair(ArrayRef< int > Mask, int NumElts, std::array< std::pair< int, int >, 2 > &SrcInfo)
Does this shuffle mask represent either one slide shuffle or a pair of two slide shuffles,...
LLVM_ABI VectorType * getDeinterleavedVectorType(IntrinsicInst *DI)
Given a deinterleaveN intrinsic, return the (narrow) vector type of each factor.
LLVM_ABI llvm::SmallVector< int, 16 > createInterleaveMask(unsigned VF, unsigned NumVecs)
Create an interleave shuffle mask.
LLVM_ABI bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic has a scalar operand.
LLVM_ABI const SCEV * replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE, const DenseMap< Value *, const SCEV * > &PtrToStride, Value *Ptr)
Return the SCEV corresponding to a pointer with the symbolic stride replaced with constant one,...
LLVM_ABI Value * findScalarElement(Value *V, unsigned EltNo)
Given a vector and an element number, see if the scalar value is already around as a register,...
LLVM_ABI MDNode * uniteAccessGroups(MDNode *AccGroups1, MDNode *AccGroups2)
Compute the union of two access-group lists.
unsigned M0(unsigned Val)
Definition VE.h:376
auto make_second_range(ContainerTy &&c)
Given a container of pairs, return a range over the second elements.
Definition STLExtras.h:1408
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition STLExtras.h:2018
LLVM_ABI bool maskIsAllZeroOrUndef(Value *Mask)
Given a mask vector of i1, Return true if all of the elements of this predicate mask are known to be ...
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1771
LLVM_ABI void getShuffleMaskWithWidestElts(ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Repetitively apply widenShuffleMaskElts() for as long as it succeeds, to get the shuffle mask with wi...
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1946
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
LLVM_ABI void processShuffleMasks(ArrayRef< int > Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs, unsigned NumOfUsedRegs, function_ref< void()> NoInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned)> SingleInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned, bool)> ManyInputsAction)
Splits and processes shuffle mask depending on the number of input and output registers.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer lists are equal or the list.
Definition STLExtras.h:2165
LLVM_ABI bool maskContainsAllOneOrUndef(Value *Mask)
Given a mask vector of i1, Return true if any of the elements of this predicate mask are known to be ...
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition Sequence.h:305
LLVM_ABI bool isTriviallyVectorizable(Intrinsic::ID ID)
Identify if the intrinsic is trivially vectorizable.
LLVM_ABI llvm::SmallVector< int, 16 > createSequentialMask(unsigned Start, unsigned NumInts, unsigned NumUndefs)
Create a sequential shuffle mask.
LLVM_ABI std::optional< int64_t > getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, const Loop *Lp, const DominatorTree &DT, const DenseMap< Value *, const SCEV * > &StridesMap=DenseMap< Value *, const SCEV * >(), bool Assume=false, bool ShouldCheckWrap=true)
If the pointer has a constant stride return it in units of the access type size.
LLVM_ABI bool isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, int OpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic is overloaded on the type of the operand at index OpdI...
LLVM_ABI MapVector< Instruction *, uint64_t > computeMinimumValueSizes(ArrayRef< BasicBlock * > Blocks, DemandedBits &DB, const TargetTransformInfo *TTI=nullptr)
Compute a map of integer instructions to their minimum legal type size.
LLVM_ABI bool scaleShuffleMaskElts(unsigned NumDstElts, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Attempt to narrow/widen the Mask shuffle mask to the NumDstElts target width.
LLVM_ABI int getSplatIndex(ArrayRef< int > Mask)
If all non-negative Mask elements are the same value, return that value.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:876