LLVM 23.0.0git
AArch64PerfectShuffle.h
Go to the documentation of this file.
1//===-- AArch64PerfectShuffle.h - AdvSIMD Perfect Shuffle Table -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file declares data for the optimal way to build a perfect shuffle using
10// AdvSIMD instructions. The data is generated by llvm-PerfectShuffle.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64PERFECTSHUFFLE_H
15#define LLVM_LIB_TARGET_AARCH64_AARCH64PERFECTSHUFFLE_H
16
17#include "llvm/ADT/ArrayRef.h"
18#include "llvm/ADT/STLExtras.h"
19
20namespace llvm {
21
22extern const unsigned PerfectShuffleTable[6561 + 1];
23
25 assert(M.size() == 4 && "Expected a 4 entry perfect shuffle");
26
27 // Special case zero-cost nop copies, from either LHS or RHS.
28 if (llvm::all_of(llvm::enumerate(M), [](const auto &E) {
29 return E.value() < 0 || E.value() == (int)E.index();
30 }))
31 return 0;
32 if (llvm::all_of(llvm::enumerate(M), [](const auto &E) {
33 return E.value() < 0 || E.value() == (int)E.index() + 4;
34 }))
35 return 0;
36
37 // Get the four mask elementd from the 2 inputs. Perfect shuffles encode undef
38 // elements with value 8.
39 unsigned PFIndexes[4];
40 for (unsigned i = 0; i != 4; ++i) {
41 assert(M[i] < 8 && "Expected a maximum entry of 8 for shuffle mask");
42 if (M[i] < 0)
43 PFIndexes[i] = 8;
44 else
45 PFIndexes[i] = M[i];
46 }
47
48 // Compute the index in the perfect shuffle table.
49 unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 +
50 PFIndexes[2] * 9 + PFIndexes[3];
51 unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
52 // And extract the cost from the upper bits. The cost is encoded as Cost-1.
53 return (PFEntry >> 30) + 1;
54}
55
56/// Return true for zip1 or zip2 masks of the form:
57/// <0, 8, 1, 9, 2, 10, 3, 11> (WhichResultOut = 0, OperandOrderOut = 0) or
58/// <4, 12, 5, 13, 6, 14, 7, 15> (WhichResultOut = 1, OperandOrderOut = 0) or
59/// <8, 0, 9, 1, 10, 2, 11, 3> (WhichResultOut = 0, OperandOrderOut = 1) or
60/// <12, 4, 13, 5, 14, 6, 15, 7> (WhichResultOut = 1, OperandOrderOut = 1)
61inline bool isZIPMask(ArrayRef<int> M, unsigned NumElts,
62 unsigned &WhichResultOut, unsigned &OperandOrderOut) {
63 if (NumElts % 2 != 0)
64 return false;
65
66 // "Result" corresponds to "WhichResultOut", selecting between zip1 and zip2.
67 // "Order" corresponds to "OperandOrderOut", selecting the order of operands
68 // for the instruction (flipped or not).
69 bool Result0Order0 = true; // WhichResultOut = 0, OperandOrderOut = 0
70 bool Result1Order0 = true; // WhichResultOut = 1, OperandOrderOut = 0
71 bool Result0Order1 = true; // WhichResultOut = 0, OperandOrderOut = 1
72 bool Result1Order1 = true; // WhichResultOut = 1, OperandOrderOut = 1
73 // Check all elements match.
74 for (unsigned i = 0; i != NumElts; i += 2) {
75 if (M[i] >= 0) {
76 unsigned EvenElt = (unsigned)M[i];
77 if (EvenElt != i / 2)
78 Result0Order0 = false;
79 if (EvenElt != NumElts / 2 + i / 2)
80 Result1Order0 = false;
81 if (EvenElt != NumElts + i / 2)
82 Result0Order1 = false;
83 if (EvenElt != NumElts + NumElts / 2 + i / 2)
84 Result1Order1 = false;
85 }
86 if (M[i + 1] >= 0) {
87 unsigned OddElt = (unsigned)M[i + 1];
88 if (OddElt != NumElts + i / 2)
89 Result0Order0 = false;
90 if (OddElt != NumElts + NumElts / 2 + i / 2)
91 Result1Order0 = false;
92 if (OddElt != i / 2)
93 Result0Order1 = false;
94 if (OddElt != NumElts / 2 + i / 2)
95 Result1Order1 = false;
96 }
97 }
98
99 if (Result0Order0 + Result1Order0 + Result0Order1 + Result1Order1 != 1)
100 return false;
101
102 WhichResultOut = (Result0Order0 || Result0Order1) ? 0 : 1;
103 OperandOrderOut = (Result0Order0 || Result1Order0) ? 0 : 1;
104 return true;
105}
106
107/// Return true for uzp1 or uzp2 masks of the form:
108/// <0, 2, 4, 6, 8, 10, 12, 14> or
109/// <1, 3, 5, 7, 9, 11, 13, 15>
110inline bool isUZPMask(ArrayRef<int> M, unsigned NumElts,
111 unsigned &WhichResultOut) {
112 // Check the first non-undef element for which half to use.
113 unsigned WhichResult = 2;
114 for (unsigned i = 0; i != NumElts; i++) {
115 if (M[i] >= 0) {
116 WhichResult = ((unsigned)M[i] == i * 2 ? 0 : 1);
117 break;
118 }
119 }
120 if (WhichResult == 2)
121 return false;
122
123 // Check all elements match.
124 for (unsigned i = 0; i != NumElts; ++i) {
125 if (M[i] < 0)
126 continue; // ignore UNDEF indices
127 if ((unsigned)M[i] != 2 * i + WhichResult)
128 return false;
129 }
130 WhichResultOut = WhichResult;
131 return true;
132}
133
134/// Return true for trn1 or trn2 masks of the form:
135/// <0, 8, 2, 10, 4, 12, 6, 14> (WhichResultOut = 0, OperandOrderOut = 0) or
136/// <1, 9, 3, 11, 5, 13, 7, 15> (WhichResultOut = 1, OperandOrderOut = 0) or
137/// <8, 0, 10, 2, 12, 4, 14, 6> (WhichResultOut = 0, OperandOrderOut = 1) or
138/// <9, 1, 11, 3, 13, 5, 15, 7> (WhichResultOut = 1, OperandOrderOut = 1) or
139inline bool isTRNMask(ArrayRef<int> M, unsigned NumElts,
140 unsigned &WhichResultOut, unsigned &OperandOrderOut) {
141 if (NumElts % 2 != 0)
142 return false;
143
144 // "Result" corresponds to "WhichResultOut", selecting between trn1 and trn2.
145 // "Order" corresponds to "OperandOrderOut", selecting the order of operands
146 // for the instruction (flipped or not).
147 bool Result0Order0 = true; // WhichResultOut = 0, OperandOrderOut = 0
148 bool Result1Order0 = true; // WhichResultOut = 1, OperandOrderOut = 0
149 bool Result0Order1 = true; // WhichResultOut = 0, OperandOrderOut = 1
150 bool Result1Order1 = true; // WhichResultOut = 1, OperandOrderOut = 1
151 // Check all elements match.
152 for (unsigned i = 0; i != NumElts; i += 2) {
153 if (M[i] >= 0) {
154 unsigned EvenElt = (unsigned)M[i];
155 if (EvenElt != i)
156 Result0Order0 = false;
157 if (EvenElt != i + 1)
158 Result1Order0 = false;
159 if (EvenElt != NumElts + i)
160 Result0Order1 = false;
161 if (EvenElt != NumElts + i + 1)
162 Result1Order1 = false;
163 }
164 if (M[i + 1] >= 0) {
165 unsigned OddElt = (unsigned)M[i + 1];
166 if (OddElt != NumElts + i)
167 Result0Order0 = false;
168 if (OddElt != NumElts + i + 1)
169 Result1Order0 = false;
170 if (OddElt != i)
171 Result0Order1 = false;
172 if (OddElt != i + 1)
173 Result1Order1 = false;
174 }
175 }
176
177 if (Result0Order0 + Result1Order0 + Result0Order1 + Result1Order1 != 1)
178 return false;
179
180 WhichResultOut = (Result0Order0 || Result0Order1) ? 0 : 1;
181 OperandOrderOut = (Result0Order0 || Result1Order0) ? 0 : 1;
182 return true;
183}
184
185/// isREVMask - Check if a vector shuffle corresponds to a REV
186/// instruction with the specified blocksize. (The order of the elements
187/// within each block of the vector is reversed.)
188inline bool isREVMask(ArrayRef<int> M, unsigned EltSize, unsigned NumElts,
189 unsigned BlockSize) {
190 assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64 ||
191 BlockSize == 128) &&
192 "Only possible block sizes for REV are: 16, 32, 64, 128");
193
194 unsigned BlockElts = M[0] + 1;
195 // If the first shuffle index is UNDEF, be optimistic.
196 if (M[0] < 0)
197 BlockElts = BlockSize / EltSize;
198
199 if (BlockSize <= EltSize || BlockSize != BlockElts * EltSize)
200 return false;
201
202 for (unsigned i = 0; i < NumElts; ++i) {
203 if (M[i] < 0)
204 continue; // ignore UNDEF indices
205 if ((unsigned)M[i] != (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts))
206 return false;
207 }
208
209 return true;
210}
211
212/// isDUPQMask - matches a splat of equivalent lanes within segments of a given
213/// number of elements.
214inline std::optional<unsigned> isDUPQMask(ArrayRef<int> Mask, unsigned Segments,
215 unsigned SegmentSize) {
216 unsigned Lane = unsigned(Mask[0]);
217
218 // Make sure there's no size changes.
219 if (SegmentSize * Segments != Mask.size())
220 return std::nullopt;
221
222 // Check the first index corresponds to one of the lanes in the first segment.
223 if (Lane >= SegmentSize)
224 return std::nullopt;
225
226 // Check that all lanes match the first, adjusted for segment.
227 // Undef/poison lanes (<0) are also accepted.
228 if (all_of(enumerate(Mask), [&](auto P) {
229 const unsigned SegmentIndex = P.index() / SegmentSize;
230 return P.value() < 0 ||
231 unsigned(P.value()) == Lane + SegmentIndex * SegmentSize;
232 }))
233 return Lane;
234
235 return std::nullopt;
236}
237
238/// isDUPFirstSegmentMask - matches a splat of the first 128b segment.
239inline bool isDUPFirstSegmentMask(ArrayRef<int> Mask, unsigned Segments,
240 unsigned SegmentSize) {
241 // Make sure there's no size changes.
242 if (SegmentSize * Segments != Mask.size())
243 return false;
244
245 // Check that all lanes refer to the equivalent lane in the first segment.
246 // Undef/poison lanes (<0) are also accepted.
247 return all_of(enumerate(Mask), [&](auto P) {
248 const unsigned IndexWithinSegment = P.index() % SegmentSize;
249 return P.value() < 0 || unsigned(P.value()) == IndexWithinSegment;
250 });
251}
252
253} // namespace llvm
254
255#endif
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define P(N)
This file contains some templates that are useful if you are working with the STL at all.
static const int BlockSize
Definition TarWriter.cpp:33
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
This is an optimization pass for GlobalISel generic memory operations.
std::optional< unsigned > isDUPQMask(ArrayRef< int > Mask, unsigned Segments, unsigned SegmentSize)
isDUPQMask - matches a splat of equivalent lanes within segments of a given number of elements.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1738
bool isZIPMask(ArrayRef< int > M, unsigned NumElts, unsigned &WhichResultOut, unsigned &OperandOrderOut)
Return true for zip1 or zip2 masks of the form: <0, 8, 1, 9, 2, 10, 3, 11> (WhichResultOut = 0,...
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2553
bool isDUPFirstSegmentMask(ArrayRef< int > Mask, unsigned Segments, unsigned SegmentSize)
isDUPFirstSegmentMask - matches a splat of the first 128b segment.
unsigned getPerfectShuffleCost(llvm::ArrayRef< int > M)
bool isUZPMask(ArrayRef< int > M, unsigned NumElts, unsigned &WhichResultOut)
Return true for uzp1 or uzp2 masks of the form: <0, 2, 4, 6, 8, 10, 12, 14> or <1,...
bool isREVMask(ArrayRef< int > M, unsigned EltSize, unsigned NumElts, unsigned BlockSize)
isREVMask - Check if a vector shuffle corresponds to a REV instruction with the specified blocksize.
const unsigned PerfectShuffleTable[6561+1]
bool isTRNMask(ArrayRef< int > M, unsigned NumElts, unsigned &WhichResultOut, unsigned &OperandOrderOut)
Return true for trn1 or trn2 masks of the form: <0, 8, 2, 10, 4, 12, 6, 14> (WhichResultOut = 0,...