//===-- X86ShuffleDecode.cpp - X86 shuffle decode logic -------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Define several functions to decode x86 specific shuffle semantics into a
// generic vector mask.
//
//===----------------------------------------------------------------------===//

#include "X86ShuffleDecode.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/MathExtras.h"

//===----------------------------------------------------------------------===//
// Vector Mask Decoding
//===----------------------------------------------------------------------===//

namespace llvm {

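// e.g. Imm = 0x9C gives CountS = 2, CountD = 1, ZMask = 0b1100 and decodes
// to <0, 6, SM_SentinelZero, SM_SentinelZero>.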
void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
  // Default to copying the dest value.
  ShuffleMask.push_back(0);
  ShuffleMask.push_back(1);
  ShuffleMask.push_back(2);
  ShuffleMask.push_back(3);

  // Decode the immediate.
  unsigned ZMask = Imm & 15;
  unsigned CountD = (Imm >> 4) & 3;
  unsigned CountS = (Imm >> 6) & 3;

  // CountS selects which input element to use.
  unsigned InVal = 4 + CountS;
  // CountD specifies which element of destination to update.
  ShuffleMask[CountD] = InVal;
  // ZMask zaps values, potentially overriding the CountD elt.
  if (ZMask & 1) ShuffleMask[0] = SM_SentinelZero;
  if (ZMask & 2) ShuffleMask[1] = SM_SentinelZero;
  if (ZMask & 4) ShuffleMask[2] = SM_SentinelZero;
  if (ZMask & 8) ShuffleMask[3] = SM_SentinelZero;
}

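// e.g. NumElts = 8, Idx = 2, Len = 3 decodes to <0,1,8,9,10,5,6,7>.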
void DecodeInsertElementMask(unsigned NumElts, unsigned Idx, unsigned Len,
                             SmallVectorImpl<int> &ShuffleMask) {
  assert((Idx + Len) <= NumElts && "Insertion out of range");

  for (unsigned i = 0; i != NumElts; ++i)
    ShuffleMask.push_back(i);
  for (unsigned i = 0; i != Len; ++i)
    ShuffleMask[Idx + i] = NumElts + i;
}

// <3,1> or <6,7,2,3>
void DecodeMOVHLPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask) {
  for (unsigned i = NElts / 2; i != NElts; ++i)
    ShuffleMask.push_back(NElts + i);

  for (unsigned i = NElts / 2; i != NElts; ++i)
    ShuffleMask.push_back(i);
}

// <0,2> or <0,1,4,5>
void DecodeMOVLHPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask) {
  for (unsigned i = 0; i != NElts / 2; ++i)
    ShuffleMask.push_back(i);

  for (unsigned i = 0; i != NElts / 2; ++i)
    ShuffleMask.push_back(NElts + i);
}

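// <0,0,2,2> or <0,0,2,2,4,4,6,6>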
void DecodeMOVSLDUPMask(unsigned NumElts, SmallVectorImpl<int> &ShuffleMask) {
  for (int i = 0, e = NumElts / 2; i < e; ++i) {
    ShuffleMask.push_back(2 * i);
    ShuffleMask.push_back(2 * i);
  }
}

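// <1,1,3,3> or <1,1,3,3,5,5,7,7>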
void DecodeMOVSHDUPMask(unsigned NumElts, SmallVectorImpl<int> &ShuffleMask) {
  for (int i = 0, e = NumElts / 2; i < e; ++i) {
    ShuffleMask.push_back(2 * i + 1);
    ShuffleMask.push_back(2 * i + 1);
  }
}

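// <0,0> or <0,0,2,2> or <0,0,2,2,4,4,6,6>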
void DecodeMOVDDUPMask(unsigned NumElts, SmallVectorImpl<int> &ShuffleMask) {
  const unsigned NumLaneElts = 2;

  for (unsigned l = 0; l < NumElts; l += NumLaneElts)
    for (unsigned i = 0; i < NumLaneElts; ++i)
      ShuffleMask.push_back(l);
}

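// e.g. v16i8 PSLLDQ with Imm = 4 (a 4-byte left shift) decodes to
// <zero,zero,zero,zero,0,1,...,11>.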
void DecodePSLLDQMask(unsigned NumElts, unsigned Imm,
                      SmallVectorImpl<int> &ShuffleMask) {
  const unsigned NumLaneElts = 16;

  for (unsigned l = 0; l < NumElts; l += NumLaneElts)
    for (unsigned i = 0; i < NumLaneElts; ++i) {
      int M = SM_SentinelZero;
      if (i >= Imm) M = i - Imm + l;
      ShuffleMask.push_back(M);
    }
}

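// e.g. v16i8 PSRLDQ with Imm = 4 decodes to <4,5,...,15,zero,zero,zero,zero>.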
void DecodePSRLDQMask(unsigned NumElts, unsigned Imm,
                      SmallVectorImpl<int> &ShuffleMask) {
  const unsigned NumLaneElts = 16;

  for (unsigned l = 0; l < NumElts; l += NumLaneElts)
    for (unsigned i = 0; i < NumLaneElts; ++i) {
      unsigned Base = i + Imm;
      int M = Base + l;
      if (Base >= NumLaneElts) M = SM_SentinelZero;
      ShuffleMask.push_back(M);
    }
}

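// e.g. v16i8 PALIGNR with Imm = 4 decodes to <4,5,...,15,16,17,18,19>, i.e.
// bytes 4..19 of the concatenated sources.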
void DecodePALIGNRMask(unsigned NumElts, unsigned Imm,
                       SmallVectorImpl<int> &ShuffleMask) {
  const unsigned NumLaneElts = 16;

  for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
    for (unsigned i = 0; i != NumLaneElts; ++i) {
      unsigned Base = i + Imm;
      // If i+Imm is out of this lane then we actually need the other source.
      if (Base >= NumLaneElts) Base += NumElts - NumLaneElts;
      ShuffleMask.push_back(Base + l);
    }
  }
}

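// e.g. v8i32 VALIGND with Imm = 3 decodes to <3,4,5,6,7,8,9,10>, where
// elements 8 and up come from the second source.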
void DecodeVALIGNMask(unsigned NumElts, unsigned Imm,
                      SmallVectorImpl<int> &ShuffleMask) {
  // Not all bits of the immediate are used so mask it.
  assert(isPowerOf2_32(NumElts) && "NumElts should be power of 2");
  Imm = Imm & (NumElts - 1);
  for (unsigned i = 0; i != NumElts; ++i)
    ShuffleMask.push_back(i + Imm);
}

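// e.g. v4i32 PSHUFD with Imm = 0x1B decodes to the reversal <3,2,1,0>.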
void DecodePSHUFMask(unsigned NumElts, unsigned ScalarBits, unsigned Imm,
                     SmallVectorImpl<int> &ShuffleMask) {
  unsigned Size = NumElts * ScalarBits;
  unsigned NumLanes = Size / 128;
  if (NumLanes == 0) NumLanes = 1; // Handle MMX
  unsigned NumLaneElts = NumElts / NumLanes;

  // Splat the 8-bit immediate to all 4 bytes; lanes that consume more than
  // the low 8 bits wrap around to a copy of the same pattern.
  uint32_t SplatImm = (Imm & 0xff) * 0x01010101;
  for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
    for (unsigned i = 0; i != NumLaneElts; ++i) {
      ShuffleMask.push_back(SplatImm % NumLaneElts + l);
      SplatImm /= NumLaneElts;
    }
  }
}

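// e.g. v8i16 PSHUFHW with Imm = 0x1B decodes to <0,1,2,3,7,6,5,4>.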
void DecodePSHUFHWMask(unsigned NumElts, unsigned Imm,
                       SmallVectorImpl<int> &ShuffleMask) {
  for (unsigned l = 0; l != NumElts; l += 8) {
    unsigned NewImm = Imm;
    for (unsigned i = 0, e = 4; i != e; ++i) {
      ShuffleMask.push_back(l + i);
    }
    for (unsigned i = 4, e = 8; i != e; ++i) {
      ShuffleMask.push_back(l + 4 + (NewImm & 3));
      NewImm >>= 2;
    }
  }
}

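// e.g. v8i16 PSHUFLW with Imm = 0x1B decodes to <3,2,1,0,4,5,6,7>.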
void DecodePSHUFLWMask(unsigned NumElts, unsigned Imm,
                       SmallVectorImpl<int> &ShuffleMask) {
  for (unsigned l = 0; l != NumElts; l += 8) {
    unsigned NewImm = Imm;
    for (unsigned i = 0, e = 4; i != e; ++i) {
      ShuffleMask.push_back(l + (NewImm & 3));
      NewImm >>= 2;
    }
    for (unsigned i = 4, e = 8; i != e; ++i) {
      ShuffleMask.push_back(l + i);
    }
  }
}

void DecodePSWAPMask(unsigned NumElts, SmallVectorImpl<int> &ShuffleMask) {
  unsigned NumHalfElts = NumElts / 2;

  for (unsigned l = 0; l != NumHalfElts; ++l)
    ShuffleMask.push_back(l + NumHalfElts);
  for (unsigned h = 0; h != NumHalfElts; ++h)
    ShuffleMask.push_back(h);
}

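// e.g. v4f32 SHUFPS with Imm = 0xE4 decodes to <0,1,6,7>: the low half of
// the first source followed by the high half of the second.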
void DecodeSHUFPMask(unsigned NumElts, unsigned ScalarBits,
                     unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
  unsigned NumLaneElts = 128 / ScalarBits;

  unsigned NewImm = Imm;
  for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
    // Each half of a lane comes from a different source.
    for (unsigned s = 0; s != NumElts * 2; s += NumElts) {
      for (unsigned i = 0; i != NumLaneElts / 2; ++i) {
        ShuffleMask.push_back(NewImm % NumLaneElts + s + l);
        NewImm /= NumLaneElts;
      }
    }
    if (NumLaneElts == 4) NewImm = Imm; // Reload the 8-bit immediate.
  }
}

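// e.g. v4i32 UNPCKH decodes to <2,6,3,7>.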
void DecodeUNPCKHMask(unsigned NumElts, unsigned ScalarBits,
                      SmallVectorImpl<int> &ShuffleMask) {
  // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
  // independently on 128-bit lanes.
  unsigned NumLanes = (NumElts * ScalarBits) / 128;
  if (NumLanes == 0) NumLanes = 1; // Handle MMX
  unsigned NumLaneElts = NumElts / NumLanes;

  for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
    for (unsigned i = l + NumLaneElts / 2, e = l + NumLaneElts; i != e; ++i) {
      ShuffleMask.push_back(i);           // Reads from dest/src1
      ShuffleMask.push_back(i + NumElts); // Reads from src/src2
    }
  }
}

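// e.g. v4i32 UNPCKL decodes to <0,4,1,5>.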
void DecodeUNPCKLMask(unsigned NumElts, unsigned ScalarBits,
                      SmallVectorImpl<int> &ShuffleMask) {
  // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
  // independently on 128-bit lanes.
  unsigned NumLanes = (NumElts * ScalarBits) / 128;
  if (NumLanes == 0) NumLanes = 1; // Handle MMX
  unsigned NumLaneElts = NumElts / NumLanes;

  for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
    for (unsigned i = l, e = l + NumLaneElts / 2; i != e; ++i) {
      ShuffleMask.push_back(i);           // Reads from dest/src1
      ShuffleMask.push_back(i + NumElts); // Reads from src/src2
    }
  }
}

void DecodeVectorBroadcast(unsigned NumElts,
                           SmallVectorImpl<int> &ShuffleMask) {
  ShuffleMask.append(NumElts, 0);
}

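// e.g. DstNumElts = 8, SrcNumElts = 2 decodes to <0,1,0,1,0,1,0,1>.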
void DecodeSubVectorBroadcast(unsigned DstNumElts, unsigned SrcNumElts,
                              SmallVectorImpl<int> &ShuffleMask) {
  unsigned Scale = DstNumElts / SrcNumElts;

  for (unsigned i = 0; i != Scale; ++i)
    for (unsigned j = 0; j != SrcNumElts; ++j)
      ShuffleMask.push_back(j);
}

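// e.g. v8i64 VSHUFI64x2 with Imm = 0x1B decodes to <6,7,4,5,10,11,8,9>.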
void decodeVSHUF64x2FamilyMask(unsigned NumElts, unsigned ScalarSize,
                               unsigned Imm,
                               SmallVectorImpl<int> &ShuffleMask) {
  unsigned NumElementsInLane = 128 / ScalarSize;
  unsigned NumLanes = NumElts / NumElementsInLane;

  for (unsigned l = 0; l != NumElts; l += NumElementsInLane) {
    unsigned Index = (Imm % NumLanes) * NumElementsInLane;
    Imm /= NumLanes; // Discard the bits we just used.
    // We actually need the other source.
    if (l >= (NumElts / 2))
      Index += NumElts;
    for (unsigned i = 0; i != NumElementsInLane; ++i)
      ShuffleMask.push_back(Index + i);
  }
}

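// e.g. v4i64 VPERM2X128 with Imm = 0x21 decodes to <2,3,4,5>: the high half
// of the first source followed by the low half of the second.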
void DecodeVPERM2X128Mask(unsigned NumElts, unsigned Imm,
                          SmallVectorImpl<int> &ShuffleMask) {
  unsigned HalfSize = NumElts / 2;

  for (unsigned l = 0; l != 2; ++l) {
    unsigned HalfMask = Imm >> (l * 4);
    unsigned HalfBegin = (HalfMask & 0x3) * HalfSize;
    for (unsigned i = HalfBegin, e = HalfBegin + HalfSize; i != e; ++i)
      ShuffleMask.push_back((HalfMask & 8) ? SM_SentinelZero : (int)i);
  }
}

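// e.g. a v16i8 PSHUFB control of <0,1,0x80,3,...> decodes to
// <0,1,zero,3,...>, since bit 7 of a control byte zeroes that element.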
void DecodePSHUFBMask(ArrayRef<uint64_t> RawMask, const APInt &UndefElts,
                      SmallVectorImpl<int> &ShuffleMask) {
  for (int i = 0, e = RawMask.size(); i < e; ++i) {
    uint64_t M = RawMask[i];
    if (UndefElts[i]) {
      ShuffleMask.push_back(SM_SentinelUndef);
      continue;
    }
    // For 256/512-bit vectors the base of the shuffle is the 128-bit
    // subvector we're inside.
    int Base = (i / 16) * 16;
    // If the high bit (7) of the byte is set, the element is zeroed.
    if (M & (1 << 7))
      ShuffleMask.push_back(SM_SentinelZero);
    else {
      // Only the least significant 4 bits of the byte are used.
      int Index = Base + (M & 0xf);
      ShuffleMask.push_back(Index);
    }
  }
}

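// e.g. v4i32 BLEND with Imm = 0x5 (0b0101) decodes to <4,1,6,3>.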
void DecodeBLENDMask(unsigned NumElts, unsigned Imm,
                     SmallVectorImpl<int> &ShuffleMask) {
  for (unsigned i = 0; i < NumElts; ++i) {
    // If there are more than 8 elements in the vector, then any immediate blend
    // mask wraps around.
    unsigned Bit = i % 8;
    ShuffleMask.push_back(((Imm >> Bit) & 1) ? NumElts + i : i);
  }
}

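// e.g. a VPPERM control byte of 0x13 selects byte 19 (byte 3 of the second
// source), while control bytes 0x80-0x9F (operation 4) zero-fill.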
void DecodeVPPERMMask(ArrayRef<uint64_t> RawMask, const APInt &UndefElts,
                      SmallVectorImpl<int> &ShuffleMask) {
  assert(RawMask.size() == 16 && "Illegal VPPERM shuffle mask size");

  // VPPERM Operation
  // Bits[4:0] - Byte Index (0 - 31)
  // Bits[7:5] - Permute Operation
  //
  // Permute Operation:
  // 0 - Source byte (no logical operation).
  // 1 - Invert source byte.
  // 2 - Bit reverse of source byte.
  // 3 - Bit reverse of inverted source byte.
  // 4 - 00h (zero-fill).
  // 5 - FFh (ones-fill).
  // 6 - Most significant bit of source byte replicated in all bit positions.
  // 7 - Invert most significant bit of source byte and replicate in all bit
  //     positions.
  for (int i = 0, e = RawMask.size(); i < e; ++i) {
    if (UndefElts[i]) {
      ShuffleMask.push_back(SM_SentinelUndef);
      continue;
    }

    uint64_t M = RawMask[i];
    uint64_t PermuteOp = (M >> 5) & 0x7;
    if (PermuteOp == 4) {
      ShuffleMask.push_back(SM_SentinelZero);
      continue;
    }
    // Only plain byte copies (0) and zero-fill (4) can be represented as a
    // shuffle mask; bail out on the logical operations.
    if (PermuteOp != 0) {
      ShuffleMask.clear();
      return;
    }

    uint64_t Index = M & 0x1F;
    ShuffleMask.push_back((int)Index);
  }
}

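// e.g. VPERMQ with Imm = 0x1B decodes to the reversal <3,2,1,0>.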
void DecodeVPERMMask(unsigned NumElts, unsigned Imm,
                     SmallVectorImpl<int> &ShuffleMask) {
  for (unsigned l = 0; l != NumElts; l += 4)
    for (unsigned i = 0; i != 4; ++i)
      ShuffleMask.push_back(l + ((Imm >> (2 * i)) & 3));
}

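// e.g. PMOVZXBW from v16i8 to v8i16 (SrcScalarBits = 8, DstScalarBits = 16,
// NumDstElts = 8) decodes to the v16i8 mask <0,zero,1,zero,...,7,zero>.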
void DecodeZeroExtendMask(unsigned SrcScalarBits, unsigned DstScalarBits,
                          unsigned NumDstElts, bool IsAnyExtend,
                          SmallVectorImpl<int> &ShuffleMask) {
  unsigned Scale = DstScalarBits / SrcScalarBits;
  assert(SrcScalarBits < DstScalarBits &&
         "Expected zero extension mask to increase scalar size");

  int Sentinel = IsAnyExtend ? SM_SentinelUndef : SM_SentinelZero;
  for (unsigned i = 0; i != NumDstElts; i++) {
    ShuffleMask.push_back(i);
    ShuffleMask.append(Scale - 1, Sentinel);
  }
}

void DecodeZeroMoveLowMask(unsigned NumElts,
                           SmallVectorImpl<int> &ShuffleMask) {
  ShuffleMask.push_back(0);
  ShuffleMask.append(NumElts - 1, SM_SentinelZero);
}

void DecodeScalarMoveMask(unsigned NumElts, bool IsLoad,
                          SmallVectorImpl<int> &ShuffleMask) {
  // First element comes from the first element of the second source.
  // Remaining elements: Load zero extends / Move copies from first source.
  ShuffleMask.push_back(NumElts);
  for (unsigned i = 1; i < NumElts; i++)
    ShuffleMask.push_back(IsLoad ? static_cast<int>(SM_SentinelZero) : i);
}

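// e.g. v16i8 EXTRQI with Len = 16, Idx = 8 extracts bytes 1 and 2 and decodes
// to <1,2,zero,zero,zero,zero,zero,zero,undef,...,undef>.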
void DecodeEXTRQIMask(unsigned NumElts, unsigned EltSize, int Len, int Idx,
                      SmallVectorImpl<int> &ShuffleMask) {
  unsigned HalfElts = NumElts / 2;

  // Only the bottom 6 bits are valid for each immediate.
  Len &= 0x3F;
  Idx &= 0x3F;

  // We can only decode this bit extraction instruction as a shuffle if both the
  // length and index work with whole elements.
  if (0 != (Len % EltSize) || 0 != (Idx % EltSize))
    return;

  // A length of zero is equivalent to a bit length of 64.
  if (Len == 0)
    Len = 64;

  // If the length + index exceeds the bottom 64 bits the result is undefined.
  if ((Len + Idx) > 64) {
    ShuffleMask.append(NumElts, SM_SentinelUndef);
    return;
  }

  // Convert the length and index to work with elements.
  Len /= EltSize;
  Idx /= EltSize;

  // EXTRQ: Extract Len elements starting from Idx. Zero pad the remaining
  // elements of the lower 64-bits. The upper 64-bits are undefined.
  for (int i = 0; i != Len; ++i)
    ShuffleMask.push_back(i + Idx);
  for (int i = Len; i != (int)HalfElts; ++i)
    ShuffleMask.push_back(SM_SentinelZero);
  for (int i = HalfElts; i != (int)NumElts; ++i)
    ShuffleMask.push_back(SM_SentinelUndef);
}

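// e.g. v16i8 INSERTQI with Len = 16, Idx = 8 decodes to
// <0,16,17,3,4,5,6,7,undef,...,undef>.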
void DecodeINSERTQIMask(unsigned NumElts, unsigned EltSize, int Len, int Idx,
                        SmallVectorImpl<int> &ShuffleMask) {
  unsigned HalfElts = NumElts / 2;

  // Only the bottom 6 bits are valid for each immediate.
  Len &= 0x3F;
  Idx &= 0x3F;

  // We can only decode this bit insertion instruction as a shuffle if both the
  // length and index work with whole elements.
  if (0 != (Len % EltSize) || 0 != (Idx % EltSize))
    return;

  // A length of zero is equivalent to a bit length of 64.
  if (Len == 0)
    Len = 64;

  // If the length + index exceeds the bottom 64 bits the result is undefined.
  if ((Len + Idx) > 64) {
    ShuffleMask.append(NumElts, SM_SentinelUndef);
    return;
  }

  // Convert the length and index to work with elements.
  Len /= EltSize;
  Idx /= EltSize;

  // INSERTQ: Extract lowest Len elements from lower half of second source and
  // insert over first source starting at Idx element. The upper 64-bits are
  // undefined.
  for (int i = 0; i != Idx; ++i)
    ShuffleMask.push_back(i);
  for (int i = 0; i != Len; ++i)
    ShuffleMask.push_back(i + NumElts);
  for (int i = Idx + Len; i != (int)HalfElts; ++i)
    ShuffleMask.push_back(i);
  for (int i = HalfElts; i != (int)NumElts; ++i)
    ShuffleMask.push_back(SM_SentinelUndef);
}

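// e.g. v4f64 VPERMILPD with a raw mask of <2,0,2,0> decodes to <1,0,3,2>.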
void DecodeVPERMILPMask(unsigned NumElts, unsigned ScalarBits,
                        ArrayRef<uint64_t> RawMask, const APInt &UndefElts,
                        SmallVectorImpl<int> &ShuffleMask) {
  unsigned VecSize = NumElts * ScalarBits;
  unsigned NumLanes = VecSize / 128;
  unsigned NumEltsPerLane = NumElts / NumLanes;
  assert((VecSize == 128 || VecSize == 256 || VecSize == 512) &&
         "Unexpected vector size");
  assert((ScalarBits == 32 || ScalarBits == 64) && "Unexpected element size");

  for (unsigned i = 0, e = RawMask.size(); i < e; ++i) {
    if (UndefElts[i]) {
      ShuffleMask.push_back(SM_SentinelUndef);
      continue;
    }
    uint64_t M = RawMask[i];
    M = (ScalarBits == 64 ? ((M >> 1) & 0x1) : (M & 0x3));
    unsigned LaneOffset = i & ~(NumEltsPerLane - 1);
    ShuffleMask.push_back((int)(LaneOffset + M));
  }
}

void DecodeVPERMIL2PMask(unsigned NumElts, unsigned ScalarBits, unsigned M2Z,
                         ArrayRef<uint64_t> RawMask, const APInt &UndefElts,
                         SmallVectorImpl<int> &ShuffleMask) {
  unsigned VecSize = NumElts * ScalarBits;
  unsigned NumLanes = VecSize / 128;
  unsigned NumEltsPerLane = NumElts / NumLanes;
  assert((VecSize == 128 || VecSize == 256) && "Unexpected vector size");
  assert((ScalarBits == 32 || ScalarBits == 64) && "Unexpected element size");
  assert((NumElts == RawMask.size()) && "Unexpected mask size");

  for (unsigned i = 0, e = RawMask.size(); i < e; ++i) {
    if (UndefElts[i]) {
      ShuffleMask.push_back(SM_SentinelUndef);
      continue;
    }

    // VPERMIL2 Operation.
    // Bits[3] - Match Bit.
    // Bits[2:1] - (Per Lane) PD Shuffle Mask.
    // Bits[2:0] - (Per Lane) PS Shuffle Mask.
    uint64_t Selector = RawMask[i];
    unsigned MatchBit = (Selector >> 3) & 0x1;

    // M2Z[0:1]  MatchBit
    //   0Xb        X       Source selected by Selector index.
    //   10b        0       Source selected by Selector index.
    //   10b        1       Zero.
    //   11b        0       Zero.
    //   11b        1       Source selected by Selector index.
    if ((M2Z & 0x2) != 0 && MatchBit != (M2Z & 0x1)) {
      ShuffleMask.push_back(SM_SentinelZero);
      continue;
    }

    int Index = i & ~(NumEltsPerLane - 1);
    if (ScalarBits == 64)
      Index += (Selector >> 1) & 0x1;
    else
      Index += Selector & 0x3;

    int Src = (Selector >> 2) & 0x1;
    Index += Src * NumElts;
    ShuffleMask.push_back(Index);
  }
}

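// e.g. a v8i32 VPERMD raw mask of <11,10,9,8,...> wraps to <3,2,1,0,...>.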
void DecodeVPERMVMask(ArrayRef<uint64_t> RawMask, const APInt &UndefElts,
                      SmallVectorImpl<int> &ShuffleMask) {
  uint64_t EltMaskSize = RawMask.size() - 1;
  for (int i = 0, e = RawMask.size(); i != e; ++i) {
    if (UndefElts[i]) {
      ShuffleMask.push_back(SM_SentinelUndef);
      continue;
    }
    uint64_t M = RawMask[i];
    M &= EltMaskSize;
    ShuffleMask.push_back((int)M);
  }
}

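// e.g. a v4i32 VPERMT2D raw mask of <0,4,1,5> decodes unchanged, interleaving
// the low halves of the two sources.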
void DecodeVPERMV3Mask(ArrayRef<uint64_t> RawMask, const APInt &UndefElts,
                       SmallVectorImpl<int> &ShuffleMask) {
  uint64_t EltMaskSize = (RawMask.size() * 2) - 1;
  for (int i = 0, e = RawMask.size(); i != e; ++i) {
    if (UndefElts[i]) {
      ShuffleMask.push_back(SM_SentinelUndef);
      continue;
    }
    uint64_t M = RawMask[i];
    M &= EltMaskSize;
    ShuffleMask.push_back((int)M);
  }
}

} // namespace llvm