//===- X86InterleavedAccess.cpp -------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file contains the X86 implementation of the interleaved accesses
/// optimization generating X86-specific instructions/intrinsics for
/// interleaved access groups.
//
//===----------------------------------------------------------------------===//

#include "X86ISelLowering.h"
#include "X86Subtarget.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/MachineValueType.h"
#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstdint>

using namespace llvm;

namespace {

/// This class holds necessary information to represent an interleaved
/// access group and supports utilities to lower the group into
/// X86-specific instructions/intrinsics.
/// E.g. A group of interleaving access loads (Factor = 2; accessing every
/// other element)
///        %wide.vec = load <8 x i32>, <8 x i32>* %ptr
///        %v0 = shuffle <8 x i32> %wide.vec, <8 x i32> undef, <0, 2, 4, 6>
///        %v1 = shuffle <8 x i32> %wide.vec, <8 x i32> undef, <1, 3, 5, 7>
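/// The store-side mirror of such a group (Factor = 2) interleaves two narrow
/// vectors back into one wide vector; shown here as an illustrative sketch:
///        %interleaved = shuffle <4 x i32> %v0, <4 x i32> %v1,
///                               <0, 4, 1, 5, 2, 6, 3, 7>
///        store <8 x i32> %interleaved, <8 x i32>* %ptr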
class X86InterleavedAccessGroup {
  /// Reference to the wide-load instruction of an interleaved access
  /// group.
  Instruction *const Inst;

  /// Reference to the shuffle(s), consumer(s) of the (load) 'Inst'.
  ArrayRef<ShuffleVectorInst *> Shuffles;

  /// Reference to the starting index of each user-shuffle.
  ArrayRef<unsigned> Indices;

  /// Reference to the interleaving stride in terms of elements.
  const unsigned Factor;

  /// Reference to the underlying target.
  const X86Subtarget &Subtarget;

  const DataLayout &DL;
  IRBuilder<> &Builder;

  /// Breaks down a vector \p 'Inst' of N elements into \p NumSubVectors
  /// sub vectors of type \p T. Returns the sub-vectors in \p DecomposedVectors.
  void decompose(Instruction *Inst, unsigned NumSubVectors, FixedVectorType *T,
                 SmallVectorImpl<Instruction *> &DecomposedVectors);

  /// Performs matrix transposition on a 4x4 matrix \p InputVectors and
  /// returns the transposed-vectors in \p TransposedVectors.
  /// E.g.
  /// InputVectors:
  ///   In-V0 = p1, p2, p3, p4
  ///   In-V1 = q1, q2, q3, q4
  ///   In-V2 = r1, r2, r3, r4
  ///   In-V3 = s1, s2, s3, s4
  /// OutputVectors:
  ///   Out-V0 = p1, q1, r1, s1
  ///   Out-V1 = p2, q2, r2, s2
  ///   Out-V2 = p3, q3, r3, s3
  ///   Out-V3 = p4, q4, r4, s4
  void transpose_4x4(ArrayRef<Instruction *> InputVectors,
                     SmallVectorImpl<Value *> &TransposedMatrix);
  void interleave8bitStride4(ArrayRef<Instruction *> InputVectors,
                             SmallVectorImpl<Value *> &TransposedMatrix,
                             unsigned NumSubVecElems);
  void interleave8bitStride4VF8(ArrayRef<Instruction *> InputVectors,
                                SmallVectorImpl<Value *> &TransposedMatrix);
  void interleave8bitStride3(ArrayRef<Instruction *> InputVectors,
                             SmallVectorImpl<Value *> &TransposedMatrix,
                             unsigned NumSubVecElems);
  void deinterleave8bitStride3(ArrayRef<Instruction *> InputVectors,
                               SmallVectorImpl<Value *> &TransposedMatrix,
                               unsigned NumSubVecElems);

public:
  /// In order to form an interleaved access group X86InterleavedAccessGroup
  /// requires a wide-load instruction \p 'I', a group of interleaved-vectors
  /// \p Shuffs, reference to the first indices of each interleaved-vector
  /// \p 'Ind' and the interleaving stride factor \p F. In order to generate
  /// X86-specific instructions/intrinsics it also requires the underlying
  /// target information \p STarget.
  explicit X86InterleavedAccessGroup(Instruction *I,
                                     ArrayRef<ShuffleVectorInst *> Shuffs,
                                     ArrayRef<unsigned> Ind, const unsigned F,
                                     const X86Subtarget &STarget,
                                     IRBuilder<> &B)
      : Inst(I), Shuffles(Shuffs), Indices(Ind), Factor(F), Subtarget(STarget),
        DL(Inst->getModule()->getDataLayout()), Builder(B) {}

  /// Returns true if this interleaved access group can be lowered into
  /// x86-specific instructions/intrinsics, false otherwise.
  bool isSupported() const;

  /// Lowers this interleaved access group into X86-specific
  /// instructions/intrinsics.
  bool lowerIntoOptimizedSequence();
};

} // end anonymous namespace

bool X86InterleavedAccessGroup::isSupported() const {
  VectorType *ShuffleVecTy = Shuffles[0]->getType();
  Type *ShuffleEltTy = ShuffleVecTy->getElementType();
  unsigned ShuffleElemSize = DL.getTypeSizeInBits(ShuffleEltTy);
  unsigned WideInstSize;

  // Currently, lowering is supported for the following vectors:
  // Stride 4:
  //    1. Store and load of 4-element vectors of 64 bits on AVX.
  //    2. Store of 16/32-element vectors of 8 bits on AVX.
  // Stride 3:
  //    1. Load of 16/32-element vectors of 8 bits on AVX.
  if (!Subtarget.hasAVX() || (Factor != 4 && Factor != 3))
    return false;

  if (isa<LoadInst>(Inst)) {
    WideInstSize = DL.getTypeSizeInBits(Inst->getType());
    if (cast<LoadInst>(Inst)->getPointerAddressSpace())
      return false;
  } else
    WideInstSize = DL.getTypeSizeInBits(Shuffles[0]->getType());

  // Stride-4 shuffles of 64-bit elements are supported only when the wide
  // instruction spans 1024 bits.
  if (ShuffleElemSize == 64 && WideInstSize == 1024 && Factor == 4)
    return true;

  if (ShuffleElemSize == 8 && isa<StoreInst>(Inst) && Factor == 4 &&
      (WideInstSize == 256 || WideInstSize == 512 || WideInstSize == 1024 ||
       WideInstSize == 2048))
    return true;

  if (ShuffleElemSize == 8 && Factor == 3 &&
      (WideInstSize == 384 || WideInstSize == 768 || WideInstSize == 1536))
    return true;

  return false;
}
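
// For example, a Factor = 4 store group whose sub-vectors are <16 x i8>
// (ShuffleElemSize = 8) writes a <64 x i8> wide vector; WideInstSize is then
// 512 bits, which the checks above accept on AVX.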

void X86InterleavedAccessGroup::decompose(
    Instruction *VecInst, unsigned NumSubVectors, FixedVectorType *SubVecTy,
    SmallVectorImpl<Instruction *> &DecomposedVectors) {
  assert((isa<LoadInst>(VecInst) || isa<ShuffleVectorInst>(VecInst)) &&
         "Expected Load or Shuffle");

  Type *VecWidth = VecInst->getType();
  (void)VecWidth;
  assert(VecWidth->isVectorTy() &&
         DL.getTypeSizeInBits(VecWidth) >=
             DL.getTypeSizeInBits(SubVecTy) * NumSubVectors &&
         "Invalid Inst-size!!!");

  if (auto *SVI = dyn_cast<ShuffleVectorInst>(VecInst)) {
    Value *Op0 = SVI->getOperand(0);
    Value *Op1 = SVI->getOperand(1);

    // Generate N(= NumSubVectors) shuffles of T(= SubVecTy) type.
    for (unsigned i = 0; i < NumSubVectors; ++i)
      DecomposedVectors.push_back(
          cast<ShuffleVectorInst>(Builder.CreateShuffleVector(
              Op0, Op1,
              createSequentialMask(Indices[i], SubVecTy->getNumElements(),
                                   0))));
    return;
  }

  // Decompose the load instruction.
  LoadInst *LI = cast<LoadInst>(VecInst);
  Type *VecBaseTy, *VecBasePtrTy;
  Value *VecBasePtr;
  unsigned int NumLoads = NumSubVectors;
  // In the case of stride 3 with a vector of 32 elements load the information
  // in the following way:
  // [0,1...,VF/2-1,VF/2+VF,VF/2+VF+1,...,2VF-1]
  unsigned VecLength = DL.getTypeSizeInBits(VecWidth);
  if (VecLength == 768 || VecLength == 1536) {
    VecBaseTy = FixedVectorType::get(Type::getInt8Ty(LI->getContext()), 16);
    VecBasePtrTy = VecBaseTy->getPointerTo(LI->getPointerAddressSpace());
    VecBasePtr = Builder.CreateBitCast(LI->getPointerOperand(), VecBasePtrTy);
    NumLoads = NumSubVectors * (VecLength / 384);
  } else {
    VecBaseTy = SubVecTy;
    VecBasePtrTy = VecBaseTy->getPointerTo(LI->getPointerAddressSpace());
    VecBasePtr = Builder.CreateBitCast(LI->getPointerOperand(), VecBasePtrTy);
  }
  // Generate N loads of T type.
  assert(VecBaseTy->getPrimitiveSizeInBits().isByteSized() &&
         "VecBaseTy's size must be a multiple of 8");
  const Align FirstAlignment = LI->getAlign();
  const Align SubsequentAlignment = commonAlignment(
      FirstAlignment, VecBaseTy->getPrimitiveSizeInBits().getFixedSize() / 8);
  Align Alignment = FirstAlignment;
  for (unsigned i = 0; i < NumLoads; i++) {
    // TODO: Support inbounds GEP.
    Value *NewBasePtr =
        Builder.CreateGEP(VecBaseTy, VecBasePtr, Builder.getInt32(i));
    Instruction *NewLoad =
        Builder.CreateAlignedLoad(VecBaseTy, NewBasePtr, Alignment);
    DecomposedVectors.push_back(NewLoad);
    Alignment = SubsequentAlignment;
  }
}
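
// For example, a stride-3 load of <96 x i8> has VecLength = 768 bits, so it
// is decomposed into NumLoads = 3 * (768 / 384) = 6 consecutive <16 x i8>
// loads, which concatSubVector recombines later.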

// Changing the scale of the vector type by reducing the number of elements and
// doubling the scalar size.
static MVT scaleVectorType(MVT VT) {
  unsigned ScalarSize = VT.getVectorElementType().getScalarSizeInBits() * 2;
  return MVT::getVectorVT(MVT::getIntegerVT(ScalarSize),
                          VT.getVectorNumElements() / 2);
}
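
// E.g. scaleVectorType(MVT::v32i8) yields MVT::v16i16: half as many elements,
// each twice as wide, so the total bit width is unchanged.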

static constexpr int Concat[] = {
    0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
    16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
    32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
    48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63};

// genShuffleBland - Creates a shuffle mask from two input vectors. This
// function only works on instructions whose lanes fit inside 256-bit
// registers. From the mask 'Mask' it creates a new mask 'Out' by adding
// offsets, where 'LowOffset' applies to elements taken from the first vector
// and 'HighOffset' to elements taken from the second vector.
// |a0....a5,b0....b4,c0....c4|a16..a21,b16..b20,c16..c20|
// |c5...c10,a5....a9,b5....b9|c21..c26,a22..a26,b21..b25|
// |b10..b15,c11..c15,a10..a15|b26..b31,c27..c31,a27..a31|
// For the sequence to work as a mirror of the load, the elements must be
// ordered as above. This function combines two kinds of shuffles: a
// 'vpshufd' and a "blend"-style shuffle. By computing the shuffle on a
// sequence of 16 elements (one lane) and adding the correct offset, we
// create a vpshufd + blend sequence between two shuffles.
static void genShuffleBland(MVT VT, ArrayRef<int> Mask,
                            SmallVectorImpl<int> &Out, int LowOffset,
                            int HighOffset) {
  assert(VT.getSizeInBits() >= 256 &&
         "This function doesn't accept width smaller than 256");
  unsigned NumOfElm = VT.getVectorNumElements();
  for (unsigned i = 0; i < Mask.size(); i++)
    Out.push_back(Mask[i] + LowOffset);
  for (unsigned i = 0; i < Mask.size(); i++)
    Out.push_back(Mask[i] + HighOffset + NumOfElm);
}
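
// Illustrative instance: for VT = v32i8 (NumOfElm = 32), Mask = {0, 1},
// LowOffset = 16 and HighOffset = 0, Out becomes {16, 17, 32, 33}; the first
// copy of the mask is shifted by LowOffset, the second by
// HighOffset + NumOfElm.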

// reorderSubVector returns the data to its original state, and is de facto
// the inverse of concatSubVector.

// For VecElems = 16
// Invec[0] - |0|            TransposedMatrix[0] - |0|
// Invec[1] - |1|         => TransposedMatrix[1] - |1|
// Invec[2] - |2|            TransposedMatrix[2] - |2|

// For VecElems = 32
// Invec[0] - |0|3|          TransposedMatrix[0] - |0|1|
// Invec[1] - |1|4|       => TransposedMatrix[1] - |2|3|
// Invec[2] - |2|5|          TransposedMatrix[2] - |4|5|

// For VecElems = 64
// Invec[0] - |0|3|6|9 |     TransposedMatrix[0] - |0|1|2 |3 |
// Invec[1] - |1|4|7|10|  => TransposedMatrix[1] - |4|5|6 |7 |
// Invec[2] - |2|5|8|11|     TransposedMatrix[2] - |8|9|10|11|

static void reorderSubVector(MVT VT, SmallVectorImpl<Value *> &TransposedMatrix,
                             ArrayRef<Value *> Vec, ArrayRef<int> VPShuf,
                             unsigned VecElems, unsigned Stride,
                             IRBuilder<> &Builder) {

  if (VecElems == 16) {
    for (unsigned i = 0; i < Stride; i++)
      TransposedMatrix[i] = Builder.CreateShuffleVector(
          Vec[i], UndefValue::get(Vec[i]->getType()), VPShuf);
    return;
  }

  SmallVector<int, 32> OptimizeShuf;
  Value *Temp[8];

  for (unsigned i = 0; i < (VecElems / 16) * Stride; i += 2) {
    genShuffleBland(VT, VPShuf, OptimizeShuf, (i / Stride) * 16,
                    (i + 1) / Stride * 16);
    Temp[i / 2] = Builder.CreateShuffleVector(
        Vec[i % Stride], Vec[(i + 1) % Stride], OptimizeShuf);
    OptimizeShuf.clear();
  }

  if (VecElems == 32) {
    std::copy(Temp, Temp + Stride, TransposedMatrix.begin());
    return;
  } else
    for (unsigned i = 0; i < Stride; i++)
      TransposedMatrix[i] =
          Builder.CreateShuffleVector(Temp[2 * i], Temp[2 * i + 1], Concat);
}

void X86InterleavedAccessGroup::interleave8bitStride4VF8(
    ArrayRef<Instruction *> Matrix,
    SmallVectorImpl<Value *> &TransposedMatrix) {
  // Assuming we start from the following vectors:
  // Matrix[0]= c0 c1 c2 c3 c4 ... c7
  // Matrix[1]= m0 m1 m2 m3 m4 ... m7
  // Matrix[2]= y0 y1 y2 y3 y4 ... y7
  // Matrix[3]= k0 k1 k2 k3 k4 ... k7

  MVT VT = MVT::v8i16;
  TransposedMatrix.resize(2);
  SmallVector<int, 16> MaskLow;
  SmallVector<int, 32> MaskLowTemp1, MaskLowWord;
  SmallVector<int, 32> MaskHighTemp1, MaskHighWord;

  for (unsigned i = 0; i < 8; ++i) {
    MaskLow.push_back(i);
    MaskLow.push_back(i + 8);
  }

  createUnpackShuffleMask(VT, MaskLowTemp1, true, false);
  createUnpackShuffleMask(VT, MaskHighTemp1, false, false);
  narrowShuffleMaskElts(2, MaskHighTemp1, MaskHighWord);
  narrowShuffleMaskElts(2, MaskLowTemp1, MaskLowWord);
  // IntrVec1Low = c0 m0 c1 m1 c2 m2 c3 m3 c4 m4 c5 m5 c6 m6 c7 m7
  // IntrVec2Low = y0 k0 y1 k1 y2 k2 y3 k3 y4 k4 y5 k5 y6 k6 y7 k7
  Value *IntrVec1Low =
      Builder.CreateShuffleVector(Matrix[0], Matrix[1], MaskLow);
  Value *IntrVec2Low =
      Builder.CreateShuffleVector(Matrix[2], Matrix[3], MaskLow);

  // TransposedMatrix[0] = c0 m0 y0 k0 c1 m1 y1 k1 c2 m2 y2 k2 c3 m3 y3 k3
  // TransposedMatrix[1] = c4 m4 y4 k4 c5 m5 y5 k5 c6 m6 y6 k6 c7 m7 y7 k7

  TransposedMatrix[0] =
      Builder.CreateShuffleVector(IntrVec1Low, IntrVec2Low, MaskLowWord);
  TransposedMatrix[1] =
      Builder.CreateShuffleVector(IntrVec1Low, IntrVec2Low, MaskHighWord);
}

void X86InterleavedAccessGroup::interleave8bitStride4(
    ArrayRef<Instruction *> Matrix, SmallVectorImpl<Value *> &TransposedMatrix,
    unsigned NumOfElm) {
  // Example: Assuming we start from the following vectors:
  // Matrix[0]= c0 c1 c2 c3 c4 ... c31
  // Matrix[1]= m0 m1 m2 m3 m4 ... m31
  // Matrix[2]= y0 y1 y2 y3 y4 ... y31
  // Matrix[3]= k0 k1 k2 k3 k4 ... k31

  MVT VT = MVT::getVectorVT(MVT::i8, NumOfElm);
  MVT HalfVT = scaleVectorType(VT);

  TransposedMatrix.resize(4);
  SmallVector<int, 32> MaskHigh;
  SmallVector<int, 32> MaskLow;
  SmallVector<int, 32> LowHighMask[2];
  SmallVector<int, 32> MaskHighTemp;
  SmallVector<int, 32> MaskLowTemp;

  // MaskHighTemp and MaskLowTemp built in the vpunpckhbw and vpunpcklbw X86
  // shuffle pattern.

  createUnpackShuffleMask(VT, MaskLow, true, false);
  createUnpackShuffleMask(VT, MaskHigh, false, false);

  // MaskHighTemp1 and MaskLowTemp1 built in the vpunpckhdw and vpunpckldw X86
  // shuffle pattern.

  createUnpackShuffleMask(HalfVT, MaskLowTemp, true, false);
  createUnpackShuffleMask(HalfVT, MaskHighTemp, false, false);
  narrowShuffleMaskElts(2, MaskLowTemp, LowHighMask[0]);
  narrowShuffleMaskElts(2, MaskHighTemp, LowHighMask[1]);

  // IntrVec1Low  = c0 m0 c1 m1 ... c7 m7   | c16 m16 c17 m17 ... c23 m23
  // IntrVec1High = c8 m8 c9 m9 ... c15 m15 | c24 m24 c25 m25 ... c31 m31
  // IntrVec2Low  = y0 k0 y1 k1 ... y7 k7   | y16 k16 y17 k17 ... y23 k23
  // IntrVec2High = y8 k8 y9 k9 ... y15 k15 | y24 k24 y25 k25 ... y31 k31
  Value *IntrVec[4];

  IntrVec[0] = Builder.CreateShuffleVector(Matrix[0], Matrix[1], MaskLow);
  IntrVec[1] = Builder.CreateShuffleVector(Matrix[0], Matrix[1], MaskHigh);
  IntrVec[2] = Builder.CreateShuffleVector(Matrix[2], Matrix[3], MaskLow);
  IntrVec[3] = Builder.CreateShuffleVector(Matrix[2], Matrix[3], MaskHigh);

  // cmyk4  cmyk5  cmyk6  cmyk7  | cmyk20 cmyk21 cmyk22 cmyk23
  // cmyk12 cmyk13 cmyk14 cmyk15 | cmyk28 cmyk29 cmyk30 cmyk31
  // cmyk0  cmyk1  cmyk2  cmyk3  | cmyk16 cmyk17 cmyk18 cmyk19
  // cmyk8  cmyk9  cmyk10 cmyk11 | cmyk24 cmyk25 cmyk26 cmyk27

  Value *VecOut[4];
  for (int i = 0; i < 4; i++)
    VecOut[i] = Builder.CreateShuffleVector(IntrVec[i / 2], IntrVec[i / 2 + 2],
                                            LowHighMask[i % 2]);

  // cmyk0  cmyk1  cmyk2  cmyk3  | cmyk4  cmyk5  cmyk6  cmyk7
  // cmyk8  cmyk9  cmyk10 cmyk11 | cmyk12 cmyk13 cmyk14 cmyk15
  // cmyk16 cmyk17 cmyk18 cmyk19 | cmyk20 cmyk21 cmyk22 cmyk23
  // cmyk24 cmyk25 cmyk26 cmyk27 | cmyk28 cmyk29 cmyk30 cmyk31

  if (VT == MVT::v16i8) {
    std::copy(VecOut, VecOut + 4, TransposedMatrix.begin());
    return;
  }

  reorderSubVector(VT, TransposedMatrix, VecOut, makeArrayRef(Concat, 16),
                   NumOfElm, 4, Builder);
}

// createShuffleStride returns a shuffle mask of size N.
// The shuffle pattern is as follows:
// {0, Stride%(VF/Lane), (2*Stride%(VF/Lane))...(VF*Stride/Lane)%(VF/Lane),
//  (VF/Lane), (VF/Lane)+Stride%(VF/Lane),...,
//  (VF/Lane)+(VF*Stride/Lane)%(VF/Lane)}
// Where Lane is the # of lanes in a register:
// VectorSize = 128 => Lane = 1
// VectorSize = 256 => Lane = 2
// For example, the shuffle pattern for VF 16 with register size 256
// (lanes = 2) is {<[0|3|6|1|4|7|2|5]-[8|11|14|9|12|15|10|13]>}.
static void createShuffleStride(MVT VT, int Stride,
                                SmallVectorImpl<int> &Mask) {
  int VectorSize = VT.getSizeInBits();
  int VF = VT.getVectorNumElements();
  int LaneCount = std::max(VectorSize / 128, 1);
  for (int Lane = 0; Lane < LaneCount; Lane++)
    for (int i = 0, LaneSize = VF / LaneCount; i != LaneSize; ++i)
      Mask.push_back((i * Stride) % LaneSize + LaneSize * Lane);
}
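
// For a single 128-bit lane (e.g. VT = v16i8) with Stride = 3, the mask is
// {0,3,6,9,12,15,2,5,8,11,14,1,4,7,10,13}, i.e. (i * 3) % 16 for each i.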

// setGroupSize sets 'SizeInfo' to the size (number of elements) of each
// group inside a shuffle mask. A mask contains exactly 3 groups, where
// each group is a monotonically increasing sequence with stride 3.
// For example, shuffleMask {0,3,6,1,4,7,2,5} => {3,3,2}
static void setGroupSize(MVT VT, SmallVectorImpl<int> &SizeInfo) {
  int VectorSize = VT.getSizeInBits();
  int VF = VT.getVectorNumElements() / std::max(VectorSize / 128, 1);
  for (int i = 0, FirstGroupElement = 0; i < 3; i++) {
    int GroupSize = std::ceil((VF - FirstGroupElement) / 3.0);
    SizeInfo.push_back(GroupSize);
    FirstGroupElement = ((GroupSize)*3 + FirstGroupElement) % VF;
  }
}
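
// Worked instance: for VT = v32i8 (two 128-bit lanes, so VF = 16 per lane)
// the sizes come out as {6,5,5}: ceil(16/3) = 6, then ceil((16-2)/3) = 5,
// then ceil((16-1)/3) = 5.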

// DecodePALIGNRMask returns the shuffle mask of the vpalignr instruction.
// vpalignr works on lanes, where Lane is the # of lanes in a register:
// VectorWide = 128 => Lane = 1
// VectorWide = 256 => Lane = 2
// For Lane = 1 the shuffle pattern is: {DiffToJump,...,DiffToJump+VF-1}.
// For Lane = 2 the shuffle pattern is:
// {DiffToJump,...,VF/2-1,VF,...,DiffToJump+VF-1}.
// The Imm variable sets the offset amount. The result of the function is
// stored in the ShuffleMask vector, built as described above.
// AlignDirection is a boolean that indicates the direction of the alignment
// (false - align to the "right" side while true - align to the "left" side).
static void DecodePALIGNRMask(MVT VT, unsigned Imm,
                              SmallVectorImpl<int> &ShuffleMask,
                              bool AlignDirection = true, bool Unary = false) {
  unsigned NumElts = VT.getVectorNumElements();
  unsigned NumLanes = std::max((int)VT.getSizeInBits() / 128, 1);
  unsigned NumLaneElts = NumElts / NumLanes;

  Imm = AlignDirection ? Imm : (NumLaneElts - Imm);
  unsigned Offset = Imm * (VT.getScalarSizeInBits() / 8);

  for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
    for (unsigned i = 0; i != NumLaneElts; ++i) {
      unsigned Base = i + Offset;
      // If i+offset is out of this lane then we actually need the other
      // source. If Unary, the other source is the first source.
      if (Base >= NumLaneElts)
        Base = Unary ? Base % NumLaneElts : Base + NumElts - NumLaneElts;
      ShuffleMask.push_back(Base + l);
    }
  }
}
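
// For example, with VT = v16i8, Imm = 3, AlignDirection = true and
// Unary = true, Offset is 3 bytes and the resulting mask is
// {3,4,5,6,7,8,9,10,11,12,13,14,15,0,1,2}, i.e. a 3-byte rotate within the
// single lane.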

// concatSubVector - The function rebuilds the data into the expected order.
// An assumption about the shape of the matrix was made so that the
// deinterleave works with lane-based instructions such as 'vpalignr' or
// 'vpshufd'. This function ensures that the data is built the correct way
// for those lane instructions. Each lane inside the vector is 128 bits long.
//
// The 'InVec' argument contains the data in increasing order. InVec[0]
// holds the first 128 bits of data. The number of different lanes inside a
// vector depends on 'VecElems'; in general, the formula is
// VecElems * type / 128. The size of the 'InVec' array is determined by
// 'VecElems'.

// For VecElems = 16
// Invec[0] - |0|      Vec[0] - |0|
// Invec[1] - |1|  =>  Vec[1] - |1|
// Invec[2] - |2|      Vec[2] - |2|

// For VecElems = 32
// Invec[0] - |0|1|      Vec[0] - |0|3|
// Invec[1] - |2|3|  =>  Vec[1] - |1|4|
// Invec[2] - |4|5|      Vec[2] - |2|5|

// For VecElems = 64
// Invec[0] - |0|1|2 |3 |      Vec[0] - |0|3|6|9 |
// Invec[1] - |4|5|6 |7 |  =>  Vec[1] - |1|4|7|10|
// Invec[2] - |8|9|10|11|      Vec[2] - |2|5|8|11|

static void concatSubVector(Value **Vec, ArrayRef<Instruction *> InVec,
                            unsigned VecElems, IRBuilder<> &Builder) {
  if (VecElems == 16) {
    for (int i = 0; i < 3; i++)
      Vec[i] = InVec[i];
    return;
  }

  for (unsigned j = 0; j < VecElems / 32; j++)
    for (int i = 0; i < 3; i++)
      Vec[i + j * 3] = Builder.CreateShuffleVector(
          InVec[j * 6 + i], InVec[j * 6 + i + 3], makeArrayRef(Concat, 32));

  if (VecElems == 32)
    return;

  for (int i = 0; i < 3; i++)
    Vec[i] = Builder.CreateShuffleVector(Vec[i], Vec[i + 3], Concat);
}

void X86InterleavedAccessGroup::deinterleave8bitStride3(
    ArrayRef<Instruction *> InVec, SmallVectorImpl<Value *> &TransposedMatrix,
    unsigned VecElems) {
  // Example: Assuming we start from the following vectors:
  // Matrix[0]= a0 b0 c0 a1 b1 c1 a2 b2
  // Matrix[1]= c2 a3 b3 c3 a4 b4 c4 a5
  // Matrix[2]= b5 c5 a6 b6 c6 a7 b7 c7

  TransposedMatrix.resize(3);
  SmallVector<int, 32> VPShuf;
  SmallVector<int, 32> VPAlign[2];
  SmallVector<int, 32> VPAlign2;
  SmallVector<int, 32> VPAlign3;
  SmallVector<int, 3> GroupSize;
  Value *Vec[6], *TempVector[3];

  MVT VT = MVT::getVT(Shuffles[0]->getType());

  createShuffleStride(VT, 3, VPShuf);
  setGroupSize(VT, GroupSize);

  for (int i = 0; i < 2; i++)
    DecodePALIGNRMask(VT, GroupSize[2 - i], VPAlign[i], false);

  DecodePALIGNRMask(VT, GroupSize[2] + GroupSize[1], VPAlign2, true, true);
  DecodePALIGNRMask(VT, GroupSize[1], VPAlign3, true, true);

  concatSubVector(Vec, InVec, VecElems, Builder);
  // Vec[0]= a0 a1 a2 b0 b1 b2 c0 c1
  // Vec[1]= c2 c3 c4 a3 a4 a5 b3 b4
  // Vec[2]= b5 b6 b7 c5 c6 c7 a6 a7

  for (int i = 0; i < 3; i++)
    Vec[i] = Builder.CreateShuffleVector(
        Vec[i], UndefValue::get(Vec[0]->getType()), VPShuf);

  // TempVector[0]= a6 a7 a0 a1 a2 b0 b1 b2
  // TempVector[1]= c0 c1 c2 c3 c4 a3 a4 a5
  // TempVector[2]= b3 b4 b5 b6 b7 c5 c6 c7

  for (int i = 0; i < 3; i++)
    TempVector[i] =
        Builder.CreateShuffleVector(Vec[(i + 2) % 3], Vec[i], VPAlign[0]);

  // Vec[0]= a3 a4 a5 a6 a7 a0 a1 a2
  // Vec[1]= c5 c6 c7 c0 c1 c2 c3 c4
  // Vec[2]= b0 b1 b2 b3 b4 b5 b6 b7

  for (int i = 0; i < 3; i++)
    Vec[i] = Builder.CreateShuffleVector(TempVector[(i + 1) % 3], TempVector[i],
                                         VPAlign[1]);

  // TransposedMatrix[0]= a0 a1 a2 a3 a4 a5 a6 a7
  // TransposedMatrix[1]= b0 b1 b2 b3 b4 b5 b6 b7
  // TransposedMatrix[2]= c0 c1 c2 c3 c4 c5 c6 c7

  Value *TempVec = Builder.CreateShuffleVector(
      Vec[1], UndefValue::get(Vec[1]->getType()), VPAlign3);
  TransposedMatrix[0] = Builder.CreateShuffleVector(
      Vec[0], UndefValue::get(Vec[1]->getType()), VPAlign2);
  TransposedMatrix[1] = VecElems == 8 ? Vec[2] : TempVec;
  TransposedMatrix[2] = VecElems == 8 ? TempVec : Vec[2];
}

// group2Shuffle reorders the strided shuffle back into contiguous order.
// For example, for VF16 with Mask1 = {0,3,6,9,12,15,2,5,8,11,14,1,4,7,10,13}
// => MaskResult = {0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5}.
static void group2Shuffle(MVT VT, SmallVectorImpl<int> &Mask,
                          SmallVectorImpl<int> &Output) {
  int IndexGroup[3] = {0, 0, 0};
  int Index = 0;
  int VectorWidth = VT.getSizeInBits();
  int VF = VT.getVectorNumElements();
  // Find the index of the different groups.
  int Lane = (VectorWidth / 128 > 0) ? VectorWidth / 128 : 1;
  for (int i = 0; i < 3; i++) {
    IndexGroup[(Index * 3) % (VF / Lane)] = Index;
    Index += Mask[i];
  }
  // According to the index, compute the convert mask.
  for (int i = 0; i < VF / Lane; i++) {
    Output.push_back(IndexGroup[i % 3]);
    IndexGroup[i % 3]++;
  }
}
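
// Worked instance: for VT = v16i8, setGroupSize yields Mask = {6,5,5}; the
// first loop then computes IndexGroup = {0,11,6}, which the second loop
// unrolls into the MaskResult shown in the comment above.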

void X86InterleavedAccessGroup::interleave8bitStride3(
    ArrayRef<Instruction *> InVec, SmallVectorImpl<Value *> &TransposedMatrix,
    unsigned VecElems) {
  // Example: Assuming we start from the following vectors:
  // Matrix[0]= a0 a1 a2 a3 a4 a5 a6 a7
  // Matrix[1]= b0 b1 b2 b3 b4 b5 b6 b7
  // Matrix[2]= c0 c1 c2 c3 c4 c5 c6 c7

  TransposedMatrix.resize(3);
  SmallVector<int, 3> GroupSize;
  SmallVector<int, 32> VPShuf;
  SmallVector<int, 32> VPAlign[3];
  SmallVector<int, 32> VPAlign2;
  SmallVector<int, 32> VPAlign3;

  Value *Vec[3], *TempVector[3];
  MVT VT = MVT::getVectorVT(MVT::i8, VecElems);

  setGroupSize(VT, GroupSize);

  for (int i = 0; i < 3; i++)
    DecodePALIGNRMask(VT, GroupSize[i], VPAlign[i]);

  DecodePALIGNRMask(VT, GroupSize[1] + GroupSize[2], VPAlign2, false, true);
  DecodePALIGNRMask(VT, GroupSize[1], VPAlign3, false, true);

  // Vec[0]= a3 a4 a5 a6 a7 a0 a1 a2
  // Vec[1]= c5 c6 c7 c0 c1 c2 c3 c4
  // Vec[2]= b0 b1 b2 b3 b4 b5 b6 b7

  Vec[0] = Builder.CreateShuffleVector(
      InVec[0], UndefValue::get(InVec[0]->getType()), VPAlign2);
  Vec[1] = Builder.CreateShuffleVector(
      InVec[1], UndefValue::get(InVec[1]->getType()), VPAlign3);
  Vec[2] = InVec[2];

  // Vec[0]= a6 a7 a0 a1 a2 b0 b1 b2
  // Vec[1]= c0 c1 c2 c3 c4 a3 a4 a5
  // Vec[2]= b3 b4 b5 b6 b7 c5 c6 c7

  for (int i = 0; i < 3; i++)
    TempVector[i] =
        Builder.CreateShuffleVector(Vec[i], Vec[(i + 2) % 3], VPAlign[1]);

  // Vec[0]= a0 a1 a2 b0 b1 b2 c0 c1
  // Vec[1]= c2 c3 c4 a3 a4 a5 b3 b4
  // Vec[2]= b5 b6 b7 c5 c6 c7 a6 a7

  for (int i = 0; i < 3; i++)
    Vec[i] = Builder.CreateShuffleVector(TempVector[i], TempVector[(i + 1) % 3],
                                         VPAlign[2]);

  // TransposedMatrix[0] = a0 b0 c0 a1 b1 c1 a2 b2
  // TransposedMatrix[1] = c2 a3 b3 c3 a4 b4 c4 a5
  // TransposedMatrix[2] = b5 c5 a6 b6 c6 a7 b7 c7

  unsigned NumOfElm = VT.getVectorNumElements();
  group2Shuffle(VT, GroupSize, VPShuf);
  reorderSubVector(VT, TransposedMatrix, Vec, VPShuf, NumOfElm, 3, Builder);
}

void X86InterleavedAccessGroup::transpose_4x4(
    ArrayRef<Instruction *> Matrix,
    SmallVectorImpl<Value *> &TransposedMatrix) {
  assert(Matrix.size() == 4 && "Invalid matrix size");
  TransposedMatrix.resize(4);

  // dst = src1[0,1],src2[0,1]
  static constexpr int IntMask1[] = {0, 1, 4, 5};
  ArrayRef<int> Mask = makeArrayRef(IntMask1, 4);
  Value *IntrVec1 = Builder.CreateShuffleVector(Matrix[0], Matrix[2], Mask);
  Value *IntrVec2 = Builder.CreateShuffleVector(Matrix[1], Matrix[3], Mask);

  // dst = src1[2,3],src2[2,3]
  static constexpr int IntMask2[] = {2, 3, 6, 7};
  Mask = makeArrayRef(IntMask2, 4);
  Value *IntrVec3 = Builder.CreateShuffleVector(Matrix[0], Matrix[2], Mask);
  Value *IntrVec4 = Builder.CreateShuffleVector(Matrix[1], Matrix[3], Mask);

  // dst = src1[0],src2[0],src1[2],src2[2]
  static constexpr int IntMask3[] = {0, 4, 2, 6};
  Mask = makeArrayRef(IntMask3, 4);
  TransposedMatrix[0] = Builder.CreateShuffleVector(IntrVec1, IntrVec2, Mask);
  TransposedMatrix[2] = Builder.CreateShuffleVector(IntrVec3, IntrVec4, Mask);

  // dst = src1[1],src2[1],src1[3],src2[3]
  static constexpr int IntMask4[] = {1, 5, 3, 7};
  Mask = makeArrayRef(IntMask4, 4);
  TransposedMatrix[1] = Builder.CreateShuffleVector(IntrVec1, IntrVec2, Mask);
  TransposedMatrix[3] = Builder.CreateShuffleVector(IntrVec3, IntrVec4, Mask);
}
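
// Illustration: for input rows
//   Matrix[0] = a0 a1 a2 a3   Matrix[1] = b0 b1 b2 b3
//   Matrix[2] = c0 c1 c2 c3   Matrix[3] = d0 d1 d2 d3
// the results are TransposedMatrix[0] = a0 b0 c0 d0 through
// TransposedMatrix[3] = a3 b3 c3 d3.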

// Lowers this interleaved access group into X86-specific
// instructions/intrinsics.
bool X86InterleavedAccessGroup::lowerIntoOptimizedSequence() {
  SmallVector<Instruction *, 4> DecomposedVectors;
  SmallVector<Value *, 4> TransposedVectors;
  auto *ShuffleTy = cast<FixedVectorType>(Shuffles[0]->getType());

  if (isa<LoadInst>(Inst)) {
    // Try to generate target-sized register(/instruction).
    decompose(Inst, Factor, ShuffleTy, DecomposedVectors);

    auto *ShuffleEltTy = cast<FixedVectorType>(Inst->getType());
    unsigned NumSubVecElems = ShuffleEltTy->getNumElements() / Factor;
    // Perform matrix-transposition in order to compute interleaved
    // results by generating some sort of (optimized) target-specific
    // instructions.

    switch (NumSubVecElems) {
    default:
      return false;
    case 4:
      transpose_4x4(DecomposedVectors, TransposedVectors);
      break;
    case 8:
    case 16:
    case 32:
    case 64:
      deinterleave8bitStride3(DecomposedVectors, TransposedVectors,
                              NumSubVecElems);
      break;
    }

    // Now replace the unoptimized-interleaved-vectors with the
    // transposed-interleaved vectors.
    for (unsigned i = 0, e = Shuffles.size(); i < e; ++i)
      Shuffles[i]->replaceAllUsesWith(TransposedVectors[Indices[i]]);

    return true;
  }

  Type *ShuffleEltTy = ShuffleTy->getElementType();
  unsigned NumSubVecElems = ShuffleTy->getNumElements() / Factor;

  // Lower the interleaved stores:
  //   1. Decompose the interleaved wide shuffle into individual shuffle
  //      vectors.
  decompose(Shuffles[0], Factor,
            FixedVectorType::get(ShuffleEltTy, NumSubVecElems),
            DecomposedVectors);

  //   2. Transpose the interleaved-vectors into vectors of contiguous
  //      elements.
  switch (NumSubVecElems) {
  case 4:
    transpose_4x4(DecomposedVectors, TransposedVectors);
    break;
  case 8:
    interleave8bitStride4VF8(DecomposedVectors, TransposedVectors);
    break;
  case 16:
  case 32:
  case 64:
    if (Factor == 4)
      interleave8bitStride4(DecomposedVectors, TransposedVectors,
                            NumSubVecElems);
    if (Factor == 3)
      interleave8bitStride3(DecomposedVectors, TransposedVectors,
                            NumSubVecElems);
    break;
  default:
    return false;
  }

  //   3. Concatenate the contiguous-vectors back into a wide vector.
  Value *WideVec = concatenateVectors(Builder, TransposedVectors);

  //   4. Generate a store instruction for wide-vec.
  StoreInst *SI = cast<StoreInst>(Inst);
  Builder.CreateAlignedStore(WideVec, SI->getPointerOperand(), SI->getAlign());

  return true;
}

// Lower interleaved load(s) into target specific instructions/
// intrinsics. Lowering sequence varies depending on the vector-types, factor,
// number of shuffles and ISA.
// Currently, lowering is supported for 4x64 bits with Factor = 4 on AVX.
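// An illustrative sketch of such a supported group (Factor = 4, 64-bit
// elements, 1024-bit wide load):
//   %wide.vec = load <16 x i64>, <16 x i64>* %ptr
//   %v0 = shuffle %wide.vec, undef, <0, 4, 8, 12>
//   %v1 = shuffle %wide.vec, undef, <1, 5, 9, 13>
//   %v2 = shuffle %wide.vec, undef, <2, 6, 10, 14>
//   %v3 = shuffle %wide.vec, undef, <3, 7, 11, 15>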
bool X86TargetLowering::lowerInterleavedLoad(
    LoadInst *LI, ArrayRef<ShuffleVectorInst *> Shuffles,
    ArrayRef<unsigned> Indices, unsigned Factor) const {
  assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
         "Invalid interleave factor");
  assert(!Shuffles.empty() && "Empty shufflevector input");
  assert(Shuffles.size() == Indices.size() &&
         "Unmatched number of shufflevectors and indices");

  // Create an interleaved access group.
  IRBuilder<> Builder(LI);
  X86InterleavedAccessGroup Grp(LI, Shuffles, Indices, Factor, Subtarget,
                                Builder);

  return Grp.isSupported() && Grp.lowerIntoOptimizedSequence();
}

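// Lower interleaved store(s) into target specific instructions/intrinsics.
// An illustrative sketch of the pattern this entry point receives
// (Factor = 4):
//   %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
//            <0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15>
//   store <16 x i32> %i.vec, <16 x i32>* %ptr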
bool X86TargetLowering::lowerInterleavedStore(StoreInst *SI,
                                              ShuffleVectorInst *SVI,
                                              unsigned Factor) const {
  assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
         "Invalid interleave factor");

  assert(cast<FixedVectorType>(SVI->getType())->getNumElements() % Factor ==
             0 &&
         "Invalid interleaved store");

  // Holds the indices of SVI that correspond to the starting index of each
  // interleaved shuffle.
  SmallVector<unsigned, 4> Indices;
  auto Mask = SVI->getShuffleMask();
  for (unsigned i = 0; i < Factor; i++)
    Indices.push_back(Mask[i]);

  ArrayRef<ShuffleVectorInst *> Shuffles(&SVI, 1);

  // Create an interleaved access group.
  IRBuilder<> Builder(SI);
  X86InterleavedAccessGroup Grp(SI, Shuffles, Indices, Factor, Subtarget,
                                Builder);

  return Grp.isSupported() && Grp.lowerIntoOptimizedSequence();
}