//===- AMDGPULegalizerInfo.cpp -----------------------------------*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the Machinelegalizer class for
/// AMDGPU.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPULegalizerInfo.h"
#include "AMDGPUTargetMachine.h"
#include "SIMachineFunctionInfo.h"

#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Debug.h"

using namespace llvm;
using namespace LegalizeActions;
using namespace LegalizeMutations;
using namespace LegalityPredicates;

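// Legality predicate: the type at TypeIdx is at most MaxSize bits wide and its
// scalar (element) size is a whole number of 32-bit registers.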
static LegalityPredicate isMultiple32(unsigned TypeIdx,
                                      unsigned MaxSize = 512) {
  return [=](const LegalityQuery &Query) {
    const LLT Ty = Query.Types[TypeIdx];
    const LLT EltTy = Ty.getScalarType();
    return Ty.getSizeInBits() <= MaxSize && EltTy.getSizeInBits() % 32 == 0;
  };
}

static LegalityPredicate isSmallOddVector(unsigned TypeIdx) {
  return [=](const LegalityQuery &Query) {
    const LLT Ty = Query.Types[TypeIdx];
    return Ty.isVector() &&
           Ty.getNumElements() % 2 != 0 &&
           Ty.getElementType().getSizeInBits() < 32;
  };
}

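// Mutation: pad an odd-element vector with one extra element, e.g. <3 x s16>
// becomes <4 x s16>.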
static LegalizeMutation oneMoreElement(unsigned TypeIdx) {
  return [=](const LegalityQuery &Query) {
    const LLT Ty = Query.Types[TypeIdx];
    const LLT EltTy = Ty.getElementType();
    return std::make_pair(TypeIdx, LLT::vector(Ty.getNumElements() + 1, EltTy));
  };
}

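// Mutation: reduce the element count so each piece is at most 64 bits wide,
// e.g. <4 x s32> (128 bits, two 64-bit pieces) becomes <2 x s32>.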
static LegalizeMutation fewerEltsToSize64Vector(unsigned TypeIdx) {
  return [=](const LegalityQuery &Query) {
    const LLT Ty = Query.Types[TypeIdx];
    const LLT EltTy = Ty.getElementType();
    unsigned Size = Ty.getSizeInBits();
    unsigned Pieces = (Size + 63) / 64;
    unsigned NewNumElts = (Ty.getNumElements() + 1) / Pieces;
    return std::make_pair(TypeIdx, LLT::scalarOrVector(NewNumElts, EltTy));
  };
}

static LegalityPredicate vectorWiderThan(unsigned TypeIdx, unsigned Size) {
  return [=](const LegalityQuery &Query) {
    const LLT QueryTy = Query.Types[TypeIdx];
    return QueryTy.isVector() && QueryTy.getSizeInBits() > Size;
  };
}

static LegalityPredicate numElementsNotEven(unsigned TypeIdx) {
  return [=](const LegalityQuery &Query) {
    const LLT QueryTy = Query.Types[TypeIdx];
    return QueryTy.isVector() && QueryTy.getNumElements() % 2 != 0;
  };
}

AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST,
                                         const GCNTargetMachine &TM) {
  using namespace TargetOpcode;

  auto GetAddrSpacePtr = [&TM](unsigned AS) {
    return LLT::pointer(AS, TM.getPointerSizeInBits(AS));
  };

  const LLT S1 = LLT::scalar(1);
  const LLT S8 = LLT::scalar(8);
  const LLT S16 = LLT::scalar(16);
  const LLT S32 = LLT::scalar(32);
  const LLT S64 = LLT::scalar(64);
  const LLT S128 = LLT::scalar(128);
  const LLT S256 = LLT::scalar(256);
  const LLT S512 = LLT::scalar(512);

  const LLT V2S16 = LLT::vector(2, 16);
  const LLT V4S16 = LLT::vector(4, 16);
  const LLT V8S16 = LLT::vector(8, 16);

  const LLT V2S32 = LLT::vector(2, 32);
  const LLT V3S32 = LLT::vector(3, 32);
  const LLT V4S32 = LLT::vector(4, 32);
  const LLT V5S32 = LLT::vector(5, 32);
  const LLT V6S32 = LLT::vector(6, 32);
  const LLT V7S32 = LLT::vector(7, 32);
  const LLT V8S32 = LLT::vector(8, 32);
  const LLT V9S32 = LLT::vector(9, 32);
  const LLT V10S32 = LLT::vector(10, 32);
  const LLT V11S32 = LLT::vector(11, 32);
  const LLT V12S32 = LLT::vector(12, 32);
  const LLT V13S32 = LLT::vector(13, 32);
  const LLT V14S32 = LLT::vector(14, 32);
  const LLT V15S32 = LLT::vector(15, 32);
  const LLT V16S32 = LLT::vector(16, 32);

  const LLT V2S64 = LLT::vector(2, 64);
  const LLT V3S64 = LLT::vector(3, 64);
  const LLT V4S64 = LLT::vector(4, 64);
  const LLT V5S64 = LLT::vector(5, 64);
  const LLT V6S64 = LLT::vector(6, 64);
  const LLT V7S64 = LLT::vector(7, 64);
  const LLT V8S64 = LLT::vector(8, 64);

  std::initializer_list<LLT> AllS32Vectors =
    {V2S32, V3S32, V4S32, V5S32, V6S32, V7S32, V8S32,
     V9S32, V10S32, V11S32, V12S32, V13S32, V14S32, V15S32, V16S32};
  std::initializer_list<LLT> AllS64Vectors =
    {V2S64, V3S64, V4S64, V5S64, V6S64, V7S64, V8S64};

  const LLT GlobalPtr = GetAddrSpacePtr(AMDGPUAS::GLOBAL_ADDRESS);
  const LLT ConstantPtr = GetAddrSpacePtr(AMDGPUAS::CONSTANT_ADDRESS);
  const LLT LocalPtr = GetAddrSpacePtr(AMDGPUAS::LOCAL_ADDRESS);
  const LLT FlatPtr = GetAddrSpacePtr(AMDGPUAS::FLAT_ADDRESS);
  const LLT PrivatePtr = GetAddrSpacePtr(AMDGPUAS::PRIVATE_ADDRESS);

  const LLT CodePtr = FlatPtr;

  const std::initializer_list<LLT> AddrSpaces64 = {
    GlobalPtr, ConstantPtr, FlatPtr
  };

  const std::initializer_list<LLT> AddrSpaces32 = {
    LocalPtr, PrivatePtr
  };

  setAction({G_BRCOND, S1}, Legal);

  // TODO: All multiples of 32, vectors of pointers, all v2s16 pairs, more
  // elements for v3s16
  getActionDefinitionsBuilder(G_PHI)
    .legalFor({S32, S64, V2S16, V4S16, S1, S128, S256})
    .legalFor(AllS32Vectors)
    .legalFor(AllS64Vectors)
    .legalFor(AddrSpaces64)
    .legalFor(AddrSpaces32)
    .clampScalar(0, S32, S256)
    .widenScalarToNextPow2(0, 32)
    .clampMaxNumElements(0, S32, 16)
    .moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
    .legalIf(isPointer(0));

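  // Basic integer arithmetic is only legal on 32-bit scalars: other widths are
  // clamped to s32 and vector operands are broken up into scalars.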
  getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL, G_UMULH, G_SMULH})
    .legalFor({S32})
    .clampScalar(0, S32, S32)
    .scalarize(0);

  // Report legal for any types we can handle anywhere. For the cases only legal
  // on the SALU, RegBankSelect will be able to re-legalize.
  getActionDefinitionsBuilder({G_AND, G_OR, G_XOR})
    .legalFor({S32, S1, S64, V2S32, V2S16, V4S16})
    .clampScalar(0, S32, S64)
    .moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
    .fewerElementsIf(vectorWiderThan(0, 32), fewerEltsToSize64Vector(0))
    .widenScalarToNextPow2(0)
    .scalarize(0);

  getActionDefinitionsBuilder({G_UADDO, G_SADDO, G_USUBO, G_SSUBO,
                               G_UADDE, G_SADDE, G_USUBE, G_SSUBE})
    .legalFor({{S32, S1}})
    .clampScalar(0, S32, S32);

  getActionDefinitionsBuilder(G_BITCAST)
    .legalForCartesianProduct({S32, V2S16})
    .legalForCartesianProduct({S64, V2S32, V4S16})
    .legalForCartesianProduct({V2S64, V4S32})
    // Don't worry about the size constraint.
    .legalIf(all(isPointer(0), isPointer(1)));

  if (ST.has16BitInsts()) {
    getActionDefinitionsBuilder(G_FCONSTANT)
      .legalFor({S32, S64, S16})
      .clampScalar(0, S16, S64);
  } else {
    getActionDefinitionsBuilder(G_FCONSTANT)
      .legalFor({S32, S64})
      .clampScalar(0, S32, S64);
  }

  getActionDefinitionsBuilder(G_IMPLICIT_DEF)
    .legalFor({S1, S32, S64, V2S32, V4S32, V2S16, V4S16, GlobalPtr,
               ConstantPtr, LocalPtr, FlatPtr, PrivatePtr})
    .moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
    .clampScalarOrElt(0, S32, S512)
    .legalIf(isMultiple32(0))
    .widenScalarToNextPow2(0, 32)
    .clampMaxNumElements(0, S32, 16);

  // FIXME: i1 operands to intrinsics should always be legal, but other i1
  // values may not be legal. We need to figure out how to distinguish
  // between these two scenarios.
  getActionDefinitionsBuilder(G_CONSTANT)
    .legalFor({S1, S32, S64, GlobalPtr,
               LocalPtr, ConstantPtr, PrivatePtr, FlatPtr})
    .clampScalar(0, S32, S64)
    .widenScalarToNextPow2(0)
    .legalIf(isPointer(0));

  setAction({G_FRAME_INDEX, PrivatePtr}, Legal);

  auto &FPOpActions = getActionDefinitionsBuilder(
    {G_FADD, G_FMUL, G_FNEG, G_FABS, G_FMA, G_FCANONICALIZE})
    .legalFor({S32, S64});

  if (ST.has16BitInsts()) {
    if (ST.hasVOP3PInsts())
      FPOpActions.legalFor({S16, V2S16});
    else
      FPOpActions.legalFor({S16});
  }

  if (ST.hasVOP3PInsts())
    FPOpActions.clampMaxNumElements(0, S16, 2);
  FPOpActions
    .scalarize(0)
    .clampScalar(0, ST.has16BitInsts() ? S16 : S32, S64);

  if (ST.has16BitInsts()) {
    getActionDefinitionsBuilder(G_FSQRT)
      .legalFor({S32, S64, S16})
      .scalarize(0)
      .clampScalar(0, S16, S64);
  } else {
    getActionDefinitionsBuilder(G_FSQRT)
      .legalFor({S32, S64})
      .scalarize(0)
      .clampScalar(0, S32, S64);
  }

  getActionDefinitionsBuilder(G_FPTRUNC)
    .legalFor({{S32, S64}, {S16, S32}})
    .scalarize(0);

  getActionDefinitionsBuilder(G_FPEXT)
    .legalFor({{S64, S32}, {S32, S16}})
    .lowerFor({{S64, S16}}) // FIXME: Implement
    .scalarize(0);

  getActionDefinitionsBuilder(G_FCOPYSIGN)
    .legalForCartesianProduct({S16, S32, S64}, {S16, S32, S64})
    .scalarize(0);

  getActionDefinitionsBuilder(G_FSUB)
    // Use actual fsub instruction
    .legalFor({S32})
    // Must use fadd + fneg
    .lowerFor({S64, S16, V2S16})
    .scalarize(0)
    .clampScalar(0, S32, S64);

  getActionDefinitionsBuilder({G_SEXT, G_ZEXT, G_ANYEXT})
    .legalFor({{S64, S32}, {S32, S16}, {S64, S16},
               {S32, S1}, {S64, S1}, {S16, S1},
               // FIXME: Hack
               {S64, LLT::scalar(33)},
               {S32, S8}, {S128, S32}, {S128, S64}, {S32, LLT::scalar(24)}})
    .scalarize(0);

  getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
    .legalFor({{S32, S32}, {S64, S32}})
    .lowerFor({{S32, S64}})
    .customFor({{S64, S64}})
    .scalarize(0);

  getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
    .legalFor({{S32, S32}, {S32, S64}})
    .scalarize(0);

  getActionDefinitionsBuilder(G_INTRINSIC_ROUND)
    .legalFor({S32, S64})
    .scalarize(0);

  if (ST.getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS) {
    getActionDefinitionsBuilder({G_INTRINSIC_TRUNC, G_FCEIL, G_FRINT})
      .legalFor({S32, S64})
      .clampScalar(0, S32, S64)
      .scalarize(0);
  } else {
    getActionDefinitionsBuilder({G_INTRINSIC_TRUNC, G_FCEIL, G_FRINT})
      .legalFor({S32})
      .customFor({S64})
      .clampScalar(0, S32, S64)
      .scalarize(0);
  }

  getActionDefinitionsBuilder(G_GEP)
    .legalForCartesianProduct(AddrSpaces64, {S64})
    .legalForCartesianProduct(AddrSpaces32, {S32})
    .scalarize(0);

  setAction({G_BLOCK_ADDR, CodePtr}, Legal);

  getActionDefinitionsBuilder(G_ICMP)
    .legalForCartesianProduct(
      {S1}, {S32, S64, GlobalPtr, LocalPtr, ConstantPtr, PrivatePtr, FlatPtr})
    .legalFor({{S1, S32}, {S1, S64}})
    .widenScalarToNextPow2(1)
    .clampScalar(1, S32, S64)
    .scalarize(0)
    .legalIf(all(typeIs(0, S1), isPointer(1)));

  getActionDefinitionsBuilder(G_FCMP)
    .legalFor({{S1, S32}, {S1, S64}})
    .widenScalarToNextPow2(1)
    .clampScalar(1, S32, S64)
    .scalarize(0);

  // FIXME: fexp, flog2, flog10 needs to be custom lowered.
  getActionDefinitionsBuilder({G_FPOW, G_FEXP, G_FEXP2,
                               G_FLOG, G_FLOG2, G_FLOG10})
    .legalFor({S32})
    .scalarize(0);

  // The 64-bit versions produce 32-bit results, but only on the SALU.
  getActionDefinitionsBuilder({G_CTLZ, G_CTLZ_ZERO_UNDEF,
                               G_CTTZ, G_CTTZ_ZERO_UNDEF,
                               G_CTPOP})
    .legalFor({{S32, S32}, {S32, S64}})
    .clampScalar(0, S32, S32)
    .clampScalar(1, S32, S64)
    .scalarize(0)
    .widenScalarToNextPow2(0, 32)
    .widenScalarToNextPow2(1, 32);

  // TODO: Expand for > s32
  getActionDefinitionsBuilder(G_BSWAP)
    .legalFor({S32})
    .clampScalar(0, S32, S32)
    .scalarize(0);

  if (ST.has16BitInsts()) {
    if (ST.hasVOP3PInsts()) {
      getActionDefinitionsBuilder({G_SMIN, G_SMAX, G_UMIN, G_UMAX})
        .legalFor({S32, S16, V2S16})
        .moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
        .clampMaxNumElements(0, S16, 2)
        .clampScalar(0, S16, S32)
        .widenScalarToNextPow2(0)
        .scalarize(0);
    } else {
      getActionDefinitionsBuilder({G_SMIN, G_SMAX, G_UMIN, G_UMAX})
        .legalFor({S32, S16})
        .widenScalarToNextPow2(0)
        .clampScalar(0, S16, S32)
        .scalarize(0);
    }
  } else {
    getActionDefinitionsBuilder({G_SMIN, G_SMAX, G_UMIN, G_UMAX})
      .legalFor({S32})
      .clampScalar(0, S32, S32)
      .widenScalarToNextPow2(0)
      .scalarize(0);
  }

  auto smallerThan = [](unsigned TypeIdx0, unsigned TypeIdx1) {
    return [=](const LegalityQuery &Query) {
      return Query.Types[TypeIdx0].getSizeInBits() <
             Query.Types[TypeIdx1].getSizeInBits();
    };
  };

  auto greaterThan = [](unsigned TypeIdx0, unsigned TypeIdx1) {
    return [=](const LegalityQuery &Query) {
      return Query.Types[TypeIdx0].getSizeInBits() >
             Query.Types[TypeIdx1].getSizeInBits();
    };
  };

  getActionDefinitionsBuilder(G_INTTOPTR)
    // List the common cases
    .legalForCartesianProduct(AddrSpaces64, {S64})
    .legalForCartesianProduct(AddrSpaces32, {S32})
    .scalarize(0)
    // Accept any address space as long as the size matches
    .legalIf(sameSize(0, 1))
    .widenScalarIf(smallerThan(1, 0),
      [](const LegalityQuery &Query) {
        return std::make_pair(1, LLT::scalar(Query.Types[0].getSizeInBits()));
      })
    .narrowScalarIf(greaterThan(1, 0),
      [](const LegalityQuery &Query) {
        return std::make_pair(1, LLT::scalar(Query.Types[0].getSizeInBits()));
      });

  getActionDefinitionsBuilder(G_PTRTOINT)
    // List the common cases
    .legalForCartesianProduct(AddrSpaces64, {S64})
    .legalForCartesianProduct(AddrSpaces32, {S32})
    .scalarize(0)
    // Accept any address space as long as the size matches
    .legalIf(sameSize(0, 1))
    .widenScalarIf(smallerThan(0, 1),
      [](const LegalityQuery &Query) {
        return std::make_pair(0, LLT::scalar(Query.Types[1].getSizeInBits()));
      })
    .narrowScalarIf(
      greaterThan(0, 1),
      [](const LegalityQuery &Query) {
        return std::make_pair(0, LLT::scalar(Query.Types[1].getSizeInBits()));
      });

  if (ST.hasFlatAddressSpace()) {
    getActionDefinitionsBuilder(G_ADDRSPACE_CAST)
      .scalarize(0)
      .custom();
  }

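  // Loads and stores: 32, 64 and 128-bit accesses (and 96-bit ones where
  // dwordx3 load/store is available) are legal when the memory size matches
  // the register size; 8 and 16-bit accesses must produce a 32-bit result.
  // Results wider than the memory access are narrowed to 32-bit pieces, and
  // 96-bit vector accesses are split into <2 x s32> pieces on targets without
  // dwordx3 instructions.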
  getActionDefinitionsBuilder({G_LOAD, G_STORE})
    .narrowScalarIf([](const LegalityQuery &Query) {
        unsigned Size = Query.Types[0].getSizeInBits();
        unsigned MemSize = Query.MMODescrs[0].SizeInBits;
        return (Size > 32 && MemSize < Size);
      },
      [](const LegalityQuery &Query) {
        return std::make_pair(0, LLT::scalar(32));
      })
    .fewerElementsIf([=, &ST](const LegalityQuery &Query) {
        unsigned MemSize = Query.MMODescrs[0].SizeInBits;
        return (MemSize == 96) &&
               Query.Types[0].isVector() &&
               !ST.hasDwordx3LoadStores();
      },
      [=](const LegalityQuery &Query) {
        return std::make_pair(0, V2S32);
      })
    .legalIf([=, &ST](const LegalityQuery &Query) {
        const LLT &Ty0 = Query.Types[0];

        unsigned Size = Ty0.getSizeInBits();
        unsigned MemSize = Query.MMODescrs[0].SizeInBits;
        if (Size < 32 || (Size > 32 && MemSize < Size))
          return false;

        if (Ty0.isVector() && Size != MemSize)
          return false;

        // TODO: Decompose private loads into 4-byte components.
        // TODO: Illegal flat loads on SI
        switch (MemSize) {
        case 8:
        case 16:
          return Size == 32;
        case 32:
        case 64:
        case 128:
          return true;

        case 96:
          return ST.hasDwordx3LoadStores();

        case 256:
        case 512:
          // TODO: constant loads
        default:
          return false;
        }
      })
    .clampScalar(0, S32, S64);

  // FIXME: Handle alignment requirements.
  auto &ExtLoads = getActionDefinitionsBuilder({G_SEXTLOAD, G_ZEXTLOAD})
    .legalForTypesWithMemDesc({
        {S32, GlobalPtr, 8, 8},
        {S32, GlobalPtr, 16, 8},
        {S32, LocalPtr, 8, 8},
        {S32, LocalPtr, 16, 8},
        {S32, PrivatePtr, 8, 8},
        {S32, PrivatePtr, 16, 8}});
  if (ST.hasFlatAddressSpace()) {
    ExtLoads.legalForTypesWithMemDesc({{S32, FlatPtr, 8, 8},
                                       {S32, FlatPtr, 16, 8}});
  }

  ExtLoads.clampScalar(0, S32, S32)
          .widenScalarToNextPow2(0)
          .unsupportedIfMemSizeNotPow2()
          .lower();

  auto &Atomics = getActionDefinitionsBuilder(
    {G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD, G_ATOMICRMW_SUB,
     G_ATOMICRMW_AND, G_ATOMICRMW_OR, G_ATOMICRMW_XOR,
     G_ATOMICRMW_MAX, G_ATOMICRMW_MIN, G_ATOMICRMW_UMAX,
     G_ATOMICRMW_UMIN, G_ATOMIC_CMPXCHG})
    .legalFor({{S32, GlobalPtr}, {S32, LocalPtr},
               {S64, GlobalPtr}, {S64, LocalPtr}});
  if (ST.hasFlatAddressSpace()) {
    Atomics.legalFor({{S32, FlatPtr}, {S64, FlatPtr}});
  }

  // TODO: Pointer types, any 32-bit or 64-bit vector
  getActionDefinitionsBuilder(G_SELECT)
    .legalForCartesianProduct({S32, S64, V2S32, V2S16, V4S16,
                               GlobalPtr, LocalPtr, FlatPtr, PrivatePtr,
                               LLT::vector(2, LocalPtr),
                               LLT::vector(2, PrivatePtr)}, {S1})
    .clampScalar(0, S32, S64)
    .moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
    .fewerElementsIf(numElementsNotEven(0), scalarize(0))
    .scalarize(1)
    .clampMaxNumElements(0, S32, 2)
    .clampMaxNumElements(0, LocalPtr, 2)
    .clampMaxNumElements(0, PrivatePtr, 2)
    .scalarize(0)
    .widenScalarToNextPow2(0)
    .legalIf(all(isPointer(0), typeIs(1, S1)));

  // TODO: Only the low 4/5/6 bits of the shift amount are observed, so we can
  // be more flexible with the shift amount type.
  auto &Shifts = getActionDefinitionsBuilder({G_SHL, G_LSHR, G_ASHR})
    .legalFor({{S32, S32}, {S64, S32}});
  if (ST.has16BitInsts()) {
    if (ST.hasVOP3PInsts()) {
      Shifts.legalFor({{S16, S32}, {S16, S16}, {V2S16, V2S16}})
            .clampMaxNumElements(0, S16, 2);
    } else
      Shifts.legalFor({{S16, S32}, {S16, S16}});

    Shifts.clampScalar(1, S16, S32);
    Shifts.clampScalar(0, S16, S64);
    Shifts.widenScalarToNextPow2(0, 16);
  } else {
    // Make sure we legalize the shift amount type first, as the general
    // expansion for the shifted type will produce much worse code if it hasn't
    // been truncated already.
    Shifts.clampScalar(1, S32, S32);
    Shifts.clampScalar(0, S32, S64);
    Shifts.widenScalarToNextPow2(0, 32);
  }
  Shifts.scalarize(0);

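  // Dynamic-index extract/insert of a vector element is legal when the vector
  // fits in whole 32-bit registers (up to 512 bits) and the index is s32;
  // otherwise the element, vector and index types are clamped into that range.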
  for (unsigned Op : {G_EXTRACT_VECTOR_ELT, G_INSERT_VECTOR_ELT}) {
    unsigned VecTypeIdx = Op == G_EXTRACT_VECTOR_ELT ? 1 : 0;
    unsigned EltTypeIdx = Op == G_EXTRACT_VECTOR_ELT ? 0 : 1;
    unsigned IdxTypeIdx = 2;

    getActionDefinitionsBuilder(Op)
      .legalIf([=](const LegalityQuery &Query) {
          const LLT &VecTy = Query.Types[VecTypeIdx];
          const LLT &IdxTy = Query.Types[IdxTypeIdx];
          return VecTy.getSizeInBits() % 32 == 0 &&
                 VecTy.getSizeInBits() <= 512 &&
                 IdxTy.getSizeInBits() == 32;
        })
      .clampScalar(EltTypeIdx, S32, S64)
      .clampScalar(VecTypeIdx, S32, S64)
      .clampScalar(IdxTypeIdx, S32, S32);
  }

  getActionDefinitionsBuilder(G_EXTRACT_VECTOR_ELT)
    .unsupportedIf([=](const LegalityQuery &Query) {
        const LLT &EltTy = Query.Types[1].getElementType();
        return Query.Types[0] != EltTy;
      });

  for (unsigned Op : {G_EXTRACT, G_INSERT}) {
    unsigned BigTyIdx = Op == G_EXTRACT ? 1 : 0;
    unsigned LitTyIdx = Op == G_EXTRACT ? 0 : 1;

    // FIXME: Doesn't handle extract of illegal sizes.
    getActionDefinitionsBuilder(Op)
      .legalIf([=](const LegalityQuery &Query) {
          const LLT BigTy = Query.Types[BigTyIdx];
          const LLT LitTy = Query.Types[LitTyIdx];
          return (BigTy.getSizeInBits() % 32 == 0) &&
                 (LitTy.getSizeInBits() % 16 == 0);
        })
      .widenScalarIf(
        [=](const LegalityQuery &Query) {
          const LLT BigTy = Query.Types[BigTyIdx];
          return (BigTy.getScalarSizeInBits() < 16);
        },
        widenScalarOrEltToNextPow2(BigTyIdx, 16))
      .widenScalarIf(
        [=](const LegalityQuery &Query) {
          const LLT LitTy = Query.Types[LitTyIdx];
          return (LitTy.getScalarSizeInBits() < 16);
        },
        widenScalarOrEltToNextPow2(LitTyIdx, 16))
      .moreElementsIf(isSmallOddVector(BigTyIdx), oneMoreElement(BigTyIdx))
      .widenScalarToNextPow2(BigTyIdx, 32);
  }

  // TODO: vectors of pointers
  getActionDefinitionsBuilder(G_BUILD_VECTOR)
    .legalForCartesianProduct(AllS32Vectors, {S32})
    .legalForCartesianProduct(AllS64Vectors, {S64})
    .clampNumElements(0, V16S32, V16S32)
    .clampNumElements(0, V2S64, V8S64)
    .minScalarSameAs(1, 0)
    // FIXME: Sort of a hack to make progress on other legalizations.
    .legalIf([=](const LegalityQuery &Query) {
      return Query.Types[0].getScalarSizeInBits() <= 32 ||
             Query.Types[0].getScalarSizeInBits() == 64;
    });

  // TODO: Support any combination of v2s32
  getActionDefinitionsBuilder(G_CONCAT_VECTORS)
    .legalFor({{V4S32, V2S32},
               {V8S32, V2S32},
               {V8S32, V4S32},
               {V4S64, V2S64},
               {V4S16, V2S16},
               {V8S16, V2S16},
               {V8S16, V4S16},
               {LLT::vector(4, LocalPtr), LLT::vector(2, LocalPtr)},
               {LLT::vector(4, PrivatePtr), LLT::vector(2, PrivatePtr)}});

  // Merge/Unmerge
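  // BigTyIdx is the merged type and LitTyIdx the piece type. Legality requires
  // both to be multiples of 16 bits with the merged type at most 512 bits, so
  // odd-sized scalars are widened first and vectors with unsupported element
  // types are scalarized.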
  for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
    unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
    unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;

    auto notValidElt = [=](const LegalityQuery &Query, unsigned TypeIdx) {
      const LLT &Ty = Query.Types[TypeIdx];
      if (Ty.isVector()) {
        const LLT &EltTy = Ty.getElementType();
        if (EltTy.getSizeInBits() < 8 || EltTy.getSizeInBits() > 64)
          return true;
        if (!isPowerOf2_32(EltTy.getSizeInBits()))
          return true;
      }
      return false;
    };

    getActionDefinitionsBuilder(Op)
      .widenScalarToNextPow2(LitTyIdx, /*Min*/ 16)
      // Clamp the little scalar to s16-s256 and make it a power of 2. It's not
      // worth considering the multiples of 64 since 2*192 and 2*384 are not
      // valid.
      .clampScalar(LitTyIdx, S16, S256)
      .widenScalarToNextPow2(LitTyIdx, /*Min*/ 32)

      // Break up vectors with weird elements into scalars
      .fewerElementsIf(
        [=](const LegalityQuery &Query) { return notValidElt(Query, 0); },
        scalarize(0))
      .fewerElementsIf(
        [=](const LegalityQuery &Query) { return notValidElt(Query, 1); },
        scalarize(1))
      .clampScalar(BigTyIdx, S32, S512)
      .widenScalarIf(
        [=](const LegalityQuery &Query) {
          const LLT &Ty = Query.Types[BigTyIdx];
          return !isPowerOf2_32(Ty.getSizeInBits()) &&
                 Ty.getSizeInBits() % 16 != 0;
        },
        [=](const LegalityQuery &Query) {
          // Pick the next power of 2, or a multiple of 64 over 128.
          // Whichever is smaller.
          const LLT &Ty = Query.Types[BigTyIdx];
          unsigned NewSizeInBits = 1 << Log2_32_Ceil(Ty.getSizeInBits() + 1);
          if (NewSizeInBits >= 256) {
            unsigned RoundedTo = alignTo<64>(Ty.getSizeInBits() + 1);
            if (RoundedTo < NewSizeInBits)
              NewSizeInBits = RoundedTo;
          }
          return std::make_pair(BigTyIdx, LLT::scalar(NewSizeInBits));
        })
      .legalIf([=](const LegalityQuery &Query) {
          const LLT &BigTy = Query.Types[BigTyIdx];
          const LLT &LitTy = Query.Types[LitTyIdx];

          if (BigTy.isVector() && BigTy.getSizeInBits() < 32)
            return false;
          if (LitTy.isVector() && LitTy.getSizeInBits() < 32)
            return false;

          return BigTy.getSizeInBits() % 16 == 0 &&
                 LitTy.getSizeInBits() % 16 == 0 &&
                 BigTy.getSizeInBits() <= 512;
        })
      // Any vectors left are the wrong size. Scalarize them.
      .scalarize(0)
      .scalarize(1);
  }

  computeTables();
  verify(*ST.getInstrInfo());
}

bool AMDGPULegalizerInfo::legalizeCustom(MachineInstr &MI,
                                         MachineRegisterInfo &MRI,
                                         MachineIRBuilder &MIRBuilder,
                                         GISelChangeObserver &Observer) const {
  switch (MI.getOpcode()) {
  case TargetOpcode::G_ADDRSPACE_CAST:
    return legalizeAddrSpaceCast(MI, MRI, MIRBuilder);
  case TargetOpcode::G_FRINT:
    return legalizeFrint(MI, MRI, MIRBuilder);
  case TargetOpcode::G_FCEIL:
    return legalizeFceil(MI, MRI, MIRBuilder);
  case TargetOpcode::G_INTRINSIC_TRUNC:
    return legalizeIntrinsicTrunc(MI, MRI, MIRBuilder);
  case TargetOpcode::G_SITOFP:
    return legalizeITOFP(MI, MRI, MIRBuilder, true);
  case TargetOpcode::G_UITOFP:
    return legalizeITOFP(MI, MRI, MIRBuilder, false);
  default:
    return false;
  }

  llvm_unreachable("expected switch to return");
}

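// Return a register holding the aperture (the high 32 bits of the 64-bit flat
// address) for the given segment address space (LDS or private). On
// subtargets with aperture registers it is read with s_getreg; otherwise it
// is loaded from the queue pointer.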
unsigned AMDGPULegalizerInfo::getSegmentAperture(
  unsigned AS,
  MachineRegisterInfo &MRI,
  MachineIRBuilder &MIRBuilder) const {
  MachineFunction &MF = MIRBuilder.getMF();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const LLT S32 = LLT::scalar(32);

  if (ST.hasApertureRegs()) {
    // FIXME: Use inline constants (src_{shared, private}_base) instead of
    // getreg.
    unsigned Offset = AS == AMDGPUAS::LOCAL_ADDRESS ?
      AMDGPU::Hwreg::OFFSET_SRC_SHARED_BASE :
      AMDGPU::Hwreg::OFFSET_SRC_PRIVATE_BASE;
    unsigned WidthM1 = AS == AMDGPUAS::LOCAL_ADDRESS ?
      AMDGPU::Hwreg::WIDTH_M1_SRC_SHARED_BASE :
      AMDGPU::Hwreg::WIDTH_M1_SRC_PRIVATE_BASE;
    unsigned Encoding =
      AMDGPU::Hwreg::ID_MEM_BASES << AMDGPU::Hwreg::ID_SHIFT_ |
      Offset << AMDGPU::Hwreg::OFFSET_SHIFT_ |
      WidthM1 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_;

    unsigned ApertureReg = MRI.createGenericVirtualRegister(S32);
    unsigned GetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);

    MIRBuilder.buildInstr(AMDGPU::S_GETREG_B32)
      .addDef(GetReg)
      .addImm(Encoding);
    MRI.setType(GetReg, S32);

    auto ShiftAmt = MIRBuilder.buildConstant(S32, WidthM1 + 1);
    MIRBuilder.buildInstr(TargetOpcode::G_SHL)
      .addDef(ApertureReg)
      .addUse(GetReg)
      .addUse(ShiftAmt.getReg(0));

    return ApertureReg;
  }

  unsigned QueuePtr = MRI.createGenericVirtualRegister(
    LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64));

  // FIXME: Placeholder until we can track the input registers.
  MIRBuilder.buildConstant(QueuePtr, 0xdeadbeef);

  // Offset into amd_queue_t for group_segment_aperture_base_hi /
  // private_segment_aperture_base_hi.
  uint32_t StructOffset = (AS == AMDGPUAS::LOCAL_ADDRESS) ? 0x40 : 0x44;

  // FIXME: Don't use undef
  Value *V = UndefValue::get(PointerType::get(
    Type::getInt8Ty(MF.getFunction().getContext()),
    AMDGPUAS::CONSTANT_ADDRESS));

  MachinePointerInfo PtrInfo(V, StructOffset);
  MachineMemOperand *MMO = MF.getMachineMemOperand(
    PtrInfo,
    MachineMemOperand::MOLoad |
    MachineMemOperand::MODereferenceable |
    MachineMemOperand::MOInvariant,
    4,
    MinAlign(64, StructOffset));

  unsigned LoadResult = MRI.createGenericVirtualRegister(S32);
  unsigned LoadAddr = AMDGPU::NoRegister;

  MIRBuilder.materializeGEP(LoadAddr, QueuePtr, LLT::scalar(64), StructOffset);
  MIRBuilder.buildLoad(LoadResult, LoadAddr, *MMO);
  return LoadResult;
}

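// Lower G_ADDRSPACE_CAST between flat and the LDS/private segments: a cast to
// flat merges the 32-bit segment pointer with the segment aperture to form a
// 64-bit address, a cast from flat keeps only the low 32 bits, and in both
// directions the segment/flat null values are compared and propagated
// explicitly.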
bool AMDGPULegalizerInfo::legalizeAddrSpaceCast(
  MachineInstr &MI, MachineRegisterInfo &MRI,
  MachineIRBuilder &MIRBuilder) const {
  MachineFunction &MF = MIRBuilder.getMF();

  MIRBuilder.setInstr(MI);

  unsigned Dst = MI.getOperand(0).getReg();
  unsigned Src = MI.getOperand(1).getReg();

  LLT DstTy = MRI.getType(Dst);
  LLT SrcTy = MRI.getType(Src);
  unsigned DestAS = DstTy.getAddressSpace();
  unsigned SrcAS = SrcTy.getAddressSpace();

  // TODO: Avoid reloading from the queue ptr for each cast, or at least each
  // vector element.
  assert(!DstTy.isVector());

  const AMDGPUTargetMachine &TM
    = static_cast<const AMDGPUTargetMachine &>(MF.getTarget());

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  if (ST.getTargetLowering()->isNoopAddrSpaceCast(SrcAS, DestAS)) {
    MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::G_BITCAST));
    return true;
  }

  if (SrcAS == AMDGPUAS::FLAT_ADDRESS) {
    assert(DestAS == AMDGPUAS::LOCAL_ADDRESS ||
           DestAS == AMDGPUAS::PRIVATE_ADDRESS);
    unsigned NullVal = TM.getNullPointerValue(DestAS);

    auto SegmentNull = MIRBuilder.buildConstant(DstTy, NullVal);
    auto FlatNull = MIRBuilder.buildConstant(SrcTy, 0);

    unsigned PtrLo32 = MRI.createGenericVirtualRegister(DstTy);

    // Extract low 32-bits of the pointer.
    MIRBuilder.buildExtract(PtrLo32, Src, 0);

    unsigned CmpRes = MRI.createGenericVirtualRegister(LLT::scalar(1));
    MIRBuilder.buildICmp(CmpInst::ICMP_NE, CmpRes, Src, FlatNull.getReg(0));
    MIRBuilder.buildSelect(Dst, CmpRes, PtrLo32, SegmentNull.getReg(0));

    MI.eraseFromParent();
    return true;
  }

  assert(SrcAS == AMDGPUAS::LOCAL_ADDRESS ||
         SrcAS == AMDGPUAS::PRIVATE_ADDRESS);

  auto SegmentNull =
    MIRBuilder.buildConstant(SrcTy, TM.getNullPointerValue(SrcAS));
  auto FlatNull =
    MIRBuilder.buildConstant(DstTy, TM.getNullPointerValue(DestAS));

  unsigned ApertureReg = getSegmentAperture(DestAS, MRI, MIRBuilder);

  unsigned CmpRes = MRI.createGenericVirtualRegister(LLT::scalar(1));
  MIRBuilder.buildICmp(CmpInst::ICMP_NE, CmpRes, Src, SegmentNull.getReg(0));

  unsigned BuildPtr = MRI.createGenericVirtualRegister(DstTy);

  // Coerce the type of the low half of the result so we can use merge_values.
  unsigned SrcAsInt = MRI.createGenericVirtualRegister(LLT::scalar(32));
  MIRBuilder.buildInstr(TargetOpcode::G_PTRTOINT)
    .addDef(SrcAsInt)
    .addUse(Src);

  // TODO: Should we allow mismatched types but matching sizes in merges to
  // avoid the ptrtoint?
  MIRBuilder.buildMerge(BuildPtr, {SrcAsInt, ApertureReg});
  MIRBuilder.buildSelect(Dst, CmpRes, BuildPtr, FlatNull.getReg(0));

  MI.eraseFromParent();
  return true;
}

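// Lower G_FRINT for f64 by adding and subtracting 2^52 (copysigned from the
// source) to force rounding in the FPU, then selecting the original value for
// inputs whose magnitude is already too large to have a fractional part.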
bool AMDGPULegalizerInfo::legalizeFrint(
  MachineInstr &MI, MachineRegisterInfo &MRI,
  MachineIRBuilder &MIRBuilder) const {
  MIRBuilder.setInstr(MI);

  unsigned Src = MI.getOperand(1).getReg();
  LLT Ty = MRI.getType(Src);
  assert(Ty.isScalar() && Ty.getSizeInBits() == 64);

  APFloat C1Val(APFloat::IEEEdouble(), "0x1.0p+52");
  APFloat C2Val(APFloat::IEEEdouble(), "0x1.fffffffffffffp+51");

  auto C1 = MIRBuilder.buildFConstant(Ty, C1Val);
  auto CopySign = MIRBuilder.buildFCopysign(Ty, C1, Src);

  // TODO: Should this propagate fast-math-flags?
  auto Tmp1 = MIRBuilder.buildFAdd(Ty, Src, CopySign);
  auto Tmp2 = MIRBuilder.buildFSub(Ty, Tmp1, CopySign);

  auto C2 = MIRBuilder.buildFConstant(Ty, C2Val);
  auto Fabs = MIRBuilder.buildFAbs(Ty, Src);

  auto Cond = MIRBuilder.buildFCmp(CmpInst::FCMP_OGT, LLT::scalar(1), Fabs, C2);
  MIRBuilder.buildSelect(MI.getOperand(0).getReg(), Cond, Src, Tmp2);
  return true;
}

bool AMDGPULegalizerInfo::legalizeFceil(
  MachineInstr &MI, MachineRegisterInfo &MRI,
  MachineIRBuilder &B) const {
  B.setInstr(MI);

  const LLT S1 = LLT::scalar(1);
  const LLT S64 = LLT::scalar(64);

  unsigned Src = MI.getOperand(1).getReg();
  assert(MRI.getType(Src) == S64);

  // result = trunc(src)
  // if (src > 0.0 && src != result)
  //   result += 1.0

  auto Trunc = B.buildInstr(TargetOpcode::G_INTRINSIC_TRUNC, {S64}, {Src});

  const auto Zero = B.buildFConstant(S64, 0.0);
  const auto One = B.buildFConstant(S64, 1.0);
  auto Lt0 = B.buildFCmp(CmpInst::FCMP_OGT, S1, Src, Zero);
  auto NeTrunc = B.buildFCmp(CmpInst::FCMP_ONE, S1, Src, Trunc);
  auto And = B.buildAnd(S1, Lt0, NeTrunc);
  auto Add = B.buildSelect(S64, And, One, Zero);

  // TODO: Should this propagate fast-math-flags?
  B.buildFAdd(MI.getOperand(0).getReg(), Trunc, Add);
  return true;
}

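// Extract the unbiased exponent of an f64 given the high 32 bits of its bit
// pattern: use the amdgcn.ubfe intrinsic to pull out the 11 exponent bits and
// subtract the bias of 1023.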
static MachineInstrBuilder extractF64Exponent(unsigned Hi,
                                              MachineIRBuilder &B) {
  const unsigned FractBits = 52;
  const unsigned ExpBits = 11;
  LLT S32 = LLT::scalar(32);

  auto Const0 = B.buildConstant(S32, FractBits - 32);
  auto Const1 = B.buildConstant(S32, ExpBits);

  auto ExpPart = B.buildIntrinsic(Intrinsic::amdgcn_ubfe, {S32}, false)
    .addUse(Hi)
    .addUse(Const0.getReg(0))
    .addUse(Const1.getReg(0));

  return B.buildSub(S32, ExpPart, B.buildConstant(S32, 1023));
}

bool AMDGPULegalizerInfo::legalizeIntrinsicTrunc(
  MachineInstr &MI, MachineRegisterInfo &MRI,
  MachineIRBuilder &B) const {
  B.setInstr(MI);

  const LLT S1 = LLT::scalar(1);
  const LLT S32 = LLT::scalar(32);
  const LLT S64 = LLT::scalar(64);

  unsigned Src = MI.getOperand(1).getReg();
  assert(MRI.getType(Src) == S64);

  // TODO: Should this use extract since the low half is unused?
  auto Unmerge = B.buildUnmerge({S32, S32}, Src);
  unsigned Hi = Unmerge.getReg(1);

  // Extract the upper half, since this is where we will find the sign and
  // exponent.
  auto Exp = extractF64Exponent(Hi, B);

  const unsigned FractBits = 52;

  // Extract the sign bit.
  const auto SignBitMask = B.buildConstant(S32, UINT32_C(1) << 31);
  auto SignBit = B.buildAnd(S32, Hi, SignBitMask);

  const auto FractMask = B.buildConstant(S64, (UINT64_C(1) << FractBits) - 1);

  const auto Zero32 = B.buildConstant(S32, 0);

  // Extend back to 64-bits.
  auto SignBit64 = B.buildMerge(S64, {Zero32.getReg(0), SignBit.getReg(0)});

  auto Shr = B.buildAShr(S64, FractMask, Exp);
  auto Not = B.buildNot(S64, Shr);
  auto Tmp0 = B.buildAnd(S64, Src, Not);
  auto FiftyOne = B.buildConstant(S32, FractBits - 1);

  auto ExpLt0 = B.buildICmp(CmpInst::ICMP_SLT, S1, Exp, Zero32);
  auto ExpGt51 = B.buildICmp(CmpInst::ICMP_SGT, S1, Exp, FiftyOne);

  auto Tmp1 = B.buildSelect(S64, ExpLt0, SignBit64, Tmp0);
  B.buildSelect(MI.getOperand(0).getReg(), ExpGt51, Src, Tmp1);
  return true;
}

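// Lower 64-bit integer to f64 conversions: convert the two 32-bit halves
// separately (the high half signed or unsigned as requested, the low half
// always unsigned), scale the high result by 2^32 with amdgcn.ldexp, and add
// the two pieces.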
bool AMDGPULegalizerInfo::legalizeITOFP(
  MachineInstr &MI, MachineRegisterInfo &MRI,
  MachineIRBuilder &B, bool Signed) const {
  B.setInstr(MI);

  unsigned Dst = MI.getOperand(0).getReg();
  unsigned Src = MI.getOperand(1).getReg();

  const LLT S64 = LLT::scalar(64);
  const LLT S32 = LLT::scalar(32);

  assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S64);

  auto Unmerge = B.buildUnmerge({S32, S32}, Src);

  auto CvtHi = Signed ?
    B.buildSITOFP(S64, Unmerge.getReg(1)) :
    B.buildUITOFP(S64, Unmerge.getReg(1));

  auto CvtLo = B.buildUITOFP(S64, Unmerge.getReg(0));

  auto ThirtyTwo = B.buildConstant(S32, 32);
  auto LdExp = B.buildIntrinsic(Intrinsic::amdgcn_ldexp, {S64}, false)
    .addUse(CvtHi.getReg(0))
    .addUse(ThirtyTwo.getReg(0));

  // TODO: Should this propagate fast-math-flags?
  B.buildFAdd(Dst, LdExp, CvtLo);
  MI.eraseFromParent();
  return true;
}