1 //===- AMDGPULegalizerInfo.cpp -----------------------------------*- C++ -*-==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 /// This file implements the targeting of the MachineLegalizer class for
10 /// AMDGPU.
11 /// \todo This should be generated by TableGen.
12 //===----------------------------------------------------------------------===//
13 
14 #include "AMDGPU.h"
15 #include "AMDGPULegalizerInfo.h"
16 #include "AMDGPUTargetMachine.h"
17 #include "SIMachineFunctionInfo.h"
22 #include "llvm/IR/DerivedTypes.h"
23 #include "llvm/IR/Type.h"
24 #include "llvm/Support/Debug.h"
25 
26 #define DEBUG_TYPE "amdgpu-legalinfo"
27 
28 using namespace llvm;
29 using namespace LegalizeActions;
30 using namespace LegalizeMutations;
31 using namespace LegalityPredicates;
32 
33 
34 static LegalityPredicate isMultiple32(unsigned TypeIdx,
35  unsigned MaxSize = 512) {
36  return [=](const LegalityQuery &Query) {
37  const LLT Ty = Query.Types[TypeIdx];
38  const LLT EltTy = Ty.getScalarType();
39  return Ty.getSizeInBits() <= MaxSize && EltTy.getSizeInBits() % 32 == 0;
40  };
41 }
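// [Editor's example] A sketch (not part of the original file) of what
// isMultiple32 accepts with the default MaxSize of 512:
#if 0
static void exampleIsMultiple32() {
  assert(LLT::scalar(64).getScalarType().getSizeInBits() % 32 == 0); // s64: ok
  assert(LLT::vector(3, 32).getSizeInBits() <= 512);                 // v3s32: ok
  assert(LLT::scalar(16).getSizeInBits() % 32 != 0);                 // s16: rejected
  assert(LLT::vector(32, 32).getSizeInBits() > 512);                 // v32s32: too big
}
#endif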
42 
43 static LegalityPredicate isSmallOddVector(unsigned TypeIdx) {
44  return [=](const LegalityQuery &Query) {
45  const LLT Ty = Query.Types[TypeIdx];
46  return Ty.isVector() &&
47  Ty.getNumElements() % 2 != 0 &&
48  Ty.getElementType().getSizeInBits() < 32;
49  };
50 }
51 
52 static LegalizeMutation oneMoreElement(unsigned TypeIdx) {
53  return [=](const LegalityQuery &Query) {
54  const LLT Ty = Query.Types[TypeIdx];
55  const LLT EltTy = Ty.getElementType();
56  return std::make_pair(TypeIdx, LLT::vector(Ty.getNumElements() + 1, EltTy));
57  };
58 }
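// [Editor's example] Sketch (not original source): isSmallOddVector and
// oneMoreElement combine to pad odd short vectors, e.g. v3s16 -> v4s16, so
// the result fills whole 32-bit registers.
#if 0
static void exampleOneMoreElement() {
  const LLT V3S16 = LLT::vector(3, 16);
  assert(V3S16.getNumElements() % 2 != 0 &&
         V3S16.getElementType().getSizeInBits() < 32); // predicate fires
  const LLT Padded = LLT::vector(V3S16.getNumElements() + 1,
                                 V3S16.getElementType());
  assert(Padded == LLT::vector(4, 16));                // mutation result
}
#endif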
59 
60 static LegalizeMutation fewerEltsToSize64Vector(unsigned TypeIdx) {
61  return [=](const LegalityQuery &Query) {
62  const LLT Ty = Query.Types[TypeIdx];
63  const LLT EltTy = Ty.getElementType();
64  unsigned Size = Ty.getSizeInBits();
65  unsigned Pieces = (Size + 63) / 64;
66  unsigned NewNumElts = (Ty.getNumElements() + 1) / Pieces;
67  return std::make_pair(TypeIdx, LLT::scalarOrVector(NewNumElts, EltTy));
68  };
69 }
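// [Editor's example] Worked trace of the arithmetic above (not original
// source): a 128-bit v4s32 splits into (128 + 63) / 64 = 2 pieces, so the
// new element count is (4 + 1) / 2 = 2, i.e. v2s32 per piece.
#if 0
static void exampleFewerEltsToSize64Vector() {
  const LLT V4S32 = LLT::vector(4, 32);
  unsigned Size = V4S32.getSizeInBits();                       // 128
  unsigned Pieces = (Size + 63) / 64;                          // 2
  unsigned NewNumElts = (V4S32.getNumElements() + 1) / Pieces; // 2
  assert(LLT::scalarOrVector(NewNumElts, V4S32.getElementType()) ==
         LLT::vector(2, 32));
}
#endif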
70 
71 static LegalityPredicate vectorWiderThan(unsigned TypeIdx, unsigned Size) {
72  return [=](const LegalityQuery &Query) {
73  const LLT QueryTy = Query.Types[TypeIdx];
74  return QueryTy.isVector() && QueryTy.getSizeInBits() > Size;
75  };
76 }
77 
78 static LegalityPredicate numElementsNotEven(unsigned TypeIdx) {
79  return [=](const LegalityQuery &Query) {
80  const LLT QueryTy = Query.Types[TypeIdx];
81  return QueryTy.isVector() && QueryTy.getNumElements() % 2 != 0;
82  };
83 }
84 
85 // Any combination of 32 or 64-bit elements up to 512 bits, and multiples of
86 // v2s16.
87 static LegalityPredicate isRegisterType(unsigned TypeIdx) {
88  return [=](const LegalityQuery &Query) {
89  const LLT Ty = Query.Types[TypeIdx];
90  if (Ty.isVector()) {
91  const int EltSize = Ty.getElementType().getSizeInBits();
92  return EltSize == 32 || EltSize == 64 ||
93  (EltSize == 16 && Ty.getNumElements() % 2 == 0) ||
94  EltSize == 128 || EltSize == 256;
95  }
96 
97  return Ty.getSizeInBits() % 32 == 0 && Ty.getSizeInBits() <= 512;
98  };
99 }
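// [Editor's example] Sketch (not original source) of the boundary cases for
// isRegisterType:
#if 0
static void exampleIsRegisterType() {
  assert(LLT::vector(2, 16).getNumElements() % 2 == 0); // v2s16: ok
  assert(LLT::vector(3, 16).getNumElements() % 2 != 0); // v3s16: rejected
  assert(LLT::scalar(96).getSizeInBits() % 32 == 0);    // s96: ok
  assert(LLT::scalar(24).getSizeInBits() % 32 != 0);    // s24: rejected
}
#endif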
100 
101 static LegalityPredicate elementTypeIs(unsigned TypeIdx, LLT Type) {
102  return [=](const LegalityQuery &Query) {
103  return Query.Types[TypeIdx].getElementType() == Type;
104  };
105 }
106 
107 AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
108  const GCNTargetMachine &TM)
109  : ST(ST_) {
110  using namespace TargetOpcode;
111 
112  auto GetAddrSpacePtr = [&TM](unsigned AS) {
113  return LLT::pointer(AS, TM.getPointerSizeInBits(AS));
114  };
115 
116  const LLT S1 = LLT::scalar(1);
117  const LLT S8 = LLT::scalar(8);
118  const LLT S16 = LLT::scalar(16);
119  const LLT S32 = LLT::scalar(32);
120  const LLT S64 = LLT::scalar(64);
121  const LLT S128 = LLT::scalar(128);
122  const LLT S256 = LLT::scalar(256);
123  const LLT S512 = LLT::scalar(512);
124 
125  const LLT V2S16 = LLT::vector(2, 16);
126  const LLT V4S16 = LLT::vector(4, 16);
127 
128  const LLT V2S32 = LLT::vector(2, 32);
129  const LLT V3S32 = LLT::vector(3, 32);
130  const LLT V4S32 = LLT::vector(4, 32);
131  const LLT V5S32 = LLT::vector(5, 32);
132  const LLT V6S32 = LLT::vector(6, 32);
133  const LLT V7S32 = LLT::vector(7, 32);
134  const LLT V8S32 = LLT::vector(8, 32);
135  const LLT V9S32 = LLT::vector(9, 32);
136  const LLT V10S32 = LLT::vector(10, 32);
137  const LLT V11S32 = LLT::vector(11, 32);
138  const LLT V12S32 = LLT::vector(12, 32);
139  const LLT V13S32 = LLT::vector(13, 32);
140  const LLT V14S32 = LLT::vector(14, 32);
141  const LLT V15S32 = LLT::vector(15, 32);
142  const LLT V16S32 = LLT::vector(16, 32);
143 
144  const LLT V2S64 = LLT::vector(2, 64);
145  const LLT V3S64 = LLT::vector(3, 64);
146  const LLT V4S64 = LLT::vector(4, 64);
147  const LLT V5S64 = LLT::vector(5, 64);
148  const LLT V6S64 = LLT::vector(6, 64);
149  const LLT V7S64 = LLT::vector(7, 64);
150  const LLT V8S64 = LLT::vector(8, 64);
151 
152  std::initializer_list<LLT> AllS32Vectors =
153  {V2S32, V3S32, V4S32, V5S32, V6S32, V7S32, V8S32,
154  V9S32, V10S32, V11S32, V12S32, V13S32, V14S32, V15S32, V16S32};
155  std::initializer_list<LLT> AllS64Vectors =
156  {V2S64, V3S64, V4S64, V5S64, V6S64, V7S64, V8S64};
157 
158  const LLT GlobalPtr = GetAddrSpacePtr(AMDGPUAS::GLOBAL_ADDRESS);
159  const LLT ConstantPtr = GetAddrSpacePtr(AMDGPUAS::CONSTANT_ADDRESS);
160  const LLT Constant32Ptr = GetAddrSpacePtr(AMDGPUAS::CONSTANT_ADDRESS_32BIT);
161  const LLT LocalPtr = GetAddrSpacePtr(AMDGPUAS::LOCAL_ADDRESS);
162  const LLT RegionPtr = GetAddrSpacePtr(AMDGPUAS::REGION_ADDRESS);
163  const LLT FlatPtr = GetAddrSpacePtr(AMDGPUAS::FLAT_ADDRESS);
164  const LLT PrivatePtr = GetAddrSpacePtr(AMDGPUAS::PRIVATE_ADDRESS);
165 
166  const LLT CodePtr = FlatPtr;
167 
168  const std::initializer_list<LLT> AddrSpaces64 = {
169  GlobalPtr, ConstantPtr, FlatPtr
170  };
171 
172  const std::initializer_list<LLT> AddrSpaces32 = {
173  LocalPtr, PrivatePtr, Constant32Ptr, RegionPtr
174  };
175 
176  const std::initializer_list<LLT> FPTypesBase = {
177  S32, S64
178  };
179 
180  const std::initializer_list<LLT> FPTypes16 = {
181  S32, S64, S16
182  };
183 
184  const std::initializer_list<LLT> FPTypesPK16 = {
185  S32, S64, S16, V2S16
186  };
187 
188  setAction({G_BRCOND, S1}, Legal);
189 
190  // TODO: All multiples of 32, vectors of pointers, all v2s16 pairs, more
191  // elements for v3s16
192  getActionDefinitionsBuilder(G_PHI)
193  .legalFor({S32, S64, V2S16, V4S16, S1, S128, S256})
194  .legalFor(AllS32Vectors)
195  .legalFor(AllS64Vectors)
196  .legalFor(AddrSpaces64)
197  .legalFor(AddrSpaces32)
198  .clampScalar(0, S32, S256)
199  .widenScalarToNextPow2(0, 32)
200  .clampMaxNumElements(0, S32, 16)
201  .moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
202  .legalIf(isPointer(0));
203 
204  if (ST.has16BitInsts()) {
205  getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL})
206  .legalFor({S32, S16})
207  .clampScalar(0, S16, S32)
208  .scalarize(0);
209  } else {
210  getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL})
211  .legalFor({S32})
212  .clampScalar(0, S32, S32)
213  .scalarize(0);
214  }
215 
216  getActionDefinitionsBuilder({G_UMULH, G_SMULH})
217  .legalFor({S32})
218  .clampScalar(0, S32, S32)
219  .scalarize(0);
220 
221  // Report legal for any types we can handle anywhere. For the cases only legal
222  // on the SALU, RegBankSelect will be able to re-legalize.
223  getActionDefinitionsBuilder({G_AND, G_OR, G_XOR})
224  .legalFor({S32, S1, S64, V2S32, S16, V2S16, V4S16})
225  .clampScalar(0, S32, S64)
226  .moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
227  .fewerElementsIf(vectorWiderThan(0, 32), fewerEltsToSize64Vector(0))
228  .widenScalarToNextPow2(0)
229  .scalarize(0);
230 
231  getActionDefinitionsBuilder({G_UADDO, G_SADDO, G_USUBO, G_SSUBO,
232  G_UADDE, G_SADDE, G_USUBE, G_SSUBE})
233  .legalFor({{S32, S1}})
234  .clampScalar(0, S32, S32);
235 
236  getActionDefinitionsBuilder(G_BITCAST)
237  .legalForCartesianProduct({S32, V2S16})
238  .legalForCartesianProduct({S64, V2S32, V4S16})
239  .legalForCartesianProduct({V2S64, V4S32})
240  // Don't worry about the size constraint.
241  .legalIf(all(isPointer(0), isPointer(1)));
242 
243  if (ST.has16BitInsts()) {
244  getActionDefinitionsBuilder(G_FCONSTANT)
245  .legalFor({S32, S64, S16})
246  .clampScalar(0, S16, S64);
247  } else {
248  getActionDefinitionsBuilder(G_FCONSTANT)
249  .legalFor({S32, S64})
250  .clampScalar(0, S32, S64);
251  }
252 
253  getActionDefinitionsBuilder(G_IMPLICIT_DEF)
254  .legalFor({S1, S32, S64, V2S32, V4S32, V2S16, V4S16, GlobalPtr,
255  ConstantPtr, LocalPtr, FlatPtr, PrivatePtr})
256  .moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
257  .clampScalarOrElt(0, S32, S512)
258  .legalIf(isMultiple32(0))
259  .widenScalarToNextPow2(0, 32)
260  .clampMaxNumElements(0, S32, 16);
261 
262 
263  // FIXME: i1 operands to intrinsics should always be legal, but other i1
264  // values may not be legal. We need to figure out how to distinguish
265  // between these two scenarios.
266  getActionDefinitionsBuilder(G_CONSTANT)
267  .legalFor({S1, S32, S64, GlobalPtr,
268  LocalPtr, ConstantPtr, PrivatePtr, FlatPtr })
269  .clampScalar(0, S32, S64)
270  .widenScalarToNextPow2(0)
271  .legalIf(isPointer(0));
272 
273  setAction({G_FRAME_INDEX, PrivatePtr}, Legal);
274 
275  auto &FPOpActions = getActionDefinitionsBuilder(
276  { G_FADD, G_FMUL, G_FNEG, G_FABS, G_FMA, G_FCANONICALIZE})
277  .legalFor({S32, S64});
278 
279  if (ST.has16BitInsts()) {
280  if (ST.hasVOP3PInsts())
281  FPOpActions.legalFor({S16, V2S16});
282  else
283  FPOpActions.legalFor({S16});
284  }
285 
286  auto &MinNumMaxNum = getActionDefinitionsBuilder({
287  G_FMINNUM, G_FMAXNUM, G_FMINNUM_IEEE, G_FMAXNUM_IEEE});
288 
289  if (ST.hasVOP3PInsts()) {
290  MinNumMaxNum.customFor(FPTypesPK16)
291  .clampMaxNumElements(0, S16, 2)
292  .clampScalar(0, S16, S64)
293  .scalarize(0);
294  } else if (ST.has16BitInsts()) {
295  MinNumMaxNum.customFor(FPTypes16)
296  .clampScalar(0, S16, S64)
297  .scalarize(0);
298  } else {
299  MinNumMaxNum.customFor(FPTypesBase)
300  .clampScalar(0, S32, S64)
301  .scalarize(0);
302  }
303 
304  // TODO: Implement
305  getActionDefinitionsBuilder({G_FMINIMUM, G_FMAXIMUM}).lower();
306 
307  if (ST.hasVOP3PInsts())
308  FPOpActions.clampMaxNumElements(0, S16, 2);
309  FPOpActions
310  .scalarize(0)
311  .clampScalar(0, ST.has16BitInsts() ? S16 : S32, S64);
312 
313  if (ST.has16BitInsts()) {
314  getActionDefinitionsBuilder({G_FSQRT, G_FFLOOR})
315  .legalFor({S32, S64, S16})
316  .scalarize(0)
317  .clampScalar(0, S16, S64);
318  } else {
319  getActionDefinitionsBuilder({G_FSQRT, G_FFLOOR})
320  .legalFor({S32, S64})
321  .scalarize(0)
322  .clampScalar(0, S32, S64);
323  }
324 
325  getActionDefinitionsBuilder(G_FPTRUNC)
326  .legalFor({{S32, S64}, {S16, S32}})
327  .scalarize(0);
328 
329  getActionDefinitionsBuilder(G_FPEXT)
330  .legalFor({{S64, S32}, {S32, S16}})
331  .lowerFor({{S64, S16}}) // FIXME: Implement
332  .scalarize(0);
333 
334  // TODO: Verify V_BFI_B32 is generated from expanded bit ops.
335  getActionDefinitionsBuilder(G_FCOPYSIGN).lower();
336 
337  getActionDefinitionsBuilder(G_FSUB)
338  // Use actual fsub instruction
339  .legalFor({S32})
340  // Must use fadd + fneg
341  .lowerFor({S64, S16, V2S16})
342  .scalarize(0)
343  .clampScalar(0, S32, S64);
344 
345  getActionDefinitionsBuilder({G_SEXT, G_ZEXT, G_ANYEXT})
346  .legalFor({{S64, S32}, {S32, S16}, {S64, S16},
347  {S32, S1}, {S64, S1}, {S16, S1},
348  // FIXME: Hack
349  {S64, LLT::scalar(33)},
350  {S32, S8}, {S128, S32}, {S128, S64}, {S32, LLT::scalar(24)}})
351  .scalarize(0);
352 
353  getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
354  .legalFor({{S32, S32}, {S64, S32}})
355  .lowerFor({{S32, S64}})
356  .customFor({{S64, S64}})
357  .scalarize(0);
358 
359  getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
360  .legalFor({{S32, S32}, {S32, S64}})
361  .scalarize(0);
362 
363  getActionDefinitionsBuilder(G_INTRINSIC_ROUND)
364  .legalFor({S32, S64})
365  .scalarize(0);
366 
367  if (ST.getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS) {
368  getActionDefinitionsBuilder({G_INTRINSIC_TRUNC, G_FCEIL, G_FRINT})
369  .legalFor({S32, S64})
370  .clampScalar(0, S32, S64)
371  .scalarize(0);
372  } else {
373  getActionDefinitionsBuilder({G_INTRINSIC_TRUNC, G_FCEIL, G_FRINT})
374  .legalFor({S32})
375  .customFor({S64})
376  .clampScalar(0, S32, S64)
377  .scalarize(0);
378  }
379 
380  getActionDefinitionsBuilder(G_GEP)
381  .legalForCartesianProduct(AddrSpaces64, {S64})
382  .legalForCartesianProduct(AddrSpaces32, {S32})
383  .scalarize(0);
384 
385  setAction({G_BLOCK_ADDR, CodePtr}, Legal);
386 
387  auto &CmpBuilder =
388  getActionDefinitionsBuilder(G_ICMP)
389  .legalForCartesianProduct(
390  {S1}, {S32, S64, GlobalPtr, LocalPtr, ConstantPtr, PrivatePtr, FlatPtr})
391  .legalFor({{S1, S32}, {S1, S64}});
392  if (ST.has16BitInsts()) {
393  CmpBuilder.legalFor({{S1, S16}});
394  }
395 
396  CmpBuilder
397  .widenScalarToNextPow2(1)
398  .clampScalar(1, S32, S64)
399  .scalarize(0)
400  .legalIf(all(typeIs(0, S1), isPointer(1)));
401 
402  getActionDefinitionsBuilder(G_FCMP)
403  .legalForCartesianProduct({S1}, ST.has16BitInsts() ? FPTypes16 : FPTypesBase)
404  .widenScalarToNextPow2(1)
405  .clampScalar(1, S32, S64)
406  .scalarize(0);
407 
408  // FIXME: fexp, flog2, flog10 need to be custom lowered.
409  getActionDefinitionsBuilder({G_FPOW, G_FEXP, G_FEXP2,
410  G_FLOG, G_FLOG2, G_FLOG10})
411  .legalFor({S32})
412  .scalarize(0);
413 
414  // The 64-bit versions produce 32-bit results, but only on the SALU.
415  getActionDefinitionsBuilder({G_CTLZ, G_CTLZ_ZERO_UNDEF,
416  G_CTTZ, G_CTTZ_ZERO_UNDEF,
417  G_CTPOP})
418  .legalFor({{S32, S32}, {S32, S64}})
419  .clampScalar(0, S32, S32)
420  .clampScalar(1, S32, S64)
421  .scalarize(0)
422  .widenScalarToNextPow2(0, 32)
423  .widenScalarToNextPow2(1, 32);
424 
425  // TODO: Expand for > s32
426  getActionDefinitionsBuilder(G_BSWAP)
427  .legalFor({S32})
428  .clampScalar(0, S32, S32)
429  .scalarize(0);
430 
431  if (ST.has16BitInsts()) {
432  if (ST.hasVOP3PInsts()) {
433  getActionDefinitionsBuilder({G_SMIN, G_SMAX, G_UMIN, G_UMAX})
434  .legalFor({S32, S16, V2S16})
435  .moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
436  .clampMaxNumElements(0, S16, 2)
437  .clampScalar(0, S16, S32)
438  .widenScalarToNextPow2(0)
439  .scalarize(0);
440  } else {
441  getActionDefinitionsBuilder({G_SMIN, G_SMAX, G_UMIN, G_UMAX})
442  .legalFor({S32, S16})
443  .widenScalarToNextPow2(0)
444  .clampScalar(0, S16, S32)
445  .scalarize(0);
446  }
447  } else {
448  getActionDefinitionsBuilder({G_SMIN, G_SMAX, G_UMIN, G_UMAX})
449  .legalFor({S32})
450  .clampScalar(0, S32, S32)
451  .widenScalarToNextPow2(0)
452  .scalarize(0);
453  }
454 
455  auto smallerThan = [](unsigned TypeIdx0, unsigned TypeIdx1) {
456  return [=](const LegalityQuery &Query) {
457  return Query.Types[TypeIdx0].getSizeInBits() <
458  Query.Types[TypeIdx1].getSizeInBits();
459  };
460  };
461 
462  auto greaterThan = [](unsigned TypeIdx0, unsigned TypeIdx1) {
463  return [=](const LegalityQuery &Query) {
464  return Query.Types[TypeIdx0].getSizeInBits() >
465  Query.Types[TypeIdx1].getSizeInBits();
466  };
467  };
468 
469  getActionDefinitionsBuilder(G_INTTOPTR)
470  // List the common cases
471  .legalForCartesianProduct(AddrSpaces64, {S64})
472  .legalForCartesianProduct(AddrSpaces32, {S32})
473  .scalarize(0)
474  // Accept any address space as long as the size matches
475  .legalIf(sameSize(0, 1))
476  .widenScalarIf(smallerThan(1, 0),
477  [](const LegalityQuery &Query) {
478  return std::make_pair(1, LLT::scalar(Query.Types[0].getSizeInBits()));
479  })
480  .narrowScalarIf(greaterThan(1, 0),
481  [](const LegalityQuery &Query) {
482  return std::make_pair(1, LLT::scalar(Query.Types[0].getSizeInBits()));
483  });
484 
485  getActionDefinitionsBuilder(G_PTRTOINT)
486  // List the common cases
487  .legalForCartesianProduct(AddrSpaces64, {S64})
488  .legalForCartesianProduct(AddrSpaces32, {S32})
489  .scalarize(0)
490  // Accept any address space as long as the size matches
491  .legalIf(sameSize(0, 1))
492  .widenScalarIf(smallerThan(0, 1),
493  [](const LegalityQuery &Query) {
494  return std::make_pair(0, LLT::scalar(Query.Types[1].getSizeInBits()));
495  })
496  .narrowScalarIf(
497  greaterThan(0, 1),
498  [](const LegalityQuery &Query) {
499  return std::make_pair(0, LLT::scalar(Query.Types[1].getSizeInBits()));
500  });
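// [Editor's example] Sketch (not original source) of the mutations above: a
// G_INTTOPTR from s16 to a 64-bit flat pointer triggers smallerThan(1, 0),
// widening the integer to the pointer width before selection:
//
//   %int:_(s16) = ...
//   %ext:_(s64) = G_ANYEXT %int        ; inserted by the widen step
//   %ptr:_(p0)  = G_INTTOPTR %ext(s64)
//
// A too-wide integer is symmetrically narrowed with G_TRUNC.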
501 
502  if (ST.hasFlatAddressSpace()) {
503  getActionDefinitionsBuilder(G_ADDRSPACE_CAST)
504  .scalarize(0)
505  .custom();
506  }
507 
508  // TODO: Should load to s16 be legal? Most loads extend to 32-bits, but we
509  // handle some operations by just promoting the register during
510  // selection. There are also d16 loads on GFX9+ which preserve the high bits.
511  getActionDefinitionsBuilder({G_LOAD, G_STORE})
512  .narrowScalarIf([](const LegalityQuery &Query) {
513  unsigned Size = Query.Types[0].getSizeInBits();
514  unsigned MemSize = Query.MMODescrs[0].SizeInBits;
515  return (Size > 32 && MemSize < Size);
516  },
517  [](const LegalityQuery &Query) {
518  return std::make_pair(0, LLT::scalar(32));
519  })
520  .moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
521  .fewerElementsIf([=](const LegalityQuery &Query) {
522  unsigned MemSize = Query.MMODescrs[0].SizeInBits;
523  return (MemSize == 96) &&
524  Query.Types[0].isVector() &&
525  !ST.hasDwordx3LoadStores();
526  },
527  [=](const LegalityQuery &Query) {
528  return std::make_pair(0, V2S32);
529  })
530  .legalIf([=](const LegalityQuery &Query) {
531  const LLT &Ty0 = Query.Types[0];
532 
533  unsigned Size = Ty0.getSizeInBits();
534  unsigned MemSize = Query.MMODescrs[0].SizeInBits;
535  if (Size < 32 || (Size > 32 && MemSize < Size))
536  return false;
537 
538  if (Ty0.isVector() && Size != MemSize)
539  return false;
540 
541  // TODO: Decompose private loads into 4-byte components.
542  // TODO: Illegal flat loads on SI
543  switch (MemSize) {
544  case 8:
545  case 16:
546  return Size == 32;
547  case 32:
548  case 64:
549  case 128:
550  return true;
551 
552  case 96:
553  return ST.hasDwordx3LoadStores();
554 
555  case 256:
556  case 512:
557  // TODO: Possibly support loads of i256 and i512. This will require
558  // adding i256 and i512 types to MVT in order to be able to use
559  // TableGen.
560  // TODO: Add support for other vector types, this will require
561  // defining more value mappings for the new types.
562  return Ty0.isVector() && (Ty0.getScalarType().getSizeInBits() == 32 ||
563  Ty0.getScalarType().getSizeInBits() == 64);
564 
565  default:
566  return false;
567  }
568  })
569  .clampScalar(0, S32, S64);
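// [Editor's example] A few concrete outcomes of the load/store rules above
// (editor's annotation, not original source):
//
//   G_LOAD s32   from a  4-byte MMO -> legal (MemSize 32)
//   G_LOAD s64   from a  4-byte MMO -> narrowed to s32 (extending load)
//   G_LOAD v3s32 from a 12-byte MMO -> legal only with hasDwordx3LoadStores();
//                                      otherwise split toward v2s32 pieces
//   G_LOAD v8s32 from a 32-byte MMO -> legal (MemSize 256, 32-bit elements)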
570 
571 
572  // FIXME: Handle alignment requirements.
573  auto &ExtLoads = getActionDefinitionsBuilder({G_SEXTLOAD, G_ZEXTLOAD})
574  .legalForTypesWithMemDesc({
575  {S32, GlobalPtr, 8, 8},
576  {S32, GlobalPtr, 16, 8},
577  {S32, LocalPtr, 8, 8},
578  {S32, LocalPtr, 16, 8},
579  {S32, PrivatePtr, 8, 8},
580  {S32, PrivatePtr, 16, 8}});
581  if (ST.hasFlatAddressSpace()) {
582  ExtLoads.legalForTypesWithMemDesc({{S32, FlatPtr, 8, 8},
583  {S32, FlatPtr, 16, 8}});
584  }
585 
586  ExtLoads.clampScalar(0, S32, S32)
587  .widenScalarToNextPow2(0)
588  .unsupportedIfMemSizeNotPow2()
589  .lower();
590 
591  auto &Atomics = getActionDefinitionsBuilder(
592  {G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD, G_ATOMICRMW_SUB,
593  G_ATOMICRMW_AND, G_ATOMICRMW_OR, G_ATOMICRMW_XOR,
594  G_ATOMICRMW_MAX, G_ATOMICRMW_MIN, G_ATOMICRMW_UMAX,
595  G_ATOMICRMW_UMIN, G_ATOMIC_CMPXCHG})
596  .legalFor({{S32, GlobalPtr}, {S32, LocalPtr},
597  {S64, GlobalPtr}, {S64, LocalPtr}});
598  if (ST.hasFlatAddressSpace()) {
599  Atomics.legalFor({{S32, FlatPtr}, {S64, FlatPtr}});
600  }
601 
602  getActionDefinitionsBuilder(G_ATOMICRMW_FADD)
603  .legalFor({{S32, LocalPtr}});
604 
605  // TODO: Pointer types, any 32-bit or 64-bit vector
606  getActionDefinitionsBuilder(G_SELECT)
607  .legalForCartesianProduct({S32, S64, S16, V2S32, V2S16, V4S16,
608  GlobalPtr, LocalPtr, FlatPtr, PrivatePtr,
609  LLT::vector(2, LocalPtr), LLT::vector(2, PrivatePtr)}, {S1})
610  .clampScalar(0, S16, S64)
611  .moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
612  .fewerElementsIf(numElementsNotEven(0), scalarize(0))
613  .scalarize(1)
614  .clampMaxNumElements(0, S32, 2)
615  .clampMaxNumElements(0, LocalPtr, 2)
616  .clampMaxNumElements(0, PrivatePtr, 2)
617  .scalarize(0)
618  .widenScalarToNextPow2(0)
619  .legalIf(all(isPointer(0), typeIs(1, S1)));
620 
621  // TODO: Only the low 4/5/6 bits of the shift amount are observed, so we can
622  // be more flexible with the shift amount type.
623  auto &Shifts = getActionDefinitionsBuilder({G_SHL, G_LSHR, G_ASHR})
624  .legalFor({{S32, S32}, {S64, S32}});
625  if (ST.has16BitInsts()) {
626  if (ST.hasVOP3PInsts()) {
627  Shifts.legalFor({{S16, S32}, {S16, S16}, {V2S16, V2S16}})
628  .clampMaxNumElements(0, S16, 2);
629  } else
630  Shifts.legalFor({{S16, S32}, {S16, S16}});
631 
632  Shifts.clampScalar(1, S16, S32);
633  Shifts.clampScalar(0, S16, S64);
634  Shifts.widenScalarToNextPow2(0, 16);
635  } else {
636  // Make sure we legalize the shift amount type first, as the general
637  // expansion for the shifted type will produce much worse code if it hasn't
638  // been truncated already.
639  Shifts.clampScalar(1, S32, S32);
640  Shifts.clampScalar(0, S32, S64);
641  Shifts.widenScalarToNextPow2(0, 32);
642  }
643  Shifts.scalarize(0);
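// [Editor's example] Why the shift amount is clamped first (editor's sketch,
// not original source): given
//   %r:_(s64) = G_SHL %x:_(s64), %amt:_(s16)
// on a target without 16-bit instructions, clampScalar(1, S32, S32) widens
// %amt to s32 before the s64 expansion runs, so the expansion operates on
//   %r:_(s64) = G_SHL %x:_(s64), %amt32:_(s32)
// and never has to re-legalize an illegal amount type inside the expanded
// sequence.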
644 
645  for (unsigned Op : {G_EXTRACT_VECTOR_ELT, G_INSERT_VECTOR_ELT}) {
646  unsigned VecTypeIdx = Op == G_EXTRACT_VECTOR_ELT ? 1 : 0;
647  unsigned EltTypeIdx = Op == G_EXTRACT_VECTOR_ELT ? 0 : 1;
648  unsigned IdxTypeIdx = 2;
649 
650  getActionDefinitionsBuilder(Op)
651  .customIf([=](const LegalityQuery &Query) {
652  const LLT EltTy = Query.Types[EltTypeIdx];
653  const LLT VecTy = Query.Types[VecTypeIdx];
654  const LLT IdxTy = Query.Types[IdxTypeIdx];
655  return (EltTy.getSizeInBits() == 16 ||
656  EltTy.getSizeInBits() % 32 == 0) &&
657  VecTy.getSizeInBits() % 32 == 0 &&
658  VecTy.getSizeInBits() <= 512 &&
659  IdxTy.getSizeInBits() == 32;
660  })
661  .clampScalar(EltTypeIdx, S32, S64)
662  .clampScalar(VecTypeIdx, S32, S64)
663  .clampScalar(IdxTypeIdx, S32, S32);
664  }
665 
666  getActionDefinitionsBuilder(G_EXTRACT_VECTOR_ELT)
667  .unsupportedIf([=](const LegalityQuery &Query) {
668  const LLT &EltTy = Query.Types[1].getElementType();
669  return Query.Types[0] != EltTy;
670  });
671 
672  for (unsigned Op : {G_EXTRACT, G_INSERT}) {
673  unsigned BigTyIdx = Op == G_EXTRACT ? 1 : 0;
674  unsigned LitTyIdx = Op == G_EXTRACT ? 0 : 1;
675 
676  // FIXME: Doesn't handle extract of illegal sizes.
678  .legalIf([=](const LegalityQuery &Query) {
679  const LLT BigTy = Query.Types[BigTyIdx];
680  const LLT LitTy = Query.Types[LitTyIdx];
681  return (BigTy.getSizeInBits() % 32 == 0) &&
682  (LitTy.getSizeInBits() % 16 == 0);
683  })
684  .widenScalarIf(
685  [=](const LegalityQuery &Query) {
686  const LLT BigTy = Query.Types[BigTyIdx];
687  return (BigTy.getScalarSizeInBits() < 16);
688  },
689  LegalizeMutations::widenScalarOrEltToNextPow2(BigTyIdx, 16))
690  .widenScalarIf(
691  [=](const LegalityQuery &Query) {
692  const LLT LitTy = Query.Types[LitTyIdx];
693  return (LitTy.getScalarSizeInBits() < 16);
694  },
695  LegalizeMutations::widenScalarOrEltToNextPow2(LitTyIdx, 16))
696  .moreElementsIf(isSmallOddVector(BigTyIdx), oneMoreElement(BigTyIdx))
697  .widenScalarToNextPow2(BigTyIdx, 32);
698 
699  }
700 
701  getActionDefinitionsBuilder(G_BUILD_VECTOR)
702  .legalForCartesianProduct(AllS32Vectors, {S32})
703  .legalForCartesianProduct(AllS64Vectors, {S64})
704  .clampNumElements(0, V16S32, V16S32)
705  .clampNumElements(0, V2S64, V8S64)
706  .minScalarSameAs(1, 0)
707  .legalIf(isRegisterType(0))
708  .minScalarOrElt(0, S32);
709 
710  getActionDefinitionsBuilder(G_CONCAT_VECTORS)
711  .legalIf(isRegisterType(0));
712 
713  // TODO: Don't fully scalarize v2s16 pieces
714  getActionDefinitionsBuilder(G_SHUFFLE_VECTOR).lower();
715 
716  // Merge/Unmerge
717  for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
718  unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
719  unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;
720 
721  auto notValidElt = [=](const LegalityQuery &Query, unsigned TypeIdx) {
722  const LLT &Ty = Query.Types[TypeIdx];
723  if (Ty.isVector()) {
724  const LLT &EltTy = Ty.getElementType();
725  if (EltTy.getSizeInBits() < 8 || EltTy.getSizeInBits() > 64)
726  return true;
727  if (!isPowerOf2_32(EltTy.getSizeInBits()))
728  return true;
729  }
730  return false;
731  };
732 
733  getActionDefinitionsBuilder(Op)
734  .widenScalarToNextPow2(LitTyIdx, /*Min*/ 16)
735  // Clamp the little scalar to s8-s256 and make it a power of 2. It's not
736  // worth considering the multiples of 64 since 2*192 and 2*384 are not
737  // valid.
738  .clampScalar(LitTyIdx, S16, S256)
739  .widenScalarToNextPow2(LitTyIdx, /*Min*/ 32)
740  .legalIf(all(typeIs(0, S16), typeIs(1, LLT::vector(3, 16)))) // FIXME: Testing hack
741  .fewerElementsIf(all(typeIs(0, S16), vectorWiderThan(1, 32),
742  elementTypeIs(1, S16)),
743  changeTo(1, V2S16))
744  // Break up vectors with weird elements into scalars
745  .fewerElementsIf(
746  [=](const LegalityQuery &Query) { return notValidElt(Query, 0); },
747  scalarize(0))
748  .fewerElementsIf(
749  [=](const LegalityQuery &Query) { return notValidElt(Query, 1); },
750  scalarize(1))
751  .clampScalar(BigTyIdx, S32, S512)
752  .lowerFor({{S16, V2S16}})
753  .widenScalarIf(
754  [=](const LegalityQuery &Query) {
755  const LLT &Ty = Query.Types[BigTyIdx];
756  return !isPowerOf2_32(Ty.getSizeInBits()) &&
757  Ty.getSizeInBits() % 16 != 0;
758  },
759  [=](const LegalityQuery &Query) {
760  // Pick the next power of 2, or a multiple of 64 once over 128,
761  // whichever is smaller.
762  const LLT &Ty = Query.Types[BigTyIdx];
763  unsigned NewSizeInBits = 1 << Log2_32_Ceil(Ty.getSizeInBits() + 1);
764  if (NewSizeInBits >= 256) {
765  unsigned RoundedTo = alignTo<64>(Ty.getSizeInBits() + 1);
766  if (RoundedTo < NewSizeInBits)
767  NewSizeInBits = RoundedTo;
768  }
769  return std::make_pair(BigTyIdx, LLT::scalar(NewSizeInBits));
770  })
771  .legalIf([=](const LegalityQuery &Query) {
772  const LLT &BigTy = Query.Types[BigTyIdx];
773  const LLT &LitTy = Query.Types[LitTyIdx];
774 
775  if (BigTy.isVector() && BigTy.getSizeInBits() < 32)
776  return false;
777  if (LitTy.isVector() && LitTy.getSizeInBits() < 32)
778  return false;
779 
780  return BigTy.getSizeInBits() % 16 == 0 &&
781  LitTy.getSizeInBits() % 16 == 0 &&
782  BigTy.getSizeInBits() <= 512;
783  })
784  // Any vectors left are the wrong size. Scalarize them.
785  .scalarize(0)
786  .scalarize(1);
787  }
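// [Editor's example] Worked trace of the widenScalarIf mutation in the loop
// above (editor's annotation): for a big type of s65 the next power of two
// is 1 << Log2_32_Ceil(66) == 128, which is below 256, so s65 -> s128. For
// s260 the next power of two is 512, but alignTo<64>(261) == 320 is smaller,
// so s260 -> s320.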
788 
789  getActionDefinitionsBuilder(G_SEXT_INREG).lower();
790 
791  computeTables();
792  verify(*ST.getInstrInfo());
793 }
794 
795 bool AMDGPULegalizerInfo::legalizeCustom(MachineInstr &MI,
796  MachineRegisterInfo &MRI,
797  MachineIRBuilder &MIRBuilder,
798  GISelChangeObserver &Observer) const {
799  switch (MI.getOpcode()) {
800  case TargetOpcode::G_ADDRSPACE_CAST:
801  return legalizeAddrSpaceCast(MI, MRI, MIRBuilder);
802  case TargetOpcode::G_FRINT:
803  return legalizeFrint(MI, MRI, MIRBuilder);
804  case TargetOpcode::G_FCEIL:
805  return legalizeFceil(MI, MRI, MIRBuilder);
806  case TargetOpcode::G_INTRINSIC_TRUNC:
807  return legalizeIntrinsicTrunc(MI, MRI, MIRBuilder);
808  case TargetOpcode::G_SITOFP:
809  return legalizeITOFP(MI, MRI, MIRBuilder, true);
810  case TargetOpcode::G_UITOFP:
811  return legalizeITOFP(MI, MRI, MIRBuilder, false);
812  case TargetOpcode::G_FMINNUM:
813  case TargetOpcode::G_FMAXNUM:
814  case TargetOpcode::G_FMINNUM_IEEE:
815  case TargetOpcode::G_FMAXNUM_IEEE:
816  return legalizeMinNumMaxNum(MI, MRI, MIRBuilder);
817  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
818  return legalizeExtractVectorElt(MI, MRI, MIRBuilder);
819  case TargetOpcode::G_INSERT_VECTOR_ELT:
820  return legalizeInsertVectorElt(MI, MRI, MIRBuilder);
821  default:
822  return false;
823  }
824 
825  llvm_unreachable("expected switch to return");
826 }
827 
828 Register AMDGPULegalizerInfo::getSegmentAperture(
829  unsigned AS,
830  MachineRegisterInfo &MRI,
831  MachineIRBuilder &MIRBuilder) const {
832  MachineFunction &MF = MIRBuilder.getMF();
833  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
834  const LLT S32 = LLT::scalar(32);
835 
836  if (ST.hasApertureRegs()) {
837  // FIXME: Use inline constants (src_{shared, private}_base) instead of
838  // getreg.
839  unsigned Offset = AS == AMDGPUAS::LOCAL_ADDRESS ?
840  AMDGPU::Hwreg::OFFSET_SRC_SHARED_BASE :
841  AMDGPU::Hwreg::OFFSET_SRC_PRIVATE_BASE;
842  unsigned WidthM1 = AS == AMDGPUAS::LOCAL_ADDRESS ?
843  AMDGPU::Hwreg::WIDTH_M1_SRC_SHARED_BASE :
844  AMDGPU::Hwreg::WIDTH_M1_SRC_PRIVATE_BASE;
845  unsigned Encoding =
846  AMDGPU::Hwreg::ID_MEM_BASES << AMDGPU::Hwreg::ID_SHIFT_ |
847  Offset << AMDGPU::Hwreg::OFFSET_SHIFT_ |
848  WidthM1 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_;
849 
850  Register ApertureReg = MRI.createGenericVirtualRegister(S32);
851  Register GetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
852 
853  MIRBuilder.buildInstr(AMDGPU::S_GETREG_B32)
854  .addDef(GetReg)
855  .addImm(Encoding);
856  MRI.setType(GetReg, S32);
857 
858  auto ShiftAmt = MIRBuilder.buildConstant(S32, WidthM1 + 1);
859  MIRBuilder.buildInstr(TargetOpcode::G_SHL)
860  .addDef(ApertureReg)
861  .addUse(GetReg)
862  .addUse(ShiftAmt.getReg(0));
863 
864  return ApertureReg;
865  }
866 
867  Register QueuePtr = MRI.createGenericVirtualRegister(
868  LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64));
869 
870  // FIXME: Placeholder until we can track the input registers.
871  MIRBuilder.buildConstant(QueuePtr, 0xdeadbeef);
872 
873  // Offset into amd_queue_t for group_segment_aperture_base_hi /
874  // private_segment_aperture_base_hi.
875  uint32_t StructOffset = (AS == AMDGPUAS::LOCAL_ADDRESS) ? 0x40 : 0x44;
876 
877  // FIXME: Don't use undef
881 
882  MachinePointerInfo PtrInfo(V, StructOffset);
883  MachineMemOperand *MMO = MF.getMachineMemOperand(
884  PtrInfo,
885  MachineMemOperand::MOLoad |
886  MachineMemOperand::MODereferenceable |
887  MachineMemOperand::MOInvariant,
888  4,
889  MinAlign(64, StructOffset));
890 
891  Register LoadResult = MRI.createGenericVirtualRegister(S32);
892  Register LoadAddr;
893 
894  MIRBuilder.materializeGEP(LoadAddr, QueuePtr, LLT::scalar(64), StructOffset);
895  MIRBuilder.buildLoad(LoadResult, LoadAddr, *MMO);
896  return LoadResult;
897 }
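// [Editor's example] Sketch of the fallback path above (editor's annotation,
// not original source): the aperture lives in the amd_queue_t record behind
// the queue pointer, at offset 0x40 (local) or 0x44 (private). The emitted
// sequence is roughly:
//
//   %queue:_(p4)     = G_CONSTANT 0xdeadbeef   ; placeholder queue pointer
//   %addr:_(p4)      = G_GEP %queue, 0x40      ; ..._aperture_base_hi
//   %aperture:_(s32) = G_LOAD %addr            ; 4-byte invariant load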
898 
899 bool AMDGPULegalizerInfo::legalizeAddrSpaceCast(
900  MachineInstr &MI, MachineRegisterInfo &MRI,
901  MachineIRBuilder &MIRBuilder) const {
902  MachineFunction &MF = MIRBuilder.getMF();
903 
904  MIRBuilder.setInstr(MI);
905 
906  Register Dst = MI.getOperand(0).getReg();
907  Register Src = MI.getOperand(1).getReg();
908 
909  LLT DstTy = MRI.getType(Dst);
910  LLT SrcTy = MRI.getType(Src);
911  unsigned DestAS = DstTy.getAddressSpace();
912  unsigned SrcAS = SrcTy.getAddressSpace();
913 
914  // TODO: Avoid reloading from the queue ptr for each cast, or at least each
915  // vector element.
916  assert(!DstTy.isVector());
917 
918  const AMDGPUTargetMachine &TM
919  = static_cast<const AMDGPUTargetMachine &>(MF.getTarget());
920 
921  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
922  if (ST.getTargetLowering()->isNoopAddrSpaceCast(SrcAS, DestAS)) {
923  MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::G_BITCAST));
924  return true;
925  }
926 
927  if (SrcAS == AMDGPUAS::FLAT_ADDRESS) {
928  assert(DestAS == AMDGPUAS::LOCAL_ADDRESS ||
929  DestAS == AMDGPUAS::PRIVATE_ADDRESS);
930  unsigned NullVal = TM.getNullPointerValue(DestAS);
931 
932  auto SegmentNull = MIRBuilder.buildConstant(DstTy, NullVal);
933  auto FlatNull = MIRBuilder.buildConstant(SrcTy, 0);
934 
935  Register PtrLo32 = MRI.createGenericVirtualRegister(DstTy);
936 
937  // Extract low 32-bits of the pointer.
938  MIRBuilder.buildExtract(PtrLo32, Src, 0);
939 
940  Register CmpRes = MRI.createGenericVirtualRegister(LLT::scalar(1));
941  MIRBuilder.buildICmp(CmpInst::ICMP_NE, CmpRes, Src, FlatNull.getReg(0));
942  MIRBuilder.buildSelect(Dst, CmpRes, PtrLo32, SegmentNull.getReg(0));
943 
944  MI.eraseFromParent();
945  return true;
946  }
947 
948  assert(SrcAS == AMDGPUAS::LOCAL_ADDRESS ||
949  SrcAS == AMDGPUAS::PRIVATE_ADDRESS);
950 
951  auto SegmentNull =
952  MIRBuilder.buildConstant(SrcTy, TM.getNullPointerValue(SrcAS));
953  auto FlatNull =
954  MIRBuilder.buildConstant(DstTy, TM.getNullPointerValue(DestAS));
955 
956  Register ApertureReg = getSegmentAperture(DestAS, MRI, MIRBuilder);
957 
958  Register CmpRes = MRI.createGenericVirtualRegister(LLT::scalar(1));
959  MIRBuilder.buildICmp(CmpInst::ICMP_NE, CmpRes, Src, SegmentNull.getReg(0));
960 
961  Register BuildPtr = MRI.createGenericVirtualRegister(DstTy);
962 
963  // Coerce the type of the low half of the result so we can use merge_values.
964  Register SrcAsInt = MRI.createGenericVirtualRegister(LLT::scalar(32));
965  MIRBuilder.buildInstr(TargetOpcode::G_PTRTOINT)
966  .addDef(SrcAsInt)
967  .addUse(Src);
968 
969  // TODO: Should we allow mismatched types but matching sizes in merges to
970  // avoid the ptrtoint?
971  MIRBuilder.buildMerge(BuildPtr, {SrcAsInt, ApertureReg});
972  MIRBuilder.buildSelect(Dst, CmpRes, BuildPtr, FlatNull.getReg(0));
973 
974  MI.eraseFromParent();
975  return true;
976 }
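// [Editor's example] Shape of the expansion above for a local -> flat cast
// (editor's sketch, not original source):
//
//   %aperture:_(s32) = <getSegmentAperture(LOCAL_ADDRESS)>
//   %cmp:_(s1)  = G_ICMP ne, %src(p3), <segment null>
//   %lo:_(s32)  = G_PTRTOINT %src(p3)
//   %full:_(p0) = G_MERGE_VALUES %lo, %aperture ; aperture forms the high half
//   %dst:_(p0)  = G_SELECT %cmp, %full, <flat null>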
977 
978 bool AMDGPULegalizerInfo::legalizeFrint(
979  MachineInstr &MI, MachineRegisterInfo &MRI,
980  MachineIRBuilder &MIRBuilder) const {
981  MIRBuilder.setInstr(MI);
982 
983  Register Src = MI.getOperand(1).getReg();
984  LLT Ty = MRI.getType(Src);
985  assert(Ty.isScalar() && Ty.getSizeInBits() == 64);
986 
987  APFloat C1Val(APFloat::IEEEdouble(), "0x1.0p+52");
988  APFloat C2Val(APFloat::IEEEdouble(), "0x1.fffffffffffffp+51");
989 
990  auto C1 = MIRBuilder.buildFConstant(Ty, C1Val);
991  auto CopySign = MIRBuilder.buildFCopysign(Ty, C1, Src);
992 
993  // TODO: Should this propagate fast-math-flags?
994  auto Tmp1 = MIRBuilder.buildFAdd(Ty, Src, CopySign);
995  auto Tmp2 = MIRBuilder.buildFSub(Ty, Tmp1, CopySign);
996 
997  auto C2 = MIRBuilder.buildFConstant(Ty, C2Val);
998  auto Fabs = MIRBuilder.buildFAbs(Ty, Src);
999 
1000  auto Cond = MIRBuilder.buildFCmp(CmpInst::FCMP_OGT, LLT::scalar(1), Fabs, C2);
1001  MIRBuilder.buildSelect(MI.getOperand(0).getReg(), Cond, Src, Tmp2);
1002  return true;
1003 }
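// [Editor's example] Numeric trace of the magic-number trick above (editor's
// annotation): for src = 2.7, CopySign = +2^52, so Tmp1 = 2.7 + 2^52 rounds
// in the current mode to 4503599627370499.0 and Tmp2 = Tmp1 - 2^52 = 3.0,
// the rounded value. Anything with |src| > 0x1.fffffffffffffp+51 is already
// integral, so the final select passes it through unchanged.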
1004 
1005 bool AMDGPULegalizerInfo::legalizeFceil(
1006  MachineInstr &MI, MachineRegisterInfo &MRI,
1007  MachineIRBuilder &B) const {
1008  B.setInstr(MI);
1009 
1010  const LLT S1 = LLT::scalar(1);
1011  const LLT S64 = LLT::scalar(64);
1012 
1013  Register Src = MI.getOperand(1).getReg();
1014  assert(MRI.getType(Src) == S64);
1015 
1016  // result = trunc(src)
1017  // if (src > 0.0 && src != result)
1018  // result += 1.0
1019 
1020  auto Trunc = B.buildInstr(TargetOpcode::G_INTRINSIC_TRUNC, {S64}, {Src});
1021 
1022  const auto Zero = B.buildFConstant(S64, 0.0);
1023  const auto One = B.buildFConstant(S64, 1.0);
1024  auto Lt0 = B.buildFCmp(CmpInst::FCMP_OGT, S1, Src, Zero);
1025  auto NeTrunc = B.buildFCmp(CmpInst::FCMP_ONE, S1, Src, Trunc);
1026  auto And = B.buildAnd(S1, Lt0, NeTrunc);
1027  auto Add = B.buildSelect(S64, And, One, Zero);
1028 
1029  // TODO: Should this propagate fast-math-flags?
1030  B.buildFAdd(MI.getOperand(0).getReg(), Trunc, Add);
1031  return true;
1032 }
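// [Editor's example] Quick check of the sequence above (editor's annotation):
// src = -1.5 gives Trunc = -1.0; src > 0.0 fails, so Add = 0.0 and the result
// is -1.0 == ceil(-1.5). src = 1.5 gives Trunc = 1.0; both compares hold, so
// Add = 1.0 and the result is 2.0 == ceil(1.5).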
1033 
1034 static MachineInstrBuilder extractF64Exponent(unsigned Hi,
1035  MachineIRBuilder &B) {
1036  const unsigned FractBits = 52;
1037  const unsigned ExpBits = 11;
1038  LLT S32 = LLT::scalar(32);
1039 
1040  auto Const0 = B.buildConstant(S32, FractBits - 32);
1041  auto Const1 = B.buildConstant(S32, ExpBits);
1042 
1043  auto ExpPart = B.buildIntrinsic(Intrinsic::amdgcn_ubfe, {S32}, false)
1044  .addUse(Hi).addUse(Const0.getReg(0))
1045  .addUse(Const1.getReg(0));
1046 
1047  return B.buildSub(S32, ExpPart, B.buildConstant(S32, 1023));
1048 }
1049 
1050 bool AMDGPULegalizerInfo::legalizeIntrinsicTrunc(
1051  MachineInstr &MI, MachineRegisterInfo &MRI,
1052  MachineIRBuilder &B) const {
1053  B.setInstr(MI);
1054 
1055  const LLT S1 = LLT::scalar(1);
1056  const LLT S32 = LLT::scalar(32);
1057  const LLT S64 = LLT::scalar(64);
1058 
1059  Register Src = MI.getOperand(1).getReg();
1060  assert(MRI.getType(Src) == S64);
1061 
1062  // TODO: Should this use extract since the low half is unused?
1063  auto Unmerge = B.buildUnmerge({S32, S32}, Src);
1064  Register Hi = Unmerge.getReg(1);
1065 
1066  // Extract the upper half, since this is where we will find the sign and
1067  // exponent.
1068  auto Exp = extractF64Exponent(Hi, B);
1069 
1070  const unsigned FractBits = 52;
1071 
1072  // Extract the sign bit.
1073  const auto SignBitMask = B.buildConstant(S32, UINT32_C(1) << 31);
1074  auto SignBit = B.buildAnd(S32, Hi, SignBitMask);
1075 
1076  const auto FractMask = B.buildConstant(S64, (UINT64_C(1) << FractBits) - 1);
1077 
1078  const auto Zero32 = B.buildConstant(S32, 0);
1079 
1080  // Extend back to 64-bits.
1081  auto SignBit64 = B.buildMerge(S64, {Zero32.getReg(0), SignBit.getReg(0)});
1082 
1083  auto Shr = B.buildAShr(S64, FractMask, Exp);
1084  auto Not = B.buildNot(S64, Shr);
1085  auto Tmp0 = B.buildAnd(S64, Src, Not);
1086  auto FiftyOne = B.buildConstant(S32, FractBits - 1);
1087 
1088  auto ExpLt0 = B.buildICmp(CmpInst::ICMP_SLT, S1, Exp, Zero32);
1089  auto ExpGt51 = B.buildICmp(CmpInst::ICMP_SGT, S1, Exp, FiftyOne);
1090 
1091  auto Tmp1 = B.buildSelect(S64, ExpLt0, SignBit64, Tmp0);
1092  B.buildSelect(MI.getOperand(0).getReg(), ExpGt51, Src, Tmp1);
1093  return true;
1094 }
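// [Editor's example] Editor's annotation of the bit manipulation above: Exp
// is the unbiased exponent, and FractMask >> Exp covers exactly the bits
// that are still fractional at that magnitude; ANDing with the complement
// clears them, i.e. truncates. E.g. src = 3.5 (0x400C000000000000, Exp = 1):
// Shr = 0x0007FFFFFFFFFFFF, and clearing those bits yields
// 0x4008000000000000 = 3.0. Exp < 0 means |src| < 1.0, so only the sign
// survives; Exp > 51 means src is already integral.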
1095 
1096 bool AMDGPULegalizerInfo::legalizeITOFP(
1097  MachineInstr &MI, MachineRegisterInfo &MRI,
1098  MachineIRBuilder &B, bool Signed) const {
1099  B.setInstr(MI);
1100 
1101  Register Dst = MI.getOperand(0).getReg();
1102  Register Src = MI.getOperand(1).getReg();
1103 
1104  const LLT S64 = LLT::scalar(64);
1105  const LLT S32 = LLT::scalar(32);
1106 
1107  assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S64);
1108 
1109  auto Unmerge = B.buildUnmerge({S32, S32}, Src);
1110 
1111  auto CvtHi = Signed ?
1112  B.buildSITOFP(S64, Unmerge.getReg(1)) :
1113  B.buildUITOFP(S64, Unmerge.getReg(1));
1114 
1115  auto CvtLo = B.buildUITOFP(S64, Unmerge.getReg(0));
1116 
1117  auto ThirtyTwo = B.buildConstant(S32, 32);
1118  auto LdExp = B.buildIntrinsic(Intrinsic::amdgcn_ldexp, {S64}, false)
1119  .addUse(CvtHi.getReg(0))
1120  .addUse(ThirtyTwo.getReg(0));
1121 
1122  // TODO: Should this propagate fast-math-flags?
1123  B.buildFAdd(Dst, LdExp, CvtLo);
1124  MI.eraseFromParent();
1125  return true;
1126 }
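// [Editor's example] Editor's annotation: the conversion works in 32-bit
// halves, using x == hi * 2^32 + lo with lo always treated as unsigned:
//
//   (double)x == ldexp((double)hi, 32) + (double)(uint32_t)lo
//
// Only the high half carries the sign, hence the single signed conversion in
// the G_SITOFP case.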
1127 
1128 bool AMDGPULegalizerInfo::legalizeMinNumMaxNum(
1129  MachineInstr &MI, MachineRegisterInfo &MRI,
1130  MachineIRBuilder &B) const {
1131  MachineFunction &MF = B.getMF();
1132  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
1133 
1134  const bool IsIEEEOp = MI.getOpcode() == AMDGPU::G_FMINNUM_IEEE ||
1135  MI.getOpcode() == AMDGPU::G_FMAXNUM_IEEE;
1136 
1137  // With ieee_mode disabled, the instructions have the correct behavior
1138  // already for G_FMINNUM/G_FMAXNUM.
1139  if (!MFI->getMode().IEEE)
1140  return !IsIEEEOp;
1141 
1142  if (IsIEEEOp)
1143  return true;
1144 
1145  MachineIRBuilder HelperBuilder(MI);
1146  GISelObserverWrapper DummyObserver;
1147  LegalizerHelper Helper(MF, DummyObserver, HelperBuilder);
1148  HelperBuilder.setMBB(*MI.getParent());
1149  return Helper.lowerFMinNumMaxNum(MI) == LegalizerHelper::Legalized;
1150 }
1151 
1152 bool AMDGPULegalizerInfo::legalizeExtractVectorElt(
1153  MachineInstr &MI, MachineRegisterInfo &MRI,
1154  MachineIRBuilder &B) const {
1155  // TODO: Should move some of this into LegalizerHelper.
1156 
1157  // TODO: Promote dynamic indexing of s16 to s32
1158  // TODO: Dynamic s64 indexing is only legal for SGPR.
1159  Optional<int64_t> IdxVal = getConstantVRegVal(MI.getOperand(2).getReg(), MRI);
1160  if (!IdxVal) // Dynamic case will be selected to register indexing.
1161  return true;
1162 
1163  Register Dst = MI.getOperand(0).getReg();
1164  Register Vec = MI.getOperand(1).getReg();
1165 
1166  LLT VecTy = MRI.getType(Vec);
1167  LLT EltTy = VecTy.getElementType();
1168  assert(EltTy == MRI.getType(Dst));
1169 
1170  B.setInstr(MI);
1171 
1172  if (IdxVal.getValue() < VecTy.getNumElements())
1173  B.buildExtract(Dst, Vec, IdxVal.getValue() * EltTy.getSizeInBits());
1174  else
1175  B.buildUndef(Dst);
1176 
1177  MI.eraseFromParent();
1178  return true;
1179 }
1180 
1181 bool AMDGPULegalizerInfo::legalizeInsertVectorElt(
1182  MachineInstr &MI, MachineRegisterInfo &MRI,
1183  MachineIRBuilder &B) const {
1184  // TODO: Should move some of this into LegalizerHelper.
1185 
1186  // TODO: Promote dynamic indexing of s16 to s32
1187  // TODO: Dynamic s64 indexing is only legal for SGPR.
1188  Optional<int64_t> IdxVal = getConstantVRegVal(MI.getOperand(3).getReg(), MRI);
1189  if (!IdxVal) // Dynamic case will be selected to register indexing.
1190  return true;
1191 
1192  Register Dst = MI.getOperand(0).getReg();
1193  Register Vec = MI.getOperand(1).getReg();
1194  Register Ins = MI.getOperand(2).getReg();
1195 
1196  LLT VecTy = MRI.getType(Vec);
1197  LLT EltTy = VecTy.getElementType();
1198  assert(EltTy == MRI.getType(Ins));
1199 
1200  B.setInstr(MI);
1201 
1202  if (IdxVal.getValue() < VecTy.getNumElements())
1203  B.buildInsert(Dst, Vec, Ins, IdxVal.getValue() * EltTy.getSizeInBits());
1204  else
1205  B.buildUndef(Dst);
1206 
1207  MI.eraseFromParent();
1208  return true;
1209 }
1210 
1211 // Return the use branch instruction, or null if the usage is invalid.
1212 static MachineInstr *verifyCFIntrinsic(MachineInstr &MI,
1213  MachineRegisterInfo &MRI) {
1214  Register CondDef = MI.getOperand(0).getReg();
1215  if (!MRI.hasOneNonDBGUse(CondDef))
1216  return nullptr;
1217 
1218  MachineInstr &UseMI = *MRI.use_instr_nodbg_begin(CondDef);
1219  return UseMI.getParent() == MI.getParent() &&
1220  UseMI.getOpcode() == AMDGPU::G_BRCOND ? &UseMI : nullptr;
1221 }
1222 
1223 Register AMDGPULegalizerInfo::getLiveInRegister(MachineRegisterInfo &MRI,
1224  Register Reg, LLT Ty) const {
1225  Register LiveIn = MRI.getLiveInVirtReg(Reg);
1226  if (LiveIn)
1227  return LiveIn;
1228 
1229  Register NewReg = MRI.createGenericVirtualRegister(Ty);
1230  MRI.addLiveIn(Reg, NewReg);
1231  return NewReg;
1232 }
1233 
1234 bool AMDGPULegalizerInfo::loadInputValue(Register DstReg, MachineIRBuilder &B,
1235  const ArgDescriptor *Arg) const {
1236  if (!Arg->isRegister())
1237  return false; // TODO: Handle these
1238 
1239  assert(Arg->getRegister() != 0);
1240  assert(Arg->getRegister().isPhysical());
1241 
1242  MachineRegisterInfo &MRI = *B.getMRI();
1243 
1244  LLT Ty = MRI.getType(DstReg);
1245  Register LiveIn = getLiveInRegister(MRI, Arg->getRegister(), Ty);
1246 
1247  if (Arg->isMasked()) {
1248  // TODO: Should we try to emit this once in the entry block?
1249  const LLT S32 = LLT::scalar(32);
1250  const unsigned Mask = Arg->getMask();
1251  const unsigned Shift = countTrailingZeros<unsigned>(Mask);
1252 
1253  auto ShiftAmt = B.buildConstant(S32, Shift);
1254  auto LShr = B.buildLShr(S32, LiveIn, ShiftAmt);
1255  B.buildAnd(DstReg, LShr, B.buildConstant(S32, Mask >> Shift));
1256  } else
1257  B.buildCopy(DstReg, LiveIn);
1258 
1259  // Insert the argument copy if it doesn't already exist.
1260  // FIXME: It seems EmitLiveInCopies isn't called anywhere?
1261  if (!MRI.getVRegDef(LiveIn)) {
1262  MachineBasicBlock &EntryMBB = B.getMF().front();
1263  EntryMBB.addLiveIn(Arg->getRegister());
1264  B.setInsertPt(EntryMBB, EntryMBB.begin());
1265  B.buildCopy(LiveIn, Arg->getRegister());
1266  }
1267 
1268  return true;
1269 }
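// [Editor's example] Sketch of the masked path above with a hypothetical
// descriptor (not original source): for a value packed at bits [19:10],
// Mask = 0x3ff << 10, so Shift = countTrailingZeros(Mask) = 10 and the
// emitted code is
//
//   %shifted:_(s32) = G_LSHR %livein, 10
//   %value:_(s32)   = G_AND %shifted, 0x3ff   ; Mask >> Shift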
1270 
1271 bool AMDGPULegalizerInfo::legalizePreloadedArgIntrin(
1272  MachineInstr &MI,
1273  MachineRegisterInfo &MRI,
1274  MachineIRBuilder &B,
1275  AMDGPUFunctionArgInfo::PreloadedValue ArgType) const {
1276  B.setInstr(MI);
1277 
1278  const SIMachineFunctionInfo *MFI = B.getMF().getInfo<SIMachineFunctionInfo>();
1279 
1280  const ArgDescriptor *Arg;
1281  const TargetRegisterClass *RC;
1282  std::tie(Arg, RC) = MFI->getPreloadedValue(ArgType);
1283  if (!Arg) {
1284  LLVM_DEBUG(dbgs() << "Required arg register missing\n");
1285  return false;
1286  }
1287 
1288  if (loadInputValue(MI.getOperand(0).getReg(), B, Arg)) {
1289  MI.eraseFromParent();
1290  return true;
1291  }
1292 
1293  return false;
1294 }
1295 
1296 bool AMDGPULegalizerInfo::legalizeFDIVFast(MachineInstr &MI,
1297  MachineRegisterInfo &MRI,
1298  MachineIRBuilder &B) const {
1299  B.setInstr(MI);
1300  Register Res = MI.getOperand(0).getReg();
1301  Register LHS = MI.getOperand(2).getReg();
1302  Register RHS = MI.getOperand(3).getReg();
1303  uint16_t Flags = MI.getFlags();
1304 
1305  LLT S32 = LLT::scalar(32);
1306  LLT S1 = LLT::scalar(1);
1307 
1308  auto Abs = B.buildFAbs(S32, RHS, Flags);
1309  const APFloat C0Val(1.0f);
1310 
1311  auto C0 = B.buildConstant(S32, 0x6f800000);
1312  auto C1 = B.buildConstant(S32, 0x2f800000);
1313  auto C2 = B.buildConstant(S32, FloatToBits(1.0f));
1314 
1315  auto CmpRes = B.buildFCmp(CmpInst::FCMP_OGT, S1, Abs, C0, Flags);
1316  auto Sel = B.buildSelect(S32, CmpRes, C1, C2, Flags);
1317 
1318  auto Mul0 = B.buildFMul(S32, RHS, Sel, Flags);
1319 
1320  auto RCP = B.buildIntrinsic(Intrinsic::amdgcn_rcp, {S32}, false)
1321  .addUse(Mul0.getReg(0))
1322  .setMIFlags(Flags);
1323 
1324  auto Mul1 = B.buildFMul(S32, LHS, RCP, Flags);
1325 
1326  B.buildFMul(Res, Sel, Mul1, Flags);
1327 
1328  MI.eraseFromParent();
1329  return true;
1330 }
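// [Editor's example] Editor's annotation of the constants above: 0x6f800000
// is 2^96 and 0x2f800000 is 2^-32 in f32. When |rhs| > 2^96, rhs is
// pre-scaled by 2^-32 so its reciprocal stays representable, and the final
// multiply by Sel undoes the scaling:
//
//   lhs / rhs == (lhs * rcp(rhs * s)) * s, with s = 2^-32 or 1.0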
1331 
1332 bool AMDGPULegalizerInfo::legalizeImplicitArgPtr(MachineInstr &MI,
1333  MachineRegisterInfo &MRI,
1334  MachineIRBuilder &B) const {
1335  const SIMachineFunctionInfo *MFI = B.getMF().getInfo<SIMachineFunctionInfo>();
1336  if (!MFI->isEntryFunction()) {
1337  return legalizePreloadedArgIntrin(MI, MRI, B,
1338  AMDGPUFunctionArgInfo::IMPLICIT_ARG_PTR);
1339  }
1340 
1341  B.setInstr(MI);
1342 
1343  uint64_t Offset =
1344  ST.getTargetLowering()->getImplicitParameterOffset(
1345  B.getMF(), AMDGPUTargetLowering::FIRST_IMPLICIT);
1346  Register DstReg = MI.getOperand(0).getReg();
1347  LLT DstTy = MRI.getType(DstReg);
1348  LLT IdxTy = LLT::scalar(DstTy.getSizeInBits());
1349 
1350  const ArgDescriptor *Arg;
1351  const TargetRegisterClass *RC;
1352  std::tie(Arg, RC)
1353  = MFI->getPreloadedValue(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
1354  if (!Arg)
1355  return false;
1356 
1357  Register KernargPtrReg = MRI.createGenericVirtualRegister(DstTy);
1358  if (!loadInputValue(KernargPtrReg, B, Arg))
1359  return false;
1360 
1361  B.buildGEP(DstReg, KernargPtrReg, B.buildConstant(IdxTy, Offset).getReg(0));
1362  MI.eraseFromParent();
1363  return true;
1364 }
1365 
1366 bool AMDGPULegalizerInfo::legalizeIntrinsic(MachineInstr &MI,
1367  MachineRegisterInfo &MRI,
1368  MachineIRBuilder &B) const {
1369  // Replace the G_BRCOND use with the exec-manipulating branch pseudos.
1370  switch (MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID()) {
1371  case Intrinsic::amdgcn_if: {
1372  if (MachineInstr *BrCond = verifyCFIntrinsic(MI, MRI)) {
1373  const SIRegisterInfo *TRI
1374  = static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo());
1375 
1376  B.setInstr(*BrCond);
1377  Register Def = MI.getOperand(1).getReg();
1378  Register Use = MI.getOperand(3).getReg();
1379  B.buildInstr(AMDGPU::SI_IF)
1380  .addDef(Def)
1381  .addUse(Use)
1382  .addMBB(BrCond->getOperand(1).getMBB());
1383 
1384  MRI.setRegClass(Def, TRI->getWaveMaskRegClass());
1385  MRI.setRegClass(Use, TRI->getWaveMaskRegClass());
1386  MI.eraseFromParent();
1387  BrCond->eraseFromParent();
1388  return true;
1389  }
1390 
1391  return false;
1392  }
1393  case Intrinsic::amdgcn_loop: {
1394  if (MachineInstr *BrCond = verifyCFIntrinsic(MI, MRI)) {
1395  const SIRegisterInfo *TRI
1396  = static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo());
1397 
1398  B.setInstr(*BrCond);
1399  Register Reg = MI.getOperand(2).getReg();
1400  B.buildInstr(AMDGPU::SI_LOOP)
1401  .addUse(Reg)
1402  .addMBB(BrCond->getOperand(1).getMBB());
1403  MI.eraseFromParent();
1404  BrCond->eraseFromParent();
1405  MRI.setRegClass(Reg, TRI->getWaveMaskRegClass());
1406  return true;
1407  }
1408 
1409  return false;
1410  }
1411  case Intrinsic::amdgcn_kernarg_segment_ptr:
1412  return legalizePreloadedArgIntrin(MI, MRI, B,
1413  AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
1414  case Intrinsic::amdgcn_implicitarg_ptr:
1415  return legalizeImplicitArgPtr(MI, MRI, B);
1416  case Intrinsic::amdgcn_workitem_id_x:
1417  return legalizePreloadedArgIntrin(MI, MRI, B,
1418  AMDGPUFunctionArgInfo::WORKITEM_ID_X);
1419  case Intrinsic::amdgcn_workitem_id_y:
1420  return legalizePreloadedArgIntrin(MI, MRI, B,
1421  AMDGPUFunctionArgInfo::WORKITEM_ID_Y);
1422  case Intrinsic::amdgcn_workitem_id_z:
1423  return legalizePreloadedArgIntrin(MI, MRI, B,
1424  AMDGPUFunctionArgInfo::WORKITEM_ID_Z);
1425  case Intrinsic::amdgcn_workgroup_id_x:
1426  return legalizePreloadedArgIntrin(MI, MRI, B,
1427  AMDGPUFunctionArgInfo::WORKGROUP_ID_X);
1428  case Intrinsic::amdgcn_workgroup_id_y:
1429  return legalizePreloadedArgIntrin(MI, MRI, B,
1430  AMDGPUFunctionArgInfo::WORKGROUP_ID_Y);
1431  case Intrinsic::amdgcn_workgroup_id_z:
1432  return legalizePreloadedArgIntrin(MI, MRI, B,
1433  AMDGPUFunctionArgInfo::WORKGROUP_ID_Z);
1434  case Intrinsic::amdgcn_dispatch_ptr:
1435  return legalizePreloadedArgIntrin(MI, MRI, B,
1436  AMDGPUFunctionArgInfo::DISPATCH_PTR);
1437  case Intrinsic::amdgcn_queue_ptr:
1438  return legalizePreloadedArgIntrin(MI, MRI, B,
1439  AMDGPUFunctionArgInfo::QUEUE_PTR);
1440  case Intrinsic::amdgcn_implicit_buffer_ptr:
1441  return legalizePreloadedArgIntrin(MI, MRI, B,
1442  AMDGPUFunctionArgInfo::IMPLICIT_BUFFER_PTR);
1443  case Intrinsic::amdgcn_dispatch_id:
1444  return legalizePreloadedArgIntrin(MI, MRI, B,
1445  AMDGPUFunctionArgInfo::DISPATCH_ID);
1446  case Intrinsic::amdgcn_fdiv_fast:
1447  return legalizeFDIVFast(MI, MRI, B);
1448  default:
1449  return true;
1450  }
1451 
1452  return true;
1453 }
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
unsigned Log2_32_Ceil(uint32_t Value)
Return the ceil log base 2 of the specified value, 32 if the value is zero.
Definition: MathExtras.h:551
bool loadInputValue(Register DstReg, MachineIRBuilder &B, const ArgDescriptor *Arg) const
Register getRegister() const
static LLT pointer(unsigned AddressSpace, unsigned SizeInBits)
Get a low-level pointer in the given address space.
bool legalizeFceil(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder) const
bool hasApertureRegs() const
MachineInstrBuilder buildUnmerge(ArrayRef< LLT > Res, const SrcOp &Op)
Build and insert Res0, ...
This class represents lattice values for constants.
Definition: AllocatorList.h:23
MachineInstrBuilder buildInsert(Register Res, Register Src, Register Op, unsigned Index)
Address space for region memory. (GDS)
Definition: AMDGPU.h:271
Register getReg(unsigned Idx) const
Get the register for the operand index.
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
unsigned getScalarSizeInBits() const
Address space for local memory.
Definition: AMDGPU.h:274
MachineInstrBuilder buildFAbs(const DstOp &Dst, const SrcOp &Src0, Optional< unsigned > Flags=None)
Build and insert Res = G_FABS Op0.
void addLiveIn(unsigned Reg, unsigned vreg=0)
addLiveIn - Add the specified register as a live-in.
static LegalizeMutation fewerEltsToSize64Vector(unsigned TypeIdx)
The LegalityQuery object bundles together all the information that&#39;s needed to decide whether a given...
bool isScalar() const
unsigned Reg
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space...
Definition: Type.cpp:632
LLT getScalarType() const
LLT getType(unsigned Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register...
unsigned const TargetRegisterInfo * TRI
LegalityPredicate typeIs(unsigned TypeIdx, LLT TypesInit)
True iff the given type index is the specified types.
Address space for constant memory (VTX2).
Definition: AMDGPU.h:273
LegalizeRuleSet & scalarize(unsigned TypeIdx)
LegalizeRuleSet & legalForCartesianProduct(std::initializer_list< LLT > Types)
The instruction is legal when type indexes 0 and 1 are both in the given list.
static LegalizeMutation oneMoreElement(unsigned TypeIdx)
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
LegalityPredicate isPointer(unsigned TypeIdx)
True iff the specified type index is a pointer (with any address space).
static LegalityPredicate isRegisterType(unsigned TypeIdx)
Optional< MachineInstrBuilder > materializeGEP(Register &Res, Register Op0, const LLT &ValueTy, uint64_t Value)
Materialize and insert Res = G_GEP Op0, (G_CONSTANT Value)
AMDGPU::SIModeRegisterDefaults getMode() const
MachineInstrBuilder buildExtract(const DstOp &Res, const SrcOp &Src, uint64_t Index)
Build and insert `Res0, ...
static MachineInstr * verifyCFIntrinsic(MachineInstr &MI, MachineRegisterInfo &MRI)
LegalizeRuleSet & custom()
Unconditionally custom lower.
bool isVector() const
A description of a memory reference used in the backend.
void setInsertPt(MachineBasicBlock &MBB, MachineBasicBlock::iterator II)
Set the insertion point before the specified position.
LegalizeResult lowerFMinNumMaxNum(MachineInstr &MI)
A Use represents the edge between a Value definition and its users.
Definition: Use.h:55
Address space for global memory (RAT0, VTX0).
Definition: AMDGPU.h:270
bool IEEE
Floating point opcodes that support exception flag gathering quiet and propagate signaling NaN inputs...
LegalizeRuleSet & minScalarOrElt(unsigned TypeIdx, const LLT &Ty)
Ensure the scalar or element is at least as wide as Ty.
LegalizeRuleSet & widenScalarToNextPow2(unsigned TypeIdx, unsigned MinSize=0)
Widen the scalar to the next power of two that is at least MinSize.
void eraseFromParent()
Unlink &#39;this&#39; from the containing basic block and delete it.
LegalizeMutation changeTo(unsigned TypeIdx, LLT Ty)
Select this specific type for the given type index.
The memory access is dereferenceable (i.e., doesn&#39;t trap).
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:411
bool legalizeFDIVFast(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, unsigned base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
LLT getElementType() const
Returns the vector&#39;s element type. Only valid for vector types.
static LegalityPredicate elementTypeIs(unsigned TypeIdx, LLT Type)
void verify(const MCInstrInfo &MII) const
Perform simple self-diagnostic and assert if there is anything obviously wrong with the actions set u...
MachineInstrBuilder buildSITOFP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_SITOFP Src0.
MachineInstrBuilder buildFSub(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_FSUB Op0, Op1.
MachineInstrBuilder buildSelect(const DstOp &Res, const SrcOp &Tst, const SrcOp &Op0, const SrcOp &Op1, Optional< unsigned > Flags=None)
Build and insert a Res = G_SELECT Tst, Op0, Op1.
LegalizeRuleSet & clampMaxNumElements(unsigned TypeIdx, const LLT &EltTy, unsigned MaxElements)
Limit the number of elements in EltTy vectors to at most MaxElements.
void computeTables()
Compute any ancillary tables needed to quickly decide how an operation should be handled.
const T & getValue() const LLVM_LVALUE_FUNCTION
Definition: Optional.h:255
Predicate all(Predicate P0, Predicate P1)
True iff P0 and P1 are true.
Address space for flat memory.
Definition: AMDGPU.h:269
MachineFunction & getMF()
Getter for the function we currently build.
bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:95
Register getSegmentAperture(unsigned AddrSpace, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder) const
MachineInstrBuilder buildLShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, Optional< unsigned > Flags=None)
unsigned getPointerSizeInBits(unsigned AS) const
static const fltSemantics & IEEEdouble() LLVM_READNONE
Definition: APFloat.cpp:158
static LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
MachineInstrBuilder buildSub(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, Optional< unsigned > Flags=None)
Build and insert Res = G_SUB Op0, Op1.
bool legalizeITOFP(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder, bool Signed) const
MachineInstrBuilder buildFMul(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, Optional< unsigned > Flags=None)
uint32_t FloatToBits(float Float)
This function takes a float and returns the bit equivalent 32-bit integer.
Definition: MathExtras.h:605
LegalizeRuleSet & legalIf(LegalityPredicate Predicate)
The instruction is legal if predicate is true.
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
LegalizeMutation scalarize(unsigned TypeIdx)
Break up the vector type for the given type index into the element type.
constexpr uint64_t MinAlign(uint64_t A, uint64_t B)
A and B are either alignments or offsets.
Definition: MathExtras.h:614
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
MachineRegisterInfo * getMRI()
Getter for MRI.
Abstract class that contains various methods for clients to notify about changes. ...
const TargetRegisterInfo * getTargetRegisterInfo() const
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
unsigned const MachineRegisterInfo * MRI
static LLT scalarOrVector(uint16_t NumElements, LLT ScalarTy)
static MachineInstrBuilder extractF64Exponent(unsigned Hi, MachineIRBuilder &B)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:428
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:45
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
bool legalizePreloadedArgIntrin(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B, AMDGPUFunctionArgInfo::PreloadedValue ArgType) const
MachineInstrBuilder & UseMI
LegalizeRuleSet & lowerFor(std::initializer_list< LLT > Types)
The instruction is lowered when type index 0 is any type in the given list.
Helper class to build MachineInstr.
bool legalizeImplicitArgPtr(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const
void setType(unsigned VReg, LLT Ty)
Set the low-level type of VReg to Ty.
MachineInstrBuilder buildFAdd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_FADD Op0, Op1.
static LegalityPredicate vectorWiderThan(unsigned TypeIdx, unsigned Size)
LegalizeRuleSet & clampNumElements(unsigned TypeIdx, const LLT &MinTy, const LLT &MaxTy)
Limit the number of elements for the given vectors to at least MinTy&#39;s number of elements and at most...
void setInstr(MachineInstr &MI)
Set the insertion point to before MI.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
LegalizeRuleSet & legalForTypesWithMemDesc(std::initializer_list< LegalityPredicates::TypePairAndMemDesc > TypesAndMemDesc)
The instruction is legal when type indexes 0 and 1 along with the memory size and minimum alignment i...
LegalizeRuleSet & unsupportedIf(LegalityPredicate Predicate)
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
AMDGPULegalizerInfo(const GCNSubtarget &ST, const GCNTargetMachine &TM)
LegalizeRuleSet & clampScalar(unsigned TypeIdx, const LLT &MinTy, const LLT &MaxTy)
Limit the range of scalar sizes to MinTy and MaxTy.
unsigned getAddressSpace() const
LegalizeRuleSet & customIf(LegalityPredicate Predicate)
The AMDGPU TargetMachine interface definition for hw codgen targets.
MachineInstrBuilder buildGEP(const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1)
Build and insert Res = G_GEP Op0, Op1.
std::function< std::pair< unsigned, LLT >(const LegalityQuery &)> LegalizeMutation
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function. ...
Definition: Function.cpp:205
Address space for private memory.
Definition: AMDGPU.h:275
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1433
const MachineBasicBlock & front() const
static LegalityPredicate isMultiple32(unsigned TypeIdx, unsigned MaxSize=512)
LegalizeRuleSet & getActionDefinitionsBuilder(unsigned Opcode)
Get the action definition builder for the given opcode.
This class contains a discriminated union of information about pointers in memory operands...
LegalizeRuleSet & unsupportedIfMemSizeNotPow2()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
MachineInstrBuilder buildUITOFP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_UITOFP Src0.
signed greater than
Definition: InstrTypes.h:759
LegalizeRuleSet & customFor(std::initializer_list< LLT > Types)
MachineInstrBuilder buildIntrinsic(Intrinsic::ID ID, ArrayRef< Register > Res, bool HasSideEffects)
Build and insert either a G_INTRINSIC (if HasSideEffects is false) or a G_INTRINSIC_W_SIDE_EFFECTS instruction.
bool legalizeAddrSpaceCast(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder) const
0 0 1 0 True if ordered and greater than
Definition: InstrTypes.h:736
Address space for 32-bit constant memory.
Definition: AMDGPU.h:277
MachineInstrBuilder buildLoad(const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = G_LOAD Addr, MMO.
MachineInstrBuilder buildICmp(CmpInst::Predicate Pred, const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1)
Build and insert a Res = G_ICMP Pred, Op0, Op1.
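A hedged sketch of buildICmp in use (the register names and the choice of predicate are assumptions for the example); the builder is assumed to already have an insertion point set:

#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/Support/LowLevelTypeImpl.h"

using namespace llvm;

// Sketch: emit an s1 "LHS < RHS" signed comparison and return its result.
static Register exampleSignedLess(MachineIRBuilder &B, Register LHS,
                                  Register RHS) {
  const LLT S1 = LLT::scalar(1);
  return B.buildICmp(CmpInst::ICMP_SLT, S1, LHS, RHS).getReg(0);
}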
static unsigned getIntrinsicID(const SDNode *N)
void setDesc(const MCInstrDesc &tid)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one...
virtual MachineInstrBuilder buildFConstant(const DstOp &Res, const ConstantFP &Val)
Build and insert Res = G_FCONSTANT Val.
LegalizeRuleSet & minScalarSameAs(unsigned TypeIdx, unsigned LargeTypeIdx)
Widen the scalar to match the size of another.
bool legalizeFrint(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder) const
unsigned getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
signed less than
Definition: InstrTypes.h:761
bool legalizeIntrinsic(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder) const override
Return true if MI is either legal or has been legalized and false if not legal.
const TargetInstrInfo & getTII()
const Function & getFunction() const
Return the LLVM function that this machine code represents.
MachineInstrBuilder buildAShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, Optional< unsigned > Flags=None)
This file declares the MachineIRBuilder class.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
MachineInstrBuilder buildFCmp(CmpInst::Predicate Pred, const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, Optional< unsigned > Flags=None)
Build and insert a Res = G_FCMP Pred, Op0, Op1.
LegalizeRuleSet & moreElementsIf(LegalityPredicate Predicate, LegalizeMutation Mutation)
Add more elements to reach the type selected by the mutation if the predicate is true.
This file declares the targeting of the MachineLegalizer class for AMDGPU.
Optional< int64_t > getConstantVRegVal(unsigned VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT whose value fits in an int64_t, returns that value.
Definition: Utils.cpp:207
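A small sketch of the usual pattern around getConstantVRegVal (the helper name is hypothetical): check the returned Optional before trusting the value.

#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"

using namespace llvm;

// Hypothetical helper: true only when Reg is a G_CONSTANT equal to zero.
static bool isConstantZero(Register Reg, const MachineRegisterInfo &MRI) {
  if (Optional<int64_t> Val = getConstantVRegVal(Reg, MRI))
    return *Val == 0;
  return false;
}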
bool legalizeMinNumMaxNum(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder) const
LegalizeRuleSet & fewerElementsIf(LegalityPredicate Predicate, LegalizeMutation Mutation)
Remove elements to reach the type selected by the mutation if the predicate is true.
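Pairing fewerElementsIf with the scalarize mutation above gives the classic full-scalarization rule. A minimal sketch, with the opcode chosen arbitrarily:

#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"

using namespace llvm;

// Sketch: split any vector G_OR into one scalar G_OR per element.
static void exampleScalarize(LegalizerInfo &LI) {
  LI.getActionDefinitionsBuilder(TargetOpcode::G_OR)
      .fewerElementsIf(
          [](const LegalityQuery &Query) { return Query.Types[0].isVector(); },
          LegalizeMutations::scalarize(0));
}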
bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override
Returns true if a cast between SrcAS and DestAS is a noop.
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:256
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
The memory access reads data.
std::pair< const ArgDescriptor *, const TargetRegisterClass * > getPreloadedValue(AMDGPUFunctionArgInfo::PreloadedValue Value) const
LegalizeRuleSet & legalFor(std::initializer_list< LLT > Types)
The instruction is legal when type index 0 is any type in the given list.
bool legalizeIntrinsicTrunc(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder) const
Representation of each machine instruction.
Definition: MachineInstr.h:64
Instruction has been legalized and the MachineFunction changed.
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which interpolation parameters to load.
bool legalizeInsertVectorElt(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder) const
LegalizeMutation widenScalarOrEltToNextPow2(unsigned TypeIdx, unsigned Min=0)
Widen the scalar type or vector element type for the given type index to the next power of 2...
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
static LegalityPredicate isSmallOddVector(unsigned TypeIdx)
LegalizeRuleSet & lower()
The instruction is lowered.
ArrayRef< LLT > Types
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode...
Definition: MCInstrInfo.h:44
MachineInstrBuilder buildNot(const DstOp &Dst, const SrcOp &Src0)
Build and insert a bitwise not: NegOne = G_CONSTANT -1; Res = G_XOR Op0, NegOne.
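So a single buildNot call expands to two instructions, with the all-ones constant materialized for you. A minimal sketch (the type and names are assumptions):

#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/Support/LowLevelTypeImpl.h"

using namespace llvm;

// Sketch: ~Src spelled as G_XOR Src, -1 on a 32-bit scalar.
static Register exampleNot(MachineIRBuilder &B, Register Src) {
  return B.buildNot(LLT::scalar(32), Src).getReg(0);
}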
void setMBB(MachineBasicBlock &MBB)
Set the insertion point to the end of MBB.
MachineInstrBuilder buildFCopysign(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_FCOPYSIGN Op0, Op1.
The memory access always returns the same value (or traps).
0 1 1 0 True if ordered and operands are unequal
Definition: InstrTypes.h:740
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with.
bool hasOneNonDBGUse(unsigned RegNo) const
hasOneNonDBGUse - Return true if there is exactly one non-Debug use of the specified register...
unsigned getNumExplicitDefs() const
Returns the number of non-implicit definitions.
bool legalizeCustom(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder, GISelChangeObserver &Observer) const override
uint16_t getFlags() const
Return the MI flags bitvector.
Definition: MachineInstr.h:292
const TargetRegisterClass * getWaveMaskRegClass() const
use_instr_nodbg_iterator use_instr_nodbg_begin(unsigned RegNo) const
LLVM Value Representation.
Definition: Value.h:73
uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
LegalizeRuleSet & clampScalarOrElt(unsigned TypeIdx, const LLT &MinTy, const LLT &MaxTy)
Limit the range of scalar sizes to MinTy and MaxTy.
std::underlying_type< E >::type Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
MachineInstrBuilder buildAnd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_AND Op0, Op1.
MachineInstrBuilder buildMerge(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_MERGE_VALUES Op0, ...
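A minimal sketch of buildMerge (the register names are assumptions): two s32 halves become one s64 value, the inverse of G_UNMERGE_VALUES.

#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/Support/LowLevelTypeImpl.h"

using namespace llvm;

// Sketch: Lo and Hi are assumed to be s32 vregs; the result is their
// concatenation as a single s64 value.
static Register exampleMerge(MachineIRBuilder &B, Register Lo, Register Hi) {
  return B.buildMerge(LLT::scalar(64), {Lo, Hi}).getReg(0);
}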
void setRegClass(unsigned Reg, const TargetRegisterClass *RC)
setRegClass - Set the register class of the specified virtual register.
std::function< bool(const LegalityQuery &)> LegalityPredicate
static LegalityPredicate numElementsNotEven(unsigned TypeIdx)
MachineInstrBuilder buildUndef(const DstOp &Res)
Build and insert Res = IMPLICIT_DEF.
static LLT vector(uint16_t NumElements, unsigned ScalarSizeInBits)
Get a low-level vector of some number of elements and element width.
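A quick check of how these LLT factory methods behave (a sketch; the asserts merely restate the definitions):

#include "llvm/Support/LowLevelTypeImpl.h"
#include <cassert>

using namespace llvm;

// <2 x s16>: two 16-bit elements, 32 bits in total.
static void exampleLLT() {
  const LLT V2S16 = LLT::vector(2, 16);
  assert(V2S16.isVector());
  assert(V2S16.getNumElements() == 2);
  assert(V2S16.getSizeInBits() == 32);
  // scalarOrVector collapses the single-element case to a plain scalar.
  assert(!LLT::scalarOrVector(1, LLT::scalar(32)).isVector());
}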
Register createGenericVirtualRegister(LLT Ty, StringRef Name="")
Create and return a new generic virtual register with low-level type Ty.
Simple wrapper observer that takes several observers, and calls each one for each event...
Register getReg() const
getReg - Returns the register number.
ArrayRef< MemDesc > MMODescrs
Operations which require memory can use this to place requirements on the memory type for each MMO...
const SITargetLowering * getTargetLowering() const override
#define LLVM_DEBUG(X)
Definition: Debug.h:122
unsigned getLiveInVirtReg(unsigned PReg) const
getLiveInVirtReg - If PReg is a live-in physical register, return the corresponding live-in virtual register.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:416
void setAction(const InstrAspect &Aspect, LegalizeAction Action)
More friendly way to set an action for common types that have an LLT representation.
static IntegerType * getInt8Ty(LLVMContext &C)
Definition: Type.cpp:173
The operation is expected to be selectable directly by the target, and no transformation is necessary...
Definition: LegalizerInfo.h:47
Register getLiveInRegister(MachineRegisterInfo &MRI, Register Reg, LLT Ty) const
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
bool legalizeExtractVectorElt(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder) const
LegalityPredicate sameSize(unsigned TypeIdx0, unsigned TypeIdx1)
True iff the specified type indices are both the same bit size.
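As a closing sketch, sameSize plugs directly into the rule-building methods above (the opcode is chosen only for illustration):

#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"

using namespace llvm;

// Sketch: a bitcast is only legal when both type indices have equal width.
static void exampleSameSize(LegalizerInfo &LI) {
  LI.getActionDefinitionsBuilder(TargetOpcode::G_BITCAST)
      .legalIf(LegalityPredicates::sameSize(0, 1));
}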