LLVM 10.0.0svn
AArch64LegalizerInfo.cpp
//===- AArch64LegalizerInfo.cpp ----------------------------------*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the MachineLegalizer class for
/// AArch64.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "AArch64LegalizerInfo.h"
#include "AArch64Subtarget.h"
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Type.h"

#define DEBUG_TYPE "aarch64-legalinfo"

using namespace llvm;
using namespace LegalizeActions;
using namespace LegalizeMutations;
using namespace LegalityPredicates;

AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
  using namespace TargetOpcode;
  const LLT p0 = LLT::pointer(0, 64);
  const LLT s1 = LLT::scalar(1);
  const LLT s8 = LLT::scalar(8);
  const LLT s16 = LLT::scalar(16);
  const LLT s32 = LLT::scalar(32);
  const LLT s64 = LLT::scalar(64);
  const LLT s128 = LLT::scalar(128);
  const LLT s256 = LLT::scalar(256);
  const LLT s512 = LLT::scalar(512);
  const LLT v16s8 = LLT::vector(16, 8);
  const LLT v8s8 = LLT::vector(8, 8);
  const LLT v4s8 = LLT::vector(4, 8);
  const LLT v8s16 = LLT::vector(8, 16);
  const LLT v4s16 = LLT::vector(4, 16);
  const LLT v2s16 = LLT::vector(2, 16);
  const LLT v2s32 = LLT::vector(2, 32);
  const LLT v4s32 = LLT::vector(4, 32);
  const LLT v2s64 = LLT::vector(2, 64);
  const LLT v2p0 = LLT::vector(2, p0);

  // FIXME: support subtargets which have neon/fp-armv8 disabled.
  if (!ST.hasNEON() || !ST.hasFPARMv8()) {
    computeTables();
    return;
  }

  getActionDefinitionsBuilder(G_IMPLICIT_DEF)
      .legalFor({p0, s1, s8, s16, s32, s64, v4s32, v2s64})
      .clampScalar(0, s1, s64)
      .widenScalarToNextPow2(0, 8)
      .fewerElementsIf(
          [=](const LegalityQuery &Query) {
            return Query.Types[0].isVector() &&
                   (Query.Types[0].getElementType() != s64 ||
                    Query.Types[0].getNumElements() != 2);
          },
          [=](const LegalityQuery &Query) {
            LLT EltTy = Query.Types[0].getElementType();
            if (EltTy == s64)
              return std::make_pair(0, LLT::vector(2, 64));
            return std::make_pair(0, EltTy);
          });
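  // A sketch of how this rule composes (assumed, not from a test): types in
  // legalFor above match first, so e.g. a hypothetical <4 x s64>
  // G_IMPLICIT_DEF falls through to fewerElementsIf and is narrowed toward
  // <2 x s64>, while a <4 x s8> def is broken into s8 scalar defs that the
  // scalar rules then handle.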

  getActionDefinitionsBuilder(G_PHI)
      .legalFor({p0, s16, s32, s64, v2s32, v4s32, v2s64})
      .clampScalar(0, s16, s64)
      .widenScalarToNextPow2(0);

  getActionDefinitionsBuilder(G_BSWAP)
      .legalFor({s32, s64, v4s32, v2s32, v2s64})
      .clampScalar(0, s16, s64)
      .widenScalarToNextPow2(0);

  getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR})
      .legalFor({s32, s64, v2s32, v4s32, v2s64, v8s16, v16s8})
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0)
      .clampNumElements(0, v2s32, v4s32)
      .clampNumElements(0, v2s64, v2s64)
      .moreElementsToNextPow2(0);
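  // A rough example of how these rules compose (illustrative only): an s8
  // G_ADD is widened to s32 by clampScalar; a <3 x s32> G_ADD is padded to
  // <4 x s32> by moreElementsToNextPow2; a <4 x s64> G_ADD is split into
  // <2 x s64> halves by clampNumElements.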

  getActionDefinitionsBuilder(G_SHL)
      .legalFor({{s32, s32}, {s64, s64},
                 {v2s32, v2s32}, {v4s32, v4s32}, {v2s64, v2s64}})
      .clampScalar(1, s32, s64)
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0)
      .clampNumElements(0, v2s32, v4s32)
      .clampNumElements(0, v2s64, v2s64)
      .moreElementsToNextPow2(0)
      .minScalarSameAs(1, 0);

  getActionDefinitionsBuilder(G_GEP)
      .legalFor({{p0, s64}})
      .clampScalar(1, s64, s64);

  getActionDefinitionsBuilder(G_PTR_MASK).legalFor({p0});

  getActionDefinitionsBuilder({G_SDIV, G_UDIV})
      .legalFor({s32, s64})
      .libcallFor({s128})
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0)
      .scalarize(0);
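  // Sketch of the intended effect (informal): s8/s16 divisions are widened to
  // s32 by clampScalar, while s128 divisions, which have no instruction, go
  // to the 128-bit runtime routines via libcallFor (e.g. __divti3 for G_SDIV,
  // assuming the usual compiler-rt/libgcc naming).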

  getActionDefinitionsBuilder({G_LSHR, G_ASHR})
      .customIf([=](const LegalityQuery &Query) {
        const auto &SrcTy = Query.Types[0];
        const auto &AmtTy = Query.Types[1];
        return !SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
               AmtTy.getSizeInBits() == 32;
      })
      .legalFor(
          {{s32, s32}, {s32, s64}, {s64, s64}, {v2s32, v2s32}, {v4s32, v4s32}})
      .clampScalar(1, s32, s64)
      .clampScalar(0, s32, s64)
      .minScalarSameAs(1, 0);
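  // The customIf case above is handled by legalizeShlAshrLshr() later in this
  // file: it promotes a constant 32-bit shift amount to s64 so that the
  // imported SelectionDAG patterns can select the immediate forms.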

  getActionDefinitionsBuilder({G_SREM, G_UREM})
      .lowerFor({s1, s8, s16, s32, s64});
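  // The lowering expands the remainder in terms of operations handled above;
  // conceptually (a sketch, not the exact emitted MIR):
  //   %q = G_SDIV %a, %b
  //   %m = G_MUL %q, %b
  //   %r = G_SUB %a, %m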

  getActionDefinitionsBuilder({G_SMULO, G_UMULO})
      .lowerFor({{s64, s1}});

  getActionDefinitionsBuilder({G_SMULH, G_UMULH}).legalFor({s32, s64});

  getActionDefinitionsBuilder({G_UADDE, G_USUBE, G_SADDO, G_SSUBO, G_UADDO})
      .legalFor({{s32, s1}, {s64, s1}});

  getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FNEG})
      .legalFor({s32, s64, v2s64, v4s32, v2s32});

  getActionDefinitionsBuilder(G_FREM).libcallFor({s32, s64});

  getActionDefinitionsBuilder({G_FCEIL, G_FABS, G_FSQRT, G_FFLOOR, G_FRINT,
                               G_FMA, G_INTRINSIC_TRUNC, G_INTRINSIC_ROUND,
                               G_FNEARBYINT})
      // If we don't have full FP16 support, then scalarize the elements of
      // vectors containing fp16 types.
      .fewerElementsIf(
          [=, &ST](const LegalityQuery &Query) {
            const auto &Ty = Query.Types[0];
            return Ty.isVector() && Ty.getElementType() == s16 &&
                   !ST.hasFullFP16();
          },
          [=](const LegalityQuery &Query) { return std::make_pair(0, s16); })
      // If we don't have full FP16 support, then widen s16 to s32 if we
      // encounter it.
      .widenScalarIf(
          [=, &ST](const LegalityQuery &Query) {
            return Query.Types[0] == s16 && !ST.hasFullFP16();
          },
          [=](const LegalityQuery &Query) { return std::make_pair(0, s32); })
      .legalFor({s16, s32, s64, v2s32, v4s32, v2s64, v2s16, v4s16, v8s16});

  getActionDefinitionsBuilder(
      {G_FCOS, G_FSIN, G_FLOG10, G_FLOG, G_FLOG2, G_FEXP, G_FEXP2, G_FPOW})
      // We need a call for these, so we always need to scalarize.
      .scalarize(0)
      // Regardless of FP16 support, widen 16-bit elements to 32 bits.
      .minScalar(0, s32)
      .libcallFor({s32, s64, v2s32, v4s32, v2s64});

  getActionDefinitionsBuilder(G_INSERT)
      .unsupportedIf([=](const LegalityQuery &Query) {
        return Query.Types[0].getSizeInBits() <= Query.Types[1].getSizeInBits();
      })
      .legalIf([=](const LegalityQuery &Query) {
        const LLT &Ty0 = Query.Types[0];
        const LLT &Ty1 = Query.Types[1];
        if (Ty0 != s32 && Ty0 != s64 && Ty0 != p0)
          return false;
        return isPowerOf2_32(Ty1.getSizeInBits()) &&
               (Ty1.getSizeInBits() == 1 || Ty1.getSizeInBits() >= 8);
      })
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0)
      .maxScalarIf(typeInSet(0, {s32}), 1, s16)
      .maxScalarIf(typeInSet(0, {s64}), 1, s32)
      .widenScalarToNextPow2(1);

  getActionDefinitionsBuilder(G_EXTRACT)
      .unsupportedIf([=](const LegalityQuery &Query) {
        return Query.Types[0].getSizeInBits() >= Query.Types[1].getSizeInBits();
      })
      .legalIf([=](const LegalityQuery &Query) {
        const LLT &Ty0 = Query.Types[0];
        const LLT &Ty1 = Query.Types[1];
        if (Ty1 != s32 && Ty1 != s64 && Ty1 != s128)
          return false;
        if (Ty1 == p0)
          return true;
        return isPowerOf2_32(Ty0.getSizeInBits()) &&
               (Ty0.getSizeInBits() == 1 || Ty0.getSizeInBits() >= 8);
      })
      .clampScalar(1, s32, s128)
      .widenScalarToNextPow2(1)
      .maxScalarIf(typeInSet(1, {s32}), 0, s16)
      .maxScalarIf(typeInSet(1, {s64}), 0, s32)
      .widenScalarToNextPow2(0);

  getActionDefinitionsBuilder({G_SEXTLOAD, G_ZEXTLOAD})
      .legalForTypesWithMemDesc({{s32, p0, 8, 8},
                                 {s32, p0, 16, 8},
                                 {s32, p0, 32, 8},
                                 {s64, p0, 8, 2},
                                 {s64, p0, 16, 2},
                                 {s64, p0, 32, 4},
                                 {s64, p0, 64, 8},
                                 {p0, p0, 64, 8},
                                 {v2s32, p0, 64, 8}})
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0)
      // TODO: We could support sum-of-pow2's but the lowering code doesn't know
      // how to do that yet.
      .unsupportedIfMemSizeNotPow2()
      // Lower anything left over into G_*EXT and G_LOAD
      .lower();
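  // For example (illustrative): an s64 G_SEXTLOAD from a 16-bit location
  // matches the {s64, p0, 16, 2} entry above and stays as-is, while an
  // extending load that matches no entry is eventually rewritten by .lower()
  // into a plain G_LOAD followed by a G_SEXT or G_ZEXT.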

  auto IsPtrVecPred = [=](const LegalityQuery &Query) {
    const LLT &ValTy = Query.Types[0];
    if (!ValTy.isVector())
      return false;
    const LLT EltTy = ValTy.getElementType();
    return EltTy.isPointer() && EltTy.getAddressSpace() == 0;
  };
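  // Vectors of pointers (e.g. <2 x p0>) take the customIf path below; they are
  // rewritten in legalizeLoadStore() as bitcasts to/from the equivalent
  // integer vector (<2 x s64>), for which selection patterns already exist.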

  getActionDefinitionsBuilder(G_LOAD)
      .legalForTypesWithMemDesc({{s8, p0, 8, 8},
                                 {s16, p0, 16, 8},
                                 {s32, p0, 32, 8},
                                 {s64, p0, 64, 8},
                                 {p0, p0, 64, 8},
                                 {s128, p0, 128, 8},
                                 {v8s8, p0, 64, 8},
                                 {v16s8, p0, 128, 8},
                                 {v4s16, p0, 64, 8},
                                 {v8s16, p0, 128, 8},
                                 {v2s32, p0, 64, 8},
                                 {v4s32, p0, 128, 8},
                                 {v2s64, p0, 128, 8}})
      // These extends are also legal
      .legalForTypesWithMemDesc({{s32, p0, 8, 8},
                                 {s32, p0, 16, 8}})
      .clampScalar(0, s8, s64)
      .lowerIfMemSizeNotPow2()
      // Lower any any-extending loads left into G_ANYEXT and G_LOAD
      .lowerIf([=](const LegalityQuery &Query) {
        return Query.Types[0].getSizeInBits() != Query.MMODescrs[0].SizeInBits;
      })
      .widenScalarToNextPow2(0)
      .clampMaxNumElements(0, s32, 2)
      .clampMaxNumElements(0, s64, 1)
      .customIf(IsPtrVecPred);

  getActionDefinitionsBuilder(G_STORE)
      .legalForTypesWithMemDesc({{s8, p0, 8, 8},
                                 {s16, p0, 16, 8},
                                 {s32, p0, 8, 8},
                                 {s32, p0, 16, 8},
                                 {s32, p0, 32, 8},
                                 {s64, p0, 64, 8},
                                 {p0, p0, 64, 8},
                                 {s128, p0, 128, 8},
                                 {v16s8, p0, 128, 8},
                                 {v4s16, p0, 64, 8},
                                 {v8s16, p0, 128, 8},
                                 {v2s32, p0, 64, 8},
                                 {v4s32, p0, 128, 8},
                                 {v2s64, p0, 128, 8}})
      .clampScalar(0, s8, s64)
      .lowerIfMemSizeNotPow2()
      .lowerIf([=](const LegalityQuery &Query) {
        return Query.Types[0].isScalar() &&
               Query.Types[0].getSizeInBits() != Query.MMODescrs[0].SizeInBits;
      })
      .clampMaxNumElements(0, s32, 2)
      .clampMaxNumElements(0, s64, 1)
      .customIf(IsPtrVecPred);

  // Constants
  getActionDefinitionsBuilder(G_CONSTANT)
      .legalFor({p0, s8, s16, s32, s64})
      .clampScalar(0, s8, s64)
      .widenScalarToNextPow2(0);
  getActionDefinitionsBuilder(G_FCONSTANT)
      .legalFor({s32, s64})
      .clampScalar(0, s32, s64);

  getActionDefinitionsBuilder(G_ICMP)
      .legalFor({{s32, s32},
                 {s32, s64},
                 {s32, p0},
                 {v4s32, v4s32},
                 {v2s32, v2s32},
                 {v2s64, v2s64},
                 {v2s64, v2p0},
                 {v4s16, v4s16},
                 {v8s16, v8s16},
                 {v8s8, v8s8},
                 {v16s8, v16s8}})
      .clampScalar(1, s32, s64)
      .clampScalar(0, s32, s32)
      .minScalarEltSameAsIf(
          [=](const LegalityQuery &Query) {
            const LLT &Ty = Query.Types[0];
            const LLT &SrcTy = Query.Types[1];
            return Ty.isVector() && !SrcTy.getElementType().isPointer() &&
                   Ty.getElementType() != SrcTy.getElementType();
          },
          0, 1)
      .minScalarOrEltIf(
          [=](const LegalityQuery &Query) { return Query.Types[1] == v2s16; },
          1, s32)
      .minScalarOrEltIf(
          [=](const LegalityQuery &Query) { return Query.Types[1] == v2p0; }, 0,
          s64)
      .widenScalarOrEltToNextPow2(1);
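
  // The minScalarEltSameAsIf rule above ensures, for example, that comparing
  // two <4 x s16> vectors yields a <4 x s16> mask rather than <4 x s1>, by
  // widening the destination's elements to match the source (a sketch of the
  // intent, not a transcript of legalizer output).
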
  getActionDefinitionsBuilder(G_FCMP)
      .legalFor({{s32, s32}, {s32, s64}})
      .clampScalar(0, s32, s32)
      .clampScalar(1, s32, s64)
      .widenScalarToNextPow2(1);

  // Extensions
  auto ExtLegalFunc = [=](const LegalityQuery &Query) {
    unsigned DstSize = Query.Types[0].getSizeInBits();

    if (DstSize == 128 && !Query.Types[0].isVector())
      return false; // Extending to a scalar s128 needs narrowing.

    // Make sure that we have something that will fit in a register, and
    // make sure it's a power of 2.
    if (DstSize < 8 || DstSize > 128 || !isPowerOf2_32(DstSize))
      return false;

    const LLT &SrcTy = Query.Types[1];

    // Special case for s1.
    if (SrcTy == s1)
      return true;

    // Make sure we fit in a register otherwise. Don't bother checking that
    // the source type is below 128 bits. We shouldn't be allowing anything
    // through which is wider than the destination in the first place.
    unsigned SrcSize = SrcTy.getSizeInBits();
    if (SrcSize < 8 || !isPowerOf2_32(SrcSize))
      return false;

    return true;
  };
  getActionDefinitionsBuilder({G_ZEXT, G_SEXT, G_ANYEXT})
      .legalIf(ExtLegalFunc)
      .clampScalar(0, s64, s64); // Just for s128, others are handled above.

  getActionDefinitionsBuilder(G_TRUNC).alwaysLegal();

  getActionDefinitionsBuilder(G_SEXT_INREG).lower();

  // FP conversions
  getActionDefinitionsBuilder(G_FPTRUNC).legalFor(
      {{s16, s32}, {s16, s64}, {s32, s64}, {v4s16, v4s32}, {v2s32, v2s64}});
  getActionDefinitionsBuilder(G_FPEXT).legalFor(
      {{s32, s16}, {s64, s16}, {s64, s32}, {v4s32, v4s16}, {v2s64, v2s32}});

  // Conversions
  getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
      .legalForCartesianProduct({s32, s64, v2s64, v4s32, v2s32})
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0)
      .clampScalar(1, s32, s64)
      .widenScalarToNextPow2(1);

  getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
      .legalForCartesianProduct({s32, s64, v2s64, v4s32, v2s32})
      .clampScalar(1, s32, s64)
      .widenScalarToNextPow2(1)
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0);

  // Control-flow
  getActionDefinitionsBuilder(G_BRCOND).legalFor({s1, s8, s16, s32});
  getActionDefinitionsBuilder(G_BRINDIRECT).legalFor({p0});

  // Select
  // FIXME: We can probably do a bit better than just scalarizing vector
  // selects.
  getActionDefinitionsBuilder(G_SELECT)
      .legalFor({{s32, s1}, {s64, s1}, {p0, s1}})
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0)
      .scalarize(0);

  // Pointer-handling
  getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0});
  getActionDefinitionsBuilder(G_GLOBAL_VALUE).legalFor({p0});

  getActionDefinitionsBuilder(G_PTRTOINT)
      .legalForCartesianProduct({s1, s8, s16, s32, s64}, {p0})
      .maxScalar(0, s64)
      .widenScalarToNextPow2(0, /*Min*/ 8);

  getActionDefinitionsBuilder(G_INTTOPTR)
      .unsupportedIf([&](const LegalityQuery &Query) {
        return Query.Types[0].getSizeInBits() != Query.Types[1].getSizeInBits();
      })
      .legalFor({{p0, s64}});

  // Casts between 32 and 64-bit-wide types are just copies.
  // The same goes for 128-bit-wide types, except they live on the FPR bank.
  getActionDefinitionsBuilder(G_BITCAST)
      // FIXME: This is wrong since G_BITCAST is not allowed to change the
      // number of bits but it's what the previous code described and fixing
      // it breaks tests.
      .legalForCartesianProduct({s1, s8, s16, s32, s64, s128, v16s8, v8s8, v4s8,
                                 v8s16, v4s16, v2s16, v4s32, v2s32, v2s64,
                                 v2p0});

  getActionDefinitionsBuilder(G_VASTART).legalFor({p0});

  // va_list must be a pointer, but most sized types are pretty easy to handle
  // as the destination.
  getActionDefinitionsBuilder(G_VAARG)
      .customForCartesianProduct({s8, s16, s32, s64, p0}, {p0})
      .clampScalar(0, s8, s64)
      .widenScalarToNextPow2(0, /*Min*/ 8);

  if (ST.hasLSE()) {
    getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG_WITH_SUCCESS)
        .lowerIf(all(
            typeInSet(0, {s8, s16, s32, s64}), typeIs(1, s1), typeIs(2, p0),
            atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Monotonic)));

    getActionDefinitionsBuilder(
        {G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD, G_ATOMICRMW_SUB, G_ATOMICRMW_AND,
         G_ATOMICRMW_OR, G_ATOMICRMW_XOR, G_ATOMICRMW_MIN, G_ATOMICRMW_MAX,
         G_ATOMICRMW_UMIN, G_ATOMICRMW_UMAX, G_ATOMIC_CMPXCHG})
        .legalIf(all(
            typeInSet(0, {s8, s16, s32, s64}), typeIs(1, p0),
            atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Monotonic)));
  }

  getActionDefinitionsBuilder(G_BLOCK_ADDR).legalFor({p0});

  // Merge/Unmerge
  for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
    unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
    unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;

    auto notValidElt = [](const LegalityQuery &Query, unsigned TypeIdx) {
      const LLT &Ty = Query.Types[TypeIdx];
      if (Ty.isVector()) {
        const LLT &EltTy = Ty.getElementType();
        if (EltTy.getSizeInBits() < 8 || EltTy.getSizeInBits() > 64)
          return true;
        if (!isPowerOf2_32(EltTy.getSizeInBits()))
          return true;
      }
      return false;
    };

    // FIXME: This rule is horrible, but specifies the same as what we had
    // before with the particularly strange definitions removed (e.g.
    // s8 = G_MERGE_VALUES s32, s32).
    // Part of the complexity comes from these ops being extremely flexible. For
    // example, you can build/decompose vectors with it, concatenate vectors,
    // etc. and in addition to this you can also bitcast with it at the same
    // time. We've been considering breaking it up into multiple ops to make it
    // more manageable throughout the backend.
    getActionDefinitionsBuilder(Op)
        // Break up vectors with weird elements into scalars
        .fewerElementsIf(
            [=](const LegalityQuery &Query) { return notValidElt(Query, 0); },
            scalarize(0))
        .fewerElementsIf(
            [=](const LegalityQuery &Query) { return notValidElt(Query, 1); },
            scalarize(1))
        // Clamp the big scalar to s8-s512 and make it either a power of 2, 192,
        // or 384.
        .clampScalar(BigTyIdx, s8, s512)
        .widenScalarIf(
            [=](const LegalityQuery &Query) {
              const LLT &Ty = Query.Types[BigTyIdx];
              return !isPowerOf2_32(Ty.getSizeInBits()) &&
                     Ty.getSizeInBits() % 64 != 0;
            },
            [=](const LegalityQuery &Query) {
              // Pick the next power of 2, or a multiple of 64 over 128.
              // Whichever is smaller.
              const LLT &Ty = Query.Types[BigTyIdx];
              unsigned NewSizeInBits =
                  1 << Log2_32_Ceil(Ty.getSizeInBits() + 1);
              if (NewSizeInBits >= 256) {
                unsigned RoundedTo = alignTo<64>(Ty.getSizeInBits() + 1);
                if (RoundedTo < NewSizeInBits)
                  NewSizeInBits = RoundedTo;
              }
              return std::make_pair(BigTyIdx, LLT::scalar(NewSizeInBits));
            })
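        // Worked examples of the mutation above (informal): a 100-bit scalar
        // widens to the next power of 2, s128; a 300-bit scalar would give
        // NewSizeInBits = 512 >= 256, but alignTo<64>(301) = 320 is smaller,
        // so it becomes s320 instead.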
        // Clamp the little scalar to s8-s256 and make it a power of 2. It's not
        // worth considering the multiples of 64 since 2*192 and 2*384 are not
        // valid.
        .clampScalar(LitTyIdx, s8, s256)
        .widenScalarToNextPow2(LitTyIdx, /*Min*/ 8)
        // So at this point, we have s8, s16, s32, s64, s128, s192, s256, s384,
        // s512, <X x s8>, <X x s16>, <X x s32>, or <X x s64>.
        // At this point it's simple enough to accept the legal types.
        .legalIf([=](const LegalityQuery &Query) {
          const LLT &BigTy = Query.Types[BigTyIdx];
          const LLT &LitTy = Query.Types[LitTyIdx];
          if (BigTy.isVector() && BigTy.getSizeInBits() < 32)
            return false;
          if (LitTy.isVector() && LitTy.getSizeInBits() < 32)
            return false;
          return BigTy.getSizeInBits() % LitTy.getSizeInBits() == 0;
        })
        // Any vectors left are the wrong size. Scalarize them.
        .scalarize(0)
        .scalarize(1);
  }

  getActionDefinitionsBuilder(G_EXTRACT_VECTOR_ELT)
      .unsupportedIf([=](const LegalityQuery &Query) {
        const LLT &EltTy = Query.Types[1].getElementType();
        return Query.Types[0] != EltTy;
      })
      .minScalar(2, s64)
      .legalIf([=](const LegalityQuery &Query) {
        const LLT &VecTy = Query.Types[1];
        return VecTy == v2s16 || VecTy == v4s16 || VecTy == v8s16 ||
               VecTy == v4s32 || VecTy == v2s64 || VecTy == v2s32;
      });

  getActionDefinitionsBuilder(G_INSERT_VECTOR_ELT)
      .legalIf([=](const LegalityQuery &Query) {
        const LLT &VecTy = Query.Types[0];
        // TODO: Support s8 and s16
        return VecTy == v2s32 || VecTy == v4s32 || VecTy == v2s64;
      });

  getActionDefinitionsBuilder(G_BUILD_VECTOR)
      .legalFor({{v4s16, s16},
                 {v8s16, s16},
                 {v2s32, s32},
                 {v4s32, s32},
                 {v2p0, p0},
                 {v2s64, s64}})
      .clampNumElements(0, v4s32, v4s32)
      .clampNumElements(0, v2s64, v2s64)

      // Deal with larger scalar types, which will be implicitly truncated.
      .legalIf([=](const LegalityQuery &Query) {
        return Query.Types[0].getScalarSizeInBits() <
               Query.Types[1].getSizeInBits();
      })
      .minScalarSameAs(1, 0);
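  // E.g. (illustrative) %v:_(<4 x s16>) = G_BUILD_VECTOR %a:_(s32), ... is
  // accepted by the legalIf above because each s32 source is implicitly
  // truncated to the s16 element type.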

  getActionDefinitionsBuilder(G_CTLZ).legalForCartesianProduct(
      {s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
      .scalarize(1);

  getActionDefinitionsBuilder(G_SHUFFLE_VECTOR)
      .legalIf([=](const LegalityQuery &Query) {
        const LLT &DstTy = Query.Types[0];
        const LLT &SrcTy = Query.Types[1];
        // For now just support the TBL2 variant which needs the source vectors
        // to be the same size as the dest.
        if (DstTy != SrcTy)
          return false;
        for (auto &Ty : {v2s32, v4s32, v2s64}) {
          if (DstTy == Ty)
            return true;
        }
        return false;
      })
      // G_SHUFFLE_VECTOR can have scalar sources (from 1 x s vectors); we
      // just want those lowered into G_BUILD_VECTOR.
      .lowerIf([=](const LegalityQuery &Query) {
        return !Query.Types[1].isVector();
      })
      .clampNumElements(0, v4s32, v4s32)
      .clampNumElements(0, v2s64, v2s64);

  getActionDefinitionsBuilder(G_CONCAT_VECTORS)
      .legalFor({{v4s32, v2s32}, {v8s16, v4s16}});

  getActionDefinitionsBuilder(G_JUMP_TABLE)
      .legalFor({{p0}, {s64}});

  getActionDefinitionsBuilder(G_BRJT).legalIf([=](const LegalityQuery &Query) {
    return Query.Types[0] == p0 && Query.Types[1] == s64;
  });

  getActionDefinitionsBuilder(G_DYN_STACKALLOC).lower();

  computeTables();
  verify(*ST.getInstrInfo());
}

bool AArch64LegalizerInfo::legalizeCustom(MachineInstr &MI,
                                          MachineRegisterInfo &MRI,
                                          MachineIRBuilder &MIRBuilder,
                                          GISelChangeObserver &Observer) const {
  switch (MI.getOpcode()) {
  default:
    // No idea what to do.
    return false;
  case TargetOpcode::G_VAARG:
    return legalizeVaArg(MI, MRI, MIRBuilder);
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_STORE:
    return legalizeLoadStore(MI, MRI, MIRBuilder, Observer);
  case TargetOpcode::G_SHL:
  case TargetOpcode::G_ASHR:
  case TargetOpcode::G_LSHR:
    return legalizeShlAshrLshr(MI, MRI, MIRBuilder, Observer);
  }

  llvm_unreachable("expected switch to return");
}

bool AArch64LegalizerInfo::legalizeIntrinsic(MachineInstr &MI,
                                             MachineRegisterInfo &MRI,
                                             MachineIRBuilder &MIRBuilder) const {
  switch (MI.getIntrinsicID()) {
  case Intrinsic::memcpy:
  case Intrinsic::memset:
  case Intrinsic::memmove:
    if (createMemLibcall(MIRBuilder, MRI, MI) ==
        LegalizerHelper::UnableToLegalize)
      return false;
    MI.eraseFromParent();
    return true;
  default:
    break;
  }
  return true;
}

bool AArch64LegalizerInfo::legalizeShlAshrLshr(
    MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
    GISelChangeObserver &Observer) const {
  assert(MI.getOpcode() == TargetOpcode::G_ASHR ||
         MI.getOpcode() == TargetOpcode::G_LSHR ||
         MI.getOpcode() == TargetOpcode::G_SHL);
  // If the shift amount is a G_CONSTANT, promote it to a 64-bit type so the
  // imported patterns can select it later. Either way, it will be legal.
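  // A sketch of the rewrite performed below (assumed MIR spelling):
  //   %amt:_(s32) = G_CONSTANT i32 3
  //   %d:_(s32) = G_SHL %x:_(s32), %amt:_(s32)
  // becomes
  //   %ext:_(s64) = G_ZEXT %amt:_(s32)
  //   %d:_(s32) = G_SHL %x:_(s32), %ext:_(s64)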
  Register AmtReg = MI.getOperand(2).getReg();
  auto *CstMI = MRI.getVRegDef(AmtReg);
  assert(CstMI && "expected to find a vreg def");
  if (CstMI->getOpcode() != TargetOpcode::G_CONSTANT)
    return true;
  // Check the shift amount is in range for an immediate form.
  unsigned Amount = CstMI->getOperand(1).getCImm()->getZExtValue();
  if (Amount > 31)
    return true; // This will have to remain a register variant.
  assert(MRI.getType(AmtReg).getSizeInBits() == 32);
  MIRBuilder.setInstr(MI);
  auto ExtCst = MIRBuilder.buildZExt(LLT::scalar(64), AmtReg);
  MI.getOperand(2).setReg(ExtCst.getReg(0));
  return true;
}

bool AArch64LegalizerInfo::legalizeLoadStore(
    MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
    GISelChangeObserver &Observer) const {
  assert(MI.getOpcode() == TargetOpcode::G_STORE ||
         MI.getOpcode() == TargetOpcode::G_LOAD);
  // Here we just try to handle vector loads/stores where our value type might
  // have pointer elements, which the SelectionDAG importer can't handle. To
  // allow the existing patterns for s64 to fire for p0, we just try to bitcast
  // the value to use s64 types.

  // Custom legalization requires that the instruction, if not deleted, be
  // fully legalized. In order to allow further legalization of the inst, we
  // create a new instruction and erase the existing one.
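  // For instance (an informal sketch, not verbatim legalizer output), a
  // G_STORE of %v:_(<2 x p0>) becomes:
  //   %cast:_(<2 x s64>) = G_BITCAST %v:_(<2 x p0>)
  //   G_STORE %cast:_(<2 x s64>), %ptr:_(p0) :: (store 16)
  // and the original store is erased; loads get the mirror-image rewrite.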

  Register ValReg = MI.getOperand(0).getReg();
  const LLT ValTy = MRI.getType(ValReg);

  if (!ValTy.isVector() || !ValTy.getElementType().isPointer() ||
      ValTy.getElementType().getAddressSpace() != 0) {
    LLVM_DEBUG(dbgs() << "Tried to do custom legalization on wrong load/store");
    return false;
  }

  MIRBuilder.setInstr(MI);
  unsigned PtrSize = ValTy.getElementType().getSizeInBits();
  const LLT NewTy = LLT::vector(ValTy.getNumElements(), PtrSize);
  auto &MMO = **MI.memoperands_begin();
  if (MI.getOpcode() == TargetOpcode::G_STORE) {
    auto Bitcast = MIRBuilder.buildBitcast({NewTy}, {ValReg});
    MIRBuilder.buildStore(Bitcast.getReg(0), MI.getOperand(1).getReg(), MMO);
  } else {
    Register NewReg = MRI.createGenericVirtualRegister(NewTy);
    auto NewLoad = MIRBuilder.buildLoad(NewReg, MI.getOperand(1).getReg(), MMO);
    MIRBuilder.buildBitcast({ValReg}, {NewLoad});
  }
  MI.eraseFromParent();
  return true;
}

bool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI,
                                         MachineRegisterInfo &MRI,
                                         MachineIRBuilder &MIRBuilder) const {
  MIRBuilder.setInstr(MI);
  MachineFunction &MF = MIRBuilder.getMF();
  unsigned Align = MI.getOperand(2).getImm();
  Register Dst = MI.getOperand(0).getReg();
  Register ListPtr = MI.getOperand(1).getReg();

  LLT PtrTy = MRI.getType(ListPtr);
  LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());

  const unsigned PtrSize = PtrTy.getSizeInBits() / 8;
  Register List = MRI.createGenericVirtualRegister(PtrTy);
  MIRBuilder.buildLoad(
      List, ListPtr,
      *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
                               PtrSize, /* Align = */ PtrSize));

  Register DstPtr;
  if (Align > PtrSize) {
    // Realign the list to the actual required alignment.
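    // Numeric sketch (assuming Align = 16 and PtrSize = 8): for a loaded list
    // pointer of 0x1004, adding Align - 1 = 15 gives 0x1013, and clearing the
    // low Log2_64(16) = 4 bits yields the realigned address 0x1010.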
    auto AlignMinus1 = MIRBuilder.buildConstant(IntPtrTy, Align - 1);

    auto ListTmp = MIRBuilder.buildGEP(PtrTy, List, AlignMinus1.getReg(0));

    DstPtr = MRI.createGenericVirtualRegister(PtrTy);
    MIRBuilder.buildPtrMask(DstPtr, ListTmp, Log2_64(Align));
  } else
    DstPtr = List;

  uint64_t ValSize = MRI.getType(Dst).getSizeInBits() / 8;
  MIRBuilder.buildLoad(
      Dst, DstPtr,
      *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
                               ValSize, std::max(Align, PtrSize)));

  auto Size = MIRBuilder.buildConstant(IntPtrTy, alignTo(ValSize, PtrSize));

  auto NewList = MIRBuilder.buildGEP(PtrTy, DstPtr, Size.getReg(0));

  MIRBuilder.buildStore(
      NewList, ListPtr,
      *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOStore,
                               PtrSize, /* Align = */ PtrSize));

  MI.eraseFromParent();
  return true;
}