1//===- AArch64LegalizerInfo.cpp ----------------------------------*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file implements the targeting of the MachineLegalizer class for
10/// AArch64.
11/// \todo This should be generated by TableGen.
12//===----------------------------------------------------------------------===//
13
16#include "AArch64Subtarget.h"
17#include "llvm/ADT/STLExtras.h"
29#include "llvm/IR/Intrinsics.h"
30#include "llvm/IR/IntrinsicsAArch64.h"
31#include "llvm/IR/Type.h"
33#include <initializer_list>
34
35#define DEBUG_TYPE "aarch64-legalinfo"
36
37using namespace llvm;
38using namespace LegalizeActions;
39using namespace LegalizeMutations;
40using namespace LegalityPredicates;
41using namespace MIPatternMatch;
42
43AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
44 : ST(&ST) {
45 using namespace TargetOpcode;
46 const LLT p0 = LLT::pointer(0, 64);
47 const LLT s8 = LLT::scalar(8);
48 const LLT s16 = LLT::scalar(16);
49 const LLT s32 = LLT::scalar(32);
50 const LLT s64 = LLT::scalar(64);
51 const LLT s128 = LLT::scalar(128);
52 const LLT v16s8 = LLT::fixed_vector(16, 8);
53 const LLT v8s8 = LLT::fixed_vector(8, 8);
54 const LLT v4s8 = LLT::fixed_vector(4, 8);
55 const LLT v8s16 = LLT::fixed_vector(8, 16);
56 const LLT v4s16 = LLT::fixed_vector(4, 16);
57 const LLT v2s16 = LLT::fixed_vector(2, 16);
58 const LLT v2s32 = LLT::fixed_vector(2, 32);
59 const LLT v4s32 = LLT::fixed_vector(4, 32);
60 const LLT v2s64 = LLT::fixed_vector(2, 64);
61 const LLT v2p0 = LLT::fixed_vector(2, p0);
62
63 std::initializer_list<LLT> PackedVectorAllTypeList = {/* Begin 128bit types */
64 v16s8, v8s16, v4s32,
65 v2s64, v2p0,
66 /* End 128bit types */
67 /* Begin 64bit types */
68 v8s8, v4s16, v2s32};
69 std::initializer_list<LLT> ScalarAndPtrTypesList = {s8, s16, s32, s64, p0};
70 SmallVector<LLT, 8> PackedVectorAllTypesVec(PackedVectorAllTypeList);
71 SmallVector<LLT, 8> ScalarAndPtrTypesVec(ScalarAndPtrTypesList);
72
73 const TargetMachine &TM = ST.getTargetLowering()->getTargetMachine();
74
75 // FIXME: support subtargets which have neon/fp-armv8 disabled.
76 if (!ST.hasNEON() || !ST.hasFPARMv8()) {
77 getLegacyLegalizerInfo().computeTables();
78 return;
79 }
80
81 // Some instructions only support s16 if the subtarget has full 16-bit FP
82 // support.
83 const bool HasFP16 = ST.hasFullFP16();
84 const LLT &MinFPScalar = HasFP16 ? s16 : s32;
85
86 const bool HasCSSC = ST.hasCSSC();
87 const bool HasRCPC3 = ST.hasRCPC3();
88
89 getActionDefinitionsBuilder(
90 {G_IMPLICIT_DEF, G_FREEZE, G_CONSTANT_FOLD_BARRIER})
91 .legalFor({p0, s8, s16, s32, s64})
92 .legalFor(PackedVectorAllTypeList)
94 .clampScalar(0, s8, s64)
96 [=](const LegalityQuery &Query) {
97 return Query.Types[0].isVector() &&
98 (Query.Types[0].getElementType() != s64 ||
99 Query.Types[0].getNumElements() != 2);
100 },
101 [=](const LegalityQuery &Query) {
102 LLT EltTy = Query.Types[0].getElementType();
103 if (EltTy == s64)
104 return std::make_pair(0, LLT::fixed_vector(2, 64));
105 return std::make_pair(0, EltTy);
106 });
107
108 getActionDefinitionsBuilder(G_PHI)
109 .legalFor({p0, s16, s32, s64})
110 .legalFor(PackedVectorAllTypeList)
112 .clampScalar(0, s16, s64)
113 // Maximum: sN * k = 128
114 .clampMaxNumElements(0, s8, 16)
115 .clampMaxNumElements(0, s16, 8)
116 .clampMaxNumElements(0, s32, 4)
117 .clampMaxNumElements(0, s64, 2)
118 .clampMaxNumElements(0, p0, 2);
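 // Illustrative: with these clamps a v4s64 G_PHI is first narrowed to v2s64
 // (2 x s64 = 128 bits), the widest vector that fits in a single Q register;
 // the other element types are capped the same way so sN * k never exceeds 128.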
119
120 getActionDefinitionsBuilder(G_BSWAP)
121 .legalFor({s32, s64, v4s32, v2s32, v2s64})
122 .widenScalarToNextPow2(0)
123 .clampScalar(0, s32, s64);
124
125 getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR})
126 .legalFor({s32, s64, v2s32, v2s64, v4s32, v4s16, v8s16, v16s8, v8s8})
127 .widenScalarToNextPow2(0)
128 .clampScalar(0, s32, s64)
129 .clampMaxNumElements(0, s8, 16)
130 .clampMaxNumElements(0, s16, 8)
131 .clampNumElements(0, v2s32, v4s32)
132 .clampNumElements(0, v2s64, v2s64)
133 .minScalarOrEltIf(
134 [=](const LegalityQuery &Query) {
135 return Query.Types[0].getNumElements() <= 2;
136 },
137 0, s32)
138 .minScalarOrEltIf(
139 [=](const LegalityQuery &Query) {
140 return Query.Types[0].getNumElements() <= 4;
141 },
142 0, s16)
143 .minScalarOrEltIf(
144 [=](const LegalityQuery &Query) {
145 return Query.Types[0].getNumElements() <= 16;
146 },
147 0, s8)
148 .moreElementsToNextPow2(0);
149
150 getActionDefinitionsBuilder({G_SHL, G_ASHR, G_LSHR})
151 .customIf([=](const LegalityQuery &Query) {
152 const auto &SrcTy = Query.Types[0];
153 const auto &AmtTy = Query.Types[1];
154 return !SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
155 AmtTy.getSizeInBits() == 32;
156 })
157 .legalFor({
158 {s32, s32},
159 {s32, s64},
160 {s64, s64},
161 {v8s8, v8s8},
162 {v16s8, v16s8},
163 {v4s16, v4s16},
164 {v8s16, v8s16},
165 {v2s32, v2s32},
166 {v4s32, v4s32},
167 {v2s64, v2s64},
168 })
169 .widenScalarToNextPow2(0)
170 .clampScalar(1, s32, s64)
171 .clampScalar(0, s32, s64)
172 .clampNumElements(0, v2s32, v4s32)
173 .clampNumElements(0, v2s64, v2s64)
175 .minScalarSameAs(1, 0);
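 // Illustrative: minScalarSameAs(1, 0) widens the shift amount (operand 1) to
 // at least the width of the shifted value, so an s64 value shifted by an s32
 // amount becomes an s64-by-s64 shift that the imported patterns can select.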
176
177 getActionDefinitionsBuilder(G_PTR_ADD)
178 .legalFor({{p0, s64}, {v2p0, v2s64}})
179 .clampScalar(1, s64, s64);
180
181 getActionDefinitionsBuilder(G_PTRMASK).legalFor({{p0, s64}});
182
183 getActionDefinitionsBuilder({G_SDIV, G_UDIV})
184 .legalFor({s32, s64})
185 .libcallFor({s128})
186 .clampScalar(0, s32, s64)
188 .scalarize(0);
189
190 getActionDefinitionsBuilder({G_SREM, G_UREM, G_SDIVREM, G_UDIVREM})
191 .lowerFor({s8, s16, s32, s64, v2s64, v4s32, v2s32})
193 .clampScalarOrElt(0, s32, s64)
194 .clampNumElements(0, v2s32, v4s32)
195 .clampNumElements(0, v2s64, v2s64)
196 .moreElementsToNextPow2(0);
197
198
199 getActionDefinitionsBuilder({G_SMULO, G_UMULO})
200 .widenScalarToNextPow2(0, /*Min = */ 32)
201 .clampScalar(0, s32, s64)
202 .lower();
203
204 getActionDefinitionsBuilder({G_SMULH, G_UMULH})
205 .legalFor({s64, v8s16, v16s8, v4s32})
206 .lower();
207
208 auto &MinMaxActions = getActionDefinitionsBuilder(
209 {G_SMIN, G_SMAX, G_UMIN, G_UMAX});
210 if (HasCSSC)
211 MinMaxActions
212 .legalFor({s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
213 // Make the clamping conditional on the CSSC extension: without legal types
214 // we lower to CMP, which can fold one of the two sxtb's we'd otherwise need
215 // if we detect a type smaller than 32-bit.
216 .minScalar(0, s32);
217 else
218 MinMaxActions
219 .legalFor({v8s8, v16s8, v4s16, v8s16, v2s32, v4s32});
220 MinMaxActions
221 .clampNumElements(0, v8s8, v16s8)
222 .clampNumElements(0, v4s16, v8s16)
223 .clampNumElements(0, v2s32, v4s32)
224 // FIXME: This shouldn't be needed as v2s64 types are going to
225 // be expanded anyway, but G_ICMP doesn't support splitting vectors yet
226 .clampNumElements(0, v2s64, v2s64)
227 .lower();
228
229 getActionDefinitionsBuilder(
230 {G_SADDE, G_SSUBE, G_UADDE, G_USUBE, G_SADDO, G_SSUBO, G_UADDO, G_USUBO})
231 .legalFor({{s32, s32}, {s64, s32}})
232 .clampScalar(0, s32, s64)
233 .clampScalar(1, s32, s64)
234 .widenScalarToNextPow2(0);
235
236 getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FMA, G_FNEG,
237 G_FABS, G_FSQRT, G_FMAXNUM, G_FMINNUM,
238 G_FMAXIMUM, G_FMINIMUM, G_FCEIL, G_FFLOOR,
239 G_FRINT, G_FNEARBYINT, G_INTRINSIC_TRUNC,
240 G_INTRINSIC_ROUND, G_INTRINSIC_ROUNDEVEN})
241 .legalFor({MinFPScalar, s32, s64, v2s32, v4s32, v2s64})
242 .legalIf([=](const LegalityQuery &Query) {
243 const auto &Ty = Query.Types[0];
244 return (Ty == v8s16 || Ty == v4s16) && HasFP16;
245 })
246 .libcallFor({s128})
247 .minScalarOrElt(0, MinFPScalar)
248 .clampNumElements(0, v4s16, v8s16)
249 .clampNumElements(0, v2s32, v4s32)
250 .clampNumElements(0, v2s64, v2s64)
252
254 .libcallFor({s32, s64})
255 .minScalar(0, s32)
256 .scalarize(0);
257
258 getActionDefinitionsBuilder(G_INTRINSIC_LRINT)
259 // If we don't have full FP16 support, then scalarize the elements of
260 // vectors containing fp16 types.
262 [=, &ST](const LegalityQuery &Query) {
263 const auto &Ty = Query.Types[0];
264 return Ty.isVector() && Ty.getElementType() == s16 &&
265 !ST.hasFullFP16();
266 },
267 [=](const LegalityQuery &Query) { return std::make_pair(0, s16); })
268 // If we don't have full FP16 support, then widen s16 to s32 if we
269 // encounter it.
270 .widenScalarIf(
271 [=, &ST](const LegalityQuery &Query) {
272 return Query.Types[0] == s16 && !ST.hasFullFP16();
273 },
274 [=](const LegalityQuery &Query) { return std::make_pair(0, s32); })
275 .legalFor({s16, s32, s64, v2s32, v4s32, v2s64, v2s16, v4s16, v8s16});
276
277 getActionDefinitionsBuilder(
278 {G_FCOS, G_FSIN, G_FPOW, G_FLOG, G_FLOG2, G_FLOG10,
279 G_FEXP, G_FEXP2, G_FEXP10})
280 // We need a call for these, so we always need to scalarize.
281 .scalarize(0)
282 // Regardless of FP16 support, widen 16-bit elements to 32-bits.
283 .minScalar(0, s32)
284 .libcallFor({s32, s64});
285
287 .legalIf(all(typeInSet(0, {s32, s64, p0}),
288 typeInSet(1, {s8, s16, s32}), smallerThan(1, 0)))
290 .clampScalar(0, s32, s64)
292 .minScalar(1, s8)
293 .maxScalarIf(typeInSet(0, {s32}), 1, s16)
294 .maxScalarIf(typeInSet(0, {s64, p0}), 1, s32);
295
297 .legalIf(all(typeInSet(0, {s16, s32, s64, p0}),
298 typeInSet(1, {s32, s64, s128, p0}), smallerThan(0, 1)))
300 .clampScalar(1, s32, s128)
302 .minScalar(0, s16)
303 .maxScalarIf(typeInSet(1, {s32}), 0, s16)
304 .maxScalarIf(typeInSet(1, {s64, p0}), 0, s32)
305 .maxScalarIf(typeInSet(1, {s128}), 0, s64);
306
307
308 for (unsigned Op : {G_SEXTLOAD, G_ZEXTLOAD}) {
309 auto &Actions = getActionDefinitionsBuilder(Op);
310
311 if (Op == G_SEXTLOAD)
312 Actions.lowerIf(atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Unordered));
313
314 // Atomics have zero extending behavior.
315 Actions
316 .legalForTypesWithMemDesc({{s32, p0, s8, 8},
317 {s32, p0, s16, 8},
318 {s32, p0, s32, 8},
319 {s64, p0, s8, 2},
320 {s64, p0, s16, 2},
321 {s64, p0, s32, 4},
322 {s64, p0, s64, 8},
323 {p0, p0, s64, 8},
324 {v2s32, p0, s64, 8}})
325 .widenScalarToNextPow2(0)
326 .clampScalar(0, s32, s64)
327 // TODO: We could support sum-of-pow2's but the lowering code doesn't know
328 // how to do that yet.
329 .unsupportedIfMemSizeNotPow2()
330 // Lower anything left over into G_*EXT and G_LOAD
331 .lower();
332 }
333
334 auto IsPtrVecPred = [=](const LegalityQuery &Query) {
335 const LLT &ValTy = Query.Types[0];
336 if (!ValTy.isVector())
337 return false;
338 const LLT EltTy = ValTy.getElementType();
339 return EltTy.isPointer() && EltTy.getAddressSpace() == 0;
340 };
341
342 getActionDefinitionsBuilder(G_LOAD)
343 .customIf([=](const LegalityQuery &Query) {
344 return HasRCPC3 && Query.Types[0] == s128 &&
345 Query.MMODescrs[0].Ordering == AtomicOrdering::Acquire;
346 })
347 .customIf([=](const LegalityQuery &Query) {
348 return Query.Types[0] == s128 &&
349 Query.MMODescrs[0].Ordering != AtomicOrdering::NotAtomic;
350 })
351 .legalForTypesWithMemDesc({{s8, p0, s8, 8},
352 {s16, p0, s16, 8},
353 {s32, p0, s32, 8},
354 {s64, p0, s64, 8},
355 {p0, p0, s64, 8},
356 {s128, p0, s128, 8},
357 {v8s8, p0, s64, 8},
358 {v16s8, p0, s128, 8},
359 {v4s16, p0, s64, 8},
360 {v8s16, p0, s128, 8},
361 {v2s32, p0, s64, 8},
362 {v4s32, p0, s128, 8},
363 {v2s64, p0, s128, 8}})
364 // These extends are also legal
365 .legalForTypesWithMemDesc({{s32, p0, s8, 8}, {s32, p0, s16, 8}})
366 .widenScalarToNextPow2(0, /* MinSize = */ 8)
368 .clampScalar(0, s8, s64)
370 [=](const LegalityQuery &Query) {
371 // Clamp extending load results to 32-bits.
372 return Query.Types[0].isScalar() &&
373 Query.Types[0] != Query.MMODescrs[0].MemoryTy &&
374 Query.Types[0].getSizeInBits() > 32;
375 },
376 changeTo(0, s32))
377 .clampMaxNumElements(0, s8, 16)
378 .clampMaxNumElements(0, s16, 8)
379 .clampMaxNumElements(0, s32, 4)
380 .clampMaxNumElements(0, s64, 2)
381 .clampMaxNumElements(0, p0, 2)
382 .customIf(IsPtrVecPred)
383 .scalarizeIf(typeIs(0, v2s16), 0);
384
385 getActionDefinitionsBuilder(G_STORE)
386 .customIf([=](const LegalityQuery &Query) {
387 return HasRCPC3 && Query.Types[0] == s128 &&
388 Query.MMODescrs[0].Ordering == AtomicOrdering::Release;
389 })
390 .customIf([=](const LegalityQuery &Query) {
391 return Query.Types[0] == s128 &&
392 Query.MMODescrs[0].Ordering != AtomicOrdering::NotAtomic;
393 })
394 .legalForTypesWithMemDesc(
395 {{s8, p0, s8, 8}, {s16, p0, s8, 8}, // truncstorei8 from s16
396 {s32, p0, s8, 8}, // truncstorei8 from s32
397 {s64, p0, s8, 8}, // truncstorei8 from s64
398 {s16, p0, s16, 8}, {s32, p0, s16, 8}, // truncstorei16 from s32
399 {s64, p0, s16, 8}, // truncstorei16 from s64
400 {s32, p0, s8, 8}, {s32, p0, s16, 8}, {s32, p0, s32, 8},
401 {s64, p0, s64, 8}, {s64, p0, s32, 8}, // truncstorei32 from s64
402 {p0, p0, s64, 8}, {s128, p0, s128, 8}, {v16s8, p0, s128, 8},
403 {v8s8, p0, s64, 8}, {v4s16, p0, s64, 8}, {v8s16, p0, s128, 8},
404 {v2s32, p0, s64, 8}, {v4s32, p0, s128, 8}, {v2s64, p0, s128, 8}})
405 .clampScalar(0, s8, s64)
406 .lowerIf([=](const LegalityQuery &Query) {
407 return Query.Types[0].isScalar() &&
408 Query.Types[0] != Query.MMODescrs[0].MemoryTy;
409 })
410 // Maximum: sN * k = 128
411 .clampMaxNumElements(0, s8, 16)
412 .clampMaxNumElements(0, s16, 8)
413 .clampMaxNumElements(0, s32, 4)
414 .clampMaxNumElements(0, s64, 2)
415 .clampMaxNumElements(0, p0, 2)
417 .customIf(IsPtrVecPred)
418 .scalarizeIf(typeIs(0, v2s16), 0);
419
420 getActionDefinitionsBuilder(G_INDEXED_STORE)
421 // Idx 0 == Ptr, Idx 1 == Val
422 // TODO: we can implement legalizations but as of now these are
423 // generated in a very specific way.
424 .legalForTypesWithMemDesc({
425 {p0, s8, s8, 8},
426 {p0, s16, s16, 8},
427 {p0, s32, s8, 8},
428 {p0, s32, s16, 8},
429 {p0, s32, s32, 8},
430 {p0, s64, s64, 8},
431 {p0, p0, p0, 8},
432 {p0, v8s8, v8s8, 8},
433 {p0, v16s8, v16s8, 8},
434 {p0, v4s16, v4s16, 8},
435 {p0, v8s16, v8s16, 8},
436 {p0, v2s32, v2s32, 8},
437 {p0, v4s32, v4s32, 8},
438 {p0, v2s64, v2s64, 8},
439 {p0, v2p0, v2p0, 8},
440 {p0, s128, s128, 8},
441 })
442 .unsupported();
443
444 auto IndexedLoadBasicPred = [=](const LegalityQuery &Query) {
445 LLT LdTy = Query.Types[0];
446 LLT PtrTy = Query.Types[1];
447 if (llvm::find(PackedVectorAllTypesVec, LdTy) ==
448 PackedVectorAllTypesVec.end() &&
449 llvm::find(ScalarAndPtrTypesVec, LdTy) == ScalarAndPtrTypesVec.end() &&
450 LdTy != s128)
451 return false;
452 if (PtrTy != p0)
453 return false;
454 return true;
455 };
456 getActionDefinitionsBuilder(G_INDEXED_LOAD)
459 .legalIf(IndexedLoadBasicPred)
460 .unsupported();
461 getActionDefinitionsBuilder({G_INDEXED_SEXTLOAD, G_INDEXED_ZEXTLOAD})
462 .unsupportedIf(
464 .legalIf(all(typeInSet(0, {s16, s32, s64}),
465 LegalityPredicate([=](const LegalityQuery &Q) {
466 LLT LdTy = Q.Types[0];
467 LLT PtrTy = Q.Types[1];
468 LLT MemTy = Q.MMODescrs[0].MemoryTy;
469 if (PtrTy != p0)
470 return false;
471 if (LdTy == s16)
472 return MemTy == s8;
473 if (LdTy == s32)
474 return MemTy == s8 || MemTy == s16;
475 if (LdTy == s64)
476 return MemTy == s8 || MemTy == s16 || MemTy == s32;
477 return false;
478 })))
479 .unsupported();
480
481 // Constants
482 getActionDefinitionsBuilder(G_CONSTANT)
483 .legalFor({p0, s8, s16, s32, s64})
484 .widenScalarToNextPow2(0)
485 .clampScalar(0, s8, s64);
486 getActionDefinitionsBuilder(G_FCONSTANT)
487 .legalIf([=](const LegalityQuery &Query) {
488 const auto &Ty = Query.Types[0];
489 if (HasFP16 && Ty == s16)
490 return true;
491 return Ty == s32 || Ty == s64 || Ty == s128;
492 })
493 .clampScalar(0, MinFPScalar, s128);
494
495 getActionDefinitionsBuilder(G_ICMP)
496 .legalFor({{s32, s32},
497 {s32, s64},
498 {s32, p0},
499 {v4s32, v4s32},
500 {v2s32, v2s32},
501 {v2s64, v2s64},
502 {v2s64, v2p0},
503 {v4s16, v4s16},
504 {v8s16, v8s16},
505 {v8s8, v8s8},
506 {v16s8, v16s8}})
508 .clampScalar(1, s32, s64)
509 .clampScalar(0, s32, s32)
510 .minScalarEltSameAsIf(
511 [=](const LegalityQuery &Query) {
512 const LLT &Ty = Query.Types[0];
513 const LLT &SrcTy = Query.Types[1];
514 return Ty.isVector() && !SrcTy.getElementType().isPointer() &&
515 Ty.getElementType() != SrcTy.getElementType();
516 },
517 0, 1)
518 .minScalarOrEltIf(
519 [=](const LegalityQuery &Query) { return Query.Types[1] == v2s16; },
520 1, s32)
521 .minScalarOrEltIf(
522 [=](const LegalityQuery &Query) { return Query.Types[1] == v2p0; }, 0,
523 s64)
524 .clampNumElements(0, v2s32, v4s32);
525
526 getActionDefinitionsBuilder(G_FCMP)
527 // If we don't have full FP16 support, then scalarize the elements of
528 // vectors containing fp16 types.
530 [=](const LegalityQuery &Query) {
531 const auto &Ty = Query.Types[0];
532 return Ty.isVector() && Ty.getElementType() == s16 && !HasFP16;
533 },
534 [=](const LegalityQuery &Query) { return std::make_pair(0, s16); })
535 // If we don't have full FP16 support, then widen s16 to s32 if we
536 // encounter it.
537 .widenScalarIf(
538 [=](const LegalityQuery &Query) {
539 return Query.Types[0] == s16 && !HasFP16;
540 },
541 [=](const LegalityQuery &Query) { return std::make_pair(0, s32); })
542 .legalFor({{s16, s16},
543 {s32, s32},
544 {s32, s64},
545 {v4s32, v4s32},
546 {v2s32, v2s32},
547 {v2s64, v2s64},
548 {v4s16, v4s16},
549 {v8s16, v8s16}})
551 .clampScalar(1, s32, s64)
552 .clampScalar(0, s32, s32)
553 .minScalarEltSameAsIf(
554 [=](const LegalityQuery &Query) {
555 const LLT &Ty = Query.Types[0];
556 const LLT &SrcTy = Query.Types[1];
557 return Ty.isVector() && !SrcTy.getElementType().isPointer() &&
558 Ty.getElementType() != SrcTy.getElementType();
559 },
560 0, 1)
561 .clampNumElements(0, v2s32, v4s32)
562 .clampMaxNumElements(1, s64, 2);
563
564 // Extensions
565 auto ExtLegalFunc = [=](const LegalityQuery &Query) {
566 unsigned DstSize = Query.Types[0].getSizeInBits();
567
568 // Handle legal vectors using legalFor
569 if (Query.Types[0].isVector())
570 return false;
571
572 if (DstSize < 8 || DstSize >= 128 || !isPowerOf2_32(DstSize))
573 return false; // Extending to a scalar s128 needs narrowing.
574
575 const LLT &SrcTy = Query.Types[1];
576
577 // Make sure we fit in a register otherwise. Don't bother checking that
578 // the source type is below 128 bits. We shouldn't be allowing anything
579 // through which is wider than the destination in the first place.
580 unsigned SrcSize = SrcTy.getSizeInBits();
581 if (SrcSize < 8 || !isPowerOf2_32(SrcSize))
582 return false;
583
584 return true;
585 };
586 getActionDefinitionsBuilder({G_ZEXT, G_SEXT, G_ANYEXT})
587 .legalIf(ExtLegalFunc)
588 .legalFor({{v2s64, v2s32}, {v4s32, v4s16}, {v8s16, v8s8}})
589 .clampScalar(0, s64, s64) // Just for s128, others are handled above.
591 .clampMaxNumElements(1, s8, 8)
592 .clampMaxNumElements(1, s16, 4)
593 .clampMaxNumElements(1, s32, 2)
594 // Tries to convert a large EXTEND into two smaller EXTENDs
595 .lowerIf([=](const LegalityQuery &Query) {
596 return (Query.Types[0].getScalarSizeInBits() >
597 Query.Types[1].getScalarSizeInBits() * 2) &&
598 Query.Types[0].isVector() &&
599 (Query.Types[1].getScalarSizeInBits() == 8 ||
600 Query.Types[1].getScalarSizeInBits() == 16);
601 });
602
604 .legalFor({{v2s32, v2s64}, {v4s16, v4s32}, {v8s8, v8s16}})
606 .clampMaxNumElements(0, s8, 8)
607 .clampMaxNumElements(0, s16, 4)
608 .clampMaxNumElements(0, s32, 2)
609 .minScalarOrEltIf(
610 [=](const LegalityQuery &Query) { return Query.Types[0].isVector(); },
611 0, s8)
612 .lowerIf([=](const LegalityQuery &Query) {
613 LLT DstTy = Query.Types[0];
614 LLT SrcTy = Query.Types[1];
615 return DstTy.isVector() && (SrcTy.getSizeInBits() > 128 ||
616 (DstTy.getScalarSizeInBits() * 2 <
617 SrcTy.getScalarSizeInBits()));
618 })
619
620 .alwaysLegal();
621
622 getActionDefinitionsBuilder(G_SEXT_INREG)
623 .legalFor({s32, s64})
624 .legalFor(PackedVectorAllTypeList)
625 .maxScalar(0, s64)
626 .lower();
627
628 // FP conversions
630 .legalFor(
631 {{s16, s32}, {s16, s64}, {s32, s64}, {v4s16, v4s32}, {v2s32, v2s64}})
632 .clampNumElements(0, v4s16, v4s16)
633 .clampNumElements(0, v2s32, v2s32)
634 .scalarize(0);
635
637 .legalFor(
638 {{s32, s16}, {s64, s16}, {s64, s32}, {v4s32, v4s16}, {v2s64, v2s32}})
639 .clampNumElements(0, v4s32, v4s32)
640 .clampNumElements(0, v2s64, v2s64)
641 .scalarize(0);
642
643 // Conversions
644 getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
645 .legalForCartesianProduct({s32, s64, v2s64, v4s32, v2s32})
646 .legalIf([=](const LegalityQuery &Query) {
647 return HasFP16 &&
648 (Query.Types[1] == s16 || Query.Types[1] == v4s16 ||
649 Query.Types[1] == v8s16) &&
650 (Query.Types[0] == s32 || Query.Types[0] == s64 ||
651 Query.Types[0] == v4s16 || Query.Types[0] == v8s16);
652 })
653 .widenScalarToNextPow2(0)
654 .clampScalar(0, s32, s64)
656 .clampScalarOrElt(1, MinFPScalar, s64)
659 [=](const LegalityQuery &Query) {
660 return Query.Types[0].getScalarSizeInBits() >
661 Query.Types[1].getScalarSizeInBits();
662 },
664 .widenScalarIf(
665 [=](const LegalityQuery &Query) {
666 return Query.Types[0].getScalarSizeInBits() <
667 Query.Types[1].getScalarSizeInBits();
668 },
670 .clampNumElements(0, v4s16, v8s16)
671 .clampNumElements(0, v2s32, v4s32)
672 .clampMaxNumElements(0, s64, 2);
673
674 getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
675 .legalForCartesianProduct({s32, s64, v2s64, v4s32, v2s32})
676 .legalIf([=](const LegalityQuery &Query) {
677 return HasFP16 &&
678 (Query.Types[0] == s16 || Query.Types[0] == v4s16 ||
679 Query.Types[0] == v8s16) &&
680 (Query.Types[1] == s32 || Query.Types[1] == s64 ||
681 Query.Types[1] == v4s16 || Query.Types[1] == v8s16);
682 })
683 .widenScalarToNextPow2(1)
684 .clampScalar(1, s32, s64)
686 .clampScalarOrElt(0, MinFPScalar, s64)
689 [=](const LegalityQuery &Query) {
690 return Query.Types[0].getScalarSizeInBits() <
691 Query.Types[1].getScalarSizeInBits();
692 },
694 .widenScalarIf(
695 [=](const LegalityQuery &Query) {
696 return Query.Types[0].getScalarSizeInBits() >
697 Query.Types[1].getScalarSizeInBits();
698 },
700 .clampNumElements(0, v4s16, v8s16)
701 .clampNumElements(0, v2s32, v4s32)
702 .clampMaxNumElements(0, s64, 2);
703
704 // Control-flow
705 getActionDefinitionsBuilder(G_BRCOND)
706 .legalFor({s32})
707 .clampScalar(0, s32, s32);
708 getActionDefinitionsBuilder(G_BRINDIRECT).legalFor({p0});
709
710 getActionDefinitionsBuilder(G_SELECT)
711 .legalFor({{s32, s32}, {s64, s32}, {p0, s32}})
712 .widenScalarToNextPow2(0)
713 .clampScalar(0, s32, s64)
714 .clampScalar(1, s32, s32)
716 .lowerIf(isVector(0));
717
718 // Pointer-handling
719 getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0});
720
721 if (TM.getCodeModel() == CodeModel::Small)
722 getActionDefinitionsBuilder(G_GLOBAL_VALUE).custom();
723 else
724 getActionDefinitionsBuilder(G_GLOBAL_VALUE).legalFor({p0});
725
726 getActionDefinitionsBuilder(G_PTRTOINT)
727 .legalFor({{s64, p0}, {v2s64, v2p0}})
728 .widenScalarToNextPow2(0, 64)
729 .clampScalar(0, s64, s64);
730
731 getActionDefinitionsBuilder(G_INTTOPTR)
732 .unsupportedIf([&](const LegalityQuery &Query) {
733 return Query.Types[0].getSizeInBits() != Query.Types[1].getSizeInBits();
734 })
735 .legalFor({{p0, s64}, {v2p0, v2s64}});
736
737 // Casts for 32 and 64-bit width type are just copies.
738 // Same for 128-bit width type, except they are on the FPR bank.
739 getActionDefinitionsBuilder(G_BITCAST)
740 // FIXME: This is wrong since G_BITCAST is not allowed to change the
741 // number of bits but it's what the previous code described and fixing
742 // it breaks tests.
743 .legalForCartesianProduct({s8, s16, s32, s64, s128, v16s8, v8s8, v4s8,
744 v8s16, v4s16, v2s16, v4s32, v2s32, v2s64,
745 v2p0});
746
747 getActionDefinitionsBuilder(G_VASTART).legalFor({p0});
748
749 // va_list must be a pointer, but most sized types are pretty easy to handle
750 // as the destination.
751 getActionDefinitionsBuilder(G_VAARG)
752 .customForCartesianProduct({s8, s16, s32, s64, p0}, {p0})
753 .clampScalar(0, s8, s64)
754 .widenScalarToNextPow2(0, /*Min*/ 8);
755
756 getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG_WITH_SUCCESS)
757 .lowerIf(
758 all(typeInSet(0, {s8, s16, s32, s64, s128}), typeIs(2, p0)));
759
760 getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG)
761 .customIf([](const LegalityQuery &Query) {
762 return Query.Types[0].getSizeInBits() == 128;
763 })
764 .clampScalar(0, s32, s64)
765 .legalIf(all(typeInSet(0, {s32, s64}), typeIs(1, p0)));
766
767 getActionDefinitionsBuilder(
768 {G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD, G_ATOMICRMW_SUB, G_ATOMICRMW_AND,
769 G_ATOMICRMW_OR, G_ATOMICRMW_XOR, G_ATOMICRMW_MIN, G_ATOMICRMW_MAX,
770 G_ATOMICRMW_UMIN, G_ATOMICRMW_UMAX})
771 .clampScalar(0, s32, s64)
772 .legalIf(all(typeInSet(0, {s32, s64}), typeIs(1, p0)));
773
774 getActionDefinitionsBuilder(G_BLOCK_ADDR).legalFor({p0});
775
776 // Merge/Unmerge
777 for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
778 unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
779 unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;
780 getActionDefinitionsBuilder(Op)
781 .widenScalarToNextPow2(LitTyIdx, 8)
782 .widenScalarToNextPow2(BigTyIdx, 32)
783 .clampScalar(LitTyIdx, s8, s64)
784 .clampScalar(BigTyIdx, s32, s128)
785 .legalIf([=](const LegalityQuery &Q) {
786 switch (Q.Types[BigTyIdx].getSizeInBits()) {
787 case 32:
788 case 64:
789 case 128:
790 break;
791 default:
792 return false;
793 }
794 switch (Q.Types[LitTyIdx].getSizeInBits()) {
795 case 8:
796 case 16:
797 case 32:
798 case 64:
799 return true;
800 default:
801 return false;
802 }
803 });
804 }
805
806 getActionDefinitionsBuilder(G_EXTRACT_VECTOR_ELT)
807 .unsupportedIf([=](const LegalityQuery &Query) {
808 const LLT &EltTy = Query.Types[1].getElementType();
809 return Query.Types[0] != EltTy;
810 })
811 .minScalar(2, s64)
812 .customIf([=](const LegalityQuery &Query) {
813 const LLT &VecTy = Query.Types[1];
814 return VecTy == v2s16 || VecTy == v4s16 || VecTy == v8s16 ||
815 VecTy == v4s32 || VecTy == v2s64 || VecTy == v2s32 ||
816 VecTy == v8s8 || VecTy == v16s8 || VecTy == v2p0;
817 })
818 .minScalarOrEltIf(
819 [=](const LegalityQuery &Query) {
820 // We want to promote <M x s1> to <M x s64> if that wouldn't
821 // cause the total vec size to be > 128b.
822 return Query.Types[1].getNumElements() <= 2;
823 },
824 0, s64)
825 .minScalarOrEltIf(
826 [=](const LegalityQuery &Query) {
827 return Query.Types[1].getNumElements() <= 4;
828 },
829 0, s32)
830 .minScalarOrEltIf(
831 [=](const LegalityQuery &Query) {
832 return Query.Types[1].getNumElements() <= 8;
833 },
834 0, s16)
835 .minScalarOrEltIf(
836 [=](const LegalityQuery &Query) {
837 return Query.Types[1].getNumElements() <= 16;
838 },
839 0, s8)
840 .minScalarOrElt(0, s8) // Worst case, we need at least s8.
841 .clampMaxNumElements(1, s64, 2)
842 .clampMaxNumElements(1, s32, 4)
843 .clampMaxNumElements(1, s16, 8)
844 .clampMaxNumElements(1, p0, 2);
845
846 getActionDefinitionsBuilder(G_INSERT_VECTOR_ELT)
847 .legalIf(typeInSet(0, {v16s8, v8s8, v8s16, v4s16, v4s32, v2s32, v2s64}))
849
850 getActionDefinitionsBuilder(G_BUILD_VECTOR)
851 .legalFor({{v8s8, s8},
852 {v16s8, s8},
853 {v4s16, s16},
854 {v8s16, s16},
855 {v2s32, s32},
856 {v4s32, s32},
857 {v2p0, p0},
858 {v2s64, s64}})
859 .clampNumElements(0, v4s32, v4s32)
860 .clampNumElements(0, v2s64, v2s64)
861 .minScalarOrElt(0, s8)
863 .minScalarSameAs(1, 0);
864
865 getActionDefinitionsBuilder(G_BUILD_VECTOR_TRUNC).lower();
866
869 {s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
870 .scalarize(1)
871 .widenScalarToNextPow2(1, /*Min=*/32)
872 .clampScalar(1, s32, s64)
873 .scalarSameSizeAs(0, 1);
874 getActionDefinitionsBuilder(G_CTLZ_ZERO_UNDEF).lower();
875
876 // TODO: Custom lowering for v2s32, v4s32, v2s64.
877 getActionDefinitionsBuilder(G_BITREVERSE)
878 .legalFor({s32, s64, v8s8, v16s8})
879 .widenScalarToNextPow2(0, /*Min = */ 32)
880 .clampScalar(0, s32, s64);
881
882 getActionDefinitionsBuilder(G_CTTZ_ZERO_UNDEF).lower();
883
884 getActionDefinitionsBuilder(G_CTTZ)
885 .lowerIf(isVector(0))
886 .widenScalarToNextPow2(1, /*Min=*/32)
887 .clampScalar(1, s32, s64)
888 .scalarSameSizeAs(0, 1)
889 .legalIf([=](const LegalityQuery &Query) {
890 return (HasCSSC && typeInSet(0, {s32, s64})(Query));
891 })
892 .customIf([=](const LegalityQuery &Query) {
893 return (!HasCSSC && typeInSet(0, {s32, s64})(Query));
894 });
895
896 getActionDefinitionsBuilder(G_SHUFFLE_VECTOR)
897 .legalIf([=](const LegalityQuery &Query) {
898 const LLT &DstTy = Query.Types[0];
899 const LLT &SrcTy = Query.Types[1];
900 // For now just support the TBL2 variant which needs the source vectors
901 // to be the same size as the dest.
902 if (DstTy != SrcTy)
903 return false;
904 return llvm::is_contained(
905 {v2s64, v2p0, v2s32, v4s32, v4s16, v16s8, v8s8, v8s16}, DstTy);
906 })
907 // G_SHUFFLE_VECTOR can have scalar sources (from 1 x s vectors), we
908 // just want those lowered into G_BUILD_VECTOR
909 .lowerIf([=](const LegalityQuery &Query) {
910 return !Query.Types[1].isVector();
911 })
912 .moreElementsIf(
913 [](const LegalityQuery &Query) {
914 return Query.Types[0].isVector() && Query.Types[1].isVector() &&
915 Query.Types[0].getNumElements() >
916 Query.Types[1].getNumElements();
917 },
918 changeTo(1, 0))
920 .clampNumElements(0, v4s32, v4s32)
921 .clampNumElements(0, v2s64, v2s64)
922 .moreElementsIf(
923 [](const LegalityQuery &Query) {
924 return Query.Types[0].isVector() && Query.Types[1].isVector() &&
925 Query.Types[0].getNumElements() <
926 Query.Types[1].getNumElements();
927 },
928 changeTo(0, 1));
929
930 getActionDefinitionsBuilder(G_CONCAT_VECTORS)
931 .legalFor({{v4s32, v2s32}, {v8s16, v4s16}, {v16s8, v8s8}});
932
933 getActionDefinitionsBuilder(G_JUMP_TABLE).legalFor({p0});
934
935 getActionDefinitionsBuilder(G_BRJT).legalFor({{p0, s64}});
936
937 getActionDefinitionsBuilder({G_DYN_STACKALLOC,
938 G_STACKSAVE,
939 G_STACKRESTORE}).lower();
940
941 if (ST.hasMOPS()) {
942 // G_BZERO is not supported. Currently it is only emitted by
943 // PreLegalizerCombiner for G_MEMSET with zero constant.
944 getActionDefinitionsBuilder(G_BZERO).unsupported();
945
946 getActionDefinitionsBuilder(G_MEMSET)
947 .legalForCartesianProduct({p0}, {s64}, {s64})
948 .customForCartesianProduct({p0}, {s8}, {s64})
949 .immIdx(0); // Inform verifier imm idx 0 is handled.
950
951 getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE})
952 .legalForCartesianProduct({p0}, {p0}, {s64})
953 .immIdx(0); // Inform verifier imm idx 0 is handled.
954
955 // G_MEMCPY_INLINE does not have a tailcall immediate
956 getActionDefinitionsBuilder(G_MEMCPY_INLINE)
957 .legalForCartesianProduct({p0}, {p0}, {s64});
958
959 } else {
960 getActionDefinitionsBuilder({G_BZERO, G_MEMCPY, G_MEMMOVE, G_MEMSET})
961 .libcall();
962 }
963
964 // FIXME: Legal vector types are only legal with NEON.
965 auto &ABSActions = getActionDefinitionsBuilder(G_ABS);
966 if (HasCSSC)
967 ABSActions
968 .legalFor({s32, s64});
969 ABSActions
970 .legalFor(PackedVectorAllTypeList)
971 .lowerIf(isScalar(0));
972
973 // For fadd reductions we have pairwise operations available. We treat the
974 // usual legal types as legal and handle the lowering to pairwise instructions
975 // later.
976 getActionDefinitionsBuilder(G_VECREDUCE_FADD)
977 .legalFor({{s32, v2s32}, {s32, v4s32}, {s64, v2s64}})
978 .legalIf([=](const LegalityQuery &Query) {
979 const auto &Ty = Query.Types[1];
980 return (Ty == v4s16 || Ty == v8s16) && HasFP16;
981 })
982 .minScalarOrElt(0, MinFPScalar)
983 .clampMaxNumElements(1, s64, 2)
984 .clampMaxNumElements(1, s32, 4)
985 .clampMaxNumElements(1, s16, 8)
986 .lower();
987
988 getActionDefinitionsBuilder(G_VECREDUCE_ADD)
989 .legalFor({{s8, v16s8},
990 {s8, v8s8},
991 {s16, v8s16},
992 {s16, v4s16},
993 {s32, v4s32},
994 {s32, v2s32},
995 {s64, v2s64}})
996 .clampMaxNumElements(1, s64, 2)
997 .clampMaxNumElements(1, s32, 4)
998 .clampMaxNumElements(1, s16, 8)
999 .clampMaxNumElements(1, s8, 16)
1000 .lower();
1001
1002 getActionDefinitionsBuilder({G_VECREDUCE_FMIN, G_VECREDUCE_FMAX,
1003 G_VECREDUCE_FMINIMUM, G_VECREDUCE_FMAXIMUM})
1004 .legalFor({{s32, v4s32}, {s32, v2s32}, {s64, v2s64}})
1005 .legalIf([=](const LegalityQuery &Query) {
1006 const auto &Ty = Query.Types[1];
1007 return Query.Types[0] == s16 && (Ty == v8s16 || Ty == v4s16) && HasFP16;
1008 })
1009 .minScalarOrElt(0, MinFPScalar)
1010 .clampMaxNumElements(1, s64, 2)
1011 .clampMaxNumElements(1, s32, 4)
1012 .clampMaxNumElements(1, s16, 8)
1013 .lower();
1014
1015 getActionDefinitionsBuilder(G_VECREDUCE_MUL)
1016 .clampMaxNumElements(1, s32, 2)
1017 .clampMaxNumElements(1, s16, 4)
1018 .clampMaxNumElements(1, s8, 8)
1019 .scalarize(1)
1020 .lower();
1021
1022 getActionDefinitionsBuilder(
1023 {G_VECREDUCE_SMIN, G_VECREDUCE_SMAX, G_VECREDUCE_UMIN, G_VECREDUCE_UMAX})
1024 .legalFor({{s8, v8s8},
1025 {s8, v16s8},
1026 {s16, v4s16},
1027 {s16, v8s16},
1028 {s32, v2s32},
1029 {s32, v4s32}})
1030 .clampMaxNumElements(1, s64, 2)
1031 .clampMaxNumElements(1, s32, 4)
1032 .clampMaxNumElements(1, s16, 8)
1033 .clampMaxNumElements(1, s8, 16)
1034 .scalarize(1)
1035 .lower();
1036
1037 getActionDefinitionsBuilder(
1038 {G_VECREDUCE_OR, G_VECREDUCE_AND, G_VECREDUCE_XOR})
1039 // Try to break down into smaller vectors as long as they're at least 64
1040 // bits. This lets us use vector operations for some parts of the
1041 // reduction.
1042 .fewerElementsIf(
1043 [=](const LegalityQuery &Q) {
1044 LLT SrcTy = Q.Types[1];
1045 if (SrcTy.isScalar())
1046 return false;
1047 if (!isPowerOf2_32(SrcTy.getNumElements()))
1048 return false;
1049 // We can usually perform 64b vector operations.
1050 return SrcTy.getSizeInBits() > 64;
1051 },
1052 [=](const LegalityQuery &Q) {
1053 LLT SrcTy = Q.Types[1];
1054 return std::make_pair(1, SrcTy.divide(2));
1055 })
1056 .scalarize(1)
1057 .lower();
1058
1059 getActionDefinitionsBuilder({G_UADDSAT, G_USUBSAT})
1060 .lowerIf([=](const LegalityQuery &Q) { return Q.Types[0].isScalar(); });
1061
1062 getActionDefinitionsBuilder({G_FSHL, G_FSHR})
1063 .customFor({{s32, s32}, {s32, s64}, {s64, s64}})
1064 .lower();
1065
1066 getActionDefinitionsBuilder(G_ROTR)
1067 .legalFor({{s32, s64}, {s64, s64}})
1068 .customIf([=](const LegalityQuery &Q) {
1069 return Q.Types[0].isScalar() && Q.Types[1].getScalarSizeInBits() < 64;
1070 })
1071 .lower();
1072 getActionDefinitionsBuilder(G_ROTL).lower();
1073
1074 getActionDefinitionsBuilder({G_SBFX, G_UBFX})
1075 .customFor({{s32, s32}, {s64, s64}});
1076
1077 auto always = [=](const LegalityQuery &Q) { return true; };
1078 auto &CTPOPActions = getActionDefinitionsBuilder(G_CTPOP);
1079 if (HasCSSC)
1080 CTPOPActions
1081 .legalFor({{s32, s32},
1082 {s64, s64},
1083 {v8s8, v8s8},
1084 {v16s8, v16s8}})
1085 .customFor({{s128, s128},
1086 {v2s64, v2s64},
1087 {v2s32, v2s32},
1088 {v4s32, v4s32},
1089 {v4s16, v4s16},
1090 {v8s16, v8s16}});
1091 else
1092 CTPOPActions
1093 .legalFor({{v8s8, v8s8},
1094 {v16s8, v16s8}})
1095 .customFor({{s32, s32},
1096 {s64, s64},
1097 {s128, s128},
1098 {v2s64, v2s64},
1099 {v2s32, v2s32},
1100 {v4s32, v4s32},
1101 {v4s16, v4s16},
1102 {v8s16, v8s16}});
1103 CTPOPActions
1104 .clampScalar(0, s32, s128)
1105 .widenScalarToNextPow2(0)
1106 .minScalarEltSameAsIf(always, 1, 0)
1107 .maxScalarEltSameAsIf(always, 1, 0);
1108
1109 // TODO: Vector types.
1110 getActionDefinitionsBuilder({G_SADDSAT, G_SSUBSAT}).lowerIf(isScalar(0));
1111
1112 // TODO: Libcall support for s128.
1113 // TODO: s16 should be legal with full FP16 support.
1114 getActionDefinitionsBuilder({G_LROUND, G_LLROUND})
1115 .legalFor({{s64, s32}, {s64, s64}});
1116
1117 // TODO: Custom legalization for vector types.
1118 // TODO: Custom legalization for mismatched types.
1119 // TODO: s16 support.
1120 getActionDefinitionsBuilder(G_FCOPYSIGN).customFor({{s32, s32}, {s64, s64}});
1121
1123
1124 // Access to floating-point environment.
1125 getActionDefinitionsBuilder({G_GET_FPMODE, G_SET_FPMODE, G_RESET_FPMODE})
1126 .libcall();
1127
1128 getActionDefinitionsBuilder(G_IS_FPCLASS).lower();
1129
1130 getLegacyLegalizerInfo().computeTables();
1131 verify(*ST.getInstrInfo());
1132}
1133
1134bool AArch64LegalizerInfo::legalizeCustom(LegalizerHelper &Helper,
1135 MachineInstr &MI) const {
1136 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
1137 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
1138 GISelChangeObserver &Observer = Helper.Observer;
1139 switch (MI.getOpcode()) {
1140 default:
1141 // No idea what to do.
1142 return false;
1143 case TargetOpcode::G_VAARG:
1144 return legalizeVaArg(MI, MRI, MIRBuilder);
1145 case TargetOpcode::G_LOAD:
1146 case TargetOpcode::G_STORE:
1147 return legalizeLoadStore(MI, MRI, MIRBuilder, Observer);
1148 case TargetOpcode::G_SHL:
1149 case TargetOpcode::G_ASHR:
1150 case TargetOpcode::G_LSHR:
1151 return legalizeShlAshrLshr(MI, MRI, MIRBuilder, Observer);
1152 case TargetOpcode::G_GLOBAL_VALUE:
1153 return legalizeSmallCMGlobalValue(MI, MRI, MIRBuilder, Observer);
1154 case TargetOpcode::G_SBFX:
1155 case TargetOpcode::G_UBFX:
1156 return legalizeBitfieldExtract(MI, MRI, Helper);
1157 case TargetOpcode::G_FSHL:
1158 case TargetOpcode::G_FSHR:
1159 return legalizeFunnelShift(MI, MRI, MIRBuilder, Observer, Helper);
1160 case TargetOpcode::G_ROTR:
1161 return legalizeRotate(MI, MRI, Helper);
1162 case TargetOpcode::G_CTPOP:
1163 return legalizeCTPOP(MI, MRI, Helper);
1164 case TargetOpcode::G_ATOMIC_CMPXCHG:
1165 return legalizeAtomicCmpxchg128(MI, MRI, Helper);
1166 case TargetOpcode::G_CTTZ:
1167 return legalizeCTTZ(MI, Helper);
1168 case TargetOpcode::G_BZERO:
1169 case TargetOpcode::G_MEMCPY:
1170 case TargetOpcode::G_MEMMOVE:
1171 case TargetOpcode::G_MEMSET:
1172 return legalizeMemOps(MI, Helper);
1173 case TargetOpcode::G_FCOPYSIGN:
1174 return legalizeFCopySign(MI, Helper);
1175 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1176 return legalizeExtractVectorElt(MI, MRI, Helper);
1177 }
1178
1179 llvm_unreachable("expected switch to return");
1180}
1181
1182bool AArch64LegalizerInfo::legalizeFunnelShift(MachineInstr &MI,
1183 MachineRegisterInfo &MRI,
1184 MachineIRBuilder &MIRBuilder,
1185 GISelChangeObserver &Observer,
1186 LegalizerHelper &Helper) const {
1187 assert(MI.getOpcode() == TargetOpcode::G_FSHL ||
1188 MI.getOpcode() == TargetOpcode::G_FSHR);
1189
1190 // Keep as G_FSHR if shift amount is a G_CONSTANT, else use generic
1191 // lowering
1192 Register ShiftNo = MI.getOperand(3).getReg();
1193 LLT ShiftTy = MRI.getType(ShiftNo);
1194 auto VRegAndVal = getIConstantVRegValWithLookThrough(ShiftNo, MRI);
1195
1196 // Adjust shift amount according to Opcode (FSHL/FSHR)
1197 // Convert FSHL to FSHR
1198 LLT OperationTy = MRI.getType(MI.getOperand(0).getReg());
1199 APInt BitWidth(ShiftTy.getSizeInBits(), OperationTy.getSizeInBits(), false);
1200
1201 // Lower non-constant shifts and leave zero shifts to the optimizer.
1202 if (!VRegAndVal || VRegAndVal->Value.urem(BitWidth) == 0)
1203 return (Helper.lowerFunnelShiftAsShifts(MI) ==
1204 LegalizerHelper::LegalizeResult::Legalized);
1205
1206 APInt Amount = VRegAndVal->Value.urem(BitWidth);
1207
1208 Amount = MI.getOpcode() == TargetOpcode::G_FSHL ? BitWidth - Amount : Amount;
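 // Illustrative identity behind the flip above: for an s32 operation,
 // fshl(x, y, 3) == fshr(x, y, 32 - 3), so a constant G_FSHL amount C becomes
 // the G_FSHR amount BitWidth - C (the C == 0 case was already sent to the
 // generic lowering earlier).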
1209
1210 // If the instruction is a G_FSHR whose 64-bit shift amount is a G_CONSTANT
1211 // in the range [0, BitWidth), it is already legal.
1212 if (ShiftTy.getSizeInBits() == 64 && MI.getOpcode() == TargetOpcode::G_FSHR &&
1213 VRegAndVal->Value.ult(BitWidth))
1214 return true;
1215
1216 // Materialize the adjusted shift amount as a 64-bit constant.
1217 auto Cast64 = MIRBuilder.buildConstant(LLT::scalar(64), Amount.zext(64));
1218
1219 if (MI.getOpcode() == TargetOpcode::G_FSHR) {
1220 Observer.changingInstr(MI);
1221 MI.getOperand(3).setReg(Cast64.getReg(0));
1222 Observer.changedInstr(MI);
1223 }
1224 // If Opcode is FSHL, remove the FSHL instruction and create a FSHR
1225 // instruction
1226 else if (MI.getOpcode() == TargetOpcode::G_FSHL) {
1227 MIRBuilder.buildInstr(TargetOpcode::G_FSHR, {MI.getOperand(0).getReg()},
1228 {MI.getOperand(1).getReg(), MI.getOperand(2).getReg(),
1229 Cast64.getReg(0)});
1230 MI.eraseFromParent();
1231 }
1232 return true;
1233}
1234
1235bool AArch64LegalizerInfo::legalizeRotate(MachineInstr &MI,
1236 MachineRegisterInfo &MRI,
1237 LegalizerHelper &Helper) const {
1238 // To allow for imported patterns to match, we ensure that the rotate amount
1239 // is 64b with an extension.
1240 Register AmtReg = MI.getOperand(2).getReg();
1241 LLT AmtTy = MRI.getType(AmtReg);
1242 (void)AmtTy;
1243 assert(AmtTy.isScalar() && "Expected a scalar rotate");
1244 assert(AmtTy.getSizeInBits() < 64 && "Expected this rotate to be legal");
1245 auto NewAmt = Helper.MIRBuilder.buildZExt(LLT::scalar(64), AmtReg);
1246 Helper.Observer.changingInstr(MI);
1247 MI.getOperand(2).setReg(NewAmt.getReg(0));
1248 Helper.Observer.changedInstr(MI);
1249 return true;
1250}
1251
1252bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue(
1253 MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
1254 GISelChangeObserver &Observer) const {
1255 assert(MI.getOpcode() == TargetOpcode::G_GLOBAL_VALUE);
1256 // We do this custom legalization to convert G_GLOBAL_VALUE into target ADRP +
1257 // G_ADD_LOW instructions.
1258 // By splitting this here, we can optimize accesses in the small code model by
1259 // folding in the G_ADD_LOW into the load/store offset.
1260 auto &GlobalOp = MI.getOperand(1);
1261 const auto* GV = GlobalOp.getGlobal();
1262 if (GV->isThreadLocal())
1263 return true; // Don't want to modify TLS vars.
1264
1265 auto &TM = ST->getTargetLowering()->getTargetMachine();
1266 unsigned OpFlags = ST->ClassifyGlobalReference(GV, TM);
1267
1268 if (OpFlags & AArch64II::MO_GOT)
1269 return true;
1270
1271 auto Offset = GlobalOp.getOffset();
1272 Register DstReg = MI.getOperand(0).getReg();
1273 auto ADRP = MIRBuilder.buildInstr(AArch64::ADRP, {LLT::pointer(0, 64)}, {})
1274 .addGlobalAddress(GV, Offset, OpFlags | AArch64II::MO_PAGE);
1275 // Set the regclass on the dest reg too.
1276 MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass);
1277
1278 // MO_TAGGED on the page indicates a tagged address. Set the tag now. We do so
1279 // by creating a MOVK that sets bits 48-63 of the register to (global address
1280 // + 0x100000000 - PC) >> 48. The additional 0x100000000 offset here is to
1281 // prevent an incorrect tag being generated during relocation when the
1282 // global appears before the code section. Without the offset, a global at
1283 // `0x0f00'0000'0000'1000` (i.e. at `0x1000` with tag `0xf`) that's referenced
1284 // by code at `0x2000` would result in `0x0f00'0000'0000'1000 - 0x2000 =
1285 // 0x0eff'ffff'ffff'f000`, meaning the tag would be incorrectly set to `0xe`
1286 // instead of `0xf`.
1287 // This assumes that we're in the small code model so we can assume a binary
1288 // size of <= 4GB, which makes the untagged PC relative offset positive. The
1289 // binary must also be loaded into address range [0, 2^48). Both of these
1290 // properties need to be ensured at runtime when using tagged addresses.
1291 if (OpFlags & AArch64II::MO_TAGGED) {
1292 assert(!Offset &&
1293 "Should not have folded in an offset for a tagged global!");
1294 ADRP = MIRBuilder.buildInstr(AArch64::MOVKXi, {LLT::pointer(0, 64)}, {ADRP})
1295 .addGlobalAddress(GV, 0x100000000,
1297 .addImm(48);
1298 MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass);
1299 }
1300
1301 MIRBuilder.buildInstr(AArch64::G_ADD_LOW, {DstReg}, {ADRP})
1302 .addGlobalAddress(GV, Offset,
1304 MI.eraseFromParent();
1305 return true;
1306}
1307
1308bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
1309 MachineInstr &MI) const {
1310 Intrinsic::ID IntrinsicID = cast<GIntrinsic>(MI).getIntrinsicID();
1311 switch (IntrinsicID) {
1312 case Intrinsic::vacopy: {
1313 unsigned PtrSize = ST->isTargetILP32() ? 4 : 8;
1314 unsigned VaListSize =
1315 (ST->isTargetDarwin() || ST->isTargetWindows())
1316 ? PtrSize
1317 : ST->isTargetILP32() ? 20 : 32;
1318
1319 MachineFunction &MF = *MI.getMF();
1320 Register Val = MF.getRegInfo().createGenericVirtualRegister(
1321 LLT::scalar(VaListSize * 8));
1322 MachineIRBuilder MIB(MI);
1323 MIB.buildLoad(Val, MI.getOperand(2),
1326 VaListSize, Align(PtrSize)));
1327 MIB.buildStore(Val, MI.getOperand(1),
1330 VaListSize, Align(PtrSize)));
1331 MI.eraseFromParent();
1332 return true;
1333 }
1334 case Intrinsic::get_dynamic_area_offset: {
1335 MachineIRBuilder &MIB = Helper.MIRBuilder;
1336 MIB.buildConstant(MI.getOperand(0).getReg(), 0);
1337 MI.eraseFromParent();
1338 return true;
1339 }
1340 case Intrinsic::aarch64_mops_memset_tag: {
1341 assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
1342 // Anyext the value being set to 64 bit (only the bottom 8 bits are read by
1343 // the instruction).
1344 MachineIRBuilder MIB(MI);
1345 auto &Value = MI.getOperand(3);
1346 Register ExtValueReg = MIB.buildAnyExt(LLT::scalar(64), Value).getReg(0);
1347 Value.setReg(ExtValueReg);
1348 return true;
1349 }
1350 case Intrinsic::prefetch: {
1351 MachineIRBuilder MIB(MI);
1352 auto &AddrVal = MI.getOperand(1);
1353
1354 int64_t IsWrite = MI.getOperand(2).getImm();
1355 int64_t Locality = MI.getOperand(3).getImm();
1356 int64_t IsData = MI.getOperand(4).getImm();
1357
1358 bool IsStream = Locality == 0;
1359 if (Locality != 0) {
1360 assert(Locality <= 3 && "Prefetch locality out-of-range");
1361 // The hint's locality degree is the inverse of the target cache level:
1362 // locality 3 means the fastest cache, and the PRFM encoding starts at 0
1363 // for level 1 (L1), so flip the number.
1364 Locality = 3 - Locality;
1365 }
1366
1367 unsigned PrfOp =
1368 (IsWrite << 4) | (!IsData << 3) | (Locality << 1) | IsStream;
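 // Worked example of the PRFM immediate (illustrative): __builtin_prefetch(p,
 // /*rw=*/0, /*locality=*/3) arrives here as IsWrite=0, IsData=1, Locality=3,
 // which is flipped to 0 above, giving PrfOp = 0, i.e. PLDL1KEEP. A write
 // prefetch with locality 1 gives (1<<4) | (2<<1) = 0x14, i.e. PSTL3KEEP.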
1369
1370 MIB.buildInstr(AArch64::G_PREFETCH).addImm(PrfOp).add(AddrVal);
1371 MI.eraseFromParent();
1372 return true;
1373 }
1374 case Intrinsic::aarch64_prefetch: {
1375 MachineIRBuilder MIB(MI);
1376 auto &AddrVal = MI.getOperand(1);
1377
1378 int64_t IsWrite = MI.getOperand(2).getImm();
1379 int64_t Target = MI.getOperand(3).getImm();
1380 int64_t IsStream = MI.getOperand(4).getImm();
1381 int64_t IsData = MI.getOperand(5).getImm();
1382
1383 unsigned PrfOp = (IsWrite << 4) | // Load/Store bit
1384 (!IsData << 3) | // IsDataCache bit
1385 (Target << 1) | // Cache level bits
1386 (unsigned)IsStream; // Stream bit
1387
1388 MIB.buildInstr(AArch64::G_PREFETCH).addImm(PrfOp).add(AddrVal);
1389 MI.eraseFromParent();
1390 return true;
1391 }
1392 case Intrinsic::aarch64_neon_uaddv:
1393 case Intrinsic::aarch64_neon_saddv:
1394 case Intrinsic::aarch64_neon_umaxv:
1395 case Intrinsic::aarch64_neon_smaxv:
1396 case Intrinsic::aarch64_neon_uminv:
1397 case Intrinsic::aarch64_neon_sminv: {
1398 MachineIRBuilder MIB(MI);
1399 MachineRegisterInfo &MRI = *MIB.getMRI();
1400 bool IsSigned = IntrinsicID == Intrinsic::aarch64_neon_saddv ||
1401 IntrinsicID == Intrinsic::aarch64_neon_smaxv ||
1402 IntrinsicID == Intrinsic::aarch64_neon_sminv;
1403
1404 auto OldDst = MI.getOperand(0).getReg();
1405 auto OldDstTy = MRI.getType(OldDst);
1406 LLT NewDstTy = MRI.getType(MI.getOperand(2).getReg()).getElementType();
1407 if (OldDstTy == NewDstTy)
1408 return true;
1409
1410 auto NewDst = MRI.createGenericVirtualRegister(NewDstTy);
1411
1412 Helper.Observer.changingInstr(MI);
1413 MI.getOperand(0).setReg(NewDst);
1414 Helper.Observer.changedInstr(MI);
1415
1416 MIB.setInsertPt(MIB.getMBB(), ++MIB.getInsertPt());
1417 MIB.buildExtOrTrunc(IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT,
1418 OldDst, NewDst);
1419
1420 return true;
1421 }
1422 case Intrinsic::aarch64_neon_smax:
1423 case Intrinsic::aarch64_neon_smin:
1424 case Intrinsic::aarch64_neon_umax:
1425 case Intrinsic::aarch64_neon_umin:
1426 case Intrinsic::aarch64_neon_fmax:
1427 case Intrinsic::aarch64_neon_fmin: {
1428 MachineIRBuilder MIB(MI);
1429 if (IntrinsicID == Intrinsic::aarch64_neon_smax)
1430 MIB.buildSMax(MI.getOperand(0), MI.getOperand(2), MI.getOperand(3));
1431 else if (IntrinsicID == Intrinsic::aarch64_neon_smin)
1432 MIB.buildSMin(MI.getOperand(0), MI.getOperand(2), MI.getOperand(3));
1433 else if (IntrinsicID == Intrinsic::aarch64_neon_umax)
1434 MIB.buildUMax(MI.getOperand(0), MI.getOperand(2), MI.getOperand(3));
1435 else if (IntrinsicID == Intrinsic::aarch64_neon_umin)
1436 MIB.buildUMin(MI.getOperand(0), MI.getOperand(2), MI.getOperand(3));
1437 else if (IntrinsicID == Intrinsic::aarch64_neon_fmax)
1438 MIB.buildInstr(TargetOpcode::G_FMAXIMUM, {MI.getOperand(0)},
1439 {MI.getOperand(2), MI.getOperand(3)});
1440 else if (IntrinsicID == Intrinsic::aarch64_neon_fmin)
1441 MIB.buildInstr(TargetOpcode::G_FMINIMUM, {MI.getOperand(0)},
1442 {MI.getOperand(2), MI.getOperand(3)});
1443 MI.eraseFromParent();
1444 return true;
1445 }
1446 case Intrinsic::experimental_vector_reverse:
1447 // TODO: Add support for vector_reverse
1448 return false;
1449 }
1450
1451 return true;
1452}
1453
1454bool AArch64LegalizerInfo::legalizeShlAshrLshr(
1455 MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
1456 GISelChangeObserver &Observer) const {
1457 assert(MI.getOpcode() == TargetOpcode::G_ASHR ||
1458 MI.getOpcode() == TargetOpcode::G_LSHR ||
1459 MI.getOpcode() == TargetOpcode::G_SHL);
1460 // If the shift amount is a G_CONSTANT, promote it to a 64 bit type so the
1461 // imported patterns can select it later. Either way, it will be legal.
1462 Register AmtReg = MI.getOperand(2).getReg();
1463 auto VRegAndVal = getIConstantVRegValWithLookThrough(AmtReg, MRI);
1464 if (!VRegAndVal)
1465 return true;
1466 // Check the shift amount is in range for an immediate form.
1467 int64_t Amount = VRegAndVal->Value.getSExtValue();
1468 if (Amount > 31)
1469 return true; // This will have to remain a register variant.
1470 auto ExtCst = MIRBuilder.buildConstant(LLT::scalar(64), Amount);
1471 Observer.changingInstr(MI);
1472 MI.getOperand(2).setReg(ExtCst.getReg(0));
1473 Observer.changedInstr(MI);
1474 return true;
1475}
1476
1477static void matchLDPSTPAddrMode(Register Root, Register &Base, int &Offset,
1478 MachineRegisterInfo &MRI) {
1479 Base = Root;
1480 Offset = 0;
1481
1482 Register NewBase;
1483 int64_t NewOffset;
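 // Fold a (base + constant) address into the LDP/STP addressing mode when the
 // constant is a multiple of 8 in [-512, 504], i.e. isShiftedInt<7, 3>: the
 // range of the scaled signed 7-bit immediate. The caller emits Offset / 8.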
1484 if (mi_match(Root, MRI, m_GPtrAdd(m_Reg(NewBase), m_ICst(NewOffset))) &&
1485 isShiftedInt<7, 3>(NewOffset)) {
1486 Base = NewBase;
1487 Offset = NewOffset;
1488 }
1489}
1490
1491// FIXME: This should be removed and replaced with the generic bitcast legalize
1492// action.
1493bool AArch64LegalizerInfo::legalizeLoadStore(
1494 MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
1495 GISelChangeObserver &Observer) const {
1496 assert(MI.getOpcode() == TargetOpcode::G_STORE ||
1497 MI.getOpcode() == TargetOpcode::G_LOAD);
1498 // Here we just try to handle vector loads/stores where our value type might
1499 // have pointer elements, which the SelectionDAG importer can't handle. To
1500 // allow the existing patterns for s64 to fire for p0, we just try to bitcast
1501 // the value to use s64 types.
1502
1503 // Custom legalization requires the instruction, if not deleted, must be fully
1504 // legalized. In order to allow further legalization of the inst, we create
1505 // a new instruction and erase the existing one.
1506
1507 Register ValReg = MI.getOperand(0).getReg();
1508 const LLT ValTy = MRI.getType(ValReg);
1509
1510 if (ValTy == LLT::scalar(128)) {
1511
1512 AtomicOrdering Ordering = (*MI.memoperands_begin())->getSuccessOrdering();
1513 bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD;
1514 bool IsLoadAcquire = IsLoad && Ordering == AtomicOrdering::Acquire;
1515 bool IsStoreRelease = !IsLoad && Ordering == AtomicOrdering::Release;
1516 bool IsRcpC3 =
1517 ST->hasLSE2() && ST->hasRCPC3() && (IsLoadAcquire || IsStoreRelease);
1518
1519 LLT s64 = LLT::scalar(64);
1520
1521 unsigned Opcode;
1522 if (IsRcpC3) {
1523 Opcode = IsLoad ? AArch64::LDIAPPX : AArch64::STILPX;
1524 } else {
1525 // For LSE2, loads/stores should have been converted to monotonic and had
1526 // a fence inserted after them.
1527 assert(Ordering == AtomicOrdering::Monotonic ||
1528 Ordering == AtomicOrdering::Unordered);
1529 assert(ST->hasLSE2() && "ldp/stp not single copy atomic without +lse2");
1530
1531 Opcode = IsLoad ? AArch64::LDPXi : AArch64::STPXi;
1532 }
1533
1534 MachineInstrBuilder NewI;
1535 if (IsLoad) {
1536 NewI = MIRBuilder.buildInstr(Opcode, {s64, s64}, {});
1537 MIRBuilder.buildMergeLikeInstr(
1538 ValReg, {NewI->getOperand(0), NewI->getOperand(1)});
1539 } else {
1540 auto Split = MIRBuilder.buildUnmerge(s64, MI.getOperand(0));
1541 NewI = MIRBuilder.buildInstr(
1542 Opcode, {}, {Split->getOperand(0), Split->getOperand(1)});
1543 }
1544
1545 if (IsRcpC3) {
1546 NewI.addUse(MI.getOperand(1).getReg());
1547 } else {
1548 Register Base;
1549 int Offset;
1550 matchLDPSTPAddrMode(MI.getOperand(1).getReg(), Base, Offset, MRI);
1551 NewI.addUse(Base);
1552 NewI.addImm(Offset / 8);
1553 }
1554
1555 NewI.cloneMemRefs(MI);
1556 constrainSelectedInstRegOperands(*NewI, *ST->getInstrInfo(),
1557 *MRI.getTargetRegisterInfo(),
1558 *ST->getRegBankInfo());
1559 MI.eraseFromParent();
1560 return true;
1561 }
1562
1563 if (!ValTy.isVector() || !ValTy.getElementType().isPointer() ||
1564 ValTy.getElementType().getAddressSpace() != 0) {
1565 LLVM_DEBUG(dbgs() << "Tried to do custom legalization on wrong load/store");
1566 return false;
1567 }
1568
1569 unsigned PtrSize = ValTy.getElementType().getSizeInBits();
1570 const LLT NewTy = LLT::vector(ValTy.getElementCount(), PtrSize);
1571 auto &MMO = **MI.memoperands_begin();
1572 MMO.setType(NewTy);
1573
1574 if (MI.getOpcode() == TargetOpcode::G_STORE) {
1575 auto Bitcast = MIRBuilder.buildBitcast(NewTy, ValReg);
1576 MIRBuilder.buildStore(Bitcast.getReg(0), MI.getOperand(1), MMO);
1577 } else {
1578 auto NewLoad = MIRBuilder.buildLoad(NewTy, MI.getOperand(1), MMO);
1579 MIRBuilder.buildBitcast(ValReg, NewLoad);
1580 }
1581 MI.eraseFromParent();
1582 return true;
1583}
1584
1585bool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI,
1586 MachineRegisterInfo &MRI,
1587 MachineIRBuilder &MIRBuilder) const {
1588 MachineFunction &MF = MIRBuilder.getMF();
1589 Align Alignment(MI.getOperand(2).getImm());
1590 Register Dst = MI.getOperand(0).getReg();
1591 Register ListPtr = MI.getOperand(1).getReg();
1592
1593 LLT PtrTy = MRI.getType(ListPtr);
1594 LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());
1595
1596 const unsigned PtrSize = PtrTy.getSizeInBits() / 8;
1597 const Align PtrAlign = Align(PtrSize);
1598 auto List = MIRBuilder.buildLoad(
1599 PtrTy, ListPtr,
1601 PtrTy, PtrAlign));
1602
1603 MachineInstrBuilder DstPtr;
1604 if (Alignment > PtrAlign) {
1605 // Realign the list to the actual required alignment.
1606 auto AlignMinus1 =
1607 MIRBuilder.buildConstant(IntPtrTy, Alignment.value() - 1);
1608 auto ListTmp = MIRBuilder.buildPtrAdd(PtrTy, List, AlignMinus1.getReg(0));
1609 DstPtr = MIRBuilder.buildMaskLowPtrBits(PtrTy, ListTmp, Log2(Alignment));
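 // Illustrative: for 16-byte alignment with List == 0x1004 this computes
 // (0x1004 + 15) & ~15 == 0x1010, the usual (p + a - 1) & ~(a - 1) round-up
 // expressed as a G_PTR_ADD followed by masking off the low Log2(a) bits.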
1610 } else
1611 DstPtr = List;
1612
1613 LLT ValTy = MRI.getType(Dst);
1614 uint64_t ValSize = ValTy.getSizeInBits() / 8;
1615 MIRBuilder.buildLoad(
1616 Dst, DstPtr,
1618 ValTy, std::max(Alignment, PtrAlign)));
1619
1620 auto Size = MIRBuilder.buildConstant(IntPtrTy, alignTo(ValSize, PtrAlign));
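 // Illustrative (64-bit va_list): a 4-byte s32 argument still advances the
 // list by alignTo(4, 8) == 8 bytes, keeping the pointer aligned for the next
 // element.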
1621
1622 auto NewList = MIRBuilder.buildPtrAdd(PtrTy, DstPtr, Size.getReg(0));
1623
1624 MIRBuilder.buildStore(NewList, ListPtr,
1627 PtrTy, PtrAlign));
1628
1629 MI.eraseFromParent();
1630 return true;
1631}
1632
1633bool AArch64LegalizerInfo::legalizeBitfieldExtract(
1634 MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
1635 // Only legal if we can select immediate forms.
1636 // TODO: Lower this otherwise.
1637 return getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI) &&
1638 getIConstantVRegValWithLookThrough(MI.getOperand(3).getReg(), MRI);
1639}
1640
1641bool AArch64LegalizerInfo::legalizeCTPOP(MachineInstr &MI,
1642 MachineRegisterInfo &MRI,
1643 LegalizerHelper &Helper) const {
1644 // When there is no integer popcount instruction (FEAT_CSSC isn't available),
1645 // it can be more efficiently lowered to the following sequence that uses
1646 // AdvSIMD registers/instructions as long as the copies to/from the AdvSIMD
1647 // registers are cheap.
1648 // FMOV D0, X0 // copy 64-bit int to vector, high bits zero'd
1649 // CNT V0.8B, V0.8B // 8xbyte pop-counts
1650 // ADDV B0, V0.8B // sum 8xbyte pop-counts
1651 // UMOV X0, V0.B[0] // copy byte result back to integer reg
1652 //
1653 // For 128 bit vector popcounts, we lower to the following sequence:
1654 // cnt.16b v0, v0 // v8s16, v4s32, v2s64
1655 // uaddlp.8h v0, v0 // v8s16, v4s32, v2s64
1656 // uaddlp.4s v0, v0 // v4s32, v2s64
1657 // uaddlp.2d v0, v0 // v2s64
1658 //
1659 // For 64 bit vector popcounts, we lower to the following sequence:
1660 // cnt.8b v0, v0 // v4s16, v2s32
1661 // uaddlp.4h v0, v0 // v4s16, v2s32
1662 // uaddlp.2s v0, v0 // v2s32
1663
1664 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
1665 Register Dst = MI.getOperand(0).getReg();
1666 Register Val = MI.getOperand(1).getReg();
1667 LLT Ty = MRI.getType(Val);
1668 unsigned Size = Ty.getSizeInBits();
1669
1670 assert(Ty == MRI.getType(Dst) &&
1671 "Expected src and dst to have the same type!");
1672
1673 if (ST->hasCSSC() && Ty.isScalar() && Size == 128) {
1674 LLT s64 = LLT::scalar(64);
1675
1676 auto Split = MIRBuilder.buildUnmerge(s64, Val);
1677 auto CTPOP1 = MIRBuilder.buildCTPOP(s64, Split->getOperand(0));
1678 auto CTPOP2 = MIRBuilder.buildCTPOP(s64, Split->getOperand(1));
1679 auto Add = MIRBuilder.buildAdd(s64, CTPOP1, CTPOP2);
1680
1681 MIRBuilder.buildZExt(Dst, Add);
1682 MI.eraseFromParent();
1683 return true;
1684 }
1685
1686 if (!ST->hasNEON() ||
1687 MI.getMF()->getFunction().hasFnAttribute(Attribute::NoImplicitFloat)) {
1688 // Use generic lowering when custom lowering is not possible.
1689 return Ty.isScalar() && (Size == 32 || Size == 64) &&
1690 Helper.lowerBitCount(MI) ==
1691 LegalizerHelper::LegalizeResult::Legalized;
1692
1693
1694 // Pre-conditioning: widen Val up to the nearest vector type.
1695 // s32,s64,v4s16,v2s32 -> v8i8
1696 // v8s16,v4s32,v2s64 -> v16i8
1697 LLT VTy = Size == 128 ? LLT::fixed_vector(16, 8) : LLT::fixed_vector(8, 8);
1698 if (Ty.isScalar()) {
1699 assert((Size == 32 || Size == 64 || Size == 128) && "Expected only 32, 64, or 128 bit scalars!");
1700 if (Size == 32) {
1701 Val = MIRBuilder.buildZExt(LLT::scalar(64), Val).getReg(0);
1702 }
1703 }
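 // The zero-extension is harmless for the count: the extra upper lanes of the
 // v8s8 bitcast below are all zero and contribute nothing to the popcount.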
1704 Val = MIRBuilder.buildBitcast(VTy, Val).getReg(0);
1705
1706 // Count bits in each byte-sized lane.
1707 auto CTPOP = MIRBuilder.buildCTPOP(VTy, Val);
1708
1709 // Sum across lanes.
1710 Register HSum = CTPOP.getReg(0);
1711 unsigned Opc;
1712 SmallVector<LLT> HAddTys;
1713 if (Ty.isScalar()) {
1714 Opc = Intrinsic::aarch64_neon_uaddlv;
1715 HAddTys.push_back(LLT::scalar(32));
1716 } else if (Ty == LLT::fixed_vector(8, 16)) {
1717 Opc = Intrinsic::aarch64_neon_uaddlp;
1718 HAddTys.push_back(LLT::fixed_vector(8, 16));
1719 } else if (Ty == LLT::fixed_vector(4, 32)) {
1720 Opc = Intrinsic::aarch64_neon_uaddlp;
1721 HAddTys.push_back(LLT::fixed_vector(8, 16));
1722 HAddTys.push_back(LLT::fixed_vector(4, 32));
1723 } else if (Ty == LLT::fixed_vector(2, 64)) {
1724 Opc = Intrinsic::aarch64_neon_uaddlp;
1725 HAddTys.push_back(LLT::fixed_vector(8, 16));
1726 HAddTys.push_back(LLT::fixed_vector(4, 32));
1727 HAddTys.push_back(LLT::fixed_vector(2, 64));
1728 } else if (Ty == LLT::fixed_vector(4, 16)) {
1729 Opc = Intrinsic::aarch64_neon_uaddlp;
1730 HAddTys.push_back(LLT::fixed_vector(4, 16));
1731 } else if (Ty == LLT::fixed_vector(2, 32)) {
1732 Opc = Intrinsic::aarch64_neon_uaddlp;
1733 HAddTys.push_back(LLT::fixed_vector(4, 16));
1734 HAddTys.push_back(LLT::fixed_vector(2, 32));
1735 } else
1736 llvm_unreachable("unexpected vector shape");
1737 MachineInstrBuilder UADD;
1738 for (LLT HTy : HAddTys) {
1739 UADD = MIRBuilder.buildIntrinsic(Opc, {HTy}).addUse(HSum);
1740 HSum = UADD.getReg(0);
1741 }
1742
1743 // Post-conditioning.
1744 if (Ty.isScalar() && (Size == 64 || Size == 128))
1745 MIRBuilder.buildZExt(Dst, UADD);
1746 else
1747 UADD->getOperand(0).setReg(Dst);
1748 MI.eraseFromParent();
1749 return true;
1750}
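As a rough scalar model of the byte-wise strategy described in the comment above (count the bits in each byte lane, then sum the lane counts), here is a small standalone sketch; it is illustrative only and not how the legalizer itself emits code:

#include <cstdint>

// Illustrative only: popcount of a 64-bit value by counting bits per byte
// (the CNT step) and then summing the eight byte counts (the ADDV/UADDLP step).
static unsigned popcount64ByBytes(uint64_t x) {
  unsigned total = 0;
  for (int i = 0; i < 8; ++i) {
    uint8_t byte = static_cast<uint8_t>(x >> (8 * i));
    unsigned perByte = 0;
    while (byte) {            // per-lane popcount, like CNT V0.8B
      perByte += byte & 1;
      byte >>= 1;
    }
    total += perByte;         // horizontal sum, like ADDV B0, V0.8B
  }
  return total;
}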
1751
1752bool AArch64LegalizerInfo::legalizeAtomicCmpxchg128(
1753 MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
1754 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
1755 LLT s64 = LLT::scalar(64);
1756 auto Addr = MI.getOperand(1).getReg();
1757 auto DesiredI = MIRBuilder.buildUnmerge({s64, s64}, MI.getOperand(2));
1758 auto NewI = MIRBuilder.buildUnmerge({s64, s64}, MI.getOperand(3));
1759 auto DstLo = MRI.createGenericVirtualRegister(s64);
1760 auto DstHi = MRI.createGenericVirtualRegister(s64);
1761
1762 MachineInstrBuilder CAS;
1763 if (ST->hasLSE()) {
1764 // We have 128-bit CASP instructions taking XSeqPair registers, which are
1765 // s128. We need the merge/unmerge to bracket the expansion and pair up with
1766 // the rest of the MIR so we must reassemble the extracted registers into a
1767 // 128-bit known-regclass one with code like this:
1768 //
1769 // %in1 = REG_SEQUENCE Lo, Hi ; One for each input
1770 // %out = CASP %in1, ...
1771 // %OldLo = G_EXTRACT %out, 0
1772 // %OldHi = G_EXTRACT %out, 64
1773 auto Ordering = (*MI.memoperands_begin())->getMergedOrdering();
1774 unsigned Opcode;
1775 switch (Ordering) {
1776 case AtomicOrdering::Acquire:
1777 Opcode = AArch64::CASPAX;
1778 break;
1779 case AtomicOrdering::Release:
1780 Opcode = AArch64::CASPLX;
1781 break;
1782 case AtomicOrdering::AcquireRelease:
1783 case AtomicOrdering::SequentiallyConsistent:
1784 Opcode = AArch64::CASPALX;
1785 break;
1786 default:
1787 Opcode = AArch64::CASPX;
1788 break;
1789 }
1790
1791 LLT s128 = LLT::scalar(128);
1792 auto CASDst = MRI.createGenericVirtualRegister(s128);
1793 auto CASDesired = MRI.createGenericVirtualRegister(s128);
1794 auto CASNew = MRI.createGenericVirtualRegister(s128);
1795 MIRBuilder.buildInstr(TargetOpcode::REG_SEQUENCE, {CASDesired}, {})
1796 .addUse(DesiredI->getOperand(0).getReg())
1797 .addImm(AArch64::sube64)
1798 .addUse(DesiredI->getOperand(1).getReg())
1799 .addImm(AArch64::subo64);
1800 MIRBuilder.buildInstr(TargetOpcode::REG_SEQUENCE, {CASNew}, {})
1801 .addUse(NewI->getOperand(0).getReg())
1802 .addImm(AArch64::sube64)
1803 .addUse(NewI->getOperand(1).getReg())
1804 .addImm(AArch64::subo64);
1805
1806 CAS = MIRBuilder.buildInstr(Opcode, {CASDst}, {CASDesired, CASNew, Addr});
1807
1808 MIRBuilder.buildExtract({DstLo}, {CASDst}, 0);
1809 MIRBuilder.buildExtract({DstHi}, {CASDst}, 64);
1810 } else {
1811 // The -O0 CMP_SWAP_128 is friendlier to generate code for because LDXP/STXP
1812 // can take arbitrary registers so it just has the normal GPR64 operands the
1813 // rest of AArch64 is expecting.
1814 auto Ordering = (*MI.memoperands_begin())->getMergedOrdering();
1815 unsigned Opcode;
1816 switch (Ordering) {
1817 case AtomicOrdering::Acquire:
1818 Opcode = AArch64::CMP_SWAP_128_ACQUIRE;
1819 break;
1820 case AtomicOrdering::Release:
1821 Opcode = AArch64::CMP_SWAP_128_RELEASE;
1822 break;
1823 case AtomicOrdering::AcquireRelease:
1824 case AtomicOrdering::SequentiallyConsistent:
1825 Opcode = AArch64::CMP_SWAP_128;
1826 break;
1827 default:
1828 Opcode = AArch64::CMP_SWAP_128_MONOTONIC;
1829 break;
1830 }
1831
1832 auto Scratch = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1833 CAS = MIRBuilder.buildInstr(Opcode, {DstLo, DstHi, Scratch},
1834 {Addr, DesiredI->getOperand(0),
1835 DesiredI->getOperand(1), NewI->getOperand(0),
1836 NewI->getOperand(1)});
1837 }
1838
1839 CAS.cloneMemRefs(MI);
1840 constrainSelectedInstRegOperands(*CAS, *ST->getInstrInfo(),
1841 *MRI.getTargetRegisterInfo(),
1842 *ST->getRegBankInfo());
1843
1844 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {DstLo, DstHi});
1845 MI.eraseFromParent();
1846 return true;
1847}
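Both branches above implement the usual compare-and-swap contract on a pair of 64-bit halves. A non-atomic, single-threaded sketch of that contract (illustrative only; the names are ad hoc, and the real lowering relies on CASP or an LDXP/STXP loop for atomicity):

#include <cstdint>

// Illustrative only: the value-level contract of a 128-bit cmpxchg expressed
// on lo/hi 64-bit halves. Returns the previous {Lo, Hi} pair; real hardware
// performs the whole operation atomically.
struct U128 { uint64_t Lo, Hi; };

static U128 cmpxchg128Model(U128 *Addr, U128 Desired, U128 New) {
  U128 Old = *Addr;
  if (Old.Lo == Desired.Lo && Old.Hi == Desired.Hi)
    *Addr = New;
  return Old; // callers re-merge the two halves, as buildMergeLikeInstr does
}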
1848
1849bool AArch64LegalizerInfo::legalizeCTTZ(MachineInstr &MI,
1850 LegalizerHelper &Helper) const {
1851 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
1852 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
1853 LLT Ty = MRI.getType(MI.getOperand(1).getReg());
1854 auto BitReverse = MIRBuilder.buildBitReverse(Ty, MI.getOperand(1));
1855 MIRBuilder.buildCTLZ(MI.getOperand(0).getReg(), BitReverse);
1856 MI.eraseFromParent();
1857 return true;
1858}
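The rewrite above relies on the identity cttz(x) == ctlz(bitreverse(x)), which maps onto AArch64's RBIT followed by CLZ. A tiny standalone check of that identity (illustrative only; helper names are ad hoc, and zero input yields the full bit width):

#include <cstdint>

// Illustrative only: count trailing zeros by bit-reversing and counting
// leading zeros, the same trick the legalization above applies.
static uint32_t bitreverse32(uint32_t x) {
  uint32_t r = 0;
  for (int i = 0; i < 32; ++i)
    r |= ((x >> i) & 1u) << (31 - i);
  return r;
}

static unsigned ctlz32(uint32_t x) {
  unsigned n = 0;
  for (int i = 31; i >= 0 && !((x >> i) & 1u); --i)
    ++n;
  return n; // 32 when x == 0
}

static unsigned cttz32(uint32_t x) { return ctlz32(bitreverse32(x)); }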
1859
1860bool AArch64LegalizerInfo::legalizeMemOps(MachineInstr &MI,
1861 LegalizerHelper &Helper) const {
1862 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
1863
1864 // Tagged version MOPSMemorySetTagged is legalised in legalizeIntrinsic
1865 if (MI.getOpcode() == TargetOpcode::G_MEMSET) {
1866 // Anyext the value being set to 64 bit (only the bottom 8 bits are read by
1867 // the instruction).
1868 auto &Value = MI.getOperand(1);
1869 Register ExtValueReg =
1870 MIRBuilder.buildAnyExt(LLT::scalar(64), Value).getReg(0);
1871 Value.setReg(ExtValueReg);
1872 return true;
1873 }
1874
1875 return false;
1876}
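The any-extend above is safe because a memset-style fill only consumes the low 8 bits of the value register. A minimal model of that observation (illustrative only, not related to the MOPS instructions themselves):

#include <cstddef>
#include <cstdint>

// Illustrative only: a byte fill reads just the low 8 bits of the widened
// value, so the upper bits produced by G_ANYEXT are irrelevant.
static void memsetModel(uint8_t *Dst, uint64_t WidenedValue, size_t Len) {
  const uint8_t Byte = static_cast<uint8_t>(WidenedValue); // low 8 bits only
  for (size_t I = 0; I != Len; ++I)
    Dst[I] = Byte;
}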
1877
1878bool AArch64LegalizerInfo::legalizeFCopySign(MachineInstr &MI,
1879 LegalizerHelper &Helper) const {
1880 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
1881 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
1882 Register Dst = MI.getOperand(0).getReg();
1883 LLT DstTy = MRI.getType(Dst);
1884 assert(DstTy.isScalar() && "Only expected scalars right now!");
1885 const unsigned DstSize = DstTy.getSizeInBits();
1886 assert((DstSize == 32 || DstSize == 64) && "Unexpected dst type!");
1887 assert(MRI.getType(MI.getOperand(2).getReg()) == DstTy &&
1888 "Expected homogeneous types!");
1889
1890 // We want to materialize a mask with the high bit set.
1891 uint64_t EltMask;
1892 LLT VecTy;
1893
1894 // TODO: s16 support.
1895 switch (DstSize) {
1896 default:
1897 llvm_unreachable("Unexpected type for G_FCOPYSIGN!");
1898 case 64: {
1899 // AdvSIMD immediate moves cannot materialize our mask in a single
1900 // instruction for 64-bit elements. Instead, materialize zero and then
1901 // negate it.
1902 EltMask = 0;
1903 VecTy = LLT::fixed_vector(2, DstTy);
1904 break;
1905 }
1906 case 32:
1907 EltMask = 0x80000000ULL;
1908 VecTy = LLT::fixed_vector(4, DstTy);
1909 break;
1910 }
1911
1912 // Widen In1 and In2 to 128 bits. We want these to eventually become
1913 // INSERT_SUBREGs.
1914 auto Undef = MIRBuilder.buildUndef(VecTy);
1915 auto Zero = MIRBuilder.buildConstant(DstTy, 0);
1916 auto Ins1 = MIRBuilder.buildInsertVectorElement(
1917 VecTy, Undef, MI.getOperand(1).getReg(), Zero);
1918 auto Ins2 = MIRBuilder.buildInsertVectorElement(
1919 VecTy, Undef, MI.getOperand(2).getReg(), Zero);
1920
1921 // Construct the mask.
1922 auto Mask = MIRBuilder.buildConstant(VecTy, EltMask);
1923 if (DstSize == 64)
1924 Mask = MIRBuilder.buildFNeg(VecTy, Mask);
1925
1926 auto Sel = MIRBuilder.buildInstr(AArch64::G_BSP, {VecTy}, {Mask, Ins2, Ins1});
1927
1928 // Build an unmerge whose 0th elt is the original G_FCOPYSIGN destination. We
1929 // want this to eventually become an EXTRACT_SUBREG.
1930 SmallVector<Register, 2> DstRegs(1, Dst);
1931 for (unsigned I = 1, E = VecTy.getNumElements(); I < E; ++I)
1932 DstRegs.push_back(MRI.createGenericVirtualRegister(DstTy));
1933 MIRBuilder.buildUnmerge(DstRegs, Sel);
1934 MI.eraseFromParent();
1935 return true;
1936}
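The G_BSP select above implements the standard copysign bit trick: take the sign bit from the second operand and everything else from the first. A scalar, standalone sketch of the same mask-and-select for f32 (illustrative only; the function name is ad hoc):

#include <cstdint>
#include <cstring>

// Illustrative only: f32 copysign via a sign-bit mask, the scalar analogue of
// the bitwise select emitted above with EltMask = 0x80000000.
static float copysignModelF32(float Mag, float Sign) {
  uint32_t MagBits, SignBits;
  std::memcpy(&MagBits, &Mag, sizeof(float));
  std::memcpy(&SignBits, &Sign, sizeof(float));
  const uint32_t Mask = 0x80000000u;
  const uint32_t ResultBits = (SignBits & Mask) | (MagBits & ~Mask);
  float Result;
  std::memcpy(&Result, &ResultBits, sizeof(float));
  return Result;
}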
1937
1938bool AArch64LegalizerInfo::legalizeExtractVectorElt(
1939 MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
1940 assert(MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT);
1941 auto VRegAndVal =
1942 getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
1943 if (VRegAndVal)
1944 return true;
1945 return Helper.lowerExtractInsertVectorElt(MI) !=
1946 LegalizerHelper::LegalizeResult::UnableToLegalize;
1947}
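When the index is not a compile-time constant, the lowerExtractInsertVectorElt fallback goes through a stack temporary: the vector is spilled and the addressed element is reloaded. A standalone sketch of that idea (illustrative only; the name is ad hoc, and the index is masked here only to keep the sketch in bounds):

#include <cstdint>
#include <cstring>

// Illustrative only: variable-index extract via a stack temporary, the idea
// behind lowering G_EXTRACT_VECTOR_ELT with a non-constant index.
static int32_t extractElementViaStack(const int32_t (&Vec)[4], unsigned Idx) {
  int32_t Spill[4];                       // stack slot for the whole vector
  std::memcpy(Spill, Vec, sizeof(Spill)); // store the vector
  return Spill[Idx & 3];                  // reload the addressed element
}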