AArch64LegalizerInfo.cpp
1//===- AArch64LegalizerInfo.cpp ----------------------------------*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file implements the targeting of the MachineLegalizer class for
10/// AArch64.
11/// \todo This should be generated by TableGen.
12//===----------------------------------------------------------------------===//
13
14 #include "AArch64LegalizerInfo.h"
15 #include "AArch64RegisterBankInfo.h"
16 #include "AArch64Subtarget.h"
27#include "llvm/IR/Intrinsics.h"
28#include "llvm/IR/IntrinsicsAArch64.h"
29#include "llvm/IR/Type.h"
31#include <initializer_list>
32
33#define DEBUG_TYPE "aarch64-legalinfo"
34
35using namespace llvm;
36using namespace LegalizeActions;
37using namespace LegalizeMutations;
38using namespace LegalityPredicates;
39using namespace MIPatternMatch;
40
41 AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
42 : ST(&ST) {
43 using namespace TargetOpcode;
44 const LLT p0 = LLT::pointer(0, 64);
45 const LLT s8 = LLT::scalar(8);
46 const LLT s16 = LLT::scalar(16);
47 const LLT s32 = LLT::scalar(32);
48 const LLT s64 = LLT::scalar(64);
49 const LLT s128 = LLT::scalar(128);
50 const LLT v16s8 = LLT::fixed_vector(16, 8);
51 const LLT v8s8 = LLT::fixed_vector(8, 8);
52 const LLT v4s8 = LLT::fixed_vector(4, 8);
53 const LLT v8s16 = LLT::fixed_vector(8, 16);
54 const LLT v4s16 = LLT::fixed_vector(4, 16);
55 const LLT v2s16 = LLT::fixed_vector(2, 16);
56 const LLT v2s32 = LLT::fixed_vector(2, 32);
57 const LLT v4s32 = LLT::fixed_vector(4, 32);
58 const LLT v2s64 = LLT::fixed_vector(2, 64);
59 const LLT v2p0 = LLT::fixed_vector(2, p0);
60
61 std::initializer_list<LLT> PackedVectorAllTypeList = {/* Begin 128bit types */
62 v16s8, v8s16, v4s32,
63 v2s64, v2p0,
64 /* End 128bit types */
65 /* Begin 64bit types */
66 v8s8, v4s16, v2s32};
67
68 const TargetMachine &TM = ST.getTargetLowering()->getTargetMachine();
69
70 // FIXME: support subtargets which have neon/fp-armv8 disabled.
71 if (!ST.hasNEON() || !ST.hasFPARMv8()) {
72 getLegacyLegalizerInfo().computeTables();
73 return;
74 }
75
76 // Some instructions only support s16 if the subtarget has full 16-bit FP
77 // support.
78 const bool HasFP16 = ST.hasFullFP16();
79 const LLT &MinFPScalar = HasFP16 ? s16 : s32;
80
81 const bool HasCSSC = ST.hasCSSC();
82 const bool HasRCPC3 = ST.hasRCPC3();
83
84 getActionDefinitionsBuilder({G_IMPLICIT_DEF, G_FREEZE})
85 .legalFor({p0, s8, s16, s32, s64})
86 .legalFor(PackedVectorAllTypeList)
88 .clampScalar(0, s8, s64)
90 [=](const LegalityQuery &Query) {
91 return Query.Types[0].isVector() &&
92 (Query.Types[0].getElementType() != s64 ||
93 Query.Types[0].getNumElements() != 2);
94 },
95 [=](const LegalityQuery &Query) {
96 LLT EltTy = Query.Types[0].getElementType();
97 if (EltTy == s64)
98 return std::make_pair(0, LLT::fixed_vector(2, 64));
99 return std::make_pair(0, EltTy);
100 });
101
103 .legalFor({p0, s16, s32, s64})
104 .legalFor(PackedVectorAllTypeList)
106 .clampScalar(0, s16, s64)
107 // Maximum: sN * k = 128
108 .clampMaxNumElements(0, s8, 16)
109 .clampMaxNumElements(0, s16, 8)
110 .clampMaxNumElements(0, s32, 4)
111 .clampMaxNumElements(0, s64, 2)
112 .clampMaxNumElements(0, p0, 2);
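  // Note: the clampMaxNumElements calls above implement "sN * k = 128": the
  // lane count times the element width is capped at 128 bits, i.e. 16 x s8,
  // 8 x s16, 4 x s32, 2 x s64 or 2 x p0, exactly one 128-bit vector register.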
113
115 .legalFor({s32, s64, v4s32, v2s32, v2s64})
116 .widenScalarToNextPow2(0)
117 .clampScalar(0, s32, s64);
118
119 getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR})
120 .legalFor({s32, s64, v2s32, v4s32, v4s16, v8s16, v16s8, v8s8})
121 .scalarizeIf(
122 [=](const LegalityQuery &Query) {
123 return Query.Opcode == G_MUL && Query.Types[0] == v2s64;
124 },
125 0)
126 .legalFor({v2s64})
127 .widenScalarToNextPow2(0)
128 .clampScalar(0, s32, s64)
129 .clampMaxNumElements(0, s8, 16)
130 .clampMaxNumElements(0, s16, 8)
131 .clampNumElements(0, v2s32, v4s32)
132 .clampNumElements(0, v2s64, v2s64)
134 [=](const LegalityQuery &Query) {
135 return Query.Types[0].getNumElements() <= 2;
136 },
137 0, s32)
138 .minScalarOrEltIf(
139 [=](const LegalityQuery &Query) {
140 return Query.Types[0].getNumElements() <= 4;
141 },
142 0, s16)
143 .minScalarOrEltIf(
144 [=](const LegalityQuery &Query) {
145 return Query.Types[0].getNumElements() <= 16;
146 },
147 0, s8)
149
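  // Note on the G_ADD/G_SUB/G_MUL/G_AND/G_OR/G_XOR rules above: the
  // minScalarOrEltIf ladder widens narrow elements based on the lane count,
  // e.g. <2 x s8> is promoted to <2 x s32> and <4 x s8> to <4 x s16>, so the
  // result is one of the legal NEON vector shapes listed in legalFor.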
150 getActionDefinitionsBuilder({G_SHL, G_ASHR, G_LSHR})
151 .customIf([=](const LegalityQuery &Query) {
152 const auto &SrcTy = Query.Types[0];
153 const auto &AmtTy = Query.Types[1];
154 return !SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
155 AmtTy.getSizeInBits() == 32;
156 })
157 .legalFor({
158 {s32, s32},
159 {s32, s64},
160 {s64, s64},
161 {v8s8, v8s8},
162 {v16s8, v16s8},
163 {v4s16, v4s16},
164 {v8s16, v8s16},
165 {v2s32, v2s32},
166 {v4s32, v4s32},
167 {v2s64, v2s64},
168 })
169 .widenScalarToNextPow2(0)
170 .clampScalar(1, s32, s64)
171 .clampScalar(0, s32, s64)
172 .clampNumElements(0, v2s32, v4s32)
173 .clampNumElements(0, v2s64, v2s64)
175 .minScalarSameAs(1, 0);
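  // The 32-bit scalar customIf above is handled by legalizeShlAshrLshr below,
  // which promotes a constant shift amount to s64 so the imported SelectionDAG
  // patterns can select the immediate forms of the shift instructions.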
176
178 .legalFor({{p0, s64}, {v2p0, v2s64}})
179 .clampScalar(1, s64, s64);
180
181 getActionDefinitionsBuilder(G_PTRMASK).legalFor({{p0, s64}});
182
183 getActionDefinitionsBuilder({G_SDIV, G_UDIV})
184 .legalFor({s32, s64})
185 .libcallFor({s128})
186 .clampScalar(0, s32, s64)
188 .scalarize(0);
189
190 getActionDefinitionsBuilder({G_SREM, G_UREM, G_SDIVREM, G_UDIVREM})
191 .lowerFor({s8, s16, s32, s64, v2s64, v4s32, v2s32})
193 .clampScalarOrElt(0, s32, s64)
194 .clampNumElements(0, v2s32, v4s32)
195 .clampNumElements(0, v2s64, v2s64)
196 .moreElementsToNextPow2(0);
197
198
199 getActionDefinitionsBuilder({G_SMULO, G_UMULO})
200 .widenScalarToNextPow2(0, /*Min = */ 32)
201 .clampScalar(0, s32, s64)
202 .lower();
203
204 getActionDefinitionsBuilder({G_SMULH, G_UMULH})
205 .legalFor({s64, v8s16, v16s8, v4s32})
206 .lower();
207
208 auto &MinMaxActions = getActionDefinitionsBuilder(
209 {G_SMIN, G_SMAX, G_UMIN, G_UMAX});
210 if (HasCSSC)
211 MinMaxActions
212 .legalFor({s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
213 // Make clamping conditional on the CSSC extension: without legal types we
214 // lower to CMP, which can fold one of the two sxtb's we'd otherwise need
215 // if we detect a type smaller than 32-bit.
216 .minScalar(0, s32);
217 else
218 MinMaxActions
219 .legalFor({v8s8, v16s8, v4s16, v8s16, v2s32, v4s32});
220 MinMaxActions
221 .clampNumElements(0, v8s8, v16s8)
222 .clampNumElements(0, v4s16, v8s16)
223 .clampNumElements(0, v2s32, v4s32)
224 // FIXME: This shouldn't be needed as v2s64 types are going to
225 // be expanded anyway, but G_ICMP doesn't support splitting vectors yet
226 .clampNumElements(0, v2s64, v2s64)
227 .lower();
228
230 {G_SADDE, G_SSUBE, G_UADDE, G_USUBE, G_SADDO, G_SSUBO, G_UADDO, G_USUBO})
231 .legalFor({{s32, s32}, {s64, s32}})
232 .clampScalar(0, s32, s64)
233 .clampScalar(1, s32, s64)
235
236 getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FNEG})
237 .legalFor({MinFPScalar, s32, s64, v2s64, v4s32, v2s32})
238 .clampScalar(0, MinFPScalar, s64)
239 .clampNumElements(0, v2s32, v4s32)
240 .clampNumElements(0, v2s64, v2s64);
241
242 getActionDefinitionsBuilder(G_FREM).libcallFor({s32, s64});
243
244 getActionDefinitionsBuilder({G_FCEIL, G_FABS, G_FSQRT, G_FFLOOR, G_FRINT,
245 G_FMA, G_INTRINSIC_TRUNC, G_INTRINSIC_ROUND,
246 G_FNEARBYINT, G_INTRINSIC_LRINT})
247 // If we don't have full FP16 support, then scalarize the elements of
248 // vectors containing fp16 types.
249 .fewerElementsIf(
250 [=, &ST](const LegalityQuery &Query) {
251 const auto &Ty = Query.Types[0];
252 return Ty.isVector() && Ty.getElementType() == s16 &&
253 !ST.hasFullFP16();
254 },
255 [=](const LegalityQuery &Query) { return std::make_pair(0, s16); })
256 // If we don't have full FP16 support, then widen s16 to s32 if we
257 // encounter it.
258 .widenScalarIf(
259 [=, &ST](const LegalityQuery &Query) {
260 return Query.Types[0] == s16 && !ST.hasFullFP16();
261 },
262 [=](const LegalityQuery &Query) { return std::make_pair(0, s32); })
263 .legalFor({s16, s32, s64, v2s32, v4s32, v2s64, v2s16, v4s16, v8s16});
264
266 {G_FCOS, G_FSIN, G_FLOG10, G_FLOG, G_FLOG2, G_FEXP, G_FEXP2, G_FPOW})
267 // We need a call for these, so we always need to scalarize.
268 .scalarize(0)
269 // Regardless of FP16 support, widen 16-bit elements to 32-bits.
270 .minScalar(0, s32)
271 .libcallFor({s32, s64, v2s32, v4s32, v2s64});
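  // For example, an s16 G_FSIN is widened to s32 by the minScalar rule above
  // and then emitted as a libcall (sinf for s32, sin for s64); vector inputs
  // are scalarized first, one call per element.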
272
274 .legalIf(all(typeInSet(0, {s32, s64, p0}),
275 typeInSet(1, {s8, s16, s32}), smallerThan(1, 0)))
277 .clampScalar(0, s32, s64)
279 .minScalar(1, s8)
280 .maxScalarIf(typeInSet(0, {s32}), 1, s16)
281 .maxScalarIf(typeInSet(0, {s64, p0}), 1, s32);
282
284 .legalIf(all(typeInSet(0, {s16, s32, s64, p0}),
285 typeInSet(1, {s32, s64, s128, p0}), smallerThan(0, 1)))
287 .clampScalar(1, s32, s128)
289 .minScalar(0, s16)
290 .maxScalarIf(typeInSet(1, {s32}), 0, s16)
291 .maxScalarIf(typeInSet(1, {s64, p0}), 0, s32)
292 .maxScalarIf(typeInSet(1, {s128}), 0, s64);
293
294
295 for (unsigned Op : {G_SEXTLOAD, G_ZEXTLOAD}) {
296 auto &Actions = getActionDefinitionsBuilder(Op);
297
298 if (Op == G_SEXTLOAD)
300
301 // Atomics have zero extending behavior.
302 Actions
303 .legalForTypesWithMemDesc({{s32, p0, s8, 8},
304 {s32, p0, s16, 8},
305 {s32, p0, s32, 8},
306 {s64, p0, s8, 2},
307 {s64, p0, s16, 2},
308 {s64, p0, s32, 4},
309 {s64, p0, s64, 8},
310 {p0, p0, s64, 8},
311 {v2s32, p0, s64, 8}})
312 .widenScalarToNextPow2(0)
313 .clampScalar(0, s32, s64)
314 // TODO: We could support sum-of-pow2's but the lowering code doesn't know
315 // how to do that yet.
316 .unsupportedIfMemSizeNotPow2()
317 // Lower anything left over into G_*EXT and G_LOAD
318 .lower();
319 }
320
321 auto IsPtrVecPred = [=](const LegalityQuery &Query) {
322 const LLT &ValTy = Query.Types[0];
323 if (!ValTy.isVector())
324 return false;
325 const LLT EltTy = ValTy.getElementType();
326 return EltTy.isPointer() && EltTy.getAddressSpace() == 0;
327 };
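  // IsPtrVecPred matches vectors of address-space-0 pointers (e.g. <2 x p0>).
  // G_LOAD/G_STORE send those to custom legalization, where legalizeLoadStore
  // bitcasts the value to the equivalent integer vector (<2 x s64>) so the
  // existing s64 load/store patterns can be reused.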
328
330 .customIf([=](const LegalityQuery &Query) {
331 return HasRCPC3 && Query.Types[0] == s128 &&
332 Query.MMODescrs[0].Ordering == AtomicOrdering::Acquire;
333 })
334 .customIf([=](const LegalityQuery &Query) {
335 return Query.Types[0] == s128 &&
336 Query.MMODescrs[0].Ordering != AtomicOrdering::NotAtomic;
337 })
338 .legalForTypesWithMemDesc({{s8, p0, s8, 8},
339 {s16, p0, s16, 8},
340 {s32, p0, s32, 8},
341 {s64, p0, s64, 8},
342 {p0, p0, s64, 8},
343 {s128, p0, s128, 8},
344 {v8s8, p0, s64, 8},
345 {v16s8, p0, s128, 8},
346 {v4s16, p0, s64, 8},
347 {v8s16, p0, s128, 8},
348 {v2s32, p0, s64, 8},
349 {v4s32, p0, s128, 8},
350 {v2s64, p0, s128, 8}})
351 // These extends are also legal
352 .legalForTypesWithMemDesc({{s32, p0, s8, 8}, {s32, p0, s16, 8}})
353 .widenScalarToNextPow2(0, /* MinSize = */ 8)
355 .clampScalar(0, s8, s64)
357 [=](const LegalityQuery &Query) {
358 // Clamp extending load results to 32-bits.
359 return Query.Types[0].isScalar() &&
360 Query.Types[0] != Query.MMODescrs[0].MemoryTy &&
361 Query.Types[0].getSizeInBits() > 32;
362 },
363 changeTo(0, s32))
364 .clampMaxNumElements(0, s8, 16)
365 .clampMaxNumElements(0, s16, 8)
366 .clampMaxNumElements(0, s32, 4)
367 .clampMaxNumElements(0, s64, 2)
368 .clampMaxNumElements(0, p0, 2)
369 .customIf(IsPtrVecPred)
370 .scalarizeIf(typeIs(0, v2s16), 0);
371
373 .customIf([=](const LegalityQuery &Query) {
374 return HasRCPC3 && Query.Types[0] == s128 &&
375 Query.MMODescrs[0].Ordering == AtomicOrdering::Release;
376 })
377 .customIf([=](const LegalityQuery &Query) {
378 return Query.Types[0] == s128 &&
379 Query.MMODescrs[0].Ordering != AtomicOrdering::NotAtomic;
380 })
381 .legalForTypesWithMemDesc(
382 {{s8, p0, s8, 8}, {s16, p0, s8, 8}, // truncstorei8 from s16
383 {s32, p0, s8, 8}, // truncstorei8 from s32
384 {s64, p0, s8, 8}, // truncstorei8 from s64
385 {s16, p0, s16, 8}, {s32, p0, s16, 8}, // truncstorei16 from s32
386 {s64, p0, s16, 8}, // truncstorei16 from s64
387 {s32, p0, s8, 8}, {s32, p0, s16, 8}, {s32, p0, s32, 8},
388 {s64, p0, s64, 8}, {s64, p0, s32, 8}, // truncstorei32 from s64
389 {p0, p0, s64, 8}, {s128, p0, s128, 8}, {v16s8, p0, s128, 8},
390 {v8s8, p0, s64, 8}, {v4s16, p0, s64, 8}, {v8s16, p0, s128, 8},
391 {v2s32, p0, s64, 8}, {v4s32, p0, s128, 8}, {v2s64, p0, s128, 8}})
392 .clampScalar(0, s8, s64)
393 .lowerIf([=](const LegalityQuery &Query) {
394 return Query.Types[0].isScalar() &&
395 Query.Types[0] != Query.MMODescrs[0].MemoryTy;
396 })
397 // Maximum: sN * k = 128
398 .clampMaxNumElements(0, s8, 16)
399 .clampMaxNumElements(0, s16, 8)
400 .clampMaxNumElements(0, s32, 4)
401 .clampMaxNumElements(0, s64, 2)
402 .clampMaxNumElements(0, p0, 2)
404 .customIf(IsPtrVecPred)
405 .scalarizeIf(typeIs(0, v2s16), 0);
406
407 // Constants
409 .legalFor({p0, s8, s16, s32, s64})
410 .widenScalarToNextPow2(0)
411 .clampScalar(0, s8, s64);
412 getActionDefinitionsBuilder(G_FCONSTANT)
413 .legalIf([=](const LegalityQuery &Query) {
414 const auto &Ty = Query.Types[0];
415 if (HasFP16 && Ty == s16)
416 return true;
417 return Ty == s32 || Ty == s64 || Ty == s128;
418 })
419 .clampScalar(0, MinFPScalar, s128);
420
422 .legalFor({{s32, s32},
423 {s32, s64},
424 {s32, p0},
425 {v4s32, v4s32},
426 {v2s32, v2s32},
427 {v2s64, v2s64},
428 {v2s64, v2p0},
429 {v4s16, v4s16},
430 {v8s16, v8s16},
431 {v8s8, v8s8},
432 {v16s8, v16s8}})
434 .clampScalar(1, s32, s64)
435 .clampScalar(0, s32, s32)
436 .minScalarEltSameAsIf(
437 [=](const LegalityQuery &Query) {
438 const LLT &Ty = Query.Types[0];
439 const LLT &SrcTy = Query.Types[1];
440 return Ty.isVector() && !SrcTy.getElementType().isPointer() &&
441 Ty.getElementType() != SrcTy.getElementType();
442 },
443 0, 1)
444 .minScalarOrEltIf(
445 [=](const LegalityQuery &Query) { return Query.Types[1] == v2s16; },
446 1, s32)
447 .minScalarOrEltIf(
448 [=](const LegalityQuery &Query) { return Query.Types[1] == v2p0; }, 0,
449 s64)
450 .clampNumElements(0, v2s32, v4s32);
451
453 // If we don't have full FP16 support, then scalarize the elements of
454 // vectors containing fp16 types.
456 [=](const LegalityQuery &Query) {
457 const auto &Ty = Query.Types[0];
458 return Ty.isVector() && Ty.getElementType() == s16 && !HasFP16;
459 },
460 [=](const LegalityQuery &Query) { return std::make_pair(0, s16); })
461 // If we don't have full FP16 support, then widen s16 to s32 if we
462 // encounter it.
463 .widenScalarIf(
464 [=](const LegalityQuery &Query) {
465 return Query.Types[0] == s16 && !HasFP16;
466 },
467 [=](const LegalityQuery &Query) { return std::make_pair(0, s32); })
468 .legalFor({{s16, s16},
469 {s32, s32},
470 {s32, s64},
471 {v4s32, v4s32},
472 {v2s32, v2s32},
473 {v2s64, v2s64},
474 {v4s16, v4s16},
475 {v8s16, v8s16}})
477 .clampScalar(1, s32, s64)
478 .clampScalar(0, s32, s32)
479 .minScalarEltSameAsIf(
480 [=](const LegalityQuery &Query) {
481 const LLT &Ty = Query.Types[0];
482 const LLT &SrcTy = Query.Types[1];
483 return Ty.isVector() && !SrcTy.getElementType().isPointer() &&
484 Ty.getElementType() != SrcTy.getElementType();
485 },
486 0, 1)
487 .clampNumElements(0, v2s32, v4s32);
488
489 // Extensions
490 auto ExtLegalFunc = [=](const LegalityQuery &Query) {
491 unsigned DstSize = Query.Types[0].getSizeInBits();
492
493 if (DstSize == 128 && !Query.Types[0].isVector())
494 return false; // Extending to a scalar s128 needs narrowing.
495
496 // Make sure that we have something that will fit in a register, and
497 // make sure it's a power of 2.
498 if (DstSize < 8 || DstSize > 128 || !isPowerOf2_32(DstSize))
499 return false;
500
501 const LLT &SrcTy = Query.Types[1];
502
503 // Make sure we fit in a register otherwise. Don't bother checking that
504 // the source type is below 128 bits. We shouldn't be allowing anything
505 // through which is wider than the destination in the first place.
506 unsigned SrcSize = SrcTy.getSizeInBits();
507 if (SrcSize < 8 || !isPowerOf2_32(SrcSize))
508 return false;
509
510 return true;
511 };
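  // For example, G_ZEXT from s8 to s64 is legal (both sizes are powers of two
  // and the destination fits in a register), while extending to a scalar s128
  // is rejected here and clamped back to s64 by the clampScalar rule below.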
512 getActionDefinitionsBuilder({G_ZEXT, G_SEXT, G_ANYEXT})
513 .legalIf(ExtLegalFunc)
514 .clampScalar(0, s64, s64); // Just for s128, others are handled above.
515
518 [=](const LegalityQuery &Query) { return Query.Types[0].isVector(); },
519 0, s8)
520 .customIf([=](const LegalityQuery &Query) {
521 LLT DstTy = Query.Types[0];
522 LLT SrcTy = Query.Types[1];
523 return DstTy == v8s8 && SrcTy.getSizeInBits() > 128;
524 })
525 .alwaysLegal();
526
527 getActionDefinitionsBuilder(G_SEXT_INREG)
528 .legalFor({s32, s64})
529 .legalFor(PackedVectorAllTypeList)
530 .lower();
531
532 // FP conversions
534 .legalFor(
535 {{s16, s32}, {s16, s64}, {s32, s64}, {v4s16, v4s32}, {v2s32, v2s64}})
536 .clampMaxNumElements(0, s32, 2);
538 .legalFor(
539 {{s32, s16}, {s64, s16}, {s64, s32}, {v4s32, v4s16}, {v2s64, v2s32}})
540 .clampMaxNumElements(0, s64, 2);
541
542 // Conversions
543 getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
544 .legalForCartesianProduct({s32, s64, v2s64, v4s32, v2s32})
545 .widenScalarToNextPow2(0)
546 .clampScalar(0, s32, s64)
548 .clampScalar(1, s32, s64);
549
550 getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
551 .legalForCartesianProduct({s32, s64, v2s64, v4s32, v2s32})
552 .clampScalar(1, s32, s64)
553 .minScalarSameAs(1, 0)
554 .clampScalar(0, s32, s64)
556
557 // Control-flow
559 .legalFor({s32})
560 .clampScalar(0, s32, s32);
561 getActionDefinitionsBuilder(G_BRINDIRECT).legalFor({p0});
562
564 .legalFor({{s32, s32}, {s64, s32}, {p0, s32}})
565 .widenScalarToNextPow2(0)
566 .clampScalar(0, s32, s64)
567 .clampScalar(1, s32, s32)
569 .lowerIf(isVector(0));
570
571 // Pointer-handling
572 getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0});
573
574 if (TM.getCodeModel() == CodeModel::Small)
575 getActionDefinitionsBuilder(G_GLOBAL_VALUE).custom();
576 else
577 getActionDefinitionsBuilder(G_GLOBAL_VALUE).legalFor({p0});
578
580 .legalFor({{s64, p0}, {v2s64, v2p0}})
581 .widenScalarToNextPow2(0, 64)
582 .clampScalar(0, s64, s64);
583
585 .unsupportedIf([&](const LegalityQuery &Query) {
586 return Query.Types[0].getSizeInBits() != Query.Types[1].getSizeInBits();
587 })
588 .legalFor({{p0, s64}, {v2p0, v2s64}});
589
590 // Casts for 32 and 64-bit width type are just copies.
591 // Same for 128-bit width type, except they are on the FPR bank.
593 // FIXME: This is wrong since G_BITCAST is not allowed to change the
594 // number of bits but it's what the previous code described and fixing
595 // it breaks tests.
596 .legalForCartesianProduct({s8, s16, s32, s64, s128, v16s8, v8s8, v4s8,
597 v8s16, v4s16, v2s16, v4s32, v2s32, v2s64,
598 v2p0});
599
600 getActionDefinitionsBuilder(G_VASTART).legalFor({p0});
601
602 // va_list must be a pointer, but most sized types are pretty easy to handle
603 // as the destination.
605 .customForCartesianProduct({s8, s16, s32, s64, p0}, {p0})
606 .clampScalar(0, s8, s64)
607 .widenScalarToNextPow2(0, /*Min*/ 8);
608
609 getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG_WITH_SUCCESS)
610 .lowerIf(
611 all(typeInSet(0, {s8, s16, s32, s64, s128}), typeIs(2, p0)));
612
613 getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG)
614 .customIf([](const LegalityQuery &Query) {
615 return Query.Types[0].getSizeInBits() == 128;
616 })
617 .clampScalar(0, s32, s64)
618 .legalIf(all(typeInSet(0, {s32, s64}), typeIs(1, p0)));
619
621 {G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD, G_ATOMICRMW_SUB, G_ATOMICRMW_AND,
622 G_ATOMICRMW_OR, G_ATOMICRMW_XOR, G_ATOMICRMW_MIN, G_ATOMICRMW_MAX,
623 G_ATOMICRMW_UMIN, G_ATOMICRMW_UMAX})
624 .clampScalar(0, s32, s64)
625 .legalIf(all(typeInSet(0, {s32, s64}), typeIs(1, p0)));
626
627 getActionDefinitionsBuilder(G_BLOCK_ADDR).legalFor({p0});
628
629 // Merge/Unmerge
630 for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
631 unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
632 unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;
634 .widenScalarToNextPow2(LitTyIdx, 8)
635 .widenScalarToNextPow2(BigTyIdx, 32)
636 .clampScalar(LitTyIdx, s8, s64)
637 .clampScalar(BigTyIdx, s32, s128)
638 .legalIf([=](const LegalityQuery &Q) {
639 switch (Q.Types[BigTyIdx].getSizeInBits()) {
640 case 32:
641 case 64:
642 case 128:
643 break;
644 default:
645 return false;
646 }
647 switch (Q.Types[LitTyIdx].getSizeInBits()) {
648 case 8:
649 case 16:
650 case 32:
651 case 64:
652 return true;
653 default:
654 return false;
655 }
656 });
657 }
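  // Net effect: G_MERGE_VALUES/G_UNMERGE_VALUES are legal when the wide type
  // is 32, 64 or 128 bits and the narrow type is 8, 16, 32 or 64 bits, e.g.
  // unmerging an s64 into two s32 halves.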
658
659 getActionDefinitionsBuilder(G_EXTRACT_VECTOR_ELT)
660 .unsupportedIf([=](const LegalityQuery &Query) {
661 const LLT &EltTy = Query.Types[1].getElementType();
662 return Query.Types[0] != EltTy;
663 })
664 .minScalar(2, s64)
665 .legalIf([=](const LegalityQuery &Query) {
666 const LLT &VecTy = Query.Types[1];
667 return VecTy == v2s16 || VecTy == v4s16 || VecTy == v8s16 ||
668 VecTy == v4s32 || VecTy == v2s64 || VecTy == v2s32 ||
669 VecTy == v8s8 || VecTy == v16s8 || VecTy == v2s32 ||
670 VecTy == v2p0;
671 })
672 .minScalarOrEltIf(
673 [=](const LegalityQuery &Query) {
674 // We want to promote <M x s1> to <M x s64> if that wouldn't
675 // cause the total vec size to be > 128b.
676 return Query.Types[1].getNumElements() <= 2;
677 },
678 0, s64)
679 .minScalarOrEltIf(
680 [=](const LegalityQuery &Query) {
681 return Query.Types[1].getNumElements() <= 4;
682 },
683 0, s32)
684 .minScalarOrEltIf(
685 [=](const LegalityQuery &Query) {
686 return Query.Types[1].getNumElements() <= 8;
687 },
688 0, s16)
689 .minScalarOrEltIf(
690 [=](const LegalityQuery &Query) {
691 return Query.Types[1].getNumElements() <= 16;
692 },
693 0, s8)
694 .minScalarOrElt(0, s8) // Worst case, we need at least s8.
695 .clampMaxNumElements(1, s64, 2)
696 .clampMaxNumElements(1, s32, 4)
697 .clampMaxNumElements(1, s16, 8)
698 .clampMaxNumElements(1, p0, 2);
699
700 getActionDefinitionsBuilder(G_INSERT_VECTOR_ELT)
701 .legalIf(typeInSet(0, {v16s8, v8s8, v8s16, v4s16, v4s32, v2s32, v2s64}));
702
703 getActionDefinitionsBuilder(G_BUILD_VECTOR)
704 .legalFor({{v8s8, s8},
705 {v16s8, s8},
706 {v2s16, s16},
707 {v4s16, s16},
708 {v8s16, s16},
709 {v2s32, s32},
710 {v4s32, s32},
711 {v2p0, p0},
712 {v2s64, s64}})
713 .clampNumElements(0, v4s32, v4s32)
714 .clampNumElements(0, v2s64, v2s64)
715 .minScalarOrElt(0, s8)
716 .minScalarSameAs(1, 0);
717
718 getActionDefinitionsBuilder(G_BUILD_VECTOR_TRUNC).lower();
719
722 {s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
723 .scalarize(1)
724 .widenScalarToNextPow2(1, /*Min=*/32)
725 .clampScalar(1, s32, s64)
726 .scalarSameSizeAs(0, 1);
727 getActionDefinitionsBuilder(G_CTLZ_ZERO_UNDEF).lower();
728
729 // TODO: Custom lowering for v2s32, v4s32, v2s64.
730 getActionDefinitionsBuilder(G_BITREVERSE)
731 .legalFor({s32, s64, v8s8, v16s8})
732 .widenScalarToNextPow2(0, /*Min = */ 32)
733 .clampScalar(0, s32, s64);
734
735 getActionDefinitionsBuilder(G_CTTZ_ZERO_UNDEF).lower();
736
738 .lowerIf(isVector(0))
739 .widenScalarToNextPow2(1, /*Min=*/32)
740 .clampScalar(1, s32, s64)
741 .scalarSameSizeAs(0, 1)
742 .legalIf([=](const LegalityQuery &Query) {
743 return (HasCSSC && typeInSet(0, {s32, s64})(Query));
744 })
745 .customIf([=](const LegalityQuery &Query) {
746 return (!HasCSSC && typeInSet(0, {s32, s64})(Query));
747 });
748
749 getActionDefinitionsBuilder(G_SHUFFLE_VECTOR)
750 .legalIf([=](const LegalityQuery &Query) {
751 const LLT &DstTy = Query.Types[0];
752 const LLT &SrcTy = Query.Types[1];
753 // For now just support the TBL2 variant which needs the source vectors
754 // to be the same size as the dest.
755 if (DstTy != SrcTy)
756 return false;
757 return llvm::is_contained({v2s32, v4s32, v2s64, v2p0, v16s8, v8s16},
758 DstTy);
759 })
760 // G_SHUFFLE_VECTOR can have scalar sources (from 1 x s vectors); we
761 // just want those lowered into G_BUILD_VECTOR.
762 .lowerIf([=](const LegalityQuery &Query) {
763 return !Query.Types[1].isVector();
764 })
765 .moreElementsIf(
766 [](const LegalityQuery &Query) {
767 return Query.Types[0].isVector() && Query.Types[1].isVector() &&
768 Query.Types[0].getNumElements() >
769 Query.Types[1].getNumElements();
770 },
771 changeTo(1, 0))
773 .clampNumElements(0, v4s32, v4s32)
774 .clampNumElements(0, v2s64, v2s64)
775 .moreElementsIf(
776 [](const LegalityQuery &Query) {
777 return Query.Types[0].isVector() && Query.Types[1].isVector() &&
778 Query.Types[0].getNumElements() <
779 Query.Types[1].getNumElements();
780 },
781 changeTo(0, 1));
782
783 getActionDefinitionsBuilder(G_CONCAT_VECTORS)
784 .legalFor({{v4s32, v2s32}, {v8s16, v4s16}, {v16s8, v8s8}});
785
786 getActionDefinitionsBuilder(G_JUMP_TABLE).legalFor({{p0}, {s64}});
787
788 getActionDefinitionsBuilder(G_BRJT).legalIf([=](const LegalityQuery &Query) {
789 return Query.Types[0] == p0 && Query.Types[1] == s64;
790 });
791
792 getActionDefinitionsBuilder(G_DYN_STACKALLOC).lower();
793
794 if (ST.hasMOPS()) {
795 // G_BZERO is not supported. Currently it is only emitted by
796 // PreLegalizerCombiner for G_MEMSET with zero constant.
798
800 .legalForCartesianProduct({p0}, {s64}, {s64})
801 .customForCartesianProduct({p0}, {s8}, {s64})
802 .immIdx(0); // Inform verifier imm idx 0 is handled.
803
804 getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE})
805 .legalForCartesianProduct({p0}, {p0}, {s64})
806 .immIdx(0); // Inform verifier imm idx 0 is handled.
807
808 // G_MEMCPY_INLINE does not have a tailcall immediate
809 getActionDefinitionsBuilder(G_MEMCPY_INLINE)
810 .legalForCartesianProduct({p0}, {p0}, {s64});
811
812 } else {
813 getActionDefinitionsBuilder({G_BZERO, G_MEMCPY, G_MEMMOVE, G_MEMSET})
814 .libcall();
815 }
816
817 // FIXME: Legal vector types are only legal with NEON.
818 auto &ABSActions = getActionDefinitionsBuilder(G_ABS);
819 if (HasCSSC)
820 ABSActions
821 .legalFor({s32, s64});
822 ABSActions
823 .legalFor(PackedVectorAllTypeList)
824 .lowerIf(isScalar(0));
825
826 getActionDefinitionsBuilder(G_VECREDUCE_FADD)
827 // We only have FADDP to do reduction-like operations. Lower the rest.
828 .legalFor({{s32, v2s32}, {s64, v2s64}})
829 .clampMaxNumElements(1, s64, 2)
830 .clampMaxNumElements(1, s32, 2)
831 .lower();
832
833 getActionDefinitionsBuilder(G_VECREDUCE_ADD)
834 .legalFor(
835 {{s8, v16s8}, {s16, v8s16}, {s32, v4s32}, {s32, v2s32}, {s64, v2s64}})
836 .clampMaxNumElements(1, s64, 2)
837 .clampMaxNumElements(1, s32, 4)
838 .lower();
839
841 {G_VECREDUCE_OR, G_VECREDUCE_AND, G_VECREDUCE_XOR})
842 // Try to break down into smaller vectors as long as they're at least 64
843 // bits. This lets us use vector operations for some parts of the
844 // reduction.
845 .fewerElementsIf(
846 [=](const LegalityQuery &Q) {
847 LLT SrcTy = Q.Types[1];
848 if (SrcTy.isScalar())
849 return false;
851 return false;
852 // We can usually perform 64b vector operations.
853 return SrcTy.getSizeInBits() > 64;
854 },
855 [=](const LegalityQuery &Q) {
856 LLT SrcTy = Q.Types[1];
857 return std::make_pair(1, SrcTy.divide(2));
858 })
859 .scalarize(1)
860 .lower();
861
862 getActionDefinitionsBuilder({G_UADDSAT, G_USUBSAT})
863 .lowerIf([=](const LegalityQuery &Q) { return Q.Types[0].isScalar(); });
864
865 getActionDefinitionsBuilder({G_FSHL, G_FSHR}).lower();
866
868 .legalFor({{s32, s64}, {s64, s64}})
869 .customIf([=](const LegalityQuery &Q) {
870 return Q.Types[0].isScalar() && Q.Types[1].getScalarSizeInBits() < 64;
871 })
872 .lower();
874
875 getActionDefinitionsBuilder({G_SBFX, G_UBFX})
876 .customFor({{s32, s32}, {s64, s64}});
877
878 auto always = [=](const LegalityQuery &Q) { return true; };
879 auto &CTPOPActions = getActionDefinitionsBuilder(G_CTPOP);
880 if (HasCSSC)
881 CTPOPActions
882 .legalFor({{s32, s32},
883 {s64, s64},
884 {v8s8, v8s8},
885 {v16s8, v16s8}})
886 .customFor({{s128, s128},
887 {v2s64, v2s64},
888 {v2s32, v2s32},
889 {v4s32, v4s32},
890 {v4s16, v4s16},
891 {v8s16, v8s16}});
892 else
893 CTPOPActions
894 .legalFor({{v8s8, v8s8},
895 {v16s8, v16s8}})
896 .customFor({{s32, s32},
897 {s64, s64},
898 {s128, s128},
899 {v2s64, v2s64},
900 {v2s32, v2s32},
901 {v4s32, v4s32},
902 {v4s16, v4s16},
903 {v8s16, v8s16}});
904 CTPOPActions
905 .clampScalar(0, s32, s128)
906 .widenScalarToNextPow2(0)
907 .minScalarEltSameAsIf(always, 1, 0)
908 .maxScalarEltSameAsIf(always, 1, 0);
909
910 // TODO: Vector types.
911 getActionDefinitionsBuilder({G_SADDSAT, G_SSUBSAT}).lowerIf(isScalar(0));
912
913 // TODO: Vector types.
914 getActionDefinitionsBuilder({G_FMAXNUM, G_FMINNUM})
915 .legalFor({MinFPScalar, s32, s64})
916 .libcallFor({s128})
917 .minScalar(0, MinFPScalar);
918
919 getActionDefinitionsBuilder({G_FMAXIMUM, G_FMINIMUM})
920 .legalFor({MinFPScalar, s32, s64, v2s32, v4s32, v2s64})
921 .legalIf([=](const LegalityQuery &Query) {
922 const auto &Ty = Query.Types[0];
923 return (Ty == v8s16 || Ty == v4s16) && HasFP16;
924 })
925 .minScalar(0, MinFPScalar)
926 .clampNumElements(0, v4s16, v8s16)
927 .clampNumElements(0, v2s32, v4s32)
928 .clampNumElements(0, v2s64, v2s64);
929
930 // TODO: Libcall support for s128.
931 // TODO: s16 should be legal with full FP16 support.
932 getActionDefinitionsBuilder({G_LROUND, G_LLROUND})
933 .legalFor({{s64, s32}, {s64, s64}});
934
935 // TODO: Custom legalization for vector types.
936 // TODO: Custom legalization for mismatched types.
937 // TODO: s16 support.
938 getActionDefinitionsBuilder(G_FCOPYSIGN).customFor({{s32, s32}, {s64, s64}});
939
941 getLegacyLegalizerInfo().computeTables();
943 verify(*ST.getInstrInfo());
944}
945
946 bool AArch64LegalizerInfo::legalizeCustom(LegalizerHelper &Helper,
947 MachineInstr &MI) const {
948 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
949 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
950 GISelChangeObserver &Observer = Helper.Observer;
951 switch (MI.getOpcode()) {
952 default:
953 // No idea what to do.
954 return false;
955 case TargetOpcode::G_VAARG:
956 return legalizeVaArg(MI, MRI, MIRBuilder);
957 case TargetOpcode::G_LOAD:
958 case TargetOpcode::G_STORE:
959 return legalizeLoadStore(MI, MRI, MIRBuilder, Observer);
960 case TargetOpcode::G_SHL:
961 case TargetOpcode::G_ASHR:
962 case TargetOpcode::G_LSHR:
963 return legalizeShlAshrLshr(MI, MRI, MIRBuilder, Observer);
964 case TargetOpcode::G_GLOBAL_VALUE:
965 return legalizeSmallCMGlobalValue(MI, MRI, MIRBuilder, Observer);
966 case TargetOpcode::G_TRUNC:
967 return legalizeVectorTrunc(MI, Helper);
968 case TargetOpcode::G_SBFX:
969 case TargetOpcode::G_UBFX:
970 return legalizeBitfieldExtract(MI, MRI, Helper);
971 case TargetOpcode::G_ROTR:
972 return legalizeRotate(MI, MRI, Helper);
973 case TargetOpcode::G_CTPOP:
974 return legalizeCTPOP(MI, MRI, Helper);
975 case TargetOpcode::G_ATOMIC_CMPXCHG:
976 return legalizeAtomicCmpxchg128(MI, MRI, Helper);
977 case TargetOpcode::G_CTTZ:
978 return legalizeCTTZ(MI, Helper);
979 case TargetOpcode::G_BZERO:
980 case TargetOpcode::G_MEMCPY:
981 case TargetOpcode::G_MEMMOVE:
982 case TargetOpcode::G_MEMSET:
983 return legalizeMemOps(MI, Helper);
984 case TargetOpcode::G_FCOPYSIGN:
985 return legalizeFCopySign(MI, Helper);
986 }
987
988 llvm_unreachable("expected switch to return");
989}
990
991 bool AArch64LegalizerInfo::legalizeRotate(MachineInstr &MI,
992 MachineRegisterInfo &MRI,
993 LegalizerHelper &Helper) const {
994 // To allow for imported patterns to match, we ensure that the rotate amount
995 // is 64b with an extension.
996 Register AmtReg = MI.getOperand(2).getReg();
997 LLT AmtTy = MRI.getType(AmtReg);
998 (void)AmtTy;
999 assert(AmtTy.isScalar() && "Expected a scalar rotate");
1000 assert(AmtTy.getSizeInBits() < 64 && "Expected this rotate to be legal");
1001 auto NewAmt = Helper.MIRBuilder.buildZExt(LLT::scalar(64), AmtReg);
1002 Helper.Observer.changingInstr(MI);
1003 MI.getOperand(2).setReg(NewAmt.getReg(0));
1004 Helper.Observer.changedInstr(MI);
1005 return true;
1006}
1007
1008 static void extractParts(Register Reg, MachineRegisterInfo &MRI,
1009 MachineIRBuilder &MIRBuilder, LLT Ty, int NumParts,
1010 SmallVectorImpl<Register> &VRegs) {
1011 for (int I = 0; I < NumParts; ++I)
1012 VRegs.push_back(MRI.createGenericVirtualRegister(Ty));
1013 MIRBuilder.buildUnmerge(VRegs, Reg);
1014}
1015
1016bool AArch64LegalizerInfo::legalizeVectorTrunc(
1017 MachineInstr &MI, LegalizerHelper &Helper) const {
1018 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
1019 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
1020 // Similar to how operand splitting is done in SelectionDAG, we can handle
1021 // %res(v8s8) = G_TRUNC %in(v8s32) by generating:
1022 // %inlo(<4x s32>), %inhi(<4 x s32>) = G_UNMERGE %in(<8 x s32>)
1023 // %lo16(<4 x s16>) = G_TRUNC %inlo
1024 // %hi16(<4 x s16>) = G_TRUNC %inhi
1025 // %in16(<8 x s16>) = G_CONCAT_VECTORS %lo16, %hi16
1026 // %res(<8 x s8>) = G_TRUNC %in16
1027
1028 Register DstReg = MI.getOperand(0).getReg();
1029 Register SrcReg = MI.getOperand(1).getReg();
1030 LLT DstTy = MRI.getType(DstReg);
1031 LLT SrcTy = MRI.getType(SrcReg);
1032 assert(llvm::has_single_bit<uint32_t>(DstTy.getSizeInBits()) &&
1033 llvm::has_single_bit<uint32_t>(SrcTy.getSizeInBits()));
1034
1035 // Split input type.
1036 LLT SplitSrcTy =
1038 // First, split the source into two smaller vectors.
1039 SmallVector<Register, 2> SplitSrcs;
1040 extractParts(SrcReg, MRI, MIRBuilder, SplitSrcTy, 2, SplitSrcs);
1041
1042 // Truncate the splits into intermediate narrower elements.
1043 LLT InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits() * 2);
1044 for (unsigned I = 0; I < SplitSrcs.size(); ++I)
1045 SplitSrcs[I] = MIRBuilder.buildTrunc(InterTy, SplitSrcs[I]).getReg(0);
1046
1047 auto Concat = MIRBuilder.buildConcatVectors(
1048 DstTy.changeElementSize(DstTy.getScalarSizeInBits() * 2), SplitSrcs);
1049
1050 Helper.Observer.changingInstr(MI);
1051 MI.getOperand(1).setReg(Concat.getReg(0));
1052 Helper.Observer.changedInstr(MI);
1053 return true;
1054}
1055
1056 bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue(
1057 MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
1058 GISelChangeObserver &Observer) const {
1059 assert(MI.getOpcode() == TargetOpcode::G_GLOBAL_VALUE);
1060 // We do this custom legalization to convert G_GLOBAL_VALUE into target ADRP +
1061 // G_ADD_LOW instructions.
1062 // By splitting this here, we can optimize accesses in the small code model by
1063 // folding in the G_ADD_LOW into the load/store offset.
1064 auto &GlobalOp = MI.getOperand(1);
1065 const auto* GV = GlobalOp.getGlobal();
1066 if (GV->isThreadLocal())
1067 return true; // Don't want to modify TLS vars.
1068
1069 auto &TM = ST->getTargetLowering()->getTargetMachine();
1070 unsigned OpFlags = ST->ClassifyGlobalReference(GV, TM);
1071
1072 if (OpFlags & AArch64II::MO_GOT)
1073 return true;
1074
1075 auto Offset = GlobalOp.getOffset();
1076 Register DstReg = MI.getOperand(0).getReg();
1077 auto ADRP = MIRBuilder.buildInstr(AArch64::ADRP, {LLT::pointer(0, 64)}, {})
1078 .addGlobalAddress(GV, Offset, OpFlags | AArch64II::MO_PAGE);
1079 // Set the regclass on the dest reg too.
1080 MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass);
1081
1082 // MO_TAGGED on the page indicates a tagged address. Set the tag now. We do so
1083 // by creating a MOVK that sets bits 48-63 of the register to (global address
1084 // + 0x100000000 - PC) >> 48. The additional 0x100000000 offset here is to
1085 // prevent an incorrect tag being generated during relocation when the
1086 // global appears before the code section. Without the offset, a global at
1087 // `0x0f00'0000'0000'1000` (i.e. at `0x1000` with tag `0xf`) that's referenced
1088 // by code at `0x2000` would result in `0x0f00'0000'0000'1000 - 0x2000 =
1089 // 0x0eff'ffff'ffff'f000`, meaning the tag would be incorrectly set to `0xe`
1090 // instead of `0xf`.
1091 // This assumes that we're in the small code model so we can assume a binary
1092 // size of <= 4GB, which makes the untagged PC relative offset positive. The
1093 // binary must also be loaded into address range [0, 2^48). Both of these
1094 // properties need to be ensured at runtime when using tagged addresses.
1095 if (OpFlags & AArch64II::MO_TAGGED) {
1096 assert(!Offset &&
1097 "Should not have folded in an offset for a tagged global!");
1098 ADRP = MIRBuilder.buildInstr(AArch64::MOVKXi, {LLT::pointer(0, 64)}, {ADRP})
1099 .addGlobalAddress(GV, 0x100000000,
1100 AArch64II::MO_PREL | AArch64II::MO_G3)
1101 .addImm(48);
1102 MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass);
1103 }
1104
1105 MIRBuilder.buildInstr(AArch64::G_ADD_LOW, {DstReg}, {ADRP})
1106 .addGlobalAddress(GV, Offset,
1107 OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
1108 MI.eraseFromParent();
1109 return true;
1110}
1111
1112 bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
1113 MachineInstr &MI) const {
1114 switch (MI.getIntrinsicID()) {
1115 case Intrinsic::vacopy: {
1116 unsigned PtrSize = ST->isTargetILP32() ? 4 : 8;
1117 unsigned VaListSize =
1118 (ST->isTargetDarwin() || ST->isTargetWindows())
1119 ? PtrSize
1120 : ST->isTargetILP32() ? 20 : 32;
1121
1122 MachineFunction &MF = *MI.getMF();
1123 auto Val = MF.getRegInfo().createGenericVirtualRegister(
1124 LLT::scalar(VaListSize * 8));
1125 MachineIRBuilder MIB(MI);
1126 MIB.buildLoad(Val, MI.getOperand(2),
1129 VaListSize, Align(PtrSize)));
1130 MIB.buildStore(Val, MI.getOperand(1),
1133 VaListSize, Align(PtrSize)));
1134 MI.eraseFromParent();
1135 return true;
1136 }
1137 case Intrinsic::get_dynamic_area_offset: {
1138 MachineIRBuilder &MIB = Helper.MIRBuilder;
1139 MIB.buildConstant(MI.getOperand(0).getReg(), 0);
1140 MI.eraseFromParent();
1141 return true;
1142 }
1143 case Intrinsic::aarch64_mops_memset_tag: {
1144 assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
1145 // Zext the value to 64 bit
1146 MachineIRBuilder MIB(MI);
1147 auto &Value = MI.getOperand(3);
1148 Register ZExtValueReg = MIB.buildAnyExt(LLT::scalar(64), Value).getReg(0);
1149 Value.setReg(ZExtValueReg);
1150 return true;
1151 }
1152 case Intrinsic::prefetch: {
1153 MachineIRBuilder MIB(MI);
1154 auto &AddrVal = MI.getOperand(1);
1155
1156 int64_t IsWrite = MI.getOperand(2).getImm();
1157 int64_t Locality = MI.getOperand(3).getImm();
1158 int64_t IsData = MI.getOperand(4).getImm();
1159
1160 bool IsStream = Locality == 0;
1161 if (Locality != 0) {
1162 assert(Locality <= 3 && "Prefetch locality out-of-range");
1163 // The locality degree is the opposite of the cache speed.
1164 // Put the number the other way around.
1165 // The encoding starts at 0 for level 1
1166 Locality = 3 - Locality;
1167 }
1168
1169 unsigned PrfOp =
1170 (IsWrite << 4) | (!IsData << 3) | (Locality << 1) | IsStream;
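    // PrfOp mirrors the PRFM immediate: bit 4 selects PST vs PLD, bit 3
    // instruction vs data cache, bits 2:1 the target cache level and bit 0
    // the streaming policy. E.g. a read of data with locality 3 encodes as
    // 0 (PLDL1KEEP) and a write of data with locality 0 as 17 (PSTL1STRM).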
1171
1172 MIB.buildInstr(AArch64::G_PREFETCH).addImm(PrfOp).add(AddrVal);
1173 MI.eraseFromParent();
1174 return true;
1175 }
1176 case Intrinsic::aarch64_prefetch: {
1177 MachineIRBuilder MIB(MI);
1178 auto &AddrVal = MI.getOperand(1);
1179
1180 int64_t IsWrite = MI.getOperand(2).getImm();
1181 int64_t Target = MI.getOperand(3).getImm();
1182 int64_t IsStream = MI.getOperand(4).getImm();
1183 int64_t IsData = MI.getOperand(5).getImm();
1184
1185 unsigned PrfOp = (IsWrite << 4) | // Load/Store bit
1186 (!IsData << 3) | // IsDataCache bit
1187 (Target << 1) | // Cache level bits
1188 (unsigned)IsStream; // Stream bit
1189
1190 MIB.buildInstr(AArch64::G_PREFETCH).addImm(PrfOp).add(AddrVal);
1191 MI.eraseFromParent();
1192 return true;
1193 }
1194 }
1195
1196 return true;
1197}
1198
1199 bool AArch64LegalizerInfo::legalizeShlAshrLshr(
1200 MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
1201 GISelChangeObserver &Observer) const {
1202 assert(MI.getOpcode() == TargetOpcode::G_ASHR ||
1203 MI.getOpcode() == TargetOpcode::G_LSHR ||
1204 MI.getOpcode() == TargetOpcode::G_SHL);
1205 // If the shift amount is a G_CONSTANT, promote it to a 64 bit type so the
1206 // imported patterns can select it later. Either way, it will be legal.
1207 Register AmtReg = MI.getOperand(2).getReg();
1208 auto VRegAndVal = getIConstantVRegValWithLookThrough(AmtReg, MRI);
1209 if (!VRegAndVal)
1210 return true;
1211 // Check the shift amount is in range for an immediate form.
1212 int64_t Amount = VRegAndVal->Value.getSExtValue();
1213 if (Amount > 31)
1214 return true; // This will have to remain a register variant.
1215 auto ExtCst = MIRBuilder.buildConstant(LLT::scalar(64), Amount);
1216 Observer.changingInstr(MI);
1217 MI.getOperand(2).setReg(ExtCst.getReg(0));
1218 Observer.changedInstr(MI);
1219 return true;
1220}
1221
1222 static void matchLDPSTPAddrMode(Register Root, Register &Base, int &Offset,
1223 MachineRegisterInfo &MRI) {
1224 Base = Root;
1225 Offset = 0;
1226
1227 Register NewBase;
1228 int64_t NewOffset;
1229 if (mi_match(Root, MRI, m_GPtrAdd(m_Reg(NewBase), m_ICst(NewOffset))) &&
1230 isShiftedInt<7, 3>(NewOffset)) {
1231 Base = NewBase;
1232 Offset = NewOffset;
1233 }
1234}
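// Note: isShiftedInt<7, 3> above accepts offsets that are multiples of 8 in
// the range [-512, 504], i.e. what the signed, scaled imm7 field of LDP/STP
// can encode; the caller divides the byte offset by 8 to form the immediate.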
1235
1236// FIXME: This should be removed and replaced with the generic bitcast legalize
1237// action.
1238 bool AArch64LegalizerInfo::legalizeLoadStore(
1239 MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
1240 GISelChangeObserver &Observer) const {
1241 assert(MI.getOpcode() == TargetOpcode::G_STORE ||
1242 MI.getOpcode() == TargetOpcode::G_LOAD);
1243 // Here we just try to handle vector loads/stores where our value type might
1244 // have pointer elements, which the SelectionDAG importer can't handle. To
1245 // allow the existing patterns for s64 to fire for p0, we just try to bitcast
1246 // the value to use s64 types.
1247
1248 // Custom legalization requires that the instruction, if not deleted, be fully
1249 // legalized. In order to allow further legalization of the inst, we create
1250 // a new instruction and erase the existing one.
1251
1252 Register ValReg = MI.getOperand(0).getReg();
1253 const LLT ValTy = MRI.getType(ValReg);
1254
1255 if (ValTy == LLT::scalar(128)) {
1256
1257 AtomicOrdering Ordering = (*MI.memoperands_begin())->getSuccessOrdering();
1258 bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD;
1259 bool IsLoadAcquire = IsLoad && Ordering == AtomicOrdering::Acquire;
1260 bool IsStoreRelease = !IsLoad && Ordering == AtomicOrdering::Release;
1261 bool IsRcpC3 =
1262 ST->hasLSE2() && ST->hasRCPC3() && (IsLoadAcquire || IsStoreRelease);
1263
1264 LLT s64 = LLT::scalar(64);
1265
1266 unsigned Opcode;
1267 if (IsRcpC3) {
1268 Opcode = IsLoad ? AArch64::LDIAPPX : AArch64::STILPX;
1269 } else {
1270 // For LSE2, loads/stores should have been converted to monotonic and had
1271 // a fence inserted after them.
1272 assert(Ordering == AtomicOrdering::Monotonic ||
1273 Ordering == AtomicOrdering::Unordered);
1274 assert(ST->hasLSE2() && "ldp/stp not single copy atomic without +lse2");
1275
1276 Opcode = IsLoad ? AArch64::LDPXi : AArch64::STPXi;
1277 }
1278
1279 MachineInstrBuilder NewI;
1280 if (IsLoad) {
1281 NewI = MIRBuilder.buildInstr(Opcode, {s64, s64}, {});
1282 MIRBuilder.buildMergeLikeInstr(
1283 ValReg, {NewI->getOperand(0), NewI->getOperand(1)});
1284 } else {
1285 auto Split = MIRBuilder.buildUnmerge(s64, MI.getOperand(0));
1286 NewI = MIRBuilder.buildInstr(
1287 Opcode, {}, {Split->getOperand(0), Split->getOperand(1)});
1288 }
1289
1290 if (IsRcpC3) {
1291 NewI.addUse(MI.getOperand(1).getReg());
1292 } else {
1293 Register Base;
1294 int Offset;
1295 matchLDPSTPAddrMode(MI.getOperand(1).getReg(), Base, Offset, MRI);
1296 NewI.addUse(Base);
1297 NewI.addImm(Offset / 8);
1298 }
1299
1300 NewI.cloneMemRefs(MI);
1301 constrainSelectedInstRegOperands(*NewI, *ST->getInstrInfo(),
1302 *MRI.getTargetRegisterInfo(),
1303 *ST->getRegBankInfo());
1303 *ST->getRegBankInfo());
1304 MI.eraseFromParent();
1305 return true;
1306 }
1307
1308 if (!ValTy.isVector() || !ValTy.getElementType().isPointer() ||
1309 ValTy.getElementType().getAddressSpace() != 0) {
1310 LLVM_DEBUG(dbgs() << "Tried to do custom legalization on wrong load/store");
1311 return false;
1312 }
1313
1314 unsigned PtrSize = ValTy.getElementType().getSizeInBits();
1315 const LLT NewTy = LLT::vector(ValTy.getElementCount(), PtrSize);
1316 auto &MMO = **MI.memoperands_begin();
1317 MMO.setType(NewTy);
1318
1319 if (MI.getOpcode() == TargetOpcode::G_STORE) {
1320 auto Bitcast = MIRBuilder.buildBitcast(NewTy, ValReg);
1321 MIRBuilder.buildStore(Bitcast.getReg(0), MI.getOperand(1), MMO);
1322 } else {
1323 auto NewLoad = MIRBuilder.buildLoad(NewTy, MI.getOperand(1), MMO);
1324 MIRBuilder.buildBitcast(ValReg, NewLoad);
1325 }
1326 MI.eraseFromParent();
1327 return true;
1328}
1329
1330 bool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI,
1331 MachineRegisterInfo &MRI,
1332 MachineIRBuilder &MIRBuilder) const {
1333 MachineFunction &MF = MIRBuilder.getMF();
1334 Align Alignment(MI.getOperand(2).getImm());
1335 Register Dst = MI.getOperand(0).getReg();
1336 Register ListPtr = MI.getOperand(1).getReg();
1337
1338 LLT PtrTy = MRI.getType(ListPtr);
1339 LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());
1340
1341 const unsigned PtrSize = PtrTy.getSizeInBits() / 8;
1342 const Align PtrAlign = Align(PtrSize);
1343 auto List = MIRBuilder.buildLoad(
1344 PtrTy, ListPtr,
1346 PtrTy, PtrAlign));
1347
1348 MachineInstrBuilder DstPtr;
1349 if (Alignment > PtrAlign) {
1350 // Realign the list to the actual required alignment.
1351 auto AlignMinus1 =
1352 MIRBuilder.buildConstant(IntPtrTy, Alignment.value() - 1);
1353 auto ListTmp = MIRBuilder.buildPtrAdd(PtrTy, List, AlignMinus1.getReg(0));
1354 DstPtr = MIRBuilder.buildMaskLowPtrBits(PtrTy, ListTmp, Log2(Alignment));
1355 } else
1356 DstPtr = List;
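  // When Alignment > PtrAlign, the branch above computes
  // DstPtr = (List + Alignment - 1) & ~(Alignment - 1): the ptradd adds
  // Alignment - 1 and buildMaskLowPtrBits clears the low Log2(Alignment)
  // bits, e.g. (List + 15) & ~15 for a 16-byte aligned type.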
1357
1358 LLT ValTy = MRI.getType(Dst);
1359 uint64_t ValSize = ValTy.getSizeInBits() / 8;
1360 MIRBuilder.buildLoad(
1361 Dst, DstPtr,
1363 ValTy, std::max(Alignment, PtrAlign)));
1364
1365 auto Size = MIRBuilder.buildConstant(IntPtrTy, alignTo(ValSize, PtrAlign));
1366
1367 auto NewList = MIRBuilder.buildPtrAdd(PtrTy, DstPtr, Size.getReg(0));
1368
1369 MIRBuilder.buildStore(NewList, ListPtr,
1372 PtrTy, PtrAlign));
1373
1374 MI.eraseFromParent();
1375 return true;
1376}
1377
1378 bool AArch64LegalizerInfo::legalizeBitfieldExtract(
1379 MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
1380 // Only legal if we can select immediate forms.
1381 // TODO: Lower this otherwise.
1382 return getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI) &&
1383 getIConstantVRegValWithLookThrough(MI.getOperand(3).getReg(), MRI);
1384}
1385
1386 bool AArch64LegalizerInfo::legalizeCTPOP(MachineInstr &MI,
1387 MachineRegisterInfo &MRI,
1388 LegalizerHelper &Helper) const {
1389 // When there is no integer popcount instruction (FEAT_CSSC isn't available),
1390 // it can be more efficiently lowered to the following sequence that uses
1391 // AdvSIMD registers/instructions as long as the copies to/from the AdvSIMD
1392 // registers are cheap.
1393 // FMOV D0, X0 // copy 64-bit int to vector, high bits zero'd
1394 // CNT V0.8B, V0.8B // 8xbyte pop-counts
1395 // ADDV B0, V0.8B // sum 8xbyte pop-counts
1396 // UMOV X0, V0.B[0] // copy byte result back to integer reg
1397 //
1398 // For 128 bit vector popcounts, we lower to the following sequence:
1399 // cnt.16b v0, v0 // v8s16, v4s32, v2s64
1400 // uaddlp.8h v0, v0 // v8s16, v4s32, v2s64
1401 // uaddlp.4s v0, v0 // v4s32, v2s64
1402 // uaddlp.2d v0, v0 // v2s64
1403 //
1404 // For 64 bit vector popcounts, we lower to the following sequence:
1405 // cnt.8b v0, v0 // v4s16, v2s32
1406 // uaddlp.4h v0, v0 // v4s16, v2s32
1407 // uaddlp.2s v0, v0 // v2s32
1408
1409 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
1410 Register Dst = MI.getOperand(0).getReg();
1411 Register Val = MI.getOperand(1).getReg();
1412 LLT Ty = MRI.getType(Val);
1413 unsigned Size = Ty.getSizeInBits();
1414
1415 assert(Ty == MRI.getType(Dst) &&
1416 "Expected src and dst to have the same type!");
1417
1418 if (ST->hasCSSC() && Ty.isScalar() && Size == 128) {
1419 LLT s64 = LLT::scalar(64);
1420
1421 auto Split = MIRBuilder.buildUnmerge(s64, Val);
1422 auto CTPOP1 = MIRBuilder.buildCTPOP(s64, Split->getOperand(0));
1423 auto CTPOP2 = MIRBuilder.buildCTPOP(s64, Split->getOperand(1));
1424 auto Add = MIRBuilder.buildAdd(s64, CTPOP1, CTPOP2);
1425
1426 MIRBuilder.buildZExt(Dst, Add);
1427 MI.eraseFromParent();
1428 return true;
1429 }
1430
1431 if (!ST->hasNEON() ||
1432 MI.getMF()->getFunction().hasFnAttribute(Attribute::NoImplicitFloat)) {
1433 // Use generic lowering when custom lowering is not possible.
1434 return Ty.isScalar() && (Size == 32 || Size == 64) &&
1435 Helper.lowerBitCount(MI) ==
1436 LegalizerHelper::LegalizeResult::Legalized;
1437 }
1438
1439 // Pre-conditioning: widen Val up to the nearest vector type.
1440 // s32,s64,v4s16,v2s32 -> v8i8
1441 // v8s16,v4s32,v2s64 -> v16i8
1442 LLT VTy = Size == 128 ? LLT::fixed_vector(16, 8) : LLT::fixed_vector(8, 8);
1443 if (Ty.isScalar()) {
1444 assert((Size == 32 || Size == 64 || Size == 128) && "Expected only 32, 64, or 128 bit scalars!");
1445 if (Size == 32) {
1446 Val = MIRBuilder.buildZExt(LLT::scalar(64), Val).getReg(0);
1447 }
1448 }
1449 Val = MIRBuilder.buildBitcast(VTy, Val).getReg(0);
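  // A 32-bit scalar input is zero-extended to 64 bits first so the bitcast to
  // the 64-bit <8 x s8> type is size-preserving; the extra zero lanes add
  // nothing to the final population count.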
1450
1451 // Count bits in each byte-sized lane.
1452 auto CTPOP = MIRBuilder.buildCTPOP(VTy, Val);
1453
1454 // Sum across lanes.
1455 Register HSum = CTPOP.getReg(0);
1456 unsigned Opc;
1457 SmallVector<LLT> HAddTys;
1458 if (Ty.isScalar()) {
1459 Opc = Intrinsic::aarch64_neon_uaddlv;
1460 HAddTys.push_back(LLT::scalar(32));
1461 } else if (Ty == LLT::fixed_vector(8, 16)) {
1462 Opc = Intrinsic::aarch64_neon_uaddlp;
1463 HAddTys.push_back(LLT::fixed_vector(8, 16));
1464 } else if (Ty == LLT::fixed_vector(4, 32)) {
1465 Opc = Intrinsic::aarch64_neon_uaddlp;
1466 HAddTys.push_back(LLT::fixed_vector(8, 16));
1467 HAddTys.push_back(LLT::fixed_vector(4, 32));
1468 } else if (Ty == LLT::fixed_vector(2, 64)) {
1469 Opc = Intrinsic::aarch64_neon_uaddlp;
1470 HAddTys.push_back(LLT::fixed_vector(8, 16));
1471 HAddTys.push_back(LLT::fixed_vector(4, 32));
1472 HAddTys.push_back(LLT::fixed_vector(2, 64));
1473 } else if (Ty == LLT::fixed_vector(4, 16)) {
1474 Opc = Intrinsic::aarch64_neon_uaddlp;
1475 HAddTys.push_back(LLT::fixed_vector(4, 16));
1476 } else if (Ty == LLT::fixed_vector(2, 32)) {
1477 Opc = Intrinsic::aarch64_neon_uaddlp;
1478 HAddTys.push_back(LLT::fixed_vector(4, 16));
1479 HAddTys.push_back(LLT::fixed_vector(2, 32));
1480 } else
1481 llvm_unreachable("unexpected vector shape");
1482 MachineInstrBuilder UADD;
1483 for (LLT HTy : HAddTys) {
1484 UADD = MIRBuilder.buildIntrinsic(Opc, {HTy}, /*HasSideEffects =*/false)
1485 .addUse(HSum);
1486 HSum = UADD.getReg(0);
1487 }
1488
1489 // Post-conditioning.
1490 if (Ty.isScalar() && (Size == 64 || Size == 128))
1491 MIRBuilder.buildZExt(Dst, UADD);
1492 else
1493 UADD->getOperand(0).setReg(Dst);
1494 MI.eraseFromParent();
1495 return true;
1496}
1497
1498 bool AArch64LegalizerInfo::legalizeAtomicCmpxchg128(
1499 MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
1500 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
1501 LLT s64 = LLT::scalar(64);
1502 auto Addr = MI.getOperand(1).getReg();
1503 auto DesiredI = MIRBuilder.buildUnmerge({s64, s64}, MI.getOperand(2));
1504 auto NewI = MIRBuilder.buildUnmerge({s64, s64}, MI.getOperand(3));
1505 auto DstLo = MRI.createGenericVirtualRegister(s64);
1506 auto DstHi = MRI.createGenericVirtualRegister(s64);
1507
1508 MachineInstrBuilder CAS;
1509 if (ST->hasLSE()) {
1510 // We have 128-bit CASP instructions taking XSeqPair registers, which are
1511 // s128. We need the merge/unmerge to bracket the expansion and pair up with
1512 // the rest of the MIR so we must reassemble the extracted registers into a
1513 // 128-bit known-regclass one with code like this:
1514 //
1515 // %in1 = REG_SEQUENCE Lo, Hi ; One for each input
1516 // %out = CASP %in1, ...
1517 // %OldLo = G_EXTRACT %out, 0
1518 // %OldHi = G_EXTRACT %out, 64
1519 auto Ordering = (*MI.memoperands_begin())->getMergedOrdering();
1520 unsigned Opcode;
1521 switch (Ordering) {
1522 case AtomicOrdering::Acquire:
1523 Opcode = AArch64::CASPAX;
1524 break;
1525 case AtomicOrdering::Release:
1526 Opcode = AArch64::CASPLX;
1527 break;
1528 case AtomicOrdering::AcquireRelease:
1529 case AtomicOrdering::SequentiallyConsistent:
1530 Opcode = AArch64::CASPALX;
1531 break;
1532 default:
1533 Opcode = AArch64::CASPX;
1534 break;
1535 }
1536
1537 LLT s128 = LLT::scalar(128);
1538 auto CASDst = MRI.createGenericVirtualRegister(s128);
1539 auto CASDesired = MRI.createGenericVirtualRegister(s128);
1540 auto CASNew = MRI.createGenericVirtualRegister(s128);
1541 MIRBuilder.buildInstr(TargetOpcode::REG_SEQUENCE, {CASDesired}, {})
1542 .addUse(DesiredI->getOperand(0).getReg())
1543 .addImm(AArch64::sube64)
1544 .addUse(DesiredI->getOperand(1).getReg())
1545 .addImm(AArch64::subo64);
1546 MIRBuilder.buildInstr(TargetOpcode::REG_SEQUENCE, {CASNew}, {})
1547 .addUse(NewI->getOperand(0).getReg())
1548 .addImm(AArch64::sube64)
1549 .addUse(NewI->getOperand(1).getReg())
1550 .addImm(AArch64::subo64);
1551
1552 CAS = MIRBuilder.buildInstr(Opcode, {CASDst}, {CASDesired, CASNew, Addr});
1553
1554 MIRBuilder.buildExtract({DstLo}, {CASDst}, 0);
1555 MIRBuilder.buildExtract({DstHi}, {CASDst}, 64);
1556 } else {
1557 // The -O0 CMP_SWAP_128 is friendlier to generate code for because LDXP/STXP
1558 // can take arbitrary registers so it just has the normal GPR64 operands the
1559 // rest of AArch64 is expecting.
1560 auto Ordering = (*MI.memoperands_begin())->getMergedOrdering();
1561 unsigned Opcode;
1562 switch (Ordering) {
1563 case AtomicOrdering::Acquire:
1564 Opcode = AArch64::CMP_SWAP_128_ACQUIRE;
1565 break;
1566 case AtomicOrdering::Release:
1567 Opcode = AArch64::CMP_SWAP_128_RELEASE;
1568 break;
1569 case AtomicOrdering::AcquireRelease:
1570 case AtomicOrdering::SequentiallyConsistent:
1571 Opcode = AArch64::CMP_SWAP_128;
1572 break;
1573 default:
1574 Opcode = AArch64::CMP_SWAP_128_MONOTONIC;
1575 break;
1576 }
1577
1578 auto Scratch = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1579 CAS = MIRBuilder.buildInstr(Opcode, {DstLo, DstHi, Scratch},
1580 {Addr, DesiredI->getOperand(0),
1581 DesiredI->getOperand(1), NewI->getOperand(0),
1582 NewI->getOperand(1)});
1583 }
1584
1585 CAS.cloneMemRefs(MI);
1586 constrainSelectedInstRegOperands(*CAS, *ST->getInstrInfo(),
1587 *MRI.getTargetRegisterInfo(),
1588 *ST->getRegBankInfo());
1588 *ST->getRegBankInfo());
1589
1590 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {DstLo, DstHi});
1591 MI.eraseFromParent();
1592 return true;
1593}
1594
1595bool AArch64LegalizerInfo::legalizeCTTZ(MachineInstr &MI,
1596 LegalizerHelper &Helper) const {
1597 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
1598 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
1599 LLT Ty = MRI.getType(MI.getOperand(1).getReg());
1600 auto BitReverse = MIRBuilder.buildBitReverse(Ty, MI.getOperand(1));
1601 MIRBuilder.buildCTLZ(MI.getOperand(0).getReg(), BitReverse);
1602 MI.eraseFromParent();
1603 return true;
1604}
1605
1606bool AArch64LegalizerInfo::legalizeMemOps(MachineInstr &MI,
1607 LegalizerHelper &Helper) const {
1608 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
1609
1610 // Tagged version MOPSMemorySetTagged is legalized in legalizeIntrinsic
1611 if (MI.getOpcode() == TargetOpcode::G_MEMSET) {
1612 // Zext the value operand to 64 bit
1613 auto &Value = MI.getOperand(1);
1614 Register ZExtValueReg =
1615 MIRBuilder.buildAnyExt(LLT::scalar(64), Value).getReg(0);
1616 Value.setReg(ZExtValueReg);
1617 return true;
1618 }
1619
1620 return false;
1621}
1622
1623bool AArch64LegalizerInfo::legalizeFCopySign(MachineInstr &MI,
1624 LegalizerHelper &Helper) const {
1625 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
1626 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
1627 Register Dst = MI.getOperand(0).getReg();
1628 LLT DstTy = MRI.getType(Dst);
1629 assert(DstTy.isScalar() && "Only expected scalars right now!");
1630 const unsigned DstSize = DstTy.getSizeInBits();
1631 assert((DstSize == 32 || DstSize == 64) && "Unexpected dst type!");
1632 assert(MRI.getType(MI.getOperand(2).getReg()) == DstTy &&
1633 "Expected homogeneous types!");
1634
1635 // We want to materialize a mask with the high bit set.
1636 uint64_t EltMask;
1637 LLT VecTy;
1638
1639 // TODO: s16 support.
1640 switch (DstSize) {
1641 default:
1642 llvm_unreachable("Unexpected type for G_FCOPYSIGN!");
1643 case 64: {
1644 // AdvSIMD immediate moves cannot materialize our mask in a single
1645 // instruction for 64-bit elements. Instead, materialize zero and then
1646 // negate it.
1647 EltMask = 0;
1648 VecTy = LLT::fixed_vector(2, DstTy);
1649 break;
1650 }
1651 case 32:
1652 EltMask = 0x80000000ULL;
1653 VecTy = LLT::fixed_vector(4, DstTy);
1654 break;
1655 }
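  // For the 64-bit case the mask is produced below by negating +0.0: G_FNEG
  // flips only the sign bit, so each lane becomes -0.0, i.e.
  // 0x8000000000000000, which is exactly the sign-bit mask we need.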
1656
1657 // Widen In1 and In2 to 128 bits. We want these to eventually become
1658 // INSERT_SUBREGs.
1659 auto Undef = MIRBuilder.buildUndef(VecTy);
1660 auto Zero = MIRBuilder.buildConstant(DstTy, 0);
1661 auto Ins1 = MIRBuilder.buildInsertVectorElement(
1662 VecTy, Undef, MI.getOperand(1).getReg(), Zero);
1663 auto Ins2 = MIRBuilder.buildInsertVectorElement(
1664 VecTy, Undef, MI.getOperand(2).getReg(), Zero);
1665
1666 // Construct the mask.
1667 auto Mask = MIRBuilder.buildConstant(VecTy, EltMask);
1668 if (DstSize == 64)
1669 Mask = MIRBuilder.buildFNeg(VecTy, Mask);
1670
1671 auto Sel = MIRBuilder.buildInstr(AArch64::G_BIT, {VecTy}, {Ins1, Ins2, Mask});
1672
1673 // Build an unmerge whose 0th elt is the original G_FCOPYSIGN destination. We
1674 // want this to eventually become an EXTRACT_SUBREG.
1675 SmallVector<Register, 2> DstRegs(1, Dst);
1676 for (unsigned I = 1, E = VecTy.getNumElements(); I < E; ++I)
1677 DstRegs.push_back(MRI.createGenericVirtualRegister(DstTy));
1678 MIRBuilder.buildUnmerge(DstRegs, Sel);
1679 MI.eraseFromParent();
1680 return true;
1681}
LegalizeRuleSet & libcallFor(std::initializer_list< LLT > Types)
LegalizeRuleSet & minScalarOrElt(unsigned TypeIdx, const LLT Ty)
Ensure the scalar or element is at least as wide as Ty.
LegalizeRuleSet & clampMaxNumElements(unsigned TypeIdx, const LLT EltTy, unsigned MaxElements)
Limit the number of elements in EltTy vectors to at most MaxElements.
LegalizeRuleSet & lowerIfMemSizeNotPow2()
Lower a memory operation if the memory size, rounded to bytes, is not a power of 2.
LegalizeRuleSet & minScalarEltSameAsIf(LegalityPredicate Predicate, unsigned TypeIdx, unsigned LargeTypeIdx)
Conditionally widen the scalar or elt to match the size of another.
LegalizeRuleSet & customForCartesianProduct(std::initializer_list< LLT > Types)
LegalizeRuleSet & lowerIfMemSizeNotByteSizePow2()
Lower a memory operation if the memory access size is not a round power of 2 byte size.
LegalizeRuleSet & moreElementsToNextPow2(unsigned TypeIdx)
Add more elements to the vector to reach the next power of two.
LegalizeRuleSet & narrowScalarIf(LegalityPredicate Predicate, LegalizeMutation Mutation)
Narrow the scalar to the one selected by the mutation if the predicate is true.
LegalizeRuleSet & lower()
The instruction is lowered.
LegalizeRuleSet & scalarizeIf(LegalityPredicate Predicate, unsigned TypeIdx)
LegalizeRuleSet & lowerIf(LegalityPredicate Predicate)
The instruction is lowered if predicate is true.
LegalizeRuleSet & clampScalar(unsigned TypeIdx, const LLT MinTy, const LLT MaxTy)
Limit the range of scalar sizes to MinTy and MaxTy.
LegalizeRuleSet & custom()
Unconditionally custom lower.
LegalizeRuleSet & minScalarSameAs(unsigned TypeIdx, unsigned LargeTypeIdx)
Widen the scalar to match the size of another.
LegalizeRuleSet & unsupportedIf(LegalityPredicate Predicate)
LegalizeRuleSet & minScalarOrEltIf(LegalityPredicate Predicate, unsigned TypeIdx, const LLT Ty)
Ensure the scalar or element is at least as wide as Ty.
LegalizeRuleSet & clampNumElements(unsigned TypeIdx, const LLT MinTy, const LLT MaxTy)
Limit the number of elements for the given vectors to at least MinTy's number of elements and at most...
LegalizeRuleSet & maxScalarIf(LegalityPredicate Predicate, unsigned TypeIdx, const LLT Ty)
Conditionally limit the maximum size of the scalar.
LegalizeRuleSet & customIf(LegalityPredicate Predicate)
LegalizeRuleSet & widenScalarToNextPow2(unsigned TypeIdx, unsigned MinSize=0)
Widen the scalar to the next power of two that is at least MinSize.
LegalizeRuleSet & scalarize(unsigned TypeIdx)
LegalizeRuleSet & legalForCartesianProduct(std::initializer_list< LLT > Types)
The instruction is legal when type indexes 0 and 1 are both in the given list.
LegalizeRuleSet & legalIf(LegalityPredicate Predicate)
The instruction is legal if predicate is true.
LegalizeRuleSet & customFor(std::initializer_list< LLT > Types)
LegalizeResult lowerBitCount(MachineInstr &MI)
@ Legalized
Instruction has been legalized and the MachineFunction changed.
GISelChangeObserver & Observer
To keep track of changes made by the LegalizerHelper.
MachineIRBuilder & MIRBuilder
Expose MIRBuilder so clients can set their own RecordInsertInstruction functions.
LegalizeRuleSet & getActionDefinitionsBuilder(unsigned Opcode)
Get the action definition builder for the given opcode.
const LegacyLegalizerInfo & getLegacyLegalizerInfo() const
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Helper class to build MachineInstr.
MachineInstrBuilder buildAdd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ADD Op0, Op1.
MachineInstrBuilder buildUndef(const DstOp &Res)
Build and insert Res = IMPLICIT_DEF.
MachineInstrBuilder buildUnmerge(ArrayRef< LLT > Res, const SrcOp &Op)
Build and insert Res0, ... = G_UNMERGE_VALUES Op.
MachineInstrBuilder buildExtract(const DstOp &Res, const SrcOp &Src, uint64_t Index)
Build and insert Res0, ... = G_EXTRACT Src, Idx0.
MachineInstrBuilder buildConcatVectors(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_CONCAT_VECTORS Op0, ...
MachineInstrBuilder buildCTLZ(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTLZ Op0, Src0.
MachineInstrBuilder buildMergeLikeInstr(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_MERGE_VALUES Op0, ... or Res = G_BUILD_VECTOR Op0, ... or Res = G_CONCAT_VEC...
MachineInstrBuilder buildLoad(const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = G_LOAD Addr, MMO.
MachineInstrBuilder buildBitReverse(const DstOp &Dst, const SrcOp &Src)
Build and insert Dst = G_BITREVERSE Src.
MachineInstrBuilder buildPtrAdd(const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1)
Build and insert Res = G_PTR_ADD Op0, Op1.
MachineInstrBuilder buildStore(const SrcOp &Val, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert G_STORE Val, Addr, MMO.
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
MachineInstrBuilder buildCTPOP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTPOP Op0, Src0.
MachineInstrBuilder buildZExt(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ZEXT Op.
MachineFunction & getMF()
Getter for the function we currently build.
MachineInstrBuilder buildFNeg(const DstOp &Dst, const SrcOp &Src0, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FNEG Op0.
MachineInstrBuilder buildInsertVectorElement(const DstOp &Res, const SrcOp &Val, const SrcOp &Elt, const SrcOp &Idx)
Build and insert Res = G_INSERT_VECTOR_ELT Val, Elt, Idx.
MachineInstrBuilder buildAnyExt(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ANYEXT Op0.
MachineInstrBuilder buildIntrinsic(Intrinsic::ID ID, ArrayRef< Register > Res, bool HasSideEffects)
Build and insert either a G_INTRINSIC (if HasSideEffects is false) or G_INTRINSIC_W_SIDE_EFFECTS inst...
MachineInstrBuilder buildBitcast(const DstOp &Dst, const SrcOp &Src)
Build and insert Dst = G_BITCAST Src.
MachineInstrBuilder buildTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_TRUNC Op.
MachineRegisterInfo * getMRI()
Getter for MRI.
MachineInstrBuilder buildMaskLowPtrBits(const DstOp &Res, const SrcOp &Op0, uint32_t NumBits)
Build and insert Res = G_PTRMASK Op0, G_CONSTANT (1 << NumBits) - 1.
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
Representation of each machine instruction.
Definition: MachineInstr.h:68
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:526
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
void setReg(Register Reg)
Change the register this operand corresponds to.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Register createGenericVirtualRegister(LLT Ty, StringRef Name="")
Create and return a new generic virtual register with low-level type Ty.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:577
void push_back(const T &Elt)
Definition: SmallVector.h:416
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
const TargetMachine & getTargetMachine() const
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:78
Target - Wrapper for Target specific information.
LLVM Value Representation.
Definition: Value.h:74
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition: TypeSize.h:234
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ MO_NC
MO_NC - Indicates whether the linker is expected to check the symbol reference for overflow.
@ MO_PAGEOFF
MO_PAGEOFF - A symbol operand with this flag represents the offset of that symbol within a 4K page.
@ MO_GOT
MO_GOT - This flag indicates that a symbol operand represents the address of the GOT entry for the sy...
@ MO_PREL
MO_PREL - Indicates that the bits of the symbol operand represented by MO_G0 etc are PC relative.
@ MO_PAGE
MO_PAGE - A symbol operand with this flag represents the pc-relative offset of the 4K page containing...
@ MO_TAGGED
MO_TAGGED - With MO_PAGE, indicates that the page includes a memory tag in bits 56-63.
@ MO_G3
MO_G3 - A symbol operand with this flag (granule 3) represents the high 16-bits of a 64-bit address,...
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:119
LegalityPredicate isScalar(unsigned TypeIdx)
True iff the specified type index is a scalar.
LegalityPredicate typeInSet(unsigned TypeIdx, std::initializer_list< LLT > TypesInit)
True iff the given type index is one of the specified types.
LegalityPredicate smallerThan(unsigned TypeIdx0, unsigned TypeIdx1)
True iff the first type index has a smaller total bit size than second type index.
LegalityPredicate atomicOrderingAtLeastOrStrongerThan(unsigned MMOIdx, AtomicOrdering Ordering)
True iff the specified MMO index has at an atomic ordering of at Ordering or stronger.
LegalityPredicate isVector(unsigned TypeIdx)
True iff the specified type index is a vector.
Predicate all(Predicate P0, Predicate P1)
True iff P0 and P1 are true.
LegalityPredicate typeIs(unsigned TypeIdx, LLT TypesInit)
True iff the given type index is the specified type.
@ Bitcast
Perform the operation on a different, but equivalently sized type.
Definition: LegalizerInfo.h:73
LegalizeMutation moreElementsToNextPow2(unsigned TypeIdx, unsigned Min=0)
Add more elements to the type for the given type index to the next power of.
LegalizeMutation scalarize(unsigned TypeIdx)
Break up the vector type for the given type index into the element type.
LegalizeMutation widenScalarOrEltToNextPow2(unsigned TypeIdx, unsigned Min=0)
Widen the scalar type or vector element type for the given type index to the next power of 2.
LegalizeMutation changeTo(unsigned TypeIdx, LLT Ty)
Select this specific type for the given type index.
operand_type_match m_Reg()
ConstantMatch< APInt > m_ICst(APInt &Cst)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
BinaryOp_match< LHS, RHS, TargetOpcode::G_PTR_ADD, false > m_GPtrAdd(const LHS &L, const RHS &R)
@ Undef
Value of the register doesn't matter.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:406
bool constrainSelectedInstRegOperands(MachineInstr &I, const TargetInstrInfo &TII, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
Mutate the newly-selected instruction I to constrain its (possibly generic) virtual register operands...
Definition: Utils.cpp:152
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:292
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Add
Sum of integers.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155
std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
Definition: Utils.cpp:409
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1976
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition: Alignment.h:208
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
The LegalityQuery object bundles together all the information that's needed to decide whether a given...
ArrayRef< MemDesc > MMODescrs
Operations which require memory can use this to place requirements on the memory type for each MMO.
ArrayRef< LLT > Types
This class contains a discriminated union of information about pointers in memory operands,...