1//===- AArch64LegalizerInfo.cpp ----------------------------------*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file implements the targeting of the MachineLegalizer class for
10/// AArch64.
11/// \todo This should be generated by TableGen.
12//===----------------------------------------------------------------------===//
13
16#include "AArch64Subtarget.h"
27#include "llvm/IR/Intrinsics.h"
28#include "llvm/IR/IntrinsicsAArch64.h"
29#include "llvm/IR/Type.h"
31#include <initializer_list>
32
33#define DEBUG_TYPE "aarch64-legalinfo"
34
35using namespace llvm;
36using namespace LegalizeActions;
37using namespace LegalizeMutations;
38using namespace LegalityPredicates;
39using namespace MIPatternMatch;
40
41AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
42 : ST(&ST) {
43 using namespace TargetOpcode;
44 const LLT p0 = LLT::pointer(0, 64);
45 const LLT s8 = LLT::scalar(8);
46 const LLT s16 = LLT::scalar(16);
47 const LLT s32 = LLT::scalar(32);
48 const LLT s64 = LLT::scalar(64);
49 const LLT s128 = LLT::scalar(128);
50 const LLT v16s8 = LLT::fixed_vector(16, 8);
51 const LLT v8s8 = LLT::fixed_vector(8, 8);
52 const LLT v4s8 = LLT::fixed_vector(4, 8);
53 const LLT v8s16 = LLT::fixed_vector(8, 16);
54 const LLT v4s16 = LLT::fixed_vector(4, 16);
55 const LLT v2s16 = LLT::fixed_vector(2, 16);
56 const LLT v2s32 = LLT::fixed_vector(2, 32);
57 const LLT v4s32 = LLT::fixed_vector(4, 32);
58 const LLT v2s64 = LLT::fixed_vector(2, 64);
59 const LLT v2p0 = LLT::fixed_vector(2, p0);
60
61 std::initializer_list<LLT> PackedVectorAllTypeList = {/* Begin 128bit types */
62 v16s8, v8s16, v4s32,
63 v2s64, v2p0,
64 /* End 128bit types */
65 /* Begin 64bit types */
66 v8s8, v4s16, v2s32};
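  // Every type in this list fits exactly in a 64-bit or 128-bit NEON register;
  // the rules below use it for the "already register-sized vector" cases.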
67
68 const TargetMachine &TM = ST.getTargetLowering()->getTargetMachine();
69
70 // FIXME: support subtargets which have neon/fp-armv8 disabled.
71 if (!ST.hasNEON() || !ST.hasFPARMv8()) {
72 getLegacyLegalizerInfo().computeTables();
73 return;
74 }
75
76 // Some instructions only support s16 if the subtarget has full 16-bit FP
77 // support.
78 const bool HasFP16 = ST.hasFullFP16();
79 const LLT &MinFPScalar = HasFP16 ? s16 : s32;
80
81 const bool HasCSSC = ST.hasCSSC();
82 const bool HasRCPC3 = ST.hasRCPC3();
83
84 getActionDefinitionsBuilder({G_IMPLICIT_DEF, G_FREEZE})
85 .legalFor({p0, s8, s16, s32, s64})
86 .legalFor(PackedVectorAllTypeList)
88 .clampScalar(0, s8, s64)
90 [=](const LegalityQuery &Query) {
91 return Query.Types[0].isVector() &&
92 (Query.Types[0].getElementType() != s64 ||
93 Query.Types[0].getNumElements() != 2);
94 },
95 [=](const LegalityQuery &Query) {
96 LLT EltTy = Query.Types[0].getElementType();
97 if (EltTy == s64)
98 return std::make_pair(0, LLT::fixed_vector(2, 64));
99 return std::make_pair(0, EltTy);
100 });
101
102 getActionDefinitionsBuilder(G_PHI)
103 .legalFor({p0, s16, s32, s64})
104 .legalFor(PackedVectorAllTypeList)
106 .clampScalar(0, s16, s64)
107 // Maximum: sN * k = 128
108 .clampMaxNumElements(0, s8, 16)
109 .clampMaxNumElements(0, s16, 8)
110 .clampMaxNumElements(0, s32, 4)
111 .clampMaxNumElements(0, s64, 2)
112 .clampMaxNumElements(0, p0, 2);
113
114 getActionDefinitionsBuilder(G_BSWAP)
115 .legalFor({s32, s64, v4s32, v2s32, v2s64})
116 .widenScalarToNextPow2(0)
117 .clampScalar(0, s32, s64);
118
119 getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR})
120 .legalFor({s32, s64, v2s32, v4s32, v4s16, v8s16, v16s8, v8s8})
121 .scalarizeIf(
122 [=](const LegalityQuery &Query) {
123 return Query.Opcode == G_MUL && Query.Types[0] == v2s64;
124 },
125 0)
126 .legalFor({v2s64})
127 .widenScalarToNextPow2(0)
128 .clampScalar(0, s32, s64)
129 .clampNumElements(0, v2s32, v4s32)
130 .clampNumElements(0, v2s64, v2s64)
132
133 getActionDefinitionsBuilder({G_SHL, G_ASHR, G_LSHR})
134 .customIf([=](const LegalityQuery &Query) {
135 const auto &SrcTy = Query.Types[0];
136 const auto &AmtTy = Query.Types[1];
137 return !SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
138 AmtTy.getSizeInBits() == 32;
139 })
140 .legalFor({
141 {s32, s32},
142 {s32, s64},
143 {s64, s64},
144 {v8s8, v8s8},
145 {v16s8, v16s8},
146 {v4s16, v4s16},
147 {v8s16, v8s16},
148 {v2s32, v2s32},
149 {v4s32, v4s32},
150 {v2s64, v2s64},
151 })
152 .widenScalarToNextPow2(0)
153 .clampScalar(1, s32, s64)
154 .clampScalar(0, s32, s64)
155 .clampNumElements(0, v2s32, v4s32)
156 .clampNumElements(0, v2s64, v2s64)
158 .minScalarSameAs(1, 0);
159
160 getActionDefinitionsBuilder(G_PTR_ADD)
161 .legalFor({{p0, s64}, {v2p0, v2s64}})
162 .clampScalar(1, s64, s64);
163
164 getActionDefinitionsBuilder(G_PTRMASK).legalFor({{p0, s64}});
165
166 getActionDefinitionsBuilder({G_SDIV, G_UDIV})
167 .legalFor({s32, s64})
168 .libcallFor({s128})
169 .clampScalar(0, s32, s64)
171 .scalarize(0);
172
173 getActionDefinitionsBuilder({G_SREM, G_UREM, G_SDIVREM, G_UDIVREM})
174 .lowerFor({s8, s16, s32, s64, v2s64, v4s32, v2s32})
176 .clampScalarOrElt(0, s32, s64)
177 .clampNumElements(0, v2s32, v4s32)
178 .clampNumElements(0, v2s64, v2s64)
179 .moreElementsToNextPow2(0);
180
181
182 getActionDefinitionsBuilder({G_SMULO, G_UMULO})
183 .widenScalarToNextPow2(0, /*Min = */ 32)
184 .clampScalar(0, s32, s64)
185 .lower();
186
187 getActionDefinitionsBuilder({G_SMULH, G_UMULH})
188 .legalFor({s64, v8s16, v16s8, v4s32})
189 .lower();
190
191 auto &MinMaxActions = getActionDefinitionsBuilder(
192 {G_SMIN, G_SMAX, G_UMIN, G_UMAX});
193 if (HasCSSC)
194 MinMaxActions
195 .legalFor({s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
196 // Make clamping conditional on the CSSC extension: without legal types we
197 // lower to CMP, which can fold one of the two sxtb's we'd otherwise need
198 // if we detect a type smaller than 32-bit.
199 .minScalar(0, s32);
200 else
201 MinMaxActions
202 .legalFor({v8s8, v16s8, v4s16, v8s16, v2s32, v4s32});
203 MinMaxActions
204 .clampNumElements(0, v8s8, v16s8)
205 .clampNumElements(0, v4s16, v8s16)
206 .clampNumElements(0, v2s32, v4s32)
207 // FIXME: This shouldn't be needed as v2s64 types are going to
208 // be expanded anyway, but G_ICMP doesn't support splitting vectors yet
209 .clampNumElements(0, v2s64, v2s64)
210 .lower();
211
212 getActionDefinitionsBuilder(
213 {G_SADDE, G_SSUBE, G_UADDE, G_USUBE, G_SADDO, G_SSUBO, G_UADDO, G_USUBO})
214 .legalFor({{s32, s32}, {s64, s32}})
215 .clampScalar(0, s32, s64)
216 .clampScalar(1, s32, s64)
217 .widenScalarToNextPow2(0);
218
219 getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FNEG})
220 .legalFor({MinFPScalar, s32, s64, v2s64, v4s32, v2s32})
221 .clampScalar(0, MinFPScalar, s64)
222 .clampNumElements(0, v2s32, v4s32)
223 .clampNumElements(0, v2s64, v2s64);
224
225 getActionDefinitionsBuilder(G_FREM).libcallFor({s32, s64});
226
227 getActionDefinitionsBuilder({G_FCEIL, G_FABS, G_FSQRT, G_FFLOOR, G_FRINT,
228 G_FMA, G_INTRINSIC_TRUNC, G_INTRINSIC_ROUND,
229 G_FNEARBYINT, G_INTRINSIC_LRINT})
230 // If we don't have full FP16 support, then scalarize the elements of
231 // vectors containing fp16 types.
232 .fewerElementsIf(
233 [=, &ST](const LegalityQuery &Query) {
234 const auto &Ty = Query.Types[0];
235 return Ty.isVector() && Ty.getElementType() == s16 &&
236 !ST.hasFullFP16();
237 },
238 [=](const LegalityQuery &Query) { return std::make_pair(0, s16); })
239 // If we don't have full FP16 support, then widen s16 to s32 if we
240 // encounter it.
241 .widenScalarIf(
242 [=, &ST](const LegalityQuery &Query) {
243 return Query.Types[0] == s16 && !ST.hasFullFP16();
244 },
245 [=](const LegalityQuery &Query) { return std::make_pair(0, s32); })
246 .legalFor({s16, s32, s64, v2s32, v4s32, v2s64, v2s16, v4s16, v8s16});
247
248 getActionDefinitionsBuilder(
249 {G_FCOS, G_FSIN, G_FLOG10, G_FLOG, G_FLOG2, G_FEXP, G_FEXP2, G_FPOW})
250 // We need a call for these, so we always need to scalarize.
251 .scalarize(0)
252 // Regardless of FP16 support, widen 16-bit elements to 32-bits.
253 .minScalar(0, s32)
254 .libcallFor({s32, s64, v2s32, v4s32, v2s64});
255
256 getActionDefinitionsBuilder(G_INSERT)
257 .legalIf(all(typeInSet(0, {s32, s64, p0}),
258 typeInSet(1, {s8, s16, s32}), smallerThan(1, 0)))
260 .clampScalar(0, s32, s64)
262 .minScalar(1, s8)
263 .maxScalarIf(typeInSet(0, {s32}), 1, s16)
264 .maxScalarIf(typeInSet(0, {s64, p0}), 1, s32);
265
266 getActionDefinitionsBuilder(G_EXTRACT)
267 .legalIf(all(typeInSet(0, {s16, s32, s64, p0}),
268 typeInSet(1, {s32, s64, s128, p0}), smallerThan(0, 1)))
270 .clampScalar(1, s32, s128)
272 .minScalar(0, s16)
273 .maxScalarIf(typeInSet(1, {s32}), 0, s16)
274 .maxScalarIf(typeInSet(1, {s64, p0}), 0, s32)
275 .maxScalarIf(typeInSet(1, {s128}), 0, s64);
276
277
278 for (unsigned Op : {G_SEXTLOAD, G_ZEXTLOAD}) {
279 auto &Actions = getActionDefinitionsBuilder(Op);
280
281 if (Op == G_SEXTLOAD)
283
284 // Atomics have zero extending behavior.
285 Actions
286 .legalForTypesWithMemDesc({{s32, p0, s8, 8},
287 {s32, p0, s16, 8},
288 {s32, p0, s32, 8},
289 {s64, p0, s8, 2},
290 {s64, p0, s16, 2},
291 {s64, p0, s32, 4},
292 {s64, p0, s64, 8},
293 {p0, p0, s64, 8},
294 {v2s32, p0, s64, 8}})
295 .widenScalarToNextPow2(0)
296 .clampScalar(0, s32, s64)
297 // TODO: We could support sum-of-pow2's but the lowering code doesn't know
298 // how to do that yet.
299 .unsupportedIfMemSizeNotPow2()
300 // Lower anything left over into G_*EXT and G_LOAD
301 .lower();
302 }
303
304 auto IsPtrVecPred = [=](const LegalityQuery &Query) {
305 const LLT &ValTy = Query.Types[0];
306 if (!ValTy.isVector())
307 return false;
308 const LLT EltTy = ValTy.getElementType();
309 return EltTy.isPointer() && EltTy.getAddressSpace() == 0;
310 };
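  // Example: <2 x p0> loads/stores match this predicate and are bitcast to
  // <2 x s64> by legalizeLoadStore() below so the existing s64 patterns fire.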
311
312 getActionDefinitionsBuilder(G_LOAD)
313 .customIf([=](const LegalityQuery &Query) {
314 return HasRCPC3 && Query.Types[0] == s128 &&
315 Query.MMODescrs[0].Ordering == AtomicOrdering::Acquire;
316 })
317 .customIf([=](const LegalityQuery &Query) {
318 return Query.Types[0] == s128 &&
319 Query.MMODescrs[0].Ordering != AtomicOrdering::NotAtomic;
320 })
321 .legalForTypesWithMemDesc({{s8, p0, s8, 8},
322 {s16, p0, s16, 8},
323 {s32, p0, s32, 8},
324 {s64, p0, s64, 8},
325 {p0, p0, s64, 8},
326 {s128, p0, s128, 8},
327 {v8s8, p0, s64, 8},
328 {v16s8, p0, s128, 8},
329 {v4s16, p0, s64, 8},
330 {v8s16, p0, s128, 8},
331 {v2s32, p0, s64, 8},
332 {v4s32, p0, s128, 8},
333 {v2s64, p0, s128, 8}})
334 // These extends are also legal
335 .legalForTypesWithMemDesc({{s32, p0, s8, 8}, {s32, p0, s16, 8}})
336 .widenScalarToNextPow2(0, /* MinSize = */ 8)
338 .clampScalar(0, s8, s64)
339 .narrowScalarIf(
340 [=](const LegalityQuery &Query) {
341 // Clamp extending load results to 32-bits.
342 return Query.Types[0].isScalar() &&
343 Query.Types[0] != Query.MMODescrs[0].MemoryTy &&
344 Query.Types[0].getSizeInBits() > 32;
345 },
346 changeTo(0, s32))
347 .clampMaxNumElements(0, s8, 16)
348 .clampMaxNumElements(0, s16, 8)
349 .clampMaxNumElements(0, s32, 4)
350 .clampMaxNumElements(0, s64, 2)
351 .clampMaxNumElements(0, p0, 2)
352 .customIf(IsPtrVecPred)
353 .scalarizeIf(typeIs(0, v2s16), 0);
354
355 getActionDefinitionsBuilder(G_STORE)
356 .customIf([=](const LegalityQuery &Query) {
357 return HasRCPC3 && Query.Types[0] == s128 &&
358 Query.MMODescrs[0].Ordering == AtomicOrdering::Release;
359 })
360 .customIf([=](const LegalityQuery &Query) {
361 return Query.Types[0] == s128 &&
362 Query.MMODescrs[0].Ordering != AtomicOrdering::NotAtomic;
363 })
364 .legalForTypesWithMemDesc(
365 {{s8, p0, s8, 8}, {s16, p0, s8, 8}, // truncstorei8 from s16
366 {s32, p0, s8, 8}, // truncstorei8 from s32
367 {s64, p0, s8, 8}, // truncstorei8 from s64
368 {s16, p0, s16, 8}, {s32, p0, s16, 8}, // truncstorei16 from s32
369 {s64, p0, s16, 8}, // truncstorei16 from s64
370 {s32, p0, s8, 8}, {s32, p0, s16, 8}, {s32, p0, s32, 8},
371 {s64, p0, s64, 8}, {s64, p0, s32, 8}, // truncstorei32 from s64
372 {p0, p0, s64, 8}, {s128, p0, s128, 8}, {v16s8, p0, s128, 8},
373 {v8s8, p0, s64, 8}, {v4s16, p0, s64, 8}, {v8s16, p0, s128, 8},
374 {v2s32, p0, s64, 8}, {v4s32, p0, s128, 8}, {v2s64, p0, s128, 8}})
375 .clampScalar(0, s8, s64)
376 .lowerIf([=](const LegalityQuery &Query) {
377 return Query.Types[0].isScalar() &&
378 Query.Types[0] != Query.MMODescrs[0].MemoryTy;
379 })
380 // Maximum: sN * k = 128
381 .clampMaxNumElements(0, s8, 16)
382 .clampMaxNumElements(0, s16, 8)
383 .clampMaxNumElements(0, s32, 4)
384 .clampMaxNumElements(0, s64, 2)
385 .clampMaxNumElements(0, p0, 2)
387 .customIf(IsPtrVecPred)
388 .scalarizeIf(typeIs(0, v2s16), 0);
389
390 // Constants
391 getActionDefinitionsBuilder(G_CONSTANT)
392 .legalFor({p0, s8, s16, s32, s64})
393 .widenScalarToNextPow2(0)
394 .clampScalar(0, s8, s64);
395 getActionDefinitionsBuilder(G_FCONSTANT)
396 .legalIf([=](const LegalityQuery &Query) {
397 const auto &Ty = Query.Types[0];
398 if (HasFP16 && Ty == s16)
399 return true;
400 return Ty == s32 || Ty == s64 || Ty == s128;
401 })
402 .clampScalar(0, MinFPScalar, s128);
403
404 getActionDefinitionsBuilder({G_ICMP, G_FCMP})
405 .legalFor({{s32, s32},
406 {s32, s64},
407 {s32, p0},
408 {v4s32, v4s32},
409 {v2s32, v2s32},
410 {v2s64, v2s64},
411 {v2s64, v2p0},
412 {v4s16, v4s16},
413 {v8s16, v8s16},
414 {v8s8, v8s8},
415 {v16s8, v16s8}})
417 .clampScalar(1, s32, s64)
418 .clampScalar(0, s32, s32)
419 .minScalarEltSameAsIf(
420 [=](const LegalityQuery &Query) {
421 const LLT &Ty = Query.Types[0];
422 const LLT &SrcTy = Query.Types[1];
423 return Ty.isVector() && !SrcTy.getElementType().isPointer() &&
424 Ty.getElementType() != SrcTy.getElementType();
425 },
426 0, 1)
427 .minScalarOrEltIf(
428 [=](const LegalityQuery &Query) { return Query.Types[1] == v2s16; },
429 1, s32)
430 .minScalarOrEltIf(
431 [=](const LegalityQuery &Query) { return Query.Types[1] == v2p0; }, 0,
432 s64)
433 .clampNumElements(0, v2s32, v4s32);
434
435 // Extensions
436 auto ExtLegalFunc = [=](const LegalityQuery &Query) {
437 unsigned DstSize = Query.Types[0].getSizeInBits();
438
439 if (DstSize == 128 && !Query.Types[0].isVector())
440 return false; // Extending to a scalar s128 needs narrowing.
441
442 // Make sure that we have something that will fit in a register, and
443 // make sure it's a power of 2.
444 if (DstSize < 8 || DstSize > 128 || !isPowerOf2_32(DstSize))
445 return false;
446
447 const LLT &SrcTy = Query.Types[1];
448
449 // Make sure we fit in a register otherwise. Don't bother checking that
450 // the source type is below 128 bits. We shouldn't be allowing anything
451 // through which is wider than the destination in the first place.
452 unsigned SrcSize = SrcTy.getSizeInBits();
453 if (SrcSize < 8 || !isPowerOf2_32(SrcSize))
454 return false;
455
456 return true;
457 };
458 getActionDefinitionsBuilder({G_ZEXT, G_SEXT, G_ANYEXT})
459 .legalIf(ExtLegalFunc)
460 .clampScalar(0, s64, s64); // Just for s128, others are handled above.
461
462 getActionDefinitionsBuilder(G_TRUNC)
463 .minScalarOrEltIf(
464 [=](const LegalityQuery &Query) { return Query.Types[0].isVector(); },
465 0, s8)
466 .customIf([=](const LegalityQuery &Query) {
467 LLT DstTy = Query.Types[0];
468 LLT SrcTy = Query.Types[1];
469 return DstTy == v8s8 && SrcTy.getSizeInBits() > 128;
470 })
471 .alwaysLegal();
472
473 getActionDefinitionsBuilder(G_SEXT_INREG)
474 .legalFor({s32, s64})
475 .legalFor(PackedVectorAllTypeList)
476 .lower();
477
478 // FP conversions
479 getActionDefinitionsBuilder(G_FPTRUNC)
480 .legalFor(
481 {{s16, s32}, {s16, s64}, {s32, s64}, {v4s16, v4s32}, {v2s32, v2s64}})
482 .clampMaxNumElements(0, s32, 2);
483 getActionDefinitionsBuilder(G_FPEXT)
484 .legalFor(
485 {{s32, s16}, {s64, s16}, {s64, s32}, {v4s32, v4s16}, {v2s64, v2s32}})
486 .clampMaxNumElements(0, s64, 2);
487
488 // Conversions
489 getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
490 .legalForCartesianProduct({s32, s64, v2s64, v4s32, v2s32})
491 .widenScalarToNextPow2(0)
492 .clampScalar(0, s32, s64)
494 .clampScalar(1, s32, s64);
495
496 getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
497 .legalForCartesianProduct({s32, s64, v2s64, v4s32, v2s32})
498 .clampScalar(1, s32, s64)
499 .minScalarSameAs(1, 0)
500 .clampScalar(0, s32, s64)
502
503 // Control-flow
504 getActionDefinitionsBuilder(G_BRCOND)
505 .legalFor({s32})
506 .clampScalar(0, s32, s32);
507 getActionDefinitionsBuilder(G_BRINDIRECT).legalFor({p0});
508
509 getActionDefinitionsBuilder(G_SELECT)
510 .legalFor({{s32, s32}, {s64, s32}, {p0, s32}})
511 .widenScalarToNextPow2(0)
512 .clampScalar(0, s32, s64)
513 .clampScalar(1, s32, s32)
515 .lowerIf(isVector(0));
516
517 // Pointer-handling
518 getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0});
519
520 if (TM.getCodeModel() == CodeModel::Small)
521 getActionDefinitionsBuilder(G_GLOBAL_VALUE).custom();
522 else
523 getActionDefinitionsBuilder(G_GLOBAL_VALUE).legalFor({p0});
524
525 getActionDefinitionsBuilder(G_PTRTOINT)
526 .legalFor({{s64, p0}, {v2s64, v2p0}})
527 .widenScalarToNextPow2(0, 64)
528 .clampScalar(0, s64, s64);
529
530 getActionDefinitionsBuilder(G_INTTOPTR)
531 .unsupportedIf([&](const LegalityQuery &Query) {
532 return Query.Types[0].getSizeInBits() != Query.Types[1].getSizeInBits();
533 })
534 .legalFor({{p0, s64}, {v2p0, v2s64}});
535
536 // Casts for 32 and 64-bit width type are just copies.
537 // Same for 128-bit width type, except they are on the FPR bank.
538 getActionDefinitionsBuilder(G_BITCAST)
539 // FIXME: This is wrong since G_BITCAST is not allowed to change the
540 // number of bits but it's what the previous code described and fixing
541 // it breaks tests.
542 .legalForCartesianProduct({s8, s16, s32, s64, s128, v16s8, v8s8, v4s8,
543 v8s16, v4s16, v2s16, v4s32, v2s32, v2s64,
544 v2p0});
545
546 getActionDefinitionsBuilder(G_VASTART).legalFor({p0});
547
548 // va_list must be a pointer, but most sized types are pretty easy to handle
549 // as the destination.
550 getActionDefinitionsBuilder(G_VAARG)
551 .customForCartesianProduct({s8, s16, s32, s64, p0}, {p0})
552 .clampScalar(0, s8, s64)
553 .widenScalarToNextPow2(0, /*Min*/ 8);
554
555 getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG_WITH_SUCCESS)
556 .lowerIf(
557 all(typeInSet(0, {s8, s16, s32, s64, s128}), typeIs(2, p0)));
558
559 getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG)
560 .customIf([](const LegalityQuery &Query) {
561 return Query.Types[0].getSizeInBits() == 128;
562 })
563 .clampScalar(0, s32, s64)
564 .legalIf(all(typeInSet(0, {s32, s64}), typeIs(1, p0)));
565
566 getActionDefinitionsBuilder(
567 {G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD, G_ATOMICRMW_SUB, G_ATOMICRMW_AND,
568 G_ATOMICRMW_OR, G_ATOMICRMW_XOR, G_ATOMICRMW_MIN, G_ATOMICRMW_MAX,
569 G_ATOMICRMW_UMIN, G_ATOMICRMW_UMAX})
570 .clampScalar(0, s32, s64)
571 .legalIf(all(typeInSet(0, {s32, s64}), typeIs(1, p0)));
572
573 getActionDefinitionsBuilder(G_BLOCK_ADDR).legalFor({p0});
574
575 // Merge/Unmerge
576 for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
577 unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
578 unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;
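    // For G_MERGE_VALUES the destination (type index 0) is the wide type and
    // the sources are the narrow ones; G_UNMERGE_VALUES is the mirror image.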
579 getActionDefinitionsBuilder(Op)
580 .widenScalarToNextPow2(LitTyIdx, 8)
581 .widenScalarToNextPow2(BigTyIdx, 32)
582 .clampScalar(LitTyIdx, s8, s64)
583 .clampScalar(BigTyIdx, s32, s128)
584 .legalIf([=](const LegalityQuery &Q) {
585 switch (Q.Types[BigTyIdx].getSizeInBits()) {
586 case 32:
587 case 64:
588 case 128:
589 break;
590 default:
591 return false;
592 }
593 switch (Q.Types[LitTyIdx].getSizeInBits()) {
594 case 8:
595 case 16:
596 case 32:
597 case 64:
598 return true;
599 default:
600 return false;
601 }
602 });
603 }
604
605 getActionDefinitionsBuilder(G_EXTRACT_VECTOR_ELT)
606 .unsupportedIf([=](const LegalityQuery &Query) {
607 const LLT &EltTy = Query.Types[1].getElementType();
608 return Query.Types[0] != EltTy;
609 })
610 .minScalar(2, s64)
611 .legalIf([=](const LegalityQuery &Query) {
612 const LLT &VecTy = Query.Types[1];
613 return VecTy == v2s16 || VecTy == v4s16 || VecTy == v8s16 ||
614 VecTy == v4s32 || VecTy == v2s64 || VecTy == v2s32 ||
615 VecTy == v8s8 || VecTy == v16s8 || VecTy == v2s32 ||
616 VecTy == v2p0;
617 })
618 .minScalarOrEltIf(
619 [=](const LegalityQuery &Query) {
620 // We want to promote <M x s1> to <M x s64> if that wouldn't
621 // cause the total vector size to be > 128b.
622 return Query.Types[1].getNumElements() <= 2;
623 },
624 0, s64)
625 .minScalarOrEltIf(
626 [=](const LegalityQuery &Query) {
627 return Query.Types[1].getNumElements() <= 4;
628 },
629 0, s32)
630 .minScalarOrEltIf(
631 [=](const LegalityQuery &Query) {
632 return Query.Types[1].getNumElements() <= 8;
633 },
634 0, s16)
635 .minScalarOrEltIf(
636 [=](const LegalityQuery &Query) {
637 return Query.Types[1].getNumElements() <= 16;
638 },
639 0, s8)
640 .minScalarOrElt(0, s8) // Worst case, we need at least s8.
641 .clampMaxNumElements(1, s64, 2)
642 .clampMaxNumElements(1, s32, 4)
643 .clampMaxNumElements(1, s16, 8)
644 .clampMaxNumElements(1, p0, 2);
645
646 getActionDefinitionsBuilder(G_INSERT_VECTOR_ELT)
647 .legalIf(typeInSet(0, {v8s16, v2s32, v4s32, v2s64}));
648
649 getActionDefinitionsBuilder(G_BUILD_VECTOR)
650 .legalFor({{v8s8, s8},
651 {v16s8, s8},
652 {v2s16, s16},
653 {v4s16, s16},
654 {v8s16, s16},
655 {v2s32, s32},
656 {v4s32, s32},
657 {v2p0, p0},
658 {v2s64, s64}})
659 .clampNumElements(0, v4s32, v4s32)
660 .clampNumElements(0, v2s64, v2s64)
661 .minScalarOrElt(0, s8)
662 .minScalarSameAs(1, 0);
663
664 getActionDefinitionsBuilder(G_BUILD_VECTOR_TRUNC).lower();
665
666 getActionDefinitionsBuilder(G_CTLZ)
667 .legalForCartesianProduct(
668 {s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
669 .scalarize(1);
670 getActionDefinitionsBuilder(G_CTLZ_ZERO_UNDEF).lower();
671
672 // TODO: Custom lowering for v2s32, v4s32, v2s64.
673 getActionDefinitionsBuilder(G_BITREVERSE)
674 .legalFor({s32, s64, v8s8, v16s8})
675 .widenScalarToNextPow2(0, /*Min = */ 32)
676 .clampScalar(0, s32, s64);
677
678 getActionDefinitionsBuilder(G_CTTZ_ZERO_UNDEF).lower();
679
680 getActionDefinitionsBuilder(G_CTTZ)
681 .lowerIf(isVector(0))
682 .clampScalar(0, s32, s64)
683 .scalarSameSizeAs(1, 0)
684 .legalIf([=](const LegalityQuery &Query) {
685 return (HasCSSC && typeInSet(0, {s32, s64})(Query));
686 })
687 .customIf([=](const LegalityQuery &Query) {
688 return (!HasCSSC && typeInSet(0, {s32, s64})(Query));
689 });
690
691 getActionDefinitionsBuilder(G_SHUFFLE_VECTOR)
692 .legalIf([=](const LegalityQuery &Query) {
693 const LLT &DstTy = Query.Types[0];
694 const LLT &SrcTy = Query.Types[1];
695 // For now just support the TBL2 variant which needs the source vectors
696 // to be the same size as the dest.
697 if (DstTy != SrcTy)
698 return false;
699 return llvm::is_contained({v2s32, v4s32, v2s64, v2p0, v16s8, v8s16},
700 DstTy);
701 })
702 // G_SHUFFLE_VECTOR can have scalar sources (from 1 x s vectors), we
703 // just want those lowered into G_BUILD_VECTOR
704 .lowerIf([=](const LegalityQuery &Query) {
705 return !Query.Types[1].isVector();
706 })
707 .moreElementsIf(
708 [](const LegalityQuery &Query) {
709 return Query.Types[0].isVector() && Query.Types[1].isVector() &&
710 Query.Types[0].getNumElements() >
711 Query.Types[1].getNumElements();
712 },
713 changeTo(1, 0))
715 .clampNumElements(0, v4s32, v4s32)
716 .clampNumElements(0, v2s64, v2s64);
717
718 getActionDefinitionsBuilder(G_CONCAT_VECTORS)
719 .legalFor({{v4s32, v2s32}, {v8s16, v4s16}, {v16s8, v8s8}});
720
721 getActionDefinitionsBuilder(G_JUMP_TABLE).legalFor({{p0}, {s64}});
722
723 getActionDefinitionsBuilder(G_BRJT).legalIf([=](const LegalityQuery &Query) {
724 return Query.Types[0] == p0 && Query.Types[1] == s64;
725 });
726
727 getActionDefinitionsBuilder(G_DYN_STACKALLOC).lower();
728
729 if (ST.hasMOPS()) {
730 // G_BZERO is not supported. Currently it is only emitted by
731 // PreLegalizerCombiner for G_MEMSET with zero constant.
732 getActionDefinitionsBuilder(G_BZERO).unsupported();
733
734 getActionDefinitionsBuilder(G_MEMSET)
735 .legalForCartesianProduct({p0}, {s64}, {s64})
736 .customForCartesianProduct({p0}, {s8}, {s64})
737 .immIdx(0); // Inform verifier imm idx 0 is handled.
738
739 getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE})
740 .legalForCartesianProduct({p0}, {p0}, {s64})
741 .immIdx(0); // Inform verifier imm idx 0 is handled.
742
743 // G_MEMCPY_INLINE does not have a tailcall immediate
744 getActionDefinitionsBuilder(G_MEMCPY_INLINE)
745 .legalForCartesianProduct({p0}, {p0}, {s64});
746
747 } else {
748 getActionDefinitionsBuilder({G_BZERO, G_MEMCPY, G_MEMMOVE, G_MEMSET})
749 .libcall();
750 }
751
752 // FIXME: Legal vector types are only legal with NEON.
753 auto &ABSActions = getActionDefinitionsBuilder(G_ABS);
754 if (HasCSSC)
755 ABSActions
756 .legalFor({s32, s64});
757 ABSActions
758 .legalFor(PackedVectorAllTypeList)
759 .lowerIf(isScalar(0));
760
761 getActionDefinitionsBuilder(G_VECREDUCE_FADD)
762 // We only have FADDP to do reduction-like operations. Lower the rest.
763 .legalFor({{s32, v2s32}, {s64, v2s64}})
764 .clampMaxNumElements(1, s64, 2)
765 .clampMaxNumElements(1, s32, 2)
766 .lower();
767
768 getActionDefinitionsBuilder(G_VECREDUCE_ADD)
769 .legalFor(
770 {{s8, v16s8}, {s16, v8s16}, {s32, v4s32}, {s32, v2s32}, {s64, v2s64}})
771 .clampMaxNumElements(1, s64, 2)
772 .clampMaxNumElements(1, s32, 4)
773 .lower();
774
775 getActionDefinitionsBuilder(
776 {G_VECREDUCE_OR, G_VECREDUCE_AND, G_VECREDUCE_XOR})
777 // Try to break down into smaller vectors as long as they're at least 64
778 // bits. This lets us use vector operations for some parts of the
779 // reduction.
780 .fewerElementsIf(
781 [=](const LegalityQuery &Q) {
782 LLT SrcTy = Q.Types[1];
783 if (SrcTy.isScalar())
784 return false;
785 if (!isPowerOf2_32(SrcTy.getNumElements()))
786 return false;
787 // We can usually perform 64b vector operations.
788 return SrcTy.getSizeInBits() > 64;
789 },
790 [=](const LegalityQuery &Q) {
791 LLT SrcTy = Q.Types[1];
792 return std::make_pair(1, SrcTy.divide(2));
793 })
794 .scalarize(1)
795 .lower();
796
797 getActionDefinitionsBuilder({G_UADDSAT, G_USUBSAT})
798 .lowerIf([=](const LegalityQuery &Q) { return Q.Types[0].isScalar(); });
799
800 getActionDefinitionsBuilder({G_FSHL, G_FSHR}).lower();
801
802 getActionDefinitionsBuilder(G_ROTR)
803 .legalFor({{s32, s64}, {s64, s64}})
804 .customIf([=](const LegalityQuery &Q) {
805 return Q.Types[0].isScalar() && Q.Types[1].getScalarSizeInBits() < 64;
806 })
807 .lower();
809
810 getActionDefinitionsBuilder({G_SBFX, G_UBFX})
811 .customFor({{s32, s32}, {s64, s64}});
812
813 auto always = [=](const LegalityQuery &Q) { return true; };
814 auto &CTPOPActions = getActionDefinitionsBuilder(G_CTPOP);
815 if (HasCSSC)
816 CTPOPActions
817 .legalFor({{s32, s32},
818 {s64, s64},
819 {v8s8, v8s8},
820 {v16s8, v16s8}})
821 .customFor({{s128, s128},
822 {v2s64, v2s64},
823 {v2s32, v2s32},
824 {v4s32, v4s32},
825 {v4s16, v4s16},
826 {v8s16, v8s16}});
827 else
828 CTPOPActions
829 .legalFor({{v8s8, v8s8},
830 {v16s8, v16s8}})
831 .customFor({{s32, s32},
832 {s64, s64},
833 {s128, s128},
834 {v2s64, v2s64},
835 {v2s32, v2s32},
836 {v4s32, v4s32},
837 {v4s16, v4s16},
838 {v8s16, v8s16}});
839 CTPOPActions
840 .clampScalar(0, s32, s128)
841 .widenScalarToNextPow2(0)
842 .minScalarEltSameAsIf(always, 1, 0)
843 .maxScalarEltSameAsIf(always, 1, 0);
844
845 // TODO: Vector types.
846 getActionDefinitionsBuilder({G_SADDSAT, G_SSUBSAT}).lowerIf(isScalar(0));
847
848 // TODO: Vector types.
849 getActionDefinitionsBuilder({G_FMAXNUM, G_FMINNUM})
850 .legalFor({MinFPScalar, s32, s64})
851 .libcallFor({s128})
852 .minScalar(0, MinFPScalar);
853
854 getActionDefinitionsBuilder({G_FMAXIMUM, G_FMINIMUM})
855 .legalFor({MinFPScalar, s32, s64, v2s32, v4s32, v2s64})
856 .legalIf([=](const LegalityQuery &Query) {
857 const auto &Ty = Query.Types[0];
858 return (Ty == v8s16 || Ty == v4s16) && HasFP16;
859 })
860 .minScalar(0, MinFPScalar)
861 .clampNumElements(0, v4s16, v8s16)
862 .clampNumElements(0, v2s32, v4s32)
863 .clampNumElements(0, v2s64, v2s64);
864
865 // TODO: Libcall support for s128.
866 // TODO: s16 should be legal with full FP16 support.
867 getActionDefinitionsBuilder({G_LROUND, G_LLROUND})
868 .legalFor({{s64, s32}, {s64, s64}});
869
870 // TODO: Custom legalization for vector types.
871 // TODO: Custom legalization for mismatched types.
872 // TODO: s16 support.
873 getActionDefinitionsBuilder(G_FCOPYSIGN).customFor({{s32, s32}, {s64, s64}});
874
876
877 getLegacyLegalizerInfo().computeTables();
878 verify(*ST.getInstrInfo());
879}
880
881bool AArch64LegalizerInfo::legalizeCustom(LegalizerHelper &Helper,
882 MachineInstr &MI) const {
883 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
884 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
885 GISelChangeObserver &Observer = Helper.Observer;
886 switch (MI.getOpcode()) {
887 default:
888 // No idea what to do.
889 return false;
890 case TargetOpcode::G_VAARG:
891 return legalizeVaArg(MI, MRI, MIRBuilder);
892 case TargetOpcode::G_LOAD:
893 case TargetOpcode::G_STORE:
894 return legalizeLoadStore(MI, MRI, MIRBuilder, Observer);
895 case TargetOpcode::G_SHL:
896 case TargetOpcode::G_ASHR:
897 case TargetOpcode::G_LSHR:
898 return legalizeShlAshrLshr(MI, MRI, MIRBuilder, Observer);
899 case TargetOpcode::G_GLOBAL_VALUE:
900 return legalizeSmallCMGlobalValue(MI, MRI, MIRBuilder, Observer);
901 case TargetOpcode::G_TRUNC:
902 return legalizeVectorTrunc(MI, Helper);
903 case TargetOpcode::G_SBFX:
904 case TargetOpcode::G_UBFX:
905 return legalizeBitfieldExtract(MI, MRI, Helper);
906 case TargetOpcode::G_ROTR:
907 return legalizeRotate(MI, MRI, Helper);
908 case TargetOpcode::G_CTPOP:
909 return legalizeCTPOP(MI, MRI, Helper);
910 case TargetOpcode::G_ATOMIC_CMPXCHG:
911 return legalizeAtomicCmpxchg128(MI, MRI, Helper);
912 case TargetOpcode::G_CTTZ:
913 return legalizeCTTZ(MI, Helper);
914 case TargetOpcode::G_BZERO:
915 case TargetOpcode::G_MEMCPY:
916 case TargetOpcode::G_MEMMOVE:
917 case TargetOpcode::G_MEMSET:
918 return legalizeMemOps(MI, Helper);
919 case TargetOpcode::G_FCOPYSIGN:
920 return legalizeFCopySign(MI, Helper);
921 }
922
923 llvm_unreachable("expected switch to return");
924}
925
926bool AArch64LegalizerInfo::legalizeRotate(MachineInstr &MI,
927 MachineRegisterInfo &MRI,
928 LegalizerHelper &Helper) const {
929 // To allow for imported patterns to match, we ensure that the rotate amount
930 // is 64b with an extension.
931 Register AmtReg = MI.getOperand(2).getReg();
932 LLT AmtTy = MRI.getType(AmtReg);
933 (void)AmtTy;
934 assert(AmtTy.isScalar() && "Expected a scalar rotate");
935 assert(AmtTy.getSizeInBits() < 64 && "Expected this rotate to be legal");
936 auto NewAmt = Helper.MIRBuilder.buildZExt(LLT::scalar(64), AmtReg);
937 Helper.Observer.changingInstr(MI);
938 MI.getOperand(2).setReg(NewAmt.getReg(0));
939 Helper.Observer.changedInstr(MI);
940 return true;
941}
942
943static void extractParts(Register Reg, MachineRegisterInfo &MRI,
944 MachineIRBuilder &MIRBuilder, LLT Ty, int NumParts,
945 SmallVectorImpl<Register> &VRegs) {
946 for (int I = 0; I < NumParts; ++I)
947 VRegs.push_back(MRI.createGenericVirtualRegister(Ty));
948 MIRBuilder.buildUnmerge(VRegs, Reg);
949}
950
951bool AArch64LegalizerInfo::legalizeVectorTrunc(
952 MachineInstr &MI, LegalizerHelper &Helper) const {
953 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
954 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
955 // Similar to how operand splitting is done in SelectionDAG, we can handle
956 // %res(v8s8) = G_TRUNC %in(v8s32) by generating:
957 // %inlo(<4x s32>), %inhi(<4 x s32>) = G_UNMERGE %in(<8 x s32>)
958 // %lo16(<4 x s16>) = G_TRUNC %inlo
959 // %hi16(<4 x s16>) = G_TRUNC %inhi
960 // %in16(<8 x s16>) = G_CONCAT_VECTORS %lo16, %hi16
961 // %res(<8 x s8>) = G_TRUNC %in16
962
963 Register DstReg = MI.getOperand(0).getReg();
964 Register SrcReg = MI.getOperand(1).getReg();
965 LLT DstTy = MRI.getType(DstReg);
966 LLT SrcTy = MRI.getType(SrcReg);
969
970 // Split input type.
971 LLT SplitSrcTy =
972 SrcTy.changeElementCount(SrcTy.getElementCount().divideCoefficientBy(2));
973 // First, split the source into two smaller vectors.
974 SmallVector<Register, 2> SplitSrcs;
975 extractParts(SrcReg, MRI, MIRBuilder, SplitSrcTy, 2, SplitSrcs);
976
977 // Truncate the splits into intermediate narrower elements.
978 LLT InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits() * 2);
979 for (unsigned I = 0; I < SplitSrcs.size(); ++I)
980 SplitSrcs[I] = MIRBuilder.buildTrunc(InterTy, SplitSrcs[I]).getReg(0);
981
982 auto Concat = MIRBuilder.buildConcatVectors(
983 DstTy.changeElementSize(DstTy.getScalarSizeInBits() * 2), SplitSrcs);
984
985 Helper.Observer.changingInstr(MI);
986 MI.getOperand(1).setReg(Concat.getReg(0));
987 Helper.Observer.changedInstr(MI);
988 return true;
989}
990
991bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue(
992 MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
993 GISelChangeObserver &Observer) const {
994 assert(MI.getOpcode() == TargetOpcode::G_GLOBAL_VALUE);
995 // We do this custom legalization to convert G_GLOBAL_VALUE into target ADRP +
996 // G_ADD_LOW instructions.
997 // By splitting this here, we can optimize accesses in the small code model by
998 // folding in the G_ADD_LOW into the load/store offset.
999 auto &GlobalOp = MI.getOperand(1);
1000 const auto* GV = GlobalOp.getGlobal();
1001 if (GV->isThreadLocal())
1002 return true; // Don't want to modify TLS vars.
1003
1004 auto &TM = ST->getTargetLowering()->getTargetMachine();
1005 unsigned OpFlags = ST->ClassifyGlobalReference(GV, TM);
1006
1007 if (OpFlags & AArch64II::MO_GOT)
1008 return true;
1009
1010 auto Offset = GlobalOp.getOffset();
1011 Register DstReg = MI.getOperand(0).getReg();
1012 auto ADRP = MIRBuilder.buildInstr(AArch64::ADRP, {LLT::pointer(0, 64)}, {})
1013 .addGlobalAddress(GV, Offset, OpFlags | AArch64II::MO_PAGE);
1014 // Set the regclass on the dest reg too.
1015 MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass);
1016
1017 // MO_TAGGED on the page indicates a tagged address. Set the tag now. We do so
1018 // by creating a MOVK that sets bits 48-63 of the register to (global address
1019 // + 0x100000000 - PC) >> 48. The additional 0x100000000 offset here is to
1020 // prevent an incorrect tag being generated during relocation when the
1021 // global appears before the code section. Without the offset, a global at
1022 // `0x0f00'0000'0000'1000` (i.e. at `0x1000` with tag `0xf`) that's referenced
1023 // by code at `0x2000` would result in `0x0f00'0000'0000'1000 - 0x2000 =
1024 // 0x0eff'ffff'ffff'f000`, meaning the tag would be incorrectly set to `0xe`
1025 // instead of `0xf`.
1026 // This assumes that we're in the small code model so we can assume a binary
1027 // size of <= 4GB, which makes the untagged PC relative offset positive. The
1028 // binary must also be loaded into address range [0, 2^48). Both of these
1029 // properties need to be ensured at runtime when using tagged addresses.
1030 if (OpFlags & AArch64II::MO_TAGGED) {
1031 assert(!Offset &&
1032 "Should not have folded in an offset for a tagged global!");
1033 ADRP = MIRBuilder.buildInstr(AArch64::MOVKXi, {LLT::pointer(0, 64)}, {ADRP})
1034 .addGlobalAddress(GV, 0x100000000,
1035 AArch64II::MO_PREL | AArch64II::MO_G3)
1036 .addImm(48);
1037 MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass);
1038 }
1039
1040 MIRBuilder.buildInstr(AArch64::G_ADD_LOW, {DstReg}, {ADRP})
1041 .addGlobalAddress(GV, Offset,
1042 OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
1043 MI.eraseFromParent();
1044 return true;
1045}
1046
1047bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
1048 MachineInstr &MI) const {
1049 switch (MI.getIntrinsicID()) {
1050 case Intrinsic::vacopy: {
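    // AAPCS64 defines va_list as a 32-byte struct (20 bytes under ILP32);
    // Darwin and Windows use a plain pointer, so vacopy is just a load and
    // store of the corresponding number of bytes.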
1051 unsigned PtrSize = ST->isTargetILP32() ? 4 : 8;
1052 unsigned VaListSize =
1053 (ST->isTargetDarwin() || ST->isTargetWindows())
1054 ? PtrSize
1055 : ST->isTargetILP32() ? 20 : 32;
1056
1057 MachineFunction &MF = *MI.getMF();
1058 auto Val = MF.getRegInfo().createGenericVirtualRegister(
1059 LLT::scalar(VaListSize * 8));
1060 MachineIRBuilder MIB(MI);
1061 MIB.buildLoad(Val, MI.getOperand(2),
1062 *MF.getMachineMemOperand(MachinePointerInfo(),
1063 MachineMemOperand::MOLoad,
1064 VaListSize, Align(PtrSize)));
1065 MIB.buildStore(Val, MI.getOperand(1),
1066 *MF.getMachineMemOperand(MachinePointerInfo(),
1067 MachineMemOperand::MOStore,
1068 VaListSize, Align(PtrSize)));
1069 MI.eraseFromParent();
1070 return true;
1071 }
1072 case Intrinsic::get_dynamic_area_offset: {
1073 MachineIRBuilder &MIB = Helper.MIRBuilder;
1074 MIB.buildConstant(MI.getOperand(0).getReg(), 0);
1075 MI.eraseFromParent();
1076 return true;
1077 }
1078 case Intrinsic::aarch64_mops_memset_tag: {
1079 assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
1080 // Zext the value to 64 bit
1081 MachineIRBuilder MIB(MI);
1082 auto &Value = MI.getOperand(3);
1083 Register ZExtValueReg = MIB.buildAnyExt(LLT::scalar(64), Value).getReg(0);
1084 Value.setReg(ZExtValueReg);
1085 return true;
1086 }
1087 case Intrinsic::prefetch: {
1088 MachineIRBuilder MIB(MI);
1089 auto &AddrVal = MI.getOperand(1);
1090
1091 int64_t IsWrite = MI.getOperand(2).getImm();
1092 int64_t Locality = MI.getOperand(3).getImm();
1093 int64_t IsData = MI.getOperand(4).getImm();
1094
1095 bool IsStream = Locality == 0;
1096 if (Locality != 0) {
1097 assert(Locality <= 3 && "Prefetch locality out-of-range");
1098 // The locality degree is the opposite of the cache speed.
1099 // Put the number the other way around.
1100 // The encoding starts at 0 for level 1
1101 Locality = 3 - Locality;
1102 }
1103
1104 unsigned PrfOp =
1105 (IsWrite << 4) | (!IsData << 3) | (Locality << 1) | IsStream;
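    // Worked example: a read (IsWrite=0) of data (IsData=1) with maximal
    // locality (3) gives Locality=0 and IsStream=0, so PrfOp=0b00000,
    // i.e. PLDL1KEEP.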
1106
1107 MIB.buildInstr(AArch64::G_PREFETCH).addImm(PrfOp).add(AddrVal);
1108 MI.eraseFromParent();
1109 return true;
1110 }
1111 case Intrinsic::aarch64_prefetch: {
1112 MachineIRBuilder MIB(MI);
1113 auto &AddrVal = MI.getOperand(1);
1114
1115 int64_t IsWrite = MI.getOperand(2).getImm();
1116 int64_t Target = MI.getOperand(3).getImm();
1117 int64_t IsStream = MI.getOperand(4).getImm();
1118 int64_t IsData = MI.getOperand(5).getImm();
1119
1120 unsigned PrfOp = (IsWrite << 4) | // Load/Store bit
1121 (!IsData << 3) | // IsDataCache bit
1122 (Target << 1) | // Cache level bits
1123 (unsigned)IsStream; // Stream bit
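    // The result is the PRFM <prfop> immediate; e.g. IsWrite=1, Target=2 (L3),
    // IsStream=0, IsData=1 encodes as 0b10100 (PSTL3KEEP).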
1124
1125 MIB.buildInstr(AArch64::G_PREFETCH).addImm(PrfOp).add(AddrVal);
1126 MI.eraseFromParent();
1127 return true;
1128 }
1129 }
1130
1131 return true;
1132}
1133
1134bool AArch64LegalizerInfo::legalizeShlAshrLshr(
1135 MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
1136 GISelChangeObserver &Observer) const {
1137 assert(MI.getOpcode() == TargetOpcode::G_ASHR ||
1138 MI.getOpcode() == TargetOpcode::G_LSHR ||
1139 MI.getOpcode() == TargetOpcode::G_SHL);
1140 // If the shift amount is a G_CONSTANT, promote it to a 64 bit type so the
1141 // imported patterns can select it later. Either way, it will be legal.
1142 Register AmtReg = MI.getOperand(2).getReg();
1143 auto VRegAndVal = getIConstantVRegValWithLookThrough(AmtReg, MRI);
1144 if (!VRegAndVal)
1145 return true;
1146 // Check the shift amount is in range for an immediate form.
1147 int64_t Amount = VRegAndVal->Value.getSExtValue();
1148 if (Amount > 31)
1149 return true; // This will have to remain a register variant.
1150 auto ExtCst = MIRBuilder.buildConstant(LLT::scalar(64), Amount);
1151 Observer.changingInstr(MI);
1152 MI.getOperand(2).setReg(ExtCst.getReg(0));
1153 Observer.changedInstr(MI);
1154 return true;
1155}
1156
1157static void matchLDPSTPAddrMode(Register Root, Register &Base, int &Offset,
1158 MachineRegisterInfo &MRI) {
1159 Base = Root;
1160 Offset = 0;
1161
1162 Register NewBase;
1163 int64_t NewOffset;
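  // LDP/STP immediates are signed 7-bit multiples of the access size (8 bytes
  // here), so only byte offsets in [-512, 504] that are multiples of 8 fold.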
1164 if (mi_match(Root, MRI, m_GPtrAdd(m_Reg(NewBase), m_ICst(NewOffset))) &&
1165 isShiftedInt<7, 3>(NewOffset)) {
1166 Base = NewBase;
1167 Offset = NewOffset;
1168 }
1169}
1170
1171// FIXME: This should be removed and replaced with the generic bitcast legalize
1172// action.
1173bool AArch64LegalizerInfo::legalizeLoadStore(
1174 MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
1175 GISelChangeObserver &Observer) const {
1176 assert(MI.getOpcode() == TargetOpcode::G_STORE ||
1177 MI.getOpcode() == TargetOpcode::G_LOAD);
1178 // Here we just try to handle vector loads/stores where our value type might
1179 // have pointer elements, which the SelectionDAG importer can't handle. To
1180 // allow the existing patterns for s64 to fire for p0, we just try to bitcast
1181 // the value to use s64 types.
1182
1183 // Custom legalization requires that the instruction, if not deleted, be
1184 // fully legalized. To allow further legalization of the instruction, we
1185 // create a new one here and erase the existing one.
1186
1187 Register ValReg = MI.getOperand(0).getReg();
1188 const LLT ValTy = MRI.getType(ValReg);
1189
1190 if (ValTy == LLT::scalar(128)) {
1191
1192 AtomicOrdering Ordering = (*MI.memoperands_begin())->getSuccessOrdering();
1193 bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD;
1194 bool IsLoadAcquire = IsLoad && Ordering == AtomicOrdering::Acquire;
1195 bool IsStoreRelease = !IsLoad && Ordering == AtomicOrdering::Release;
1196 bool IsRcpC3 =
1197 ST->hasLSE2() && ST->hasRCPC3() && (IsLoadAcquire || IsStoreRelease);
1198
1199 LLT s64 = LLT::scalar(64);
1200
1201 unsigned Opcode;
1202 if (IsRcpC3) {
1203 Opcode = IsLoad ? AArch64::LDIAPPX : AArch64::STILPX;
1204 } else {
1205 // For LSE2, loads/stores should have been converted to monotonic and had
1206 // a fence inserted after them.
1207 assert(Ordering == AtomicOrdering::Monotonic ||
1208 Ordering == AtomicOrdering::Unordered);
1209 assert(ST->hasLSE2() && "ldp/stp not single copy atomic without +lse2");
1210
1211 Opcode = IsLoad ? AArch64::LDPXi : AArch64::STPXi;
1212 }
1213
1214 MachineInstrBuilder NewI;
1215 if (IsLoad) {
1216 NewI = MIRBuilder.buildInstr(Opcode, {s64, s64}, {});
1217 MIRBuilder.buildMergeLikeInstr(
1218 ValReg, {NewI->getOperand(0), NewI->getOperand(1)});
1219 } else {
1220 auto Split = MIRBuilder.buildUnmerge(s64, MI.getOperand(0));
1221 NewI = MIRBuilder.buildInstr(
1222 Opcode, {}, {Split->getOperand(0), Split->getOperand(1)});
1223 }
1224
1225 if (IsRcpC3) {
1226 NewI.addUse(MI.getOperand(1).getReg());
1227 } else {
1228 Register Base;
1229 int Offset;
1230 matchLDPSTPAddrMode(MI.getOperand(1).getReg(), Base, Offset, MRI);
1231 NewI.addUse(Base);
1232 NewI.addImm(Offset / 8);
1233 }
1234
1235 NewI.cloneMemRefs(MI);
1236 constrainSelectedInstRegOperands(*NewI, *ST->getInstrInfo(),
1237 *MRI.getTargetRegisterInfo(),
1238 *ST->getRegBankInfo());
1239 MI.eraseFromParent();
1240 return true;
1241 }
1242
1243 if (!ValTy.isVector() || !ValTy.getElementType().isPointer() ||
1244 ValTy.getElementType().getAddressSpace() != 0) {
1245 LLVM_DEBUG(dbgs() << "Tried to do custom legalization on wrong load/store");
1246 return false;
1247 }
1248
1249 unsigned PtrSize = ValTy.getElementType().getSizeInBits();
1250 const LLT NewTy = LLT::vector(ValTy.getElementCount(), PtrSize);
1251 auto &MMO = **MI.memoperands_begin();
1252 MMO.setType(NewTy);
1253
1254 if (MI.getOpcode() == TargetOpcode::G_STORE) {
1255 auto Bitcast = MIRBuilder.buildBitcast(NewTy, ValReg);
1256 MIRBuilder.buildStore(Bitcast.getReg(0), MI.getOperand(1), MMO);
1257 } else {
1258 auto NewLoad = MIRBuilder.buildLoad(NewTy, MI.getOperand(1), MMO);
1259 MIRBuilder.buildBitcast(ValReg, NewLoad);
1260 }
1261 MI.eraseFromParent();
1262 return true;
1263}
1264
1265bool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI,
1266 MachineRegisterInfo &MRI,
1267 MachineIRBuilder &MIRBuilder) const {
1268 MachineFunction &MF = MIRBuilder.getMF();
1269 Align Alignment(MI.getOperand(2).getImm());
1270 Register Dst = MI.getOperand(0).getReg();
1271 Register ListPtr = MI.getOperand(1).getReg();
1272
1273 LLT PtrTy = MRI.getType(ListPtr);
1274 LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());
1275
1276 const unsigned PtrSize = PtrTy.getSizeInBits() / 8;
1277 const Align PtrAlign = Align(PtrSize);
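  // The va_list here is a simple pointer to the next argument slot: load it,
  // realign it if the type is over-aligned, load the value, then write back
  // the pointer bumped by the slot size.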
1278 auto List = MIRBuilder.buildLoad(
1279 PtrTy, ListPtr,
1280 *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
1281 PtrTy, PtrAlign));
1282
1283 MachineInstrBuilder DstPtr;
1284 if (Alignment > PtrAlign) {
1285 // Realign the list to the actual required alignment.
1286 auto AlignMinus1 =
1287 MIRBuilder.buildConstant(IntPtrTy, Alignment.value() - 1);
1288 auto ListTmp = MIRBuilder.buildPtrAdd(PtrTy, List, AlignMinus1.getReg(0));
1289 DstPtr = MIRBuilder.buildMaskLowPtrBits(PtrTy, ListTmp, Log2(Alignment));
1290 } else
1291 DstPtr = List;
1292
1293 LLT ValTy = MRI.getType(Dst);
1294 uint64_t ValSize = ValTy.getSizeInBits() / 8;
1295 MIRBuilder.buildLoad(
1296 Dst, DstPtr,
1297 *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
1298 ValTy, std::max(Alignment, PtrAlign)));
1299
1300 auto Size = MIRBuilder.buildConstant(IntPtrTy, alignTo(ValSize, PtrAlign));
1301
1302 auto NewList = MIRBuilder.buildPtrAdd(PtrTy, DstPtr, Size.getReg(0));
1303
1304 MIRBuilder.buildStore(NewList, ListPtr,
1305 *MF.getMachineMemOperand(MachinePointerInfo(),
1306 MachineMemOperand::MOStore,
1307 PtrTy, PtrAlign));
1308
1309 MI.eraseFromParent();
1310 return true;
1311}
1312
1313bool AArch64LegalizerInfo::legalizeBitfieldExtract(
1314 MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
1315 // Only legal if we can select immediate forms.
1316 // TODO: Lower this otherwise.
1317 return getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI) &&
1318 getIConstantVRegValWithLookThrough(MI.getOperand(3).getReg(), MRI);
1319}
1320
1321bool AArch64LegalizerInfo::legalizeCTPOP(MachineInstr &MI,
1322 MachineRegisterInfo &MRI,
1323 LegalizerHelper &Helper) const {
1324 // When there is no integer popcount instruction (FEAT_CSSC isn't available),
1325 // it can be more efficiently lowered to the following sequence that uses
1326 // AdvSIMD registers/instructions as long as the copies to/from the AdvSIMD
1327 // registers are cheap.
1328 // FMOV D0, X0 // copy 64-bit int to vector, high bits zero'd
1329 // CNT V0.8B, V0.8B // 8xbyte pop-counts
1330 // ADDV B0, V0.8B // sum 8xbyte pop-counts
1331 // UMOV X0, V0.B[0] // copy byte result back to integer reg
1332 //
1333 // For 128 bit vector popcounts, we lower to the following sequence:
1334 // cnt.16b v0, v0 // v8s16, v4s32, v2s64
1335 // uaddlp.8h v0, v0 // v8s16, v4s32, v2s64
1336 // uaddlp.4s v0, v0 // v4s32, v2s64
1337 // uaddlp.2d v0, v0 // v2s64
1338 //
1339 // For 64 bit vector popcounts, we lower to the following sequence:
1340 // cnt.8b v0, v0 // v4s16, v2s32
1341 // uaddlp.4h v0, v0 // v4s16, v2s32
1342 // uaddlp.2s v0, v0 // v2s32
1343
1344 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
1345 Register Dst = MI.getOperand(0).getReg();
1346 Register Val = MI.getOperand(1).getReg();
1347 LLT Ty = MRI.getType(Val);
1348 unsigned Size = Ty.getSizeInBits();
1349
1350 assert(Ty == MRI.getType(Dst) &&
1351 "Expected src and dst to have the same type!");
1352
1353 if (ST->hasCSSC() && Ty.isScalar() && Size == 128) {
1354 LLT s64 = LLT::scalar(64);
1355
1356 auto Split = MIRBuilder.buildUnmerge(s64, Val);
1357 auto CTPOP1 = MIRBuilder.buildCTPOP(s64, Split->getOperand(0));
1358 auto CTPOP2 = MIRBuilder.buildCTPOP(s64, Split->getOperand(1));
1359 auto Add = MIRBuilder.buildAdd(s64, CTPOP1, CTPOP2);
1360
1361 MIRBuilder.buildZExt(Dst, Add);
1362 MI.eraseFromParent();
1363 return true;
1364 }
1365
1366 if (!ST->hasNEON() ||
1367 MI.getMF()->getFunction().hasFnAttribute(Attribute::NoImplicitFloat)) {
1368 // Use generic lowering when custom lowering is not possible.
1369 return Ty.isScalar() && (Size == 32 || Size == 64) &&
1370 Helper.lowerBitCount(MI) ==
1371 LegalizerHelper::LegalizeResult::Legalized;
1372 }
1373
1374 // Pre-conditioning: widen Val up to the nearest vector type.
1375 // s32,s64,v4s16,v2s32 -> v8i8
1376 // v8s16,v4s32,v2s64 -> v16i8
1377 LLT VTy = Size == 128 ? LLT::fixed_vector(16, 8) : LLT::fixed_vector(8, 8);
1378 if (Ty.isScalar()) {
1379 assert((Size == 32 || Size == 64 || Size == 128) && "Expected only 32, 64, or 128 bit scalars!");
1380 if (Size == 32) {
1381 Val = MIRBuilder.buildZExt(LLT::scalar(64), Val).getReg(0);
1382 }
1383 }
1384 Val = MIRBuilder.buildBitcast(VTy, Val).getReg(0);
1385
1386 // Count bits in each byte-sized lane.
1387 auto CTPOP = MIRBuilder.buildCTPOP(VTy, Val);
1388
1389 // Sum across lanes.
1390 Register HSum = CTPOP.getReg(0);
1391 unsigned Opc;
1392 SmallVector<LLT> HAddTys;
1393 if (Ty.isScalar()) {
1394 Opc = Intrinsic::aarch64_neon_uaddlv;
1395 HAddTys.push_back(LLT::scalar(32));
1396 } else if (Ty == LLT::fixed_vector(8, 16)) {
1397 Opc = Intrinsic::aarch64_neon_uaddlp;
1398 HAddTys.push_back(LLT::fixed_vector(8, 16));
1399 } else if (Ty == LLT::fixed_vector(4, 32)) {
1400 Opc = Intrinsic::aarch64_neon_uaddlp;
1401 HAddTys.push_back(LLT::fixed_vector(8, 16));
1402 HAddTys.push_back(LLT::fixed_vector(4, 32));
1403 } else if (Ty == LLT::fixed_vector(2, 64)) {
1404 Opc = Intrinsic::aarch64_neon_uaddlp;
1405 HAddTys.push_back(LLT::fixed_vector(8, 16));
1406 HAddTys.push_back(LLT::fixed_vector(4, 32));
1407 HAddTys.push_back(LLT::fixed_vector(2, 64));
1408 } else if (Ty == LLT::fixed_vector(4, 16)) {
1409 Opc = Intrinsic::aarch64_neon_uaddlp;
1410 HAddTys.push_back(LLT::fixed_vector(4, 16));
1411 } else if (Ty == LLT::fixed_vector(2, 32)) {
1412 Opc = Intrinsic::aarch64_neon_uaddlp;
1413 HAddTys.push_back(LLT::fixed_vector(4, 16));
1414 HAddTys.push_back(LLT::fixed_vector(2, 32));
1415 } else
1416 llvm_unreachable("unexpected vector shape");
1417 MachineInstrBuilder UADD;
1418 for (LLT HTy : HAddTys) {
1419 UADD = MIRBuilder.buildIntrinsic(Opc, {HTy}, /*HasSideEffects =*/false)
1420 .addUse(HSum);
1421 HSum = UADD.getReg(0);
1422 }
1423
1424 // Post-conditioning.
1425 if (Ty.isScalar() && (Size == 64 || Size == 128))
1426 MIRBuilder.buildZExt(Dst, UADD);
1427 else
1428 UADD->getOperand(0).setReg(Dst);
1429 MI.eraseFromParent();
1430 return true;
1431}
1432
1433bool AArch64LegalizerInfo::legalizeAtomicCmpxchg128(
1434 MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
1435 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
1436 LLT s64 = LLT::scalar(64);
1437 auto Addr = MI.getOperand(1).getReg();
1438 auto DesiredI = MIRBuilder.buildUnmerge({s64, s64}, MI.getOperand(2));
1439 auto NewI = MIRBuilder.buildUnmerge({s64, s64}, MI.getOperand(3));
1440 auto DstLo = MRI.createGenericVirtualRegister(s64);
1441 auto DstHi = MRI.createGenericVirtualRegister(s64);
1442
1443 MachineInstrBuilder CAS;
1444 if (ST->hasLSE()) {
1445 // We have 128-bit CASP instructions taking XSeqPair registers, which are
1446 // s128. We need the merge/unmerge to bracket the expansion and pair up with
1447 // the rest of the MIR so we must reassemble the extracted registers into a
1448 // 128-bit known-regclass one with code like this:
1449 //
1450 // %in1 = REG_SEQUENCE Lo, Hi ; One for each input
1451 // %out = CASP %in1, ...
1452 // %OldLo = G_EXTRACT %out, 0
1453 // %OldHi = G_EXTRACT %out, 64
1454 auto Ordering = (*MI.memoperands_begin())->getMergedOrdering();
1455 unsigned Opcode;
1456 switch (Ordering) {
1457 case AtomicOrdering::Acquire:
1458 Opcode = AArch64::CASPAX;
1459 break;
1460 case AtomicOrdering::Release:
1461 Opcode = AArch64::CASPLX;
1462 break;
1463 case AtomicOrdering::AcquireRelease:
1464 case AtomicOrdering::SequentiallyConsistent:
1465 Opcode = AArch64::CASPALX;
1466 break;
1467 default:
1468 Opcode = AArch64::CASPX;
1469 break;
1470 }
1471
1472 LLT s128 = LLT::scalar(128);
1473 auto CASDst = MRI.createGenericVirtualRegister(s128);
1474 auto CASDesired = MRI.createGenericVirtualRegister(s128);
1475 auto CASNew = MRI.createGenericVirtualRegister(s128);
1476 MIRBuilder.buildInstr(TargetOpcode::REG_SEQUENCE, {CASDesired}, {})
1477 .addUse(DesiredI->getOperand(0).getReg())
1478 .addImm(AArch64::sube64)
1479 .addUse(DesiredI->getOperand(1).getReg())
1480 .addImm(AArch64::subo64);
1481 MIRBuilder.buildInstr(TargetOpcode::REG_SEQUENCE, {CASNew}, {})
1482 .addUse(NewI->getOperand(0).getReg())
1483 .addImm(AArch64::sube64)
1484 .addUse(NewI->getOperand(1).getReg())
1485 .addImm(AArch64::subo64);
1486
1487 CAS = MIRBuilder.buildInstr(Opcode, {CASDst}, {CASDesired, CASNew, Addr});
1488
1489 MIRBuilder.buildExtract({DstLo}, {CASDst}, 0);
1490 MIRBuilder.buildExtract({DstHi}, {CASDst}, 64);
1491 } else {
1492 // The -O0 CMP_SWAP_128 is friendlier to generate code for because LDXP/STXP
1493 // can take arbitrary registers so it just has the normal GPR64 operands the
1494 // rest of AArch64 is expecting.
1495 auto Ordering = (*MI.memoperands_begin())->getMergedOrdering();
1496 unsigned Opcode;
1497 switch (Ordering) {
1498 case AtomicOrdering::Acquire:
1499 Opcode = AArch64::CMP_SWAP_128_ACQUIRE;
1500 break;
1501 case AtomicOrdering::Release:
1502 Opcode = AArch64::CMP_SWAP_128_RELEASE;
1503 break;
1504 case AtomicOrdering::AcquireRelease:
1505 case AtomicOrdering::SequentiallyConsistent:
1506 Opcode = AArch64::CMP_SWAP_128;
1507 break;
1508 default:
1509 Opcode = AArch64::CMP_SWAP_128_MONOTONIC;
1510 break;
1511 }
1512
1513 auto Scratch = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1514 CAS = MIRBuilder.buildInstr(Opcode, {DstLo, DstHi, Scratch},
1515 {Addr, DesiredI->getOperand(0),
1516 DesiredI->getOperand(1), NewI->getOperand(0),
1517 NewI->getOperand(1)});
1518 }
1519
1520 CAS.cloneMemRefs(MI);
1521 constrainSelectedInstRegOperands(*CAS, *ST->getInstrInfo(),
1522 *MRI.getTargetRegisterInfo(),
1523 *ST->getRegBankInfo());
1524
1525 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {DstLo, DstHi});
1526 MI.eraseFromParent();
1527 return true;
1528}
1529
1530bool AArch64LegalizerInfo::legalizeCTTZ(MachineInstr &MI,
1531 LegalizerHelper &Helper) const {
1532 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
1533 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
1534 LLT Ty = MRI.getType(MI.getOperand(1).getReg());
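  // There is no scalar count-trailing-zeros instruction outside of CSSC, but
  // cttz(x) == ctlz(bitreverse(x)), and both RBIT and CLZ are single
  // instructions.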
1535 auto BitReverse = MIRBuilder.buildBitReverse(Ty, MI.getOperand(1));
1536 MIRBuilder.buildCTLZ(MI.getOperand(0).getReg(), BitReverse);
1537 MI.eraseFromParent();
1538 return true;
1539}
1540
1541bool AArch64LegalizerInfo::legalizeMemOps(MachineInstr &MI,
1542 LegalizerHelper &Helper) const {
1543 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
1544
1545 // Tagged version MOPSMemorySetTagged is legalised in legalizeIntrinsic
1546 if (MI.getOpcode() == TargetOpcode::G_MEMSET) {
1547 // Zext the value operand to 64 bit
1548 auto &Value = MI.getOperand(1);
1549 Register ZExtValueReg =
1550 MIRBuilder.buildAnyExt(LLT::scalar(64), Value).getReg(0);
1551 Value.setReg(ZExtValueReg);
1552 return true;
1553 }
1554
1555 return false;
1556}
1557
1558bool AArch64LegalizerInfo::legalizeFCopySign(MachineInstr &MI,
1559 LegalizerHelper &Helper) const {
1560 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
1561 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
1562 Register Dst = MI.getOperand(0).getReg();
1563 LLT DstTy = MRI.getType(Dst);
1564 assert(DstTy.isScalar() && "Only expected scalars right now!");
1565 const unsigned DstSize = DstTy.getSizeInBits();
1566 assert((DstSize == 32 || DstSize == 64) && "Unexpected dst type!");
1567 assert(MRI.getType(MI.getOperand(2).getReg()) == DstTy &&
1568 "Expected homogeneous types!");
1569
1570 // We want to materialize a mask with the high bit set.
1571 uint64_t EltMask;
1572 LLT VecTy;
1573
1574 // TODO: s16 support.
1575 switch (DstSize) {
1576 default:
1577 llvm_unreachable("Unexpected type for G_FCOPYSIGN!");
1578 case 64: {
1579 // AdvSIMD immediate moves cannot materialize our mask in a single
1580 // instruction for 64-bit elements. Instead, materialize zero and then
1581 // negate it.
1582 EltMask = 0;
1583 VecTy = LLT::fixed_vector(2, DstTy);
1584 break;
1585 }
1586 case 32:
1587 EltMask = 0x80000000ULL;
1588 VecTy = LLT::fixed_vector(4, DstTy);
1589 break;
1590 }
1591
1592 // Widen In1 and In2 to 128 bits. We want these to eventually become
1593 // INSERT_SUBREGs.
1594 auto Undef = MIRBuilder.buildUndef(VecTy);
1595 auto Zero = MIRBuilder.buildConstant(DstTy, 0);
1596 auto Ins1 = MIRBuilder.buildInsertVectorElement(
1597 VecTy, Undef, MI.getOperand(1).getReg(), Zero);
1598 auto Ins2 = MIRBuilder.buildInsertVectorElement(
1599 VecTy, Undef, MI.getOperand(2).getReg(), Zero);
1600
1601 // Construct the mask.
1602 auto Mask = MIRBuilder.buildConstant(VecTy, EltMask);
1603 if (DstSize == 64)
1604 Mask = MIRBuilder.buildFNeg(VecTy, Mask);
1605
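  // G_BIT picks each result bit from the second input where the mask bit is
  // set and from the first input elsewhere, so with a sign-bit mask this
  // copies just the sign across (assumed operand order: input1, input2, mask).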
1606 auto Sel = MIRBuilder.buildInstr(AArch64::G_BIT, {VecTy}, {Ins1, Ins2, Mask});
1607
1608 // Build an unmerge whose 0th elt is the original G_FCOPYSIGN destination. We
1609 // want this to eventually become an EXTRACT_SUBREG.
1610 SmallVector<Register, 2> DstRegs(1, Dst);
1611 for (unsigned I = 1, E = VecTy.getNumElements(); I < E; ++I)
1612 DstRegs.push_back(MRI.createGenericVirtualRegister(DstTy));
1613 MIRBuilder.buildUnmerge(DstRegs, Sel);
1614 MI.eraseFromParent();
1615 return true;
1616}
LegalizeRuleSet & legalIf(LegalityPredicate Predicate)
The instruction is legal if predicate is true.
LegalizeRuleSet & customFor(std::initializer_list< LLT > Types)
LegalizeResult lowerBitCount(MachineInstr &MI)
@ Legalized
Instruction has been legalized and the MachineFunction changed.
GISelChangeObserver & Observer
To keep track of changes made by the LegalizerHelper.
MachineIRBuilder & MIRBuilder
Expose MIRBuilder so clients can set their own RecordInsertInstruction functions.
LegalizeRuleSet & getActionDefinitionsBuilder(unsigned Opcode)
Get the action definition builder for the given opcode.
const LegacyLegalizerInfo & getLegacyLegalizerInfo() const
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Helper class to build MachineInstr.
MachineInstrBuilder buildAdd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ADD Op0, Op1.
MachineInstrBuilder buildUndef(const DstOp &Res)
Build and insert Res = IMPLICIT_DEF.
MachineInstrBuilder buildUnmerge(ArrayRef< LLT > Res, const SrcOp &Op)
Build and insert Res0, ... = G_UNMERGE_VALUES Op.
MachineInstrBuilder buildExtract(const DstOp &Res, const SrcOp &Src, uint64_t Index)
Build and insert Res0, ... = G_EXTRACT Src, Idx0.
MachineInstrBuilder buildConcatVectors(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_CONCAT_VECTORS Op0, ...
MachineInstrBuilder buildCTLZ(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTLZ Op0, Src0.
MachineInstrBuilder buildMergeLikeInstr(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_MERGE_VALUES Op0, ... or Res = G_BUILD_VECTOR Op0, ... or Res = G_CONCAT_VEC...
MachineInstrBuilder buildLoad(const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = G_LOAD Addr, MMO.
MachineInstrBuilder buildBitReverse(const DstOp &Dst, const SrcOp &Src)
Build and insert Dst = G_BITREVERSE Src.
MachineInstrBuilder buildPtrAdd(const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1)
Build and insert Res = G_PTR_ADD Op0, Op1.
MachineInstrBuilder buildStore(const SrcOp &Val, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert G_STORE Val, Addr, MMO.
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
MachineInstrBuilder buildCTPOP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTPOP Op0, Src0.
MachineInstrBuilder buildZExt(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ZEXT Op.
MachineFunction & getMF()
Getter for the function we currently build.
MachineInstrBuilder buildFNeg(const DstOp &Dst, const SrcOp &Src0, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FNEG Op0.
MachineInstrBuilder buildInsertVectorElement(const DstOp &Res, const SrcOp &Val, const SrcOp &Elt, const SrcOp &Idx)
Build and insert Res = G_INSERT_VECTOR_ELT Val, Elt, Idx.
MachineInstrBuilder buildAnyExt(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ANYEXT Op0.
MachineInstrBuilder buildIntrinsic(Intrinsic::ID ID, ArrayRef< Register > Res, bool HasSideEffects)
Build and insert either a G_INTRINSIC (if HasSideEffects is false) or G_INTRINSIC_W_SIDE_EFFECTS inst...
MachineInstrBuilder buildBitcast(const DstOp &Dst, const SrcOp &Src)
Build and insert Dst = G_BITCAST Src.
MachineInstrBuilder buildTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_TRUNC Op.
MachineRegisterInfo * getMRI()
Getter for MRI.
MachineInstrBuilder buildMaskLowPtrBits(const DstOp &Res, const SrcOp &Op0, uint32_t NumBits)
Build and insert Res = G_PTRMASK Op0, G_CONSTANT (1 << NumBits) - 1.
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
Representation of each machine instruction.
Definition: MachineInstr.h:68
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:526
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
void setReg(Register Reg)
Change the register this operand corresponds to.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Register createGenericVirtualRegister(LLT Ty, StringRef Name="")
Create and return a new generic virtual register with low-level type Ty.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:577
void push_back(const T &Elt)
Definition: SmallVector.h:416
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
const TargetMachine & getTargetMachine() const
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:78
Target - Wrapper for Target specific information.
LLVM Value Representation.
Definition: Value.h:74
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition: TypeSize.h:234
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ MO_NC
MO_NC - Indicates whether the linker is expected to check the symbol reference for overflow.
@ MO_PAGEOFF
MO_PAGEOFF - A symbol operand with this flag represents the offset of that symbol within a 4K page.
@ MO_GOT
MO_GOT - This flag indicates that a symbol operand represents the address of the GOT entry for the sy...
@ MO_PREL
MO_PREL - Indicates that the bits of the symbol operand represented by MO_G0 etc are PC relative.
@ MO_PAGE
MO_PAGE - A symbol operand with this flag represents the pc-relative offset of the 4K page containing...
@ MO_TAGGED
MO_TAGGED - With MO_PAGE, indicates that the page includes a memory tag in bits 56-63.
@ MO_G3
MO_G3 - A symbol operand with this flag (granule 3) represents the high 16-bits of a 64-bit address,...
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
LegalityPredicate isScalar(unsigned TypeIdx)
True iff the specified type index is a scalar.
LegalityPredicate typeInSet(unsigned TypeIdx, std::initializer_list< LLT > TypesInit)
True iff the given type index is one of the specified types.
LegalityPredicate smallerThan(unsigned TypeIdx0, unsigned TypeIdx1)
True iff the first type index has a smaller total bit size than second type index.
LegalityPredicate atomicOrderingAtLeastOrStrongerThan(unsigned MMOIdx, AtomicOrdering Ordering)
True iff the specified MMO index has at an atomic ordering of at Ordering or stronger.
LegalityPredicate isVector(unsigned TypeIdx)
True iff the specified type index is a vector.
Predicate all(Predicate P0, Predicate P1)
True iff P0 and P1 are true.
LegalityPredicate typeIs(unsigned TypeIdx, LLT TypesInit)
True iff the given type index is the specified type.
@ Bitcast
Perform the operation on a different, but equivalently sized type.
Definition: LegalizerInfo.h:73
LegalizeMutation moreElementsToNextPow2(unsigned TypeIdx, unsigned Min=0)
Add more elements to the type for the given type index to the next power of.
LegalizeMutation scalarize(unsigned TypeIdx)
Break up the vector type for the given type index into the element type.
LegalizeMutation widenScalarOrEltToNextPow2(unsigned TypeIdx, unsigned Min=0)
Widen the scalar type or vector element type for the given type index to the next power of 2.
LegalizeMutation changeTo(unsigned TypeIdx, LLT Ty)
Select this specific type for the given type index.
operand_type_match m_Reg()
ConstantMatch< APInt > m_ICst(APInt &Cst)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
BinaryOp_match< LHS, RHS, TargetOpcode::G_PTR_ADD, false > m_GPtrAdd(const LHS &L, const RHS &R)
@ Undef
Value of the register doesn't matter.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:406
bool constrainSelectedInstRegOperands(MachineInstr &I, const TargetInstrInfo &TII, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
Mutate the newly-selected instruction I to constrain its (possibly generic) virtual register operands...
Definition: Utils.cpp:152
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:288
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Add
Sum of integers.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155
std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
Definition: Utils.cpp:409
bool is_contained(R &&Range, const E &Element)
Wrapper function around std::find to detect if an element exists in a container.
Definition: STLExtras.h:1869
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition: Alignment.h:208
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
The LegalityQuery object bundles together all the information that's needed to decide whether a given...
ArrayRef< MemDesc > MMODescrs
Operations which require memory can use this to place requirements on the memory type for each MMO.
ArrayRef< LLT > Types
This class contains a discriminated union of information about pointers in memory operands,...