LLVM 23.0.0git
AArch64LegalizerInfo.cpp
Go to the documentation of this file.
1//===- AArch64LegalizerInfo.cpp ----------------------------------*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file implements the targeting of the MachineLegalizer class for
10/// AArch64.
11/// \todo This should be generated by TableGen.
12//===----------------------------------------------------------------------===//
13
15#include "AArch64Subtarget.h"
16#include "llvm/ADT/STLExtras.h"
28#include "llvm/IR/Intrinsics.h"
29#include "llvm/IR/IntrinsicsAArch64.h"
30#include "llvm/IR/Type.h"
32#include <initializer_list>
33
34#define DEBUG_TYPE "aarch64-legalinfo"
35
36using namespace llvm;
37using namespace LegalizeActions;
38using namespace LegalizeMutations;
39using namespace LegalityPredicates;
40using namespace MIPatternMatch;
41
43 : ST(&ST) {
44 using namespace TargetOpcode;
45 const LLT p0 = LLT::pointer(0, 64);
46 const LLT s8 = LLT::scalar(8);
47 const LLT s16 = LLT::scalar(16);
48 const LLT s32 = LLT::scalar(32);
49 const LLT s64 = LLT::scalar(64);
50 const LLT s128 = LLT::scalar(128);
51 const LLT v16s8 = LLT::fixed_vector(16, 8);
52 const LLT v8s8 = LLT::fixed_vector(8, 8);
53 const LLT v4s8 = LLT::fixed_vector(4, 8);
54 const LLT v2s8 = LLT::fixed_vector(2, 8);
55 const LLT v8s16 = LLT::fixed_vector(8, 16);
56 const LLT v4s16 = LLT::fixed_vector(4, 16);
57 const LLT v2s16 = LLT::fixed_vector(2, 16);
58 const LLT v2s32 = LLT::fixed_vector(2, 32);
59 const LLT v4s32 = LLT::fixed_vector(4, 32);
60 const LLT v2s64 = LLT::fixed_vector(2, 64);
61 const LLT v2p0 = LLT::fixed_vector(2, p0);
62
63 const LLT nxv16s8 = LLT::scalable_vector(16, s8);
64 const LLT nxv8s16 = LLT::scalable_vector(8, s16);
65 const LLT nxv4s32 = LLT::scalable_vector(4, s32);
66 const LLT nxv2s64 = LLT::scalable_vector(2, s64);
67
68 std::initializer_list<LLT> PackedVectorAllTypeList = {/* Begin 128bit types */
69 v16s8, v8s16, v4s32,
70 v2s64, v2p0,
71 /* End 128bit types */
72 /* Begin 64bit types */
73 v8s8, v4s16, v2s32};
74 std::initializer_list<LLT> ScalarAndPtrTypesList = {s8, s16, s32, s64, p0};
75 SmallVector<LLT, 8> PackedVectorAllTypesVec(PackedVectorAllTypeList);
76 SmallVector<LLT, 8> ScalarAndPtrTypesVec(ScalarAndPtrTypesList);
77
78 const TargetMachine &TM = ST.getTargetLowering()->getTargetMachine();
79
80 // FIXME: support subtargets which have neon/fp-armv8 disabled.
81 if (!ST.hasNEON() || !ST.hasFPARMv8()) {
83 return;
84 }
85
86 // Some instructions only support s16 if the subtarget has full 16-bit FP
87 // support.
88 const bool HasFP16 = ST.hasFullFP16();
89 const LLT &MinFPScalar = HasFP16 ? s16 : s32;
90
91 const bool HasCSSC = ST.hasCSSC();
92 const bool HasRCPC3 = ST.hasRCPC3();
93 const bool HasSVE = ST.hasSVE();
94
96 {G_IMPLICIT_DEF, G_FREEZE, G_CONSTANT_FOLD_BARRIER})
97 .legalFor({p0, s8, s16, s32, s64})
98 .legalFor({v2s8, v4s8, v8s8, v16s8, v2s16, v4s16, v8s16, v2s32, v4s32,
99 v2s64, v2p0})
100 .widenScalarToNextPow2(0)
101 .clampScalar(0, s8, s64)
104 .clampNumElements(0, v8s8, v16s8)
105 .clampNumElements(0, v4s16, v8s16)
106 .clampNumElements(0, v2s32, v4s32)
107 .clampMaxNumElements(0, s64, 2)
108 .clampMaxNumElements(0, p0, 2)
110
112 .legalFor({p0, s16, s32, s64})
113 .legalFor(PackedVectorAllTypeList)
117 .clampScalar(0, s16, s64)
118 .clampNumElements(0, v8s8, v16s8)
119 .clampNumElements(0, v4s16, v8s16)
120 .clampNumElements(0, v2s32, v4s32)
121 .clampMaxNumElements(0, s64, 2)
122 .clampMaxNumElements(0, p0, 2);
123
125 .legalIf(all(typeInSet(0, {s32, s64, p0}), typeInSet(1, {s8, s16, s32}),
126 smallerThan(1, 0)))
127 .widenScalarToNextPow2(0)
128 .clampScalar(0, s32, s64)
130 .minScalar(1, s8)
131 .maxScalarIf(typeInSet(0, {s32}), 1, s16)
132 .maxScalarIf(typeInSet(0, {s64, p0}), 1, s32);
133
135 .legalIf(all(typeInSet(0, {s16, s32, s64, p0}),
136 typeInSet(1, {s32, s64, s128, p0}), smallerThan(0, 1)))
137 .widenScalarToNextPow2(1)
138 .clampScalar(1, s32, s128)
140 .minScalar(0, s16)
141 .maxScalarIf(typeInSet(1, {s32}), 0, s16)
142 .maxScalarIf(typeInSet(1, {s64, p0}), 0, s32)
143 .maxScalarIf(typeInSet(1, {s128}), 0, s64);
144
145 getActionDefinitionsBuilder({G_ADD, G_SUB, G_AND, G_OR, G_XOR})
146 .legalFor({s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32, v2s64})
147 .legalFor(HasSVE, {nxv16s8, nxv8s16, nxv4s32, nxv2s64})
148 .widenScalarToNextPow2(0)
149 .clampScalar(0, s32, s64)
150 .clampMaxNumElements(0, s8, 16)
151 .clampMaxNumElements(0, s16, 8)
152 .clampNumElements(0, v2s32, v4s32)
153 .clampNumElements(0, v2s64, v2s64)
155 [=](const LegalityQuery &Query) {
156 return Query.Types[0].getNumElements() <= 2;
157 },
158 0, s32)
159 .minScalarOrEltIf(
160 [=](const LegalityQuery &Query) {
161 return Query.Types[0].getNumElements() <= 4;
162 },
163 0, s16)
164 .minScalarOrEltIf(
165 [=](const LegalityQuery &Query) {
166 return Query.Types[0].getNumElements() <= 16;
167 },
168 0, s8)
169 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
171
173 .legalFor({s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32, v2s64})
174 .widenScalarToNextPow2(0)
175 .clampScalar(0, s32, s64)
176 .clampMaxNumElements(0, s8, 16)
177 .clampMaxNumElements(0, s16, 8)
178 .clampNumElements(0, v2s32, v4s32)
179 .clampNumElements(0, v2s64, v2s64)
181 [=](const LegalityQuery &Query) {
182 return Query.Types[0].getNumElements() <= 2;
183 },
184 0, s32)
185 .minScalarOrEltIf(
186 [=](const LegalityQuery &Query) {
187 return Query.Types[0].getNumElements() <= 4;
188 },
189 0, s16)
190 .minScalarOrEltIf(
191 [=](const LegalityQuery &Query) {
192 return Query.Types[0].getNumElements() <= 16;
193 },
194 0, s8)
195 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
197
198 getActionDefinitionsBuilder({G_SHL, G_ASHR, G_LSHR})
199 .customIf([=](const LegalityQuery &Query) {
200 const auto &SrcTy = Query.Types[0];
201 const auto &AmtTy = Query.Types[1];
202 return !SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
203 AmtTy.getSizeInBits() == 32;
204 })
205 .legalFor({
206 {s32, s32},
207 {s32, s64},
208 {s64, s64},
209 {v8s8, v8s8},
210 {v16s8, v16s8},
211 {v4s16, v4s16},
212 {v8s16, v8s16},
213 {v2s32, v2s32},
214 {v4s32, v4s32},
215 {v2s64, v2s64},
216 })
217 .widenScalarToNextPow2(0)
218 .clampScalar(1, s32, s64)
219 .clampScalar(0, s32, s64)
220 .clampNumElements(0, v8s8, v16s8)
221 .clampNumElements(0, v4s16, v8s16)
222 .clampNumElements(0, v2s32, v4s32)
223 .clampNumElements(0, v2s64, v2s64)
225 .minScalarSameAs(1, 0)
229
231 .legalFor({{p0, s64}, {v2p0, v2s64}})
232 .clampScalarOrElt(1, s64, s64)
233 .clampNumElements(0, v2p0, v2p0);
234
235 getActionDefinitionsBuilder(G_PTRMASK).legalFor({{p0, s64}});
236
237 getActionDefinitionsBuilder({G_SDIV, G_UDIV})
238 .legalFor({s32, s64})
239 .libcallFor({s128})
240 .clampScalar(0, s32, s64)
242 .scalarize(0);
243
244 getActionDefinitionsBuilder({G_SREM, G_UREM, G_SDIVREM, G_UDIVREM})
245 .lowerFor({s8, s16, s32, s64, v2s32, v4s32, v2s64})
246 .libcallFor({s128})
248 .minScalarOrElt(0, s32)
249 .clampNumElements(0, v2s32, v4s32)
250 .clampNumElements(0, v2s64, v2s64)
251 .scalarize(0);
252
253 getActionDefinitionsBuilder({G_SMULO, G_UMULO})
254 .widenScalarToNextPow2(0, /*Min = */ 32)
255 .clampScalar(0, s32, s64)
256 .lower();
257
258 getActionDefinitionsBuilder({G_SMULH, G_UMULH})
259 .legalFor({s64, v16s8, v8s16, v4s32})
260 .lower();
261
262 getActionDefinitionsBuilder({G_SMIN, G_SMAX, G_UMIN, G_UMAX})
263 .legalFor({v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
264 .legalFor(HasCSSC, {s32, s64})
265 .minScalar(HasCSSC, 0, s32)
266 .clampNumElements(0, v8s8, v16s8)
267 .clampNumElements(0, v4s16, v8s16)
268 .clampNumElements(0, v2s32, v4s32)
269 .lower();
270
271 // FIXME: Legal vector types are only legal with NEON.
273 .legalFor(HasCSSC, {s32, s64})
274 .legalFor(PackedVectorAllTypeList)
275 .customIf([=](const LegalityQuery &Q) {
276 // TODO: Fix suboptimal codegen for 128+ bit types.
277 LLT SrcTy = Q.Types[0];
278 return SrcTy.isScalar() && SrcTy.getSizeInBits() < 128;
279 })
280 .widenScalarIf(
281 [=](const LegalityQuery &Query) { return Query.Types[0] == v4s8; },
282 [=](const LegalityQuery &Query) { return std::make_pair(0, v4s16); })
283 .widenScalarIf(
284 [=](const LegalityQuery &Query) { return Query.Types[0] == v2s16; },
285 [=](const LegalityQuery &Query) { return std::make_pair(0, v2s32); })
286 .clampNumElements(0, v8s8, v16s8)
287 .clampNumElements(0, v4s16, v8s16)
288 .clampNumElements(0, v2s32, v4s32)
289 .clampNumElements(0, v2s64, v2s64)
291 .lower();
292
294 {G_ABDS, G_ABDU, G_UAVGFLOOR, G_UAVGCEIL, G_SAVGFLOOR, G_SAVGCEIL})
295 .legalFor({v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
296 .lower();
297
299 {G_SADDE, G_SSUBE, G_UADDE, G_USUBE, G_SADDO, G_SSUBO, G_UADDO, G_USUBO})
300 .legalFor({{s32, s32}, {s64, s32}})
301 .clampScalar(0, s32, s64)
302 .clampScalar(1, s32, s64)
304
305 getActionDefinitionsBuilder({G_FSHL, G_FSHR})
306 .customFor({{s32, s32}, {s32, s64}, {s64, s64}})
307 .lower();
308
310 .legalFor({{s32, s64}, {s64, s64}})
311 .customIf([=](const LegalityQuery &Q) {
312 return Q.Types[0].isScalar() && Q.Types[1].getScalarSizeInBits() < 64;
313 })
314 .lower();
316
317 getActionDefinitionsBuilder({G_SBFX, G_UBFX})
318 .customFor({{s32, s32}, {s64, s64}});
319
320 auto always = [=](const LegalityQuery &Q) { return true; };
322 .legalFor(HasCSSC, {{s32, s32}, {s64, s64}})
323 .legalFor({{v8s8, v8s8}, {v16s8, v16s8}})
324 .customFor(!HasCSSC, {{s32, s32}, {s64, s64}})
325 .customFor({{s128, s128},
326 {v4s16, v4s16},
327 {v8s16, v8s16},
328 {v2s32, v2s32},
329 {v4s32, v4s32},
330 {v2s64, v2s64}})
331 .clampScalar(0, s32, s128)
333 .minScalarEltSameAsIf(always, 1, 0)
334 .maxScalarEltSameAsIf(always, 1, 0)
335 .clampNumElements(0, v8s8, v16s8)
336 .clampNumElements(0, v4s16, v8s16)
337 .clampNumElements(0, v2s32, v4s32)
338 .clampNumElements(0, v2s64, v2s64)
341
342 getActionDefinitionsBuilder({G_CTLZ, G_CTLS})
343 .legalFor({{s32, s32},
344 {s64, s64},
345 {v8s8, v8s8},
346 {v16s8, v16s8},
347 {v4s16, v4s16},
348 {v8s16, v8s16},
349 {v2s32, v2s32},
350 {v4s32, v4s32}})
351 .widenScalarToNextPow2(1, /*Min=*/32)
352 .clampScalar(1, s32, s64)
354 .clampNumElements(0, v8s8, v16s8)
355 .clampNumElements(0, v4s16, v8s16)
356 .clampNumElements(0, v2s32, v4s32)
359 .scalarSameSizeAs(0, 1);
360
361 getActionDefinitionsBuilder(G_CTLZ_ZERO_UNDEF).lower();
362
364 .lowerIf(isVector(0))
365 .widenScalarToNextPow2(1, /*Min=*/32)
366 .clampScalar(1, s32, s64)
367 .scalarSameSizeAs(0, 1)
368 .legalFor(HasCSSC, {s32, s64})
369 .customFor(!HasCSSC, {s32, s64});
370
371 getActionDefinitionsBuilder(G_CTTZ_ZERO_UNDEF).lower();
372
373 getActionDefinitionsBuilder(G_BITREVERSE)
374 .legalFor({s32, s64, v8s8, v16s8})
375 .widenScalarToNextPow2(0, /*Min = */ 32)
377 .clampScalar(0, s32, s64)
378 .clampNumElements(0, v8s8, v16s8)
379 .clampNumElements(0, v4s16, v8s16)
380 .clampNumElements(0, v2s32, v4s32)
381 .clampNumElements(0, v2s64, v2s64)
384 .lower();
385
387 .legalFor({s32, s64, v4s16, v8s16, v2s32, v4s32, v2s64})
389 .clampScalar(0, s32, s64)
390 .clampNumElements(0, v4s16, v8s16)
391 .clampNumElements(0, v2s32, v4s32)
392 .clampNumElements(0, v2s64, v2s64)
394
395 getActionDefinitionsBuilder({G_UADDSAT, G_SADDSAT, G_USUBSAT, G_SSUBSAT})
396 .legalFor({v8s8, v16s8, v4s16, v8s16, v2s32, v4s32, v2s64})
397 .legalFor(HasSVE, {nxv16s8, nxv8s16, nxv4s32, nxv2s64})
398 .clampNumElements(0, v8s8, v16s8)
399 .clampNumElements(0, v4s16, v8s16)
400 .clampNumElements(0, v2s32, v4s32)
401 .clampMaxNumElements(0, s64, 2)
404 .lower();
405
407 {G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FMA, G_FSQRT, G_FMAXNUM, G_FMINNUM,
408 G_FMAXIMUM, G_FMINIMUM, G_FCEIL, G_FFLOOR, G_FRINT, G_FNEARBYINT,
409 G_INTRINSIC_TRUNC, G_INTRINSIC_ROUND, G_INTRINSIC_ROUNDEVEN})
410 .legalFor({s32, s64, v2s32, v4s32, v2s64})
411 .legalFor(HasFP16, {s16, v4s16, v8s16})
412 .libcallFor({s128})
413 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
414 .minScalarOrElt(0, MinFPScalar)
415 .clampNumElements(0, v4s16, v8s16)
416 .clampNumElements(0, v2s32, v4s32)
417 .clampNumElements(0, v2s64, v2s64)
419
420 getActionDefinitionsBuilder({G_FABS, G_FNEG})
421 .legalFor({s32, s64, v2s32, v4s32, v2s64})
422 .legalFor(HasFP16, {s16, v4s16, v8s16})
423 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
425 .clampNumElements(0, v4s16, v8s16)
426 .clampNumElements(0, v2s32, v4s32)
427 .clampNumElements(0, v2s64, v2s64)
429 .lowerFor({s16, v4s16, v8s16});
430
432 .libcallFor({s32, s64, s128})
433 .minScalar(0, s32)
434 .scalarize(0);
435
436 getActionDefinitionsBuilder({G_FCOS, G_FSIN, G_FPOW, G_FLOG, G_FLOG2,
437 G_FLOG10, G_FTAN, G_FEXP, G_FEXP2, G_FEXP10,
438 G_FACOS, G_FASIN, G_FATAN, G_FATAN2, G_FCOSH,
439 G_FSINH, G_FTANH, G_FMODF})
440 // We need a call for these, so we always need to scalarize.
441 .scalarize(0)
442 // Regardless of FP16 support, widen 16-bit elements to 32-bits.
443 .minScalar(0, s32)
444 .libcallFor({s32, s64, s128});
445 getActionDefinitionsBuilder({G_FPOWI, G_FLDEXP})
446 .scalarize(0)
447 .minScalar(0, s32)
448 .libcallFor({{s32, s32}, {s64, s32}, {s128, s32}});
449
450 getActionDefinitionsBuilder({G_LROUND, G_INTRINSIC_LRINT})
451 .legalFor({{s32, s32}, {s32, s64}, {s64, s32}, {s64, s64}})
452 .legalFor(HasFP16, {{s32, s16}, {s64, s16}})
453 .minScalar(1, s32)
454 .libcallFor({{s64, s128}})
455 .lower();
456 getActionDefinitionsBuilder({G_LLROUND, G_INTRINSIC_LLRINT})
457 .legalFor({{s64, s32}, {s64, s64}})
458 .legalFor(HasFP16, {{s64, s16}})
459 .minScalar(0, s64)
460 .minScalar(1, s32)
461 .libcallFor({{s64, s128}})
462 .lower();
463
464 // TODO: Custom legalization for mismatched types.
465 getActionDefinitionsBuilder(G_FCOPYSIGN)
467 [](const LegalityQuery &Query) { return Query.Types[0].isScalar(); },
468 [=](const LegalityQuery &Query) {
469 const LLT Ty = Query.Types[0];
470 return std::pair(0, LLT::fixed_vector(Ty == s16 ? 4 : 2, Ty));
471 })
472 .lower();
473
475
476 for (unsigned Op : {G_SEXTLOAD, G_ZEXTLOAD}) {
477 auto &Actions = getActionDefinitionsBuilder(Op);
478
479 if (Op == G_SEXTLOAD)
481
482 // Atomics have zero extending behavior.
483 Actions
484 .legalForTypesWithMemDesc({{s32, p0, s8, 8},
485 {s32, p0, s16, 8},
486 {s32, p0, s32, 8},
487 {s64, p0, s8, 2},
488 {s64, p0, s16, 2},
489 {s64, p0, s32, 4},
490 {s64, p0, s64, 8},
491 {p0, p0, s64, 8},
492 {v2s32, p0, s64, 8}})
493 .widenScalarToNextPow2(0)
494 .clampScalar(0, s32, s64)
495 // TODO: We could support sum-of-pow2's but the lowering code doesn't know
496 // how to do that yet.
497 .unsupportedIfMemSizeNotPow2()
498 // Lower anything left over into G_*EXT and G_LOAD
499 .lower();
500 }
501
502 auto IsPtrVecPred = [=](const LegalityQuery &Query) {
503 const LLT &ValTy = Query.Types[0];
504 return ValTy.isPointerVector() && ValTy.getAddressSpace() == 0;
505 };
506
508 .customIf([=](const LegalityQuery &Query) {
509 return HasRCPC3 && Query.Types[0] == s128 &&
510 Query.MMODescrs[0].Ordering == AtomicOrdering::Acquire;
511 })
512 .customIf([=](const LegalityQuery &Query) {
513 return Query.Types[0] == s128 &&
514 Query.MMODescrs[0].Ordering != AtomicOrdering::NotAtomic;
515 })
516 .legalForTypesWithMemDesc({{s8, p0, s8, 8},
517 {s16, p0, s16, 8},
518 {s32, p0, s32, 8},
519 {s64, p0, s64, 8},
520 {p0, p0, s64, 8},
521 {s128, p0, s128, 8},
522 {v8s8, p0, s64, 8},
523 {v16s8, p0, s128, 8},
524 {v4s16, p0, s64, 8},
525 {v8s16, p0, s128, 8},
526 {v2s32, p0, s64, 8},
527 {v4s32, p0, s128, 8},
528 {v2s64, p0, s128, 8}})
529 // These extends are also legal
530 .legalForTypesWithMemDesc(
531 {{s32, p0, s8, 8}, {s32, p0, s16, 8}, {s64, p0, s32, 8}})
532 .legalForTypesWithMemDesc({
533 // SVE vscale x 128 bit base sizes
534 {nxv16s8, p0, nxv16s8, 8},
535 {nxv8s16, p0, nxv8s16, 8},
536 {nxv4s32, p0, nxv4s32, 8},
537 {nxv2s64, p0, nxv2s64, 8},
538 })
539 .widenScalarToNextPow2(0, /* MinSize = */ 8)
540 .clampMaxNumElements(0, s8, 16)
541 .clampMaxNumElements(0, s16, 8)
542 .clampMaxNumElements(0, s32, 4)
543 .clampMaxNumElements(0, s64, 2)
544 .clampMaxNumElements(0, p0, 2)
546 .clampScalar(0, s8, s64)
548 [=](const LegalityQuery &Query) {
549 // Clamp extending load results to 32-bits.
550 return Query.Types[0].isScalar() &&
551 Query.Types[0] != Query.MMODescrs[0].MemoryTy &&
552 Query.Types[0].getSizeInBits() > 32;
553 },
554 changeTo(0, s32))
555 // TODO: Use BITCAST for v2i8, v2i16 after G_TRUNC gets sorted out
556 .bitcastIf(typeInSet(0, {v4s8}),
557 [=](const LegalityQuery &Query) {
558 const LLT VecTy = Query.Types[0];
559 return std::pair(0, LLT::scalar(VecTy.getSizeInBits()));
560 })
561 .customIf(IsPtrVecPred)
562 .scalarizeIf(typeInSet(0, {v2s16, v2s8}), 0)
563 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0);
564
566 .customIf([=](const LegalityQuery &Query) {
567 return HasRCPC3 && Query.Types[0] == s128 &&
568 Query.MMODescrs[0].Ordering == AtomicOrdering::Release;
569 })
570 .customIf([=](const LegalityQuery &Query) {
571 return Query.Types[0] == s128 &&
572 Query.MMODescrs[0].Ordering != AtomicOrdering::NotAtomic;
573 })
574 .widenScalarIf(
575 all(scalarNarrowerThan(0, 32),
577 changeTo(0, s32))
579 {{s8, p0, s8, 8}, {s16, p0, s8, 8}, // truncstorei8 from s16
580 {s32, p0, s8, 8}, // truncstorei8 from s32
581 {s64, p0, s8, 8}, // truncstorei8 from s64
582 {s16, p0, s16, 8}, {s32, p0, s16, 8}, // truncstorei16 from s32
583 {s64, p0, s16, 8}, // truncstorei16 from s64
584 {s32, p0, s8, 8}, {s32, p0, s16, 8}, {s32, p0, s32, 8},
585 {s64, p0, s64, 8}, {s64, p0, s32, 8}, // truncstorei32 from s64
586 {p0, p0, s64, 8}, {s128, p0, s128, 8}, {v16s8, p0, s128, 8},
587 {v8s8, p0, s64, 8}, {v4s16, p0, s64, 8}, {v8s16, p0, s128, 8},
588 {v2s32, p0, s64, 8}, {v4s32, p0, s128, 8}, {v2s64, p0, s128, 8}})
589 .legalForTypesWithMemDesc({
590 // SVE vscale x 128 bit base sizes
591 // TODO: Add nxv2p0. Consider bitcastIf.
592 // See #92130
593 // https://github.com/llvm/llvm-project/pull/92130#discussion_r1616888461
594 {nxv16s8, p0, nxv16s8, 8},
595 {nxv8s16, p0, nxv8s16, 8},
596 {nxv4s32, p0, nxv4s32, 8},
597 {nxv2s64, p0, nxv2s64, 8},
598 })
599 .clampScalar(0, s8, s64)
600 .minScalarOrElt(0, s8)
601 .lowerIf([=](const LegalityQuery &Query) {
602 return Query.Types[0].isScalar() &&
603 Query.Types[0] != Query.MMODescrs[0].MemoryTy;
604 })
605 // Maximum: sN * k = 128
606 .clampMaxNumElements(0, s8, 16)
607 .clampMaxNumElements(0, s16, 8)
608 .clampMaxNumElements(0, s32, 4)
609 .clampMaxNumElements(0, s64, 2)
610 .clampMaxNumElements(0, p0, 2)
612 // TODO: Use BITCAST for v2i8, v2i16 after G_TRUNC gets sorted out
613 .bitcastIf(all(typeInSet(0, {v4s8}),
614 LegalityPredicate([=](const LegalityQuery &Query) {
615 return Query.Types[0].getSizeInBits() ==
616 Query.MMODescrs[0].MemoryTy.getSizeInBits();
617 })),
618 [=](const LegalityQuery &Query) {
619 const LLT VecTy = Query.Types[0];
620 return std::pair(0, LLT::scalar(VecTy.getSizeInBits()));
621 })
622 .customIf(IsPtrVecPred)
623 .scalarizeIf(typeInSet(0, {v2s16, v2s8}), 0)
624 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
625 .lower();
626
627 getActionDefinitionsBuilder(G_INDEXED_STORE)
628 // Idx 0 == Ptr, Idx 1 == Val
629 // TODO: we can implement legalizations but as of now these are
630 // generated in a very specific way.
632 {p0, s8, s8, 8},
633 {p0, s16, s16, 8},
634 {p0, s32, s8, 8},
635 {p0, s32, s16, 8},
636 {p0, s32, s32, 8},
637 {p0, s64, s64, 8},
638 {p0, p0, p0, 8},
639 {p0, v8s8, v8s8, 8},
640 {p0, v16s8, v16s8, 8},
641 {p0, v4s16, v4s16, 8},
642 {p0, v8s16, v8s16, 8},
643 {p0, v2s32, v2s32, 8},
644 {p0, v4s32, v4s32, 8},
645 {p0, v2s64, v2s64, 8},
646 {p0, v2p0, v2p0, 8},
647 {p0, s128, s128, 8},
648 })
649 .unsupported();
650
651 auto IndexedLoadBasicPred = [=](const LegalityQuery &Query) {
652 LLT LdTy = Query.Types[0];
653 LLT PtrTy = Query.Types[1];
654 if (!llvm::is_contained(PackedVectorAllTypesVec, LdTy) &&
655 !llvm::is_contained(ScalarAndPtrTypesVec, LdTy) && LdTy != s128)
656 return false;
657 if (PtrTy != p0)
658 return false;
659 return true;
660 };
661 getActionDefinitionsBuilder(G_INDEXED_LOAD)
664 .legalIf(IndexedLoadBasicPred)
665 .unsupported();
666 getActionDefinitionsBuilder({G_INDEXED_SEXTLOAD, G_INDEXED_ZEXTLOAD})
667 .unsupportedIf(
669 .legalIf(all(typeInSet(0, {s16, s32, s64}),
670 LegalityPredicate([=](const LegalityQuery &Q) {
671 LLT LdTy = Q.Types[0];
672 LLT PtrTy = Q.Types[1];
673 LLT MemTy = Q.MMODescrs[0].MemoryTy;
674 if (PtrTy != p0)
675 return false;
676 if (LdTy == s16)
677 return MemTy == s8;
678 if (LdTy == s32)
679 return MemTy == s8 || MemTy == s16;
680 if (LdTy == s64)
681 return MemTy == s8 || MemTy == s16 || MemTy == s32;
682 return false;
683 })))
684 .unsupported();
685
686 // Constants
688 .legalFor({p0, s8, s16, s32, s64})
689 .widenScalarToNextPow2(0)
690 .clampScalar(0, s8, s64);
691 getActionDefinitionsBuilder(G_FCONSTANT)
692 // Always legalize s16 to prevent G_FCONSTANT being widened to G_CONSTANT
693 .legalFor({s16, s32, s64, s128})
694 .clampScalar(0, MinFPScalar, s128);
695
696 // FIXME: fix moreElementsToNextPow2
698 .legalFor({{s32, s32}, {s32, s64}, {s32, p0}})
700 .clampScalar(1, s32, s64)
701 .clampScalar(0, s32, s32)
704 [=](const LegalityQuery &Query) {
705 const LLT &Ty = Query.Types[0];
706 const LLT &SrcTy = Query.Types[1];
707 return Ty.isVector() && !SrcTy.isPointerVector() &&
708 Ty.getElementType() != SrcTy.getElementType();
709 },
710 0, 1)
711 .minScalarOrEltIf(
712 [=](const LegalityQuery &Query) { return Query.Types[1] == v2s16; },
713 1, s32)
714 .minScalarOrEltIf(
715 [=](const LegalityQuery &Query) {
716 return Query.Types[1].isPointerVector();
717 },
718 0, s64)
720 .clampNumElements(1, v8s8, v16s8)
721 .clampNumElements(1, v4s16, v8s16)
722 .clampNumElements(1, v2s32, v4s32)
723 .clampNumElements(1, v2s64, v2s64)
724 .clampNumElements(1, v2p0, v2p0)
725 .customIf(isVector(0));
726
728 .legalFor({{s32, s32},
729 {s32, s64},
730 {v4s32, v4s32},
731 {v2s32, v2s32},
732 {v2s64, v2s64}})
733 .legalFor(HasFP16, {{s32, s16}, {v4s16, v4s16}, {v8s16, v8s16}})
735 .clampScalar(0, s32, s32)
736 .minScalarOrElt(1, MinFPScalar)
739 [=](const LegalityQuery &Query) {
740 const LLT &Ty = Query.Types[0];
741 const LLT &SrcTy = Query.Types[1];
742 return Ty.isVector() && !SrcTy.isPointerVector() &&
743 Ty.getElementType() != SrcTy.getElementType();
744 },
745 0, 1)
746 .clampNumElements(1, v4s16, v8s16)
747 .clampNumElements(1, v2s32, v4s32)
748 .clampMaxNumElements(1, s64, 2)
750 .libcallFor({{s32, s128}});
751
752 // Extensions
753 auto ExtLegalFunc = [=](const LegalityQuery &Query) {
754 unsigned DstSize = Query.Types[0].getSizeInBits();
755
756 // Handle legal vectors using legalFor
757 if (Query.Types[0].isVector())
758 return false;
759
760 if (DstSize < 8 || DstSize >= 128 || !isPowerOf2_32(DstSize))
761 return false; // Extending to a scalar s128 needs narrowing.
762
763 const LLT &SrcTy = Query.Types[1];
764
765 // Make sure we fit in a register otherwise. Don't bother checking that
766 // the source type is below 128 bits. We shouldn't be allowing anything
767 // through which is wider than the destination in the first place.
768 unsigned SrcSize = SrcTy.getSizeInBits();
769 if (SrcSize < 8 || !isPowerOf2_32(SrcSize))
770 return false;
771
772 return true;
773 };
774 getActionDefinitionsBuilder({G_ZEXT, G_SEXT, G_ANYEXT})
775 .legalIf(ExtLegalFunc)
776 .legalFor({{v8s16, v8s8}, {v4s32, v4s16}, {v2s64, v2s32}})
777 .clampScalar(0, s64, s64) // Just for s128, others are handled above.
779 .clampMaxNumElements(1, s8, 8)
780 .clampMaxNumElements(1, s16, 4)
781 .clampMaxNumElements(1, s32, 2)
782 // Tries to convert a large EXTEND into two smaller EXTENDs
783 .lowerIf([=](const LegalityQuery &Query) {
784 return (Query.Types[0].getScalarSizeInBits() >
785 Query.Types[1].getScalarSizeInBits() * 2) &&
786 Query.Types[0].isVector() &&
787 (Query.Types[1].getScalarSizeInBits() == 8 ||
788 Query.Types[1].getScalarSizeInBits() == 16);
789 })
790 .clampMinNumElements(1, s8, 8)
791 .clampMinNumElements(1, s16, 4)
793
795 .legalFor({{v8s8, v8s16}, {v4s16, v4s32}, {v2s32, v2s64}})
797 .clampMaxNumElements(0, s8, 8)
798 .clampMaxNumElements(0, s16, 4)
799 .clampMaxNumElements(0, s32, 2)
801 [=](const LegalityQuery &Query) { return Query.Types[0].isVector(); },
802 0, s8)
803 .lowerIf([=](const LegalityQuery &Query) {
804 LLT DstTy = Query.Types[0];
805 LLT SrcTy = Query.Types[1];
806 return DstTy.isVector() && SrcTy.getSizeInBits() > 128 &&
807 DstTy.getScalarSizeInBits() * 2 <= SrcTy.getScalarSizeInBits();
808 })
809 .clampMinNumElements(0, s8, 8)
810 .clampMinNumElements(0, s16, 4)
811 .alwaysLegal();
812
813 getActionDefinitionsBuilder({G_TRUNC_SSAT_S, G_TRUNC_SSAT_U, G_TRUNC_USAT_U})
814 .legalFor({{v8s8, v8s16}, {v4s16, v4s32}, {v2s32, v2s64}})
815 .clampNumElements(0, v2s32, v2s32);
816
817 getActionDefinitionsBuilder(G_SEXT_INREG)
818 .legalFor({s32, s64})
819 .legalFor(PackedVectorAllTypeList)
820 .maxScalar(0, s64)
821 .clampNumElements(0, v8s8, v16s8)
822 .clampNumElements(0, v4s16, v8s16)
823 .clampNumElements(0, v2s32, v4s32)
824 .clampMaxNumElements(0, s64, 2)
825 .lower();
826
827 // FP conversions
829 .legalFor(
830 {{s16, s32}, {s16, s64}, {s32, s64}, {v4s16, v4s32}, {v2s32, v2s64}})
831 .libcallFor({{s16, s128}, {s32, s128}, {s64, s128}})
833 .customIf([](const LegalityQuery &Q) {
834 LLT DstTy = Q.Types[0];
835 LLT SrcTy = Q.Types[1];
836 return SrcTy.isFixedVector() && DstTy.isFixedVector() &&
837 SrcTy.getScalarSizeInBits() == 64 &&
838 DstTy.getScalarSizeInBits() == 16;
839 })
840 // Clamp based on input
841 .clampNumElements(1, v4s32, v4s32)
842 .clampNumElements(1, v2s64, v2s64)
843 .scalarize(0);
844
846 .legalFor(
847 {{s32, s16}, {s64, s16}, {s64, s32}, {v4s32, v4s16}, {v2s64, v2s32}})
848 .libcallFor({{s128, s64}, {s128, s32}, {s128, s16}})
851 [](const LegalityQuery &Q) {
852 LLT DstTy = Q.Types[0];
853 LLT SrcTy = Q.Types[1];
854 return SrcTy.isVector() && DstTy.isVector() &&
855 SrcTy.getScalarSizeInBits() == 16 &&
856 DstTy.getScalarSizeInBits() == 64;
857 },
858 changeElementTo(1, s32))
859 .clampNumElements(0, v4s32, v4s32)
860 .clampNumElements(0, v2s64, v2s64)
861 .scalarize(0);
862
863 // Conversions
864 getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
865 .legalFor({{s32, s32},
866 {s64, s32},
867 {s32, s64},
868 {s64, s64},
869 {v2s32, v2s32},
870 {v4s32, v4s32},
871 {v2s64, v2s64}})
872 .legalFor(HasFP16,
873 {{s32, s16}, {s64, s16}, {v4s16, v4s16}, {v8s16, v8s16}})
874 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
876 // The range of a fp16 value fits into an i17, so we can lower the width
877 // to i64.
879 [=](const LegalityQuery &Query) {
880 return Query.Types[1] == s16 && Query.Types[0].getSizeInBits() > 64;
881 },
882 changeTo(0, s64))
885 .minScalar(0, s32)
886 .widenScalarOrEltToNextPow2OrMinSize(1, /*MinSize=*/HasFP16 ? 16 : 32)
888 [=](const LegalityQuery &Query) {
889 return Query.Types[0].getScalarSizeInBits() <= 64 &&
890 Query.Types[0].getScalarSizeInBits() >
891 Query.Types[1].getScalarSizeInBits();
892 },
894 .widenScalarIf(
895 [=](const LegalityQuery &Query) {
896 return Query.Types[1].getScalarSizeInBits() <= 64 &&
897 Query.Types[0].getScalarSizeInBits() <
898 Query.Types[1].getScalarSizeInBits();
899 },
901 .clampNumElements(0, v4s16, v8s16)
902 .clampNumElements(0, v2s32, v4s32)
903 .clampMaxNumElements(0, s64, 2)
904 .libcallFor(
905 {{s32, s128}, {s64, s128}, {s128, s128}, {s128, s32}, {s128, s64}});
906
907 getActionDefinitionsBuilder({G_FPTOSI_SAT, G_FPTOUI_SAT})
908 .legalFor({{s32, s32},
909 {s64, s32},
910 {s32, s64},
911 {s64, s64},
912 {v2s32, v2s32},
913 {v4s32, v4s32},
914 {v2s64, v2s64}})
915 .legalFor(
916 HasFP16,
917 {{s16, s16}, {s32, s16}, {s64, s16}, {v4s16, v4s16}, {v8s16, v8s16}})
918 // Handle types larger than i64 by scalarizing/lowering.
919 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
921 // The range of a fp16 value fits into an i17, so we can lower the width
922 // to i64.
924 [=](const LegalityQuery &Query) {
925 return Query.Types[1] == s16 && Query.Types[0].getSizeInBits() > 64;
926 },
927 changeTo(0, s64))
928 .lowerIf(::any(scalarWiderThan(0, 64), scalarWiderThan(1, 64)), 0)
930 .widenScalarToNextPow2(0, /*MinSize=*/32)
931 .minScalar(0, s32)
932 .widenScalarOrEltToNextPow2OrMinSize(1, /*MinSize=*/HasFP16 ? 16 : 32)
934 [=](const LegalityQuery &Query) {
935 unsigned ITySize = Query.Types[0].getScalarSizeInBits();
936 return (ITySize == 16 || ITySize == 32 || ITySize == 64) &&
937 ITySize > Query.Types[1].getScalarSizeInBits();
938 },
940 .widenScalarIf(
941 [=](const LegalityQuery &Query) {
942 unsigned FTySize = Query.Types[1].getScalarSizeInBits();
943 return (FTySize == 16 || FTySize == 32 || FTySize == 64) &&
944 Query.Types[0].getScalarSizeInBits() < FTySize;
945 },
948 .clampNumElements(0, v4s16, v8s16)
949 .clampNumElements(0, v2s32, v4s32)
950 .clampMaxNumElements(0, s64, 2);
951
952 getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
953 .legalFor({{s32, s32},
954 {s64, s32},
955 {s32, s64},
956 {s64, s64},
957 {v2s32, v2s32},
958 {v4s32, v4s32},
959 {v2s64, v2s64}})
960 .legalFor(HasFP16,
961 {{s16, s32}, {s16, s64}, {v4s16, v4s16}, {v8s16, v8s16}})
962 .scalarizeIf(scalarOrEltWiderThan(1, 64), 1)
966 .minScalar(1, s32)
967 .lowerIf([](const LegalityQuery &Query) {
968 return Query.Types[1].isVector() &&
969 Query.Types[1].getScalarSizeInBits() == 64 &&
970 Query.Types[0].getScalarSizeInBits() == 16;
971 })
972 .widenScalarOrEltToNextPow2OrMinSize(0, /*MinSize=*/HasFP16 ? 16 : 32)
974 // v2i64->v2f32 needs to scalarize to avoid double-rounding issues.
975 [](const LegalityQuery &Query) {
976 return Query.Types[0].getScalarSizeInBits() == 32 &&
977 Query.Types[1].getScalarSizeInBits() == 64;
978 },
979 0)
980 .widenScalarIf(
981 [](const LegalityQuery &Query) {
982 return Query.Types[1].getScalarSizeInBits() <= 64 &&
983 Query.Types[0].getScalarSizeInBits() <
984 Query.Types[1].getScalarSizeInBits();
985 },
987 .widenScalarIf(
988 [](const LegalityQuery &Query) {
989 return Query.Types[0].getScalarSizeInBits() <= 64 &&
990 Query.Types[0].getScalarSizeInBits() >
991 Query.Types[1].getScalarSizeInBits();
992 },
994 .clampNumElements(0, v4s16, v8s16)
995 .clampNumElements(0, v2s32, v4s32)
996 .clampMaxNumElements(0, s64, 2)
997 .libcallFor({{s16, s128},
998 {s32, s128},
999 {s64, s128},
1000 {s128, s128},
1001 {s128, s32},
1002 {s128, s64}});
1003
1004 // Control-flow
1007 .legalFor({s32})
1008 .clampScalar(0, s32, s32);
1009 getActionDefinitionsBuilder(G_BRINDIRECT).legalFor({p0});
1010
1012 .legalFor({{s32, s32}, {s64, s32}, {p0, s32}})
1013 .widenScalarToNextPow2(0)
1014 .clampScalar(0, s32, s64)
1015 .clampScalar(1, s32, s32)
1018 .lowerIf(isVector(0));
1019
1020 // Pointer-handling
1021 getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0});
1022
1023 if (TM.getCodeModel() == CodeModel::Small)
1024 getActionDefinitionsBuilder(G_GLOBAL_VALUE).custom();
1025 else
1026 getActionDefinitionsBuilder(G_GLOBAL_VALUE).legalFor({p0});
1027
1028 getActionDefinitionsBuilder(G_PTRAUTH_GLOBAL_VALUE)
1029 .legalIf(all(typeIs(0, p0), typeIs(1, p0)));
1030
1031 getActionDefinitionsBuilder(G_PTRTOINT)
1032 .legalFor({{s64, p0}, {v2s64, v2p0}})
1033 .widenScalarToNextPow2(0, 64)
1034 .clampScalar(0, s64, s64)
1035 .clampMaxNumElements(0, s64, 2);
1036
1037 getActionDefinitionsBuilder(G_INTTOPTR)
1038 .unsupportedIf([&](const LegalityQuery &Query) {
1039 return Query.Types[0].getSizeInBits() != Query.Types[1].getSizeInBits();
1040 })
1041 .legalFor({{p0, s64}, {v2p0, v2s64}})
1042 .clampMaxNumElements(1, s64, 2);
1043
1044 // Casts for 32 and 64-bit width type are just copies.
1045 // Same for 128-bit width type, except they are on the FPR bank.
1047 // Keeping 32-bit instructions legal to prevent regression in some tests
1048 .legalForCartesianProduct({s32, v2s16, v4s8})
1049 .legalForCartesianProduct({s64, v8s8, v4s16, v2s32})
1050 .legalForCartesianProduct({s128, v16s8, v8s16, v4s32, v2s64, v2p0})
1051 .customIf([=](const LegalityQuery &Query) {
1052 // Handle casts from i1 vectors to scalars.
1053 LLT DstTy = Query.Types[0];
1054 LLT SrcTy = Query.Types[1];
1055 return DstTy.isScalar() && SrcTy.isVector() &&
1056 SrcTy.getScalarSizeInBits() == 1;
1057 })
1058 .lowerIf([=](const LegalityQuery &Query) {
1059 return Query.Types[0].isVector() != Query.Types[1].isVector();
1060 })
1062 .clampNumElements(0, v8s8, v16s8)
1063 .clampNumElements(0, v4s16, v8s16)
1064 .clampNumElements(0, v2s32, v4s32)
1065 .lower();
1066
1067 getActionDefinitionsBuilder(G_VASTART).legalFor({p0});
1068
1069 // va_list must be a pointer, but most sized types are pretty easy to handle
1070 // as the destination.
1072 .customForCartesianProduct({s8, s16, s32, s64, p0}, {p0})
1073 .clampScalar(0, s8, s64)
1074 .widenScalarToNextPow2(0, /*Min*/ 8);
1075
1076 getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG_WITH_SUCCESS)
1077 .lowerIf(
1078 all(typeInSet(0, {s8, s16, s32, s64, s128}), typeIs(2, p0)));
1079
1080 bool UseOutlineAtomics = ST.outlineAtomics() && !ST.hasLSE();
1081
1082 getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG)
1083 .legalFor(!UseOutlineAtomics, {{s32, p0}, {s64, p0}})
1084 .customFor(!UseOutlineAtomics, {{s128, p0}})
1085 .libcallFor(UseOutlineAtomics,
1086 {{s8, p0}, {s16, p0}, {s32, p0}, {s64, p0}, {s128, p0}})
1087 .clampScalar(0, s32, s64);
1088
1089 getActionDefinitionsBuilder({G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD,
1090 G_ATOMICRMW_SUB, G_ATOMICRMW_AND, G_ATOMICRMW_OR,
1091 G_ATOMICRMW_XOR})
1092 .legalFor(!UseOutlineAtomics, {{s32, p0}, {s64, p0}})
1093 .libcallFor(UseOutlineAtomics,
1094 {{s8, p0}, {s16, p0}, {s32, p0}, {s64, p0}})
1095 .clampScalar(0, s32, s64);
1096
1097 // Do not outline these atomics operations, as per comment in
1098 // AArch64ISelLowering.cpp's shouldExpandAtomicRMWInIR().
1100 {G_ATOMICRMW_MIN, G_ATOMICRMW_MAX, G_ATOMICRMW_UMIN, G_ATOMICRMW_UMAX})
1101 .legalIf(all(typeInSet(0, {s32, s64}), typeIs(1, p0)))
1102 .clampScalar(0, s32, s64);
1103
1104 getActionDefinitionsBuilder(G_BLOCK_ADDR).legalFor({p0});
1105
1106 // Merge/Unmerge
1107 for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
1108 unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
1109 unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;
1111 .widenScalarToNextPow2(LitTyIdx, 8)
1112 .widenScalarToNextPow2(BigTyIdx, 32)
1113 .clampScalar(LitTyIdx, s8, s64)
1114 .clampScalar(BigTyIdx, s32, s128)
1115 .legalIf([=](const LegalityQuery &Q) {
1116 switch (Q.Types[BigTyIdx].getSizeInBits()) {
1117 case 32:
1118 case 64:
1119 case 128:
1120 break;
1121 default:
1122 return false;
1123 }
1124 switch (Q.Types[LitTyIdx].getSizeInBits()) {
1125 case 8:
1126 case 16:
1127 case 32:
1128 case 64:
1129 return true;
1130 default:
1131 return false;
1132 }
1133 });
1134 }
1135
1136 // TODO : nxv4s16, nxv2s16, nxv2s32
1137 getActionDefinitionsBuilder(G_EXTRACT_VECTOR_ELT)
1138 .legalFor(HasSVE, {{s16, nxv16s8, s64},
1139 {s16, nxv8s16, s64},
1140 {s32, nxv4s32, s64},
1141 {s64, nxv2s64, s64}})
1142 .unsupportedIf([=](const LegalityQuery &Query) {
1143 const LLT &EltTy = Query.Types[1].getElementType();
1144 if (Query.Types[1].isScalableVector())
1145 return false;
1146 return Query.Types[0] != EltTy;
1147 })
1148 .minScalar(2, s64)
1149 .customIf([=](const LegalityQuery &Query) {
1150 const LLT &VecTy = Query.Types[1];
1151 return VecTy == v8s8 || VecTy == v16s8 || VecTy == v2s16 ||
1152 VecTy == v4s16 || VecTy == v8s16 || VecTy == v2s32 ||
1153 VecTy == v4s32 || VecTy == v2s64 || VecTy == v2p0;
1154 })
1155 .minScalarOrEltIf(
1156 [=](const LegalityQuery &Query) {
1157 // We want to promote to <M x s1> to <M x s64> if that wouldn't
1158 // cause the total vec size to be > 128b.
1159 return Query.Types[1].isFixedVector() &&
1160 Query.Types[1].getNumElements() <= 2;
1161 },
1162 0, s64)
1163 .minScalarOrEltIf(
1164 [=](const LegalityQuery &Query) {
1165 return Query.Types[1].isFixedVector() &&
1166 Query.Types[1].getNumElements() <= 4;
1167 },
1168 0, s32)
1169 .minScalarOrEltIf(
1170 [=](const LegalityQuery &Query) {
1171 return Query.Types[1].isFixedVector() &&
1172 Query.Types[1].getNumElements() <= 8;
1173 },
1174 0, s16)
1175 .minScalarOrEltIf(
1176 [=](const LegalityQuery &Query) {
1177 return Query.Types[1].isFixedVector() &&
1178 Query.Types[1].getNumElements() <= 16;
1179 },
1180 0, s8)
1181 .minScalarOrElt(0, s8) // Worst case, we need at least s8.
1183 .clampMaxNumElements(1, s64, 2)
1184 .clampMaxNumElements(1, s32, 4)
1185 .clampMaxNumElements(1, s16, 8)
1186 .clampMaxNumElements(1, s8, 16)
1187 .clampMaxNumElements(1, p0, 2)
1189
1190 getActionDefinitionsBuilder(G_INSERT_VECTOR_ELT)
1191 .legalIf(
1192 typeInSet(0, {v8s8, v16s8, v4s16, v8s16, v2s32, v4s32, v2s64, v2p0}))
1193 .legalFor(HasSVE, {{nxv16s8, s32, s64},
1194 {nxv8s16, s32, s64},
1195 {nxv4s32, s32, s64},
1196 {nxv2s64, s64, s64}})
1199 .clampNumElements(0, v8s8, v16s8)
1200 .clampNumElements(0, v4s16, v8s16)
1201 .clampNumElements(0, v2s32, v4s32)
1202 .clampMaxNumElements(0, s64, 2)
1203 .clampMaxNumElements(0, p0, 2)
1205
1206 getActionDefinitionsBuilder(G_BUILD_VECTOR)
1207 .legalFor({{v8s8, s8},
1208 {v16s8, s8},
1209 {v4s16, s16},
1210 {v8s16, s16},
1211 {v2s32, s32},
1212 {v4s32, s32},
1213 {v2s64, s64},
1214 {v2p0, p0}})
1215 .clampNumElements(0, v4s32, v4s32)
1216 .clampNumElements(0, v2s64, v2s64)
1217 .minScalarOrElt(0, s8)
1220 .minScalarSameAs(1, 0);
1221
1222 getActionDefinitionsBuilder(G_BUILD_VECTOR_TRUNC).lower();
1223
1224 getActionDefinitionsBuilder(G_SHUFFLE_VECTOR)
1225 .legalIf([=](const LegalityQuery &Query) {
1226 const LLT &DstTy = Query.Types[0];
1227 const LLT &SrcTy = Query.Types[1];
1228 // For now just support the TBL2 variant which needs the source vectors
1229 // to be the same size as the dest.
1230 if (DstTy != SrcTy)
1231 return false;
1232 return llvm::is_contained(
1233 {v8s8, v16s8, v4s16, v8s16, v2s32, v4s32, v2s64}, DstTy);
1234 })
1235 .moreElementsIf(
1236 [](const LegalityQuery &Query) {
1237 return Query.Types[0].getNumElements() >
1238 Query.Types[1].getNumElements();
1239 },
1240 changeTo(1, 0))
1243 [](const LegalityQuery &Query) {
1244 return Query.Types[0].getNumElements() <
1245 Query.Types[1].getNumElements();
1246 },
1247 changeTo(0, 1))
1248 .widenScalarOrEltToNextPow2OrMinSize(0, 8)
1249 .clampNumElements(0, v8s8, v16s8)
1250 .clampNumElements(0, v4s16, v8s16)
1251 .clampNumElements(0, v4s32, v4s32)
1252 .clampNumElements(0, v2s64, v2s64)
1254 .bitcastIf(isPointerVector(0), [=](const LegalityQuery &Query) {
1255 // Bitcast pointers vector to i64.
1256 const LLT DstTy = Query.Types[0];
1257 return std::pair(0, LLT::vector(DstTy.getElementCount(), 64));
1258 });
1259
1260 getActionDefinitionsBuilder(G_CONCAT_VECTORS)
1261 .legalFor({{v16s8, v8s8}, {v8s16, v4s16}, {v4s32, v2s32}})
1262 .bitcastIf(
1263 [=](const LegalityQuery &Query) {
1264 return Query.Types[0].isFixedVector() &&
1265 Query.Types[1].isFixedVector() &&
1266 Query.Types[0].getScalarSizeInBits() >= 8 &&
1267 isPowerOf2_64(Query.Types[0].getScalarSizeInBits()) &&
1268 Query.Types[0].getSizeInBits() <= 128 &&
1269 Query.Types[1].getSizeInBits() <= 64;
1270 },
1271 [=](const LegalityQuery &Query) {
1272 const LLT DstTy = Query.Types[0];
1273 const LLT SrcTy = Query.Types[1];
1274 return std::pair(
1275 0, DstTy.changeElementSize(SrcTy.getSizeInBits())
1278 SrcTy.getNumElements())));
1279 });
1280
1281 getActionDefinitionsBuilder(G_EXTRACT_SUBVECTOR)
1282 .legalFor({{v8s8, v16s8}, {v4s16, v8s16}, {v2s32, v4s32}})
1284 .immIdx(0); // Inform verifier imm idx 0 is handled.
1285
1286 // TODO: {nxv16s8, s8}, {nxv8s16, s16}
1287 getActionDefinitionsBuilder(G_SPLAT_VECTOR)
1288 .legalFor(HasSVE, {{nxv4s32, s32}, {nxv2s64, s64}});
1289
1290 getActionDefinitionsBuilder(G_JUMP_TABLE).legalFor({p0});
1291
1292 getActionDefinitionsBuilder(G_BRJT).legalFor({{p0, s64}});
1293
1294 getActionDefinitionsBuilder({G_TRAP, G_DEBUGTRAP, G_UBSANTRAP}).alwaysLegal();
1295
1296 getActionDefinitionsBuilder(G_DYN_STACKALLOC).custom();
1297
1298 getActionDefinitionsBuilder({G_STACKSAVE, G_STACKRESTORE}).lower();
1299
1300 if (ST.hasMOPS()) {
1301 // G_BZERO is not supported. Currently it is only emitted by
1302 // PreLegalizerCombiner for G_MEMSET with zero constant.
1304
1306 .legalForCartesianProduct({p0}, {s64}, {s64})
1307 .customForCartesianProduct({p0}, {s8}, {s64})
1308 .immIdx(0); // Inform verifier imm idx 0 is handled.
1309
1310 getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE})
1311 .legalForCartesianProduct({p0}, {p0}, {s64})
1312 .immIdx(0); // Inform verifier imm idx 0 is handled.
1313
1314 // G_MEMCPY_INLINE does not have a tailcall immediate
1315 getActionDefinitionsBuilder(G_MEMCPY_INLINE)
1316 .legalForCartesianProduct({p0}, {p0}, {s64});
1317
1318 } else {
1319 getActionDefinitionsBuilder({G_BZERO, G_MEMCPY, G_MEMMOVE, G_MEMSET})
1320 .libcall();
1321 }
1322
1323 // For fadd reductions we have pairwise operations available. We treat the
1324 // usual legal types as legal and handle the lowering to pairwise instructions
1325 // later.
1326 getActionDefinitionsBuilder(G_VECREDUCE_FADD)
1327 .legalFor({{s32, v2s32}, {s32, v4s32}, {s64, v2s64}})
1328 .legalFor(HasFP16, {{s16, v4s16}, {s16, v8s16}})
1329 .minScalarOrElt(0, MinFPScalar)
1330 .clampMaxNumElements(1, s64, 2)
1331 .clampMaxNumElements(1, s32, 4)
1332 .clampMaxNumElements(1, s16, 8)
1334 .scalarize(1)
1335 .lower();
1336
1337 // For fmul reductions we need to split up into individual operations. We
1338 // clamp to 128 bit vectors then to 64bit vectors to produce a cascade of
1339 // smaller types, followed by scalarizing what remains.
1340 getActionDefinitionsBuilder(G_VECREDUCE_FMUL)
1341 .minScalarOrElt(0, MinFPScalar)
1342 .clampMaxNumElements(1, s64, 2)
1343 .clampMaxNumElements(1, s32, 4)
1344 .clampMaxNumElements(1, s16, 8)
1345 .clampMaxNumElements(1, s32, 2)
1346 .clampMaxNumElements(1, s16, 4)
1347 .scalarize(1)
1348 .lower();
1349
1350 getActionDefinitionsBuilder({G_VECREDUCE_SEQ_FADD, G_VECREDUCE_SEQ_FMUL})
1351 .scalarize(2)
1352 .lower();
1353
1354 getActionDefinitionsBuilder(G_VECREDUCE_ADD)
1355 .legalFor({{s8, v8s8},
1356 {s8, v16s8},
1357 {s16, v4s16},
1358 {s16, v8s16},
1359 {s32, v2s32},
1360 {s32, v4s32},
1361 {s64, v2s64}})
1363 .clampMaxNumElements(1, s64, 2)
1364 .clampMaxNumElements(1, s32, 4)
1365 .clampMaxNumElements(1, s16, 8)
1366 .clampMaxNumElements(1, s8, 16)
1368 .scalarize(1);
1369
1370 getActionDefinitionsBuilder({G_VECREDUCE_FMIN, G_VECREDUCE_FMAX,
1371 G_VECREDUCE_FMINIMUM, G_VECREDUCE_FMAXIMUM})
1372 .legalFor({{s32, v2s32}, {s32, v4s32}, {s64, v2s64}})
1373 .legalFor(HasFP16, {{s16, v4s16}, {s16, v8s16}})
1374 .minScalarOrElt(0, MinFPScalar)
1375 .clampMaxNumElements(1, s64, 2)
1376 .clampMaxNumElements(1, s32, 4)
1377 .clampMaxNumElements(1, s16, 8)
1378 .scalarize(1)
1379 .lower();
1380
1381 getActionDefinitionsBuilder(G_VECREDUCE_MUL)
1382 .clampMaxNumElements(1, s32, 2)
1383 .clampMaxNumElements(1, s16, 4)
1384 .clampMaxNumElements(1, s8, 8)
1385 .scalarize(1)
1386 .lower();
1387
1389 {G_VECREDUCE_SMIN, G_VECREDUCE_SMAX, G_VECREDUCE_UMIN, G_VECREDUCE_UMAX})
1390 .legalFor({{s8, v8s8},
1391 {s8, v16s8},
1392 {s16, v4s16},
1393 {s16, v8s16},
1394 {s32, v2s32},
1395 {s32, v4s32}})
1396 .moreElementsIf(
1397 [=](const LegalityQuery &Query) {
1398 return Query.Types[1].isVector() &&
1399 Query.Types[1].getElementType() != s8 &&
1400 Query.Types[1].getNumElements() & 1;
1401 },
1403 .clampMaxNumElements(1, s64, 2)
1404 .clampMaxNumElements(1, s32, 4)
1405 .clampMaxNumElements(1, s16, 8)
1406 .clampMaxNumElements(1, s8, 16)
1407 .scalarize(1)
1408 .lower();
1409
1411 {G_VECREDUCE_OR, G_VECREDUCE_AND, G_VECREDUCE_XOR})
1412 // Try to break down into smaller vectors as long as they're at least 64
1413 // bits. This lets us use vector operations for some parts of the
1414 // reduction.
1415 .fewerElementsIf(
1416 [=](const LegalityQuery &Q) {
1417 LLT SrcTy = Q.Types[1];
1418 if (SrcTy.isScalar())
1419 return false;
1420 if (!isPowerOf2_32(SrcTy.getNumElements()))
1421 return false;
1422 // We can usually perform 64b vector operations.
1423 return SrcTy.getSizeInBits() > 64;
1424 },
1425 [=](const LegalityQuery &Q) {
1426 LLT SrcTy = Q.Types[1];
1427 return std::make_pair(1, SrcTy.divide(2));
1428 })
1429 .scalarize(1)
1430 .lower();
1431
1432 // TODO: Update this to correct handling when adding AArch64/SVE support.
1433 getActionDefinitionsBuilder(G_VECTOR_COMPRESS).lower();
1434
1435 // Access to floating-point environment.
1436 getActionDefinitionsBuilder({G_GET_FPENV, G_SET_FPENV, G_RESET_FPENV,
1437 G_GET_FPMODE, G_SET_FPMODE, G_RESET_FPMODE})
1438 .libcall();
1439
1440 getActionDefinitionsBuilder(G_IS_FPCLASS).lower();
1441
1442 getActionDefinitionsBuilder(G_PREFETCH).custom();
1443
1444 getActionDefinitionsBuilder({G_SCMP, G_UCMP}).lower();
1445
1447 verify(*ST.getInstrInfo());
1448}
1449
1452 LostDebugLocObserver &LocObserver) const {
1453 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
1454 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
1455 GISelChangeObserver &Observer = Helper.Observer;
1456 switch (MI.getOpcode()) {
1457 default:
1458 // No idea what to do.
1459 return false;
1460 case TargetOpcode::G_VAARG:
1461 return legalizeVaArg(MI, MRI, MIRBuilder);
1462 case TargetOpcode::G_LOAD:
1463 case TargetOpcode::G_STORE:
1464 return legalizeLoadStore(MI, MRI, MIRBuilder, Observer);
1465 case TargetOpcode::G_SHL:
1466 case TargetOpcode::G_ASHR:
1467 case TargetOpcode::G_LSHR:
1468 return legalizeShlAshrLshr(MI, MRI, MIRBuilder, Observer);
1469 case TargetOpcode::G_GLOBAL_VALUE:
1470 return legalizeSmallCMGlobalValue(MI, MRI, MIRBuilder, Observer);
1471 case TargetOpcode::G_SBFX:
1472 case TargetOpcode::G_UBFX:
1473 return legalizeBitfieldExtract(MI, MRI, Helper);
1474 case TargetOpcode::G_FSHL:
1475 case TargetOpcode::G_FSHR:
1476 return legalizeFunnelShift(MI, MRI, MIRBuilder, Observer, Helper);
1477 case TargetOpcode::G_ROTR:
1478 return legalizeRotate(MI, MRI, Helper);
1479 case TargetOpcode::G_CTPOP:
1480 return legalizeCTPOP(MI, MRI, Helper);
1481 case TargetOpcode::G_ATOMIC_CMPXCHG:
1482 return legalizeAtomicCmpxchg128(MI, MRI, Helper);
1483 case TargetOpcode::G_CTTZ:
1484 return legalizeCTTZ(MI, Helper);
1485 case TargetOpcode::G_BZERO:
1486 case TargetOpcode::G_MEMCPY:
1487 case TargetOpcode::G_MEMMOVE:
1488 case TargetOpcode::G_MEMSET:
1489 return legalizeMemOps(MI, Helper);
1490 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1491 return legalizeExtractVectorElt(MI, MRI, Helper);
1492 case TargetOpcode::G_DYN_STACKALLOC:
1493 return legalizeDynStackAlloc(MI, Helper);
1494 case TargetOpcode::G_PREFETCH:
1495 return legalizePrefetch(MI, Helper);
1496 case TargetOpcode::G_ABS:
1497 return Helper.lowerAbsToCNeg(MI);
1498 case TargetOpcode::G_ICMP:
1499 return legalizeICMP(MI, MRI, MIRBuilder);
1500 case TargetOpcode::G_BITCAST:
1501 return legalizeBitcast(MI, Helper);
1502 case TargetOpcode::G_FPTRUNC:
1503 // In order to lower f16 to f64 properly, we need to use f32 as an
1504 // intermediary
1505 return legalizeFptrunc(MI, MIRBuilder, MRI);
1506 }
1507
1508 llvm_unreachable("expected switch to return");
1509}
1510
1511bool AArch64LegalizerInfo::legalizeBitcast(MachineInstr &MI,
1512 LegalizerHelper &Helper) const {
1513 assert(MI.getOpcode() == TargetOpcode::G_BITCAST && "Unexpected opcode");
1514 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
1515 // We're trying to handle casts from i1 vectors to scalars but reloading from
1516 // stack.
1517 if (!DstTy.isScalar() || !SrcTy.isVector() ||
1518 SrcTy.getElementType() != LLT::scalar(1))
1519 return false;
1520
1521 Helper.createStackStoreLoad(DstReg, SrcReg);
1522 MI.eraseFromParent();
1523 return true;
1524}
1525
1526bool AArch64LegalizerInfo::legalizeFunnelShift(MachineInstr &MI,
1528 MachineIRBuilder &MIRBuilder,
1529 GISelChangeObserver &Observer,
1530 LegalizerHelper &Helper) const {
1531 assert(MI.getOpcode() == TargetOpcode::G_FSHL ||
1532 MI.getOpcode() == TargetOpcode::G_FSHR);
1533
1534 // Keep as G_FSHR if shift amount is a G_CONSTANT, else use generic
1535 // lowering
1536 Register ShiftNo = MI.getOperand(3).getReg();
1537 LLT ShiftTy = MRI.getType(ShiftNo);
1538 auto VRegAndVal = getIConstantVRegValWithLookThrough(ShiftNo, MRI);
1539
1540 // Adjust shift amount according to Opcode (FSHL/FSHR)
1541 // Convert FSHL to FSHR
1542 LLT OperationTy = MRI.getType(MI.getOperand(0).getReg());
1543 APInt BitWidth(ShiftTy.getSizeInBits(), OperationTy.getSizeInBits(), false);
1544
1545 // Lower non-constant shifts and leave zero shifts to the optimizer.
1546 if (!VRegAndVal || VRegAndVal->Value.urem(BitWidth) == 0)
1547 return (Helper.lowerFunnelShiftAsShifts(MI) ==
1549
1550 APInt Amount = VRegAndVal->Value.urem(BitWidth);
1551
1552 Amount = MI.getOpcode() == TargetOpcode::G_FSHL ? BitWidth - Amount : Amount;
1553
1554 // If the instruction is G_FSHR, has a 64-bit G_CONSTANT for shift amount
1555 // in the range of 0 <-> BitWidth, it is legal
1556 if (ShiftTy.getSizeInBits() == 64 && MI.getOpcode() == TargetOpcode::G_FSHR &&
1557 VRegAndVal->Value.ult(BitWidth))
1558 return true;
1559
1560 // Cast the ShiftNumber to a 64-bit type
1561 auto Cast64 = MIRBuilder.buildConstant(LLT::scalar(64), Amount.zext(64));
1562
1563 if (MI.getOpcode() == TargetOpcode::G_FSHR) {
1564 Observer.changingInstr(MI);
1565 MI.getOperand(3).setReg(Cast64.getReg(0));
1566 Observer.changedInstr(MI);
1567 }
1568 // If Opcode is FSHL, remove the FSHL instruction and create a FSHR
1569 // instruction
1570 else if (MI.getOpcode() == TargetOpcode::G_FSHL) {
1571 MIRBuilder.buildInstr(TargetOpcode::G_FSHR, {MI.getOperand(0).getReg()},
1572 {MI.getOperand(1).getReg(), MI.getOperand(2).getReg(),
1573 Cast64.getReg(0)});
1574 MI.eraseFromParent();
1575 }
1576 return true;
1577}
1578
1579bool AArch64LegalizerInfo::legalizeICMP(MachineInstr &MI,
1581 MachineIRBuilder &MIRBuilder) const {
1582 Register DstReg = MI.getOperand(0).getReg();
1583 Register SrcReg1 = MI.getOperand(2).getReg();
1584 Register SrcReg2 = MI.getOperand(3).getReg();
1585 LLT DstTy = MRI.getType(DstReg);
1586 LLT SrcTy = MRI.getType(SrcReg1);
1587
1588 // Check the vector types are legal
1589 if (DstTy.getScalarSizeInBits() != SrcTy.getScalarSizeInBits() ||
1590 DstTy.getNumElements() != SrcTy.getNumElements() ||
1591 (DstTy.getSizeInBits() != 64 && DstTy.getSizeInBits() != 128))
1592 return false;
1593
1594 // Lowers G_ICMP NE => G_ICMP EQ to allow better pattern matching for
1595 // following passes
1596 CmpInst::Predicate Pred = (CmpInst::Predicate)MI.getOperand(1).getPredicate();
1597 if (Pred != CmpInst::ICMP_NE)
1598 return true;
1599 Register CmpReg =
1600 MIRBuilder
1601 .buildICmp(CmpInst::ICMP_EQ, MRI.getType(DstReg), SrcReg1, SrcReg2)
1602 .getReg(0);
1603 MIRBuilder.buildNot(DstReg, CmpReg);
1604
1605 MI.eraseFromParent();
1606 return true;
1607}
1608
1609bool AArch64LegalizerInfo::legalizeRotate(MachineInstr &MI,
1611 LegalizerHelper &Helper) const {
1612 // To allow for imported patterns to match, we ensure that the rotate amount
1613 // is 64b with an extension.
1614 Register AmtReg = MI.getOperand(2).getReg();
1615 LLT AmtTy = MRI.getType(AmtReg);
1616 (void)AmtTy;
1617 assert(AmtTy.isScalar() && "Expected a scalar rotate");
1618 assert(AmtTy.getSizeInBits() < 64 && "Expected this rotate to be legal");
1619 auto NewAmt = Helper.MIRBuilder.buildZExt(LLT::scalar(64), AmtReg);
1620 Helper.Observer.changingInstr(MI);
1621 MI.getOperand(2).setReg(NewAmt.getReg(0));
1622 Helper.Observer.changedInstr(MI);
1623 return true;
1624}
1625
1626bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue(
1628 GISelChangeObserver &Observer) const {
1629 assert(MI.getOpcode() == TargetOpcode::G_GLOBAL_VALUE);
1630 // We do this custom legalization to convert G_GLOBAL_VALUE into target ADRP +
1631 // G_ADD_LOW instructions.
1632 // By splitting this here, we can optimize accesses in the small code model by
1633 // folding in the G_ADD_LOW into the load/store offset.
1634 auto &GlobalOp = MI.getOperand(1);
1635 // Don't modify an intrinsic call.
1636 if (GlobalOp.isSymbol())
1637 return true;
1638 const auto* GV = GlobalOp.getGlobal();
1639 if (GV->isThreadLocal())
1640 return true; // Don't want to modify TLS vars.
1641
1642 auto &TM = ST->getTargetLowering()->getTargetMachine();
1643 unsigned OpFlags = ST->ClassifyGlobalReference(GV, TM);
1644
1645 if (OpFlags & AArch64II::MO_GOT)
1646 return true;
1647
1648 auto Offset = GlobalOp.getOffset();
1649 Register DstReg = MI.getOperand(0).getReg();
1650 auto ADRP = MIRBuilder.buildInstr(AArch64::ADRP, {LLT::pointer(0, 64)}, {})
1651 .addGlobalAddress(GV, Offset, OpFlags | AArch64II::MO_PAGE);
1652 // Set the regclass on the dest reg too.
1653 MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass);
1654
1655 // MO_TAGGED on the page indicates a tagged address. Set the tag now. We do so
1656 // by creating a MOVK that sets bits 48-63 of the register to (global address
1657 // + 0x100000000 - PC) >> 48. The additional 0x100000000 offset here is to
1658 // prevent an incorrect tag being generated during relocation when the
1659 // global appears before the code section. Without the offset, a global at
1660 // `0x0f00'0000'0000'1000` (i.e. at `0x1000` with tag `0xf`) that's referenced
1661 // by code at `0x2000` would result in `0x0f00'0000'0000'1000 - 0x2000 =
1662 // 0x0eff'ffff'ffff'f000`, meaning the tag would be incorrectly set to `0xe`
1663 // instead of `0xf`.
1664 // This assumes that we're in the small code model so we can assume a binary
1665 // size of <= 4GB, which makes the untagged PC relative offset positive. The
1666 // binary must also be loaded into address range [0, 2^48). Both of these
1667 // properties need to be ensured at runtime when using tagged addresses.
1668 if (OpFlags & AArch64II::MO_TAGGED) {
1669 assert(!Offset &&
1670 "Should not have folded in an offset for a tagged global!");
1671 ADRP = MIRBuilder.buildInstr(AArch64::MOVKXi, {LLT::pointer(0, 64)}, {ADRP})
1672 .addGlobalAddress(GV, 0x100000000,
1674 .addImm(48);
1675 MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass);
1676 }
1677
1678 MIRBuilder.buildInstr(AArch64::G_ADD_LOW, {DstReg}, {ADRP})
1679 .addGlobalAddress(GV, Offset,
1681 MI.eraseFromParent();
1682 return true;
1683}
1684
1686 MachineInstr &MI) const {
1687 MachineIRBuilder &MIB = Helper.MIRBuilder;
1688 MachineRegisterInfo &MRI = *MIB.getMRI();
1689
1690 auto LowerUnaryOp = [&MI, &MIB](unsigned Opcode) {
1691 MIB.buildInstr(Opcode, {MI.getOperand(0)}, {MI.getOperand(2)});
1692 MI.eraseFromParent();
1693 return true;
1694 };
1695 auto LowerBinOp = [&MI, &MIB](unsigned Opcode) {
1696 MIB.buildInstr(Opcode, {MI.getOperand(0)},
1697 {MI.getOperand(2), MI.getOperand(3)});
1698 MI.eraseFromParent();
1699 return true;
1700 };
1701 auto LowerTriOp = [&MI, &MIB](unsigned Opcode) {
1702 MIB.buildInstr(Opcode, {MI.getOperand(0)},
1703 {MI.getOperand(2), MI.getOperand(3), MI.getOperand(4)});
1704 MI.eraseFromParent();
1705 return true;
1706 };
1707
1708 Intrinsic::ID IntrinsicID = cast<GIntrinsic>(MI).getIntrinsicID();
1709 switch (IntrinsicID) {
1710 case Intrinsic::vacopy: {
1711 unsigned PtrSize = ST->isTargetILP32() ? 4 : 8;
1712 unsigned VaListSize =
1713 (ST->isTargetDarwin() || ST->isTargetWindows())
1714 ? PtrSize
1715 : ST->isTargetILP32() ? 20 : 32;
1716
1717 MachineFunction &MF = *MI.getMF();
1719 LLT::scalar(VaListSize * 8));
1720 MIB.buildLoad(Val, MI.getOperand(2),
1723 VaListSize, Align(PtrSize)));
1724 MIB.buildStore(Val, MI.getOperand(1),
1727 VaListSize, Align(PtrSize)));
1728 MI.eraseFromParent();
1729 return true;
1730 }
1731 case Intrinsic::get_dynamic_area_offset: {
1732 MIB.buildConstant(MI.getOperand(0).getReg(), 0);
1733 MI.eraseFromParent();
1734 return true;
1735 }
1736 case Intrinsic::aarch64_mops_memset_tag: {
1737 assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
1738 // Anyext the value being set to 64 bit (only the bottom 8 bits are read by
1739 // the instruction).
1740 auto &Value = MI.getOperand(3);
1741 Register ExtValueReg = MIB.buildAnyExt(LLT::scalar(64), Value).getReg(0);
1742 Value.setReg(ExtValueReg);
1743 return true;
1744 }
1745 case Intrinsic::aarch64_prefetch: {
1746 auto &AddrVal = MI.getOperand(1);
1747
1748 int64_t IsWrite = MI.getOperand(2).getImm();
1749 int64_t Target = MI.getOperand(3).getImm();
1750 int64_t IsStream = MI.getOperand(4).getImm();
1751 int64_t IsData = MI.getOperand(5).getImm();
1752
1753 unsigned PrfOp = (IsWrite << 4) | // Load/Store bit
1754 (!IsData << 3) | // IsDataCache bit
1755 (Target << 1) | // Cache level bits
1756 (unsigned)IsStream; // Stream bit
1757
1758 MIB.buildInstr(AArch64::G_AARCH64_PREFETCH).addImm(PrfOp).add(AddrVal);
1759 MI.eraseFromParent();
1760 return true;
1761 }
1762 case Intrinsic::aarch64_range_prefetch: {
1763 auto &AddrVal = MI.getOperand(1);
1764
1765 int64_t IsWrite = MI.getOperand(2).getImm();
1766 int64_t IsStream = MI.getOperand(3).getImm();
1767 unsigned PrfOp = (IsStream << 2) | IsWrite;
1768
1769 MIB.buildInstr(AArch64::G_AARCH64_RANGE_PREFETCH)
1770 .addImm(PrfOp)
1771 .add(AddrVal)
1772 .addUse(MI.getOperand(4).getReg()); // Metadata
1773 MI.eraseFromParent();
1774 return true;
1775 }
1776 case Intrinsic::aarch64_prefetch_ir: {
1777 auto &AddrVal = MI.getOperand(1);
1778 MIB.buildInstr(AArch64::G_AARCH64_PREFETCH).addImm(24).add(AddrVal);
1779 MI.eraseFromParent();
1780 return true;
1781 }
1782 case Intrinsic::aarch64_neon_uaddv:
1783 case Intrinsic::aarch64_neon_saddv:
1784 case Intrinsic::aarch64_neon_umaxv:
1785 case Intrinsic::aarch64_neon_smaxv:
1786 case Intrinsic::aarch64_neon_uminv:
1787 case Intrinsic::aarch64_neon_sminv: {
1788 bool IsSigned = IntrinsicID == Intrinsic::aarch64_neon_saddv ||
1789 IntrinsicID == Intrinsic::aarch64_neon_smaxv ||
1790 IntrinsicID == Intrinsic::aarch64_neon_sminv;
1791
1792 auto OldDst = MI.getOperand(0).getReg();
1793 auto OldDstTy = MRI.getType(OldDst);
1794 LLT NewDstTy = MRI.getType(MI.getOperand(2).getReg()).getElementType();
1795 if (OldDstTy == NewDstTy)
1796 return true;
1797
1798 auto NewDst = MRI.createGenericVirtualRegister(NewDstTy);
1799
1800 Helper.Observer.changingInstr(MI);
1801 MI.getOperand(0).setReg(NewDst);
1802 Helper.Observer.changedInstr(MI);
1803
1804 MIB.setInsertPt(MIB.getMBB(), ++MIB.getInsertPt());
1805 MIB.buildExtOrTrunc(IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT,
1806 OldDst, NewDst);
1807
1808 return true;
1809 }
1810 case Intrinsic::aarch64_neon_uaddlp:
1811 case Intrinsic::aarch64_neon_saddlp: {
1812 unsigned Opc = IntrinsicID == Intrinsic::aarch64_neon_uaddlp
1813 ? AArch64::G_UADDLP
1814 : AArch64::G_SADDLP;
1815 MIB.buildInstr(Opc, {MI.getOperand(0)}, {MI.getOperand(2)});
1816 MI.eraseFromParent();
1817
1818 return true;
1819 }
1820 case Intrinsic::aarch64_neon_uaddlv:
1821 case Intrinsic::aarch64_neon_saddlv: {
1822 unsigned Opc = IntrinsicID == Intrinsic::aarch64_neon_uaddlv
1823 ? AArch64::G_UADDLV
1824 : AArch64::G_SADDLV;
1825 Register DstReg = MI.getOperand(0).getReg();
1826 Register SrcReg = MI.getOperand(2).getReg();
1827 LLT DstTy = MRI.getType(DstReg);
1828
1829 LLT MidTy, ExtTy;
1830 if (DstTy.isScalar() && DstTy.getScalarSizeInBits() <= 32) {
1831 MidTy = LLT::fixed_vector(4, 32);
1832 ExtTy = LLT::scalar(32);
1833 } else {
1834 MidTy = LLT::fixed_vector(2, 64);
1835 ExtTy = LLT::scalar(64);
1836 }
1837
1838 Register MidReg =
1839 MIB.buildInstr(Opc, {MidTy}, {SrcReg})->getOperand(0).getReg();
1840 Register ZeroReg =
1841 MIB.buildConstant(LLT::scalar(64), 0)->getOperand(0).getReg();
1842 Register ExtReg = MIB.buildInstr(AArch64::G_EXTRACT_VECTOR_ELT, {ExtTy},
1843 {MidReg, ZeroReg})
1844 .getReg(0);
1845
1846 if (DstTy.getScalarSizeInBits() < 32)
1847 MIB.buildTrunc(DstReg, ExtReg);
1848 else
1849 MIB.buildCopy(DstReg, ExtReg);
1850
1851 MI.eraseFromParent();
1852
1853 return true;
1854 }
1855 case Intrinsic::aarch64_neon_smax:
1856 return LowerBinOp(TargetOpcode::G_SMAX);
1857 case Intrinsic::aarch64_neon_smin:
1858 return LowerBinOp(TargetOpcode::G_SMIN);
1859 case Intrinsic::aarch64_neon_umax:
1860 return LowerBinOp(TargetOpcode::G_UMAX);
1861 case Intrinsic::aarch64_neon_umin:
1862 return LowerBinOp(TargetOpcode::G_UMIN);
1863 case Intrinsic::aarch64_neon_fmax:
1864 return LowerBinOp(TargetOpcode::G_FMAXIMUM);
1865 case Intrinsic::aarch64_neon_fmin:
1866 return LowerBinOp(TargetOpcode::G_FMINIMUM);
1867 case Intrinsic::aarch64_neon_fmaxnm:
1868 return LowerBinOp(TargetOpcode::G_FMAXNUM);
1869 case Intrinsic::aarch64_neon_fminnm:
1870 return LowerBinOp(TargetOpcode::G_FMINNUM);
1871 case Intrinsic::aarch64_neon_pmull:
1872 case Intrinsic::aarch64_neon_pmull64:
1873 return LowerBinOp(AArch64::G_PMULL);
1874 case Intrinsic::aarch64_neon_smull:
1875 return LowerBinOp(AArch64::G_SMULL);
1876 case Intrinsic::aarch64_neon_umull:
1877 return LowerBinOp(AArch64::G_UMULL);
1878 case Intrinsic::aarch64_neon_sabd:
1879 return LowerBinOp(TargetOpcode::G_ABDS);
1880 case Intrinsic::aarch64_neon_uabd:
1881 return LowerBinOp(TargetOpcode::G_ABDU);
1882 case Intrinsic::aarch64_neon_uhadd:
1883 return LowerBinOp(TargetOpcode::G_UAVGFLOOR);
1884 case Intrinsic::aarch64_neon_urhadd:
1885 return LowerBinOp(TargetOpcode::G_UAVGCEIL);
1886 case Intrinsic::aarch64_neon_shadd:
1887 return LowerBinOp(TargetOpcode::G_SAVGFLOOR);
1888 case Intrinsic::aarch64_neon_srhadd:
1889 return LowerBinOp(TargetOpcode::G_SAVGCEIL);
1890 case Intrinsic::aarch64_neon_sqshrn: {
1891 if (!MRI.getType(MI.getOperand(0).getReg()).isVector())
1892 return true;
1893 // Create right shift instruction. Store the output register in Shr.
1894 auto Shr = MIB.buildInstr(AArch64::G_VASHR,
1895 {MRI.getType(MI.getOperand(2).getReg())},
1896 {MI.getOperand(2), MI.getOperand(3).getImm()});
1897 // Build the narrow intrinsic, taking in Shr.
1898 MIB.buildInstr(TargetOpcode::G_TRUNC_SSAT_S, {MI.getOperand(0)}, {Shr});
1899 MI.eraseFromParent();
1900 return true;
1901 }
1902 case Intrinsic::aarch64_neon_sqshrun: {
1903 if (!MRI.getType(MI.getOperand(0).getReg()).isVector())
1904 return true;
1905 // Create right shift instruction. Store the output register in Shr.
1906 auto Shr = MIB.buildInstr(AArch64::G_VASHR,
1907 {MRI.getType(MI.getOperand(2).getReg())},
1908 {MI.getOperand(2), MI.getOperand(3).getImm()});
1909 // Build the narrow intrinsic, taking in Shr.
1910 MIB.buildInstr(TargetOpcode::G_TRUNC_SSAT_U, {MI.getOperand(0)}, {Shr});
1911 MI.eraseFromParent();
1912 return true;
1913 }
1914 case Intrinsic::aarch64_neon_sqrshrn: {
1915 if (!MRI.getType(MI.getOperand(0).getReg()).isVector())
1916 return true;
1917 // Create right shift instruction. Store the output register in Shr.
1918 auto Shr = MIB.buildInstr(AArch64::G_SRSHR_I,
1919 {MRI.getType(MI.getOperand(2).getReg())},
1920 {MI.getOperand(2), MI.getOperand(3).getImm()});
1921 // Build the narrow intrinsic, taking in Shr.
1922 MIB.buildInstr(TargetOpcode::G_TRUNC_SSAT_S, {MI.getOperand(0)}, {Shr});
1923 MI.eraseFromParent();
1924 return true;
1925 }
1926 case Intrinsic::aarch64_neon_sqrshrun: {
1927 if (!MRI.getType(MI.getOperand(0).getReg()).isVector())
1928 return true;
1929 // Create right shift instruction. Store the output register in Shr.
1930 auto Shr = MIB.buildInstr(AArch64::G_SRSHR_I,
1931 {MRI.getType(MI.getOperand(2).getReg())},
1932 {MI.getOperand(2), MI.getOperand(3).getImm()});
1933 // Build the narrow intrinsic, taking in Shr.
1934 MIB.buildInstr(TargetOpcode::G_TRUNC_SSAT_U, {MI.getOperand(0)}, {Shr});
1935 MI.eraseFromParent();
1936 return true;
1937 }
1938 case Intrinsic::aarch64_neon_uqrshrn: {
1939 if (!MRI.getType(MI.getOperand(0).getReg()).isVector())
1940 return true;
1941 // Create right shift instruction. Store the output register in Shr.
1942 auto Shr = MIB.buildInstr(AArch64::G_URSHR_I,
1943 {MRI.getType(MI.getOperand(2).getReg())},
1944 {MI.getOperand(2), MI.getOperand(3).getImm()});
1945 // Build the narrow intrinsic, taking in Shr.
1946 MIB.buildInstr(TargetOpcode::G_TRUNC_USAT_U, {MI.getOperand(0)}, {Shr});
1947 MI.eraseFromParent();
1948 return true;
1949 }
1950 case Intrinsic::aarch64_neon_uqshrn: {
1951 if (!MRI.getType(MI.getOperand(0).getReg()).isVector())
1952 return true;
1953 // Create right shift instruction. Store the output register in Shr.
1954 auto Shr = MIB.buildInstr(AArch64::G_VLSHR,
1955 {MRI.getType(MI.getOperand(2).getReg())},
1956 {MI.getOperand(2), MI.getOperand(3).getImm()});
1957 // Build the narrow intrinsic, taking in Shr.
1958 MIB.buildInstr(TargetOpcode::G_TRUNC_USAT_U, {MI.getOperand(0)}, {Shr});
1959 MI.eraseFromParent();
1960 return true;
1961 }
1962 case Intrinsic::aarch64_neon_sqshlu: {
1963 // Check if last operand is constant vector dup
1964 auto ShiftAmount = isConstantOrConstantSplatVector(
1965 *MRI.getVRegDef(MI.getOperand(3).getReg()), MRI);
1966 if (ShiftAmount) {
1967 // If so, create a new intrinsic with the correct shift amount
1968 MIB.buildInstr(AArch64::G_SQSHLU_I, {MI.getOperand(0)},
1969 {MI.getOperand(2)})
1970 .addImm(ShiftAmount->getSExtValue());
1971 MI.eraseFromParent();
1972 return true;
1973 }
1974 return false;
1975 }
1976 case Intrinsic::aarch64_neon_vsli: {
1977 MIB.buildInstr(
1978 AArch64::G_SLI, {MI.getOperand(0)},
1979 {MI.getOperand(2), MI.getOperand(3), MI.getOperand(4).getImm()});
1980 MI.eraseFromParent();
1981 break;
1982 }
1983 case Intrinsic::aarch64_neon_vsri: {
1984 MIB.buildInstr(
1985 AArch64::G_SRI, {MI.getOperand(0)},
1986 {MI.getOperand(2), MI.getOperand(3), MI.getOperand(4).getImm()});
1987 MI.eraseFromParent();
1988 break;
1989 }
1990 case Intrinsic::aarch64_neon_abs: {
1991 // Lower the intrinsic to G_ABS.
1992 MIB.buildInstr(TargetOpcode::G_ABS, {MI.getOperand(0)}, {MI.getOperand(2)});
1993 MI.eraseFromParent();
1994 return true;
1995 }
1996 case Intrinsic::aarch64_neon_sqadd: {
1997 if (MRI.getType(MI.getOperand(0).getReg()).isVector())
1998 return LowerBinOp(TargetOpcode::G_SADDSAT);
1999 break;
2000 }
2001 case Intrinsic::aarch64_neon_sqsub: {
2002 if (MRI.getType(MI.getOperand(0).getReg()).isVector())
2003 return LowerBinOp(TargetOpcode::G_SSUBSAT);
2004 break;
2005 }
2006 case Intrinsic::aarch64_neon_uqadd: {
2007 if (MRI.getType(MI.getOperand(0).getReg()).isVector())
2008 return LowerBinOp(TargetOpcode::G_UADDSAT);
2009 break;
2010 }
2011 case Intrinsic::aarch64_neon_uqsub: {
2012 if (MRI.getType(MI.getOperand(0).getReg()).isVector())
2013 return LowerBinOp(TargetOpcode::G_USUBSAT);
2014 break;
2015 }
2016 case Intrinsic::aarch64_neon_udot:
2017 return LowerTriOp(AArch64::G_UDOT);
2018 case Intrinsic::aarch64_neon_sdot:
2019 return LowerTriOp(AArch64::G_SDOT);
2020 case Intrinsic::aarch64_neon_usdot:
2021 return LowerTriOp(AArch64::G_USDOT);
2022 case Intrinsic::aarch64_neon_sqxtn:
2023 return LowerUnaryOp(TargetOpcode::G_TRUNC_SSAT_S);
2024 case Intrinsic::aarch64_neon_sqxtun:
2025 return LowerUnaryOp(TargetOpcode::G_TRUNC_SSAT_U);
2026 case Intrinsic::aarch64_neon_uqxtn:
2027 return LowerUnaryOp(TargetOpcode::G_TRUNC_USAT_U);
2028 case Intrinsic::aarch64_neon_fcvtzu:
2029 return LowerUnaryOp(TargetOpcode::G_FPTOUI_SAT);
2030 case Intrinsic::aarch64_neon_fcvtzs:
2031 return LowerUnaryOp(TargetOpcode::G_FPTOSI_SAT);
2032
2033 case Intrinsic::vector_reverse:
2034 // TODO: Add support for vector_reverse
2035 return false;
2036 }
2037
2038 return true;
2039}
2040
/// Custom-legalize G_SHL/G_ASHR/G_LSHR by widening a selectable constant
/// shift amount to s64 so the imported SelectionDAG patterns can match the
/// immediate forms. The shift itself is always legal; this only rewrites the
/// amount operand.
bool AArch64LegalizerInfo::legalizeShlAshrLshr(
    MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
    GISelChangeObserver &Observer) const {
  assert(MI.getOpcode() == TargetOpcode::G_ASHR ||
         MI.getOpcode() == TargetOpcode::G_LSHR ||
         MI.getOpcode() == TargetOpcode::G_SHL);
  // If the shift amount is a G_CONSTANT, promote it to a 64 bit type so the
  // imported patterns can select it later. Either way, it will be legal.
  Register AmtReg = MI.getOperand(2).getReg();
  auto VRegAndVal = getIConstantVRegValWithLookThrough(AmtReg, MRI);
  // Non-constant amount: nothing to do, the register form is legal.
  if (!VRegAndVal)
    return true;
  // Check the shift amount is in range for an immediate form.
  int64_t Amount = VRegAndVal->Value.getSExtValue();
  if (Amount > 31)
    return true; // This will have to remain a register variant.
  auto ExtCst = MIRBuilder.buildConstant(LLT::scalar(64), Amount);
  // Notify the observer so the legalizer re-examines the mutated instruction.
  Observer.changingInstr(MI);
  MI.getOperand(2).setReg(ExtCst.getReg(0));
  Observer.changedInstr(MI);
  return true;
}
2063
/// Try to fold a G_PTR_ADD with a constant offset into the base+imm LDP/STP
/// addressing mode. On failure, \p Base is \p Root and \p Offset is 0.
static void matchLDPSTPAddrMode(Register Root, Register &Base, int &Offset,
                                MachineRegisterInfo &MRI) {
  Base = Root;
  Offset = 0;

  Register NewBase;
  int64_t NewOffset;
  // LDP/STP take a signed 7-bit immediate scaled by 8, hence
  // isShiftedInt<7, 3>; this also guarantees NewOffset fits in `int`.
  if (mi_match(Root, MRI, m_GPtrAdd(m_Reg(NewBase), m_ICst(NewOffset))) &&
      isShiftedInt<7, 3>(NewOffset)) {
    Base = NewBase;
    Offset = NewOffset;
  }
}
2077
// FIXME: This should be removed and replaced with the generic bitcast legalize
// action.
bool AArch64LegalizerInfo::legalizeLoadStore(
    MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
    GISelChangeObserver &Observer) const {
  assert(MI.getOpcode() == TargetOpcode::G_STORE ||
         MI.getOpcode() == TargetOpcode::G_LOAD);
  // Here we just try to handle vector loads/stores where our value type might
  // have pointer elements, which the SelectionDAG importer can't handle. To
  // allow the existing patterns for s64 to fire for p0, we just try to bitcast
  // the value to use s64 types.

  // Custom legalization requires the instruction, if not deleted, must be fully
  // legalized. In order to allow further legalization of the inst, we create
  // a new instruction and erase the existing one.

  Register ValReg = MI.getOperand(0).getReg();
  const LLT ValTy = MRI.getType(ValReg);

  // s128 atomic loads/stores: expand to a 64-bit register pair instruction
  // (LDP/STP, or LDIAPP/STILP when acquire/release semantics require RCpc3).
  if (ValTy == LLT::scalar(128)) {

    AtomicOrdering Ordering = (*MI.memoperands_begin())->getSuccessOrdering();
    bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD;
    bool IsLoadAcquire = IsLoad && Ordering == AtomicOrdering::Acquire;
    bool IsStoreRelease = !IsLoad && Ordering == AtomicOrdering::Release;
    bool IsRcpC3 =
        ST->hasLSE2() && ST->hasRCPC3() && (IsLoadAcquire || IsStoreRelease);

    LLT s64 = LLT::scalar(64);

    unsigned Opcode;
    if (IsRcpC3) {
      Opcode = IsLoad ? AArch64::LDIAPPX : AArch64::STILPX;
    } else {
      // For LSE2, loads/stores should have been converted to monotonic and had
      // a fence inserted after them.
      assert(Ordering == AtomicOrdering::Monotonic ||
             Ordering == AtomicOrdering::Unordered);
      assert(ST->hasLSE2() && "ldp/stp not single copy atomic without +lse2");

      Opcode = IsLoad ? AArch64::LDPXi : AArch64::STPXi;
    }

    // Bridge between the s128 G_LOAD/G_STORE and the s64-pair target
    // instruction with a merge (load) or unmerge (store).
    MachineInstrBuilder NewI;
    if (IsLoad) {
      NewI = MIRBuilder.buildInstr(Opcode, {s64, s64}, {});
      MIRBuilder.buildMergeLikeInstr(
          ValReg, {NewI->getOperand(0), NewI->getOperand(1)});
    } else {
      auto Split = MIRBuilder.buildUnmerge(s64, MI.getOperand(0));
      NewI = MIRBuilder.buildInstr(
          Opcode, {}, {Split->getOperand(0), Split->getOperand(1)});
    }

    if (IsRcpC3) {
      NewI.addUse(MI.getOperand(1).getReg());
    } else {
      // Fold an add-of-constant base into LDP/STP's scaled immediate field.
      Register Base;
      int Offset;
      matchLDPSTPAddrMode(MI.getOperand(1).getReg(), Base, Offset, MRI);
      NewI.addUse(Base);
      NewI.addImm(Offset / 8);
    }

    NewI.cloneMemRefs(MI);
    // NewI is a target instruction, so constrain its operands to register
    // classes here rather than leaving it to the selector.
    constrainSelectedInstRegOperands(*NewI, *ST->getInstrInfo(),
                                     *MRI.getTargetRegisterInfo(),
                                     *ST->getRegBankInfo());
    MI.eraseFromParent();
    return true;
  }

  if (!ValTy.isPointerVector() ||
      ValTy.getElementType().getAddressSpace() != 0) {
    LLVM_DEBUG(dbgs() << "Tried to do custom legalization on wrong load/store");
    return false;
  }

  // Vector-of-p0 case: rewrite as a same-sized integer vector access so the
  // s64 patterns apply; the MMO type must be updated to match.
  unsigned PtrSize = ValTy.getElementType().getSizeInBits();
  const LLT NewTy = LLT::vector(ValTy.getElementCount(), PtrSize);
  auto &MMO = **MI.memoperands_begin();
  MMO.setType(NewTy);

  if (MI.getOpcode() == TargetOpcode::G_STORE) {
    auto Bitcast = MIRBuilder.buildBitcast(NewTy, ValReg);
    MIRBuilder.buildStore(Bitcast.getReg(0), MI.getOperand(1), MMO);
  } else {
    auto NewLoad = MIRBuilder.buildLoad(NewTy, MI.getOperand(1), MMO);
    MIRBuilder.buildBitcast(ValReg, NewLoad);
  }
  MI.eraseFromParent();
  return true;
}
2171
/// Lower G_VAARG by loading the current va_list pointer, realigning it if the
/// requested alignment exceeds the pointer alignment, loading the value, and
/// storing back the advanced pointer.
/// NOTE(review): this models va_list as a single pointer — the simple ABI
/// variant (not the full AAPCS64 struct va_list); confirm against callers.
bool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI,
                                         MachineRegisterInfo &MRI,
                                         MachineIRBuilder &MIRBuilder) const {
  MachineFunction &MF = MIRBuilder.getMF();
  Align Alignment(MI.getOperand(2).getImm());
  Register Dst = MI.getOperand(0).getReg();
  Register ListPtr = MI.getOperand(1).getReg();

  LLT PtrTy = MRI.getType(ListPtr);
  LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());

  const unsigned PtrSize = PtrTy.getSizeInBits() / 8;
  const Align PtrAlign = Align(PtrSize);
  // Load the current head of the va_list.
  auto List = MIRBuilder.buildLoad(
      PtrTy, ListPtr,
      *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
                               PtrTy, PtrAlign));

  MachineInstrBuilder DstPtr;
  if (Alignment > PtrAlign) {
    // Realign the list to the actual required alignment.
    auto AlignMinus1 =
        MIRBuilder.buildConstant(IntPtrTy, Alignment.value() - 1);
    auto ListTmp = MIRBuilder.buildPtrAdd(PtrTy, List, AlignMinus1.getReg(0));
    DstPtr = MIRBuilder.buildMaskLowPtrBits(PtrTy, ListTmp, Log2(Alignment));
  } else
    DstPtr = List;

  // Load the argument value from the (possibly realigned) slot.
  LLT ValTy = MRI.getType(Dst);
  uint64_t ValSize = ValTy.getSizeInBits() / 8;
  MIRBuilder.buildLoad(
      Dst, DstPtr,
      *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
                               ValTy, std::max(Alignment, PtrAlign)));

  // Advance past the slot, rounding the value size up to pointer alignment,
  // and store the new head back for the next va_arg.
  auto Size = MIRBuilder.buildConstant(IntPtrTy, alignTo(ValSize, PtrAlign));

  auto NewList = MIRBuilder.buildPtrAdd(PtrTy, DstPtr, Size.getReg(0));

  MIRBuilder.buildStore(NewList, ListPtr,
                        *MF.getMachineMemOperand(MachinePointerInfo(),
                                                 MachineMemOperand::MOStore,
                                                 PtrTy, PtrAlign));

  MI.eraseFromParent();
  return true;
}
2219
2220bool AArch64LegalizerInfo::legalizeBitfieldExtract(
2221 MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
2222 // Only legal if we can select immediate forms.
2223 // TODO: Lower this otherwise.
2224 return getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI) &&
2225 getIConstantVRegValWithLookThrough(MI.getOperand(3).getReg(), MRI);
2226}
2227
bool AArch64LegalizerInfo::legalizeCTPOP(MachineInstr &MI,
                                         MachineRegisterInfo &MRI,
                                         LegalizerHelper &Helper) const {
  // When there is no integer popcount instruction (FEAT_CSSC isn't available),
  // it can be more efficiently lowered to the following sequence that uses
  // AdvSIMD registers/instructions as long as the copies to/from the AdvSIMD
  // registers are cheap.
  //  FMOV    D0, X0        // copy 64-bit int to vector, high bits zero'd
  //  CNT     V0.8B, V0.8B  // 8xbyte pop-counts
  //  ADDV    B0, V0.8B     // sum 8xbyte pop-counts
  //  UMOV    X0, V0.B[0]   // copy byte result back to integer reg
  //
  // For 128 bit vector popcounts, we lower to the following sequence:
  //  cnt.16b   v0, v0  // v8s16, v4s32, v2s64
  //  uaddlp.8h v0, v0  // v8s16, v4s32, v2s64
  //  uaddlp.4s v0, v0  //        v4s32, v2s64
  //  uaddlp.2d v0, v0  //               v2s64
  //
  // For 64 bit vector popcounts, we lower to the following sequence:
  //  cnt.8b    v0, v0  // v4s16, v2s32
  //  uaddlp.4h v0, v0  // v4s16, v2s32
  //  uaddlp.2s v0, v0  //        v2s32

  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
  Register Dst = MI.getOperand(0).getReg();
  Register Val = MI.getOperand(1).getReg();
  LLT Ty = MRI.getType(Val);
  unsigned Size = Ty.getSizeInBits();

  assert(Ty == MRI.getType(Dst) &&
         "Expected src and dst to have the same type!");

  // With CSSC, an s128 popcount splits into two s64 CNTs plus an add; the
  // combined count fits in s64 (max 128), so zext to the s128 result.
  if (ST->hasCSSC() && Ty.isScalar() && Size == 128) {
    LLT s64 = LLT::scalar(64);

    auto Split = MIRBuilder.buildUnmerge(s64, Val);
    auto CTPOP1 = MIRBuilder.buildCTPOP(s64, Split->getOperand(0));
    auto CTPOP2 = MIRBuilder.buildCTPOP(s64, Split->getOperand(1));
    auto Add = MIRBuilder.buildAdd(s64, CTPOP1, CTPOP2);

    MIRBuilder.buildZExt(Dst, Add);
    MI.eraseFromParent();
    return true;
  }

  // The AdvSIMD route below is off the table without NEON or under
  // noimplicitfloat; fall back to the generic bit-twiddling lowering.
  if (!ST->hasNEON() ||
      MI.getMF()->getFunction().hasFnAttribute(Attribute::NoImplicitFloat)) {
    // Use generic lowering when custom lowering is not possible.
    return Ty.isScalar() && (Size == 32 || Size == 64) &&
           Helper.lowerBitCount(MI) ==
               LegalizerHelper::Legalized;
  }

  // Pre-conditioning: widen Val up to the nearest vector type.
  // s32,s64,v4s16,v2s32 -> v8i8
  // v8s16,v4s32,v2s64 -> v16i8
  LLT VTy = Size == 128 ? LLT::fixed_vector(16, 8) : LLT::fixed_vector(8, 8);
  if (Ty.isScalar()) {
    assert((Size == 32 || Size == 64 || Size == 128) && "Expected only 32, 64, or 128 bit scalars!");
    // An s32 value must occupy a full 64-bit register before the bitcast to
    // v8s8; zext keeps the extra lanes zero so they don't affect the count.
    if (Size == 32) {
      Val = MIRBuilder.buildZExt(LLT::scalar(64), Val).getReg(0);
    }
  }
  Val = MIRBuilder.buildBitcast(VTy, Val).getReg(0);

  // Count bits in each byte-sized lane.
  auto CTPOP = MIRBuilder.buildCTPOP(VTy, Val);

  // Sum across lanes.

  // With the dot-product extension, UDOT against an all-ones vector sums
  // groups of four byte counts in one instruction.
  if (ST->hasDotProd() && Ty.isVector() && Ty.getNumElements() >= 2 &&
      Ty.getScalarSizeInBits() != 16) {
    LLT Dt = Ty == LLT::fixed_vector(2, 64) ? LLT::fixed_vector(4, 32) : Ty;
    auto Zeros = MIRBuilder.buildConstant(Dt, 0);
    auto Ones = MIRBuilder.buildConstant(VTy, 1);
    MachineInstrBuilder Sum;

    if (Ty == LLT::fixed_vector(2, 64)) {
      // v2s64 needs one extra pairwise widen after the v4s32 dot product.
      auto UDOT =
          MIRBuilder.buildInstr(AArch64::G_UDOT, {Dt}, {Zeros, Ones, CTPOP});
      Sum = MIRBuilder.buildInstr(AArch64::G_UADDLP, {Ty}, {UDOT});
    } else if (Ty == LLT::fixed_vector(4, 32)) {
      Sum = MIRBuilder.buildInstr(AArch64::G_UDOT, {Dt}, {Zeros, Ones, CTPOP});
    } else if (Ty == LLT::fixed_vector(2, 32)) {
      Sum = MIRBuilder.buildInstr(AArch64::G_UDOT, {Dt}, {Zeros, Ones, CTPOP});
    } else {
      llvm_unreachable("unexpected vector shape");
    }

    Sum->getOperand(0).setReg(Dst);
    MI.eraseFromParent();
    return true;
  }

  // Otherwise chain uaddlv (scalar result) or uaddlp (pairwise widening)
  // intrinsics until the lane width matches the result element width.
  Register HSum = CTPOP.getReg(0);
  unsigned Opc;
  SmallVector<LLT> HAddTys;
  if (Ty.isScalar()) {
    Opc = Intrinsic::aarch64_neon_uaddlv;
    HAddTys.push_back(LLT::scalar(32));
  } else if (Ty == LLT::fixed_vector(8, 16)) {
    Opc = Intrinsic::aarch64_neon_uaddlp;
    HAddTys.push_back(LLT::fixed_vector(8, 16));
  } else if (Ty == LLT::fixed_vector(4, 32)) {
    Opc = Intrinsic::aarch64_neon_uaddlp;
    HAddTys.push_back(LLT::fixed_vector(8, 16));
    HAddTys.push_back(LLT::fixed_vector(4, 32));
  } else if (Ty == LLT::fixed_vector(2, 64)) {
    Opc = Intrinsic::aarch64_neon_uaddlp;
    HAddTys.push_back(LLT::fixed_vector(8, 16));
    HAddTys.push_back(LLT::fixed_vector(4, 32));
    HAddTys.push_back(LLT::fixed_vector(2, 64));
  } else if (Ty == LLT::fixed_vector(4, 16)) {
    Opc = Intrinsic::aarch64_neon_uaddlp;
    HAddTys.push_back(LLT::fixed_vector(4, 16));
  } else if (Ty == LLT::fixed_vector(2, 32)) {
    Opc = Intrinsic::aarch64_neon_uaddlp;
    HAddTys.push_back(LLT::fixed_vector(4, 16));
    HAddTys.push_back(LLT::fixed_vector(2, 32));
  } else
    llvm_unreachable("unexpected vector shape");
  MachineInstrBuilder UADD;
  for (LLT HTy : HAddTys) {
    UADD = MIRBuilder.buildIntrinsic(Opc, {HTy}).addUse(HSum);
    HSum = UADD.getReg(0);
  }

  // Post-conditioning.
  // uaddlv produced an s32 count; widen it back to the s64/s128 result type.
  if (Ty.isScalar() && (Size == 64 || Size == 128))
    MIRBuilder.buildZExt(Dst, UADD);
  else
    UADD->getOperand(0).setReg(Dst);
  MI.eraseFromParent();
  return true;
}
2363
/// Expand a 128-bit G_ATOMIC_CMPXCHG into either a CASP instruction (with
/// LSE) or a CMP_SWAP_128 pseudo (LL/SC loop), picking the variant that
/// matches the access's memory ordering.
bool AArch64LegalizerInfo::legalizeAtomicCmpxchg128(
    MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
  LLT s64 = LLT::scalar(64);
  auto Addr = MI.getOperand(1).getReg();
  // Split the s128 desired/new values into lo/hi s64 halves.
  auto DesiredI = MIRBuilder.buildUnmerge({s64, s64}, MI.getOperand(2));
  auto NewI = MIRBuilder.buildUnmerge({s64, s64}, MI.getOperand(3));
  auto DstLo = MRI.createGenericVirtualRegister(s64);
  auto DstHi = MRI.createGenericVirtualRegister(s64);

  MachineInstrBuilder CAS;
  if (ST->hasLSE()) {
    // We have 128-bit CASP instructions taking XSeqPair registers, which are
    // s128. We need the merge/unmerge to bracket the expansion and pair up with
    // the rest of the MIR so we must reassemble the extracted registers into a
    // 128-bit known-regclass one with code like this:
    //
    //     %in1 = REG_SEQUENCE Lo, Hi    ; One for each input
    //     %out = CASP %in1, ...
    //     %OldLo = G_EXTRACT %out, 0
    //     %OldHi = G_EXTRACT %out, 64
    auto Ordering = (*MI.memoperands_begin())->getMergedOrdering();
    unsigned Opcode;
    switch (Ordering) {
    case AtomicOrdering::Acquire:
      Opcode = AArch64::CASPAX;
      break;
    case AtomicOrdering::Release:
      Opcode = AArch64::CASPLX;
      break;
    case AtomicOrdering::AcquireRelease:
    case AtomicOrdering::SequentiallyConsistent:
      Opcode = AArch64::CASPALX;
      break;
    default:
      Opcode = AArch64::CASPX;
      break;
    }

    LLT s128 = LLT::scalar(128);
    auto CASDst = MRI.createGenericVirtualRegister(s128);
    auto CASDesired = MRI.createGenericVirtualRegister(s128);
    auto CASNew = MRI.createGenericVirtualRegister(s128);
    MIRBuilder.buildInstr(TargetOpcode::REG_SEQUENCE, {CASDesired}, {})
        .addUse(DesiredI->getOperand(0).getReg())
        .addImm(AArch64::sube64)
        .addUse(DesiredI->getOperand(1).getReg())
        .addImm(AArch64::subo64);
    MIRBuilder.buildInstr(TargetOpcode::REG_SEQUENCE, {CASNew}, {})
        .addUse(NewI->getOperand(0).getReg())
        .addImm(AArch64::sube64)
        .addUse(NewI->getOperand(1).getReg())
        .addImm(AArch64::subo64);

    CAS = MIRBuilder.buildInstr(Opcode, {CASDst}, {CASDesired, CASNew, Addr});

    MIRBuilder.buildExtract({DstLo}, {CASDst}, 0);
    MIRBuilder.buildExtract({DstHi}, {CASDst}, 64);
  } else {
    // The -O0 CMP_SWAP_128 is friendlier to generate code for because LDXP/STXP
    // can take arbitrary registers so it just has the normal GPR64 operands the
    // rest of AArch64 is expecting.
    auto Ordering = (*MI.memoperands_begin())->getMergedOrdering();
    unsigned Opcode;
    switch (Ordering) {
    case AtomicOrdering::Acquire:
      Opcode = AArch64::CMP_SWAP_128_ACQUIRE;
      break;
    case AtomicOrdering::Release:
      Opcode = AArch64::CMP_SWAP_128_RELEASE;
      break;
    case AtomicOrdering::AcquireRelease:
    case AtomicOrdering::SequentiallyConsistent:
      Opcode = AArch64::CMP_SWAP_128;
      break;
    default:
      Opcode = AArch64::CMP_SWAP_128_MONOTONIC;
      break;
    }

    auto Scratch = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
    CAS = MIRBuilder.buildInstr(Opcode, {DstLo, DstHi, Scratch},
                                {Addr, DesiredI->getOperand(0),
                                 DesiredI->getOperand(1), NewI->getOperand(0),
                                 NewI->getOperand(1)});
  }

  CAS.cloneMemRefs(MI);
  // CAS is a target instruction; constrain its operands to register classes
  // now since the generic selector won't see it.
  constrainSelectedInstRegOperands(*CAS, *ST->getInstrInfo(),
                                   *MRI.getTargetRegisterInfo(),
                                   *ST->getRegBankInfo());

  // Reassemble the s128 result from the lo/hi halves.
  MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {DstLo, DstHi});
  MI.eraseFromParent();
  return true;
}
2460
2461bool AArch64LegalizerInfo::legalizeCTTZ(MachineInstr &MI,
2462 LegalizerHelper &Helper) const {
2463 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2464 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
2465 LLT Ty = MRI.getType(MI.getOperand(1).getReg());
2466 auto BitReverse = MIRBuilder.buildBitReverse(Ty, MI.getOperand(1));
2467 MIRBuilder.buildCTLZ(MI.getOperand(0).getReg(), BitReverse);
2468 MI.eraseFromParent();
2469 return true;
2470}
2471
2472bool AArch64LegalizerInfo::legalizeMemOps(MachineInstr &MI,
2473 LegalizerHelper &Helper) const {
2474 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2475
2476 // Tagged version MOPSMemorySetTagged is legalised in legalizeIntrinsic
2477 if (MI.getOpcode() == TargetOpcode::G_MEMSET) {
2478 // Anyext the value being set to 64 bit (only the bottom 8 bits are read by
2479 // the instruction).
2480 auto &Value = MI.getOperand(1);
2481 Register ExtValueReg =
2482 MIRBuilder.buildAnyExt(LLT::scalar(64), Value).getReg(0);
2483 Value.setReg(ExtValueReg);
2484 return true;
2485 }
2486
2487 return false;
2488}
2489
/// G_EXTRACT_VECTOR_ELT with a constant index (or on a scalable vector) is
/// already legal; a variable index on a fixed vector is lowered through a
/// stack temporary.
bool AArch64LegalizerInfo::legalizeExtractVectorElt(
    MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
  const GExtractVectorElement *Element = cast<GExtractVectorElement>(&MI);
  auto VRegAndVal =
      getIConstantVRegValWithLookThrough(Element->getIndexReg(), MRI);
  if (VRegAndVal)
    return true;
  LLT VecTy = MRI.getType(Element->getVectorReg());
  if (VecTy.isScalableVector())
    return true;
  // Spill the vector and reload the selected lane.
  return Helper.lowerExtractInsertVectorElt(MI) !=
         LegalizerHelper::LegalizeResult::UnableToLegalize;
}
2503
/// Lower G_DYN_STACKALLOC. With inline-asm stack probing enabled, emit the
/// PROBED_STACKALLOC_DYN pseudo so each page of the new allocation is probed;
/// otherwise use the generic lowering.
bool AArch64LegalizerInfo::legalizeDynStackAlloc(
    MachineInstr &MI, LegalizerHelper &Helper) const {
  MachineFunction &MF = *MI.getParent()->getParent();
  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();

  // If stack probing is not enabled for this function, use the default
  // lowering.
  if (!MF.getFunction().hasFnAttribute("probe-stack") ||
      MF.getFunction().getFnAttribute("probe-stack").getValueAsString() !=
          "inline-asm") {
    Helper.lowerDynStackAlloc(MI);
    return true;
  }

  Register Dst = MI.getOperand(0).getReg();
  Register AllocSize = MI.getOperand(1).getReg();
  Align Alignment = assumeAligned(MI.getOperand(2).getImm());

  assert(MRI.getType(Dst) == LLT::pointer(0, 64) &&
         "Unexpected type for dynamic alloca");
  assert(MRI.getType(AllocSize) == LLT::scalar(64) &&
         "Unexpected type for dynamic alloca");

  LLT PtrTy = MRI.getType(Dst);
  // Compute the new SP value (SP - size, aligned) that the probing pseudo
  // will walk down to.
  Register SPReg =
      Helper.getTargetLowering().getStackPointerRegisterToSaveRestore();
  Register SPTmp =
      Helper.getDynStackAllocTargetPtr(SPReg, AllocSize, Alignment, PtrTy);
  auto NewMI =
      MIRBuilder.buildInstr(AArch64::PROBED_STACKALLOC_DYN, {}, {SPTmp});
  MRI.setRegClass(NewMI.getReg(0), &AArch64::GPR64commonRegClass);
  // The result copy must come after the probe so it observes the final SP.
  MIRBuilder.setInsertPt(*NewMI->getParent(), NewMI);
  MIRBuilder.buildCopy(Dst, SPTmp);

  MI.eraseFromParent();
  return true;
}
2542
2543bool AArch64LegalizerInfo::legalizePrefetch(MachineInstr &MI,
2544 LegalizerHelper &Helper) const {
2545 MachineIRBuilder &MIB = Helper.MIRBuilder;
2546 auto &AddrVal = MI.getOperand(0);
2547
2548 int64_t IsWrite = MI.getOperand(1).getImm();
2549 int64_t Locality = MI.getOperand(2).getImm();
2550 int64_t IsData = MI.getOperand(3).getImm();
2551
2552 bool IsStream = Locality == 0;
2553 if (Locality != 0) {
2554 assert(Locality <= 3 && "Prefetch locality out-of-range");
2555 // The locality degree is the opposite of the cache speed.
2556 // Put the number the other way around.
2557 // The encoding starts at 0 for level 1
2558 Locality = 3 - Locality;
2559 }
2560
2561 unsigned PrfOp = (IsWrite << 4) | (!IsData << 3) | (Locality << 1) | IsStream;
2562
2563 MIB.buildInstr(AArch64::G_AARCH64_PREFETCH).addImm(PrfOp).add(AddrVal);
2564 MI.eraseFromParent();
2565 return true;
2566}
2567
/// Lower a vector G_FPTRUNC from s64 elements down to s16 in two steps:
/// first s64 -> s32 with round-to-odd (G_FPTRUNC_ODD) so no double rounding
/// occurs, then an ordinary s32 -> s16 truncate.
bool AArch64LegalizerInfo::legalizeFptrunc(MachineInstr &MI,
                                           MachineIRBuilder &MIRBuilder,
                                           MachineRegisterInfo &MRI) const {
  auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
  assert(SrcTy.isFixedVector() && isPowerOf2_32(SrcTy.getNumElements()) &&
         "Expected a power of 2 elements");

  LLT s16 = LLT::scalar(16);
  LLT s32 = LLT::scalar(32);
  LLT s64 = LLT::scalar(64);
  LLT v2s16 = LLT::fixed_vector(2, s16);
  LLT v4s16 = LLT::fixed_vector(4, s16);
  LLT v2s32 = LLT::fixed_vector(2, s32);
  LLT v4s32 = LLT::fixed_vector(4, s32);
  LLT v2s64 = LLT::fixed_vector(2, s64);

  SmallVector<Register> RegsToUnmergeTo;
  SmallVector<Register> TruncOddDstRegs;
  SmallVector<Register> RegsToMerge;

  unsigned ElemCount = SrcTy.getNumElements();

  // Find the biggest size chunks we can work with
  int StepSize = ElemCount % 4 ? 2 : 4;

  // If we have a power of 2 greater than 2, we need to first unmerge into
  // enough pieces
  if (ElemCount <= 2)
    RegsToUnmergeTo.push_back(Src);
  else {
    for (unsigned i = 0; i < ElemCount / 2; ++i)
      RegsToUnmergeTo.push_back(MRI.createGenericVirtualRegister(v2s64));

    MIRBuilder.buildUnmerge(RegsToUnmergeTo, Src);
  }

  // Create all of the round-to-odd instructions and store them
  for (auto SrcReg : RegsToUnmergeTo) {
    Register Mid =
        MIRBuilder.buildInstr(AArch64::G_FPTRUNC_ODD, {v2s32}, {SrcReg})
            .getReg(0);
    TruncOddDstRegs.push_back(Mid);
  }

  // Truncate 4s32 to 4s16 if we can to reduce instruction count, otherwise
  // truncate 2s32 to 2s16.
  unsigned Index = 0;
  for (unsigned LoopIter = 0; LoopIter < ElemCount / StepSize; ++LoopIter) {
    if (StepSize == 4) {
      // Pair up two v2s32 halves into a v4s32 for a single wide fptrunc.
      // Note: braced-init-list operands are evaluated left to right, so the
      // two Index++ reads are well-sequenced.
      Register ConcatDst =
          MIRBuilder
              .buildInstr(
                  TargetOpcode::G_CONCAT_VECTORS,
                  {v4s32}, {TruncOddDstRegs[Index++], TruncOddDstRegs[Index++]})
              .getReg(0);

      RegsToMerge.push_back(
          MIRBuilder.buildFPTrunc(v4s16, ConcatDst).getReg(0));
    } else {
      RegsToMerge.push_back(
          MIRBuilder.buildFPTrunc(v2s16, TruncOddDstRegs[Index++]).getReg(0));
    }
  }

  // If there is only one register, replace the destination
  if (RegsToMerge.size() == 1) {
    MRI.replaceRegWith(Dst, RegsToMerge.pop_back_val());
    MI.eraseFromParent();
    return true;
  }

  // Merge the rest of the instructions & replace the register
  Register Fin = MIRBuilder.buildMergeLikeInstr(DstTy, RegsToMerge).getReg(0);
  MRI.replaceRegWith(Dst, Fin);
  MI.eraseFromParent();
  return true;
}
static void matchLDPSTPAddrMode(Register Root, Register &Base, int &Offset, MachineRegisterInfo &MRI)
This file declares the targeting of the Machinelegalizer class for AArch64.
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static Error unsupported(const char *Str, const Triple &T)
Definition MachO.cpp:71
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
IRTranslator LLVM IR MI
Interface for Targets to specify which operations they can successfully select and how the others sho...
Contains matchers for matching SSA Machine Instructions.
This file declares the MachineIRBuilder class.
Promote Memory to Register
Definition Mem2Reg.cpp:110
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
ppc ctr loops verify
if(PassOpts->AAPipeline)
static constexpr MCPhysReg SPReg
This file contains some templates that are useful if you are working with the STL at all.
#define LLVM_DEBUG(...)
Definition Debug.h:114
bool legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI, LostDebugLocObserver &LocObserver) const override
Called for instructions with the Custom LegalizationAction.
bool legalizeIntrinsic(LegalizerHelper &Helper, MachineInstr &MI) const override
AArch64LegalizerInfo(const AArch64Subtarget &ST)
Class for arbitrary precision integers.
Definition APInt.h:78
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1043
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition APInt.cpp:1697
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1577
LLVM_ABI StringRef getValueAsString() const
Return the attribute's value as a string.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
@ ICMP_NE
not equal
Definition InstrTypes.h:698
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition Function.cpp:763
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:728
Abstract class that contains various methods for clients to notify about changes.
virtual void changingInstr(MachineInstr &MI)=0
This instruction is about to be mutated in some way.
virtual void changedInstr(MachineInstr &MI)=0
This instruction was mutated in some way.
constexpr bool isScalableVector() const
Returns true if the LLT is a scalable vector.
constexpr unsigned getScalarSizeInBits() const
constexpr bool isScalar() const
static constexpr LLT scalable_vector(unsigned MinNumElements, unsigned ScalarSizeInBits)
Get a low-level scalable vector of some number of elements and element width.
static constexpr LLT vector(ElementCount EC, unsigned ScalarSizeInBits)
Get a low-level vector of some number of elements and element width.
constexpr bool isPointerVector() const
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
constexpr bool isVector() const
static constexpr LLT pointer(unsigned AddressSpace, unsigned SizeInBits)
Get a low-level pointer in the given address space.
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
constexpr LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
constexpr ElementCount getElementCount() const
constexpr LLT changeElementSize(unsigned NewEltSize) const
If this type is a vector, return a vector with the same number of elements but the new element size.
constexpr unsigned getAddressSpace() const
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
constexpr bool isFixedVector() const
Returns true if the LLT is a fixed vector.
constexpr LLT changeElementCount(ElementCount EC) const
Return a vector or scalar with the same element type and the new element count.
LLVM_ABI void computeTables()
Compute any ancillary tables needed to quickly decide how an operation should be handled.
LegalizeRuleSet & minScalar(unsigned TypeIdx, const LLT Ty)
Ensure the scalar is at least as wide as Ty.
LegalizeRuleSet & widenScalarOrEltToNextPow2OrMinSize(unsigned TypeIdx, unsigned MinSize=0)
Widen the scalar or vector element type to the next power of two that is at least MinSize.
LegalizeRuleSet & legalFor(std::initializer_list< LLT > Types)
The instruction is legal when type index 0 is any type in the given list.
LegalizeRuleSet & maxScalarEltSameAsIf(LegalityPredicate Predicate, unsigned TypeIdx, unsigned SmallTypeIdx)
Conditionally narrow the scalar or elt to match the size of another.
LegalizeRuleSet & unsupported()
The instruction is unsupported.
LegalizeRuleSet & scalarSameSizeAs(unsigned TypeIdx, unsigned SameSizeIdx)
Change the type TypeIdx to have the same scalar size as type SameSizeIdx.
LegalizeRuleSet & bitcastIf(LegalityPredicate Predicate, LegalizeMutation Mutation)
The specified type index is coerced if predicate is true.
LegalizeRuleSet & libcallFor(std::initializer_list< LLT > Types)
LegalizeRuleSet & maxScalar(unsigned TypeIdx, const LLT Ty)
Ensure the scalar is at most as wide as Ty.
LegalizeRuleSet & minScalarOrElt(unsigned TypeIdx, const LLT Ty)
Ensure the scalar or element is at least as wide as Ty.
LegalizeRuleSet & clampMaxNumElements(unsigned TypeIdx, const LLT EltTy, unsigned MaxElements)
Limit the number of elements in EltTy vectors to at most MaxElements.
LegalizeRuleSet & clampMinNumElements(unsigned TypeIdx, const LLT EltTy, unsigned MinElements)
Limit the number of elements in EltTy vectors to at least MinElements.
LegalizeRuleSet & widenVectorEltsToVectorMinSize(unsigned TypeIdx, unsigned VectorSize)
Ensure the vector size is at least as wide as VectorSize by promoting the element.
LegalizeRuleSet & lowerIfMemSizeNotPow2()
Lower a memory operation if the memory size, rounded to bytes, is not a power of 2.
LegalizeRuleSet & minScalarEltSameAsIf(LegalityPredicate Predicate, unsigned TypeIdx, unsigned LargeTypeIdx)
Conditionally widen the scalar or elt to match the size of another.
LegalizeRuleSet & customForCartesianProduct(std::initializer_list< LLT > Types)
LegalizeRuleSet & lowerIfMemSizeNotByteSizePow2()
Lower a memory operation if the memory access size is not a round power of 2 byte size.
LegalizeRuleSet & moreElementsToNextPow2(unsigned TypeIdx)
Add more elements to the vector to reach the next power of two.
LegalizeRuleSet & narrowScalarIf(LegalityPredicate Predicate, LegalizeMutation Mutation)
Narrow the scalar to the one selected by the mutation if the predicate is true.
LegalizeRuleSet & lower()
The instruction is lowered.
LegalizeRuleSet & moreElementsIf(LegalityPredicate Predicate, LegalizeMutation Mutation)
Add more elements to reach the type selected by the mutation if the predicate is true.
LegalizeRuleSet & lowerFor(std::initializer_list< LLT > Types)
The instruction is lowered when type index 0 is any type in the given list.
LegalizeRuleSet & scalarizeIf(LegalityPredicate Predicate, unsigned TypeIdx)
LegalizeRuleSet & lowerIf(LegalityPredicate Predicate)
The instruction is lowered if predicate is true.
LegalizeRuleSet & clampScalar(unsigned TypeIdx, const LLT MinTy, const LLT MaxTy)
Limit the range of scalar sizes to MinTy and MaxTy.
LegalizeRuleSet & custom()
Unconditionally custom lower.
LegalizeRuleSet & minScalarSameAs(unsigned TypeIdx, unsigned LargeTypeIdx)
Widen the scalar to match the size of another.
LegalizeRuleSet & unsupportedIf(LegalityPredicate Predicate)
LegalizeRuleSet & minScalarOrEltIf(LegalityPredicate Predicate, unsigned TypeIdx, const LLT Ty)
Ensure the scalar or element is at least as wide as Ty.
LegalizeRuleSet & widenScalarIf(LegalityPredicate Predicate, LegalizeMutation Mutation)
Widen the scalar to the one selected by the mutation if the predicate is true.
LegalizeRuleSet & alwaysLegal()
LegalizeRuleSet & clampNumElements(unsigned TypeIdx, const LLT MinTy, const LLT MaxTy)
Limit the number of elements for the given vectors to at least MinTy's number of elements and at most...
LegalizeRuleSet & maxScalarIf(LegalityPredicate Predicate, unsigned TypeIdx, const LLT Ty)
Conditionally limit the maximum size of the scalar.
LegalizeRuleSet & customIf(LegalityPredicate Predicate)
LegalizeRuleSet & widenScalarToNextPow2(unsigned TypeIdx, unsigned MinSize=0)
Widen the scalar to the next power of two that is at least MinSize.
LegalizeRuleSet & scalarize(unsigned TypeIdx)
LegalizeRuleSet & legalForCartesianProduct(std::initializer_list< LLT > Types)
The instruction is legal when type indexes 0 and 1 are both in the given list.
LegalizeRuleSet & legalForTypesWithMemDesc(std::initializer_list< LegalityPredicates::TypePairAndMemDesc > TypesAndMemDesc)
The instruction is legal when type indexes 0 and 1 along with the memory size and minimum alignment i...
unsigned immIdx(unsigned ImmIdx)
LegalizeRuleSet & widenScalarOrEltToNextPow2(unsigned TypeIdx, unsigned MinSize=0)
Widen the scalar or vector element type to the next power of two that is at least MinSize.
LegalizeRuleSet & legalIf(LegalityPredicate Predicate)
The instruction is legal if predicate is true.
LLVM_ABI LegalizeResult lowerDynStackAlloc(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerBitCount(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerExtractInsertVectorElt(MachineInstr &MI)
Lower a vector extract or insert by writing the vector to a stack temporary and reloading the element...
LLVM_ABI LegalizeResult lowerAbsToCNeg(MachineInstr &MI)
const TargetLowering & getTargetLowering() const
LLVM_ABI LegalizeResult lowerFunnelShiftAsShifts(MachineInstr &MI)
LLVM_ABI MachineInstrBuilder createStackStoreLoad(const DstOp &Res, const SrcOp &Val)
Create a store of Val to a stack temporary and return a load as the same type as Res.
@ Legalized
Instruction has been legalized and the MachineFunction changed.
@ UnableToLegalize
Some kind of error has occurred and we could not legalize this instruction.
GISelChangeObserver & Observer
To keep track of changes made by the LegalizerHelper.
LLVM_ABI Register getDynStackAllocTargetPtr(Register SPReg, Register AllocSize, Align Alignment, LLT PtrTy)
MachineIRBuilder & MIRBuilder
Expose MIRBuilder so clients can set their own RecordInsertInstruction functions.
LegalizeRuleSet & getActionDefinitionsBuilder(unsigned Opcode)
Get the action definition builder for the given opcode.
const LegacyLegalizerInfo & getLegacyLegalizerInfo() const
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Helper class to build MachineInstr.
void setInsertPt(MachineBasicBlock &MBB, MachineBasicBlock::iterator II)
Set the insertion point before the specified position.
MachineInstrBuilder buildAdd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ADD Op0, Op1.
MachineInstrBuilder buildNot(const DstOp &Dst, const SrcOp &Src0)
Build and insert a bitwise not, NegOne = G_CONSTANT -1 Res = G_OR Op0, NegOne.
MachineInstrBuilder buildUnmerge(ArrayRef< LLT > Res, const SrcOp &Op)
Build and insert Res0, ... = G_UNMERGE_VALUES Op.
MachineInstrBuilder buildExtract(const DstOp &Res, const SrcOp &Src, uint64_t Index)
Build and insert Res0, ... = G_EXTRACT Src, Idx0.
MachineInstrBuilder buildICmp(CmpInst::Predicate Pred, const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert a Res = G_ICMP Pred, Op0, Op1.
MachineBasicBlock::iterator getInsertPt()
Current insertion point for new instructions.
MachineInstrBuilder buildZExt(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ZEXT Op.
MachineInstrBuilder buildIntrinsic(Intrinsic::ID ID, ArrayRef< Register > Res, bool HasSideEffects, bool isConvergent)
Build and insert a G_INTRINSIC instruction.
MachineInstrBuilder buildCTLZ(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTLZ Op0, Src0.
MachineInstrBuilder buildMergeLikeInstr(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_MERGE_VALUES Op0, ... or Res = G_BUILD_VECTOR Op0, ... or Res = G_CONCAT_VEC...
MachineInstrBuilder buildLoad(const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = G_LOAD Addr, MMO.
MachineInstrBuilder buildPtrAdd(const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_PTR_ADD Op0, Op1.
MachineInstrBuilder buildBitReverse(const DstOp &Dst, const SrcOp &Src)
Build and insert Dst = G_BITREVERSE Src.
MachineInstrBuilder buildStore(const SrcOp &Val, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert G_STORE Val, Addr, MMO.
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
MachineInstrBuilder buildCTPOP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTPOP Op0, Src0.
MachineFunction & getMF()
Getter for the function we currently build.
MachineInstrBuilder buildExtOrTrunc(unsigned ExtOpc, const DstOp &Res, const SrcOp &Op)
Build and insert Res = ExtOpc, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes of...
MachineInstrBuilder buildTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_TRUNC Op.
const MachineBasicBlock & getMBB() const
Getter for the basic block we currently build.
MachineInstrBuilder buildAnyExt(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ANYEXT Op0.
MachineInstrBuilder buildBitcast(const DstOp &Dst, const SrcOp &Src)
Build and insert Dst = G_BITCAST Src.
MachineRegisterInfo * getMRI()
Getter for MRI.
MachineInstrBuilder buildFPTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FPTRUNC Op.
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
MachineInstrBuilder buildMaskLowPtrBits(const DstOp &Res, const SrcOp &Op0, uint32_t NumBits)
Build and insert Res = G_PTRMASK Op0, G_CONSTANT (1 << NumBits) - 1.
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addUse(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
Representation of each machine instruction.
const MachineOperand & getOperand(unsigned i) const
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is found.
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified register class.
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
LLVM_ABI void setRegClass(Register Reg, const TargetRegisterClass *RC)
setRegClass - Set the register class of the specified virtual register.
LLVM_ABI Register createGenericVirtualRegister(LLT Ty, StringRef Name="")
Create and return a new generic virtual register with low-level type Ty.
const TargetRegisterInfo * getTargetRegisterInfo() const
LLVM_ABI void replaceRegWith(Register FromReg, Register ToReg)
replaceRegWith - Replace all instances of FromReg with ToReg in the machine function.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Register getStackPointerRegisterToSaveRestore() const
If a physical register, this specifies the register that llvm.savestack/llvm.restorestack should save and restore.
Primary interface to the complete machine description for the target machine.
CodeModel::Model getCodeModel() const
Returns the code model.
Target - Wrapper for Target specific information.
LLVM Value Representation.
Definition Value.h:75
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the same way as for normal integer types.
Definition TypeSize.h:252
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ MO_NC
MO_NC - Indicates whether the linker is expected to check the symbol reference for overflow.
@ MO_PAGEOFF
MO_PAGEOFF - A symbol operand with this flag represents the offset of that symbol within a 4K page.
@ MO_GOT
MO_GOT - This flag indicates that a symbol operand represents the address of the GOT entry for the symbol, rather than the address of the symbol itself.
@ MO_PREL
MO_PREL - Indicates that the bits of the symbol operand represented by MO_G0 etc are PC relative.
@ MO_PAGE
MO_PAGE - A symbol operand with this flag represents the pc-relative offset of the 4K page containing the symbol.
@ MO_TAGGED
MO_TAGGED - With MO_PAGE, indicates that the page includes a memory tag in bits 56-63.
@ MO_G3
MO_G3 - A symbol operand with this flag (granule 3) represents the high 16-bits of a 64-bit address, used in a MOVZ or MOVK instruction.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
LLVM_ABI LegalityPredicate scalarOrEltWiderThan(unsigned TypeIdx, unsigned Size)
True iff the specified type index is a scalar or a vector with an element type that's wider than the given size.
LLVM_ABI LegalityPredicate isPointerVector(unsigned TypeIdx)
True iff the specified type index is a vector of pointers (with any address space).
LLVM_ABI LegalityPredicate typeInSet(unsigned TypeIdx, std::initializer_list< LLT > TypesInit)
True iff the given type index is one of the specified types.
LLVM_ABI LegalityPredicate smallerThan(unsigned TypeIdx0, unsigned TypeIdx1)
True iff the first type index has a smaller total bit size than second type index.
LLVM_ABI LegalityPredicate atomicOrderingAtLeastOrStrongerThan(unsigned MMOIdx, AtomicOrdering Ordering)
True iff the specified MMO index has an atomic ordering of Ordering or stronger.
Predicate any(Predicate P0, Predicate P1)
True iff P0 or P1 are true.
LLVM_ABI LegalityPredicate isVector(unsigned TypeIdx)
True iff the specified type index is a vector.
Predicate all(Predicate P0, Predicate P1)
True iff P0 and P1 are true.
LLVM_ABI LegalityPredicate typeIs(unsigned TypeIdx, LLT TypesInit)
True iff the given type index is the specified type.
LLVM_ABI LegalityPredicate scalarWiderThan(unsigned TypeIdx, unsigned Size)
True iff the specified type index is a scalar that's wider than the given size.
LLVM_ABI LegalityPredicate scalarNarrowerThan(unsigned TypeIdx, unsigned Size)
True iff the specified type index is a scalar that's narrower than the given size.
@ Bitcast
Perform the operation on a different, but equivalently sized type.
LLVM_ABI LegalizeMutation moreElementsToNextPow2(unsigned TypeIdx, unsigned Min=0)
Add more elements to the type for the given type index to the next power of 2.
LLVM_ABI LegalizeMutation scalarize(unsigned TypeIdx)
Break up the vector type for the given type index into the element type.
LLVM_ABI LegalizeMutation changeElementTo(unsigned TypeIdx, unsigned FromTypeIdx)
Keep the same scalar or element type as the given type index.
LLVM_ABI LegalizeMutation widenScalarOrEltToNextPow2(unsigned TypeIdx, unsigned Min=0)
Widen the scalar type or vector element type for the given type index to the next power of 2.
LLVM_ABI LegalizeMutation changeTo(unsigned TypeIdx, LLT Ty)
Select this specific type for the given type index.
LLVM_ABI LegalizeMutation changeElementSizeTo(unsigned TypeIdx, unsigned FromTypeIdx)
Change the scalar size or element size to have the same scalar size as type index FromIndex.
operand_type_match m_Reg()
ConstantMatch< APInt > m_ICst(APInt &Cst)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
BinaryOp_match< LHS, RHS, TargetOpcode::G_PTR_ADD, false > m_GPtrAdd(const LHS &L, const RHS &R)
Invariant opcodes: All instruction sets have these as their low opcodes.
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:532
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
LLVM_ABI void constrainSelectedInstRegOperands(MachineInstr &I, const TargetInstrInfo &TII, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
Mutate the newly-selected instruction I to constrain its (possibly generic) virtual register operands to the instruction's register class.
Definition Utils.cpp:155
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
LLVM_ABI std::optional< APInt > isConstantOrConstantSplatVector(MachineInstr &MI, const MachineRegisterInfo &MRI)
Determines if MI defines a constant integer or a splat vector of constant integers.
Definition Utils.cpp:1565
std::function< bool(const LegalityQuery &)> LegalityPredicate
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
constexpr uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference sizeof(SmallVector&lt;T, 0&gt;).
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Add
Sum of integers.
DWARFExpression::Operation Op
constexpr bool isShiftedInt(int64_t x)
Checks if a signed integer is an N bit number shifted left by S.
Definition MathExtras.h:182
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its APInt value and def register.
Definition Utils.cpp:432
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1947
Align assumeAligned(uint64_t Value)
Treats the value 0 as a 1, so Align is always at least 1.
Definition Alignment.h:100
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition Alignment.h:197
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
The LegalityQuery object bundles together all the information that's needed to decide whether a given operation is legal or not.
ArrayRef< MemDesc > MMODescrs
Operations which require memory can use this to place requirements on the memory type for each MMO.
ArrayRef< LLT > Types
This class contains a discriminated union of information about pointers in memory operands, relating them back to LLVM IR or to virtual locations (such as frame indices) that are exposed during codegen.