1//===- AArch64LegalizerInfo.cpp ----------------------------------*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file implements the targeting of the MachineLegalizer class for
10/// AArch64.
11/// \todo This should be generated by TableGen.
12//===----------------------------------------------------------------------===//
13
15#include "AArch64Subtarget.h"
16#include "llvm/ADT/STLExtras.h"
28#include "llvm/IR/Intrinsics.h"
29#include "llvm/IR/IntrinsicsAArch64.h"
30#include "llvm/IR/Type.h"
32#include <initializer_list>
33
34#define DEBUG_TYPE "aarch64-legalinfo"
35
36using namespace llvm;
37using namespace LegalizeActions;
38using namespace LegalizeMutations;
39using namespace LegalityPredicates;
40using namespace MIPatternMatch;
41
42AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
43 : ST(&ST) {
44 using namespace TargetOpcode;
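// Shorthand LLT constants used below: sN is an N-bit scalar, vMsN is a fixed
// vector of M elements of sN, and p0 is a 64-bit pointer in address space 0.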
45 const LLT p0 = LLT::pointer(0, 64);
46 const LLT s8 = LLT::scalar(8);
47 const LLT s16 = LLT::scalar(16);
48 const LLT s32 = LLT::scalar(32);
49 const LLT s64 = LLT::scalar(64);
50 const LLT s128 = LLT::scalar(128);
51 const LLT v16s8 = LLT::fixed_vector(16, 8);
52 const LLT v8s8 = LLT::fixed_vector(8, 8);
53 const LLT v4s8 = LLT::fixed_vector(4, 8);
54 const LLT v2s8 = LLT::fixed_vector(2, 8);
55 const LLT v8s16 = LLT::fixed_vector(8, 16);
56 const LLT v4s16 = LLT::fixed_vector(4, 16);
57 const LLT v2s16 = LLT::fixed_vector(2, 16);
58 const LLT v2s32 = LLT::fixed_vector(2, 32);
59 const LLT v4s32 = LLT::fixed_vector(4, 32);
60 const LLT v2s64 = LLT::fixed_vector(2, 64);
61 const LLT v2p0 = LLT::fixed_vector(2, p0);
62
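// Scalable (SVE) vector types: nxvMsN denotes vscale x M elements of sN,
// i.e. one 128-bit SVE granule per vscale increment.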
63 const LLT nxv16s8 = LLT::scalable_vector(16, s8);
64 const LLT nxv8s16 = LLT::scalable_vector(8, s16);
65 const LLT nxv4s32 = LLT::scalable_vector(4, s32);
66 const LLT nxv2s64 = LLT::scalable_vector(2, s64);
67
68 std::initializer_list<LLT> PackedVectorAllTypeList = {/* Begin 128bit types */
69 v16s8, v8s16, v4s32,
70 v2s64, v2p0,
71 /* End 128bit types */
72 /* Begin 64bit types */
73 v8s8, v4s16, v2s32};
74 std::initializer_list<LLT> ScalarAndPtrTypesList = {s8, s16, s32, s64, p0};
75 SmallVector<LLT, 8> PackedVectorAllTypesVec(PackedVectorAllTypeList);
76 SmallVector<LLT, 8> ScalarAndPtrTypesVec(ScalarAndPtrTypesList);
77
78 const TargetMachine &TM = ST.getTargetLowering()->getTargetMachine();
79
80 // FIXME: support subtargets which have neon/fp-armv8 disabled.
81 if (!ST.hasNEON() || !ST.hasFPARMv8()) {
83 return;
84 }
85
86 // Some instructions only support s16 if the subtarget has full 16-bit FP
87 // support.
88 const bool HasFP16 = ST.hasFullFP16();
89 const LLT &MinFPScalar = HasFP16 ? s16 : s32;
90
91 const bool HasCSSC = ST.hasCSSC();
92 const bool HasRCPC3 = ST.hasRCPC3();
93 const bool HasSVE = ST.hasSVE();
94
95 getActionDefinitionsBuilder(
96 {G_IMPLICIT_DEF, G_FREEZE, G_CONSTANT_FOLD_BARRIER})
97 .legalFor({p0, s8, s16, s32, s64})
98 .legalFor({v2s8, v4s8, v8s8, v16s8, v2s16, v4s16, v8s16, v2s32, v4s32,
99 v2s64, v2p0})
100 .widenScalarToNextPow2(0)
101 .clampScalar(0, s8, s64)
104 .clampNumElements(0, v8s8, v16s8)
105 .clampNumElements(0, v4s16, v8s16)
106 .clampNumElements(0, v2s32, v4s32)
107 .clampMaxNumElements(0, s64, 2)
108 .clampMaxNumElements(0, p0, 2)
110
112 .legalFor({p0, s16, s32, s64})
113 .legalFor(PackedVectorAllTypeList)
117 .clampScalar(0, s16, s64)
118 .clampNumElements(0, v8s8, v16s8)
119 .clampNumElements(0, v4s16, v8s16)
120 .clampNumElements(0, v2s32, v4s32)
121 .clampMaxNumElements(0, s64, 2)
122 .clampMaxNumElements(0, p0, 2);
123
125 .legalIf(all(typeInSet(0, {s32, s64, p0}), typeInSet(1, {s8, s16, s32}),
126 smallerThan(1, 0)))
127 .widenScalarToNextPow2(0)
128 .clampScalar(0, s32, s64)
130 .minScalar(1, s8)
131 .maxScalarIf(typeInSet(0, {s32}), 1, s16)
132 .maxScalarIf(typeInSet(0, {s64, p0}), 1, s32);
133
135 .legalIf(all(typeInSet(0, {s16, s32, s64, p0}),
136 typeInSet(1, {s32, s64, s128, p0}), smallerThan(0, 1)))
137 .widenScalarToNextPow2(1)
138 .clampScalar(1, s32, s128)
140 .minScalar(0, s16)
141 .maxScalarIf(typeInSet(1, {s32}), 0, s16)
142 .maxScalarIf(typeInSet(1, {s64, p0}), 0, s32)
143 .maxScalarIf(typeInSet(1, {s128}), 0, s64);
144
145 getActionDefinitionsBuilder({G_ADD, G_SUB, G_AND, G_OR, G_XOR})
146 .legalFor({s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32, v2s64})
147 .legalFor(HasSVE, {nxv16s8, nxv8s16, nxv4s32, nxv2s64})
148 .widenScalarToNextPow2(0)
149 .clampScalar(0, s32, s64)
150 .clampMaxNumElements(0, s8, 16)
151 .clampMaxNumElements(0, s16, 8)
152 .clampNumElements(0, v2s32, v4s32)
153 .clampNumElements(0, v2s64, v2s64)
154 .minScalarOrEltIf(
155 [=](const LegalityQuery &Query) {
156 return Query.Types[0].getNumElements() <= 2;
157 },
158 0, s32)
159 .minScalarOrEltIf(
160 [=](const LegalityQuery &Query) {
161 return Query.Types[0].getNumElements() <= 4;
162 },
163 0, s16)
164 .minScalarOrEltIf(
165 [=](const LegalityQuery &Query) {
166 return Query.Types[0].getNumElements() <= 16;
167 },
168 0, s8)
169 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
171
173 .legalFor({s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32, v2s64})
174 .widenScalarToNextPow2(0)
175 .clampScalar(0, s32, s64)
176 .clampMaxNumElements(0, s8, 16)
177 .clampMaxNumElements(0, s16, 8)
178 .clampNumElements(0, v2s32, v4s32)
179 .clampNumElements(0, v2s64, v2s64)
180 .minScalarOrEltIf(
181 [=](const LegalityQuery &Query) {
182 return Query.Types[0].getNumElements() <= 2;
183 },
184 0, s32)
185 .minScalarOrEltIf(
186 [=](const LegalityQuery &Query) {
187 return Query.Types[0].getNumElements() <= 4;
188 },
189 0, s16)
190 .minScalarOrEltIf(
191 [=](const LegalityQuery &Query) {
192 return Query.Types[0].getNumElements() <= 16;
193 },
194 0, s8)
195 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
197
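// 32-bit shifts with a 32-bit amount are marked custom below so that a
// constant shift amount can be promoted to a 64-bit constant, which the
// imported SelectionDAG patterns expect (see legalizeShlAshrLshr).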
198 getActionDefinitionsBuilder({G_SHL, G_ASHR, G_LSHR})
199 .customIf([=](const LegalityQuery &Query) {
200 const auto &SrcTy = Query.Types[0];
201 const auto &AmtTy = Query.Types[1];
202 return !SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
203 AmtTy.getSizeInBits() == 32;
204 })
205 .legalFor({
206 {s32, s32},
207 {s32, s64},
208 {s64, s64},
209 {v8s8, v8s8},
210 {v16s8, v16s8},
211 {v4s16, v4s16},
212 {v8s16, v8s16},
213 {v2s32, v2s32},
214 {v4s32, v4s32},
215 {v2s64, v2s64},
216 })
217 .widenScalarToNextPow2(0)
218 .clampScalar(1, s32, s64)
219 .clampScalar(0, s32, s64)
220 .clampNumElements(0, v8s8, v16s8)
221 .clampNumElements(0, v4s16, v8s16)
222 .clampNumElements(0, v2s32, v4s32)
223 .clampNumElements(0, v2s64, v2s64)
225 .minScalarSameAs(1, 0)
229
231 .legalFor({{p0, s64}, {v2p0, v2s64}})
232 .clampScalarOrElt(1, s64, s64)
233 .clampNumElements(0, v2p0, v2p0);
234
235 getActionDefinitionsBuilder(G_PTRMASK).legalFor({{p0, s64}});
236
237 getActionDefinitionsBuilder({G_SDIV, G_UDIV})
238 .legalFor({s32, s64})
239 .libcallFor({s128})
240 .clampScalar(0, s32, s64)
242 .scalarize(0);
243
244 getActionDefinitionsBuilder({G_SREM, G_UREM, G_SDIVREM, G_UDIVREM})
245 .lowerFor({s8, s16, s32, s64, v2s32, v4s32, v2s64})
246 .libcallFor({s128})
248 .minScalarOrElt(0, s32)
249 .clampNumElements(0, v2s32, v4s32)
250 .clampNumElements(0, v2s64, v2s64)
251 .scalarize(0);
252
253 getActionDefinitionsBuilder({G_SMULO, G_UMULO})
254 .widenScalarToNextPow2(0, /*Min = */ 32)
255 .clampScalar(0, s32, s64)
256 .lower();
257
258 getActionDefinitionsBuilder({G_SMULH, G_UMULH})
259 .legalFor({s64, v16s8, v8s16, v4s32})
260 .lower();
261
262 getActionDefinitionsBuilder({G_SMIN, G_SMAX, G_UMIN, G_UMAX})
263 .legalFor({v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
264 .legalFor(HasCSSC, {s32, s64})
265 .minScalar(HasCSSC, 0, s32)
266 .clampNumElements(0, v8s8, v16s8)
267 .clampNumElements(0, v4s16, v8s16)
268 .clampNumElements(0, v2s32, v4s32)
269 .lower();
270
271 // FIXME: Legal vector types are only legal with NEON.
273 .legalFor(HasCSSC, {s32, s64})
274 .legalFor(PackedVectorAllTypeList)
275 .customIf([=](const LegalityQuery &Q) {
276 // TODO: Fix suboptimal codegen for 128+ bit types.
277 LLT SrcTy = Q.Types[0];
278 return SrcTy.isScalar() && SrcTy.getSizeInBits() < 128;
279 })
280 .widenScalarIf(
281 [=](const LegalityQuery &Query) { return Query.Types[0] == v4s8; },
282 [=](const LegalityQuery &Query) { return std::make_pair(0, v4s16); })
283 .widenScalarIf(
284 [=](const LegalityQuery &Query) { return Query.Types[0] == v2s16; },
285 [=](const LegalityQuery &Query) { return std::make_pair(0, v2s32); })
286 .clampNumElements(0, v8s8, v16s8)
287 .clampNumElements(0, v4s16, v8s16)
288 .clampNumElements(0, v2s32, v4s32)
289 .clampNumElements(0, v2s64, v2s64)
291 .lower();
292
294 {G_ABDS, G_ABDU, G_UAVGFLOOR, G_UAVGCEIL, G_SAVGFLOOR, G_SAVGCEIL})
295 .legalFor({v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
296 .lower();
297
299 {G_SADDE, G_SSUBE, G_UADDE, G_USUBE, G_SADDO, G_SSUBO, G_UADDO, G_USUBO})
300 .legalFor({{s32, s32}, {s64, s32}})
301 .clampScalar(0, s32, s64)
302 .clampScalar(1, s32, s64)
304
305 getActionDefinitionsBuilder({G_FSHL, G_FSHR})
306 .customFor({{s32, s32}, {s32, s64}, {s64, s64}})
307 .lower();
308
310 .legalFor({{s32, s64}, {s64, s64}})
311 .customIf([=](const LegalityQuery &Q) {
312 return Q.Types[0].isScalar() && Q.Types[1].getScalarSizeInBits() < 64;
313 })
314 .lower();
316
317 getActionDefinitionsBuilder({G_SBFX, G_UBFX})
318 .customFor({{s32, s32}, {s64, s64}});
319
320 auto always = [=](const LegalityQuery &Q) { return true; };
322 .legalFor(HasCSSC, {{s32, s32}, {s64, s64}})
323 .legalFor({{v8s8, v8s8}, {v16s8, v16s8}})
324 .customFor(!HasCSSC, {{s32, s32}, {s64, s64}})
325 .customFor({{s128, s128},
326 {v4s16, v4s16},
327 {v8s16, v8s16},
328 {v2s32, v2s32},
329 {v4s32, v4s32},
330 {v2s64, v2s64}})
331 .clampScalar(0, s32, s128)
334 .minScalarEltSameAsIf(always, 1, 0)
335 .maxScalarEltSameAsIf(always, 1, 0)
336 .clampNumElements(0, v8s8, v16s8)
337 .clampNumElements(0, v4s16, v8s16)
338 .clampNumElements(0, v2s32, v4s32)
339 .clampNumElements(0, v2s64, v2s64)
342
343 getActionDefinitionsBuilder({G_CTLZ, G_CTLS})
344 .legalFor({{s32, s32},
345 {s64, s64},
346 {v8s8, v8s8},
347 {v16s8, v16s8},
348 {v4s16, v4s16},
349 {v8s16, v8s16},
350 {v2s32, v2s32},
351 {v4s32, v4s32}})
352 .widenScalarToNextPow2(1, /*Min=*/32)
353 .clampScalar(1, s32, s64)
355 .clampNumElements(0, v8s8, v16s8)
356 .clampNumElements(0, v4s16, v8s16)
357 .clampNumElements(0, v2s32, v4s32)
360 .scalarSameSizeAs(0, 1);
361
362 getActionDefinitionsBuilder(G_CTLZ_ZERO_UNDEF).lower();
363
365 .lowerIf(isVector(0))
366 .widenScalarToNextPow2(1, /*Min=*/32)
367 .clampScalar(1, s32, s64)
368 .scalarSameSizeAs(0, 1)
369 .legalFor(HasCSSC, {s32, s64})
370 .customFor(!HasCSSC, {s32, s64});
371
372 getActionDefinitionsBuilder(G_CTTZ_ZERO_UNDEF).lower();
373
374 getActionDefinitionsBuilder(G_BITREVERSE)
375 .legalFor({s32, s64, v8s8, v16s8})
376 .widenScalarToNextPow2(0, /*Min = */ 32)
378 .clampScalar(0, s32, s64)
379 .clampNumElements(0, v8s8, v16s8)
380 .clampNumElements(0, v4s16, v8s16)
381 .clampNumElements(0, v2s32, v4s32)
382 .clampNumElements(0, v2s64, v2s64)
385 .lower();
386
388 .legalFor({s32, s64, v4s16, v8s16, v2s32, v4s32, v2s64})
390 .clampScalar(0, s32, s64)
391 .clampNumElements(0, v4s16, v8s16)
392 .clampNumElements(0, v2s32, v4s32)
393 .clampNumElements(0, v2s64, v2s64)
395
396 getActionDefinitionsBuilder({G_UADDSAT, G_SADDSAT, G_USUBSAT, G_SSUBSAT})
397 .legalFor({v8s8, v16s8, v4s16, v8s16, v2s32, v4s32, v2s64})
398 .legalFor(HasSVE, {nxv16s8, nxv8s16, nxv4s32, nxv2s64})
399 .clampNumElements(0, v8s8, v16s8)
400 .clampNumElements(0, v4s16, v8s16)
401 .clampNumElements(0, v2s32, v4s32)
402 .clampMaxNumElements(0, s64, 2)
405 .lower();
406
408 {G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FMA, G_FSQRT, G_FMAXNUM, G_FMINNUM,
409 G_FMAXIMUM, G_FMINIMUM, G_FCEIL, G_FFLOOR, G_FRINT, G_FNEARBYINT,
410 G_INTRINSIC_TRUNC, G_INTRINSIC_ROUND, G_INTRINSIC_ROUNDEVEN})
411 .legalFor({s32, s64, v2s32, v4s32, v2s64})
412 .legalFor(HasFP16, {s16, v4s16, v8s16})
413 .libcallFor({s128})
414 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
415 .minScalarOrElt(0, MinFPScalar)
416 .clampNumElements(0, v4s16, v8s16)
417 .clampNumElements(0, v2s32, v4s32)
418 .clampNumElements(0, v2s64, v2s64)
420
421 getActionDefinitionsBuilder({G_FABS, G_FNEG})
422 .legalFor({s32, s64, v2s32, v4s32, v2s64})
423 .legalFor(HasFP16, {s16, v4s16, v8s16})
424 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
426 .clampNumElements(0, v4s16, v8s16)
427 .clampNumElements(0, v2s32, v4s32)
428 .clampNumElements(0, v2s64, v2s64)
430 .lowerFor({s16, v4s16, v8s16});
431
433 .libcallFor({s32, s64, s128})
434 .minScalar(0, s32)
435 .scalarize(0);
436
437 getActionDefinitionsBuilder({G_FCOS, G_FSIN, G_FPOW, G_FLOG, G_FLOG2,
438 G_FLOG10, G_FTAN, G_FEXP, G_FEXP2, G_FEXP10,
439 G_FACOS, G_FASIN, G_FATAN, G_FATAN2, G_FCOSH,
440 G_FSINH, G_FTANH, G_FMODF})
441 // We need a call for these, so we always need to scalarize.
442 .scalarize(0)
443 // Regardless of FP16 support, widen 16-bit elements to 32-bits.
444 .minScalar(0, s32)
445 .libcallFor({s32, s64, s128});
446 getActionDefinitionsBuilder({G_FPOWI, G_FLDEXP})
447 .scalarize(0)
448 .minScalar(0, s32)
449 .libcallFor({{s32, s32}, {s64, s32}, {s128, s32}});
450
451 getActionDefinitionsBuilder({G_LROUND, G_INTRINSIC_LRINT})
452 .legalFor({{s32, s32}, {s32, s64}, {s64, s32}, {s64, s64}})
453 .legalFor(HasFP16, {{s32, s16}, {s64, s16}})
454 .minScalar(1, s32)
455 .libcallFor({{s64, s128}})
456 .lower();
457 getActionDefinitionsBuilder({G_LLROUND, G_INTRINSIC_LLRINT})
458 .legalFor({{s64, s32}, {s64, s64}})
459 .legalFor(HasFP16, {{s64, s16}})
460 .minScalar(0, s64)
461 .minScalar(1, s32)
462 .libcallFor({{s64, s128}})
463 .lower();
464
465 // TODO: Custom legalization for mismatched types.
466 getActionDefinitionsBuilder(G_FCOPYSIGN)
467 .moreElementsIf(
468 [](const LegalityQuery &Query) { return Query.Types[0].isScalar(); },
469 [=](const LegalityQuery &Query) {
470 const LLT Ty = Query.Types[0];
471 return std::pair(0, LLT::fixed_vector(Ty == s16 ? 4 : 2, Ty));
472 })
473 .lower();
474
476
477 for (unsigned Op : {G_SEXTLOAD, G_ZEXTLOAD}) {
478 auto &Actions = getActionDefinitionsBuilder(Op);
479
480 if (Op == G_SEXTLOAD)
481 Actions.lowerIf(atomicOrderingAtLeastOrStrongerThan(
482 0, AtomicOrdering::Unordered));
483 // Atomics have zero extending behavior.
484 Actions
485 .legalForTypesWithMemDesc({{s32, p0, s8, 8},
486 {s32, p0, s16, 8},
487 {s32, p0, s32, 8},
488 {s64, p0, s8, 2},
489 {s64, p0, s16, 2},
490 {s64, p0, s32, 4},
491 {s64, p0, s64, 8},
492 {p0, p0, s64, 8},
493 {v2s32, p0, s64, 8}})
494 .widenScalarToNextPow2(0)
495 .clampScalar(0, s32, s64)
496 // TODO: We could support sum-of-pow2's but the lowering code doesn't know
497 // how to do that yet.
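// (A "sum of pow2" memory size would be e.g. an s48 extending load, which is
// 32 + 16 and could in principle be split into two loads.)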
498 .unsupportedIfMemSizeNotPow2()
499 // Lower anything left over into G_*EXT and G_LOAD
500 .lower();
501 }
502
503 auto IsPtrVecPred = [=](const LegalityQuery &Query) {
504 const LLT &ValTy = Query.Types[0];
505 return ValTy.isPointerVector() && ValTy.getAddressSpace() == 0;
506 };
507
509 .customIf([=](const LegalityQuery &Query) {
510 return HasRCPC3 && Query.Types[0] == s128 &&
511 Query.MMODescrs[0].Ordering == AtomicOrdering::Acquire;
512 })
513 .customIf([=](const LegalityQuery &Query) {
514 return Query.Types[0] == s128 &&
515 Query.MMODescrs[0].Ordering != AtomicOrdering::NotAtomic;
516 })
517 .legalForTypesWithMemDesc({{s8, p0, s8, 8},
518 {s16, p0, s16, 8},
519 {s32, p0, s32, 8},
520 {s64, p0, s64, 8},
521 {p0, p0, s64, 8},
522 {s128, p0, s128, 8},
523 {v8s8, p0, s64, 8},
524 {v16s8, p0, s128, 8},
525 {v4s16, p0, s64, 8},
526 {v8s16, p0, s128, 8},
527 {v2s32, p0, s64, 8},
528 {v4s32, p0, s128, 8},
529 {v2s64, p0, s128, 8}})
530 // These extends are also legal
531 .legalForTypesWithMemDesc(
532 {{s32, p0, s8, 8}, {s32, p0, s16, 8}, {s64, p0, s32, 8}})
533 .legalForTypesWithMemDesc({
534 // SVE vscale x 128 bit base sizes
535 {nxv16s8, p0, nxv16s8, 8},
536 {nxv8s16, p0, nxv8s16, 8},
537 {nxv4s32, p0, nxv4s32, 8},
538 {nxv2s64, p0, nxv2s64, 8},
539 })
540 .widenScalarToNextPow2(0, /* MinSize = */ 8)
541 .clampMaxNumElements(0, s8, 16)
542 .clampMaxNumElements(0, s16, 8)
543 .clampMaxNumElements(0, s32, 4)
544 .clampMaxNumElements(0, s64, 2)
545 .clampMaxNumElements(0, p0, 2)
547 .clampScalar(0, s8, s64)
549 [=](const LegalityQuery &Query) {
550 // Clamp extending load results to 32-bits.
551 return Query.Types[0].isScalar() &&
552 Query.Types[0] != Query.MMODescrs[0].MemoryTy &&
553 Query.Types[0].getSizeInBits() > 32;
554 },
555 changeTo(0, s32))
556 // TODO: Use BITCAST for v2i8, v2i16 after G_TRUNC gets sorted out
557 .bitcastIf(typeInSet(0, {v4s8}),
558 [=](const LegalityQuery &Query) {
559 const LLT VecTy = Query.Types[0];
560 return std::pair(0, LLT::scalar(VecTy.getSizeInBits()));
561 })
562 .customIf(IsPtrVecPred)
563 .scalarizeIf(typeInSet(0, {v2s16, v2s8}), 0)
564 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0);
565
567 .customIf([=](const LegalityQuery &Query) {
568 return HasRCPC3 && Query.Types[0] == s128 &&
569 Query.MMODescrs[0].Ordering == AtomicOrdering::Release;
570 })
571 .customIf([=](const LegalityQuery &Query) {
572 return Query.Types[0] == s128 &&
573 Query.MMODescrs[0].Ordering != AtomicOrdering::NotAtomic;
574 })
575 .widenScalarIf(
576 all(scalarNarrowerThan(0, 32),
578 changeTo(0, s32))
579 .legalForTypesWithMemDesc(
580 {{s8, p0, s8, 8}, {s16, p0, s8, 8}, // truncstorei8 from s16
581 {s32, p0, s8, 8}, // truncstorei8 from s32
582 {s64, p0, s8, 8}, // truncstorei8 from s64
583 {s16, p0, s16, 8}, {s32, p0, s16, 8}, // truncstorei16 from s32
584 {s64, p0, s16, 8}, // truncstorei16 from s64
585 {s32, p0, s8, 8}, {s32, p0, s16, 8}, {s32, p0, s32, 8},
586 {s64, p0, s64, 8}, {s64, p0, s32, 8}, // truncstorei32 from s64
587 {p0, p0, s64, 8}, {s128, p0, s128, 8}, {v16s8, p0, s128, 8},
588 {v8s8, p0, s64, 8}, {v4s16, p0, s64, 8}, {v8s16, p0, s128, 8},
589 {v2s32, p0, s64, 8}, {v4s32, p0, s128, 8}, {v2s64, p0, s128, 8}})
590 .legalForTypesWithMemDesc({
591 // SVE vscale x 128 bit base sizes
592 // TODO: Add nxv2p0. Consider bitcastIf.
593 // See #92130
594 // https://github.com/llvm/llvm-project/pull/92130#discussion_r1616888461
595 {nxv16s8, p0, nxv16s8, 8},
596 {nxv8s16, p0, nxv8s16, 8},
597 {nxv4s32, p0, nxv4s32, 8},
598 {nxv2s64, p0, nxv2s64, 8},
599 })
600 .clampScalar(0, s8, s64)
601 .minScalarOrElt(0, s8)
602 .lowerIf([=](const LegalityQuery &Query) {
603 return Query.Types[0].isScalar() &&
604 Query.Types[0] != Query.MMODescrs[0].MemoryTy;
605 })
606 // Maximum: sN * k = 128
607 .clampMaxNumElements(0, s8, 16)
608 .clampMaxNumElements(0, s16, 8)
609 .clampMaxNumElements(0, s32, 4)
610 .clampMaxNumElements(0, s64, 2)
611 .clampMaxNumElements(0, p0, 2)
613 // TODO: Use BITCAST for v2i8, v2i16 after G_TRUNC gets sorted out
614 .bitcastIf(all(typeInSet(0, {v4s8}),
615 LegalityPredicate([=](const LegalityQuery &Query) {
616 return Query.Types[0].getSizeInBits() ==
617 Query.MMODescrs[0].MemoryTy.getSizeInBits();
618 })),
619 [=](const LegalityQuery &Query) {
620 const LLT VecTy = Query.Types[0];
621 return std::pair(0, LLT::scalar(VecTy.getSizeInBits()));
622 })
623 .customIf(IsPtrVecPred)
624 .scalarizeIf(typeInSet(0, {v2s16, v2s8}), 0)
625 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
626 .lower();
627
628 getActionDefinitionsBuilder(G_INDEXED_STORE)
629 // Idx 0 == Ptr, Idx 1 == Val
630 // TODO: we can implement legalizations but as of now these are
631 // generated in a very specific way.
632 .legalForTypesWithMemDesc({
633 {p0, s8, s8, 8},
634 {p0, s16, s16, 8},
635 {p0, s32, s8, 8},
636 {p0, s32, s16, 8},
637 {p0, s32, s32, 8},
638 {p0, s64, s64, 8},
639 {p0, p0, p0, 8},
640 {p0, v8s8, v8s8, 8},
641 {p0, v16s8, v16s8, 8},
642 {p0, v4s16, v4s16, 8},
643 {p0, v8s16, v8s16, 8},
644 {p0, v2s32, v2s32, 8},
645 {p0, v4s32, v4s32, 8},
646 {p0, v2s64, v2s64, 8},
647 {p0, v2p0, v2p0, 8},
648 {p0, s128, s128, 8},
649 })
650 .unsupported();
651
652 auto IndexedLoadBasicPred = [=](const LegalityQuery &Query) {
653 LLT LdTy = Query.Types[0];
654 LLT PtrTy = Query.Types[1];
655 if (!llvm::is_contained(PackedVectorAllTypesVec, LdTy) &&
656 !llvm::is_contained(ScalarAndPtrTypesVec, LdTy) && LdTy != s128)
657 return false;
658 if (PtrTy != p0)
659 return false;
660 return true;
661 };
662 getActionDefinitionsBuilder(G_INDEXED_LOAD)
663 .unsupportedIf(
664 atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Unordered))
665 .legalIf(IndexedLoadBasicPred)
666 .unsupported();
667 getActionDefinitionsBuilder({G_INDEXED_SEXTLOAD, G_INDEXED_ZEXTLOAD})
668 .unsupportedIf(
669 atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Unordered))
670 .legalIf(all(typeInSet(0, {s16, s32, s64}),
671 LegalityPredicate([=](const LegalityQuery &Q) {
672 LLT LdTy = Q.Types[0];
673 LLT PtrTy = Q.Types[1];
674 LLT MemTy = Q.MMODescrs[0].MemoryTy;
675 if (PtrTy != p0)
676 return false;
677 if (LdTy == s16)
678 return MemTy == s8;
679 if (LdTy == s32)
680 return MemTy == s8 || MemTy == s16;
681 if (LdTy == s64)
682 return MemTy == s8 || MemTy == s16 || MemTy == s32;
683 return false;
684 })))
685 .unsupported();
686
687 // Constants
688 getActionDefinitionsBuilder(G_CONSTANT)
689 .legalFor({p0, s8, s16, s32, s64})
690 .widenScalarToNextPow2(0)
691 .clampScalar(0, s8, s64);
692 getActionDefinitionsBuilder(G_FCONSTANT)
693 // Always legalize s16 to prevent G_FCONSTANT being widened to G_CONSTANT
694 .legalFor({s16, s32, s64, s128})
695 .clampScalar(0, MinFPScalar, s128);
696
697 // FIXME: fix moreElementsToNextPow2
698 getActionDefinitionsBuilder(G_ICMP)
699 .legalFor({{s32, s32}, {s32, s64}, {s32, p0}})
701 .clampScalar(1, s32, s64)
702 .clampScalar(0, s32, s32)
705 [=](const LegalityQuery &Query) {
706 const LLT &Ty = Query.Types[0];
707 const LLT &SrcTy = Query.Types[1];
708 return Ty.isVector() && !SrcTy.isPointerVector() &&
709 Ty.getElementType() != SrcTy.getElementType();
710 },
711 0, 1)
712 .minScalarOrEltIf(
713 [=](const LegalityQuery &Query) { return Query.Types[1] == v2s16; },
714 1, s32)
715 .minScalarOrEltIf(
716 [=](const LegalityQuery &Query) {
717 return Query.Types[1].isPointerVector();
718 },
719 0, s64)
721 .clampNumElements(1, v8s8, v16s8)
722 .clampNumElements(1, v4s16, v8s16)
723 .clampNumElements(1, v2s32, v4s32)
724 .clampNumElements(1, v2s64, v2s64)
725 .clampNumElements(1, v2p0, v2p0)
726 .customIf(isVector(0));
727
728 getActionDefinitionsBuilder(G_FCMP)
729 .legalFor({{s32, s32},
730 {s32, s64},
731 {v4s32, v4s32},
732 {v2s32, v2s32},
733 {v2s64, v2s64}})
734 .legalFor(HasFP16, {{s32, s16}, {v4s16, v4s16}, {v8s16, v8s16}})
736 .clampScalar(0, s32, s32)
737 .minScalarOrElt(1, MinFPScalar)
740 [=](const LegalityQuery &Query) {
741 const LLT &Ty = Query.Types[0];
742 const LLT &SrcTy = Query.Types[1];
743 return Ty.isVector() && !SrcTy.isPointerVector() &&
744 Ty.getElementType() != SrcTy.getElementType();
745 },
746 0, 1)
747 .clampNumElements(1, v4s16, v8s16)
748 .clampNumElements(1, v2s32, v4s32)
749 .clampMaxNumElements(1, s64, 2)
751 .libcallFor({{s32, s128}});
752
753 // Extensions
754 auto ExtLegalFunc = [=](const LegalityQuery &Query) {
755 unsigned DstSize = Query.Types[0].getSizeInBits();
756
757 // Handle legal vectors using legalFor
758 if (Query.Types[0].isVector())
759 return false;
760
761 if (DstSize < 8 || DstSize >= 128 || !isPowerOf2_32(DstSize))
762 return false; // Extending to a scalar s128 needs narrowing.
763
764 const LLT &SrcTy = Query.Types[1];
765
766 // Make sure we fit in a register otherwise. Don't bother checking that
767 // the source type is below 128 bits. We shouldn't be allowing anything
768 // through which is wider than the destination in the first place.
769 unsigned SrcSize = SrcTy.getSizeInBits();
770 if (SrcSize < 8 || !isPowerOf2_32(SrcSize))
771 return false;
772
773 return true;
774 };
775 getActionDefinitionsBuilder({G_ZEXT, G_SEXT, G_ANYEXT})
776 .legalIf(ExtLegalFunc)
777 .legalFor({{v8s16, v8s8}, {v4s32, v4s16}, {v2s64, v2s32}})
778 .clampScalar(0, s64, s64) // Just for s128, others are handled above.
780 .clampMaxNumElements(1, s8, 8)
781 .clampMaxNumElements(1, s16, 4)
782 .clampMaxNumElements(1, s32, 2)
783 // Tries to convert a large EXTEND into two smaller EXTENDs
784 .lowerIf([=](const LegalityQuery &Query) {
785 return (Query.Types[0].getScalarSizeInBits() >
786 Query.Types[1].getScalarSizeInBits() * 2) &&
787 Query.Types[0].isVector() &&
788 (Query.Types[1].getScalarSizeInBits() == 8 ||
789 Query.Types[1].getScalarSizeInBits() == 16);
790 })
791 .clampMinNumElements(1, s8, 8)
792 .clampMinNumElements(1, s16, 4)
794
795 getActionDefinitionsBuilder(G_TRUNC)
796 .legalFor({{v8s8, v8s16}, {v4s16, v4s32}, {v2s32, v2s64}})
798 .clampMaxNumElements(0, s8, 8)
799 .clampMaxNumElements(0, s16, 4)
800 .clampMaxNumElements(0, s32, 2)
802 [=](const LegalityQuery &Query) { return Query.Types[0].isVector(); },
803 0, s8)
804 .lowerIf([=](const LegalityQuery &Query) {
805 LLT DstTy = Query.Types[0];
806 LLT SrcTy = Query.Types[1];
807 return DstTy.isVector() && SrcTy.getSizeInBits() > 128 &&
808 DstTy.getScalarSizeInBits() * 2 <= SrcTy.getScalarSizeInBits();
809 })
810 .clampMinNumElements(0, s8, 8)
811 .clampMinNumElements(0, s16, 4)
812 .alwaysLegal();
813
814 getActionDefinitionsBuilder({G_TRUNC_SSAT_S, G_TRUNC_SSAT_U, G_TRUNC_USAT_U})
815 .legalFor({{v8s8, v8s16}, {v4s16, v4s32}, {v2s32, v2s64}})
816 .clampNumElements(0, v2s32, v2s32);
817
818 getActionDefinitionsBuilder(G_SEXT_INREG)
819 .legalFor({s32, s64})
820 .legalFor(PackedVectorAllTypeList)
821 .maxScalar(0, s64)
822 .clampNumElements(0, v8s8, v16s8)
823 .clampNumElements(0, v4s16, v8s16)
824 .clampNumElements(0, v2s32, v4s32)
825 .clampMaxNumElements(0, s64, 2)
826 .lower();
827
828 // FP conversions
829 getActionDefinitionsBuilder(G_FPTRUNC)
830 .legalFor(
831 {{s16, s32}, {s16, s64}, {s32, s64}, {v4s16, v4s32}, {v2s32, v2s64}})
832 .libcallFor({{s16, s128}, {s32, s128}, {s64, s128}})
834 .customIf([](const LegalityQuery &Q) {
835 LLT DstTy = Q.Types[0];
836 LLT SrcTy = Q.Types[1];
837 return SrcTy.isFixedVector() && DstTy.isFixedVector() &&
838 SrcTy.getScalarSizeInBits() == 64 &&
839 DstTy.getScalarSizeInBits() == 16;
840 })
841 // Clamp based on input
842 .clampNumElements(1, v4s32, v4s32)
843 .clampNumElements(1, v2s64, v2s64)
844 .scalarize(0);
845
846 getActionDefinitionsBuilder(G_FPEXT)
847 .legalFor(
848 {{s32, s16}, {s64, s16}, {s64, s32}, {v4s32, v4s16}, {v2s64, v2s32}})
849 .libcallFor({{s128, s64}, {s128, s32}, {s128, s16}})
852 [](const LegalityQuery &Q) {
853 LLT DstTy = Q.Types[0];
854 LLT SrcTy = Q.Types[1];
855 return SrcTy.isVector() && DstTy.isVector() &&
856 SrcTy.getScalarSizeInBits() == 16 &&
857 DstTy.getScalarSizeInBits() == 64;
858 },
859 changeElementTo(1, s32))
860 .clampNumElements(0, v4s32, v4s32)
861 .clampNumElements(0, v2s64, v2s64)
862 .scalarize(0);
863
864 // Conversions
865 getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
866 .legalFor({{s32, s32},
867 {s64, s32},
868 {s32, s64},
869 {s64, s64},
870 {v2s32, v2s32},
871 {v4s32, v4s32},
872 {v2s64, v2s64}})
873 .legalFor(HasFP16,
874 {{s32, s16}, {s64, s16}, {v4s16, v4s16}, {v8s16, v8s16}})
875 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
877 // The range of a fp16 value fits into an i17, so we can lower the width
878 // to i64.
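// (The largest finite fp16 value is 65504, so the integer result of an fp16
// conversion always fits in 17 bits; computing it at 64 bits and extending
// afterwards is lossless.)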
880 [=](const LegalityQuery &Query) {
881 return Query.Types[1] == s16 && Query.Types[0].getSizeInBits() > 64;
882 },
883 changeTo(0, s64))
886 .minScalar(0, s32)
887 .widenScalarOrEltToNextPow2OrMinSize(1, /*MinSize=*/HasFP16 ? 16 : 32)
889 [=](const LegalityQuery &Query) {
890 return Query.Types[0].getScalarSizeInBits() <= 64 &&
891 Query.Types[0].getScalarSizeInBits() >
892 Query.Types[1].getScalarSizeInBits();
893 },
895 .widenScalarIf(
896 [=](const LegalityQuery &Query) {
897 return Query.Types[1].getScalarSizeInBits() <= 64 &&
898 Query.Types[0].getScalarSizeInBits() <
899 Query.Types[1].getScalarSizeInBits();
900 },
902 .clampNumElements(0, v4s16, v8s16)
903 .clampNumElements(0, v2s32, v4s32)
904 .clampMaxNumElements(0, s64, 2)
905 .libcallFor(
906 {{s32, s128}, {s64, s128}, {s128, s128}, {s128, s32}, {s128, s64}});
907
908 getActionDefinitionsBuilder({G_FPTOSI_SAT, G_FPTOUI_SAT})
909 .legalFor({{s32, s32},
910 {s64, s32},
911 {s32, s64},
912 {s64, s64},
913 {v2s32, v2s32},
914 {v4s32, v4s32},
915 {v2s64, v2s64}})
916 .legalFor(
917 HasFP16,
918 {{s16, s16}, {s32, s16}, {s64, s16}, {v4s16, v4s16}, {v8s16, v8s16}})
919 // Handle types larger than i64 by scalarizing/lowering.
920 .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
922 // The range of a fp16 value fits into an i17, so we can lower the width
923 // to i64.
925 [=](const LegalityQuery &Query) {
926 return Query.Types[1] == s16 && Query.Types[0].getSizeInBits() > 64;
927 },
928 changeTo(0, s64))
929 .lowerIf(::any(scalarWiderThan(0, 64), scalarWiderThan(1, 64)), 0)
931 .widenScalarToNextPow2(0, /*MinSize=*/32)
932 .minScalar(0, s32)
933 .widenScalarOrEltToNextPow2OrMinSize(1, /*MinSize=*/HasFP16 ? 16 : 32)
935 [=](const LegalityQuery &Query) {
936 unsigned ITySize = Query.Types[0].getScalarSizeInBits();
937 return (ITySize == 16 || ITySize == 32 || ITySize == 64) &&
938 ITySize > Query.Types[1].getScalarSizeInBits();
939 },
941 .widenScalarIf(
942 [=](const LegalityQuery &Query) {
943 unsigned FTySize = Query.Types[1].getScalarSizeInBits();
944 return (FTySize == 16 || FTySize == 32 || FTySize == 64) &&
945 Query.Types[0].getScalarSizeInBits() < FTySize;
946 },
949 .clampNumElements(0, v4s16, v8s16)
950 .clampNumElements(0, v2s32, v4s32)
951 .clampMaxNumElements(0, s64, 2);
952
953 getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
954 .legalFor({{s32, s32},
955 {s64, s32},
956 {s32, s64},
957 {s64, s64},
958 {v2s32, v2s32},
959 {v4s32, v4s32},
960 {v2s64, v2s64}})
961 .legalFor(HasFP16,
962 {{s16, s32}, {s16, s64}, {v4s16, v4s16}, {v8s16, v8s16}})
963 .scalarizeIf(scalarOrEltWiderThan(1, 64), 1)
967 .minScalar(1, s32)
968 .lowerIf([](const LegalityQuery &Query) {
969 return Query.Types[1].isVector() &&
970 Query.Types[1].getScalarSizeInBits() == 64 &&
971 Query.Types[0].getScalarSizeInBits() == 16;
972 })
973 .widenScalarOrEltToNextPow2OrMinSize(0, /*MinSize=*/HasFP16 ? 16 : 32)
975 // v2i64->v2f32 needs to scalarize to avoid double-rounding issues.
976 [](const LegalityQuery &Query) {
977 return Query.Types[0].getScalarSizeInBits() == 32 &&
978 Query.Types[1].getScalarSizeInBits() == 64;
979 },
980 0)
981 .widenScalarIf(
982 [](const LegalityQuery &Query) {
983 return Query.Types[1].getScalarSizeInBits() <= 64 &&
984 Query.Types[0].getScalarSizeInBits() <
985 Query.Types[1].getScalarSizeInBits();
986 },
988 .widenScalarIf(
989 [](const LegalityQuery &Query) {
990 return Query.Types[0].getScalarSizeInBits() <= 64 &&
991 Query.Types[0].getScalarSizeInBits() >
992 Query.Types[1].getScalarSizeInBits();
993 },
995 .clampNumElements(0, v4s16, v8s16)
996 .clampNumElements(0, v2s32, v4s32)
997 .clampMaxNumElements(0, s64, 2)
998 .libcallFor({{s16, s128},
999 {s32, s128},
1000 {s64, s128},
1001 {s128, s128},
1002 {s128, s32},
1003 {s128, s64}});
1004
1005 // Control-flow
1007 getActionDefinitionsBuilder(G_BRCOND)
1008 .legalFor({s32})
1009 .clampScalar(0, s32, s32);
1010 getActionDefinitionsBuilder(G_BRINDIRECT).legalFor({p0});
1011
1012 getActionDefinitionsBuilder(G_SELECT)
1013 .legalFor({{s32, s32}, {s64, s32}, {p0, s32}})
1014 .widenScalarToNextPow2(0)
1015 .clampScalar(0, s32, s64)
1016 .clampScalar(1, s32, s32)
1019 .lowerIf(isVector(0));
1020
1021 // Pointer-handling
1022 getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0});
1023
1024 if (TM.getCodeModel() == CodeModel::Small)
1025 getActionDefinitionsBuilder(G_GLOBAL_VALUE).custom();
1026 else
1027 getActionDefinitionsBuilder(G_GLOBAL_VALUE).legalFor({p0});
1028
1029 getActionDefinitionsBuilder(G_PTRAUTH_GLOBAL_VALUE)
1030 .legalIf(all(typeIs(0, p0), typeIs(1, p0)));
1031
1032 getActionDefinitionsBuilder(G_PTRTOINT)
1033 .legalFor({{s64, p0}, {v2s64, v2p0}})
1034 .widenScalarToNextPow2(0, 64)
1035 .clampScalar(0, s64, s64)
1036 .clampMaxNumElements(0, s64, 2);
1037
1038 getActionDefinitionsBuilder(G_INTTOPTR)
1039 .unsupportedIf([&](const LegalityQuery &Query) {
1040 return Query.Types[0].getSizeInBits() != Query.Types[1].getSizeInBits();
1041 })
1042 .legalFor({{p0, s64}, {v2p0, v2s64}})
1043 .clampMaxNumElements(1, s64, 2);
1044
1045 // Casts for 32 and 64-bit width type are just copies.
1046 // Same for 128-bit width type, except they are on the FPR bank.
1047 getActionDefinitionsBuilder(G_BITCAST)
1048 // Keeping 32-bit instructions legal to prevent regression in some tests
1049 .legalForCartesianProduct({s32, v2s16, v4s8})
1050 .legalForCartesianProduct({s64, v8s8, v4s16, v2s32})
1051 .legalForCartesianProduct({s128, v16s8, v8s16, v4s32, v2s64, v2p0})
1052 .customIf([=](const LegalityQuery &Query) {
1053 // Handle casts from i1 vectors to scalars.
1054 LLT DstTy = Query.Types[0];
1055 LLT SrcTy = Query.Types[1];
1056 return DstTy.isScalar() && SrcTy.isVector() &&
1057 SrcTy.getScalarSizeInBits() == 1;
1058 })
1059 .lowerIf([=](const LegalityQuery &Query) {
1060 return Query.Types[0].isVector() != Query.Types[1].isVector();
1061 })
1063 .clampNumElements(0, v8s8, v16s8)
1064 .clampNumElements(0, v4s16, v8s16)
1065 .clampNumElements(0, v2s32, v4s32)
1066 .clampMaxNumElements(0, s64, 2)
1067 .lower();
1068
1069 getActionDefinitionsBuilder(G_VASTART).legalFor({p0});
1070
1071 // va_list must be a pointer, but most sized types are pretty easy to handle
1072 // as the destination.
1073 getActionDefinitionsBuilder(G_VAARG)
1074 .customForCartesianProduct({s8, s16, s32, s64, p0}, {p0})
1075 .clampScalar(0, s8, s64)
1076 .widenScalarToNextPow2(0, /*Min*/ 8);
1077
1078 getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG_WITH_SUCCESS)
1079 .lowerIf(
1080 all(typeInSet(0, {s8, s16, s32, s64, s128}), typeIs(2, p0)));
1081
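// With the outline-atomics feature but no LSE instructions, atomic RMW and
// compare-exchange operations are emitted as calls to the __aarch64_*
// outlined helpers rather than inline sequences.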
1082 bool UseOutlineAtomics = ST.outlineAtomics() && !ST.hasLSE();
1083
1084 getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG)
1085 .legalFor(!UseOutlineAtomics, {{s32, p0}, {s64, p0}})
1086 .customFor(!UseOutlineAtomics, {{s128, p0}})
1087 .libcallFor(UseOutlineAtomics,
1088 {{s8, p0}, {s16, p0}, {s32, p0}, {s64, p0}, {s128, p0}})
1089 .clampScalar(0, s32, s64);
1090
1091 getActionDefinitionsBuilder({G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD,
1092 G_ATOMICRMW_SUB, G_ATOMICRMW_AND, G_ATOMICRMW_OR,
1093 G_ATOMICRMW_XOR})
1094 .legalFor(!UseOutlineAtomics, {{s32, p0}, {s64, p0}})
1095 .libcallFor(UseOutlineAtomics,
1096 {{s8, p0}, {s16, p0}, {s32, p0}, {s64, p0}})
1097 .clampScalar(0, s32, s64);
1098
1099 // Do not outline these atomic operations, as per the comment in
1100 // AArch64ISelLowering.cpp's shouldExpandAtomicRMWInIR().
1101 getActionDefinitionsBuilder(
1102 {G_ATOMICRMW_MIN, G_ATOMICRMW_MAX, G_ATOMICRMW_UMIN, G_ATOMICRMW_UMAX})
1103 .legalIf(all(typeInSet(0, {s32, s64}), typeIs(1, p0)))
1104 .clampScalar(0, s32, s64);
1105
1106 getActionDefinitionsBuilder(G_BLOCK_ADDR).legalFor({p0});
1107
1108 // Merge/Unmerge
1109 for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
1110 unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
1111 unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;
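// For G_MERGE_VALUES the wide ("big") type is the result (type index 0) and
// the narrow pieces ("lit") are the sources; for G_UNMERGE_VALUES the roles
// are swapped.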
1112 getActionDefinitionsBuilder(Op)
1113 .widenScalarToNextPow2(LitTyIdx, 8)
1114 .widenScalarToNextPow2(BigTyIdx, 32)
1115 .clampScalar(LitTyIdx, s8, s64)
1116 .clampScalar(BigTyIdx, s32, s128)
1117 .legalIf([=](const LegalityQuery &Q) {
1118 switch (Q.Types[BigTyIdx].getSizeInBits()) {
1119 case 32:
1120 case 64:
1121 case 128:
1122 break;
1123 default:
1124 return false;
1125 }
1126 switch (Q.Types[LitTyIdx].getSizeInBits()) {
1127 case 8:
1128 case 16:
1129 case 32:
1130 case 64:
1131 return true;
1132 default:
1133 return false;
1134 }
1135 });
1136 }
1137
1138 // TODO : nxv4s16, nxv2s16, nxv2s32
1139 getActionDefinitionsBuilder(G_EXTRACT_VECTOR_ELT)
1140 .legalFor(HasSVE, {{s16, nxv16s8, s64},
1141 {s16, nxv8s16, s64},
1142 {s32, nxv4s32, s64},
1143 {s64, nxv2s64, s64}})
1144 .unsupportedIf([=](const LegalityQuery &Query) {
1145 const LLT &EltTy = Query.Types[1].getElementType();
1146 if (Query.Types[1].isScalableVector())
1147 return false;
1148 return Query.Types[0] != EltTy;
1149 })
1150 .minScalar(2, s64)
1151 .customIf([=](const LegalityQuery &Query) {
1152 const LLT &VecTy = Query.Types[1];
1153 return VecTy == v8s8 || VecTy == v16s8 || VecTy == v2s16 ||
1154 VecTy == v4s16 || VecTy == v8s16 || VecTy == v2s32 ||
1155 VecTy == v4s32 || VecTy == v2s64 || VecTy == v2p0;
1156 })
1157 .minScalarOrEltIf(
1158 [=](const LegalityQuery &Query) {
1159 // We want to promote <M x s1> to <M x s64> if that wouldn't
1160 // cause the total vec size to be > 128b.
1161 return Query.Types[1].isFixedVector() &&
1162 Query.Types[1].getNumElements() <= 2;
1163 },
1164 0, s64)
1165 .minScalarOrEltIf(
1166 [=](const LegalityQuery &Query) {
1167 return Query.Types[1].isFixedVector() &&
1168 Query.Types[1].getNumElements() <= 4;
1169 },
1170 0, s32)
1171 .minScalarOrEltIf(
1172 [=](const LegalityQuery &Query) {
1173 return Query.Types[1].isFixedVector() &&
1174 Query.Types[1].getNumElements() <= 8;
1175 },
1176 0, s16)
1177 .minScalarOrEltIf(
1178 [=](const LegalityQuery &Query) {
1179 return Query.Types[1].isFixedVector() &&
1180 Query.Types[1].getNumElements() <= 16;
1181 },
1182 0, s8)
1183 .minScalarOrElt(0, s8) // Worst case, we need at least s8.
1185 .clampMaxNumElements(1, s64, 2)
1186 .clampMaxNumElements(1, s32, 4)
1187 .clampMaxNumElements(1, s16, 8)
1188 .clampMaxNumElements(1, s8, 16)
1189 .clampMaxNumElements(1, p0, 2)
1191
1192 getActionDefinitionsBuilder(G_INSERT_VECTOR_ELT)
1193 .legalIf(
1194 typeInSet(0, {v8s8, v16s8, v4s16, v8s16, v2s32, v4s32, v2s64, v2p0}))
1195 .legalFor(HasSVE, {{nxv16s8, s32, s64},
1196 {nxv8s16, s32, s64},
1197 {nxv4s32, s32, s64},
1198 {nxv2s64, s64, s64}})
1201 .clampNumElements(0, v8s8, v16s8)
1202 .clampNumElements(0, v4s16, v8s16)
1203 .clampNumElements(0, v2s32, v4s32)
1204 .clampMaxNumElements(0, s64, 2)
1205 .clampMaxNumElements(0, p0, 2)
1207
1208 getActionDefinitionsBuilder(G_BUILD_VECTOR)
1209 .legalFor({{v8s8, s8},
1210 {v16s8, s8},
1211 {v4s16, s16},
1212 {v8s16, s16},
1213 {v2s32, s32},
1214 {v4s32, s32},
1215 {v2s64, s64},
1216 {v2p0, p0}})
1217 .clampNumElements(0, v4s32, v4s32)
1218 .clampNumElements(0, v2s64, v2s64)
1219 .minScalarOrElt(0, s8)
1222 .minScalarSameAs(1, 0);
1223
1224 getActionDefinitionsBuilder(G_BUILD_VECTOR_TRUNC).lower();
1225
1226 getActionDefinitionsBuilder(G_SHUFFLE_VECTOR)
1227 .legalIf([=](const LegalityQuery &Query) {
1228 const LLT &DstTy = Query.Types[0];
1229 const LLT &SrcTy = Query.Types[1];
1230 // For now just support the TBL2 variant which needs the source vectors
1231 // to be the same size as the dest.
1232 if (DstTy != SrcTy)
1233 return false;
1234 return llvm::is_contained(
1235 {v8s8, v16s8, v4s16, v8s16, v2s32, v4s32, v2s64}, DstTy);
1236 })
1237 .moreElementsIf(
1238 [](const LegalityQuery &Query) {
1239 return Query.Types[0].getNumElements() >
1240 Query.Types[1].getNumElements();
1241 },
1242 changeTo(1, 0))
1244 .moreElementsIf(
1245 [](const LegalityQuery &Query) {
1246 return Query.Types[0].getNumElements() <
1247 Query.Types[1].getNumElements();
1248 },
1249 changeTo(0, 1))
1250 .widenScalarOrEltToNextPow2OrMinSize(0, 8)
1251 .clampNumElements(0, v8s8, v16s8)
1252 .clampNumElements(0, v4s16, v8s16)
1253 .clampNumElements(0, v4s32, v4s32)
1254 .clampNumElements(0, v2s64, v2s64)
1256 .bitcastIf(isPointerVector(0), [=](const LegalityQuery &Query) {
1257 // Bitcast pointer vectors to vectors of s64.
1258 const LLT DstTy = Query.Types[0];
1259 return std::pair(0, LLT::vector(DstTy.getElementCount(), 64));
1260 });
1261
1262 getActionDefinitionsBuilder(G_CONCAT_VECTORS)
1263 .legalFor({{v16s8, v8s8}, {v8s16, v4s16}, {v4s32, v2s32}})
1264 .bitcastIf(
1265 [=](const LegalityQuery &Query) {
1266 return Query.Types[0].isFixedVector() &&
1267 Query.Types[1].isFixedVector() &&
1268 Query.Types[0].getScalarSizeInBits() >= 8 &&
1269 isPowerOf2_64(Query.Types[0].getScalarSizeInBits()) &&
1270 Query.Types[0].getSizeInBits() <= 128 &&
1271 Query.Types[1].getSizeInBits() <= 64;
1272 },
1273 [=](const LegalityQuery &Query) {
1274 const LLT DstTy = Query.Types[0];
1275 const LLT SrcTy = Query.Types[1];
1276 return std::pair(
1277 0, DstTy.changeElementSize(SrcTy.getSizeInBits())
1280 SrcTy.getNumElements())));
1281 });
1282
1283 getActionDefinitionsBuilder(G_EXTRACT_SUBVECTOR)
1284 .legalFor({{v8s8, v16s8}, {v4s16, v8s16}, {v2s32, v4s32}})
1286 .immIdx(0); // Inform verifier imm idx 0 is handled.
1287
1288 // TODO: {nxv16s8, s8}, {nxv8s16, s16}
1289 getActionDefinitionsBuilder(G_SPLAT_VECTOR)
1290 .legalFor(HasSVE, {{nxv4s32, s32}, {nxv2s64, s64}});
1291
1292 getActionDefinitionsBuilder(G_JUMP_TABLE).legalFor({p0});
1293
1294 getActionDefinitionsBuilder(G_BRJT).legalFor({{p0, s64}});
1295
1296 getActionDefinitionsBuilder({G_TRAP, G_DEBUGTRAP, G_UBSANTRAP}).alwaysLegal();
1297
1298 getActionDefinitionsBuilder(G_DYN_STACKALLOC).custom();
1299
1300 getActionDefinitionsBuilder({G_STACKSAVE, G_STACKRESTORE}).lower();
1301
1302 if (ST.hasMOPS()) {
1303 // G_BZERO is not supported. Currently it is only emitted by
1304 // PreLegalizerCombiner for G_MEMSET with zero constant.
1305 getActionDefinitionsBuilder(G_BZERO).unsupported();
1306
1307 getActionDefinitionsBuilder(G_MEMSET)
1308 .legalForCartesianProduct({p0}, {s64}, {s64})
1309 .customForCartesianProduct({p0}, {s8}, {s64})
1310 .immIdx(0); // Inform verifier imm idx 0 is handled.
1311
1312 getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE})
1313 .legalForCartesianProduct({p0}, {p0}, {s64})
1314 .immIdx(0); // Inform verifier imm idx 0 is handled.
1315
1316 // G_MEMCPY_INLINE does not have a tailcall immediate
1317 getActionDefinitionsBuilder(G_MEMCPY_INLINE)
1318 .legalForCartesianProduct({p0}, {p0}, {s64});
1319
1320 } else {
1321 getActionDefinitionsBuilder({G_BZERO, G_MEMCPY, G_MEMMOVE, G_MEMSET})
1322 .libcall();
1323 }
1324
1325 // For fadd reductions we have pairwise operations available. We treat the
1326 // usual legal types as legal and handle the lowering to pairwise instructions
1327 // later.
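// e.g. a v4s32 fadd reduction can later be lowered to a pair of FADDP
// (pairwise add) instructions instead of a scalarized chain.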
1328 getActionDefinitionsBuilder(G_VECREDUCE_FADD)
1329 .legalFor({{s32, v2s32}, {s32, v4s32}, {s64, v2s64}})
1330 .legalFor(HasFP16, {{s16, v4s16}, {s16, v8s16}})
1331 .minScalarOrElt(0, MinFPScalar)
1332 .clampMaxNumElements(1, s64, 2)
1333 .clampMaxNumElements(1, s32, 4)
1334 .clampMaxNumElements(1, s16, 8)
1336 .scalarize(1)
1337 .lower();
1338
1339 // For fmul reductions we need to split up into individual operations. We
1340 // clamp to 128 bit vectors then to 64bit vectors to produce a cascade of
1341 // smaller types, followed by scalarizing what remains.
1342 getActionDefinitionsBuilder(G_VECREDUCE_FMUL)
1343 .minScalarOrElt(0, MinFPScalar)
1344 .clampMaxNumElements(1, s64, 2)
1345 .clampMaxNumElements(1, s32, 4)
1346 .clampMaxNumElements(1, s16, 8)
1347 .clampMaxNumElements(1, s32, 2)
1348 .clampMaxNumElements(1, s16, 4)
1349 .scalarize(1)
1350 .lower();
1351
1352 getActionDefinitionsBuilder({G_VECREDUCE_SEQ_FADD, G_VECREDUCE_SEQ_FMUL})
1353 .scalarize(2)
1354 .lower();
1355
1356 getActionDefinitionsBuilder(G_VECREDUCE_ADD)
1357 .legalFor({{s8, v8s8},
1358 {s8, v16s8},
1359 {s16, v4s16},
1360 {s16, v8s16},
1361 {s32, v2s32},
1362 {s32, v4s32},
1363 {s64, v2s64}})
1365 .clampMaxNumElements(1, s64, 2)
1366 .clampMaxNumElements(1, s32, 4)
1367 .clampMaxNumElements(1, s16, 8)
1368 .clampMaxNumElements(1, s8, 16)
1370 .scalarize(1);
1371
1372 getActionDefinitionsBuilder({G_VECREDUCE_FMIN, G_VECREDUCE_FMAX,
1373 G_VECREDUCE_FMINIMUM, G_VECREDUCE_FMAXIMUM})
1374 .legalFor({{s32, v2s32}, {s32, v4s32}, {s64, v2s64}})
1375 .legalFor(HasFP16, {{s16, v4s16}, {s16, v8s16}})
1376 .minScalarOrElt(0, MinFPScalar)
1377 .clampMaxNumElements(1, s64, 2)
1378 .clampMaxNumElements(1, s32, 4)
1379 .clampMaxNumElements(1, s16, 8)
1380 .scalarize(1)
1381 .lower();
1382
1383 getActionDefinitionsBuilder(G_VECREDUCE_MUL)
1384 .clampMaxNumElements(1, s32, 2)
1385 .clampMaxNumElements(1, s16, 4)
1386 .clampMaxNumElements(1, s8, 8)
1387 .scalarize(1)
1388 .lower();
1389
1391 {G_VECREDUCE_SMIN, G_VECREDUCE_SMAX, G_VECREDUCE_UMIN, G_VECREDUCE_UMAX})
1392 .legalFor({{s8, v8s8},
1393 {s8, v16s8},
1394 {s16, v4s16},
1395 {s16, v8s16},
1396 {s32, v2s32},
1397 {s32, v4s32}})
1398 .moreElementsIf(
1399 [=](const LegalityQuery &Query) {
1400 return Query.Types[1].isVector() &&
1401 Query.Types[1].getElementType() != s8 &&
1402 Query.Types[1].getNumElements() & 1;
1403 },
1405 .clampMaxNumElements(1, s64, 2)
1406 .clampMaxNumElements(1, s32, 4)
1407 .clampMaxNumElements(1, s16, 8)
1408 .clampMaxNumElements(1, s8, 16)
1409 .scalarize(1)
1410 .lower();
1411
1413 {G_VECREDUCE_OR, G_VECREDUCE_AND, G_VECREDUCE_XOR})
1414 // Try to break down into smaller vectors as long as they're at least 64
1415 // bits. This lets us use vector operations for some parts of the
1416 // reduction.
1417 .fewerElementsIf(
1418 [=](const LegalityQuery &Q) {
1419 LLT SrcTy = Q.Types[1];
1420 if (SrcTy.isScalar())
1421 return false;
1422 if (!isPowerOf2_32(SrcTy.getNumElements()))
1423 return false;
1424 // We can usually perform 64b vector operations.
1425 return SrcTy.getSizeInBits() > 64;
1426 },
1427 [=](const LegalityQuery &Q) {
1428 LLT SrcTy = Q.Types[1];
1429 return std::make_pair(1, SrcTy.divide(2));
1430 })
1431 .scalarize(1)
1432 .lower();
1433
1434 // TODO: Update this to correct handling when adding AArch64/SVE support.
1435 getActionDefinitionsBuilder(G_VECTOR_COMPRESS).lower();
1436
1437 // Access to floating-point environment.
1438 getActionDefinitionsBuilder({G_GET_FPENV, G_SET_FPENV, G_RESET_FPENV,
1439 G_GET_FPMODE, G_SET_FPMODE, G_RESET_FPMODE})
1440 .libcall();
1441
1442 getActionDefinitionsBuilder(G_IS_FPCLASS).lower();
1443
1444 getActionDefinitionsBuilder(G_PREFETCH).custom();
1445
1446 getActionDefinitionsBuilder({G_SCMP, G_UCMP}).lower();
1447
1448 getLegacyLegalizerInfo().computeTables();
1449 verify(*ST.getInstrInfo());
1450}
1451
1452bool AArch64LegalizerInfo::legalizeCustom(
1453 LegalizerHelper &Helper, MachineInstr &MI,
1454 LostDebugLocObserver &LocObserver) const {
1455 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
1456 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
1457 GISelChangeObserver &Observer = Helper.Observer;
1458 switch (MI.getOpcode()) {
1459 default:
1460 // No idea what to do.
1461 return false;
1462 case TargetOpcode::G_VAARG:
1463 return legalizeVaArg(MI, MRI, MIRBuilder);
1464 case TargetOpcode::G_LOAD:
1465 case TargetOpcode::G_STORE:
1466 return legalizeLoadStore(MI, MRI, MIRBuilder, Observer);
1467 case TargetOpcode::G_SHL:
1468 case TargetOpcode::G_ASHR:
1469 case TargetOpcode::G_LSHR:
1470 return legalizeShlAshrLshr(MI, MRI, MIRBuilder, Observer);
1471 case TargetOpcode::G_GLOBAL_VALUE:
1472 return legalizeSmallCMGlobalValue(MI, MRI, MIRBuilder, Observer);
1473 case TargetOpcode::G_SBFX:
1474 case TargetOpcode::G_UBFX:
1475 return legalizeBitfieldExtract(MI, MRI, Helper);
1476 case TargetOpcode::G_FSHL:
1477 case TargetOpcode::G_FSHR:
1478 return legalizeFunnelShift(MI, MRI, MIRBuilder, Observer, Helper);
1479 case TargetOpcode::G_ROTR:
1480 return legalizeRotate(MI, MRI, Helper);
1481 case TargetOpcode::G_CTPOP:
1482 return legalizeCTPOP(MI, MRI, Helper);
1483 case TargetOpcode::G_ATOMIC_CMPXCHG:
1484 return legalizeAtomicCmpxchg128(MI, MRI, Helper);
1485 case TargetOpcode::G_CTTZ:
1486 return legalizeCTTZ(MI, Helper);
1487 case TargetOpcode::G_BZERO:
1488 case TargetOpcode::G_MEMCPY:
1489 case TargetOpcode::G_MEMMOVE:
1490 case TargetOpcode::G_MEMSET:
1491 return legalizeMemOps(MI, Helper);
1492 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1493 return legalizeExtractVectorElt(MI, MRI, Helper);
1494 case TargetOpcode::G_DYN_STACKALLOC:
1495 return legalizeDynStackAlloc(MI, Helper);
1496 case TargetOpcode::G_PREFETCH:
1497 return legalizePrefetch(MI, Helper);
1498 case TargetOpcode::G_ABS:
1499 return Helper.lowerAbsToCNeg(MI);
1500 case TargetOpcode::G_ICMP:
1501 return legalizeICMP(MI, MRI, MIRBuilder);
1502 case TargetOpcode::G_BITCAST:
1503 return legalizeBitcast(MI, Helper);
1504 case TargetOpcode::G_FPTRUNC:
1505 // In order to lower f64 to f16 properly, we need to use f32 as an
1506 // intermediate step
1507 return legalizeFptrunc(MI, MIRBuilder, MRI);
1508 }
1509
1510 llvm_unreachable("expected switch to return");
1511}
1512
1513bool AArch64LegalizerInfo::legalizeBitcast(MachineInstr &MI,
1514 LegalizerHelper &Helper) const {
1515 assert(MI.getOpcode() == TargetOpcode::G_BITCAST && "Unexpected opcode");
1516 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
1517 // We handle casts from i1 vectors to scalars by storing the vector to the
1518 // stack and reloading the value as a scalar.
1519 if (!DstTy.isScalar() || !SrcTy.isVector() ||
1520 SrcTy.getElementType() != LLT::scalar(1))
1521 return false;
1522
1523 Helper.createStackStoreLoad(DstReg, SrcReg);
1524 MI.eraseFromParent();
1525 return true;
1526}
1527
1528bool AArch64LegalizerInfo::legalizeFunnelShift(MachineInstr &MI,
1529 MachineRegisterInfo &MRI,
1530 MachineIRBuilder &MIRBuilder,
1531 GISelChangeObserver &Observer,
1532 LegalizerHelper &Helper) const {
1533 assert(MI.getOpcode() == TargetOpcode::G_FSHL ||
1534 MI.getOpcode() == TargetOpcode::G_FSHR);
1535
1536 // Keep as G_FSHR if shift amount is a G_CONSTANT, else use generic
1537 // lowering
1538 Register ShiftNo = MI.getOperand(3).getReg();
1539 LLT ShiftTy = MRI.getType(ShiftNo);
1540 auto VRegAndVal = getIConstantVRegValWithLookThrough(ShiftNo, MRI);
1541
1542 // Adjust shift amount according to Opcode (FSHL/FSHR)
1543 // Convert FSHL to FSHR
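// For a constant amount N with 0 < N < BitWidth, fshl(a, b, N) is equal to
// fshr(a, b, BitWidth - N), so a constant G_FSHL is rewritten below as a
// G_FSHR with the adjusted amount.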
1544 LLT OperationTy = MRI.getType(MI.getOperand(0).getReg());
1545 APInt BitWidth(ShiftTy.getSizeInBits(), OperationTy.getSizeInBits(), false);
1546
1547 // Lower non-constant shifts and leave zero shifts to the optimizer.
1548 if (!VRegAndVal || VRegAndVal->Value.urem(BitWidth) == 0)
1549 return (Helper.lowerFunnelShiftAsShifts(MI) ==
1550 LegalizerHelper::LegalizeResult::Legalized);
1551
1552 APInt Amount = VRegAndVal->Value.urem(BitWidth);
1553
1554 Amount = MI.getOpcode() == TargetOpcode::G_FSHL ? BitWidth - Amount : Amount;
1555
1556 // If the instruction is G_FSHR and the shift amount is a 64-bit G_CONSTANT
1557 // in the range [0, BitWidth), it is already legal.
1558 if (ShiftTy.getSizeInBits() == 64 && MI.getOpcode() == TargetOpcode::G_FSHR &&
1559 VRegAndVal->Value.ult(BitWidth))
1560 return true;
1561
1562 // Cast the ShiftNumber to a 64-bit type
1563 auto Cast64 = MIRBuilder.buildConstant(LLT::scalar(64), Amount.zext(64));
1564
1565 if (MI.getOpcode() == TargetOpcode::G_FSHR) {
1566 Observer.changingInstr(MI);
1567 MI.getOperand(3).setReg(Cast64.getReg(0));
1568 Observer.changedInstr(MI);
1569 }
1570 // If Opcode is FSHL, remove the FSHL instruction and create a FSHR
1571 // instruction
1572 else if (MI.getOpcode() == TargetOpcode::G_FSHL) {
1573 MIRBuilder.buildInstr(TargetOpcode::G_FSHR, {MI.getOperand(0).getReg()},
1574 {MI.getOperand(1).getReg(), MI.getOperand(2).getReg(),
1575 Cast64.getReg(0)});
1576 MI.eraseFromParent();
1577 }
1578 return true;
1579}
1580
1581bool AArch64LegalizerInfo::legalizeICMP(MachineInstr &MI,
1582 MachineRegisterInfo &MRI,
1583 MachineIRBuilder &MIRBuilder) const {
1584 Register DstReg = MI.getOperand(0).getReg();
1585 Register SrcReg1 = MI.getOperand(2).getReg();
1586 Register SrcReg2 = MI.getOperand(3).getReg();
1587 LLT DstTy = MRI.getType(DstReg);
1588 LLT SrcTy = MRI.getType(SrcReg1);
1589
1590 // Check the vector types are legal
1591 if (DstTy.getScalarSizeInBits() != SrcTy.getScalarSizeInBits() ||
1592 DstTy.getNumElements() != SrcTy.getNumElements() ||
1593 (DstTy.getSizeInBits() != 64 && DstTy.getSizeInBits() != 128))
1594 return false;
1595
1596 // Lower G_ICMP NE to G_ICMP EQ followed by a NOT; this allows better pattern
1597 // matching in the following passes.
1598 CmpInst::Predicate Pred = (CmpInst::Predicate)MI.getOperand(1).getPredicate();
1599 if (Pred != CmpInst::ICMP_NE)
1600 return true;
1601 Register CmpReg =
1602 MIRBuilder
1603 .buildICmp(CmpInst::ICMP_EQ, MRI.getType(DstReg), SrcReg1, SrcReg2)
1604 .getReg(0);
1605 MIRBuilder.buildNot(DstReg, CmpReg);
1606
1607 MI.eraseFromParent();
1608 return true;
1609}
1610
1611bool AArch64LegalizerInfo::legalizeRotate(MachineInstr &MI,
1612 MachineRegisterInfo &MRI,
1613 LegalizerHelper &Helper) const {
1614 // To allow for imported patterns to match, we ensure that the rotate amount
1615 // is 64b with an extension.
1616 Register AmtReg = MI.getOperand(2).getReg();
1617 LLT AmtTy = MRI.getType(AmtReg);
1618 (void)AmtTy;
1619 assert(AmtTy.isScalar() && "Expected a scalar rotate");
1620 assert(AmtTy.getSizeInBits() < 64 && "Expected this rotate to be legal");
1621 auto NewAmt = Helper.MIRBuilder.buildZExt(LLT::scalar(64), AmtReg);
1622 Helper.Observer.changingInstr(MI);
1623 MI.getOperand(2).setReg(NewAmt.getReg(0));
1624 Helper.Observer.changedInstr(MI);
1625 return true;
1626}
1627
1628bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue(
1629 MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
1630 GISelChangeObserver &Observer) const {
1631 assert(MI.getOpcode() == TargetOpcode::G_GLOBAL_VALUE);
1632 // We do this custom legalization to convert G_GLOBAL_VALUE into target ADRP +
1633 // G_ADD_LOW instructions.
1634 // By splitting this here, we can optimize accesses in the small code model by
1635 // folding in the G_ADD_LOW into the load/store offset.
1636 auto &GlobalOp = MI.getOperand(1);
1637 // Don't modify an intrinsic call.
1638 if (GlobalOp.isSymbol())
1639 return true;
1640 const auto* GV = GlobalOp.getGlobal();
1641 if (GV->isThreadLocal())
1642 return true; // Don't want to modify TLS vars.
1643
1644 auto &TM = ST->getTargetLowering()->getTargetMachine();
1645 unsigned OpFlags = ST->ClassifyGlobalReference(GV, TM);
1646
1647 if (OpFlags & AArch64II::MO_GOT)
1648 return true;
1649
1650 auto Offset = GlobalOp.getOffset();
1651 Register DstReg = MI.getOperand(0).getReg();
1652 auto ADRP = MIRBuilder.buildInstr(AArch64::ADRP, {LLT::pointer(0, 64)}, {})
1653 .addGlobalAddress(GV, Offset, OpFlags | AArch64II::MO_PAGE);
1654 // Set the regclass on the dest reg too.
1655 MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass);
1656
1657 // MO_TAGGED on the page indicates a tagged address. Set the tag now. We do so
1658 // by creating a MOVK that sets bits 48-63 of the register to (global address
1659 // + 0x100000000 - PC) >> 48. The additional 0x100000000 offset here is to
1660 // prevent an incorrect tag being generated during relocation when the
1661 // global appears before the code section. Without the offset, a global at
1662 // `0x0f00'0000'0000'1000` (i.e. at `0x1000` with tag `0xf`) that's referenced
1663 // by code at `0x2000` would result in `0x0f00'0000'0000'1000 - 0x2000 =
1664 // 0x0eff'ffff'ffff'f000`, meaning the tag would be incorrectly set to `0xe`
1665 // instead of `0xf`.
1666 // This assumes that we're in the small code model so we can assume a binary
1667 // size of <= 4GB, which makes the untagged PC relative offset positive. The
1668 // binary must also be loaded into address range [0, 2^48). Both of these
1669 // properties need to be ensured at runtime when using tagged addresses.
1670 if (OpFlags & AArch64II::MO_TAGGED) {
1671 assert(!Offset &&
1672 "Should not have folded in an offset for a tagged global!");
1673 ADRP = MIRBuilder.buildInstr(AArch64::MOVKXi, {LLT::pointer(0, 64)}, {ADRP})
1674 .addGlobalAddress(GV, 0x100000000,
1675 AArch64II::MO_PREL | AArch64II::MO_G3)
1676 .addImm(48);
1677 MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass);
1678 }
1679
1680 MIRBuilder.buildInstr(AArch64::G_ADD_LOW, {DstReg}, {ADRP})
1681 .addGlobalAddress(GV, Offset,
1682 OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
1683 MI.eraseFromParent();
1684 return true;
1685}
1686
1687bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
1688 MachineInstr &MI) const {
1689 MachineIRBuilder &MIB = Helper.MIRBuilder;
1690 MachineRegisterInfo &MRI = *MIB.getMRI();
1691
1692 auto LowerUnaryOp = [&MI, &MIB](unsigned Opcode) {
1693 MIB.buildInstr(Opcode, {MI.getOperand(0)}, {MI.getOperand(2)});
1694 MI.eraseFromParent();
1695 return true;
1696 };
1697 auto LowerBinOp = [&MI, &MIB](unsigned Opcode) {
1698 MIB.buildInstr(Opcode, {MI.getOperand(0)},
1699 {MI.getOperand(2), MI.getOperand(3)});
1700 MI.eraseFromParent();
1701 return true;
1702 };
1703 auto LowerTriOp = [&MI, &MIB](unsigned Opcode) {
1704 MIB.buildInstr(Opcode, {MI.getOperand(0)},
1705 {MI.getOperand(2), MI.getOperand(3), MI.getOperand(4)});
1706 MI.eraseFromParent();
1707 return true;
1708 };
1709
1710 Intrinsic::ID IntrinsicID = cast<GIntrinsic>(MI).getIntrinsicID();
1711 switch (IntrinsicID) {
1712 case Intrinsic::vacopy: {
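// Darwin and Windows use a single pointer for va_list; the AAPCS64 va_list
// is a structure of three pointers and two offsets, i.e. 32 bytes in LP64
// and 20 bytes in ILP32.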
1713 unsigned PtrSize = ST->isTargetILP32() ? 4 : 8;
1714 unsigned VaListSize =
1715 (ST->isTargetDarwin() || ST->isTargetWindows())
1716 ? PtrSize
1717 : ST->isTargetILP32() ? 20 : 32;
1718
1719 MachineFunction &MF = *MI.getMF();
1720 auto Val = MF.getRegInfo().createGenericVirtualRegister(
1721 LLT::scalar(VaListSize * 8));
1722 MIB.buildLoad(Val, MI.getOperand(2),
1723 *MF.getMachineMemOperand(MachinePointerInfo(),
1724 MachineMemOperand::MOLoad,
1725 VaListSize, Align(PtrSize)));
1726 MIB.buildStore(Val, MI.getOperand(1),
1727 *MF.getMachineMemOperand(MachinePointerInfo(),
1728 MachineMemOperand::MOStore,
1729 VaListSize, Align(PtrSize)));
1730 MI.eraseFromParent();
1731 return true;
1732 }
1733 case Intrinsic::get_dynamic_area_offset: {
1734 MIB.buildConstant(MI.getOperand(0).getReg(), 0);
1735 MI.eraseFromParent();
1736 return true;
1737 }
1738 case Intrinsic::aarch64_mops_memset_tag: {
1739 assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
1740 // Anyext the value being set to 64 bit (only the bottom 8 bits are read by
1741 // the instruction).
1742 auto &Value = MI.getOperand(3);
1743 Register ExtValueReg = MIB.buildAnyExt(LLT::scalar(64), Value).getReg(0);
1744 Value.setReg(ExtValueReg);
1745 return true;
1746 }
1747 case Intrinsic::aarch64_prefetch: {
1748 auto &AddrVal = MI.getOperand(1);
1749
1750 int64_t IsWrite = MI.getOperand(2).getImm();
1751 int64_t Target = MI.getOperand(3).getImm();
1752 int64_t IsStream = MI.getOperand(4).getImm();
1753 int64_t IsData = MI.getOperand(5).getImm();
1754
1755 unsigned PrfOp = (IsWrite << 4) | // Load/Store bit
1756 (!IsData << 3) | // IsDataCache bit
1757 (Target << 1) | // Cache level bits
1758 (unsigned)IsStream; // Stream bit
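// e.g. IsWrite=0, IsData=1, Target=0, IsStream=0 yields 0b00000 == PLDL1KEEP.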
1759
1760 MIB.buildInstr(AArch64::G_AARCH64_PREFETCH).addImm(PrfOp).add(AddrVal);
1761 MI.eraseFromParent();
1762 return true;
1763 }
1764 case Intrinsic::aarch64_range_prefetch: {
1765 auto &AddrVal = MI.getOperand(1);
1766
1767 int64_t IsWrite = MI.getOperand(2).getImm();
1768 int64_t IsStream = MI.getOperand(3).getImm();
1769 unsigned PrfOp = (IsStream << 2) | IsWrite;
1770
1771 MIB.buildInstr(AArch64::G_AARCH64_RANGE_PREFETCH)
1772 .addImm(PrfOp)
1773 .add(AddrVal)
1774 .addUse(MI.getOperand(4).getReg()); // Metadata
1775 MI.eraseFromParent();
1776 return true;
1777 }
1778 case Intrinsic::aarch64_prefetch_ir: {
1779 auto &AddrVal = MI.getOperand(1);
1780 MIB.buildInstr(AArch64::G_AARCH64_PREFETCH).addImm(24).add(AddrVal);
1781 MI.eraseFromParent();
1782 return true;
1783 }
1784 case Intrinsic::aarch64_neon_uaddv:
1785 case Intrinsic::aarch64_neon_saddv:
1786 case Intrinsic::aarch64_neon_umaxv:
1787 case Intrinsic::aarch64_neon_smaxv:
1788 case Intrinsic::aarch64_neon_uminv:
1789 case Intrinsic::aarch64_neon_sminv: {
1790 bool IsSigned = IntrinsicID == Intrinsic::aarch64_neon_saddv ||
1791 IntrinsicID == Intrinsic::aarch64_neon_smaxv ||
1792 IntrinsicID == Intrinsic::aarch64_neon_sminv;
1793
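// These across-lanes reductions can be declared with a result wider than the
// vector element type (e.g. i32 for a v8i8 reduction). Rebuild the def with
// the element type and sign/zero-extend back to the original width so the
// reduction itself stays on the element type.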
1794 auto OldDst = MI.getOperand(0).getReg();
1795 auto OldDstTy = MRI.getType(OldDst);
1796 LLT NewDstTy = MRI.getType(MI.getOperand(2).getReg()).getElementType();
1797 if (OldDstTy == NewDstTy)
1798 return true;
1799
1800 auto NewDst = MRI.createGenericVirtualRegister(NewDstTy);
1801
1802 Helper.Observer.changingInstr(MI);
1803 MI.getOperand(0).setReg(NewDst);
1804 Helper.Observer.changedInstr(MI);
1805
1806 MIB.setInsertPt(MIB.getMBB(), ++MIB.getInsertPt());
1807 MIB.buildExtOrTrunc(IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT,
1808 OldDst, NewDst);
1809
1810 return true;
1811 }
1812 case Intrinsic::aarch64_neon_uaddlp:
1813 case Intrinsic::aarch64_neon_saddlp: {
1814 unsigned Opc = IntrinsicID == Intrinsic::aarch64_neon_uaddlp
1815 ? AArch64::G_UADDLP
1816 : AArch64::G_SADDLP;
1817 MIB.buildInstr(Opc, {MI.getOperand(0)}, {MI.getOperand(2)});
1818 MI.eraseFromParent();
1819
1820 return true;
1821 }
1822 case Intrinsic::aarch64_neon_uaddlv:
1823 case Intrinsic::aarch64_neon_saddlv: {
1824 unsigned Opc = IntrinsicID == Intrinsic::aarch64_neon_uaddlv
1825 ? AArch64::G_UADDLV
1826 : AArch64::G_SADDLV;
1827 Register DstReg = MI.getOperand(0).getReg();
1828 Register SrcReg = MI.getOperand(2).getReg();
1829 LLT DstTy = MRI.getType(DstReg);
1830
1831 LLT MidTy, ExtTy;
1832 if (DstTy.isScalar() && DstTy.getScalarSizeInBits() <= 32) {
1833 MidTy = LLT::fixed_vector(4, 32);
1834 ExtTy = LLT::scalar(32);
1835 } else {
1836 MidTy = LLT::fixed_vector(2, 64);
1837 ExtTy = LLT::scalar(64);
1838 }
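// G_UADDLV/G_SADDLV produce their sum in a vector register, so build the
// reduction with a vector result, read lane 0, then truncate or copy to the
// requested scalar destination.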
1839
1840 Register MidReg =
1841 MIB.buildInstr(Opc, {MidTy}, {SrcReg})->getOperand(0).getReg();
1842 Register ZeroReg =
1843 MIB.buildConstant(LLT::scalar(64), 0)->getOperand(0).getReg();
1844 Register ExtReg = MIB.buildInstr(AArch64::G_EXTRACT_VECTOR_ELT, {ExtTy},
1845 {MidReg, ZeroReg})
1846 .getReg(0);
1847
1848 if (DstTy.getScalarSizeInBits() < 32)
1849 MIB.buildTrunc(DstReg, ExtReg);
1850 else
1851 MIB.buildCopy(DstReg, ExtReg);
1852
1853 MI.eraseFromParent();
1854
1855 return true;
1856 }
1857 case Intrinsic::aarch64_neon_smax:
1858 return LowerBinOp(TargetOpcode::G_SMAX);
1859 case Intrinsic::aarch64_neon_smin:
1860 return LowerBinOp(TargetOpcode::G_SMIN);
1861 case Intrinsic::aarch64_neon_umax:
1862 return LowerBinOp(TargetOpcode::G_UMAX);
1863 case Intrinsic::aarch64_neon_umin:
1864 return LowerBinOp(TargetOpcode::G_UMIN);
1865 case Intrinsic::aarch64_neon_fmax:
1866 return LowerBinOp(TargetOpcode::G_FMAXIMUM);
1867 case Intrinsic::aarch64_neon_fmin:
1868 return LowerBinOp(TargetOpcode::G_FMINIMUM);
1869 case Intrinsic::aarch64_neon_fmaxnm:
1870 return LowerBinOp(TargetOpcode::G_FMAXNUM);
1871 case Intrinsic::aarch64_neon_fminnm:
1872 return LowerBinOp(TargetOpcode::G_FMINNUM);
1873 case Intrinsic::aarch64_neon_pmull:
1874 case Intrinsic::aarch64_neon_pmull64:
1875 return LowerBinOp(AArch64::G_PMULL);
1876 case Intrinsic::aarch64_neon_smull:
1877 return LowerBinOp(AArch64::G_SMULL);
1878 case Intrinsic::aarch64_neon_umull:
1879 return LowerBinOp(AArch64::G_UMULL);
1880 case Intrinsic::aarch64_neon_sabd:
1881 return LowerBinOp(TargetOpcode::G_ABDS);
1882 case Intrinsic::aarch64_neon_uabd:
1883 return LowerBinOp(TargetOpcode::G_ABDU);
1884 case Intrinsic::aarch64_neon_uhadd:
1885 return LowerBinOp(TargetOpcode::G_UAVGFLOOR);
1886 case Intrinsic::aarch64_neon_urhadd:
1887 return LowerBinOp(TargetOpcode::G_UAVGCEIL);
1888 case Intrinsic::aarch64_neon_shadd:
1889 return LowerBinOp(TargetOpcode::G_SAVGFLOOR);
1890 case Intrinsic::aarch64_neon_srhadd:
1891 return LowerBinOp(TargetOpcode::G_SAVGCEIL);
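// The saturating narrowing-shift intrinsics below are split into an explicit
// vector shift-right followed by a generic saturating truncate
// (G_TRUNC_SSAT_*/G_TRUNC_USAT_*); scalar forms are left untouched here.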
1892 case Intrinsic::aarch64_neon_sqshrn: {
1893 if (!MRI.getType(MI.getOperand(0).getReg()).isVector())
1894 return true;
1895 // Create right shift instruction. Store the output register in Shr.
1896 auto Shr = MIB.buildInstr(AArch64::G_VASHR,
1897 {MRI.getType(MI.getOperand(2).getReg())},
1898 {MI.getOperand(2), MI.getOperand(3).getImm()});
1899 // Build the narrow intrinsic, taking in Shr.
1900 MIB.buildInstr(TargetOpcode::G_TRUNC_SSAT_S, {MI.getOperand(0)}, {Shr});
1901 MI.eraseFromParent();
1902 return true;
1903 }
1904 case Intrinsic::aarch64_neon_sqshrun: {
1905 if (!MRI.getType(MI.getOperand(0).getReg()).isVector())
1906 return true;
1907 // Create right shift instruction. Store the output register in Shr.
1908 auto Shr = MIB.buildInstr(AArch64::G_VASHR,
1909 {MRI.getType(MI.getOperand(2).getReg())},
1910 {MI.getOperand(2), MI.getOperand(3).getImm()});
1911 // Build the narrow intrinsic, taking in Shr.
1912 MIB.buildInstr(TargetOpcode::G_TRUNC_SSAT_U, {MI.getOperand(0)}, {Shr});
1913 MI.eraseFromParent();
1914 return true;
1915 }
1916 case Intrinsic::aarch64_neon_sqrshrn: {
1917 if (!MRI.getType(MI.getOperand(0).getReg()).isVector())
1918 return true;
1919 // Create right shift instruction. Store the output register in Shr.
1920 auto Shr = MIB.buildInstr(AArch64::G_SRSHR_I,
1921 {MRI.getType(MI.getOperand(2).getReg())},
1922 {MI.getOperand(2), MI.getOperand(3).getImm()});
1923 // Build the narrow intrinsic, taking in Shr.
1924 MIB.buildInstr(TargetOpcode::G_TRUNC_SSAT_S, {MI.getOperand(0)}, {Shr});
1925 MI.eraseFromParent();
1926 return true;
1927 }
1928 case Intrinsic::aarch64_neon_sqrshrun: {
1929 if (!MRI.getType(MI.getOperand(0).getReg()).isVector())
1930 return true;
1931 // Create right shift instruction. Store the output register in Shr.
1932 auto Shr = MIB.buildInstr(AArch64::G_SRSHR_I,
1933 {MRI.getType(MI.getOperand(2).getReg())},
1934 {MI.getOperand(2), MI.getOperand(3).getImm()});
1935 // Build the narrow intrinsic, taking in Shr.
1936 MIB.buildInstr(TargetOpcode::G_TRUNC_SSAT_U, {MI.getOperand(0)}, {Shr});
1937 MI.eraseFromParent();
1938 return true;
1939 }
1940 case Intrinsic::aarch64_neon_uqrshrn: {
1941 if (!MRI.getType(MI.getOperand(0).getReg()).isVector())
1942 return true;
1943 // Create right shift instruction. Store the output register in Shr.
1944 auto Shr = MIB.buildInstr(AArch64::G_URSHR_I,
1945 {MRI.getType(MI.getOperand(2).getReg())},
1946 {MI.getOperand(2), MI.getOperand(3).getImm()});
1947 // Build the narrow intrinsic, taking in Shr.
1948 MIB.buildInstr(TargetOpcode::G_TRUNC_USAT_U, {MI.getOperand(0)}, {Shr});
1949 MI.eraseFromParent();
1950 return true;
1951 }
1952 case Intrinsic::aarch64_neon_uqshrn: {
1953 if (!MRI.getType(MI.getOperand(0).getReg()).isVector())
1954 return true;
1955 // Create right shift instruction. Store the output register in Shr.
1956 auto Shr = MIB.buildInstr(AArch64::G_VLSHR,
1957 {MRI.getType(MI.getOperand(2).getReg())},
1958 {MI.getOperand(2), MI.getOperand(3).getImm()});
1959 // Build the narrow intrinsic, taking in Shr.
1960 MIB.buildInstr(TargetOpcode::G_TRUNC_USAT_U, {MI.getOperand(0)}, {Shr});
1961 MI.eraseFromParent();
1962 return true;
1963 }
1964 case Intrinsic::aarch64_neon_sqshlu: {
1965 // Check if last operand is constant vector dup
1966 auto ShiftAmount = isConstantOrConstantSplatVector(
1967 *MRI.getVRegDef(MI.getOperand(3).getReg()), MRI);
1968 if (ShiftAmount) {
1969 // If so, create a new intrinsic with the correct shift amount
1970 MIB.buildInstr(AArch64::G_SQSHLU_I, {MI.getOperand(0)},
1971 {MI.getOperand(2)})
1972 .addImm(ShiftAmount->getSExtValue());
1973 MI.eraseFromParent();
1974 return true;
1975 }
1976 return false;
1977 }
1978 case Intrinsic::aarch64_neon_vsli: {
1979 MIB.buildInstr(
1980 AArch64::G_SLI, {MI.getOperand(0)},
1981 {MI.getOperand(2), MI.getOperand(3), MI.getOperand(4).getImm()});
1982 MI.eraseFromParent();
1983 break;
1984 }
1985 case Intrinsic::aarch64_neon_vsri: {
1986 MIB.buildInstr(
1987 AArch64::G_SRI, {MI.getOperand(0)},
1988 {MI.getOperand(2), MI.getOperand(3), MI.getOperand(4).getImm()});
1989 MI.eraseFromParent();
1990 break;
1991 }
1992 case Intrinsic::aarch64_neon_abs: {
1993 // Lower the intrinsic to G_ABS.
1994 MIB.buildInstr(TargetOpcode::G_ABS, {MI.getOperand(0)}, {MI.getOperand(2)});
1995 MI.eraseFromParent();
1996 return true;
1997 }
1998 case Intrinsic::aarch64_neon_sqadd: {
1999 if (MRI.getType(MI.getOperand(0).getReg()).isVector())
2000 return LowerBinOp(TargetOpcode::G_SADDSAT);
2001 break;
2002 }
2003 case Intrinsic::aarch64_neon_sqsub: {
2004 if (MRI.getType(MI.getOperand(0).getReg()).isVector())
2005 return LowerBinOp(TargetOpcode::G_SSUBSAT);
2006 break;
2007 }
2008 case Intrinsic::aarch64_neon_uqadd: {
2009 if (MRI.getType(MI.getOperand(0).getReg()).isVector())
2010 return LowerBinOp(TargetOpcode::G_UADDSAT);
2011 break;
2012 }
2013 case Intrinsic::aarch64_neon_uqsub: {
2014 if (MRI.getType(MI.getOperand(0).getReg()).isVector())
2015 return LowerBinOp(TargetOpcode::G_USUBSAT);
2016 break;
2017 }
2018 case Intrinsic::aarch64_neon_udot:
2019 return LowerTriOp(AArch64::G_UDOT);
2020 case Intrinsic::aarch64_neon_sdot:
2021 return LowerTriOp(AArch64::G_SDOT);
2022 case Intrinsic::aarch64_neon_usdot:
2023 return LowerTriOp(AArch64::G_USDOT);
2024 case Intrinsic::aarch64_neon_sqxtn:
2025 return LowerUnaryOp(TargetOpcode::G_TRUNC_SSAT_S);
2026 case Intrinsic::aarch64_neon_sqxtun:
2027 return LowerUnaryOp(TargetOpcode::G_TRUNC_SSAT_U);
2028 case Intrinsic::aarch64_neon_uqxtn:
2029 return LowerUnaryOp(TargetOpcode::G_TRUNC_USAT_U);
2030 case Intrinsic::aarch64_neon_fcvtzu:
2031 return LowerUnaryOp(TargetOpcode::G_FPTOUI_SAT);
2032 case Intrinsic::aarch64_neon_fcvtzs:
2033 return LowerUnaryOp(TargetOpcode::G_FPTOSI_SAT);
2034
2035 case Intrinsic::vector_reverse:
2036 // TODO: Add support for vector_reverse
2037 return false;
2038 }
2039
2040 return true;
2041}
2042
2043bool AArch64LegalizerInfo::legalizeShlAshrLshr(
2044 MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
2045 GISelChangeObserver &Observer) const {
2046 assert(MI.getOpcode() == TargetOpcode::G_ASHR ||
2047 MI.getOpcode() == TargetOpcode::G_LSHR ||
2048 MI.getOpcode() == TargetOpcode::G_SHL);
2049 // If the shift amount is a G_CONSTANT, promote it to a 64 bit type so the
2050 // imported patterns can select it later. Either way, it will be legal.
2051 Register AmtReg = MI.getOperand(2).getReg();
2052 auto VRegAndVal = getIConstantVRegValWithLookThrough(AmtReg, MRI);
2053 if (!VRegAndVal)
2054 return true;
2055 // Check the shift amount is in range for an immediate form.
2056 int64_t Amount = VRegAndVal->Value.getSExtValue();
2057 if (Amount > 31)
2058 return true; // This will have to remain a register variant.
2059 auto ExtCst = MIRBuilder.buildConstant(LLT::scalar(64), Amount);
2060 Observer.changingInstr(MI);
2061 MI.getOperand(2).setReg(ExtCst.getReg(0));
2062 Observer.changedInstr(MI);
2063 return true;
2064}
2065
2066 static void matchLDPSTPAddrMode(Register Root, Register &Base, int &Offset,
2067 MachineRegisterInfo &MRI) {
2068 Base = Root;
2069 Offset = 0;
2070
2071 Register NewBase;
2072 int64_t NewOffset;
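// Try to fold a constant G_PTR_ADD into the LDP/STP addressing mode: the
// offset must fit the scaled signed 7-bit immediate, i.e. a multiple of 8 in
// [-512, 504].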
2073 if (mi_match(Root, MRI, m_GPtrAdd(m_Reg(NewBase), m_ICst(NewOffset))) &&
2074 isShiftedInt<7, 3>(NewOffset)) {
2075 Base = NewBase;
2076 Offset = NewOffset;
2077 }
2078}
2079
2080// FIXME: This should be removed and replaced with the generic bitcast legalize
2081// action.
2082bool AArch64LegalizerInfo::legalizeLoadStore(
2083 MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
2084 GISelChangeObserver &Observer) const {
2085 assert(MI.getOpcode() == TargetOpcode::G_STORE ||
2086 MI.getOpcode() == TargetOpcode::G_LOAD);
2087 // Here we just try to handle vector loads/stores where our value type might
2088 // have pointer elements, which the SelectionDAG importer can't handle. To
2089 // allow the existing patterns for s64 to fire for p0, we just try to bitcast
2090 // the value to use s64 types.
2091
2092 // Custom legalization requires that the instruction, if not deleted, be fully
2093 // legalized. To allow further legalization of the instruction, we create a
2094 // new instruction and erase the existing one.
2095
2096 Register ValReg = MI.getOperand(0).getReg();
2097 const LLT ValTy = MRI.getType(ValReg);
2098
2099 if (ValTy == LLT::scalar(128)) {
2100
2101 AtomicOrdering Ordering = (*MI.memoperands_begin())->getSuccessOrdering();
2102 bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD;
2103 bool IsLoadAcquire = IsLoad && Ordering == AtomicOrdering::Acquire;
2104 bool IsStoreRelease = !IsLoad && Ordering == AtomicOrdering::Release;
2105 bool IsRcpC3 =
2106 ST->hasLSE2() && ST->hasRCPC3() && (IsLoadAcquire || IsStoreRelease);
2107
2108 LLT s64 = LLT::scalar(64);
2109
2110 unsigned Opcode;
2111 if (IsRcpC3) {
2112 Opcode = IsLoad ? AArch64::LDIAPPX : AArch64::STILPX;
2113 } else {
2114 // For LSE2, loads/stores should have been converted to monotonic and had
2115 // a fence inserted after them.
2116 assert(Ordering == AtomicOrdering::Monotonic ||
2117 Ordering == AtomicOrdering::Unordered);
2118 assert(ST->hasLSE2() && "ldp/stp not single copy atomic without +lse2");
2119
2120 Opcode = IsLoad ? AArch64::LDPXi : AArch64::STPXi;
2121 }
2122
2123 MachineInstrBuilder NewI;
2124 if (IsLoad) {
2125 NewI = MIRBuilder.buildInstr(Opcode, {s64, s64}, {});
2126 MIRBuilder.buildMergeLikeInstr(
2127 ValReg, {NewI->getOperand(0), NewI->getOperand(1)});
2128 } else {
2129 auto Split = MIRBuilder.buildUnmerge(s64, MI.getOperand(0));
2130 NewI = MIRBuilder.buildInstr(
2131 Opcode, {}, {Split->getOperand(0), Split->getOperand(1)});
2132 }
2133
2134 if (IsRcpC3) {
2135 NewI.addUse(MI.getOperand(1).getReg());
2136 } else {
2137 Register Base;
2138 int Offset;
2139 matchLDPSTPAddrMode(MI.getOperand(1).getReg(), Base, Offset, MRI);
2140 NewI.addUse(Base);
2141 NewI.addImm(Offset / 8);
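// The LDP/STP immediate is encoded in units of the 8-byte element size,
// hence the division by 8.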
2142 }
2143
2144 NewI.cloneMemRefs(MI);
2145 constrainSelectedInstRegOperands(*NewI, *ST->getInstrInfo(),
2146 *MRI.getTargetRegisterInfo(),
2147 *ST->getRegBankInfo());
2148 MI.eraseFromParent();
2149 return true;
2150 }
2151
2152 if (!ValTy.isPointerVector() ||
2153 ValTy.getElementType().getAddressSpace() != 0) {
2154 LLVM_DEBUG(dbgs() << "Tried to do custom legalization on wrong load/store");
2155 return false;
2156 }
2157
2158 unsigned PtrSize = ValTy.getElementType().getSizeInBits();
2159 const LLT NewTy = LLT::vector(ValTy.getElementCount(), PtrSize);
2160 auto &MMO = **MI.memoperands_begin();
2161 MMO.setType(NewTy);
2162
2163 if (MI.getOpcode() == TargetOpcode::G_STORE) {
2164 auto Bitcast = MIRBuilder.buildBitcast(NewTy, ValReg);
2165 MIRBuilder.buildStore(Bitcast.getReg(0), MI.getOperand(1), MMO);
2166 } else {
2167 auto NewLoad = MIRBuilder.buildLoad(NewTy, MI.getOperand(1), MMO);
2168 MIRBuilder.buildBitcast(ValReg, NewLoad);
2169 }
2170 MI.eraseFromParent();
2171 return true;
2172}
2173
2174bool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI,
2175 MachineRegisterInfo &MRI,
2176 MachineIRBuilder &MIRBuilder) const {
2177 MachineFunction &MF = MIRBuilder.getMF();
2178 Align Alignment(MI.getOperand(2).getImm());
2179 Register Dst = MI.getOperand(0).getReg();
2180 Register ListPtr = MI.getOperand(1).getReg();
2181
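// Lower G_VAARG by hand: load the current list pointer, realign it if the
// value needs more than pointer alignment, load the value, then advance the
// pointer by the value size rounded up to pointer alignment and store it
// back.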
2182 LLT PtrTy = MRI.getType(ListPtr);
2183 LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());
2184
2185 const unsigned PtrSize = PtrTy.getSizeInBits() / 8;
2186 const Align PtrAlign = Align(PtrSize);
2187 auto List = MIRBuilder.buildLoad(
2188 PtrTy, ListPtr,
2189 *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
2190 PtrTy, PtrAlign));
2191
2192 MachineInstrBuilder DstPtr;
2193 if (Alignment > PtrAlign) {
2194 // Realign the list to the actual required alignment.
2195 auto AlignMinus1 =
2196 MIRBuilder.buildConstant(IntPtrTy, Alignment.value() - 1);
2197 auto ListTmp = MIRBuilder.buildPtrAdd(PtrTy, List, AlignMinus1.getReg(0));
2198 DstPtr = MIRBuilder.buildMaskLowPtrBits(PtrTy, ListTmp, Log2(Alignment));
2199 } else
2200 DstPtr = List;
2201
2202 LLT ValTy = MRI.getType(Dst);
2203 uint64_t ValSize = ValTy.getSizeInBits() / 8;
2204 MIRBuilder.buildLoad(
2205 Dst, DstPtr,
2206 *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
2207 ValTy, std::max(Alignment, PtrAlign)));
2208
2209 auto Size = MIRBuilder.buildConstant(IntPtrTy, alignTo(ValSize, PtrAlign));
2210
2211 auto NewList = MIRBuilder.buildPtrAdd(PtrTy, DstPtr, Size.getReg(0));
2212
2213 MIRBuilder.buildStore(NewList, ListPtr,
2214 *MF.getMachineMemOperand(MachinePointerInfo(),
2215 MachineMemOperand::MOStore,
2216 PtrTy, PtrAlign));
2217
2218 MI.eraseFromParent();
2219 return true;
2220}
2221
2222bool AArch64LegalizerInfo::legalizeBitfieldExtract(
2223 MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
2224 // Only legal if we can select immediate forms.
2225 // TODO: Lower this otherwise.
2226 return getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI) &&
2227 getIConstantVRegValWithLookThrough(MI.getOperand(3).getReg(), MRI);
2228}
2229
2230bool AArch64LegalizerInfo::legalizeCTPOP(MachineInstr &MI,
2231 MachineRegisterInfo &MRI,
2232 LegalizerHelper &Helper) const {
2233 // When there is no integer popcount instruction (FEAT_CSSC isn't available),
2234 // it can be more efficiently lowered to the following sequence that uses
2235 // AdvSIMD registers/instructions as long as the copies to/from the AdvSIMD
2236 // registers are cheap.
2237 // FMOV D0, X0 // copy 64-bit int to vector, high bits zero'd
2238 // CNT V0.8B, V0.8B // 8xbyte pop-counts
2239 // ADDV B0, V0.8B // sum 8xbyte pop-counts
2240 // UMOV X0, V0.B[0] // copy byte result back to integer reg
2241 //
2242 // For 128 bit vector popcounts, we lower to the following sequence:
2243 // cnt.16b v0, v0 // v8s16, v4s32, v2s64
2244 // uaddlp.8h v0, v0 // v8s16, v4s32, v2s64
2245 // uaddlp.4s v0, v0 // v4s32, v2s64
2246 // uaddlp.2d v0, v0 // v2s64
2247 //
2248 // For 64 bit vector popcounts, we lower to the following sequence:
2249 // cnt.8b v0, v0 // v4s16, v2s32
2250 // uaddlp.4h v0, v0 // v4s16, v2s32
2251 // uaddlp.2s v0, v0 // v2s32
2252
2253 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2254 Register Dst = MI.getOperand(0).getReg();
2255 Register Val = MI.getOperand(1).getReg();
2256 LLT Ty = MRI.getType(Val);
2257 unsigned Size = Ty.getSizeInBits();
2258
2259 assert(Ty == MRI.getType(Dst) &&
2260 "Expected src and dst to have the same type!");
2261
2262 if (ST->hasCSSC() && Ty.isScalar() && Size == 128) {
2263 LLT s64 = LLT::scalar(64);
2264
2265 auto Split = MIRBuilder.buildUnmerge(s64, Val);
2266 auto CTPOP1 = MIRBuilder.buildCTPOP(s64, Split->getOperand(0));
2267 auto CTPOP2 = MIRBuilder.buildCTPOP(s64, Split->getOperand(1));
2268 auto Add = MIRBuilder.buildAdd(s64, CTPOP1, CTPOP2);
2269
2270 MIRBuilder.buildZExt(Dst, Add);
2271 MI.eraseFromParent();
2272 return true;
2273 }
2274
2275 if (!ST->hasNEON() ||
2276 MI.getMF()->getFunction().hasFnAttribute(Attribute::NoImplicitFloat)) {
2277 // Use generic lowering when custom lowering is not possible.
2278 return Ty.isScalar() && (Size == 32 || Size == 64) &&
2279 Helper.lowerBitCount(MI) ==
2280 LegalizerHelper::LegalizeResult::Legalized;
2281 }
2282
2283 // Pre-conditioning: widen Val up to the nearest vector type.
2284 // s32,s64,v4s16,v2s32 -> v8i8
2285 // v8s16,v4s32,v2s64 -> v16i8
2286 LLT VTy = Size == 128 ? LLT::fixed_vector(16, 8) : LLT::fixed_vector(8, 8);
2287 if (Ty.isScalar()) {
2288 assert((Size == 32 || Size == 64 || Size == 128) && "Expected only 32, 64, or 128 bit scalars!");
2289 if (Size == 32) {
2290 Val = MIRBuilder.buildZExt(LLT::scalar(64), Val).getReg(0);
2291 }
2292 }
2293 Val = MIRBuilder.buildBitcast(VTy, Val).getReg(0);
2294
2295 // Count bits in each byte-sized lane.
2296 auto CTPOP = MIRBuilder.buildCTPOP(VTy, Val);
2297
2298 // Sum across lanes.
2299
2300 if (ST->hasDotProd() && Ty.isVector() && Ty.getNumElements() >= 2 &&
2301 Ty.getScalarSizeInBits() != 16) {
2302 LLT Dt = Ty == LLT::fixed_vector(2, 64) ? LLT::fixed_vector(4, 32) : Ty;
2303 auto Zeros = MIRBuilder.buildConstant(Dt, 0);
2304 auto Ones = MIRBuilder.buildConstant(VTy, 1);
2305 MachineInstrBuilder Sum;
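// A UDOT against an all-ones vector adds each group of four byte counts into
// a 32-bit lane, replacing the uaddlp chain; the v2s64 case needs one extra
// UADDLP to pair the 32-bit sums into 64-bit lanes.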
2306
2307 if (Ty == LLT::fixed_vector(2, 64)) {
2308 auto UDOT =
2309 MIRBuilder.buildInstr(AArch64::G_UDOT, {Dt}, {Zeros, Ones, CTPOP});
2310 Sum = MIRBuilder.buildInstr(AArch64::G_UADDLP, {Ty}, {UDOT});
2311 } else if (Ty == LLT::fixed_vector(4, 32)) {
2312 Sum = MIRBuilder.buildInstr(AArch64::G_UDOT, {Dt}, {Zeros, Ones, CTPOP});
2313 } else if (Ty == LLT::fixed_vector(2, 32)) {
2314 Sum = MIRBuilder.buildInstr(AArch64::G_UDOT, {Dt}, {Zeros, Ones, CTPOP});
2315 } else {
2316 llvm_unreachable("unexpected vector shape");
2317 }
2318
2319 Sum->getOperand(0).setReg(Dst);
2320 MI.eraseFromParent();
2321 return true;
2322 }
2323
2324 Register HSum = CTPOP.getReg(0);
2325 unsigned Opc;
2326 SmallVector<LLT> HAddTys;
2327 if (Ty.isScalar()) {
2328 Opc = Intrinsic::aarch64_neon_uaddlv;
2329 HAddTys.push_back(LLT::scalar(32));
2330 } else if (Ty == LLT::fixed_vector(8, 16)) {
2331 Opc = Intrinsic::aarch64_neon_uaddlp;
2332 HAddTys.push_back(LLT::fixed_vector(8, 16));
2333 } else if (Ty == LLT::fixed_vector(4, 32)) {
2334 Opc = Intrinsic::aarch64_neon_uaddlp;
2335 HAddTys.push_back(LLT::fixed_vector(8, 16));
2336 HAddTys.push_back(LLT::fixed_vector(4, 32));
2337 } else if (Ty == LLT::fixed_vector(2, 64)) {
2338 Opc = Intrinsic::aarch64_neon_uaddlp;
2339 HAddTys.push_back(LLT::fixed_vector(8, 16));
2340 HAddTys.push_back(LLT::fixed_vector(4, 32));
2341 HAddTys.push_back(LLT::fixed_vector(2, 64));
2342 } else if (Ty == LLT::fixed_vector(4, 16)) {
2343 Opc = Intrinsic::aarch64_neon_uaddlp;
2344 HAddTys.push_back(LLT::fixed_vector(4, 16));
2345 } else if (Ty == LLT::fixed_vector(2, 32)) {
2346 Opc = Intrinsic::aarch64_neon_uaddlp;
2347 HAddTys.push_back(LLT::fixed_vector(4, 16));
2348 HAddTys.push_back(LLT::fixed_vector(2, 32));
2349 } else
2350 llvm_unreachable("unexpected vector shape");
2351 MachineInstrBuilder UADD;
2352 for (LLT HTy : HAddTys) {
2353 UADD = MIRBuilder.buildIntrinsic(Opc, {HTy}).addUse(HSum);
2354 HSum = UADD.getReg(0);
2355 }
2356
2357 // Post-conditioning.
2358 if (Ty.isScalar() && (Size == 64 || Size == 128))
2359 MIRBuilder.buildZExt(Dst, UADD);
2360 else
2361 UADD->getOperand(0).setReg(Dst);
2362 MI.eraseFromParent();
2363 return true;
2364}
2365
2366bool AArch64LegalizerInfo::legalizeAtomicCmpxchg128(
2367 MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
2368 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2369 LLT s64 = LLT::scalar(64);
2370 auto Addr = MI.getOperand(1).getReg();
2371 auto DesiredI = MIRBuilder.buildUnmerge({s64, s64}, MI.getOperand(2));
2372 auto NewI = MIRBuilder.buildUnmerge({s64, s64}, MI.getOperand(3));
2373 auto DstLo = MRI.createGenericVirtualRegister(s64);
2374 auto DstHi = MRI.createGenericVirtualRegister(s64);
2375
2376 MachineInstrBuilder CAS;
2377 if (ST->hasLSE()) {
2378 // We have 128-bit CASP instructions taking XSeqPair registers, which are
2379 // s128. We need the merge/unmerge to bracket the expansion and pair up with
2380 // the rest of the MIR so we must reassemble the extracted registers into a
2381 // 128-bit known-regclass one with code like this:
2382 //
2383 // %in1 = REG_SEQUENCE Lo, Hi ; One for each input
2384 // %out = CASP %in1, ...
2385 // %OldLo = G_EXTRACT %out, 0
2386 // %OldHi = G_EXTRACT %out, 64
2387 auto Ordering = (*MI.memoperands_begin())->getMergedOrdering();
2388 unsigned Opcode;
2389 switch (Ordering) {
2390 case AtomicOrdering::Acquire:
2391 Opcode = AArch64::CASPAX;
2392 break;
2393 case AtomicOrdering::Release:
2394 Opcode = AArch64::CASPLX;
2395 break;
2396 case AtomicOrdering::AcquireRelease:
2397 case AtomicOrdering::SequentiallyConsistent:
2398 Opcode = AArch64::CASPALX;
2399 break;
2400 default:
2401 Opcode = AArch64::CASPX;
2402 break;
2403 }
2404
2405 LLT s128 = LLT::scalar(128);
2406 auto CASDst = MRI.createGenericVirtualRegister(s128);
2407 auto CASDesired = MRI.createGenericVirtualRegister(s128);
2408 auto CASNew = MRI.createGenericVirtualRegister(s128);
2409 MIRBuilder.buildInstr(TargetOpcode::REG_SEQUENCE, {CASDesired}, {})
2410 .addUse(DesiredI->getOperand(0).getReg())
2411 .addImm(AArch64::sube64)
2412 .addUse(DesiredI->getOperand(1).getReg())
2413 .addImm(AArch64::subo64);
2414 MIRBuilder.buildInstr(TargetOpcode::REG_SEQUENCE, {CASNew}, {})
2415 .addUse(NewI->getOperand(0).getReg())
2416 .addImm(AArch64::sube64)
2417 .addUse(NewI->getOperand(1).getReg())
2418 .addImm(AArch64::subo64);
2419
2420 CAS = MIRBuilder.buildInstr(Opcode, {CASDst}, {CASDesired, CASNew, Addr});
2421
2422 MIRBuilder.buildExtract({DstLo}, {CASDst}, 0);
2423 MIRBuilder.buildExtract({DstHi}, {CASDst}, 64);
2424 } else {
2425 // The -O0 CMP_SWAP_128 is friendlier to generate code for because LDXP/STXP
2426 // can take arbitrary registers so it just has the normal GPR64 operands the
2427 // rest of AArch64 is expecting.
2428 auto Ordering = (*MI.memoperands_begin())->getMergedOrdering();
2429 unsigned Opcode;
2430 switch (Ordering) {
2431 case AtomicOrdering::Acquire:
2432 Opcode = AArch64::CMP_SWAP_128_ACQUIRE;
2433 break;
2434 case AtomicOrdering::Release:
2435 Opcode = AArch64::CMP_SWAP_128_RELEASE;
2436 break;
2437 case AtomicOrdering::AcquireRelease:
2438 case AtomicOrdering::SequentiallyConsistent:
2439 Opcode = AArch64::CMP_SWAP_128;
2440 break;
2441 default:
2442 Opcode = AArch64::CMP_SWAP_128_MONOTONIC;
2443 break;
2444 }
2445
2446 auto Scratch = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2447 CAS = MIRBuilder.buildInstr(Opcode, {DstLo, DstHi, Scratch},
2448 {Addr, DesiredI->getOperand(0),
2449 DesiredI->getOperand(1), NewI->getOperand(0),
2450 NewI->getOperand(1)});
2451 }
2452
2453 CAS.cloneMemRefs(MI);
2454 constrainSelectedInstRegOperands(*CAS, *ST->getInstrInfo(),
2455 *MRI.getTargetRegisterInfo(),
2456 *ST->getRegBankInfo());
2457
2458 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {DstLo, DstHi});
2459 MI.eraseFromParent();
2460 return true;
2461}
2462
2463bool AArch64LegalizerInfo::legalizeCTTZ(MachineInstr &MI,
2464 LegalizerHelper &Helper) const {
2465 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2466 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
2467 LLT Ty = MRI.getType(MI.getOperand(1).getReg());
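// AArch64 has RBIT and CLZ but no count-trailing-zeros instruction, so
// compute cttz(x) as ctlz(bitreverse(x)).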
2468 auto BitReverse = MIRBuilder.buildBitReverse(Ty, MI.getOperand(1));
2469 MIRBuilder.buildCTLZ(MI.getOperand(0).getReg(), BitReverse);
2470 MI.eraseFromParent();
2471 return true;
2472}
2473
2474bool AArch64LegalizerInfo::legalizeMemOps(MachineInstr &MI,
2475 LegalizerHelper &Helper) const {
2476 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2477
2478 // Tagged version MOPSMemorySetTagged is legalised in legalizeIntrinsic
2479 if (MI.getOpcode() == TargetOpcode::G_MEMSET) {
2480 // Anyext the value being set to 64 bit (only the bottom 8 bits are read by
2481 // the instruction).
2482 auto &Value = MI.getOperand(1);
2483 Register ExtValueReg =
2484 MIRBuilder.buildAnyExt(LLT::scalar(64), Value).getReg(0);
2485 Value.setReg(ExtValueReg);
2486 return true;
2487 }
2488
2489 return false;
2490}
2491
2492bool AArch64LegalizerInfo::legalizeExtractVectorElt(
2493 MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
2494 const GExtractVectorElement *Element = cast<GExtractVectorElement>(&MI);
2495 auto VRegAndVal =
2496 getIConstantVRegValWithLookThrough(Element->getIndexReg(), MRI);
2497 if (VRegAndVal)
2498 return true;
2499 LLT VecTy = MRI.getType(Element->getVectorReg());
2500 if (VecTy.isScalableVector())
2501 return true;
2502 return Helper.lowerExtractInsertVectorElt(MI) !=
2503 LegalizerHelper::LegalizeResult::UnableToLegalize;
2504}
2505
2506bool AArch64LegalizerInfo::legalizeDynStackAlloc(
2507 MachineInstr &MI, LegalizerHelper &Helper) const {
2508 MachineFunction &MF = *MI.getParent()->getParent();
2509 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
2510 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
2511
2512 // If stack probing is not enabled for this function, use the default
2513 // lowering.
2514 if (!MF.getFunction().hasFnAttribute("probe-stack") ||
2515 MF.getFunction().getFnAttribute("probe-stack").getValueAsString() !=
2516 "inline-asm") {
2517 Helper.lowerDynStackAlloc(MI);
2518 return true;
2519 }
2520
2521 Register Dst = MI.getOperand(0).getReg();
2522 Register AllocSize = MI.getOperand(1).getReg();
2523 Align Alignment = assumeAligned(MI.getOperand(2).getImm());
2524
2525 assert(MRI.getType(Dst) == LLT::pointer(0, 64) &&
2526 "Unexpected type for dynamic alloca");
2527 assert(MRI.getType(AllocSize) == LLT::scalar(64) &&
2528 "Unexpected type for dynamic alloca");
2529
2530 LLT PtrTy = MRI.getType(Dst);
2531 Register SPReg =
2532 Helper.getTargetLowering().getStackPointerRegisterToSaveRestore();
2533 Register SPTmp =
2534 Helper.getDynStackAllocTargetPtr(SPReg, AllocSize, Alignment, PtrTy);
2535 auto NewMI =
2536 MIRBuilder.buildInstr(AArch64::PROBED_STACKALLOC_DYN, {}, {SPTmp});
2537 MRI.setRegClass(NewMI.getReg(0), &AArch64::GPR64commonRegClass);
2538 MIRBuilder.setInsertPt(*NewMI->getParent(), NewMI);
2539 MIRBuilder.buildCopy(Dst, SPTmp);
2540
2541 MI.eraseFromParent();
2542 return true;
2543}
2544
2545bool AArch64LegalizerInfo::legalizePrefetch(MachineInstr &MI,
2546 LegalizerHelper &Helper) const {
2547 MachineIRBuilder &MIB = Helper.MIRBuilder;
2548 auto &AddrVal = MI.getOperand(0);
2549
2550 int64_t IsWrite = MI.getOperand(1).getImm();
2551 int64_t Locality = MI.getOperand(2).getImm();
2552 int64_t IsData = MI.getOperand(3).getImm();
2553
2554 bool IsStream = Locality == 0;
2555 if (Locality != 0) {
2556 assert(Locality <= 3 && "Prefetch locality out-of-range");
2557 // The IR locality degree counts the other way from the PRFM cache level:
2558 // higher locality means a faster (lower-numbered) cache, and the encoding
2559 // starts at 0 for L1, so invert the value.
2560 Locality = 3 - Locality;
2561 }
2562
2563 unsigned PrfOp = (IsWrite << 4) | (!IsData << 3) | (Locality << 1) | IsStream;
2564
2565 MIB.buildInstr(AArch64::G_AARCH64_PREFETCH).addImm(PrfOp).add(AddrVal);
2566 MI.eraseFromParent();
2567 return true;
2568}
2569
2570bool AArch64LegalizerInfo::legalizeFptrunc(MachineInstr &MI,
2571 MachineIRBuilder &MIRBuilder,
2572 MachineRegisterInfo &MRI) const {
2573 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
2574 assert(SrcTy.isFixedVector() && isPowerOf2_32(SrcTy.getNumElements()) &&
2575 "Expected a power of 2 elements");
2576
2577 LLT s16 = LLT::scalar(16);
2578 LLT s32 = LLT::scalar(32);
2579 LLT s64 = LLT::scalar(64);
2580 LLT v2s16 = LLT::fixed_vector(2, s16);
2581 LLT v4s16 = LLT::fixed_vector(4, s16);
2582 LLT v2s32 = LLT::fixed_vector(2, s32);
2583 LLT v4s32 = LLT::fixed_vector(4, s32);
2584 LLT v2s64 = LLT::fixed_vector(2, s64);
2585
2586 SmallVector<Register> RegsToUnmergeTo;
2587 SmallVector<Register> TruncOddDstRegs;
2588 SmallVector<Register> RegsToMerge;
2589
2590 unsigned ElemCount = SrcTy.getNumElements();
2591
2592 // Find the biggest size chunks we can work with
2593 int StepSize = ElemCount % 4 ? 2 : 4;
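// With a multiple of four source elements, two v2s32 round-to-odd results can
// be concatenated and truncated to v4s16 in one go; otherwise each v2s32 is
// truncated to v2s16 separately.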
2594
2595 // If we have a power of 2 greater than 2, we need to first unmerge into
2596 // enough pieces
2597 if (ElemCount <= 2)
2598 RegsToUnmergeTo.push_back(Src);
2599 else {
2600 for (unsigned i = 0; i < ElemCount / 2; ++i)
2601 RegsToUnmergeTo.push_back(MRI.createGenericVirtualRegister(v2s64));
2602
2603 MIRBuilder.buildUnmerge(RegsToUnmergeTo, Src);
2604 }
2605
2606 // Create all of the round-to-odd instructions and store them
2607 for (auto SrcReg : RegsToUnmergeTo) {
2608 Register Mid =
2609 MIRBuilder.buildInstr(AArch64::G_FPTRUNC_ODD, {v2s32}, {SrcReg})
2610 .getReg(0);
2611 TruncOddDstRegs.push_back(Mid);
2612 }
2613
2614 // Truncate 4s32 to 4s16 if we can to reduce instruction count, otherwise
2615 // truncate 2s32 to 2s16.
2616 unsigned Index = 0;
2617 for (unsigned LoopIter = 0; LoopIter < ElemCount / StepSize; ++LoopIter) {
2618 if (StepSize == 4) {
2619 Register ConcatDst =
2620 MIRBuilder
2621 .buildInstr(TargetOpcode::G_CONCAT_VECTORS,
2622 {v4s32}, {TruncOddDstRegs[Index++], TruncOddDstRegs[Index++]})
2623 .getReg(0);
2624
2625 RegsToMerge.push_back(
2626 MIRBuilder.buildFPTrunc(v4s16, ConcatDst).getReg(0));
2627 } else {
2628 RegsToMerge.push_back(
2629 MIRBuilder.buildFPTrunc(v2s16, TruncOddDstRegs[Index++]).getReg(0));
2630 }
2631 }
2632
2633 // If there is only one register, replace the destination
2634 if (RegsToMerge.size() == 1) {
2635 MRI.replaceRegWith(Dst, RegsToMerge.pop_back_val());
2636 MI.eraseFromParent();
2637 return true;
2638 }
2639
2640 // Merge the rest of the instructions & replace the register
2641 Register Fin = MIRBuilder.buildMergeLikeInstr(DstTy, RegsToMerge).getReg(0);
2642 MRI.replaceRegWith(Dst, Fin);
2643 MI.eraseFromParent();
2644 return true;
2645}